Bootloop detection & recovery (#4793)
* added boot loop detection and config backup * automatic OTA rollback if loading backup does not fix it * added new file handling functions * adding verification of json files, added config restore at bootup if broken * added function to compare contents of two files for future use (currently not used)
This commit is contained in:
125
wled00/util.cpp
125
wled00/util.cpp
@@ -1,6 +1,12 @@
|
||||
#include "wled.h"
|
||||
#include "fcn_declare.h"
|
||||
#include "const.h"
|
||||
#ifdef ESP8266
|
||||
#include "user_interface.h" // for bootloop detection
|
||||
#elif ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(4, 4, 0)
|
||||
#include "esp32/rtc.h" // for bootloop detection
|
||||
#include <Update.h>
|
||||
#endif
|
||||
|
||||
|
||||
//helper to get int value at a position in string
|
||||
@@ -706,6 +712,125 @@ void *realloc_malloc(void *ptr, size_t size) {
|
||||
}
|
||||
#endif
|
||||
|
||||
// bootloop detection and handling
|
||||
// checks if the ESP reboots multiple times due to a crash or watchdog timeout
|
||||
// if a bootloop is detected: restore settings from backup, then reset settings, then switch boot image (and repeat)
|
||||
|
||||
#define BOOTLOOP_THRESHOLD 5 // number of consecutive crashes to trigger bootloop detection
|
||||
#define BOOTLOOP_ACTION_RESTORE 0 // default action: restore config from /cfg.bak
|
||||
#define BOOTLOOP_ACTION_RESET 1 // if restore does not work, reset config (rename /cfg.json to /cfg.fault)
|
||||
#define BOOTLOOP_ACTION_OTA 2 // swap the boot partition
|
||||
#define BOOTLOOP_ACTION_DUMP 3 // nothing seems to help, dump files to serial and reboot (until hardware reset)
|
||||
#ifdef ESP8266
|
||||
#define BOOTLOOP_INTERVAL_TICKS (5 * 160000) // time limit between crashes: ~5 seconds in RTC ticks
|
||||
#define BOOT_TIME_IDX 0 // index in RTC memory for boot time
|
||||
#define CRASH_COUNTER_IDX 1 // index in RTC memory for crash counter
|
||||
#define ACTIONT_TRACKER_IDX 2 // index in RTC memory for boot action
|
||||
#else
|
||||
#define BOOTLOOP_INTERVAL_TICKS 5000 // time limit between crashes: ~5 seconds in milliseconds
|
||||
// variables in RTC_NOINIT memory persist between reboots (but not on hardware reset)
|
||||
RTC_NOINIT_ATTR static uint32_t bl_last_boottime;
|
||||
RTC_NOINIT_ATTR static uint32_t bl_crashcounter;
|
||||
RTC_NOINIT_ATTR static uint32_t bl_actiontracker;
|
||||
void bootloopCheckOTA() { bl_actiontracker = BOOTLOOP_ACTION_OTA; } // swap boot image if bootloop is detected instead of restoring config
|
||||
#endif
|
||||
|
||||
// detect bootloop by checking the reset reason and the time since last boot
|
||||
static bool detectBootLoop() {
|
||||
#if !defined(ESP8266)
|
||||
#if ESP_IDF_VERSION >= ESP_IDF_VERSION_VAL(4, 4, 0)
|
||||
uint32_t rtctime = esp_rtc_get_time_us() / 1000; // convert to milliseconds
|
||||
esp_reset_reason_t reason = esp_reset_reason();
|
||||
|
||||
if (!(reason == ESP_RST_PANIC || reason == ESP_RST_WDT || reason == ESP_RST_INT_WDT || reason == ESP_RST_TASK_WDT)) {
|
||||
// no crash detected, init variables
|
||||
bl_crashcounter = 0;
|
||||
bl_last_boottime = rtctime;
|
||||
if(reason != ESP_RST_SW)
|
||||
bl_actiontracker = BOOTLOOP_ACTION_RESTORE; // init action tracker if not an intentional reboot (e.g. from OTA or bootloop handler)
|
||||
} else if (reason == ESP_RST_BROWNOUT) {
|
||||
// crash due to brownout can't be detected unless using flash memory to store bootloop variables
|
||||
// this is a simpler way to preemtively revert the config in case current brownout is caused by a bad choice of settings
|
||||
DEBUG_PRINTLN(F("brownout detected"));
|
||||
//restoreConfig(); // TODO: blindly restoring config if brownout detected is a bad idea, need a better way (if at all)
|
||||
} else {
|
||||
uint32_t rebootinterval = rtctime - bl_last_boottime;
|
||||
bl_last_boottime = rtctime; // store current runtime for next reboot
|
||||
if (rebootinterval < BOOTLOOP_INTERVAL_TICKS) {
|
||||
bl_crashcounter++;
|
||||
if (bl_crashcounter >= BOOTLOOP_THRESHOLD) {
|
||||
DEBUG_PRINTLN(F("!BOOTLOOP DETECTED!"));
|
||||
bl_crashcounter = 0;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
#else // ESP8266
|
||||
rst_info* resetreason = system_get_rst_info();
|
||||
uint32_t bl_last_boottime;
|
||||
uint32_t bl_crashcounter;
|
||||
uint32_t bl_actiontracker;
|
||||
uint32_t rtctime = system_get_rtc_time();
|
||||
|
||||
if (!(resetreason->reason == REASON_EXCEPTION_RST || resetreason->reason == REASON_WDT_RST)) {
|
||||
// no crash detected, init variables
|
||||
bl_crashcounter = 0;
|
||||
ESP.rtcUserMemoryWrite(BOOT_TIME_IDX, &rtctime, sizeof(uint32_t));
|
||||
ESP.rtcUserMemoryWrite(CRASH_COUNTER_IDX, &bl_crashcounter, sizeof(uint32_t));
|
||||
if(resetreason->reason != REASON_SOFT_RESTART) {
|
||||
bl_actiontracker = BOOTLOOP_ACTION_RESTORE; // init action tracker if not an intentional reboot (e.g. from OTA or bootloop handler)
|
||||
ESP.rtcUserMemoryWrite(ACTIONT_TRACKER_IDX, &bl_actiontracker, sizeof(uint32_t));
|
||||
}
|
||||
} else {
|
||||
// system has crashed
|
||||
ESP.rtcUserMemoryRead(BOOT_TIME_IDX, &bl_last_boottime, sizeof(uint32_t));
|
||||
ESP.rtcUserMemoryRead(CRASH_COUNTER_IDX, &bl_crashcounter, sizeof(uint32_t));
|
||||
uint32_t rebootinterval = rtctime - bl_last_boottime;
|
||||
ESP.rtcUserMemoryWrite(BOOT_TIME_IDX, &rtctime, sizeof(uint32_t)); // store current ticks for next reboot
|
||||
if (rebootinterval < BOOTLOOP_INTERVAL_TICKS) {
|
||||
bl_crashcounter++;
|
||||
ESP.rtcUserMemoryWrite(CRASH_COUNTER_IDX, &bl_crashcounter, sizeof(uint32_t));
|
||||
if (bl_crashcounter >= BOOTLOOP_THRESHOLD) {
|
||||
DEBUG_PRINTLN(F("BOOTLOOP DETECTED"));
|
||||
bl_crashcounter = 0;
|
||||
ESP.rtcUserMemoryWrite(CRASH_COUNTER_IDX, &bl_crashcounter, sizeof(uint32_t));
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif
|
||||
return false; // no bootloop detected
|
||||
}
|
||||
|
||||
void handleBootLoop() {
|
||||
DEBUG_PRINTLN(F("checking for bootloop"));
|
||||
if (!detectBootLoop()) return; // no bootloop detected
|
||||
#ifdef ESP8266
|
||||
uint32_t bl_actiontracker;
|
||||
ESP.rtcUserMemoryRead(ACTIONT_TRACKER_IDX, &bl_actiontracker, sizeof(uint32_t));
|
||||
#endif
|
||||
if (bl_actiontracker == BOOTLOOP_ACTION_RESTORE) {
|
||||
restoreConfig(); // note: if this fails, could reset immediately. instead just let things play out and save a few lines of code
|
||||
bl_actiontracker = BOOTLOOP_ACTION_RESET; // reset config if it keeps bootlooping
|
||||
} else if (bl_actiontracker == BOOTLOOP_ACTION_RESET) {
|
||||
resetConfig();
|
||||
bl_actiontracker = BOOTLOOP_ACTION_OTA; // swap boot partition if it keeps bootlooping. On ESP8266 this is the same as BOOTLOOP_ACTION_NONE
|
||||
}
|
||||
#ifndef ESP8266
|
||||
else if (bl_actiontracker == BOOTLOOP_ACTION_OTA) {
|
||||
if(Update.canRollBack()) {
|
||||
DEBUG_PRINTLN(F("Swapping boot partition..."));
|
||||
Update.rollBack(); // swap boot partition
|
||||
}
|
||||
bl_actiontracker = BOOTLOOP_ACTION_DUMP; // out of options
|
||||
}
|
||||
#endif
|
||||
else
|
||||
dumpFilesToSerial();
|
||||
ESP.restart(); // restart cleanly and don't wait for another crash
|
||||
}
|
||||
|
||||
/*
|
||||
* Fixed point integer based Perlin noise functions by @dedehai
|
||||
* Note: optimized for speed and to mimic fastled inoise functions, not for accuracy or best randomness
|
||||
|
||||
Reference in New Issue
Block a user