From 76cb2e99886e4e7f7188353dcfcb50e14f28e646 Mon Sep 17 00:00:00 2001 From: Damian Schneider Date: Tue, 16 Sep 2025 19:46:16 +0200 Subject: [PATCH] Improvements to heap-memory and PSRAM handling (#4791) * Improved heap and PSRAM handling - Segment `allocateData()` uses more elaborate DRAM checking to reduce fragmentation and allow for larger setups to run on low heap - Segment data allocation fails if minimum contiguous block size runs low to keep the UI working - Increased `MAX_SEGMENT_DATA` to account for better segment data handling - Memory allocation functions try to keep enough DRAM for segment data - Added constant `PSRAM_THRESHOLD` to improve PSRAM usage - Increase MIN_HEAP_SIZE to reduce risk of breaking UI due to low memory for JSON response - ESP32 makes use of IRAM (no 8bit access) for pixel buffers, freeing up to 50kB of RAM - Fix to properly get available heap on all platforms: added function `getFreeHeapSize()` - Bugfix for effects that divide by SEGLEN: don't run FX in service() if segment is not active - Syntax fix in AR: calloc() uses (numelements, size) as arguments * Added new functions for allocation and heap checking - added `allocate_buffer()` function that can be used to allocate large buffers: takes parameters to set preferred RAM location, including 32bit accessible RAM on ESP32. Returns null if heap runs low or switches to PSRAM - getFreeHeapSize() and getContiguousFreeHeap() helper functions for all platforms to correctly report free usable heap - updated some constants - updated segment data allocation to free the data if it is large - replaced "psramsafe" variable with its #ifdef: BOARD_HAS_PSRAM and made accommodating changes - added some compile-time checks to handle invalid env. 
definitions - updated all allocation functions and some of the logic behind them - added use of fast RTC-Memory where available - increased MIN_HEAP_SIZE for all systems (improved stability in tests) - updated memory calculation in web-UI to account for required segment buffer - added UI alerts if buffer allocation fails - made getUsedSegmentData() non-private (used in buffer alloc function) - changed MAX_SEGMENT_DATA - added more detailed memory log to DEBUG output - added debug output to buffer alloc function --- usermods/audioreactive/audio_reactive.cpp | 4 +- wled00/FX.h | 33 ++-- wled00/FX_fcn.cpp | 89 +++++---- wled00/bus_manager.cpp | 48 ++--- wled00/cfg.cpp | 2 +- wled00/const.h | 17 +- wled00/data/settings_leds.htm | 3 +- wled00/fcn_declare.h | 57 +++--- wled00/file.cpp | 4 +- wled00/json.cpp | 10 +- wled00/util.cpp | 228 +++++++++++++++------- wled00/wled.cpp | 62 ++++-- wled00/wled.h | 16 +- wled00/wled_server.cpp | 2 +- wled00/ws.cpp | 8 +- 15 files changed, 364 insertions(+), 219 deletions(-) mode change 100644 => 100755 wled00/FX_fcn.cpp diff --git a/usermods/audioreactive/audio_reactive.cpp b/usermods/audioreactive/audio_reactive.cpp index 06268560a..25b813520 100644 --- a/usermods/audioreactive/audio_reactive.cpp +++ b/usermods/audioreactive/audio_reactive.cpp @@ -224,8 +224,8 @@ void FFTcode(void * parameter) DEBUGSR_PRINT("FFT started on core: "); DEBUGSR_PRINTLN(xPortGetCoreID()); // allocate FFT buffers on first call - if (vReal == nullptr) vReal = (float*) calloc(sizeof(float), samplesFFT); - if (vImag == nullptr) vImag = (float*) calloc(sizeof(float), samplesFFT); + if (vReal == nullptr) vReal = (float*) calloc(samplesFFT, sizeof(float)); + if (vImag == nullptr) vImag = (float*) calloc(samplesFFT, sizeof(float)); if ((vReal == nullptr) || (vImag == nullptr)) { // something went wrong if (vReal) free(vReal); vReal = nullptr; diff --git a/wled00/FX.h b/wled00/FX.h index 097c857ca..9ff0cf72c 100644 --- a/wled00/FX.h +++ b/wled00/FX.h @@ -88,23 
+88,26 @@ extern byte realtimeMode; // used in getMappedPixelIndex() #endif #define FPS_CALC_SHIFT 7 // bit shift for fixed point math -/* each segment uses 82 bytes of SRAM memory, so if you're application fails because of - insufficient memory, decreasing MAX_NUM_SEGMENTS may help */ +// heap memory limit for effects data, pixel buffers try to reserve it if PSRAM is available #ifdef ESP8266 #define MAX_NUM_SEGMENTS 16 /* How much data bytes all segments combined may allocate */ - #define MAX_SEGMENT_DATA 5120 + #define MAX_SEGMENT_DATA (6*1024) // 6k by default #elif defined(CONFIG_IDF_TARGET_ESP32S2) - #define MAX_NUM_SEGMENTS 20 - #define MAX_SEGMENT_DATA (MAX_NUM_SEGMENTS*512) // 10k by default (S2 is short on free RAM) + #define MAX_NUM_SEGMENTS 32 + #define MAX_SEGMENT_DATA (20*1024) // 20k by default (S2 is short on free RAM), limit does not apply if PSRAM is available #else - #define MAX_NUM_SEGMENTS 32 // warning: going beyond 32 may consume too much RAM for stable operation - #define MAX_SEGMENT_DATA (MAX_NUM_SEGMENTS*1280) // 40k by default + #ifdef BOARD_HAS_PSRAM + #define MAX_NUM_SEGMENTS 64 + #else + #define MAX_NUM_SEGMENTS 32 + #endif + #define MAX_SEGMENT_DATA (64*1024) // 64k by default, limit does not apply if PSRAM is available #endif /* How much data bytes each segment should max allocate to leave enough space for other segments, assuming each segment uses the same amount of data. 256 for ESP8266, 640 for ESP32. */ -#define FAIR_DATA_PER_SEG (MAX_SEGMENT_DATA / WS2812FX::getMaxSegments()) +#define FAIR_DATA_PER_SEG (MAX_SEGMENT_DATA / MAX_NUM_SEGMENTS) #define MIN_SHOW_DELAY (_frametime < 16 ? 
8 : 15) @@ -533,7 +536,6 @@ class Segment { protected: - inline static unsigned getUsedSegmentData() { return Segment::_usedSegmentData; } inline static void addUsedSegmentData(int len) { Segment::_usedSegmentData += len; } inline uint32_t *getPixels() const { return pixels; } @@ -600,8 +602,8 @@ class Segment { , _t(nullptr) { DEBUGFX_PRINTF_P(PSTR("-- Creating segment: %p [%d,%d:%d,%d]\n"), this, (int)start, (int)stop, (int)startY, (int)stopY); - // allocate render buffer (always entire segment) - pixels = static_cast(d_calloc(sizeof(uint32_t), length())); // error handling is also done in isActive() + // allocate render buffer (always entire segment), prefer PSRAM if DRAM is running low. Note: impact on FPS with PSRAM buffer is low (<2% with QSPI PSRAM) + pixels = static_cast(allocate_buffer(length() * sizeof(uint32_t), BFRALLOC_PREFER_PSRAM | BFRALLOC_NOBYTEACCESS | BFRALLOC_CLEAR)); if (!pixels) { DEBUGFX_PRINTLN(F("!!! Not enough RAM for pixel buffer !!!")); extern byte errorFlag; @@ -623,7 +625,7 @@ class Segment { #endif clearName(); deallocateData(); - d_free(pixels); + p_free(pixels); } Segment& operator= (const Segment &orig); // copy assignment @@ -646,7 +648,7 @@ class Segment { inline uint16_t groupLength() const { return grouping + spacing; } inline uint8_t getLightCapabilities() const { return _capabilities; } inline void deactivate() { setGeometry(0,0); } - inline Segment &clearName() { d_free(name); name = nullptr; return *this; } + inline Segment &clearName() { p_free(name); name = nullptr; return *this; } inline Segment &setName(const String &name) { return setName(name.c_str()); } inline static unsigned vLength() { return Segment::_vLength; } @@ -672,6 +674,7 @@ class Segment { inline uint16_t dataSize() const { return _dataLen; } bool allocateData(size_t len); // allocates effect data buffer in heap and clears it void deallocateData(); // deallocates (frees) effect data buffer from heap + inline static unsigned getUsedSegmentData() { return 
Segment::_usedSegmentData; } /** * Flags that before the next effect is calculated, * the internal segment state should be reset. @@ -868,8 +871,8 @@ class WS2812FX { } ~WS2812FX() { - d_free(_pixels); - d_free(_pixelCCT); // just in case + p_free(_pixels); + p_free(_pixelCCT); // just in case d_free(customMappingTable); _mode.clear(); _modeData.clear(); diff --git a/wled00/FX_fcn.cpp b/wled00/FX_fcn.cpp old mode 100644 new mode 100755 index 2f8d5515f..95a347b66 --- a/wled00/FX_fcn.cpp +++ b/wled00/FX_fcn.cpp @@ -68,10 +68,10 @@ Segment::Segment(const Segment &orig) { if (!stop) return; // nothing to do if segment is inactive/invalid if (orig.pixels) { // allocate pixel buffer: prefer IRAM/PSRAM - pixels = static_cast(d_malloc(sizeof(uint32_t) * orig.length())); + pixels = static_cast(allocate_buffer(orig.length() * sizeof(uint32_t), BFRALLOC_PREFER_PSRAM | BFRALLOC_NOBYTEACCESS)); if (pixels) { memcpy(pixels, orig.pixels, sizeof(uint32_t) * orig.length()); - if (orig.name) { name = static_cast(d_malloc(strlen(orig.name)+1)); if (name) strcpy(name, orig.name); } + if (orig.name) { name = static_cast(allocate_buffer(strlen(orig.name)+1, BFRALLOC_PREFER_PSRAM)); if (name) strcpy(name, orig.name); } if (orig.data) { if (allocateData(orig._dataLen)) memcpy(data, orig.data, orig._dataLen); } } else { DEBUGFX_PRINTLN(F("!!! 
Not enough RAM for pixel buffer !!!")); @@ -97,10 +97,10 @@ Segment& Segment::operator= (const Segment &orig) { //DEBUG_PRINTF_P(PSTR("-- Copying segment: %p -> %p\n"), &orig, this); if (this != &orig) { // clean destination - if (name) { d_free(name); name = nullptr; } + if (name) { p_free(name); name = nullptr; } if (_t) stopTransition(); // also erases _t deallocateData(); - d_free(pixels); + p_free(pixels); // copy source memcpy((void*)this, (void*)&orig, sizeof(Segment)); // erase pointers to allocated data @@ -111,10 +111,10 @@ Segment& Segment::operator= (const Segment &orig) { // copy source data if (orig.pixels) { // allocate pixel buffer: prefer IRAM/PSRAM - pixels = static_cast(d_malloc(sizeof(uint32_t) * orig.length())); + pixels = static_cast(allocate_buffer(orig.length() * sizeof(uint32_t), BFRALLOC_PREFER_PSRAM | BFRALLOC_NOBYTEACCESS)); if (pixels) { memcpy(pixels, orig.pixels, sizeof(uint32_t) * orig.length()); - if (orig.name) { name = static_cast(d_malloc(strlen(orig.name)+1)); if (name) strcpy(name, orig.name); } + if (orig.name) { name = static_cast(allocate_buffer(strlen(orig.name)+1, BFRALLOC_PREFER_PSRAM)); if (name) strcpy(name, orig.name); } if (orig.data) { if (allocateData(orig._dataLen)) memcpy(data, orig.data, orig._dataLen); } } else { DEBUG_PRINTLN(F("!!! 
Not enough RAM for pixel buffer !!!")); @@ -130,10 +130,10 @@ Segment& Segment::operator= (const Segment &orig) { Segment& Segment::operator= (Segment &&orig) noexcept { //DEBUG_PRINTF_P(PSTR("-- Moving segment: %p -> %p\n"), &orig, this); if (this != &orig) { - if (name) { d_free(name); name = nullptr; } // free old name + if (name) { p_free(name); name = nullptr; } // free old name if (_t) stopTransition(); // also erases _t deallocateData(); // free old runtime data - d_free(pixels); // free old pixel buffer + p_free(pixels); // free old pixel buffer // move source data memcpy((void*)this, (void*)&orig, sizeof(Segment)); orig.name = nullptr; @@ -147,35 +147,38 @@ Segment& Segment::operator= (Segment &&orig) noexcept { // allocates effect data buffer on heap and initialises (erases) it bool Segment::allocateData(size_t len) { - if (len == 0) return false; // nothing to do - if (data && _dataLen >= len) { // already allocated enough (reduce fragmentation) + if (len == 0) return false; // nothing to do + if (data && _dataLen >= len) { // already allocated enough (reduce fragmentation) if (call == 0) { - //DEBUG_PRINTF_P(PSTR("-- Clearing data (%d): %p\n"), len, this); - memset(data, 0, len); // erase buffer if called during effect initialisation + if (_dataLen < FAIR_DATA_PER_SEG) { // segment data is small + //DEBUG_PRINTF_P(PSTR("-- Clearing data (%d): %p\n"), len, this); + memset(data, 0, len); // erase buffer if called during effect initialisation + return true; // no need to reallocate + } } - return true; + else + return true; } //DEBUG_PRINTF_P(PSTR("-- Allocating data (%d): %p\n"), len, this); + // limit to MAX_SEGMENT_DATA if there is no PSRAM, otherwise prefer functionality over speed + #ifndef BOARD_HAS_PSRAM if (Segment::getUsedSegmentData() + len - _dataLen > MAX_SEGMENT_DATA) { // not enough memory - DEBUG_PRINTF_P(PSTR("!!! 
Not enough RAM: %d/%d !!!\n"), len, Segment::getUsedSegmentData()); + DEBUG_PRINTF_P(PSTR("SegmentData limit reached: %d/%d\n"), len, Segment::getUsedSegmentData()); errorFlag = ERR_NORAM; return false; } - // prefer DRAM over SPI RAM on ESP32 since it is slow - if (data) { - data = (byte*)d_realloc_malloc(data, len); // realloc with malloc fallback - if (!data) { - data = nullptr; - Segment::addUsedSegmentData(-_dataLen); // subtract original buffer size - _dataLen = 0; // reset data length - } - } - else data = (byte*)d_malloc(len); + #endif if (data) { - memset(data, 0, len); // erase buffer - Segment::addUsedSegmentData(len - _dataLen); + d_free(data); // free data and try to allocate again (segment buffer may be blocking contiguous heap) + Segment::addUsedSegmentData(-_dataLen); // subtract buffer size + } + + data = static_cast(allocate_buffer(len, BFRALLOC_PREFER_DRAM | BFRALLOC_CLEAR)); // prefer DRAM over PSRAM for speed + + if (data) { + Segment::addUsedSegmentData(len); _dataLen = len; //DEBUG_PRINTF_P(PSTR("--- Allocated data (%p): %d/%d -> %p\n"), this, len, Segment::getUsedSegmentData(), data); return true; @@ -209,7 +212,11 @@ void Segment::deallocateData() { void Segment::resetIfRequired() { if (!reset || !isActive()) return; //DEBUG_PRINTF_P(PSTR("-- Segment reset: %p\n"), this); - if (data && _dataLen > 0) memset(data, 0, _dataLen); // prevent heap fragmentation (just erase buffer instead of deallocateData()) + if (data && _dataLen > 0) { + if (_dataLen > FAIR_DATA_PER_SEG) deallocateData(); // do not keep large allocations + else memset(data, 0, _dataLen); // can prevent heap fragmentation + DEBUG_PRINTF_P(PSTR("-- Segment %p reset, data cleared\n"), this); + } if (pixels) for (size_t i = 0; i < length(); i++) pixels[i] = BLACK; // clear pixel buffer next_time = 0; step = 0; call = 0; aux0 = 0; aux1 = 0; reset = false; @@ -466,7 +473,7 @@ void Segment::setGeometry(uint16_t i1, uint16_t i2, uint8_t grp, uint8_t spc, ui if (length() != oldLength) { 
// allocate render buffer (always entire segment), prefer IRAM/PSRAM. Note: impact on FPS with PSRAM buffer is low (<2% with QSPI PSRAM) on S2/S3 p_free(pixels); - pixels = static_cast(d_malloc(sizeof(uint32_t) * length())); + pixels = static_cast(allocate_buffer(length() * sizeof(uint32_t), BFRALLOC_PREFER_PSRAM | BFRALLOC_NOBYTEACCESS)); if (!pixels) { DEBUGFX_PRINTLN(F("!!! Not enough RAM for pixel buffer !!!")); deallocateData(); @@ -581,8 +588,8 @@ Segment &Segment::setName(const char *newName) { if (newName) { const int newLen = min(strlen(newName), (size_t)WLED_MAX_SEGNAME_LEN); if (newLen) { - if (name) d_free(name); // free old name - name = static_cast(d_malloc(newLen+1)); + if (name) p_free(name); // free old name + name = static_cast(allocate_buffer(newLen+1, BFRALLOC_PREFER_PSRAM)); if (mode == FX_MODE_2DSCROLLTEXT) startTransition(strip.getTransition(), true); // if the name changes in scrolling text mode, we need to copy the segment for blending if (name) strlcpy(name, newName, newLen+1); return *this; @@ -1177,7 +1184,10 @@ void WS2812FX::finalizeInit() { mem += bus.memUsage(Bus::isDigital(bus.type) && !Bus::is2Pin(bus.type) ? digitalCount++ : 0); // includes global buffer if (mem <= MAX_LED_MEMORY) { if (BusManager::add(bus) == -1) break; - } else DEBUG_PRINTF_P(PSTR("Out of LED memory! Bus %d (%d) #%u not created."), (int)bus.type, (int)bus.count, digitalCount); + } else { + errorFlag = ERR_NORAM_PX; // alert UI + DEBUG_PRINTF_P(PSTR("Out of LED memory! Bus %d (%d) #%u not created."), (int)bus.type, (int)bus.count, digitalCount); + } } busConfigs.clear(); busConfigs.shrink_to_fit(); @@ -1209,10 +1219,11 @@ void WS2812FX::finalizeInit() { deserializeMap(); // (re)load default ledmap (will also setUpMatrix() if ledmap does not exist) // allocate frame buffer after matrix has been set up (gaps!) 
- d_free(_pixels); // using realloc on large buffers can cause additional fragmentation instead of reducing it - _pixels = static_cast(d_malloc(getLengthTotal() * sizeof(uint32_t))); + p_free(_pixels); // using realloc on large buffers can cause additional fragmentation instead of reducing it + // use PSRAM if available: there is no measurable perfomance impact between PSRAM and DRAM on S2/S3 with QSPI PSRAM for this buffer + _pixels = static_cast(allocate_buffer(getLengthTotal() * sizeof(uint32_t), BFRALLOC_ENFORCE_PSRAM | BFRALLOC_NOBYTEACCESS | BFRALLOC_CLEAR)); DEBUG_PRINTF_P(PSTR("strip buffer size: %uB\n"), getLengthTotal() * sizeof(uint32_t)); - DEBUG_PRINTF_P(PSTR("Heap after strip init: %uB\n"), ESP.getFreeHeap()); + DEBUG_PRINTF_P(PSTR("Heap after strip init: %uB\n"), getFreeHeapSize()); } void WS2812FX::service() { @@ -1552,7 +1563,11 @@ void WS2812FX::blendSegment(const Segment &topSegment) const { } void WS2812FX::show() { - if (!_pixels) return; // no pixels allocated, nothing to show + if (!_pixels) { + DEBUGFX_PRINTLN(F("Error: no _pixels!")); + errorFlag = ERR_NORAM; + return; // no pixels allocated, nothing to show + } unsigned long showNow = millis(); size_t diff = showNow - _lastShow; @@ -1562,7 +1577,7 @@ void WS2812FX::show() { // we need to keep track of each pixel's CCT when blending segments (if CCT is present) // and then set appropriate CCT from that pixel during paint (see below). 
if ((hasCCTBus() || correctWB) && !cctFromRgb) - _pixelCCT = static_cast(d_malloc(totalLen * sizeof(uint8_t))); // allocate CCT buffer if necessary + _pixelCCT = static_cast(allocate_buffer(totalLen * sizeof(uint8_t), BFRALLOC_PREFER_PSRAM)); // allocate CCT buffer if necessary, prefer PSRAM if (_pixelCCT) memset(_pixelCCT, 127, totalLen); // set neutral (50:50) CCT if (realtimeMode == REALTIME_MODE_INACTIVE || useMainSegmentOnly || realtimeOverride > REALTIME_OVERRIDE_NONE) { @@ -1596,7 +1611,7 @@ void WS2812FX::show() { } Bus::setCCT(oldCCT); // restore old CCT for ABL adjustments - d_free(_pixelCCT); + p_free(_pixelCCT); _pixelCCT = nullptr; // some buses send asynchronously and this method will return before diff --git a/wled00/bus_manager.cpp b/wled00/bus_manager.cpp index 99523bba9..612c0f3aa 100644 --- a/wled00/bus_manager.cpp +++ b/wled00/bus_manager.cpp @@ -39,35 +39,29 @@ uint32_t colorBalanceFromKelvin(uint16_t kelvin, uint32_t rgb); uint8_t realtimeBroadcast(uint8_t type, IPAddress client, uint16_t length, const byte *buffer, uint8_t bri=255, bool isRGBW=false); //util.cpp -// PSRAM allocation wrappers -#if !defined(ESP8266) && !defined(CONFIG_IDF_TARGET_ESP32C3) +// memory allocation wrappers extern "C" { - void *p_malloc(size_t); // prefer PSRAM over DRAM - void *p_calloc(size_t, size_t); // prefer PSRAM over DRAM - void *p_realloc(void *, size_t); // prefer PSRAM over DRAM - void *p_realloc_malloc(void *ptr, size_t size); // realloc with malloc fallback, prefer PSRAM over DRAM - inline void p_free(void *ptr) { heap_caps_free(ptr); } - void *d_malloc(size_t); // prefer DRAM over PSRAM - void *d_calloc(size_t, size_t); // prefer DRAM over PSRAM - void *d_realloc(void *, size_t); // prefer DRAM over PSRAM - void *d_realloc_malloc(void *ptr, size_t size); // realloc with malloc fallback, prefer DRAM over PSRAM + // prefer DRAM over PSRAM (if available) in d_ alloc functions + void *d_malloc(size_t); + void *d_calloc(size_t, size_t); + void 
*d_realloc_malloc(void *ptr, size_t size); + #ifndef ESP8266 inline void d_free(void *ptr) { heap_caps_free(ptr); } + #else + inline void d_free(void *ptr) { free(ptr); } + #endif + #if defined(BOARD_HAS_PSRAM) + // prefer PSRAM over DRAM in p_ alloc functions + void *p_malloc(size_t); + void *p_calloc(size_t, size_t); + void *p_realloc_malloc(void *ptr, size_t size); + inline void p_free(void *ptr) { heap_caps_free(ptr); } + #else + #define p_malloc d_malloc + #define p_calloc d_calloc + #define p_free d_free + #endif } -#else -extern "C" { - void *realloc_malloc(void *ptr, size_t size); -} -#define p_malloc malloc -#define p_calloc calloc -#define p_realloc realloc -#define p_realloc_malloc realloc_malloc -#define p_free free -#define d_malloc malloc -#define d_calloc calloc -#define d_realloc realloc -#define d_realloc_malloc realloc_malloc -#define d_free free -#endif //color mangling macros #define RGBW32(r,g,b,w) (uint32_t((byte(w) << 24) | (byte(r) << 16) | (byte(g) << 8) | (byte(b)))) @@ -902,7 +896,7 @@ void BusManager::esp32RMTInvertIdle() { else if (lvl == RMT_IDLE_LEVEL_LOW) lvl = RMT_IDLE_LEVEL_HIGH; else continue; rmt_set_idle_level(ch, idle_out, lvl); - u++ + u++; } } #endif diff --git a/wled00/cfg.cpp b/wled00/cfg.cpp index eac6ea25a..9d7477c7a 100644 --- a/wled00/cfg.cpp +++ b/wled00/cfg.cpp @@ -201,7 +201,7 @@ bool deserializeConfig(JsonObject doc, bool fromFS) { } #endif - DEBUG_PRINTF_P(PSTR("Heap before buses: %d\n"), ESP.getFreeHeap()); + DEBUG_PRINTF_P(PSTR("Heap before buses: %d\n"), getFreeHeapSize()); JsonArray ins = hw_led["ins"]; if (!ins.isNull()) { int s = 0; // bus iterator diff --git a/wled00/const.h b/wled00/const.h index 1abf24539..7a7ca976c 100644 --- a/wled00/const.h +++ b/wled00/const.h @@ -546,8 +546,21 @@ static_assert(WLED_MAX_BUSSES <= 32, "WLED_MAX_BUSSES exceeds hard limit"); #endif #endif -// minimum heap size required to process web requests -#define MIN_HEAP_SIZE 8192 +// minimum heap size required to process web 
requests: try to keep free heap above this value +#ifdef ESP8266 + #define MIN_HEAP_SIZE (9*1024) +#else + #define MIN_HEAP_SIZE (15*1024) // WLED allocation functions (util.cpp) try to keep this much contiguous heap free for other tasks +#endif +// threshold for PSRAM use: if heap is running low, requests to allocate_buffer(prefer DRAM) above PSRAM_THRESHOLD may be put in PSRAM +// if heap is depleted, PSRAM will be used regardless of threshold +#if defined(CONFIG_IDF_TARGET_ESP32S3) + #define PSRAM_THRESHOLD (12*1024) // S3 has plenty of DRAM +#elif defined(CONFIG_IDF_TARGET_ESP32) + #define PSRAM_THRESHOLD (5*1024) +#else + #define PSRAM_THRESHOLD (2*1024) // S2 does not have a lot of RAM. C3 and ESP8266 do not support PSRAM: the value is not used +#endif // Web server limits #ifdef ESP8266 diff --git a/wled00/data/settings_leds.htm b/wled00/data/settings_leds.htm index 928da1175..6424815cc 100644 --- a/wled00/data/settings_leds.htm +++ b/wled00/data/settings_leds.htm @@ -195,7 +195,6 @@ if (isAna(t)) return 5; // analog let len = parseInt(d.getElementsByName("LC"+n)[0].value); len += parseInt(d.getElementsByName("SL"+n)[0].value); // skipped LEDs are allocated too - let dbl = 0; let ch = 3*hasRGB(t) + hasW(t) + hasCCT(t); let mul = 1; if (isDig(t)) { @@ -207,7 +206,7 @@ mul = 2; } } - return len * ch * mul + dbl; + return len * ch * mul + len * 4; // add 4 bytes per LED for segment buffer (TODO: how to account for global buffer?) 
} function UI(change=false) diff --git a/wled00/fcn_declare.h b/wled00/fcn_declare.h index 4c3ec81fb..1d81655d6 100644 --- a/wled00/fcn_declare.h +++ b/wled00/fcn_declare.h @@ -434,35 +434,44 @@ inline uint8_t hw_random8() { return HW_RND_REGISTER; }; inline uint8_t hw_random8(uint32_t upperlimit) { return (hw_random8() * upperlimit) >> 8; }; // input range 0-255 inline uint8_t hw_random8(uint32_t lowerlimit, uint32_t upperlimit) { uint32_t range = upperlimit - lowerlimit; return lowerlimit + hw_random8(range); }; // input range 0-255 -// PSRAM allocation wrappers -#if !defined(ESP8266) && !defined(CONFIG_IDF_TARGET_ESP32C3) +// memory allocation wrappers (util.cpp) extern "C" { - void *p_malloc(size_t); // prefer PSRAM over DRAM - void *p_calloc(size_t, size_t); // prefer PSRAM over DRAM - void *p_realloc(void *, size_t); // prefer PSRAM over DRAM - void *p_realloc_malloc(void *ptr, size_t size); // realloc with malloc fallback, prefer PSRAM over DRAM - inline void p_free(void *ptr) { heap_caps_free(ptr); } - void *d_malloc(size_t); // prefer DRAM over PSRAM - void *d_calloc(size_t, size_t); // prefer DRAM over PSRAM - void *d_realloc(void *, size_t); // prefer DRAM over PSRAM - void *d_realloc_malloc(void *ptr, size_t size); // realloc with malloc fallback, prefer DRAM over PSRAM + // prefer DRAM in d_xalloc functions, PSRAM as fallback + void *d_malloc(size_t); + void *d_calloc(size_t, size_t); + void *d_realloc_malloc(void *ptr, size_t size); + #ifndef ESP8266 inline void d_free(void *ptr) { heap_caps_free(ptr); } + #else + inline void d_free(void *ptr) { free(ptr); } + #endif + #if defined(BOARD_HAS_PSRAM) + // prefer PSRAM in p_xalloc functions, DRAM as fallback + void *p_malloc(size_t); + void *p_calloc(size_t, size_t); + void *p_realloc_malloc(void *ptr, size_t size); + inline void p_free(void *ptr) { heap_caps_free(ptr); } + #else + #define p_malloc d_malloc + #define p_calloc d_calloc + #define p_realloc_malloc d_realloc_malloc + #define p_free d_free + 
#endif } +#ifndef ESP8266 +inline size_t getFreeHeapSize() { return heap_caps_get_free_size(MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); } // returns free heap (ESP.getFreeHeap() can include other memory types) +inline size_t getContiguousFreeHeap() { return heap_caps_get_largest_free_block(MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); } // returns largest contiguous free block #else -extern "C" { - void *realloc_malloc(void *ptr, size_t size); -} -#define p_malloc malloc -#define p_calloc calloc -#define p_realloc realloc -#define p_realloc_malloc realloc_malloc -#define p_free free -#define d_malloc malloc -#define d_calloc calloc -#define d_realloc realloc -#define d_realloc_malloc realloc_malloc -#define d_free free +inline size_t getFreeHeapSize() { return ESP.getFreeHeap(); } // returns free heap +inline size_t getContiguousFreeHeap() { return ESP.getMaxFreeBlockSize(); } // returns largest contiguous free block #endif +#define BFRALLOC_NOBYTEACCESS (1 << 0) // ESP32 has 32bit accessible DRAM (usually ~50kB free) that must not be byte-accessed +#define BFRALLOC_PREFER_DRAM (1 << 1) // prefer DRAM over PSRAM +#define BFRALLOC_ENFORCE_DRAM (1 << 2) // use DRAM only, no PSRAM +#define BFRALLOC_PREFER_PSRAM (1 << 3) // prefer PSRAM over DRAM +#define BFRALLOC_ENFORCE_PSRAM (1 << 4) // use PSRAM if available, otherwise uses DRAM +#define BFRALLOC_CLEAR (1 << 5) // clear allocated buffer after allocation +void *allocate_buffer(size_t size, uint32_t type); void handleBootLoop(); // detect and handle bootloops #ifndef ESP8266 diff --git a/wled00/file.cpp b/wled00/file.cpp index 108c41bd4..9f1dd6225 100644 --- a/wled00/file.cpp +++ b/wled00/file.cpp @@ -422,8 +422,8 @@ bool handleFileRead(AsyncWebServerRequest* request, String path){ DEBUGFS_PRINT(F("WS FileRead: ")); DEBUGFS_PRINTLN(path); if(path.endsWith("/")) path += "index.htm"; if(path.indexOf(F("sec")) > -1) return false; - #ifdef ARDUINO_ARCH_ESP32 - if (psramSafe && psramFound() && 
path.endsWith(FPSTR(getPresetsFileName()))) { + #ifdef BOARD_HAS_PSRAM + if (path.endsWith(FPSTR(getPresetsFileName()))) { size_t psize; const uint8_t *presets = getPresetCache(psize); if (presets) { diff --git a/wled00/json.cpp b/wled00/json.cpp index e8ebaaba2..5f4c3cfe0 100644 --- a/wled00/json.cpp +++ b/wled00/json.cpp @@ -812,7 +812,7 @@ void serializeInfo(JsonObject root) root[F("clock")] = ESP.getCpuFreqMHz(); root[F("flash")] = (ESP.getFlashChipSize()/1024)/1024; #ifdef WLED_DEBUG - root[F("maxalloc")] = ESP.getMaxAllocHeap(); + root[F("maxalloc")] = getContiguousFreeHeap(); root[F("resetReason0")] = (int)rtc_get_reset_reason(0); root[F("resetReason1")] = (int)rtc_get_reset_reason(1); #endif @@ -823,15 +823,15 @@ void serializeInfo(JsonObject root) root[F("clock")] = ESP.getCpuFreqMHz(); root[F("flash")] = (ESP.getFlashChipSize()/1024)/1024; #ifdef WLED_DEBUG - root[F("maxalloc")] = ESP.getMaxFreeBlockSize(); + root[F("maxalloc")] = getContiguousFreeHeap(); root[F("resetReason")] = (int)ESP.getResetInfoPtr()->reason; #endif root[F("lwip")] = LWIP_VERSION_MAJOR; #endif - root[F("freeheap")] = ESP.getFreeHeap(); - #if defined(ARDUINO_ARCH_ESP32) - if (psramFound()) root[F("psram")] = ESP.getFreePsram(); + root[F("freeheap")] = getFreeHeapSize(); + #if defined(BOARD_HAS_PSRAM) + root[F("psram")] = ESP.getFreePsram(); #endif root[F("uptime")] = millis()/1000 + rolloverMillis*4294967; diff --git a/wled00/util.cpp b/wled00/util.cpp index 8299904d5..8cc01a543 100644 --- a/wled00/util.cpp +++ b/wled00/util.cpp @@ -629,92 +629,186 @@ int32_t hw_random(int32_t lowerlimit, int32_t upperlimit) { return hw_random(diff) + lowerlimit; } -#if !defined(ESP8266) && !defined(CONFIG_IDF_TARGET_ESP32C3) // ESP8266 does not support PSRAM, ESP32-C3 does not have PSRAM -// p_x prefer PSRAM, d_x prefer DRAM -void *p_malloc(size_t size) { - int caps1 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; - int caps2 = MALLOC_CAP_DEFAULT | MALLOC_CAP_8BIT; - if (psramSafe) { - if 
(heap_caps_get_free_size(caps2) > 3*MIN_HEAP_SIZE && size < 512) std::swap(caps1, caps2); // use DRAM for small alloactions & when heap is plenty - return heap_caps_malloc_prefer(size, 2, caps1, caps2); // otherwise prefer PSRAM if it exists - } - return heap_caps_malloc(size, caps2); -} +// PSRAM compile time checks to provide info for misconfigured env +#if defined(BOARD_HAS_PSRAM) + #if defined(IDF_TARGET_ESP32C3) || defined(ESP8266) + #error "ESP32-C3 and ESP8266 with PSRAM is not supported, please remove BOARD_HAS_PSRAM definition" + #else + // BOARD_HAS_PSRAM also means that compiler flag "-mfix-esp32-psram-cache-issue" has to be used + #warning "BOARD_HAS_PSRAM defined, make sure to use -mfix-esp32-psram-cache-issue to prevent issues on rev.1 ESP32 boards \ + see https://docs.espressif.com/projects/esp-idf/en/stable/esp32/api-guides/external-ram.html#esp32-rev-v1-0" + #endif +#else + #if !defined(IDF_TARGET_ESP32C3) && !defined(ESP8266) + #pragma message("BOARD_HAS_PSRAM not defined, not using PSRAM.") + #endif +#endif -void *p_realloc(void *ptr, size_t size) { - int caps1 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; - int caps2 = MALLOC_CAP_DEFAULT | MALLOC_CAP_8BIT; - if (psramSafe) { - if (heap_caps_get_free_size(caps2) > 3*MIN_HEAP_SIZE && size < 512) std::swap(caps1, caps2); // use DRAM for small alloactions & when heap is plenty - return heap_caps_realloc_prefer(ptr, size, 2, caps1, caps2); // otherwise prefer PSRAM if it exists +// memory allocation functions with minimum free heap size check +#ifdef ESP8266 +static void *validateFreeHeap(void *buffer) { + // make sure there is enough free heap left if buffer was allocated in DRAM region, free it if not + if (getContiguousFreeHeap() < MIN_HEAP_SIZE) { + free(buffer); + return nullptr; } - return heap_caps_realloc(ptr, size, caps2); -} - -// realloc with malloc fallback, original buffer is freed if realloc fails but not copied! 
-void *p_realloc_malloc(void *ptr, size_t size) { - void *newbuf = p_realloc(ptr, size); // try realloc first - if (newbuf) return newbuf; // realloc successful - p_free(ptr); // free old buffer if realloc failed - return p_malloc(size); // fallback to malloc -} - -void *p_calloc(size_t count, size_t size) { - int caps1 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; - int caps2 = MALLOC_CAP_DEFAULT | MALLOC_CAP_8BIT; - if (psramSafe) { - if (heap_caps_get_free_size(caps2) > 3*MIN_HEAP_SIZE && size < 512) std::swap(caps1, caps2); // use DRAM for small alloactions & when heap is plenty - return heap_caps_calloc_prefer(count, size, 2, caps1, caps2); // otherwise prefer PSRAM if it exists - } - return heap_caps_calloc(count, size, caps2); + return buffer; } void *d_malloc(size_t size) { - int caps1 = MALLOC_CAP_DEFAULT | MALLOC_CAP_8BIT; - int caps2 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; - if (psramSafe) { - if (heap_caps_get_largest_free_block(caps1) < 3*MIN_HEAP_SIZE && size > MIN_HEAP_SIZE) std::swap(caps1, caps2); // prefer PSRAM for large alloactions & when DRAM is low - return heap_caps_malloc_prefer(size, 2, caps1, caps2); // otherwise prefer DRAM - } - return heap_caps_malloc(size, caps1); + // note: using "if (getContiguousFreeHeap() > MIN_HEAP_SIZE + size)" did perform worse in tests with regards to keeping heap healthy and UI working + void *buffer = malloc(size); + return validateFreeHeap(buffer); } -void *d_realloc(void *ptr, size_t size) { - int caps1 = MALLOC_CAP_DEFAULT | MALLOC_CAP_8BIT; - int caps2 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; - if (psramSafe) { - if (heap_caps_get_largest_free_block(caps1) < 3*MIN_HEAP_SIZE && size > MIN_HEAP_SIZE) std::swap(caps1, caps2); // prefer PSRAM for large alloactions & when DRAM is low - return heap_caps_realloc_prefer(ptr, size, 2, caps1, caps2); // otherwise prefer DRAM +void *d_calloc(size_t count, size_t size) { + void *buffer = calloc(count, size); + return validateFreeHeap(buffer); +} + +// realloc with malloc 
fallback, note: on ESPS8266 there is no safe way to ensure MIN_HEAP_SIZE during realloc()s, free buffer and allocate new one +void *d_realloc_malloc(void *ptr, size_t size) { + //void *buffer = realloc(ptr, size); + //buffer = validateFreeHeap(buffer); + //if (buffer) return buffer; // realloc successful + //d_free(ptr); // free old buffer if realloc failed (or min heap was exceeded) + //return d_malloc(size); // fallback to malloc + free(ptr); + return d_malloc(size); +} +#else +static void *validateFreeHeap(void *buffer) { + // make sure there is enough free heap left if buffer was allocated in DRAM region, free it if not + // TODO: between allocate and free, heap can run low (async web access), only IDF V5 allows for a pre-allocation-check of all free blocks + if ((uintptr_t)buffer > SOC_DRAM_LOW && (uintptr_t)buffer < SOC_DRAM_HIGH && getContiguousFreeHeap() < MIN_HEAP_SIZE) { + free(buffer); + return nullptr; } - return heap_caps_realloc(ptr, size, caps1); + return buffer; +} + +void *d_malloc(size_t size) { + void *buffer; + #if defined(CONFIG_IDF_TARGET_ESP32C3) || defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32S3) + // the newer ESP32 variants have byte-accessible fast RTC memory that can be used as heap, access speed is on-par with DRAM + // the system does prefer normal DRAM until full, since free RTC memory is ~7.5k only, its below the minimum heap threshold and needs to be allocated explicitly + // use RTC RAM for small allocations to improve fragmentation or if DRAM is running low + if (size < 256 || getContiguousFreeHeap() < 2*MIN_HEAP_SIZE + size) + buffer = heap_caps_malloc_prefer(size, 2, MALLOC_CAP_RTCRAM, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); + else + #endif + buffer = heap_caps_malloc(size, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); // allocate in any available heap memory + buffer = validateFreeHeap(buffer); // make sure there is enough free heap left + #ifdef BOARD_HAS_PSRAM + if (!buffer) + return heap_caps_malloc(size, 
MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); // DRAM failed, use PSRAM if available + #endif + return buffer; +} + +void *d_calloc(size_t count, size_t size) { + void *buffer = d_malloc(count * size); + if (buffer) memset(buffer, 0, count * size); // clear allocated buffer + return buffer; } // realloc with malloc fallback, original buffer is freed if realloc fails but not copied! void *d_realloc_malloc(void *ptr, size_t size) { - void *newbuf = d_realloc(ptr, size); // try realloc first - if (newbuf) return newbuf; // realloc successful - d_free(ptr); // free old buffer if realloc failed + void *buffer = heap_caps_realloc(ptr, size, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); + buffer = validateFreeHeap(buffer); + if (buffer) return buffer; // realloc successful + d_free(ptr); // free old buffer if realloc failed (or min heap was exceeded) return d_malloc(size); // fallback to malloc } -void *d_calloc(size_t count, size_t size) { - int caps1 = MALLOC_CAP_DEFAULT | MALLOC_CAP_8BIT; - int caps2 = MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT; - if (psramSafe) { - if (size > MIN_HEAP_SIZE) std::swap(caps1, caps2); // prefer PSRAM for large alloactions - return heap_caps_calloc_prefer(count, size, 2, caps1, caps2); // otherwise prefer DRAM - } - return heap_caps_calloc(count, size, caps1); +#ifdef BOARD_HAS_PSRAM +// p_xalloc: prefer PSRAM, use DRAM as fallback +void *p_malloc(size_t size) { + void *buffer = heap_caps_malloc_prefer(size, 2, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); + return validateFreeHeap(buffer); } -#else // ESP8266 & ESP32-C3 + +void *p_calloc(size_t count, size_t size) { + void *buffer = p_malloc(count * size); + if (buffer) memset(buffer, 0, count * size); // clear allocated buffer + return buffer; +} + // realloc with malloc fallback, original buffer is freed if realloc fails but not copied! 
-void *realloc_malloc(void *ptr, size_t size) { - void *newbuf = realloc(ptr, size); // try realloc first - if (newbuf) return newbuf; // realloc successful - free(ptr); // free old buffer if realloc failed - return malloc(size); // fallback to malloc +void *p_realloc_malloc(void *ptr, size_t size) { + void *buffer = heap_caps_realloc(ptr, size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); + if (buffer) return buffer; // realloc successful + p_free(ptr); // free old buffer if realloc failed + return p_malloc(size); // fallback to malloc } #endif +#endif + +// allocation function for buffers like pixel-buffers and segment data +// optimises the use of memory types to balance speed and heap availability, always favours DRAM if possible +// if multiple conflicting types are defined, the lowest bits of "type" take priority (see fcn_declare.h for types) +void *allocate_buffer(size_t size, uint32_t type) { + void *buffer = nullptr; + #ifdef CONFIG_IDF_TARGET_ESP32 + // only classic ESP32 has "32bit accessible only" aka IRAM type. Using it frees up normal DRAM for other purposes + // this memory region is used for IRAM_ATTR functions, whatever is left is unused and can be used for pixel buffers + // prefer this type over PSRAM as it is slightly faster, except for _pixels where it is on-par as PSRAM-caching does a good job for mostly sequential access + if (type & BFRALLOC_NOBYTEACCESS) { + // prefer 32bit region, then PSRAM, fallback to any heap. 
Note: if adding "INTERNAL"-flag this wont work + buffer = heap_caps_malloc_prefer(size, 3, MALLOC_CAP_32BIT, MALLOC_CAP_SPIRAM, MALLOC_CAP_8BIT); + buffer = validateFreeHeap(buffer); + } + else + #endif + #if !defined(BOARD_HAS_PSRAM) + buffer = d_malloc(size); + #else + if (type & BFRALLOC_PREFER_DRAM) { + if (getContiguousFreeHeap() < 3*(MIN_HEAP_SIZE/2) + size && size > PSRAM_THRESHOLD) + buffer = p_malloc(size); // prefer PSRAM for large allocations & when DRAM is low + else + buffer = d_malloc(size); // allocate in DRAM if enough free heap is available, PSRAM as fallback + } + else if (type & BFRALLOC_ENFORCE_DRAM) + buffer = heap_caps_malloc(size, MALLOC_CAP_INTERNAL | MALLOC_CAP_8BIT); // use DRAM only, otherwise return nullptr + else if (type & BFRALLOC_PREFER_PSRAM) { + // if DRAM is plenty, prefer it over PSRAM for speed, reserve enough DRAM for segment data: if MAX_SEGMENT_DATA is exceeded, always uses PSRAM + if (getContiguousFreeHeap() > 4*MIN_HEAP_SIZE + size + ((uint32_t)(MAX_SEGMENT_DATA - Segment::getUsedSegmentData()))) + buffer = d_malloc(size); + else + buffer = p_malloc(size); // prefer PSRAM + } + else if (type & BFRALLOC_ENFORCE_PSRAM) + buffer = heap_caps_malloc(size, MALLOC_CAP_SPIRAM | MALLOC_CAP_8BIT); // use PSRAM only, otherwise return nullptr + buffer = validateFreeHeap(buffer); + #endif + if (buffer && (type & BFRALLOC_CLEAR)) + memset(buffer, 0, size); // clear allocated buffer + /* + #if !defined(ESP8266) && defined(WLED_DEBUG) + if (buffer) { + DEBUG_PRINTF_P(PSTR("*Buffer allocated: size:%d, address:%p"), size, (uintptr_t)buffer); + if ((uintptr_t)buffer > SOC_DRAM_LOW && (uintptr_t)buffer < SOC_DRAM_HIGH) + DEBUG_PRINTLN(F(" in DRAM")); + #ifndef CONFIG_IDF_TARGET_ESP32C3 + else if ((uintptr_t)buffer > SOC_EXTRAM_DATA_LOW && (uintptr_t)buffer < SOC_EXTRAM_DATA_HIGH) + DEBUG_PRINTLN(F(" in PSRAM")); + #endif + #ifdef CONFIG_IDF_TARGET_ESP32 + else if ((uintptr_t)buffer > SOC_IRAM_LOW && (uintptr_t)buffer < SOC_IRAM_HIGH) + 
DEBUG_PRINTLN(F(" in IRAM")); // only used on ESP32 (MALLOC_CAP_32BIT) + #else + else if ((uintptr_t)buffer > SOC_RTC_DRAM_LOW && (uintptr_t)buffer < SOC_RTC_DRAM_HIGH) + DEBUG_PRINTLN(F(" in RTCRAM")); // not available on ESP32 + #endif + else + DEBUG_PRINTLN(F(" in ???")); // unknown (check soc.h for other memory regions) + } else + DEBUG_PRINTF_P(PSTR("Buffer allocation failed: size:%d\n"), size); + #endif + */ + return buffer; +} // bootloop detection and handling // checks if the ESP reboots multiple times due to a crash or watchdog timeout diff --git a/wled00/wled.cpp b/wled00/wled.cpp index 9b15f7a32..923688106 100644 --- a/wled00/wled.cpp +++ b/wled00/wled.cpp @@ -171,7 +171,7 @@ void WLED::loop() // reconnect WiFi to clear stale allocations if heap gets too low if (millis() - heapTime > 15000) { - uint32_t heap = ESP.getFreeHeap(); + uint32_t heap = getFreeHeapSize(); if (heap < MIN_HEAP_SIZE && lastHeap < MIN_HEAP_SIZE) { DEBUG_PRINTF_P(PSTR("Heap too low! %u\n"), heap); forceReconnect = true; @@ -241,13 +241,37 @@ void WLED::loop() DEBUG_PRINTLN(F("---DEBUG INFO---")); DEBUG_PRINTF_P(PSTR("Runtime: %lu\n"), millis()); DEBUG_PRINTF_P(PSTR("Unix time: %u,%03u\n"), toki.getTime().sec, toki.getTime().ms); - DEBUG_PRINTF_P(PSTR("Free heap: %u\n"), ESP.getFreeHeap()); #if defined(ARDUINO_ARCH_ESP32) + DEBUG_PRINTLN(F("=== Memory Info ===")); + // Internal DRAM (standard 8-bit accessible heap) + size_t dram_free = heap_caps_get_free_size(MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL); + size_t dram_largest = heap_caps_get_largest_free_block(MALLOC_CAP_8BIT | MALLOC_CAP_INTERNAL); + DEBUG_PRINTF_P(PSTR("DRAM 8-bit: Free: %7u bytes | Largest block: %7u bytes\n"), dram_free, dram_largest); + #ifdef BOARD_HAS_PSRAM + size_t psram_free = heap_caps_get_free_size(MALLOC_CAP_SPIRAM); + size_t psram_largest = heap_caps_get_largest_free_block(MALLOC_CAP_SPIRAM); + DEBUG_PRINTF_P(PSTR("PSRAM: Free: %7u bytes | Largest block: %6u bytes\n"), psram_free, psram_largest); + #endif + 
#if defined(CONFIG_IDF_TARGET_ESP32) + // 32-bit DRAM (not byte accessible, only available on ESP32) + size_t dram32_free = heap_caps_get_free_size(MALLOC_CAP_32BIT | MALLOC_CAP_INTERNAL) - dram_free; // returns all 32bit DRAM, subtract 8bit DRAM + //size_t dram32_largest = heap_caps_get_largest_free_block(MALLOC_CAP_32BIT | MALLOC_CAP_INTERNAL); // returns largest DRAM block -> not useful + DEBUG_PRINTF_P(PSTR("DRAM 32-bit: Free: %7u bytes | Largest block: N/A\n"), dram32_free); + #else + // Fast RTC Memory (not available on ESP32) + size_t rtcram_free = heap_caps_get_free_size(MALLOC_CAP_RTCRAM); + size_t rtcram_largest = heap_caps_get_largest_free_block(MALLOC_CAP_RTCRAM); + DEBUG_PRINTF_P(PSTR("RTC RAM: Free: %7u bytes | Largest block: %7u bytes\n"), rtcram_free, rtcram_largest); + #endif if (psramFound()) { DEBUG_PRINTF_P(PSTR("PSRAM: %dkB/%dkB\n"), ESP.getFreePsram()/1024, ESP.getPsramSize()/1024); - if (!psramSafe) DEBUG_PRINTLN(F("Not using PSRAM.")); + #ifndef BOARD_HAS_PSRAM + DEBUG_PRINTLN(F("BOARD_HAS_PSRAM not defined, not using PSRAM.")); + #endif } DEBUG_PRINTF_P(PSTR("TX power: %d/%d\n"), WiFi.getTxPower(), txPower); + #else // ESP8266 + DEBUG_PRINTF_P(PSTR("Free heap/contiguous: %u/%u\n"), getFreeHeapSize(), getContiguousFreeHeap()); #endif DEBUG_PRINTF_P(PSTR("Wifi state: %d\n"), WiFi.status()); #ifndef WLED_DISABLE_ESPNOW @@ -367,20 +391,16 @@ void WLED::setup() DEBUG_PRINTF_P(PSTR("esp8266 @ %u MHz.\nCore: %s\n"), ESP.getCpuFreqMHz(), ESP.getCoreVersion()); DEBUG_PRINTF_P(PSTR("FLASH: %u MB\n"), (ESP.getFlashChipSize()/1024)/1024); #endif - DEBUG_PRINTF_P(PSTR("heap %u\n"), ESP.getFreeHeap()); + DEBUG_PRINTF_P(PSTR("heap %u\n"), getFreeHeapSize()); + +#if defined(BOARD_HAS_PSRAM) + // if JSON buffer allocation fails requestJsonBufferLock() will always return false preventing crashes + pDoc = new PSRAMDynamicJsonDocument(2 * JSON_BUFFER_SIZE); + DEBUG_PRINTF_P(PSTR("JSON buffer size: %ubytes\n"), (2 * JSON_BUFFER_SIZE)); + 
DEBUG_PRINTF_P(PSTR("PSRAM: %dkB/%dkB\n"), ESP.getFreePsram()/1024, ESP.getPsramSize()/1024); +#endif #if defined(ARDUINO_ARCH_ESP32) - // BOARD_HAS_PSRAM also means that a compiler flag "-mfix-esp32-psram-cache-issue" was used and so PSRAM is safe to use on rev.1 ESP32 - #if !defined(BOARD_HAS_PSRAM) && !(defined(CONFIG_IDF_TARGET_ESP32S2) || defined(CONFIG_IDF_TARGET_ESP32S3) || defined(CONFIG_IDF_TARGET_ESP32C3)) - if (psramFound() && ESP.getChipRevision() < 3) psramSafe = false; - if (!psramSafe) DEBUG_PRINTLN(F("Not using PSRAM.")); - #endif - pDoc = new PSRAMDynamicJsonDocument((psramSafe && psramFound() ? 2 : 1)*JSON_BUFFER_SIZE); - DEBUG_PRINTF_P(PSTR("JSON buffer allocated: %u\n"), (psramSafe && psramFound() ? 2 : 1)*JSON_BUFFER_SIZE); - // if the above fails requestJsonBufferLock() will always return false preventing crashes - if (psramFound()) { - DEBUG_PRINTF_P(PSTR("PSRAM: %dkB/%dkB\n"), ESP.getFreePsram()/1024, ESP.getPsramSize()/1024); - } DEBUG_PRINTF_P(PSTR("TX power: %d/%d\n"), WiFi.getTxPower(), txPower); #endif @@ -395,7 +415,7 @@ void WLED::setup() PinManager::allocatePin(2, true, PinOwner::DMX); #endif - DEBUG_PRINTF_P(PSTR("heap %u\n"), ESP.getFreeHeap()); + DEBUG_PRINTF_P(PSTR("heap %u\n"), getFreeHeapSize()); bool fsinit = false; DEBUGFS_PRINTLN(F("Mount FS")); @@ -433,7 +453,7 @@ void WLED::setup() } DEBUG_PRINTLN(F("Reading config")); bool needsCfgSave = deserializeConfigFromFS(); - DEBUG_PRINTF_P(PSTR("heap %u\n"), ESP.getFreeHeap()); + DEBUG_PRINTF_P(PSTR("heap %u\n"), getFreeHeapSize()); #if defined(STATUSLED) && STATUSLED>=0 if (!PinManager::isPinAllocated(STATUSLED)) { @@ -445,12 +465,12 @@ void WLED::setup() DEBUG_PRINTLN(F("Initializing strip")); beginStrip(); - DEBUG_PRINTF_P(PSTR("heap %u\n"), ESP.getFreeHeap()); + DEBUG_PRINTF_P(PSTR("heap %u\n"), getFreeHeapSize()); DEBUG_PRINTLN(F("Usermods setup")); userSetup(); UsermodManager::setup(); - DEBUG_PRINTF_P(PSTR("heap %u\n"), ESP.getFreeHeap()); + DEBUG_PRINTF_P(PSTR("heap 
%u\n"), getFreeHeapSize()); if (needsCfgSave) serializeConfigToFS(); // usermods required new parameters; need to wait for strip to be initialised #4752 @@ -515,13 +535,13 @@ void WLED::setup() // HTTP server page init DEBUG_PRINTLN(F("initServer")); initServer(); - DEBUG_PRINTF_P(PSTR("heap %u\n"), ESP.getFreeHeap()); + DEBUG_PRINTF_P(PSTR("heap %u\n"), getFreeHeapSize()); #ifndef WLED_DISABLE_INFRARED // init IR DEBUG_PRINTLN(F("initIR")); initIR(); - DEBUG_PRINTF_P(PSTR("heap %u\n"), ESP.getFreeHeap()); + DEBUG_PRINTF_P(PSTR("heap %u\n"), getFreeHeapSize()); #endif // Seed FastLED random functions with an esp random value, which already works properly at this point. diff --git a/wled00/wled.h b/wled00/wled.h index 1bbb8c260..5115b65b7 100644 --- a/wled00/wled.h +++ b/wled00/wled.h @@ -167,16 +167,13 @@ // The following is a construct to enable code to compile without it. // There is a code that will still not use PSRAM though: // AsyncJsonResponse is a derived class that implements DynamicJsonDocument (AsyncJson-v6.h) -#if defined(ARDUINO_ARCH_ESP32) -extern bool psramSafe; +#if defined(BOARD_HAS_PSRAM) struct PSRAM_Allocator { void* allocate(size_t size) { - if (psramSafe && psramFound()) return ps_malloc(size); // use PSRAM if it exists - else return malloc(size); // fallback + return ps_malloc(size); // use PSRAM } void* reallocate(void* ptr, size_t new_size) { - if (psramSafe && psramFound()) return ps_realloc(ptr, new_size); // use PSRAM if it exists - else return realloc(ptr, new_size); // fallback + return ps_realloc(ptr, new_size); // use PSRAM } void deallocate(void* pointer) { free(pointer); @@ -894,8 +891,6 @@ WLED_GLOBAL byte optionType; WLED_GLOBAL bool configNeedsWrite _INIT(false); // flag to initiate saving of config WLED_GLOBAL bool doReboot _INIT(false); // flag to initiate reboot from async handlers -WLED_GLOBAL bool psramSafe _INIT(true); // is it safe to use PSRAM (on ESP32 rev.1; compiler fix used "-mfix-esp32-psram-cache-issue") - // 
status led #if defined(STATUSLED) WLED_GLOBAL unsigned long ledStatusLastMillis _INIT(0); @@ -969,8 +964,11 @@ WLED_GLOBAL int8_t spi_sclk _INIT(SPISCLKPIN); // global ArduinoJson buffer #if defined(ARDUINO_ARCH_ESP32) -WLED_GLOBAL JsonDocument *pDoc _INIT(nullptr); WLED_GLOBAL SemaphoreHandle_t jsonBufferLockMutex _INIT(xSemaphoreCreateRecursiveMutex()); +#endif +#ifdef BOARD_HAS_PSRAM +// if board has PSRAM, use it for JSON document (allocated in setup()) +WLED_GLOBAL JsonDocument *pDoc _INIT(nullptr); #else WLED_GLOBAL StaticJsonDocument gDoc; WLED_GLOBAL JsonDocument *pDoc _INIT(&gDoc); diff --git a/wled00/wled_server.cpp b/wled00/wled_server.cpp index 12e286295..65d958590 100644 --- a/wled00/wled_server.cpp +++ b/wled00/wled_server.cpp @@ -368,7 +368,7 @@ void initServer() }); server.on(F("/freeheap"), HTTP_GET, [](AsyncWebServerRequest *request){ - request->send(200, FPSTR(CONTENT_TYPE_PLAIN), (String)ESP.getFreeHeap()); + request->send(200, FPSTR(CONTENT_TYPE_PLAIN), (String)getFreeHeapSize()); }); #ifdef WLED_ENABLE_USERMOD_PAGE diff --git a/wled00/ws.cpp b/wled00/ws.cpp index 45640b68c..4522e1815 100644 --- a/wled00/ws.cpp +++ b/wled00/ws.cpp @@ -124,8 +124,8 @@ void sendDataWs(AsyncWebSocketClient * client) DEBUG_PRINTF_P(PSTR("JSON buffer size: %u for WS request (%u).\n"), pDoc->memoryUsage(), len); // the following may no longer be necessary as heap management has been fixed by @willmmiles in AWS - size_t heap1 = ESP.getFreeHeap(); - DEBUG_PRINTF_P(PSTR("heap %u\n"), ESP.getFreeHeap()); + size_t heap1 = getFreeHeapSize(); + DEBUG_PRINTF_P(PSTR("heap %u\n"), getFreeHeapSize()); #ifdef ESP8266 if (len>heap1) { DEBUG_PRINTLN(F("Out of memory (WS)!")); @@ -134,8 +134,8 @@ void sendDataWs(AsyncWebSocketClient * client) #endif AsyncWebSocketBuffer buffer(len); #ifdef ESP8266 - size_t heap2 = ESP.getFreeHeap(); - DEBUG_PRINTF_P(PSTR("heap %u\n"), ESP.getFreeHeap()); + size_t heap2 = getFreeHeapSize(); + DEBUG_PRINTF_P(PSTR("heap %u\n"), getFreeHeapSize()); 
#else size_t heap2 = 0; // ESP32 variants do not have the same issue and will work without checking heap allocation #endif