use video scaling instead of NPB luminance & new ABL (#4798)

* updated color scaling to preserve hue at low brightness resulting in much better colors
* replace NPBlg with NPB, moved brightness scaling to bus manager
* improved gamma table calculation: fixed mismatch in inverting gamma table calculation: inversion should now be as good as it gets
* code cleanup, fixed gamma being applied in unnecessary places

Improvements to ABL handling:
- removed strip level handling, ist now all done on bus level
- limiter now respects pixel mapping
- proper handling of white channel
- improved current estimation
- current is now always correctly reported to UI
- minimal FPS impact if the ABL is not limiting but slighly higher impact for global ABL limit due to double-scaling

- moved brightness scaling to BusDigital
- created new header file colors.h to be able to access color functions in bus-manager.
- updated colo_fade() with better video scaling to preserve hue's at low brightness
- added IRAM_ATTR to color_fade (negligible speed impact when compared to inline and benefits other functions)
- added IRAM_ATTR to color_blend as it is used a lot throughout the code, did not test speed impact but adding it to color_fade made it almost on-par with an inlined function

Additional changes:
- fixes for properly handling `scaledBri()` (by @blazoncek)
- also use bit-shift instead of division in blending for ESP8266
- improvements for faster "softlight" calculation in blending
- changed some variables to uint8_t to maybe let the compiler optimize better, uint8_t can be faster if read, store and set are all done in uint8_t, which is the case in the ones I changed
- various minor code formatting changes
This commit is contained in:
Damian Schneider
2025-08-29 17:12:10 +02:00
committed by GitHub
parent 8aeb9e1abe
commit d5d7fde30f
15 changed files with 452 additions and 538 deletions

View File

@@ -8,7 +8,7 @@
* color blend function, based on FastLED blend function
* the calculation for each color is: result = (A*(amountOfA) + A + B*(amountOfB) + B) / 256 with amountOfA = 255 - amountOfB
*/
uint32_t color_blend(uint32_t color1, uint32_t color2, uint8_t blend) {
uint32_t IRAM_ATTR color_blend(uint32_t color1, uint32_t color2, uint8_t blend) {
// min / max blend checking is omitted: calls with 0 or 255 are rare, checking lowers overall performance
const uint32_t TWO_CHANNEL_MASK = 0x00FF00FF; // mask for R and B channels or W and G if negated (poorman's SIMD; https://github.com/wled/WLED/pull/4568#discussion_r1986587221)
uint32_t rb1 = color1 & TWO_CHANNEL_MASK; // extract R & B channels from color1
@@ -64,26 +64,26 @@ uint32_t color_add(uint32_t c1, uint32_t c2, bool preserveCR)
* fades color toward black
* if using "video" method the resulting color will never become black unless it is already black
*/
uint32_t color_fade(uint32_t c1, uint8_t amount, bool video)
{
uint32_t IRAM_ATTR color_fade(uint32_t c1, uint8_t amount, bool video) {
if (c1 == 0 || amount == 0) return 0; // black or no change
if (amount == 255) return c1;
if (c1 == BLACK || amount == 0) return BLACK;
uint32_t scaledcolor; // color order is: W R G B from MSB to LSB
uint32_t scale = amount; // 32bit for faster calculation
uint32_t addRemains = 0;
if (!video) scale++; // add one for correct scaling using bitshifts
else { // video scaling: make sure colors do not dim to zero if they started non-zero
addRemains = R(c1) ? 0x00010000 : 0;
addRemains |= G(c1) ? 0x00000100 : 0;
addRemains |= B(c1) ? 0x00000001 : 0;
addRemains |= W(c1) ? 0x01000000 : 0;
if (!video) amount++; // add one for correct scaling using bitshifts
else {
// video scaling: make sure colors do not dim to zero if they started non-zero unless they distort the hue
uint8_t r = byte(c1>>16), g = byte(c1>>8), b = byte(c1), w = byte(c1>>24); // extract r, g, b, w channels
uint8_t maxc = (r > g) ? ((r > b) ? r : b) : ((g > b) ? g : b); // determine dominant channel for hue preservation
uint8_t quarterMax = maxc >> 2; // note: using half of max results in color artefacts
addRemains = r && r > quarterMax ? 0x00010000 : 0;
addRemains |= g && g > quarterMax ? 0x00000100 : 0;
addRemains |= b && b > quarterMax ? 0x00000001 : 0;
addRemains |= w ? 0x01000000 : 0;
}
const uint32_t TWO_CHANNEL_MASK = 0x00FF00FF;
uint32_t rb = (((c1 & TWO_CHANNEL_MASK) * scale) >> 8) & TWO_CHANNEL_MASK; // scale red and blue
uint32_t wg = (((c1 >> 8) & TWO_CHANNEL_MASK) * scale) & ~TWO_CHANNEL_MASK; // scale white and green
scaledcolor = (rb | wg) + addRemains;
return scaledcolor;
uint32_t rb = (((c1 & TWO_CHANNEL_MASK) * amount) >> 8) & TWO_CHANNEL_MASK; // scale red and blue
uint32_t wg = (((c1 >> 8) & TWO_CHANNEL_MASK) * amount) & ~TWO_CHANNEL_MASK; // scale white and green
return (rb | wg) + addRemains;
}
/*
@@ -92,7 +92,7 @@ uint32_t color_fade(uint32_t c1, uint8_t amount, bool video)
note: inputs are 32bit to speed up the function, useful input value ranges are 0-255
*/
uint32_t adjust_color(uint32_t rgb, uint32_t hueShift, uint32_t lighten, uint32_t brighten) {
if(rgb == 0 | hueShift + lighten + brighten == 0) return rgb; // black or no change
if (rgb == 0 | hueShift + lighten + brighten == 0) return rgb; // black or no change
CHSV32 hsv;
rgb2hsv(rgb, hsv); //convert to HSV
hsv.h += (hueShift << 8); // shift hue (hue is 16 bits)
@@ -104,8 +104,7 @@ uint32_t adjust_color(uint32_t rgb, uint32_t hueShift, uint32_t lighten, uint32_
}
// 1:1 replacement of fastled function optimized for ESP, slightly faster, more accurate and uses less flash (~ -200bytes)
uint32_t ColorFromPaletteWLED(const CRGBPalette16& pal, unsigned index, uint8_t brightness, TBlendType blendType)
{
uint32_t ColorFromPaletteWLED(const CRGBPalette16& pal, unsigned index, uint8_t brightness, TBlendType blendType) {
if (blendType == LINEARBLEND_NOWRAP) {
index = (index * 0xF0) >> 8; // Blend range is affected by lo4 blend of values, remap to avoid wrapping
}
@@ -120,16 +119,16 @@ uint32_t ColorFromPaletteWLED(const CRGBPalette16& pal, unsigned index, uint8_t
else ++entry;
unsigned f2 = (lo4 << 4);
unsigned f1 = 256 - f2;
red1 = (red1 * f1 + (unsigned)entry->r * f2) >> 8; // note: using color_blend() is 20% slower
red1 = (red1 * f1 + (unsigned)entry->r * f2) >> 8; // note: using color_blend() is slower
green1 = (green1 * f1 + (unsigned)entry->g * f2) >> 8;
blue1 = (blue1 * f1 + (unsigned)entry->b * f2) >> 8;
blue1 = (blue1 * f1 + (unsigned)entry->b * f2) >> 8;
}
if (brightness < 255) { // note: zero checking could be done to return black but that is hardly ever used so it is omitted
// actually color_fade(c1, brightness)
// actually same as color_fade(), using color_fade() is slower
uint32_t scale = brightness + 1; // adjust for rounding (bitshift)
red1 = (red1 * scale) >> 8; // note: using color_fade() is 30% slower
red1 = (red1 * scale) >> 8;
green1 = (green1 * scale) >> 8;
blue1 = (blue1 * scale) >> 8;
blue1 = (blue1 * scale) >> 8;
}
return RGBW32(red1,green1,blue1,0);
}
@@ -589,10 +588,13 @@ uint8_t NeoGammaWLEDMethod::gammaT_inv[256];
void NeoGammaWLEDMethod::calcGammaTable(float gamma)
{
float gamma_inv = 1.0f / gamma; // inverse gamma
for (size_t i = 0; i < 256; i++) {
for (size_t i = 1; i < 256; i++) {
gammaT[i] = (int)(powf((float)i / 255.0f, gamma) * 255.0f + 0.5f);
gammaT_inv[i] = (int)(powf((float)i / 255.0f, gamma_inv) * 255.0f + 0.5f);
gammaT_inv[i] = (int)(powf(((float)i - 0.5f) / 255.0f, gamma_inv) * 255.0f + 0.5f);
//DEBUG_PRINTF_P(PSTR("gammaT[%d] = %d gammaT_inv[%d] = %d\n"), i, gammaT[i], i, gammaT_inv[i]);
}
gammaT[0] = 0;
gammaT_inv[0] = 0;
}
uint8_t IRAM_ATTR_YN NeoGammaWLEDMethod::Correct(uint8_t value)
@@ -601,21 +603,6 @@ uint8_t IRAM_ATTR_YN NeoGammaWLEDMethod::Correct(uint8_t value)
return gammaT[value];
}
// used for color gamma correction
uint32_t IRAM_ATTR_YN NeoGammaWLEDMethod::Correct32(uint32_t color)
{
if (!gammaCorrectCol) return color;
uint8_t w = W(color);
uint8_t r = R(color);
uint8_t g = G(color);
uint8_t b = B(color);
w = gammaT[w];
r = gammaT[r];
g = gammaT[g];
b = gammaT[b];
return RGBW32(r, g, b, w);
}
uint32_t IRAM_ATTR_YN NeoGammaWLEDMethod::inverseGamma32(uint32_t color)
{
if (!gammaCorrectCol) return color;