Added integer based sin()/cos() functions, changed all trig functions to wled_math

- `sin16_t() / cos16_t()` are faster and more accurate than fastled versions
- `sin_approx() / cos_approx()` are float wrappers for `sin16_t() / cos16_t()` and are accurate enough to replace `sinf()/cosf()`
- `atan2()` is used only in octopus to calculate center offset, new approximated version saves flash
- `tan(), atan(), asin(), acos(), floor(), fmod()` are used only for sunrise/sunset calculation, using wled_math version saves flash
- `beatsinx()` replacements are to make use of new `sin16_t()/sin8_t()` functions to reduce flash size
- Extensively tested surnise/sunset calculation: deviation is 1min. max
- Tested some of the relevant FX and found no visual difference: Julia, 2D Drift, Drift Rose, Ghost rider, Rotozoomer, Palette, Arc 1D expansion
- total flash savings: 7.4k
This commit is contained in:
Damian Schneider
2024-10-05 12:29:31 +02:00
parent ba636b17a0
commit 7b855c851d
5 changed files with 288 additions and 157 deletions

View File

@@ -12,6 +12,12 @@
#define modd(x, y) ((x) - (int)((x) / (y)) * (y))
// Note: cos_t, sin_t and tan_t are very accurate but may be slow
// the math.h functions use several kB of flash and are to be avoided if possible
// sin16_t / cos16_t are faster and much more accurate than the fastled variants
// sin_approx and cos_approx are float wrappers for sin16_t/cos16_t and have an accuracy of +/-0.0015 compared to sinf()
// sin8_t / cos8_t are fastled replacements and use sin16_t / cos16_t. Slightly slower than fastled version but very accurate
float cos_t(float phi)
{
float x = modd(phi, TWO_PI);
@@ -31,8 +37,8 @@ float cos_t(float phi)
return res;
}
float sin_t(float x) {
float res = cos_t(HALF_PI - x);
float sin_t(float phi) {
float res = cos_t(HALF_PI - phi);
#ifdef WLED_DEBUG_MATH
Serial.printf("sin: %f,%f,%f,(%f)\n",x,res,sin(x),res-sin(x));
#endif
@@ -49,6 +55,85 @@ float tan_t(float x) {
return res;
}
// 16-bit, integer based Bhaskara I's sine approximation: 16*x*(pi - x) / (5*pi^2 - 4*x*(pi - x))
// input is 16bit unsigned (0-65535), output is 16bit signed (-32767 to +32767)
// optimized integer implementation by @dedehai
int16_t sin16_t(uint16_t theta) {
int scale = 1;
if (theta > 0x7FFF) {
theta = 0xFFFF - theta;
scale = -1; // second half of the sine function is negative (pi - 2*pi)
}
uint32_t precal = theta * (0x7FFF - theta);
uint64_t numerator = (uint64_t)precal * (4 * 0x7FFF); // 64bit required
int32_t denominator = 1342095361 - precal; // 1342095361 is 5 * 0x7FFF^2 / 4
int16_t result = numerator / denominator;
return result * scale;
}
int16_t cos16_t(uint16_t theta) {
return sin16_t(theta + 16384); //cos(x) = sin(x+pi/2)
}
uint8_t sin8_t(uint8_t theta) {
int32_t sin16 = sin16_t((uint16_t)theta * 257); // 255 * 257 = 0xFFFF
sin16 += 0x7FFF; //shift result to range 0-0xFFFF
return sin16 >> 8;
}
uint8_t cos8_t(uint8_t theta) {
return sin8_t(theta + 64); //cos(x) = sin(x+pi/2)
}
float sin_approx(float theta)
{
theta = modd(theta, TWO_PI); // modulo: bring to -2pi to 2pi range
if(theta < 0) theta += M_TWOPI; // 0-2pi range
uint16_t scaled_theta = (uint16_t)(theta * (0xFFFF / M_TWOPI));
int32_t result = sin16_t(scaled_theta);
float sin = float(result) / 0x7FFF;
return sin;
}
float cos_approx(float theta)
{
return sin_approx(theta + M_PI_2);
}
float tan_approx(float x) {
float c = cos_approx(x);
if (c==0.0f) return 0;
float res = sin_approx(x) / c;
return res;
}
#define ATAN2_CONST_A 0.1963f
#define ATAN2_CONST_B 0.9817f
// fast atan2() approximation source: public domain
float atan2_t(float y, float x) {
if (x == 0.0f) return (y > 0.0f) ? M_PI_2 : (y < 0.0f) ? -M_PI_2 : 0.0f;
float abs_y = (y < 0.0f) ? -y : y + 1e-10f; // make sure y is not zero to prevent division by 0
float z = abs_y / x;
float atan_approx;
if (z < 1.0f) {
atan_approx = z / (1.0f + ATAN2_CONST_A * z * z);
if (x < 0.0f) {
return (y >= 0.0f) ? atan_approx + PI : atan_approx - PI;
}
}
else {
z = x / abs_y;
atan_approx = M_PI_2 - z / (1.0f + ATAN2_CONST_A * z * z);
if (y < 0.0f) {
return -atan_approx;
}
}
return atan_approx;
}
//https://stackoverflow.com/questions/3380628
// Absolute error <= 6.7e-5
float acos_t(float x) {