Electronics > Microcontrollers

Low-accuracy fast CORDIC

(1/4) > >>

Picuino:
Sometimes it is necessary to calculate sines and cosines relatively quickly and more accurately than a Look Up Table can provide.
One solution may be to calculate the sine and cosine with the mathematical library, but in many cases (especially in small microcontrollers) this involves a lot of computational load and a lot of calculation time.

The solution in these cases is to use the CORDIC algorithm which can give results with arbitrary precision (the number of correct bits depends on the number of iterations) and in a fast way (it only needs additions and shifts).

Starting from the algorithm that appears in wikipedia, I have prepared a Python file to calculate the sine and cosine of an angle in integer format with 14 bits of accuracy (2 bytes integer value):

--- Code: ---import math

BITS = 14
theta_table = [int(math.atan2(1, 2**i)*(2**BITS)) for i in range(BITS)]

def compute_Kn():
k = 2 ** BITS
for i in range(BITS):
k /= math.sqrt(1 + 2 ** (-2 * i))
return round(k)

def CORDIC_integer(alpha):
theta = 0
x = compute_Kn()  # Constant value for a certain number of bits
y = 0
shift = 0
for arc_tangent in theta_table:
sx = x >> shift
sy = y >> shift
shift += 1
if theta < alpha:
theta += arc_tangent
x -= sy
y += sx
else:
theta -= arc_tangent
x += sy
y -= sx
return x , y

def main():
print("BITS =", BITS)
print("Kn =", compute_Kn())
print("theta_table = [")
for theta in theta_table:
print(f"    {theta},")
print("    ]")
print("  x        sin(x)        Error           cos(x)        Error")
for x in range(-90, 91, 5):
cordic_cos, cordic_sin = CORDIC_integer(alpha)
cordic_cos /= (2 ** BITS)
cordic_sin /= (2 ** BITS)
print(f"{x:+05.1f}°  {cordic_sin:+.9f} ({cordic_sin-sin:+.9f})   {cordic_cos:+.9f} ({cordic_cos-cos:+.9f})")

if __name__ == "__main__":
main()

--- End code ---

Output:

--- Code: ---BITS = 14
Kn = 9949
theta_table = [
12868,
7596,
4014,
2037,
1023,
512,
256,
128,
64,
32,
16,
8,
4,
2,
]
x        sin(x)        Error           cos(x)        Error
-90.0°  -0.999938965 (+0.000061035)   -0.000183105 (-0.000183105)
-85.0°  -0.996032715 (+0.000161983)   +0.087097168 (-0.000058575)
-80.0°  -0.984741211 (+0.000066542)   +0.173583984 (-0.000064193)
-75.0°  -0.965942383 (-0.000016557)   +0.258850098 (+0.000031053)
-70.0°  -0.939514160 (+0.000178461)   +0.341796875 (-0.000223268)
-65.0°  -0.906433105 (-0.000125318)   +0.422668457 (+0.000050195)
-60.0°  -0.866149902 (-0.000124499)   +0.499755859 (-0.000244141)
-55.0°  -0.819213867 (-0.000061823)   +0.573486328 (-0.000090108)
-50.0°  -0.766174316 (-0.000129873)   +0.642517090 (-0.000270520)
-45.0°  -0.707092285 (+0.000014496)   +0.706970215 (-0.000136566)
-40.0°  -0.642761230 (+0.000026379)   +0.765991211 (-0.000053232)
-35.0°  -0.573669434 (-0.000092997)   +0.818847656 (-0.000304388)
-30.0°  -0.499877930 (+0.000122070)   +0.866210938 (+0.000185534)
-25.0°  -0.422546387 (+0.000071875)   +0.906372070 (+0.000064283)
-20.0°  -0.342163086 (-0.000142943)   +0.939941406 (+0.000248785)
-15.0°  -0.258789062 (+0.000029983)   +0.965881348 (-0.000044479)
-10.0°  -0.173889160 (-0.000240982)   +0.984802246 (-0.000005507)
-05.0°  -0.087280273 (-0.000124531)   +0.996215820 (+0.000021122)
+00.0°  -0.000061035 (-0.000061035)   +1.000244141 (+0.000244141)
+05.0°  +0.087280273 (+0.000124531)   +0.996032715 (-0.000161983)
+10.0°  +0.173767090 (+0.000118912)   +0.984741211 (-0.000066542)
+15.0°  +0.258789062 (-0.000029983)   +0.965942383 (+0.000016557)
+20.0°  +0.342102051 (+0.000081907)   +0.939514160 (-0.000178461)
+25.0°  +0.422485352 (-0.000132910)   +0.906433105 (+0.000125318)
+30.0°  +0.499816895 (-0.000183105)   +0.866149902 (+0.000124499)
+35.0°  +0.573425293 (-0.000151143)   +0.819213867 (+0.000061823)
+40.0°  +0.642639160 (-0.000148450)   +0.766174316 (+0.000129873)
+45.0°  +0.707153320 (+0.000046539)   +0.707092285 (-0.000014496)
+50.0°  +0.766052246 (+0.000007803)   +0.642761230 (-0.000026379)
+55.0°  +0.819091797 (-0.000060247)   +0.573547363 (-0.000029073)
+60.0°  +0.866149902 (+0.000124499)   +0.499816895 (-0.000183105)
+65.0°  +0.906433105 (+0.000125318)   +0.422485352 (-0.000132910)
+70.0°  +0.939514160 (-0.000178461)   +0.342102051 (+0.000081907)
+75.0°  +0.965942383 (+0.000016557)   +0.258789062 (-0.000029983)
+80.0°  +0.984741211 (-0.000066542)   +0.173889160 (+0.000240982)
+85.0°  +0.996032715 (-0.000161983)   +0.087280273 (+0.000124531)
+90.0°  +0.999938965 (-0.000061035)   +0.000244141 (+0.000244141)

--- End code ---

Translating it to C code is not complicated.

Picuino:
New version with integer math.

--- Code: ---import math

BITS = 15
RANGE = (2 ** BITS) - 1
theta_table = [int(math.atan2(1, 2**i)*RANGE*2/math.pi) for i in range(BITS)]

def compute_Kn():
k = RANGE
for i in range(BITS):
k *= 1.0 / math.sqrt(1 + 2 ** (-2 * i))
return int(k)

def CORDIC_integer(alpha):
theta = 0
x = compute_Kn()  # Constant value for a certain number of bits
if theta < alpha:
theta += theta_table[0]
y = x
else:
theta -= theta_table[0]
y = -x
shift = 1
for arc_tangent in theta_table[1:]:
sx = x >> shift
sy = y >> shift
shift += 1
if theta < alpha:
theta += arc_tangent
x -= sy
y += sx
else:
theta -= arc_tangent
x += sy
y -= sx
return x, y

def main():
print("BITS =", BITS)
print("Kn =", compute_Kn())
print("theta_table = [")
for theta in theta_table:
print(f"    {theta},")
print("    ]")

sin_desvest = 0
cos_desvest = 0
num_deg = 0
print("\n  x         x_int            sin(x)  Error         cos(x)  Error")
for x in range(-90, 91, 5):
alpha = int(math.radians(x) * RANGE * 2 / math.pi)
cordic_cos, cordic_sin = CORDIC_integer(alpha)
sin_desvest += (cordic_sin - sin) ** 2
cos_desvest += (cordic_cos - cos) ** 2
num_deg += 1
print(f"{x:+05.1f}°  {alpha:+12d}   {cordic_sin:+12d} ({cordic_sin-sin:+4d})   {cordic_cos:+12d} ({cordic_cos-cos:+4d})")
print()
print("sin_desvest =", math.sqrt(sin_desvest/(num_deg-1)))
print("cos_desvest =", math.sqrt(cos_desvest/(num_deg-1)))

if __name__ == "__main__":
main()

--- End code ---

Results:

--- Code: ---BITS = 15
Kn = 19897
theta_table = [
16383,
9671,
5110,
2594,
1302,
651,
325,
162,
81,
40,
20,
10,
5,
2,
1,
]

x         x_int            sin(x)  Error         cos(x)  Error
-90.0°        -32767         -32766 (  +1)             -5 (  -5)
-85.0°        -30946         -32640 (  +2)          +2846 (  -9)
-80.0°        -29126         -32267 (  +2)          +5685 (  -4)
-75.0°        -27305         -31650 (  +0)          +8485 (  +5)
-70.0°        -25485         -30788 (  +2)         +11197 (  -9)
-65.0°        -23665         -29697 (  -1)         +13854 (  +7)
-60.0°        -21844         -28379 (  -2)         +16380 (  -3)
-55.0°        -20024         -26842 (  -1)         +18793 (  -1)
-50.0°        -18203         -25103 (  -3)         +21058 (  -4)
-45.0°        -16383         -23170 (  -1)         +23165 (  -4)
-40.0°        -14563         -21058 (  +4)         +25103 (  +3)
-35.0°        -12742         -18795 (  -1)         +26838 (  -3)
-30.0°        -10922         -16384 (  -1)         +28378 (  +1)
-25.0°         -9101         -13852 (  -5)         +29695 (  -1)
-20.0°         -7281         -11205 (  +1)         +30796 (  +6)
-15.0°         -5461          -8483 (  -3)         +31648 (  -2)
-10.0°         -3640          -5691 (  -2)         +32269 (  +0)
-05.0°         -1820          -2854 (  +1)         +32644 (  +2)
+00.0°            +0             -2 (  -2)         +32771 (  +4)
+05.0°         +1820          +2852 (  -3)         +32639 (  -3)
+10.0°         +3640          +5689 (  +0)         +32266 (  -3)
+15.0°         +5461          +8483 (  +3)         +31649 (  -1)
+20.0°         +7281         +11205 (  -1)         +30788 (  -2)
+25.0°         +9101         +13852 (  +5)         +29697 (  +1)
+30.0°        +10922         +16380 (  -3)         +28379 (  +2)
+35.0°        +12742         +18791 (  -3)         +26842 (  +1)
+40.0°        +14563         +21058 (  -4)         +25103 (  +3)
+45.0°        +16383         +23167 (  -2)         +23170 (  +1)
+50.0°        +18203         +25103 (  +3)         +21058 (  -4)
+55.0°        +20024         +26840 (  -1)         +18795 (  +1)
+60.0°        +21844         +28378 (  +1)         +16384 (  +1)
+65.0°        +23665         +29697 (  +1)         +13852 (  +5)
+70.0°        +25485         +30788 (  -2)         +11205 (  -1)
+75.0°        +27305         +31649 (  -1)          +8483 (  +3)
+80.0°        +29126         +32266 (  -3)          +5691 (  +2)
+85.0°        +30946         +32639 (  -3)          +2854 (  -1)
+90.0°        +32767         +32768 (  +1)             -1 (  -1)

sin_desvest = 2.4094720491334933
cos_desvest = 3.7155828016013257

--- End code ---

Sensorcat:
Nice, but in which way is this more accurate than a LUT? The accuracy that can be achieved with a LUT is limited only by the amount of memory that you can (or are willing to) set aside for the table.

peter-h:
This may be a similar thing. Super fast and super accurate. I use it for a waveform generator. No idea where I found it :)

SPC is "samples per cycle" e.g. 1024.

--- Code: ---
// Compact sin(x) function which doesn't need math.h
// Uses Horner's method
// i is from 0 to SPC-1

#define PI 3.14159265358979323846
#define SCALE 13107

int sin2(int i)
{
//for (int n=3; sin != old_sin; n+=2)
for (int n=3; (sin != old_sin) && (n < 30); n+=2)
{
old_sin = sin;
sin += term;
}
return (sin * SCALE) + (sin >= 0 ? 0.5 : -0.5);
}

--- End code ---

Picuino:

--- Quote from: Sensorcat on June 12, 2024, 07:51:38 pm ---Nice, but in which way is this more accurate than a LUT? The accuracy that can be achieved with a LUT is limited only by the amount of memory that you can (or are willing to) set aside for the table.

--- End quote ---
To achieve the same accuracy you would need a LUT of approximately 16384 x 2 Byte entries (32kBytes total). It could be reduced by taking into account some trigonometric properties, but the table is still too large.
That LUT is unacceptable for most small and not so small micros.

--- Quote from: peter-h on June 13, 2024, 05:57:59 am ---This may be a similar thing. Super fast and super accurate. I use it for a waveform generator. No idea where I found it :)

SPC is "samples per cycle" e.g. 1024.

--- End quote ---
Note that on micros that do not have hardware acceleration to perform multiplications, the CORDIC method is still much faster, because it avoids multiplications that are very costly in machine cycles.
The CORDIC method only performs shifts and additions/subtractions, and also does not have many cycles. As many as bits of precision.