Something is not right with that comparison between curve fitting and Goertzel.
With the pre-computed coefficients (as you would do in 'the real world') the Goertzel calculations done on each block are:
// Goertzel evaluation of one block of samples.
// coeff, cosine, sine and scalingFactor are used as already-available values;
// their computation is not part of this fragment.
q0=0.0f; q1=0.0f; q2=0.0f;
for(i=0; i<numSamples; i++) {
// Second-order recurrence: one multiply, one subtract and one add per sample.
q0 = coeff * q1 - q2 + data[i];
q2 = q1;
q1 = q0;
}
// Turn the two final state values into real and imaginary parts, each divided by scalingFactor.
real = (q1 - q2 * cosine) / scalingFactor;
imag = (q2 * sine) / scalingFactor;
// Magnitude of the (real, imag) pair.
*amplitude = sqrtf(real*real + imag*imag);
// Angle converted from radians to degrees.
// NOTE(review): in C, atan2 is the double-precision function (the curve-fit code
// uses atan2f); on an FPU-less or single-precision-FPU target that may cost extra - confirm.
// NOTE(review): the arguments are (real, -imag), i.e. real is passed as the "y"
// argument; presumably chosen to match the curve fit's phase convention - confirm.
*angle = atan2(real, -imag) * 180.0f / (float)M_PI;
...and the curve-fit code is the following, which has many more multiplications:
// Least-squares fit of one block of samples: solves the 3x3 system A * [a b c]^T = p
// by explicitly inverting A (adjugate divided by determinant).
// Per-sample work: two multiplies and three additions.
// NOTE(review): XiZi, YiZi and Zi are only accumulated here; they must be zeroed
// before this loop - the initialisation is not visible in this fragment.
for(int i = 0; i < BUF_SIZE; i++){
XiZi += datax_lut[i] * data[i];
YiZi += datay_lut[i] * data[i];
Zi += data[i];
}
// Symmetric 3x3 matrix built from XiXi, XiYi, YiYi, Xi, Yi and BUF_SIZE.
// None of these entries is computed from data[] in the visible code.
// NOTE(review): presumably they are pre-computed from the look-up tables; if so,
// the inverse below is constant and could be calculated once instead of per block - confirm.
float A[3][3]= {{XiXi, XiYi, Xi}, {XiYi, YiYi, Yi}, {Xi, Yi, (float)BUF_SIZE}};
float C[3][3] = {{0.0f}};
// Determinant of A: three positive and three negative triple products.
float n2 = A[0][2] * A[1][0] * A[2][1];
n2 += A[0][1] * A[1][2] * A[2][0];
n2 += A[0][0] * A[1][1] * A[2][2];
n2 -= A[2][2] * A[1][0] * A[0][1];
n2 -= A[2][1] * A[1][2] * A[0][0];
n2 -= A[2][0]*A[1][1]*A[0][2];
// Only a strictly positive determinant is accepted; a zero OR negative value
// takes the fallback branch below.
if(n2 > 0.0f){
float x = 1.0f/n2;
// Adjugate (transposed cofactor matrix) of A.
C[0][0] = (A[1][1] * A[2][2]) - (A[2][1] * A[1][2]);
C[0][1] = (A[0][2] * A[2][1]) - (A[0][1] * A[2][2]);
C[0][2] = (A[0][1] * A[1][2]) - (A[0][2] * A[1][1]);
C[1][0] = (A[1][2] * A[2][0]) - (A[1][0] * A[2][2]);
C[1][1] = (A[0][0] * A[2][2]) - (A[0][2] * A[2][0]);
C[1][2] = (A[0][2] * A[1][0]) - (A[0][0] * A[1][2]);
C[2][0] = (A[1][0] * A[2][1]) - (A[1][1] * A[2][0]);
C[2][1] = (A[0][1] * A[2][0]) - (A[0][0] * A[2][1]);
C[2][2] = (A[0][0] * A[1][1]) - (A[0][1] * A[1][0]);
// Scale the adjugate by 1/det so that C becomes the inverse of A.
for(int i = 0; i < 3; i++){
for(int j = 0; j < 3; j++){
C[i][j] *= x;
}
}
// Right-hand side vector of the system.
p[0] = XiZi;
p[1] = YiZi;
p[2] = Zi;
// First two components of C * p. The third component (c) is disabled below,
// so row 2 of C is computed and scaled without being used.
a = C[0][0] * p[0] + C[0][1] * p[1] + C[0][2] * p[2];
b = C[1][0] * p[0] + C[1][1] * p[1] + C[1][2] * p[2];
//c = C[2][0] * p[0] + C[2][1] * p[1] + C[2][2] * p[2];
} else
{
// Fallback when n2 <= 0: the message says "lower than zero" but this branch is
// also taken for a determinant of exactly zero. The message has no trailing newline.
printf("Determinant lower than zero, least squares fit not possible");
// Placeholder result: yields an amplitude of sqrt(2) and an angle of 45 degrees below.
a = 1.0f;
b = 1.0f;
//c = 0;
}
// Magnitude and angle (radians converted to degrees) of the fitted (a, b) pair.
*amplitude = sqrtf(a*a + b*b);
*angle = atan2f(a, b) * 180.0f / (float)M_PI;
// Also hand the raw coefficients back to the caller.
*_a = a;
*_b = b;
So something is going on with gcc or with the way the PC processes this code.
I think these comparisons need to be done on the target microcontroller to be meaningful, and then the Goertzel will be faster.