/*
 * Natural logarithm of Argument, valid for 1.0 <= Argument <= 4.0.
 *
 * Shift-and-add (pseudo-division) method: work in fixed point with the
 * input scaled by 2^29, greedily grow a running product by factors of
 * (1 + 2^-i) while it stays <= the target, and sum the matching
 * precomputed ln(1 + 2^-i) terms from A_e_i[] (scaled by 2^31).
 */
static double log_base_e ( double Argument) /* 1.0 <= Argument <= 4.0 */
{
    assert(Argument >= 1.0 && Argument <= 4.01);  /* small slack above 4.0 */

    uint32_t target  = Argument * 512*1024*1024;  /* Argument * 2^29 */
    uint32_t product = 512*1024*1024;             /* running product, 1.0 * 2^29 */
    uint32_t acc     = 0;                         /* accumulated log, scale 2^31 */

    for ( int i = 0; i < N_CONSTANTS; i++ )
    {
        uint32_t trial = product + (product >> i);  /* product * (1 + 2^-i) */
        if ( trial <= target )
        {
            product = trial;
            acc += A_e_i[i];
        }
    }

    /* Divide out the 2^31 scale of the accumulated table entries. */
    return acc/(1024.0*1024.0*1024.0*2.0);
}
/* Number of entries in each of the logarithm tables below. */
#define N_CONSTANTS (31)
/* Shift-and-add natural-log table: A_e_i[k] = round( ln(1 + 2^-k) * 2^31 ).
 * (First entry is ln(2) * 2^31.) log_base_e() accumulates these terms and
 * divides the sum by 2^31.
 * NOTE(review): this #define and table are referenced by log_base_e(); in a
 * single translation unit they must be declared before that function —
 * presumably the original file orders them that way; confirm. */
unsigned int A_e_i[N_CONSTANTS] =
{
1488522235, 870729689, 479197127,
252937143, 130190384, 66081633,
33294987, 16712019, 8372266,
4190213, 2096128, 1048320,
524224, 262128, 131068,
65535, 32767, 16383,
8191, 4095, 2047,
1023, 511, 255,
127, 63, 31,
15, 7, 3,
1
};
/* Companion base-2 table: A_2_i[k] = round( log2(1 + 2^-k) * 2^31 ).
 * First entry is log2(2) * 2^31 = 2147483648 exactly.
 * NOTE(review): not referenced by any code visible in this chunk —
 * presumably used by a base-2 twin of log_base_e(); confirm. */
static const unsigned int A_2_i [N_CONSTANTS] =
{
2147483648, 1256197404, 691335319,
364911161, 187825021, 95335645,
48034512 , 24110347, 12078627,
6045199, 3024074, 1512406,
756295, 378170, 189091,
94547, 47273, 23637,
11818, 5909, 2954,
1477, 738, 369,
184, 92, 46,
23, 11, 5,
2
};
<snip>coming from oldschool and now obsolete ray casting</snip>
If you haven't already, before starting on turbo codes have a look at block and convolutional codes more generally. Block codes are a really cool example of applied linear algebra and some of the decoding techniques used in convolutional codes (particularly the Viterbi algorithm) are neat. The Viterbi algorithm is pretty simple, easy to visualize, and pops up in other places where a maximum likelihood path through a hidden Markov model is needed. I'd add that if you are interested in turbo codes, also look at LDPC and polar codes. I find LDPC particularly interesting because it was published in the 1960s but sunk without trace, because the industry wasn't ready for it. Then it was rediscovered decades later, and is now widely used.
I'd add that if you are interested in turbo codes, also look at LDPC and polar codes. I find LDPC particularly interesting because it was published in the 1960s but sunk without trace, because the industry wasn't ready for it. Then it was rediscovered decades later, and is now widely used.
I've never heard of Polar Codes before. Thanks! Same here!
(starts looking at http://pfister.ee.duke.edu/courses/ecen655/polar.pdf (http://pfister.ee.duke.edu/courses/ecen655/polar.pdf) - A Brief Introduction to Polar Codes)Just in case you didn't already notice, there's more interesting stuff to be found on the ECEN 655 course page (http://pfister.ee.duke.edu/courses/ecen655/). For example the one on partition functions of non-fucked-up factor graphs. Or whatever's your mathematical poison really. ;D
The hard thing is that unless you know the names you don't know what to look for.
This might come in handy someday: Neat! While scribbling those down and looking at the resulting graphs & indices I noticed that it contained cyclic subgroups. Taking the above graphs as an example, start at number k=3, so an entry in the "Storage" graph. That maps to i=6 in the "In-order" graph. Then jump to k=6 in the other graph again, and repeat the mapping.
(https://i.stack.imgur.com/LXLmK.png)
I found these when I needed a mapping between in-order and heap/storage-order perfect binary search trees (but I wasn't the first; they're OEIS A131987 (https://oeis.org/A131987) and A101279 (https://oeis.org/A101279)).
k(i,N) = b(i + 2^N − 1) / 2
i(k,N) = (k − 2^⌊log₂ k⌋) · 2^(N − ⌊log₂ k⌋) + 2^(N − ⌊log₂ k⌋) − 1
1 : 1
--------------------------------------------------------------------------------
1 : 01, 10
3 : 11
--------------------------------------------------------------------------------
1 : 001, 100
2 : 010
3 : 011, 110, 101
7 : 111
--------------------------------------------------------------------------------
1 : 0001, 1000
2 : 0010, 0100
3 : 0011, 1100, 1001
5 : 0101, 0110, 1010
7 : 0111, 1110, 1101, 1011
15 : 1111
--------------------------------------------------------------------------------
1 : 00001, 10000
2 : 00010, 01000
3 : 00011, 11000, 10001
4 : 00100
5 : 00101, 01100, 10010
6 : 00110, 10100, 01001
7 : 00111, 11100, 11001, 10011
10 : 01010
11 : 01011, 01110, 11010, 10101
13 : 01101, 10110
15 : 01111, 11110, 11101, 11011, 10111
31 : 11111
--------------------------------------------------------------------------------
1 : 000001, 100000
2 : 000010, 010000
3 : 000011, 110000, 100001
4 : 000100, 001000
5 : 000101, 011000, 100010
6 : 000110, 101000, 010001
7 : 000111, 111000, 110001, 100011
9 : 001001, 001100, 100100
10 : 001010, 010100, 010010
11 : 001011, 011100, 110010, 100101
13 : 001101, 101100, 011001, 100110
14 : 001110, 110100, 101001, 010011
15 : 001111, 111100, 111001, 110011, 100111
21 : 010101, 010110, 011010, 101010
23 : 010111, 011110, 111010, 110101, 101011
27 : 011011, 101110, 011101, 110110, 101101
31 : 011111, 111110, 111101, 111011, 110111, 101111
63 : 111111
--------------------------------------------------------------------------------
1 : 0000001, 1000000
2 : 0000010, 0100000
3 : 0000011, 1100000, 1000001
4 : 0000100, 0010000
5 : 0000101, 0110000, 1000010
6 : 0000110, 1010000, 0100001
7 : 0000111, 1110000, 1100001, 1000011
8 : 0001000
9 : 0001001, 0011000, 1000100
10 : 0001010, 0101000, 0100010
11 : 0001011, 0111000, 1100010, 1000101
12 : 0001100, 1001000, 0010001
13 : 0001101, 1011000, 0110001, 1000110
14 : 0001110, 1101000, 1010001, 0100011
15 : 0001111, 1111000, 1110001, 1100011, 1000111
18 : 0010010, 0010100, 0100100
19 : 0010011, 0011100, 1100100, 1001001
21 : 0010101, 0101100, 0110010, 1001010
22 : 0010110, 0110100, 1010010, 0100101
23 : 0010111, 0111100, 1110010, 1100101, 1001011
25 : 0011001, 1001100
26 : 0011010, 1010100, 0101001, 0100110
27 : 0011011, 1011100, 0111001, 1100110, 1001101
29 : 0011101, 1101100, 1011001, 0110011, 1001110
30 : 0011110, 1110100, 1101001, 1010011, 0100111
31 : 0011111, 1111100, 1111001, 1110011, 1100111, 1001111
42 : 0101010
43 : 0101011, 0101110, 0111010, 1101010, 1010101
45 : 0101101, 0110110, 1011010, 0110101, 1010110
47 : 0101111, 0111110, 1111010, 1110101, 1101011, 1010111
55 : 0110111, 1011110, 0111101, 1110110, 1101101, 1011011
59 : 0111011, 1101110, 1011101
63 : 0111111, 1111110, 1111101, 1111011, 1110111, 1101111, 1011111
127 : 1111111
--------------------------------------------------------------------------------
1 : 00000001, 10000000
2 : 00000010, 01000000
3 : 00000011, 11000000, 10000001
4 : 00000100, 00100000
5 : 00000101, 01100000, 10000010
6 : 00000110, 10100000, 01000001
7 : 00000111, 11100000, 11000001, 10000011
8 : 00001000, 00010000
9 : 00001001, 00110000, 10000100
10 : 00001010, 01010000, 01000010
11 : 00001011, 01110000, 11000010, 10000101
12 : 00001100, 10010000, 00100001
13 : 00001101, 10110000, 01100001, 10000110
14 : 00001110, 11010000, 10100001, 01000011
15 : 00001111, 11110000, 11100001, 11000011, 10000111
17 : 00010001, 00011000, 10001000
18 : 00010010, 00101000, 01000100
19 : 00010011, 00111000, 11000100, 10001001
20 : 00010100, 01001000, 00100010
21 : 00010101, 01011000, 01100010, 10001010
22 : 00010110, 01101000, 10100010, 01000101
23 : 00010111, 01111000, 11100010, 11000101, 10001011
25 : 00011001, 10011000, 00110001, 10001100
26 : 00011010, 10101000, 01010001, 01000110
27 : 00011011, 10111000, 01110001, 11000110, 10001101
28 : 00011100, 11001000, 10010001, 00100011
29 : 00011101, 11011000, 10110001, 01100011, 10001110
30 : 00011110, 11101000, 11010001, 10100011, 01000111
31 : 00011111, 11111000, 11110001, 11100011, 11000111, 10001111
36 : 00100100
37 : 00100101, 00101100, 01100100, 10010010
38 : 00100110, 00110100, 10100100, 01001001
39 : 00100111, 00111100, 11100100, 11001001, 10010011
41 : 00101001, 01001100, 00110010, 10010100
42 : 00101010, 01010100, 01010010, 01001010
43 : 00101011, 01011100, 01110010, 11001010, 10010101
45 : 00101101, 01101100, 10110010, 01100101, 10010110
46 : 00101110, 01110100, 11010010, 10100101, 01001011
47 : 00101111, 01111100, 11110010, 11100101, 11001011, 10010111
51 : 00110011, 10011100, 00111001, 11001100, 10011001
53 : 00110101, 10101100, 01011001, 01100110, 10011010
54 : 00110110, 10110100, 01101001, 10100110, 01001101
55 : 00110111, 10111100, 01111001, 11100110, 11001101, 10011011
58 : 00111010, 11010100, 10101001, 01010011, 01001110
59 : 00111011, 11011100, 10111001, 01110011, 11001110, 10011101
61 : 00111101, 11101100, 11011001, 10110011, 01100111, 10011110
62 : 00111110, 11110100, 11101001, 11010011, 10100111, 01001111
63 : 00111111, 11111100, 11111001, 11110011, 11100111, 11001111, 10011111
85 : 01010101, 01010110, 01011010, 01101010, 10101010
87 : 01010111, 01011110, 01111010, 11101010, 11010101, 10101011
91 : 01011011, 01101110, 10111010, 01110101, 11010110, 10101101
93 : 01011101, 01110110, 11011010, 10110101, 01101011, 10101110
95 : 01011111, 01111110, 11111010, 11110101, 11101011, 11010111, 10101111
109 : 01101101, 10110110
111 : 01101111, 10111110, 01111101, 11110110, 11101101, 11011011, 10110111
119 : 01110111, 11011110, 10111101, 01111011, 11101110, 11011101, 10111011
127 : 01111111, 11111110, 11111101, 11111011, 11110111, 11101111, 11011111, 10111111
255 : 11111111
--------------------------------------------------------------------------------
1 : 000000001, 100000000
2 : 000000010, 010000000
3 : 000000011, 110000000, 100000001
4 : 000000100, 001000000
5 : 000000101, 011000000, 100000010
6 : 000000110, 101000000, 010000001
7 : 000000111, 111000000, 110000001, 100000011
8 : 000001000, 000100000
9 : 000001001, 001100000, 100000100
10 : 000001010, 010100000, 010000010
11 : 000001011, 011100000, 110000010, 100000101
12 : 000001100, 100100000, 001000001
13 : 000001101, 101100000, 011000001, 100000110
14 : 000001110, 110100000, 101000001, 010000011
15 : 000001111, 111100000, 111000001, 110000011, 100000111
16 : 000010000
17 : 000010001, 000110000, 100001000
18 : 000010010, 001010000, 010000100
19 : 000010011, 001110000, 110000100, 100001001
20 : 000010100, 010010000, 001000010
21 : 000010101, 010110000, 011000010, 100001010
22 : 000010110, 011010000, 101000010, 010000101
23 : 000010111, 011110000, 111000010, 110000101, 100001011
24 : 000011000, 100010000, 000100001
25 : 000011001, 100110000, 001100001, 100001100
26 : 000011010, 101010000, 010100001, 010000110
27 : 000011011, 101110000, 011100001, 110000110, 100001101
28 : 000011100, 110010000, 100100001, 001000011
29 : 000011101, 110110000, 101100001, 011000011, 100001110
30 : 000011110, 111010000, 110100001, 101000011, 010000111
31 : 000011111, 111110000, 111100001, 111000011, 110000111, 100001111
34 : 000100010, 000101000, 010001000
35 : 000100011, 000111000, 110001000, 100010001
36 : 000100100, 001001000, 001000100
37 : 000100101, 001011000, 011000100, 100010010
38 : 000100110, 001101000, 101000100, 010001001
39 : 000100111, 001111000, 111000100, 110001001, 100010011
41 : 000101001, 010011000, 001100010, 100010100
42 : 000101010, 010101000, 010100010, 010001010
43 : 000101011, 010111000, 011100010, 110001010, 100010101
44 : 000101100, 011001000, 100100010, 001000101
45 : 000101101, 011011000, 101100010, 011000101, 100010110
46 : 000101110, 011101000, 110100010, 101000101, 010001011
47 : 000101111, 011111000, 111100010, 111000101, 110001011, 100010111
49 : 000110001, 100011000
50 : 000110010, 100101000, 001010001, 010001100
51 : 000110011, 100111000, 001110001, 110001100, 100011001
52 : 000110100, 101001000, 010010001, 001000110
53 : 000110101, 101011000, 010110001, 011000110, 100011010
54 : 000110110, 101101000, 011010001, 101000110, 010001101
55 : 000110111, 101111000, 011110001, 111000110, 110001101, 100011011
57 : 000111001, 110011000, 100110001, 001100011, 100011100
58 : 000111010, 110101000, 101010001, 010100011, 010001110
59 : 000111011, 110111000, 101110001, 011100011, 110001110, 100011101
60 : 000111100, 111001000, 110010001, 100100011, 001000111
61 : 000111101, 111011000, 110110001, 101100011, 011000111, 100011110
62 : 000111110, 111101000, 111010001, 110100011, 101000111, 010001111
63 : 000111111, 111111000, 111110001, 111100011, 111000111, 110001111, 100011111
73 : 001001001, 001001100, 001100100, 100100100
74 : 001001010, 001010100, 010100100, 010010010
75 : 001001011, 001011100, 011100100, 110010010, 100100101
77 : 001001101, 001101100, 101100100, 011001001, 100100110
78 : 001001110, 001110100, 110100100, 101001001, 010010011
79 : 001001111, 001111100, 111100100, 111001001, 110010011, 100100111
82 : 001010010, 010010100
83 : 001010011, 010011100, 001110010, 110010100, 100101001
85 : 001010101, 010101100, 010110010, 011001010, 100101010
86 : 001010110, 010110100, 011010010, 101001010, 010010101
87 : 001010111, 010111100, 011110010, 111001010, 110010101, 100101011
89 : 001011001, 011001100, 100110010, 001100101, 100101100
90 : 001011010, 011010100, 101010010, 010100101, 010010110
91 : 001011011, 011011100, 101110010, 011100101, 110010110, 100101101
93 : 001011101, 011101100, 110110010, 101100101, 011001011, 100101110
94 : 001011110, 011110100, 111010010, 110100101, 101001011, 010010111
95 : 001011111, 011111100, 111110010, 111100101, 111001011, 110010111, 100101111
102 : 001100110, 100110100, 001101001, 101001100, 010011001
103 : 001100111, 100111100, 001111001, 111001100, 110011001, 100110011
106 : 001101010, 101010100, 010101001, 010100110, 010011010
107 : 001101011, 101011100, 010111001, 011100110, 110011010, 100110101
109 : 001101101, 101101100, 011011001, 101100110, 011001101, 100110110
110 : 001101110, 101110100, 011101001, 110100110, 101001101, 010011011
111 : 001101111, 101111100, 011111001, 111100110, 111001101, 110011011, 100110111
115 : 001110011, 110011100, 100111001
117 : 001110101, 110101100, 101011001, 010110011, 011001110, 100111010
118 : 001110110, 110110100, 101101001, 011010011, 101001110, 010011101
119 : 001110111, 110111100, 101111001, 011110011, 111001110, 110011101, 100111011
122 : 001111010, 111010100, 110101001, 101010011, 010100111, 010011110
123 : 001111011, 111011100, 110111001, 101110011, 011100111, 110011110, 100111101
125 : 001111101, 111101100, 111011001, 110110011, 101100111, 011001111, 100111110
126 : 001111110, 111110100, 111101001, 111010011, 110100111, 101001111, 010011111
127 : 001111111, 111111100, 111111001, 111110011, 111100111, 111001111, 110011111, 100111111
170 : 010101010
171 : 010101011, 010101110, 010111010, 011101010, 110101010, 101010101
173 : 010101101, 010110110, 011011010, 101101010, 011010101, 101010110
175 : 010101111, 010111110, 011111010, 111101010, 111010101, 110101011, 101010111
181 : 010110101, 011010110, 101011010
183 : 010110111, 011011110, 101111010, 011110101, 111010110, 110101101, 101011011
187 : 010111011, 011101110, 110111010, 101110101, 011101011, 110101110, 101011101
189 : 010111101, 011110110, 111011010, 110110101, 101101011, 011010111, 101011110
191 : 010111111, 011111110, 111111010, 111110101, 111101011, 111010111, 110101111, 101011111
219 : 011011011, 101101110, 011011101, 101110110, 011101101, 110110110, 101101101
223 : 011011111, 101111110, 011111101, 111110110, 111101101, 111011011, 110110111, 101101111
239 : 011101111, 110111110, 101111101, 011111011, 111101110, 111011101, 110111011, 101110111
247 : 011110111, 111011110, 110111101, 101111011
255 : 011111111, 111111110, 111111101, 111111011, 111110111, 111101111, 111011111, 110111111, 101111111
511 : 111111111
Absolutely; digit reversal is one of those things that surprised the heck out of me when I first encountered it. I thought it only appeared in games and tricks, not in real-world tasks, and not in stuff related to graphs!
My intuition says the cyclic subgraphs are related to digit reversal, but my math-fu isn't good enough to be sure.
I haven't understood where the "digit reversal" is useful. Can you guys give an example? :-// Maybe you managed to miss this post (https://www.eevblog.com/forum/programming/neat-algorithms/msg2548206/#msg2548206)? :-//
Am thinking of looking at turbo codes - are they nice to study?
So is that a yes, or a no? ;D Am thinking of looking at turbo codes - are they nice to study?
I did my thesis on them ;)
Well, yes. It was quite rewarding to get the "point". But as far as I remember, it's not exactly something to tack on in a hurry. Takes a fair amount of thinking and resources.Pretty much any advanced coding scheme has to be planned into a comms system from the start, from simple 2D convolutional codes up to the latest stuff like polar codes.
All that was 20 years ago with very green Ice-Tea, so my memories may be a bit colored ;)
I haven't understood where it's useful.
The main problem with this scheme is that I have not yet found an efficient enough way to describe it to other programmers; and the C code that implements it, tends to utterly confound and exasperate anyone who does not understand the underlying idea first.
But I know how to do it, how to write it, even if I used simply because I read a book about filesystems and XFS was described as the "original B-tree filesystem".
I also need to do vice versa: I need to map a machine address exported by the debugger into a line of one of the n files that compose a project, and for both these two purposes, I am thinking about recycling the same b-tree algorithm used for the toy filesystem.Yup, you essentially have an ordered 1:1 map between machine addresses and source lines, and given one, need to match the other, with the twist that if the key is not listed, then use the nearest previous key instead. There are many ways to implement it, but algorithmic reuse makes sense, as it makes the project easier to maintain, and a few percent speed difference between implementations is utterly insignificant in the grand scheme of things.
That's why I got surprised to see another binary-tree scheme. Oh, that binary search tree stuff is for LARGE datasets. As in won't fit in RAM. With sparse keys, too, say 2^128 or larger keyspace. The reason to not use b-trees is when the dataset has a simple binary partitioning function, but no d-way partitioning function, nor any single-dimensional order. For example, if your data are k-dimensional points, you can use a hyperplane to split the leftover set in two in any direction. If the dataset is one-dimensional, then b-tree is more efficient (and you typically only need a small number of levels, maybe five to eight at most, even for huuuuge datasets, considering the currently-used page sizes, given typical key sizes).
But I know how to do it, how to write it, even if I used simply because I read a book about filesystems and XFS was described as the "original B-tree filesystem".
1993?
Apple's HFS, for example, was just a little earlier in 1985.
say 2^128 or larger keyspace
#include <math.h>
#include <stdio.h>

/* M_PI is POSIX, not ISO C — provide it when compiling with a strict -std. */
#ifndef M_PI
#define M_PI 3.14159265358979323846
#endif

#define SAMPLES 1000
#define HALF_WIDTH 11 /* e.g. 11 filters from -11 to 11 */
float x[SAMPLES];

/* Demo: FIR approximation of the Hilbert transform.
 * Fills x[] with a cosine, applies an antisymmetric kernel of half-width
 * HALF_WIDTH, and prints (input, output) pairs; the output should be close
 * to the corresponding sine. */
int main(int argc, char *argv[]) {
    int i;
    (void)argc;  /* unused */
    (void)argv;  /* unused */

    /* Build some test data */
    for(i = 0; i < SAMPLES; i++) {
        x[i] = cos(2*M_PI*i/10.3);
    }

    /* Now apply the Hilbert Transform and see what you get */
    /* It should be close to sin(2*M_PI*i/10.3) */
    /* Bound fix: the kernel reads x[i-HALF_WIDTH] .. x[i+HALF_WIDTH], so the
     * valid range is HALF_WIDTH <= i < SAMPLES - HALF_WIDTH; the original
     * "SAMPLES-HALF_WIDTH-1" stopped one valid sample early. */
    for(i = HALF_WIDTH; i < SAMPLES - HALF_WIDTH; i++) {
        double h = 0;
        int j;
        /* Apply the kernel: odd taps only, coefficient 2/(j*pi),
         * antisymmetric in j */
        for(j = 1; j <= HALF_WIDTH; j += 2)
            h += (x[i-j]-x[i+j]) * 2.0/(j*M_PI);
        /* Print result */
        printf("%8.5f, %8.5f\n", x[i], h);
    }
    return 0;
}
/*
This computes an in-place complex-to-complex FFT
x and y are the real and imaginary arrays of 2^m points.
dir = 1 gives forward transform
dir = -1 gives reverse transform
*/
int FFT(int dir, long m)
{
    long n, i, i1, j, k, i2, l, l1, l2;
    double c1, c2, tx, ty, t1, t2, u1, u2, z;

    /* Calculate the number of points: n = 2^m */
    n = 1;
    for (i = 0; i < m; i++)
        n *= 2;

    /* Do the bit reversal: permute samples into bit-reversed order, in
     * place, swapping each pair exactly once (only when i < j). */
    i2 = n >> 1;
    j = 0;
    for (i = 0; i < n - 1; i++)
    {
        if (i < j)
        {
            tx = x[i];
            ty = y[i];
            x[i] = x[j];
            y[i] = y[j];
            x[j] = tx;
            y[j] = ty;
        }
        /* Increment j as a reversed-bit counter. */
        k = i2;
        while (k <= j)
        {
            j -= k;
            k >>= 1;
        }
        j += k;
    }

    /* Compute the FFT: m stages of butterflies; (u1,u2) is the twiddle
     * factor, advanced by the recurrence with (c1,c2) per sub-block. */
    c1 = -1.0;
    c2 = 0.0;
    l2 = 1;
    for (l = 0; l < m; l++)
    {
        l1 = l2;
        l2 <<= 1;
        u1 = 1.0;
        u2 = 0.0;
        for (j = 0; j < l1; j++)
        {
            for (i = j; i < n; i += l2)
            {
                i1 = i + l1;
                t1 = u1 * x[i1] - u2 * y[i1];
                t2 = u1 * y[i1] + u2 * x[i1];
                x[i1] = x[i] - t1;
                y[i1] = y[i] - t2;
                x[i] += t1;
                y[i] += t2;
            }
            z = u1 * c1 - u2 * c2;
            u2 = u1 * c2 + u2 * c1;
            u1 = z;
        }
        /* Half-angle recurrence for the next stage's root of unity.
         * Bug fix: "Math.Sqrt" is C#, not C — use sqrt() from <math.h>. */
        c2 = sqrt((1.0 - c1) / 2.0);
        if (dir == 1)
            c2 = -c2;
        c1 = sqrt((1.0 + c1) / 2.0);
    }

    /* Scaling for forward transform */
    if (dir == 1)
    {
        for (i = 0; i < n; i++)
        {
            x[i] /= n;
            y[i] /= n;
        }
    }
    return (1);
}
I eventually found oneWhere? Written by whom? What's the license?
Cant remember which site I got it off or who wrote it.QuoteI eventually found oneWhere? Written by whom? What's the license?
QuoteI eventually found oneWhere? Written by whom? What's the license?
The contents of this web site are © Copyright Paul Bourke or a third party contributor where indicated. You may print or save an electronic copy of parts of this web site for your own personal use. Permission must be sought for any other use. Any source code found here may be freely used provided credits are given to the author. Purchase of credit free licenses of material found on this site can be negotiated with the author. The author can also quote for unique variations and/or higher resolution versions of the images found on this site.
Shows you the "care" typical proprietary developers have with copyrights... :palm:Open source mate !
So... because somebody put the code on a web page, you have the right to use it in your projects? No, that's not how copyright works, mate.Shows you the "care" typical proprietary developers have with copyrights... :palm:Open source mate !
https://csharp.hotexamples.com/examples/-/FFT/-/php-fft-class-examples.html
Any source code found here may be freely used provided credits are given to the author.I had suspected that that part had not been noticed, or followed... A *LOT* of web-published SW contains such provisions. As a hobbyist, giving credit where it is due is the least you can do. As a profession, it's really important, legally, to keep track of the provenance of any found software that you use. You probably need a lawyer or legal team to say "for sure" whether your use of OSSW will change the terms under which you can make money with your final product, but you should AT LEAST be able to say "I found this code at xxx site, read the license, and I think it's ok for me to use it in my product..."
QuoteAny source code found here may be freely used provided credits are given to the author.I had suspected that that part had not been noticed, or followed... A *LOT* of web-published SW contains such provisions. As a hobbyist, giving credit where it is due is the least you can do. As a profession, it's really important, legally, to keep track of the provenance of any found software that you use. You probably need a lawyer or legal team to say "for sure" whether your use of OSSW will change the terms under which you can make money with your final product, but you should AT LEAST be able to say "I found this code at xxx site, read the license, and I think it's ok for me to use it in my product..."
This gets a bit fuzzier if we're talking about those 4-line "neat algorithms" with an obvious implementation. I wouldn't worry about that "bit counting using shifts, ands, and adds", for example. But if you have a piece of code as large as an FFT subroutine... Pay more attention!
QuoteAny source code found here may be freely used provided credits are given to the author.I had suspected that that part had not been noticed, or followed... A *LOT* of web-published SW contains such provisions. As a hobbyist, giving credit where it is due is the least you can do. As a profession, it's really important, legally, to keep track of the provenance of any found software that you use. You probably need a lawyer or legal team to say "for sure" whether your use of OSSW will change the terms under which you can make money with your final product, but you should AT LEAST be able to say "I found this code at xxx site, read the license, and I think it's ok for me to use it in my product..."
Thing is, open source is based on competition
Then, once getting widespread, it is glorified by the mob, so that people believe it is actually the best.Human mobs are irrational, absolutely true.
A good example is FreeRTOS, which is fairly mediocre compared to other RTOSes which were present on the market several years ago. By now, people believe FreeRTOS is the best, and market for RTOSes is destroyed entirely.Don't blame FreeRTOS, blame humans who are satisfied with mediocre features and performance.
Don't blame FreeRTOS, blame humans who are satisfied with mediocre features and performance.
Many otherwise good tools in the embedded space - compilers, RTOSes, GUI libraries, etc. - have become a huge liability for people by suddenly disappearing from the general market when one player (e.g. a silicon vendor) bought them. I think the RTOS area had reached the tipping point where people have lost trust in things that someone else controls their use of. Of course, the price of FreeRTOS doesn't hurt. :)Thing is, open source is based on competition
Open source certainly participate in competition, and often wins because of low price, despite better products may be available. Then, once getting widespread, it is glorified by the mob, so that people believe it is actually the best.
A good example is FreeRTOS, which is fairly mediocre compared to other RTOSes which were present on the market several years ago. By now, people believe FreeRTOS is the best, and market for RTOSes is destroyed entirely.
Meanwhile, the FreeRTOS itself is bought and re-purposed by Amazon. I'm sure Amazon could buy one of better RTOSes as the companies making RTOSes are certainly not in good shape and would definitely sell. But no. Amazon sees more value in popularity than in quality.
You cannot really blame people for preferring something free to something expensive. I can, if there is a significant difference in quality. Here, we are talking about tools, whose quality will immediately affect the quality of the result.
But this would destroy the mechanism which forces the car manufacturers to produce better and cheaper cars.The current mechanism which forces the car manufacturers to produce better and cheaper cars.
If the mechanism is destroyed (as in Soviet Union, or in software market) people stop producing and everything goes down the drain.No, that happened in Soviet Union because competition was eradicated.
Many otherwise good tools in the embedded space - compilers, RTOSes, GUI libraries, etc. - have become a huge liability for people by suddenly disappearing from the general market when one player (e.g. a silicon vendor) bought them.
A major problem in current software choices is that ever since Microsoft gained dominance, humans have accepted that software is unreliable; that that is the nature of software. There are a few niches (like medical devices) that are supposed to have different values, but experience (their failure rates and typical bugs) shows that is simply not true, they're just as crap as every other piece of software. That attitude, that software is fundamentally unreliable, is pervasive throughout human computer use: we're basically programmed for that now. Yet, Microsofts products are nonfree, and cost money, even though better free alternatives exist.
Thing is, open source is based on competition
Open source certainly participate in competition, and often wins because of low price, despite better products may be available.
why Microsoft -- an agent, not a market -- deserves specific blame?My opinion is biased by my own experiences, obviously.
The total cost is what really matters. For a given commercial product, it may cost a lot less overal to select a good commercial RTOS rather than FreeRTOS. Problem is, too many managers still don't realize this.So true. With FOSS, the immediate price may be zero, but there are lots of other costs; the most important thing being that being a "FOSS customer" means absolutely nothing, unlike when you have a commercial relationship. As a FOSS user/"customer", you have zero leverage on the developers, unless you spend money, time, or other resources. It definitely is not zero-cost.
Fact is, all industries have gone through a fundamental revolution changing business models every couple of generations for a couple of centuries now.
In the last twenty years or so, I've seen shitty code become more and more ubiquitous, and robust and reliable code rarer and rarer.
Is it just me? Could be, but I don't think so.
In the last twenty years or so, I've seen shitty code become more and more ubiquitous, and robust and reliable code rarer and rarer.
Is it just me? Could be, but I don't think so.
That has been my experience too. However, I attribute this to the way people write software now. Programmers of older days are all but extinct.
No argument there. However, you seem to assert that there is only one software business model in capitalism, and I'm saying that due to changes in technology (various costs), business models must change and adapt in capitalistic systems to survive. I base that on history, and on that long-lived tech companies have changed their business tactics every couple of generations.Fact is, all industries have gone through a fundamental revolution changing business models every couple of generations for a couple of centuries now.Capitalism is not a market model. It is a fundamental mechanism which guides the economy.
Where software needs to be reliable, and people pay for it to be reliable, then it is reliable. I've been running complex equipment in production for 5+ years, 24x7, without any downtime.Are you sure you haven't simply paid for enough hardware to cover the cases where the software shits on you, without it being visible to your end users?
Are you sure you haven't simply paid for enough hardware to cover the cases where the software shits on you, without it being visible to your end users?
Because that is the only way I know how to get arbitrary stuff done right now without writing, or configuring and fixing, the software myself.
There are many more ways to do business in software than package them for sale on store shelves.
opening btree file [mydata.bin] .. done
> h
integer followed by "I" to Insert
integer followed by "D" to Delete
integer followed by "S" to Search { found, not found }
integer followed by "A" to Search* { found, closest left-value }
"." to show the tree
"Q" to quit
> 98 99 10 d
item 98 99 100 have been deleted
> 100 a
Search* (100):
{{ 253 752 1003 1254 1505 1756 2007 2258 2509 2760 3011 3262 3513 }
{ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26
27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50
51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 101
102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118
119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135
136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152
153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169
170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186
187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203
204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220
221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237
238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 } }
Not found, the closest left-value is 97
>
Search* (100):
... 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 .. .. .. 101 102 103 ...
Not found, the closest left-value is 97
But, the real challenge was ... how to find the "closest" item when it doesn't exist. Situations like that are surprisingly common. The idea/algorithm itself may be easy to implement, but getting the engineering right so it works for practical, imperfect data, takes a surprising amount of effort.
// Minimal Graphviz DOT example: a root node with two children.
digraph {
"root" -> "left";
"root" -> "right";
}
You can add node modifiers, like "root" [ shape=hexagon ]; as well as edge attributes like labels and such. It is just text, which makes it especially useful for debug visualization from any program or script. Graphviz Dot (dot/neato/twopi/circo) will handle the layout for you, using a number of algorithms described in peer-reviewed articles (https://www.graphviz.org/theory/). It is a surprisingly powerful tool -- or a set of tools, really. I like to generate the graphs as SVG, then hand-tweak them in Inkscape, for maximum quality and usefulness. See the example gallery (https://www.graphviz.org/gallery/) for stuff it can do. A new algorithm is good but ... these things consume a lot of time when you use them for something practical. Yet, that is the only way to truly learn/grok a new algorithm, and integrate it to one's "tool kit".
B*trees is a special case of B+trees which guarantee that nodes are at least 2/3 full. This is good for my purpose because it improves my filesystem. But I have a serious problem: I cannot avoid recursion! B*tree is more prone to have recursion, and I'd like to avoid it as far as possible.
B*trees is a special case of B+trees which guarantee that nodes are at least 2/3 full. This is good for my purpose because it improves my filesystem. But I have a serious problem: I cannot avoid recursion! B*tree is more prone to have recursion, and I'd like to avoid it as far as possible.
Why do you want to avoid recursion?
/* Textbook recursive factorial.
 * Assumes x >= 1; the recursion bottoms out at x == 1, so calling it with
 * x <= 0 never reaches the base case (as in the original). */
int factorial(int x) {
	/* single-expression form: base case at 1, otherwise recurse */
	return (x == 1) ? 1 : x * factorial(x - 1);
}
Why do you want to avoid recursion?
It is seen as an honorable goal if you have limited stack space, or want a bounded run time.
Isn't the main objection against recursive algorithms, in general, about memory?The problem is not that they are memory intensive. Its that unless you take special action their memory use can be unconstrained. A routine that always calls itself a well known range of times has a known maximum memory requirement, and is OK. Many recursive routines call themselves an arbitrary number of times, and will eventually blow the stack up unless special action is taken to constrain the call depth and perhaps declare a failure to compute the true result.
The fact that they are using the computer stack (to store intermediate results obtained during each recursion step), thus making them slower and memory intensive.
A routine that always calls itself a well known range of times has a known maximum memory requirement
There are many algorithms which are recursive, but have an easy to identify maximum for the depth they will ever call to. Many divide and conquer type algorithms are like this. You usually know the maximum resolution you will ever be able to divide to, so you know the maximum depth of recursion you will ever get.A routine that always calls itself a well known range of times has a known maximum memory requirement
This requires you to have an additional field in the structure for counting the number of calls, which correspond to used resources, therefore it *somehow* workarounds the problem making things back to deterministic, but it sucks a lot when you have to instrument the code for the test-report because it generates too many triplets, and a function that calls itself can consume more ram, and usually does consume more ram.
/* Recursive factorial on unsigned ints; both 0 and 1 map to 1.
 * Written tail-first so the recursive call is the visually dominant path
 * (gcc -O3 turns this into a loop, as discussed in the thread). */
unsigned int Factorial(unsigned int n)
{
	if (n > 1)
		return n * Factorial(n - 1);
	return 1;   /* covers n == 0 and n == 1 */
}
gcc -O3 -S TailRecursion.cFactorial:
.seh_endprologue
movl $1, %eax
cmpl $1, %ecx
jbe .L1
.p2align 4,,10
.p2align 3
.L2:
movl %ecx, %edx
subl $1, %ecx
imull %edx, %eax
cmpl $1, %ecx
jne .L2
.L1:
ret
It can be very hard, or sometimes even impossible for other, less trivial forms of recursion.
If you create your own stack, and save state in that, you can turn anything recursive into something you can claim to be non-recursive :)It can be very hard, or sometimes even impossible for other, less trivial forms of recursion.
Are there any examples, or a demonstration about algorithms that are impossible to convert?
Asking just out of curiosity, because I always thought *any* recursive algorithm can be translated into a non recursive one.
If you create your own stack, and save state in that, you can turn anything recursive into something you can claim to be non-recursive :)It can be very hard, or sometimes even impossible for other, less trivial forms of recursion.
Are there any examples, or a demonstration about algorithms that are impossible to convert?
Asking just out of curiosity, because I always thought *any* recursive algorithm can be translated into a non recursive one.
/* Duff's device (Tom Duff, 1983): a copy loop unrolled 8x, where the switch
 * jumps INTO the middle of the do/while body to dispose of the count % 8
 * leftover words on the first pass.  Every case label falls through on
 * purpose -- that IS the device.
 * 'to' is deliberately NOT incremented: the original destination was a
 * memory-mapped output register.
 * NOTE(review): K&R-style parameters with implicit int ('register count',
 * 'register n') -- pre-C99 code, quoted verbatim; it will not compile as
 * C99 or later.  Also assumes count > 0: count == 0 would copy 8 words. */
send(to, from, count)
register short *to, *from;
register count;
{
register n = (count + 7) / 8; /* full passes needed, rounded up */
switch (count % 8) { /* entry point into the unrolled body */
case 0: do { *to = *from++; /* intentional fallthrough, all cases */
case 7: *to = *from++;
case 6: *to = *from++;
case 5: *to = *from++;
case 4: *to = *from++;
case 3: *to = *from++;
case 2: *to = *from++;
case 1: *to = *from++;
} while (--n > 0);
}
}
Recursion is a powerful tool and most of all, when it applies, it makes code easier to write, more readable, look more like maths, and also it's usually easier to verify pre and post conditions.
To me, the most impressive algorithms are the simplest ones, and the absolute winner so far is Von Neumann with his algorithm for unbiasing a biased coin. In that case, I recommend you look up the Metropolis-Hastings algorithm (https://en.wikipedia.org/wiki/Metropolis-Hastings_algorithm) (for the inverse; to generate pseudorandom numbers according to a specific probability distribution P(x), when you only know a function f(x) that is proportional to P). From there, go on to e.g. simulated annealing (https://en.wikipedia.org/wiki/Simulated_annealing), which can be used to find global minima or maxima of a function; it is very similar to Metropolis-Hastings.
Recursion is a powerful tool and most of all, when it applies, it makes code easier to write, more readable, look more like maths, and also it's usually easier to verify pre and post conditions.
So tell me why recursion isn't allowed for safety critical code in automotive and avionics applications, and I have never been authorized to use it.
- Recursion may happen without you realizing it. Beyond the simple case of a function directly calling itself, it may do so indirectly through a chain of calls of other functions. In moderately complex software, this is not necessarily easy to spot. Using code analysis tools may help.
There are many algorithms which are recursive, but have an easy to identify maximum for the depth they will ever call to. Many divide and conquer type algorithms are like this. You usually know the maximum resolution you will ever be able to divide to, so you know the maximum depth of recursion you will ever get.
So, if you really worry about the maximum recursion depth for B+ trees, you must be using too small a branching factor b for the number of records you use.
#include <stdlib.h>
#include <stdio.h>
/* Convert a byte count to a human-readable string such as "11.77 MiB".
 * Repeatedly divides by 1024 until the value drops below 1024 or the units
 * run out at PiB.  Returns a heap-allocated string the caller must free(),
 * or NULL on formatting/allocation failure.
 * Fixes vs. original: the malloc() result was used unchecked (NULL deref
 * inside sprintf on OOM), and a negative snprintf() return was not handled. */
char * bytesToHR(unsigned long bytes)
{
	static const char * suffix[] = {"B", "KiB", "MiB", "GiB", "TiB", "PiB", NULL};
	const char ** s = suffix;
	float b = (float)bytes;
	/* stop below 1024 or at the last unit (next table entry is NULL) */
	while(b > 1023.0f && *(s+1))
	{
		b /= 1024.0f;
		++s;
	}
	/* dry run to size the buffer, then format for real */
	int len = snprintf(NULL, 0, "%.2f %s", b, *s);
	if (len < 0)
		return NULL;            /* encoding error */
	char * out = malloc((size_t)len + 1);
	if (out == NULL)
		return NULL;            /* allocation failure */
	snprintf(out, (size_t)len + 1, "%.2f %s", b, *s);
	return out;
}
/* Demo driver: print 12345678 bytes in human-readable form.
 * Fixes vs. original: the sample program output "11.77 MiB" was fused onto
 * the closing brace (moved into a comment), and the returned pointer is now
 * checked before use. */
int main(int argc, char * argv[])
{
	char * s = bytesToHR(12345678);
	if (s != NULL) {
		printf("%s\n", s);   /* prints: 11.77 MiB */
		free(s);
	}
	return 0;
}
tell me why recursion isn't allowed for safety critical code in automotive and avionics applications, and I have never been authorized to use it.Well, you take fears that were probably relevant 30+ years ago, put them together into a "standard" that dates back 20+ years, and then hand the "approval for exceptions" process off to "management" that hasn't been following the science, or the art, for about the same length of time. "No one ever gets fired for failing to approve "risky" code"... (not that they ever really realize that it's probably far less risky to take a modern and proven recursive, dynamic-allocation, recursive library and use it, than to ask their mid-level engineers to achieve the same thing from scratch without "modern techniques.") Sigh.
We fill the stack area with a known pattern 0xdeadbeaf, and we check how many times the pattern appears after the algorithm has completed its execution (https://www.eevblog.com/forum/Smileys/default/xgrin.gif.pagespeed.ic.QVVz6XIT20.png)What? Only during test? We had that check in production code, for every process (recursive or not), at least "periodically" (I forget whether this was a "background" check, or something that happened in the scheduler.) Of course, that WAS in an environment where probably the worst that would happen is irate customers would call up with "it rebooted and the last thing it said was something about "process has low stack." Please fix it!"...)
This works for a given environment and it cannot be written in a formal test-report without a manual justification, but it gives us useful information during the engineering steps.
Well, you take fears that were probably relevant 30+ years ago, put them together into a "standard" that dates back 20+ years, and then hand the "approval for exceptions" process off to "management" that hasn't been following the science, or the art, for about the same length of time.
What? Only during test?
So 200 guys employed on daily (testing, development, documentation, QA compliance, etc) activities in avionics are all wrong, as well as 20 senior-engineers with 30 years of experience :-//
Or, perhaps NOT obeying all those rules will nosedive into the ground 10 airplanes daily.
Anecdotal references are not a proof.
Anecdotal references are not a proof.which may be either completely right, or false. Depends on what you consider here. Even just one occurence of a problem is complete proof that the problem CAN happen, and in safety-critical settings, this should be enough to take action to improve things based on that single occurence only. Constant improvement.
Admittedly, they can tend to give a false sense of safety though
I've never used recursion in any critical piece of "embedded" code even in settings where this was not a rule. A mix of common sense, risk mitigation and often no real need for/benefit of recursion for rather low-level stuff. I've never set a "no recursion" rule though when it was not strictly required from a regulatory POV.
Admittedly, they can tend to give a false sense of safety though
It's not correct and it sounds rather misleading. Concerning software (the hardware follows different directives, although similar), the whole DO178B/level{A..D} describes how the engineering and testing activities have to be done, while the MISRA-C and SPARK-ADA tell us some rules to manage the job.
Our directives do not only consider the source, but rather even how it's tested concerning equipment, procedures and human resources, so if a senior says that recursion shall be avoided, he/she knows that it might work, but it will be more difficult to manage within the software life cycle.
Well, you take fears that were probably relevant 30+ years ago, put them together into a "standard" that dates back 20+ years, and then hand the "approval for exceptions" process off to "management" that hasn't been following the science, or the art, for about the same length of time.
So 200 guys employed on daily (testing, development, documentation, QA compliance, etc) activities in avionics are all wrong, as well as 20 senior-engineers with 30 years of experience :-//
... so it's clearly and formally marked as under your responsibility ...
Everything you do or accept is your responsibility.
What exactly is not correct and sounds misleading? [...] Then a "false sense of safety" means that engineers would think those rules would protect them from ever fucking up.
Everything you do or accept is your responsibility.
It's a different kind of responsibility because in that case you temporarily become a sort of team leader regarding the project, and this means that you have to somehow coordinate people around you, as well as other people by emails and video conference. In short, since they see your name on Doors, you are the person who will be called for everything concerning the project.
Can we please return to the "Neat algorithms", topic?
carry, Rd = Rs + Rs ;; all Rx and math operations are 16bits, 2's comp.
if (carry) Rd = Rd + 1;
But this would be relatively slow, and addition is nicely commutative, and addition is commutative and associative, and 1's comp addition is associative WRT byteswapping, so IP checksum functions usually get highly optimized. for (word16 in words) {
ck32 = ck32 + word16
}
while (ck32 >> 16) {
ck32 = (ck32 & 0xFFFF) + (ck32 >> 16)
}
But that's usually pretty silly on an 8bit CPU, and if you have both a CPU and language that allow access to the carry bit (for multi-precision math), you can do a lot better doing the end-around carry together with adding the NEXT byte: prevcarry = 0
for () {
prevcarry, ck8low = ck8low+nextbyte+prevcarry // (and increment to following byte)
prevcarry, ck8hi = ck8hi+nextbyte+carry
}
prevcarry, ck8low = ck8low + prevcarry // final end-around carry
ck8hi = ck8hi + prevcarry
Note that you'll need some sort of looping code that doesn't destroy the prevcarry bit...If you want to use recursion you have to [six steps, including "approval by 3 seniors" and "the QA team"Exactly. It's so much easier to just say "no."
/* 50% blend of two ARGB8888 pixels: each 8-bit channel of the result is the
 * truncated average (a + b) / 2 of the corresponding source channels.
 * The pixel is split into two channel pairs (R,B and A,G), each pre-halved
 * and spread into 16-bit lanes so the per-channel sums cannot carry into a
 * neighbouring channel. */
static inline uint32_t argb8888_blend_half(const uint32_t argb1, const uint32_t argb2)
{
	/* R and B halves, pre-scaled so the sum lands in the high byte of each lane */
	const uint32_t rb_sum = ((argb1 << 7) & 0x7F807F80u)
	                      + ((argb2 << 7) & 0x7F807F80u);
	/* A and G halves, already in place */
	const uint32_t ag_sum = ((argb1 >> 1) & 0x7F807F80u)
	                      + ((argb2 >> 1) & 0x7F807F80u);
	/* mask off the cross-lane residue and re-pack */
	return ((rb_sum & 0xFF00FF00u) >> 8) | (ag_sum & 0xFF00FF00u);
}
/* The exact same approach (just different numeric constants) can be used to calculate 25%, 12.5%, 6.25%, 3.125%, 1.5625%, 0.78125%, and 0.390625% blends as well. */
/* Weighted blend of two ARGB8888 pixels; p is the weight of argb1 in 1/256
 * units (0..256).  The four channels are widened into 16-bit lanes of a
 * 64-bit word (B,R stay in the low half; G,A are lifted 24 bits) so each
 * per-channel product (at most 255*256) cannot carry into its neighbour.
 * Fix vs. original: the preceding prose sentence was fused onto the
 * declaration line, making the snippet non-compilable; moved to a comment. */
static inline uint32_t argb8888_blend(const uint32_t argb0,
                                      const uint32_t argb1,
                                      const unsigned int p)
{
	const uint64_t c0 = (argb0 & 0x00FF00FF) | ((uint64_t)(argb0 & 0xFF00FF00) << 24),
	               c1 = (argb1 & 0x00FF00FF) | ((uint64_t)(argb1 & 0xFF00FF00) << 24),
	               p0 = 256 - p,
	               p1 = p;
	/* one weighted sum covers all four channels at once */
	const uint64_t c = p0*c0 + p1*c1;
	/* shift each lane back down by 8 (the /256) and re-pack */
	return ((c >> 8) & 0x00FF00FF) | ((c >> 32) & 0xFF00FF00);
}
/* and you can even apply that to R10G10B10 (30-bit color) by changing the numerical constants. The "trick" is trivial; second (and fourth) component are shifted up and replaced with enough zeroes to hold the product. */
/* 32-bit-only weighted blend of two ARGB8888 pixels; p is the weight of
 * argb1 in 1/256 units (0..256).  R,B are processed in one register and
 * A,G (shifted down a byte) in another, giving every channel a 16-bit lane
 * so the products (at most 255*256) stay separate.
 * Fix vs. original: the preceding prose sentence was fused onto the
 * declaration line, making the snippet non-compilable; moved to a comment. */
static inline uint32_t rgb8888_blend(const uint32_t argb0, const uint32_t argb1, const uint32_t p)
{
	const uint32_t rb0 = argb0 & 0x00FF00FF,
	               ag0 = (argb0 >> 8) & 0x00FF00FF,
	               rb1 = argb1 & 0x00FF00FF,
	               ag1 = (argb1 >> 8) & 0x00FF00FF,
	               p0 = 256 - p,
	               p1 = p;
	/* >>8 undoes the *256 weight scale; the masks drop cross-lane bits */
	return (((p0*rb0 + p1*rb1) & 0xFF00FF00u) >> 8)
	     | ((p0*ag0 + p1*ag1) & 0xFF00FF00u);
}
/* (Total cost being four 24×9=32bit multiplications, three eight-bit right shifts, six binary ANDs, one binary OR, two additions, and one subtraction.) */
/* Weighted blend of two RGB565 pixels; p is the weight of rgb1 in 1/32
 * units (0..32).  R and B share the low 32-bit lane; G is lifted 16 bits,
 * so the 5/6-bit channel products (at most 63*32) cannot overlap.
 * Fixes vs. original: the cast on c1 read "(uint32_r)" -- a typo that does
 * not compile -- corrected to (uint32_t); the preceding prose sentence was
 * fused onto the declaration line and has been moved into a comment. */
static inline uint16_t rgb565_blend(const uint16_t rgb0, const uint16_t rgb1, const uint16_t p)
{
	const uint32_t c0 = (rgb0 & 0xF81F) | ((uint32_t)(rgb0 & 0x07E0) << 16);
	const uint32_t c1 = (rgb1 & 0xF81F) | ((uint32_t)(rgb1 & 0x07E0) << 16);
	const uint32_t p0 = 32 - p, p1 = p;
	const uint32_t c = p0*c0 + p1*c1;
	/* shift each lane back down by 5 (the /32) and re-pack */
	return ((c >> 5) & 0xF81F) | ((c >> 21) & 0x07E0);
}
On 64-bit architectures, you can trivially expand that to blending pairs of color values, for about the same cost.
Why do you want to avoid recursion?
The recursion can make the algorithm not deterministic about the use of ram, it might consume too much ram, and I am limited to 4Mbyte on that board, with other stuff that needs ram. I am not even using malloc because I cannot have virtual memory, the CPU does not even have an MMU. Everything must be static and deterministic, with the worst-case known and calculated in advance.
Meh. Trees (and graphs) are so inherently, obviously, and elegantly recursive that avoiding recursion in code that implements them is ... cutting off your nose to spite your face.
The algorithms involved are usually well defined and understood wrt their resource usage, and the insistence in some “safety standards” that programmers come up with some less-well-debugged, harder to-understand loop-based equivalent (or use a poorer data structure) is one of the main objections that I have toward such standards. :-(
Isn't the main objection against recursive algorithms, in general, about memory?The problem is not that they are memory intensive. Its that unless you take special action their memory use can be unconstrained. A routine that always calls itself a well known range of times has a known maximum memory requirement, and is OK. Many recursive routines call themselves an arbitrary number of times, and will eventually blow the stack up unless special action is taken to constrain the call depth and perhaps declare a failure to compute the true result.
The fact that they are using the computer stack (to store intermediate results obtained during each recursion step), thus making them slower and memory intensive.
OTOH, if the trees are THAT shallow, how much code complexity does the recursion save you?
If you have a nice recursive algorithm but fear of overrunning stack, you can easily keep track of your recursion depth with an additional parameter you decrement each call and compare to a (hopefully reasonably choosen) maximum recursion depth (exiting with an error when the threshold has been reached).
This obviously uses extra stack space (and a little performance), so it's up to you to decide if you want to keep it in production code, or remove it and call your testing (and the additional safety margin due to its removal) good enough to be safe.
Avoiding recursion results in having an acyclic function call graph, which code analyzers can exploit to prove limits on stack use and boundedness of executions.
There is another topic about following or not the programming guidelines.
Quoting from the attached PDF: https://www.eevblog.com/forum/chat/dogmatic-coding-standards/msg2636658/#msg2636658 (https://www.eevblog.com/forum/chat/dogmatic-coding-standards/msg2636658/#msg2636658)QuoteAvoiding recursion results in having an acyclic function call graph, which code analyzers can exploit to prove limits on stack use and boundedness of exe-cutions.
... I've been wondering about the "control" on stack usage in recursion programming, seeing people raised lots of concerns.
My simple question is, if running a recursive program, without any control or aware of the stack limitation, isn't that like using a cpu by designed, that can't handle say like div by zero (eg. NMI or etc) from the 1st place ?
If you have a nice recursive algorithm but fear of overrunning stack, you can easily keep track of your recursion depth with an additional parameter you decrement each call and compare to a (hopefully reasonably choosen) maximum recursion depth (exiting with an error when the threshold has been reached).
This obviously uses extra stack space (and a little performance), so its up to you to to decide if you want to keep it in production code or remove it and call your testing (and additional safety marging due to its removal) good enough to be safe.
The whole issue of blowing the stack is a big deal for any program. Recursive routines are just a notoriously troublesome example.If you have a nice recursive algorithm but fear of overrunning stack, you can easily keep track of your recursion depth with an additional parameter you decrement each call and compare to a (hopefully reasonably choosen) maximum recursion depth (exiting with an error when the threshold has been reached).
This obviously uses extra stack space (and a little performance), so its up to you to to decide if you want to keep it in production code or remove it and call your testing (and additional safety marging due to its removal) good enough to be safe.
Not an expert in programming, nor experienced.
I've been wondering about the "control" on stack usage in recursion programming, seeing people raised lots of concerns.
My simple question is, if running a recursive program, without any control or aware of the stack limitation, isn't that like using a cpu by designed, that can't handle say like div by zero (eg. NMI or etc) from the 1st place ?
Edit : Simpler analogy, driving a tank with eyes closed, that you can't really be sure that you're crushing your enemy (expected & wanted) or your friends. :-DD
Cmiiw.
In most cases it is very difficult to work out the worst case biggest possible stack requirement, and make sure you have allowed enough stack space for that.
In most cases it is very difficult to work out the worst case biggest possible stack requirement, and make sure you have allowed enough stack space for that.
I don't think so. At any rate, it's much harder to predict heap behaviour, which, unlike stack, can be fragmented, possibly after days or months of running.
Whilst its true that heap analysis is harder, most small embedded designs completely avoid using a heap. They can't help using the stack, and they can't afford to waste more than a few bytes. When your worst case stack use is an usually deep point in some calls, and all the interrupts having nested, hitting worst case can be a very rare event. It has to be analysed, but most development tool chains doesn't really take that need seriously, and try to help.In most cases it is very difficult to work out the worst case biggest possible stack requirement, and make sure you have allowed enough stack space for that.
I don't think so. At any rate, it's much harder to predict heap behaviour, which, unlike stack, can be fragmented, possibly after days or months of running.
Feynman worked out in some detail the program for
computing Hopfield's network on the Connection Ma-
chine. The part that he was proudest of was the
subroutine for computing a logarithm. I mention it here
not only because it is a clever algorithm, but also
because it is a specific contribution Richard made to the
mainstream of computer science. He had invented it at
Los Alamos.
Consider the problem of finding the logarithm of a
fractional number between 1 and 2. (The algorithm can be
generalized without too much difficulty.) Feynman ob-
served that any such number can be uniquely represented
as a product of numbers of the form 1 + 2^(-k), where k is an
integer. Testing for the presence of each of these factors in
a binary representation is simply a matter of a shift and a
subtraction. Once the factors are determined, the loga-
rithm can be computed by adding together the precomput-
ed logarithms of the factors. The algorithm fit the
Connection Machine especially well because the small
table of the logarithms of 1 + 2^(-k) could be shared by all
the processors. The entire computation took less time
than doing a division.
Quarter square multiplication (https://en.wikipedia.org/wiki/Shift-and-add_algorithm#Quarter_square_multiplication), one of the shift-and-add algorithms: Let's say you know S_n = floor(n^2/4), i.e. S_n is a quarter of n squared, rounded towards zero. If x ≥ y ≥ 0, then x·y = S_{x+y} - S_{x-y}.
I wonder if this could be used to implement a fast multiplication on an FPGA? I dunno.
for larger ones, I think the size of the required look-up table would quickly get impractical. Ideas?I experimented with some approaches, but for larger ones, a binary shift-and-add ("barrel multiplier"?) wins. (It is difficult to keep an N×N-bit product with a 2N-bit result under 2N ops.) So, this only makes sense if you have S_i precalculated -- actually, S_i = floor(|i|^2/4) for i = 1-2^N .. 2^(N+1)-1, so that for 0 ≤ a,b ≤ 2^N-1, a×b becomes S_{a+b} - S_{a-b}. That makes the lookup table size just under 6·N·2^N bits, turning the multiplication into two lookups, two subtractions, and one addition. I do not know how expensive such ROM tables are in general.
Quarter square multiplication (https://en.wikipedia.org/wiki/Shift-and-add_algorithm#Quarter_square_multiplication), one of the shift-and-add algorithms: Let's say you know Sn = floor(n2/4), i.e. Sn is a quarter of n squared, rounded towards zero. If x ≥ y ≥ 0, then x·y = Sx+y - Sx-y.The critical importance of fast, silicon efficient, multiplication for DSP applications means a huge number of avenues to faster multiplication have been tried. You need to try something a bit more novel than a well established mathematical identity to have a good hope of breaking new ground. There are quite a few tricks for finding approximate answers, that people generally only know about if they work in an area where the approximation is effective. There aren't many obscure tricks for generating exact results, because everyone wants those.
I wonder if this could be used to implement a fast multiplication on an FPGA? I dunno.
how much code complexity does the recursion save you?
In most cases it is very difficult to work out the worst case biggest possible stack requirement, and make sure you have allowed enough stack space for that.
On bigger machines most people do little more than allow lots of stack space and hope for the best. On small embedded machines you have to get your stack allocation right, as there is little RAM to waste. You might get some help from static analysis tools, but those tools are not generally foolproof. For example, they don't usually have the ability to work out the worst case program call depth + the worst case mix of nested interrupts.
We're talking about B-trees here. With disk-based blocks. What's your block size?
btree_key_t key[DEFAULT_ORDER];
btree_iblock_t iblock[DEFAULT_ORDER];
In most cases it is very difficult to work out the worst case biggest possible stack requirementYeah, that's the claim. But I don't think it's true.In many cases, you're dealing with standard algorithms whose stack depth requirements have been analyzed to death, providing many published papers and textbooks and theses. That leaves only the stack requirements for each level as "unknown", and that's easy to measure for a particular compiler/cpu.Sure, "not all recursive algorithms" are that well analyzed. But you're throwing away the ones that are for no good reason.
Sure, but the solution space is even more huge. This is just one of those utterly simple mathematical identities that people overlook, exactly because it is too simple.Quarter square multiplication (https://en.wikipedia.org/wiki/Shift-and-add_algorithm#Quarter_square_multiplication), one of the shift-and-add algorithms: Let's say you know Sn = floor(n2/4), i.e. Sn is a quarter of n squared, rounded towards zero. If x ≥ y ≥ 0, then x·y = Sx+y - Sx-y.The critical importance of fast, silicon efficient, multiplication for DSP applications means a huge number of avenues to faster multiplication have been tried.
I wonder if this could be used to implement a fast multiplication on an FPGA? I dunno.
Again, I have zero experience with FPGAs, so I do not know.
The big FPGAs are mostly designed as DSP engines, so they have lots of dedicated hardware multipliers. In smaller FPGAs its still common to use macros to create multipliers out of logic blocks.Again, I have zero experience with FPGAs, so I do not know.FPGAs usually have dedicated DSP units on silicon and vendors usually do not tell you about implementation details.
about multiplications
Anyway, the division module is still problematic for me because it still takes 32 clocks for computing 32 bit. The reson is that I haven't yet found an algorithm with an acceptable complexity.What are you using now? Binary long division (https://en.wikipedia.org/wiki/Long_division#Non-decimal_radix) (i.e., shift and subtract)?
What are you using now? Binary long division (i.e., shift and subtract)?
But do you have a working ADA compiler for your Arise-v2 core yet? :P
Division is going to be slow no matter how you do it.Yeah. Come up with a way to divide that's as fast as multiplying and generations of algorithms working around the slowness of divide go out the window. :)
Division is going to be slow no matter how you do it.Yeah. Come up with a way to divide that's as fast as multiplying and generations of algorithms working around the slowness of divide go out the window. :)
Obviously for really small numbers of bits you can often use a lookup approach. As the bits expand you are quickly into successive approximation approaches, typically with a skip ahead for small values, to avoid the logic exploding to unrealistic numbers of gates. The Russian division technique is basically successive approximation, although its often expressed in a way that doesn't immediately look like it is. After so many years of people failing to find the division equivalent of Wallace or Booth, that can be implemented in a realistic number of gates, with all the carry look ahead needed to avoid severe ripple delays, it seem unlikely we are going to do better than we do today.Division is going to be slow no matter how you do it.Yeah. Come up with a way to divide that's as fast as multiplying and generations of algorithms working around the slowness of divide go out the window. :)
That's like saying you need a shorter piece of string.
What size data are you dealing with? 128/64 bit? 8/8 bit? Or something else? How many Tcy does your current algorithm take? In other words, there are some fairly fast algorithms for division. The so-called Kenyan (aka Russian and a lot of other names) and PicList has some others.
The fastest way of dividing, of course, being not to divide at all.The fastest way to divide is typically to reformulate the problems until it turns into a multiply.
>:D
What size data are you dealing with?
How many Tcy does your current algorithm take?
Kenyan (aka Russian and a lot of other names) and PicList has some others.
So, let's assume: 32bitHow many Tcy does your current algorithm take?
Currently, it takes 1 clock to compute 1 bit, so 32 clock edges to compute quotient and remainder, both 32 bits in size, plus 2 clocks for handling the sign and the FSM.
;Inputs:
; Dividend - AARGB0:AARGB1:AARGB2 (0 - most significant!)
; Divisor - BARGB0:BARGB1
;Temporary:
; Counter - LOOPCOUNT
; Remainder- REMB0:REMB1
;Output:
; Quotient - AARGB0:AARGB1:AARGB2
;
; Size: 28
; Max timing: 4+24*(6+6+4+3+6)-1+3+2=608 cycles (with return)
; Min timing: 4+24*(6+6+5+6)-1+3+2=560 cycles (with return)
Currently, it evolves 1 clock to compute 1 bit, so 32 clock edges to compute quotient and reminder, both of 32bit of size, plus 2 clocks for handling the sign and the fsm.
My intention was to move the talk about guidelines to its own topic, but I failed, sorry. ;D
Feynman's algorithm for logarithmsQuote from: doi:10.1063/1.881196Feynman worked out in some detail the program for
computing Hopfield's network on the Connection Ma-
chine.
...
How Euler Did It is an online MAA column, written by Ed Sandifer of Western Connecticut State University from 2003 to 2010. Each article examines a specific work or concept developed by Leonhard Euler, with the topics ranging from number theory to geography to fluid mechanics.
The Euler Archive, in collaboration with the MAA, hosts the article collection for the How Euler Did It series. The series was published in two volumes, both volumes are available from the MAA: How Euler Did It (2007), How Euler Did Even More (2014).
What is Project Euler?
Project Euler is a series of challenging mathematical/computer programming problems that will require more than just mathematical insights to solve. Although mathematics will help you arrive at elegant and efficient methods, the use of a computer and programming skills will be required to solve most problems.
The motivation for starting Project Euler, and its continuation, is to provide a platform for the inquiring mind to delve into unfamiliar areas and learn new concepts in a fun and recreational context.
Currently, it evolves 1 clock to compute 1 bit, so 32 clock edges to compute quotient and reminder, both of 32bit of size, plus 2 clocks for handling the sign and the fsm.
16-bit PICs do 16-bit division in 18 cycles (16 + 2). What a coincidence ;)
You would if you were a hardware engineer, looking for the fastest or smallest way to implement an instruction set.Currently, it evolves 1 clock to compute 1 bit, so 32 clock edges to compute quotient and reminder, both of 32bit of size, plus 2 clocks for handling the sign and the fsm.
16-bit PICs do 16-bit division in 18 cycles (16 + 2). What a coincidence ;)
That's a good point, and every time I struggle with multiply or divide with my my little PIC16F chips , I wonder about making the jump to a PIC24F. However, I would not consider such slick hardware solutions, "neat algorithms."
Tango trees? Is there any good C implementation? Where is it used?"Is there any good C implementation?" ... Only several ways to find out!
Tango trees?"Preferred paths ... most recently accessed..."
Anywhere you'd use a balanced binary search tree
Anywhere you'd use a balanced binary search tree
yeah, it's a log(log(n)). But it looks adding more complexity, therefore it seems that it makes sense only for large data-set, when "n" is a very big number, and so big that log(n) >> log(log(n)).
#include <stdio.h>
#include <string.h>
#include <stdlib.h>
// Scapegoat-tree demo state: a binary search tree kept loosely balanced by
// rebuilding any subtree that becomes alpha-weight-unbalanced.
struct node {char *key; node *left; node *right;};   // BST node; key owns a strdup'd input line
int node_cnt;   // total number of nodes currently in the tree
#define BAL_OK 0          // return sentinel: subtree balanced, stop checking ancestors
#define ALPHA 0.666667    // balance factor; allowed depth is log base (1/ALPHA) of node_cnt
#define TABSZ 50          // maximum tracked depth
int minNodesForDepth[TABSZ];   // [d] = smallest node count for which depth d is acceptable
// Precompute minNodesForDepth: entry d holds (1/ALPHA)^(d+1) truncated to
// int -- the smallest tree size for which depth d counts as balanced.
void init_table(){
    double threshold = 1.0 / ALPHA;
    for (int depth = 0; depth < TABSZ; ++depth){
        minNodesForDepth[depth] = threshold;   // truncating store, as before
        threshold *= 1.0 / ALPHA;
    }
}
// In-order dump of the tree, one key per line (keys retain fgets newlines),
// indented one space per level so the tree shape is visible sideways.
void printtree(node *root, int depth=0){
if (root == 0)
return;
printtree(root->left, depth + 1);
for (int pad = 0; pad < depth; ++pad)
printf(" ");
printf("%s", root->key);
printtree(root->right, depth + 1);
}
// Look up key in the BST rooted at root.
// Returns the matching node, or 0 if the key is not present.
// Bug fix: the original recursed unconditionally and dereferenced a null
// child (root->key) when the key was absent, crashing instead of failing.
node *find(node *root, char *key){
while (root){
int cmp = strcmp(key, root->key);
if (cmp == 0)
return root;
root = (cmp < 0) ? root->left : root->right;
}
return 0; // not found
}
// Flatten the subtree at root into a singly linked list threaded through the
// ->right pointers, in ascending key order, prepended in front of lst.
// All ->left pointers in the flattened nodes are cleared to 0.
// Returns the head of the combined list (no allocation; nodes are relinked).
node *makelinear(node *root, node *lst){
if (!root) return lst;
root->right = makelinear(root->right, lst); // right subtree goes after root
node *left = root->left;
root->left = 0;
return makelinear(left, root); // left subtree goes in front of root
}
// Rebuild a balanced BST from the first n nodes of the right-threaded sorted
// list lst (as produced by makelinear). lst is passed by reference and is
// advanced past the consumed nodes, so the caller's pointer ends up at the
// remainder of the list. Splits as (n-1)/2 nodes left, root, n/2 nodes right.
node *maketree(node *&lst, int n){
if (n == 0) return 0;
node *left = maketree(lst, (n-1)/2); // build left half first (consumes lst)
node *root = lst; lst = lst->right; // middle list node becomes the root
root->left = left;
root->right = maketree(lst, n/2);
return root;
}
// Number of nodes in the subtree rooted at root (0 for an empty subtree).
int cntSz(node *root){
if (root == 0)
return 0;
return cntSz(root->left) + cntSz(root->right) + 1;
}
// Insert node n into the scapegoat tree rooted at root (passed by reference).
// Return protocol: BAL_OK (0) means the path below is acceptably shallow or a
// rebuild has already happened, so ancestors need do nothing; any nonzero
// value is the size of the caller's modified subtree, telling each ancestor
// to keep looking for a scapegoat (an alpha-weight-unbalanced node) to rebuild.
int insert(node *&root, node *n, int depth=0){
if (root == 0){
root = n;
// Leaf landed too deep for the current node count (per the threshold
// table): report size 1 upward to start the scapegoat search.
return ++node_cnt < minNodesForDepth[depth] ? 1 : BAL_OK;
}
int cmp = strcmp(n->key, root->key);
int sz = insert(cmp<0 ? root->left : root->right, n, depth+1);
if (sz == BAL_OK) return sz; // nothing to fix below; propagate the all-clear
// Size of this subtree = modified child + this node + untouched sibling.
int tot = sz + 1 + cntSz(cmp<0 ? root->right : root->left);
// Still alpha-weight-balanced here: the scapegoat (if any) is higher up.
if (sz <= tot*ALPHA) return tot;
// This node is the scapegoat: flatten its subtree and rebuild it balanced.
node *lst = makelinear(root, 0);
root = maketree(lst, tot);
return BAL_OK;
}
// Read lines from stdin, insert each as a key into the scapegoat tree, then
// print the whole tree as an indented in-order listing.
// Bug fix: the original never checked malloc/strdup results; on allocation
// failure a null pointer would be dereferenced later (n->left or strcmp on a
// null key). Nodes are intentionally not freed: the process exits right after.
int main(){
char buf[1000];
init_table();
node *tree = 0;
while (fgets(buf, sizeof buf, stdin)){
node *n = (node*)malloc(sizeof *n);
if (n == 0){
fprintf(stderr, "out of memory\n");
return 1;
}
n->key = strdup(buf); // key keeps the trailing newline from fgets
if (n->key == 0){
fprintf(stderr, "out of memory\n");
return 1;
}
n->left = n->right = 0;
insert(tree, n);
}
printtree(tree);
return 0;
}
Once you get that data block locally you should make as much use of it as possible.
That means you should be using a B{*,+}-tree not a binary tree for vastly reduced tree depth and hence number of access time delays.
Note that many software implementations of trees in general, when manipulated in RAM, are a lot less efficient than expected from the algorithms due to accesses all over the place that lead to many cache misses. So in that respect, the choice of algorithm and data structures are key. It's not just a problem with RAM either, any medium that can be accessed more efficiently as consecutive blocks (hard drives being also that way for instance) are concerned.
/* One chained hash-table entry. The cached hash trades memory for speed:
 * comparisons along a long chain can reject mismatches on the hash value
 * alone before doing a full key comparison. */
struct hash_entry {
struct hash_entry *next; /* next entry in this bucket's chain, or NULL */
size_t hash; /* full (unmasked) hash of this entry's key */
/* Data */
};
/* Separate-chaining hash table: 'entry' points to an array of 'size' bucket
 * heads; 'used' tracks the total entry count (e.g. for load-factor resizing). */
struct hash_table {
size_t size; /* Number of entries in entry pointer array */
size_t used; /* Number of entries stored in the table overall */
struct hash_entry **entry; /* bucket array: entry[i] heads chain i */
/* pthread_rwlock_t rwlock; for thread-safe operation */
};
This is definitely not the most efficient way to go about it. Pointer chaining trades some efficiency for robustness: if you have an entry, insert never fails. Including the hash value itself in each node trades memory for ease of use, and reduces the number of full comparisons needed when a pointer chain grows long.many software implementations of trees in general, when manipulated in RAM, are a lot less efficient than expected from the algorithms due to accesses all over the place that lead to many cache misses.In theory, that doesn't matter, because you're only affecting K in the overall t = K * f(N) performance equation. If your N really justifies going from a log(N) algorithm to log(log(N)) (Tango Trees, say. No, wait - that's the "competitive ratio", a term I don't understand), then cache misses are going to need to be REALLY REALLY EXPENSIVE before the improvement goes away. I mean, for 2^64 items, log2(n) is 64 and log2(log2(n)) is ... 6. But going from the O(N) to O(log(N)) is going to pretty clearly be a huge win, even with ~64 "cache misses", compared to 2^64 "perfect" memory accesses.
A valuable lesson about modern CPUs is, computation is almost for free.Unless the computation is mobile, where every unnecessary instruction drains the battery a little more.
This was the era of spinning disk hard drives, and I/O speeds were much slower than they are today with SSD drives and multi-gigabyte RAM sizes (making many workloads completely cacheable in RAM). Reading the input was the clear bottleneck. If you first read the input to memory, then sort them, then you wait for all I/O to complete before you start computation (sorting). If you read the lines into a sorting data structure like a tree, you essentially do the computation while I/O is still underway; and your sort is basically complete, when I/O completes. This means that the read-then-sort takes much longer, using real-world wall-clock measurement, than reading the lines into a tree; even if the read-to-tree uses somewhat more CPU time.
Quotemany software implementations of trees in general, when manipulated in RAM, are a lot less efficient than expected from the algorithms due to accesses all over the place that lead to many cache misses.In theory, that doesn't matter, because you're only affecting K in the overall t = K * f(N) performance equation. If your N really justifies going from a log(N) algorithm to log(log(N)) (Tango Trees, say. No, wait - that's the "competitive ratio", a term I don't understand), then cache misses are going to need to be REALLY REALLY EXPENSIVE before the improvement goes away.
A valuable lesson about modern CPUs is, computation is almost for free.Unless the computation is mobile, where every unnecessary instruction drains the battery a little more.
It's all in the break-even point for N for different algorithms on a given platform.Well, sure. But if you're contemplating moving from an N2 algorithm to an N*log(N) algorithm because N is getting "big", you probably don't need to immediately worry that you'll end up with an Nlog(N) algorithm that is sub-optimal due to cache locality issues... (What's that saying?: "best is the enemy of better")
Mobile, shmobile. Who the fuck cares about mobile, except for people wanting to make a phone call or people looking to cultivate an attention deficit disorder? Yeah yeah, phone faster than some old chippie in 1960's rocket, got it. Still too slow!A valuable lesson about modern CPUs is, computation is almost for free.Unless the computation is mobile, where every unnecessary instruction drains the battery a little more.
But orders of magnitude less than an unnecessary memory load, especially from actual RAM.
A large list of primes on which you want to do fast search operations is also a nice example of where you would like log(log(n)) instead of log(n). For ~ 2^32 entries a regular binary tree takes ~ 32 operations to do a search. If you can do that in log(log(n)) operations you go from 32 to 5. A factor of 6 in runtime can be sufficient motivation to use something a little more complex.
A large list of primes on which you want to do fast search operations is also a nice example of where you would like log(log(n)) instead of log(n). For ~ 2^32 entries a regular binary tree takes ~ 32 operations to do a search. If you can do that in log(log(n)) operations you go from 32 to 5. A factor of 6 in runtime can be sufficient motivation to use something a little more complex.
If you search primes within 32-bit numbers, you have only 2G numbers to test (after eliminating even numbers which are not primes). So, you only need 2G-bit lookup table - 256MBytes of memory. Today, this is not much by any means. 4GByte lookup table will give you all primes in 40-bit numbers. And this is still quite modest amount of RAM by today's standards. One lookup and you're done. That is how you get things done when you have lots of resources, not by applying older algorithms which were designed to circumvent resource scarcity.
Yeah, my bad. I could have typed a bit more to be more clear. With "a tree with ~ 2^32 entries" I meant a tree with exactly 2^32 − 1 unique entries that are prime numbers. So a binary tree with N=4294967295 nodes, each node being a prime number, and each number being unique. When viewed as a sorted list, the numbers are not required to be consecutive primes, nor is the smallest number required to be 2. Whatever set is convenient for the problem at hand.A large list of primes on which you want to do fast search operations is also a nice example of where you would like log(log(n)) instead of log(n). For ~ 2^32 entries a regular binary tree takes ~ 32 operations to do a search. If you can do that in log(log(n)) operations you go from 32 to 5. A factor of 6 in runtime can be sufficient motivation to use something a little more complex.
If you search primes within 32-bit numbers, you have only 2G numbers to test (after eliminating even numbers which are not primes). So, you only need 2G-bit lookup table - 256MBytes of memory. Today, this is not much by any means. 4GByte lookup table will give you all primes in 40-bit numbers. And this is still quite modest amount of RAM by today's standards. One lookup and you're done. That is how you get things done when you have lots of resources, not by applying older algorithms which were designed to circumvent resource scarcity.
In some cases, there are also ways to circumvent the need and translate it to something else. For instance, here, you may actually not NEED to test any integer number being prime or not. It's sometimes possible to transform your approach by generating prime numbers instead of having to test them, something that's much faster than testing for primality. You sometimes need to think outside the box.Absolutely. Transforming a problem into another form is my favorite way of solving problems. It's also the main reason (or so I tell myself ;D) why I try to solve toy problems and little puzzles. Not so much for the actual solution, but more as a good way to learn different ways of solving various problems. For example Project Euler (https://projecteuler.net/) has a nice collection. And after solving a particular problem you can take a look on the forum to see how other people did it. Some of it is "Yeah yeah, I did that too", or "Hah, my way is better!", but there is also plenty of "Doh! Why didn't I think of that?".
For example Project Euler (https://projecteuler.net/) has a nice collection. And after solving a particular problem you can take a look on the forum to see how other people did it. Some of it is "Yeah yeah, I did that too", or "Hah, my way is better!", but there is also plenty of "Doh! Why didn't I think of that?".
Anyways, the main point being: "4294967295 unique prime numbers", as opposed to "all prime numbers below 4294967296".
And while we're at it ... a 4 GByte LUT as described (eliminate even numbers, keep track of the rest) unfortunately only gets you up to 36-bit numbers, not 40-bit numbers.
That said, I agree that you should use the resources available, and not use old shit that is no longer relevant. Unfortunately infinity is fucking big, even if it is countable.
I just registered for some fun. Ran into some issue with the first problem though: https://projecteuler.net/problem=1Just solved it, so seems to check out fine for me. Mainly two sanity checks:
I double and triple-checked my answer, and the site still thinks it's erroneous. Could you check? Maybe I'm just tired. ::)
Anyways, seems to work fine. Maybe just add some coffee. ;D
[SPOILERS]
.
.
.
.
.
.
.
.
.
// Sum of all multiples of 3 or 5 strictly below `below`, in closed form.
// Inclusion-exclusion over 3, 5 and their lcm 15: the sum of multiples of k
// up to `limit` is k * T(floor(limit / k)), where T is the triangular number.
function summultiples(below) {
var triangular = function (n) { return n * (n + 1) / 2; };
var limit = below - 1; // "strictly below", so the largest candidate is below-1
var byThree = 3 * triangular(Math.floor(limit / 3));
var byFive = 5 * triangular(Math.floor(limit / 5));
var byFifteen = 15 * triangular(Math.floor(limit / 15)); // counted twice above
return byThree + byFive - byFifteen;
}
/* Brute-force O(n) version: add every multiple of 3 and of 5 strictly below
 * 'below', then subtract the multiples of 15, which were added twice. */
long summultiples(long below)
{
long i, sum=0;
for (i=3; i < below; i+=3)
sum += i;
for (i=5; i < below; i+=5)
sum += i;
for (i=15; i < below; i+=15) /* remove the double-counted lcm(3,5) terms */
sum -= i;
return sum;
}a 4-byte access for basically every statement, that's a big stinker right there.Well, yeah. But summultiples(1000) doesn't fit in 16bits...
divisions don't matter, they are by constants so can be transformed to clever multiplications by a sufficiently clever compiler.That'd be nice. It doesn't seem to happen with either the ARM or AVR gcc compilers, though. At least, not for 3, 5, and 15.
Ah. the ARM gcc DOES do this on the m4 (where it barely matters.)But not on the M0plus. Presumably it's upset that there is no 32*32->64bit multiply. Hmmph.divisions can be transformed to clever multiplicationsThat'd be nice. It doesn't seem to happen with either the ARM or AVR gcc compilers, though.
(...)(...)
- me - something like (... I pulled out my PDA from my pocket, and wrote the code to show him the result ... ) qsort([]) -> []; qsort([Pivot|Rest]) -> qsort([ X || X <- Rest, X < Pivot]) ++ [Pivot] ++ qsort([ Y || Y <- Rest, Y >= Pivot])
One bleeding-edge algorithm quite relevant to electronics engineering that I find is not very well known is "compressive sensing".Compressive sensing is an interesting concept. It has been discussed on this forum a few times.
Does anyone have any recommendation for a rather fast single precision float logarithm for ARM? (M4, M7 cores with FPU)?
I don't care whether log natural or log2. I take the faster one! (Need for some a real time audio dynamic processing). Guessing log2 shall be easiest of them due to the float nature of storing the mantissa and exponent separately, taking the log2 just of the mantissa and add the exponent.
Does anyone have any recommendation for a rather fast single precision float logarithm for ARM? (M4, M7 cores with FPU)?
I don't care whether log natural or log2. I take the faster one! (Need for some a real time audio dynamic processing). Guessing log2 shall be easiest of them due to the float nature of storing the mantissa and exponent separately, taking the log2 just of the mantissa and add the exponent.
// floor(log2(n)) for n >= 1. Undefined for n == 0 (__builtin_clz(0) is UB).
// Bug fix: the original called __builtin_clzl on a uint32_t. On LP64 targets
// (64-bit long) clzl counts leading zeros in 64 bits, so e.g. n == 1 gave
// 31 - 63, which wraps through the uint8_t cast to a wrong result. Using
// __builtin_clz (unsigned int) matches the 32-bit operand on both the
// Cortex-M targets discussed here and common 64-bit hosts.
static inline uint8_t Log2Int(uint32_t n)
{
return (uint8_t)(8*sizeof(n) - 1 - (uint8_t) __builtin_clz(n));
}
// log2(n) with nFrac fractional bits, computed digit by digit.
// Assumes: n >= 1.
//
// After splitting off the integer part, the mantissa m lies in [1, 2).
// Each iteration squares m; if the square reaches 2, the next fractional
// bit of the logarithm is 1 and m is halved back into range.
float BinaryLog2b(uint32_t n, uint8_t nFrac)
{
uint8_t intPart = Log2Int(n);
float m = ((float) n) / ((float)(((uint32_t)1) << intPart));
float result = (float) intPart;
float weight = 0.5f; // value of the fractional bit being decided
for (uint8_t bit = 0; bit < nFrac; ++bit)
{
m *= m;
if (m >= 2.0f)
{
result += weight;
m *= 0.5f;
}
weight *= 0.5f;
}
return result;
}
Testing is absolutely vital and you need to throw lots of different data at an algorithm to try and catch it out.
Testing is absolutely vital and you need to throw lots of different data at an algorithm to try and catch it out.