If you need to conserve flash, use
#include <avr/io.h>
#include <avr/pgmspace.h>
const unsigned char highest_bit_table[256] PROGMEM = {
8,0,1,8,2,8,8,8,3,8,8,8,8,8,8,8,4,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,5,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
6,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,
};
unsigned char highest_bit_set(unsigned char c)
{
if (c & 128)
return 7;
else
return pgm_read_byte_near(highest_bit_table + c);
}
which takes 148 bytes of flash, and 5 (if highest bit is set) or 9 cycles (otherwise).
Note that if the inputs are uniformly random, the average is the same (7 cycles).
To trade speed for less flash use, use
#include <avr/io.h>
#include <avr/pgmspace.h>
const unsigned char highest_bit_table_3[16] PROGMEM = {
0x88, 0x40, 0x51, 0x88, 0x62, 0x88, 0x88, 0x88, 0x73, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88, 0x88,
};
unsigned char highest_bit_set_3(unsigned char c)
{
if (c & 0xF0) {
if (c & 0x0F)
return 8;
else
return pgm_read_byte_near(highest_bit_table_3 + (c >> 4)) >> 4;
} else
return pgm_read_byte_near(highest_bit_table_3 + c) & 0x0F;
}
which takes 65 bytes of flash, and between 8 and 18 cycles.
If you don't have even that much flash available, use the loop approach, but with a nibble swap since AVR's support that in hardware:
unsigned char highest_bit_set(unsigned char c)
{
unsigned char result;
if (c & 0xF0) {
c >>= 4;
result = 4;
} else {
result = 0;
}
while (c) {
c >>= 1;
result++;
}
return result;
}
Pretty sure the first conditions on these 3 solutions can be changed to comparisons (c > 0x0F), which is non destructive (doesn't require a temporary register to compute) so would save a move instruction.
And a 16 byte lookup table on the last solution would be neat.