So, the challenge got to me, and I spent the night learning AVR-GCC and wrote a simple WS2812B driver - it is all in one file, and checking the timing on the scope, the pulses are within +/- 50 ns.
Here's the code - it is not the most beautiful or compact code out there, but it does weigh in at about 300 bytes:
/******************************************
* ws2812b_demo.c
*
* Author : Mike Field <hamster@snap.net.nz>
*
* A really simple driver for WS2812B LEDs for
* the Arduino Uno. Must be running at 16MHz.
*
*********************************************/
// Arduino pin number handed to pinMode() in setup().
// NOTE: the assembly in outputWS2812Bbytes() hard-codes "SBI/CBI 5, 4"
// (commented there as port B bit 4, Arduino pin 12), so changing this
// value alone does not move the data output.
#define OUTPUT_PIN 12
// Frame buffer: 5 LED entries of 3 bytes each, filled in loop() and sent
// byte-by-byte in storage order by outputWS2812Bbytes().
// NOTE(review): which byte is red/green/blue depends on the LED strip's
// expected channel order - confirm against the WS2812B datasheet.
unsigned char leds[5][3];
/*
 * Bit-bang a buffer of LED bytes out of port B bit 4 (Arduino pin 12) using
 * cycle-counted AVR assembly. The file header states the board must run at
 * 16MHz; the NOP padding below is tuned for that clock.
 *
 * leds   - array of 3-byte LED entries. Bytes are sent in storage order,
 *          most significant bit first.
 * length - number of LED entries to send. The loop counter is one 8-bit
 *          register holding length * 3, so at most 85 entries fit; larger
 *          values are clamped to 85, and 0 returns without touching the pin.
 *
 * Interrupts are disabled (cli) for the whole transfer and unconditionally
 * re-enabled (sei) when it finishes.
 *
 * Each bit starts by driving the pin high. If the data bit is 0 the pin is
 * cleared early (short high pulse); if it is 1 the early clear is skipped and
 * the pin stays high until the later, unconditional "cbi" (long high pulse).
 */
void outputWS2812Bbytes(unsigned char (*leds)[3], unsigned char length)
{
    /* Largest entry count whose byte total (x3) still fits the 8-bit counter. */
    const unsigned char max_leds = 255 / 3;

    /* With a zero count the SUBI at the end of the loop would wrap to 255
     * and the routine would send 256 bytes read from beyond the buffer. */
    if (length == 0) {
        return;
    }
    if (length > max_leds) {
        length = max_leds;
    }

    /* Pause before transmitting; presumably this gives the LEDs an idle gap
     * so they treat what follows as a new frame - confirm against datasheet. */
    delay(1);

    /*
     * Operand notes:
     *  - %0 (Z pointer) is post-incremented and %1 (counter) is rewritten by
     *    the assembly, so both are declared read-write ("+") rather than as
     *    plain inputs.
     *  - r18 is used as scratch; listing it as clobbered also stops the
     *    compiler from allocating %1 to r18, which would break the x3 maths.
     *  - "memory" makes the compiler finish all stores to the buffer before
     *    the assembly reads it.
     * The instruction sequence itself is unchanged, so timing is unchanged.
     */
    asm volatile(
        " cli \n\t" // Disable interrupts
        " mov 18,%1\n\t" // Copy length
        " add %1, 18\n\t" // Add it back
        " add %1, 18\n\t" // Add it back, so it is now x3 what it was
        "L_next%=:" "\n\t"
        ///////////////////////////////////////////////////////////
        // Bit 7
        // NOTE(review): this section has no JMP over the padding and uses a
        // different NOP count than bits 6..0, so its cycle total differs
        // slightly from the other bits - confirm this is intentional.
        " SBI 5, 4 \n\t" // Set port b bit 4 Arduino pin 12
        " NOP \n\t" // A pause
        " LD 18, Z \n\t" // Load current byte (no increment) - two cycles
        " ANDI 18, 128 \n\t" // Test the bit - one cycle
        " BRNE L_bit7%= \n\t" // Skip the clear if the bit is set
        " CBI 5, 4 \n\t" // clear port b bit 4
        "L_bit7%=:" "\n\t"
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " cbi 5, 4 \n\t" // clear port b bit 4
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        ///////////////////////////////////////////////////////////
        // Bit 6
        " SBI 5, 4 \n\t" // Set port b bit 4 Arduino pin 12
        " NOP \n\t" // A pause
        " LD 18, Z \n\t" // Load current byte (no increment) - two cycles
        " ANDI 18, 64 \n\t" // Test the bit - one cycle
        " BRNE L_bit6_0_%= \n\t" // Skip the clear if the bit is set
        " CBI 5, 4 \n\t" // clear port b bit 4
        " JMP L_bit6_1_%= \n\t" // Skip the padding used by the "bit set" path
        "L_bit6_0_%=:" "\n\t"
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        "L_bit6_1_%=:" "\n\t"
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " cbi 5, 4 \n\t" // clear port b bit 4
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        ///////////////////////////////////////////////////////////
        // Bit 5
        " SBI 5, 4 \n\t" // Set port b bit 4 Arduino pin 12
        " NOP \n\t" // A pause
        " LD 18, Z \n\t" // Load current byte (no increment) - two cycles
        " ANDI 18, 32 \n\t" // Test the bit - one cycle
        " BRNE L_bit5_0_%= \n\t" // Skip the clear if the bit is set
        " CBI 5, 4 \n\t" // clear port b bit 4
        " JMP L_bit5_1_%= \n\t" // Skip the padding used by the "bit set" path
        "L_bit5_0_%=:" "\n\t"
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        "L_bit5_1_%=:" "\n\t"
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " cbi 5, 4 \n\t" // clear port b bit 4
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        ///////////////////////////////////////////////////////////
        // Bit 4
        " SBI 5, 4 \n\t" // Set port b bit 4 Arduino pin 12
        " NOP \n\t" // A pause
        " LD 18, Z \n\t" // Load current byte (no increment) - two cycles
        " ANDI 18, 16 \n\t" // Test the bit - one cycle
        " BRNE L_bit4_0_%= \n\t" // Skip the clear if the bit is set
        " CBI 5, 4 \n\t" // clear port b bit 4
        " JMP L_bit4_1_%= \n\t" // Skip the padding used by the "bit set" path
        "L_bit4_0_%=:" "\n\t"
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        "L_bit4_1_%=:" "\n\t"
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " cbi 5, 4 \n\t" // clear port b bit 4
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        ///////////////////////////////////////////////////////////
        // Bit 3
        " SBI 5, 4 \n\t" // Set port b bit 4 Arduino pin 12
        " NOP \n\t" // A pause
        " LD 18, Z \n\t" // Load current byte (no increment) - two cycles
        " ANDI 18, 8 \n\t" // Test the bit - one cycle
        " BRNE L_bit3_0_%= \n\t" // Skip the clear if the bit is set
        " CBI 5, 4 \n\t" // clear port b bit 4
        " JMP L_bit3_1_%= \n\t" // Skip the padding used by the "bit set" path
        "L_bit3_0_%=:" "\n\t"
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        "L_bit3_1_%=:" "\n\t"
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " cbi 5, 4 \n\t" // clear port b bit 4
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        ///////////////////////////////////////////////////////////
        // Bit 2
        " SBI 5, 4 \n\t" // Set port b bit 4 Arduino pin 12
        " NOP \n\t" // A pause
        " LD 18, Z \n\t" // Load current byte (no increment) - two cycles
        " ANDI 18, 4 \n\t" // Test the bit - one cycle
        " BRNE L_bit2_0_%= \n\t" // Skip the clear if the bit is set
        " CBI 5, 4 \n\t" // clear port b bit 4
        " JMP L_bit2_1_%= \n\t" // Skip the padding used by the "bit set" path
        "L_bit2_0_%=:" "\n\t"
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        "L_bit2_1_%=:" "\n\t"
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " cbi 5, 4 \n\t" // clear port b bit 4
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        ///////////////////////////////////////////////////////////
        // Bit 1
        " SBI 5, 4 \n\t" // Set port b bit 4 Arduino pin 12
        " NOP \n\t" // A pause
        " LD 18, Z \n\t" // Load current byte (no increment) - two cycles
        " ANDI 18, 2 \n\t" // Test the bit - one cycle
        " BRNE L_bit1_0_%= \n\t" // Skip the clear if the bit is set
        " CBI 5, 4 \n\t" // clear port b bit 4
        " JMP L_bit1_1_%= \n\t" // Skip the padding used by the "bit set" path
        "L_bit1_0_%=:" "\n\t"
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        "L_bit1_1_%=:" "\n\t"
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " cbi 5, 4 \n\t" // clear port b bit 4
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        ///////////////////////////////////////////////////////////
        // Bit 0
        " SBI 5, 4 \n\t" // Set port b bit 4 Arduino pin 12
        " NOP \n\t" // A pause
        " LD 18, Z+ \n\t" // Load and post increment (advance to next byte) - two cycles
        " ANDI 18, 1 \n\t" // Test the bit - one cycle
        " BRNE L_bit0_0_%= \n\t" // Skip the clear if the bit is set
        " CBI 5, 4 \n\t" // clear port b bit 4
        " JMP L_bit0_1_%= \n\t" // Skip the padding used by the "bit set" path
        "L_bit0_0_%=:" "\n\t"
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        "L_bit0_1_%=:" "\n\t"
        " NOP \n\t" // A pause
        " NOP \n\t" // A pause
        " cbi 5, 4 \n\t" // clear port b bit 4
        // The loop bookkeeping below takes the place of the trailing NOPs
        // that follow the final "cbi" in the other bit sections.
        " SUBI %1,1 \n\t" // Decrement the count of bytes remaining
        " BREQ L_done%= \n\t" // Exit when finished
        " JMP L_next%= \n\t" // Jump back if we still have more to do
        // All finished
        "L_done%=: \n\t"
        " sei \n\t" // Re-enable interrupts
        : "+z" (leds), "+d" (length)
        :
        : "r18", "cc", "memory");
}
/* One-time initialisation: configure the LED data pin as an output. */
void setup()
{
    pinMode(OUTPUT_PIN, OUTPUT);
}
/*
 * Main loop body: refill the global frame buffer with a fixed five-entry
 * pattern and transmit it to the LEDs.
 */
void loop()
{
    /* One row per LED entry, three bytes per row, copied verbatim into leds. */
    static const unsigned char frame[5][3] = {
        { 0x3F, 0x3F, 0x3F },
        { 0x6F, 0x00, 0x00 },
        { 0x00, 0x6F, 0x00 },
        { 0x00, 0x00, 0x6F },
        { 0x3F, 0x3F, 0x3F },
    };

    for (unsigned char entry = 0; entry < 5; entry++) {
        for (unsigned char channel = 0; channel < 3; channel++) {
            leds[entry][channel] = frame[entry][channel];
        }
    }

    outputWS2812Bbytes(leds, 5);
}