EDIT: the first version produced digits in the opposite order, i.e. least significant digit leftmost, not rightmost. Fixed.
ultoa() etc. are typically based on a divide-by-ten (or divide-by-radix) approach, i.e.
const char digit[] = "0123456789abcdefghijklmnopqrstuvwxyz";
char *ultoa(unsigned long val, char *dst, int radix)
{
char *d = dst;
do {
*(d++) = digit[val % radix];
val /= radix;
} while (val);
*d = '\0';
// Swap digit order
{
char *b = dst;
char *q = d;
while (b < q) {
char *cb = *b;
char *cq = *(--q);
*(b++) = cq;
*q = cb;
}
}
return d;
}
Here are equivalent 32-bit unsigned and signed integer versions, using a 32-bit:8-bit division with remainder (instead of the ABI __udivmodsi4 32-bit:32-bit division with remainder):
#include <stdint.h>
typedef struct {
uint8_t q;
uint8_t r;
} div8bit;
div8bit divmod8bit_loop(uint8_t r, uint8_t b, const uint8_t v, const uint8_t d)
{
uint8_t q = 0;
while (b) {
r <<= 1;
if (b & v)
r++;
if (r >= d) {
r -= d;
q |= b;
}
b >>= 1;
}
return (div8bit){ .q = q, .r = r };
}
static uint8_t highbit(uint8_t v)
{
uint8_t b = 0x80;
while (!(b & v))
b >>= 1;
return b;
}
// Divide *vptr by d, d >= 1, d < 128. Returns remainder.
uint8_t divmod8bit(uint32_t *const vptr, const uint8_t d)
{
const uint32_t val = *vptr;
if (!val)
return 0;
uint32_t q = 0;
uint8_t b, w;
div8bit t = { .q = 0, .r = 0 };
if ((w = (uint8_t)(val >> 24))) {
b = highbit(w);
goto b3;
} else
if ((w = (uint8_t)(val >> 16))) {
b = highbit(w);
goto b2;
} else
if ((w = (uint8_t)(val >> 8))) {
b = highbit(w);
goto b1;
} else {
b = highbit((uint8_t)(val));
goto b0;
}
b3:
t = divmod8bit_loop(t.r, b, (uint8_t)(val >> 24), d);
q |= (uint32_t)(t.q) << 24;
b = 0x80;
b2:
t = divmod8bit_loop(t.r, b, (uint8_t)(val >> 16), d);
q |= (uint32_t)(t.q) << 16;
b = 0x80;
b1:
t = divmod8bit_loop(t.r, b, (uint8_t)(val >> 8), d);
q |= (uint32_t)(t.q) << 8;
b = 0x80;
b0:
t = divmod8bit_loop(t.r, b, (uint8_t)(val), d);
q |= (uint32_t)(t.q);
*vptr = q;
return t.r;
}
char *u32_str(char *dst, uint32_t val, const char *base, uint8_t radix)
{
uint32_t v = val;
char *d = dst;
uint8_t n = 0;
do {
*(d++) = base[divmod8bit(&v, radix)];
n++;
} while (v);
*d = '\0';
// Swap digit order
n >>= 1;
{
char *b = dst;
char *q = d;
while (n-->0) {
const char cb = *b;
const char cq = *(--q);
*(b++) = cq;
*(q) = cb;
}
}
return d;
}
char *i32_str(char *dst, int32_t val, const char *base, uint8_t radix)
{
if (val < 0) {
*(dst++) = '-';
return u32_str(dst, -val, base, radix);
} else
return u32_str(dst, val, base, radix);
}
u32_str(dst, value, "0123456789", 10) has been verified to produce correct results for all 32-bit unsigned integer values.
In addition to the radix characters string, divmod8bit() and divmod8bit_loop() take 346 bytes, u32_str() 194 bytes, and i32_str() additional 38 bytes, for a total of 572 bytes, using avr-gcc-5.4.0 -Os -mmcu=atmega328p.
Standard 32-bit:32-bit unsigned integer binary division with remainder can be written as
#include <stdint.h>
typedef struct {
uint32_t quotient;
uint32_t remainder;
} u32_qr;
u32_qr u32_divmod(uint32_t n, uint32_t d)
{
// Check for division by zero.
if (!d)
return (u32_qr){ .quotient = 0, .remainder = 0 };
// Zero numerator must be handled separately. We combine all n < d cases here.
if (n < d)
return (u32_qr){ .quotient = 0, .remainder = n };
// Find highest bit set.
uint32_t bit = (uint32_t)(1) << 31;
while (!(bit & n))
bit >>= 1;
// One iteration for each bit left.
u32_qr result = { .quotient = 0, .remainder = 0 };
while (bit) {
result.remainder <<= 1;
result.remainder |= !!(n & bit); // 1 if bit set, 0 otherwise.
if (result.remainder >= d) {
result.remainder -= d;
result.quotient |= bit;
}
bit >>= 1;
}
return result;
}
Because remainder is at most one bit larger than divisor, and the divisor in these radix calculations is always less than 128, the AVR version of divmod8bit() splits the calculation into four steps, each step working on a single byte. It should speed up the division, without increasing the function size much.