Here is some working code in C for BCD fp mul:
I compiled it and tested in avr, arm and pic32 (mips). The given time is for mips. It encodes one BCD (base-10) per byte. I went for base 100 because you get the same speed for two times the number of digits.
#ifdef SMALL
#define BCD_MAXDIGITS 8
#define BCD_MAXWIDTH 13
#define BCD_MAX_EXP 99
#define BCD_MIN_EXP -99
typedef int8_t exp_t;
#else
#define BCD_MAXDIGITS 20 // we should safely get 16 decimal places
#define BCD_MAXWIDTH 24
#define BCD_MAX_EXP 999
#define BCD_MIN_EXP -999
typedef int16_t exp_t;
#endif
#define BCD_LASTDIGIT (BCD_MAXDIGITS-1)
typedef struct {
uint8_t sign;
exp_t dexp;
uint8_t frac[BCD_MAXDIGITS];
} T_SBCDREAL;
_CNT uint8_t mullow[][16] = { { 0 },
{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 },
{ 0, 2, 4, 6, 8, 0, 2, 4, 6, 8 },
{ 0, 3, 6, 9, 2, 5, 8, 1, 4, 7 },
{ 0, 4, 8, 2, 6, 0, 4, 8, 2, 6 },
{ 0, 5, 0, 5, 0, 5, 0, 5, 0, 5 },
{ 0, 6, 2, 8, 4, 0, 6, 2, 8, 4 },
{ 0, 7, 4, 1, 8, 5, 2, 9, 6, 3 },
{ 0, 8, 6, 4, 2, 0, 8, 6, 4, 2 },
{ 0, 9, 8, 7, 6, 5, 4, 3, 2, 1 } };
_CNT uint8_t mulhigh[][16] = { { 0 },
{ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 },
{ 0, 0, 0, 0, 0, 1, 1, 1, 1, 1 },
{ 0, 0, 0, 0, 1, 1, 1, 2, 2, 2 },
{ 0, 0, 0, 1, 1, 2, 2, 2, 3, 3 },
{ 0, 0, 1, 1, 2, 2, 3, 3, 4, 4 },
{ 0, 0, 1, 1, 2, 3, 3, 4, 4, 5 },
{ 0, 0, 1, 2, 2, 3, 4, 4, 5, 6 },
{ 0, 0, 1, 2, 3, 4, 4, 5, 6, 7 },
{ 0, 0, 1, 2, 3, 4, 5, 6, 7, 8 } };
/**
* Multiplies 2 BCD numbers
* it can use the same number as argument and result
* 99.99999999*99.99999999 takes 1833*2 with -O3
*/
void t_bcd_mul(T_SBCDREAL *r, T_SBCDREAL *a, T_SBCDREAL *b)
{
int8_t i, j, k, y, c;
uint8_t frac[BCD_MAXDIGITS*2];
uint8_t *xl, *xh;
r->dexp = a->dexp + b->dexp;
r->sign = a->sign ^ b->sign;
for (i = 0; i < BCD_MAXDIGITS+BCD_MAXDIGITS; i++) frac[i] = 0;
for (i = BCD_MAXDIGITS-1; i >= 0; i--)
{
j = b->frac[i];
if (j) {
xl = (uint8_t *)&mullow[j][0];
xh = (uint8_t *)&mulhigh[j][0];
k = BCD_MAXDIGITS + i;
c = 0;
for (j = BCD_MAXDIGITS-1; j >= 0; j--, k--) {
y = *(xl+a->frac[j]) + c + frac[k];
c = *(xh+a->frac[j]);
if (y >= 10)
{
c++;
y -= 10;
}
frac[k] = y;
}
frac[k] = c; // last carry
}
}
if (c) { // MSD is the carry
for (i = 0; i < BCD_MAXDIGITS; i++)
r->frac[i] = frac[i];
r->dexp++;
r->frac[0] = c;
}
else
for (i = 0; i < BCD_MAXDIGITS; i++)
r->frac[i] = frac[i+1];
}