Hmmm, continuing work on my encoder webpage, adding a decode verification step -- seem to be having issues. Not entirely sure it's with the codec, getting weird results from my Huffman code too. (The packed and unpacked "codecs" work just fine, but they're almost trivial, I'm not surprised.)
Here is the ported code:
- It's structured much the same as the original, one decoded byte in/out per call, encoded channel output/input on demand. In this case the functions are packed into objects to easily swap them out; just imagine this was Java or C++ with an interface implemented by classes.
- Encoding seems to work, I didn't check if the arithmetic coding is bit-accurate to sample data (or to the original C, which I suppose I should just get off my ass and compile natively already..), but my Huffman code at least is. So I seem to be having decode errors there, and it may be the same error for both?
- I'm particularly suspicious of the initializer step here (copied from the original; note "coder" easily ports to "state", and "->" to "."),
for (int i = 0; i < 16; i++)
coder->value = (coder->value << 1) | input_bit(coder);
which unconditionally reads in 16 bits of input data, which seems dangerous when the stream is especially short. And indeed, in the per-row per-character packing, I'm seeing encoded streams as short as two bytes! Should this be considered a bug?
/**
* Encoder/decoder function pointers.
* Assign a set of start (initializer), step (input a byte, outputs
* a byte whenever) and end (finalizer, flush, etc.) functions to
* implement an encoding method.
*
* Properties:
*
* encodeStart(state, symbols)
* decodeStart(state, symbols)
* Initializes state.
* state: the state vector. An empty Object is passed in.
* Suggest assigning properties to it.
* symbols: an array of {val, freq} objects.
* val: palette index / pixel value
* freq: the number of times it occurs in the image
* Dividing by the total gets PDF, incrementally summing
* them gets CDF. Use to calculate weights, entropy, etc.
* Note that these data must be outputted with the encoded data,
* for any encoder that uses them.
*
* encodeStep(input, state, output)
* decodeStep(input, state)
* Encoder: processes one symbol, or input byte, using and mutating
* existing state, optionally outputting byte(s) to the output
* array using a output.push() call.
* Decoder: input is an iterator, to draw bytes from as needed.
* Returns one output byte/symbol per call.
*
* encodeEnd(state, output)
* Finalizes the encoder state, flushing any remaining data
* to the output.
* decodeEnd is not needed: exactly the dimensions of the
* image are Step'd over, leaving no extra data.
*/
const codecArithAdapt = {
encodeStart: function(state, symbols) {
state.TOP_VALUE = 0xffff;
state.MAX_SCALE = 0x3fff;
state.FIRST_QTR = (Math.floor(state.TOP_VALUE / 4) + 1);
state.HALF = (2 * state.FIRST_QTR);
state.THIRD_QTR = (3 * state.FIRST_QTR);
state.ACODER_N = symbols.length;
state.low = 0;
state.high = state.TOP_VALUE;
state.pending = 0;
state.byte = 0;
state.bit = 0;
state.cdf = [];
for (var i = 0; i < state.ACODER_N + 1; i++) {
state.cdf.push(i);
}
},
encodeStep: function(input, state, output) {
var range = state.high - state.low + 1;
state.high = Math.floor(state.low + (range * state.cdf[input + 1]) / state.cdf[state.ACODER_N] - 1);
state.low = Math.floor(state.low + (range * state.cdf[input]) / state.cdf[state.ACODER_N]);
while (1) {
if (state.high < state.HALF) {
codecArithAdapt_output_bit_and_pending(state, output, 0);
} else if (state.low >= state.HALF) {
codecArithAdapt_output_bit_and_pending(state, output, 1);
state.low -= state.HALF;
state.high -= state.HALF;
} else if (state.low >= state.FIRST_QTR && state.high < state.THIRD_QTR) {
state.pending++;
state.low -= state.FIRST_QTR;
state.high -= state.FIRST_QTR;
} else
break;
state.low = state.low * 2;
state.high = state.high * 2 + 1;
}
codecArithAdapt_model_update(state, input);
},
encodeEnd: function(state, output) {
state.pending++;
if (state.low < state.FIRST_QTR)
codecArithAdapt_output_bit_and_pending(state, output, 0);
else
codecArithAdapt_output_bit_and_pending(state, output, 1);
output.push(state.byte);
},
decodeStart: function(state, symbols) {
state.TOP_VALUE = 0xffff;
state.MAX_SCALE = 0x3fff;
state.FIRST_QTR = (Math.floor(state.TOP_VALUE / 4) + 1);
state.HALF = (2 * state.FIRST_QTR);
state.THIRD_QTR = (3 * state.FIRST_QTR);
state.ACODER_N = symbols.length;
state.low = 0;
state.high = state.TOP_VALUE;
state.value = 0;
state.bit = 7;
state.firstRun = true;
state.cdf = [];
for (var i = 0; i < state.ACODER_N + 1; i++) {
state.cdf.push(i);
}
},
decodeStep: function(input, state) {
// Moved from decodeStart because it's not passed any input
if (state.firstRun) {
for (var i = 0; i < 16; i++)
state.value = (state.value << 1) | codecArithAdapt_input_bit(state, input);
state.firstRun = false;
}
var range = state.high - state.low + 1;
var val = Math.floor(((state.value - state.low + 1) * state.cdf[state.ACODER_N] - 1) / range);
var b;
for (b = state.ACODER_N; val < state.cdf[b]; b--);
state.high = Math.floor(state.low + (range * state.cdf[b + 1]) / state.cdf[state.ACODER_N] - 1);
state.low = Math.floor(state.low + (range * state.cdf[b]) / state.cdf[state.ACODER_N]);
while (1) {
if (state.high < state.HALF) {
// Do nothing
} else if (state.low >= state.HALF) {
state.value -= state.HALF;
state.low -= state.HALF;
state.high -= state.HALF;
} else if (state.low >= state.FIRST_QTR && state.high < state.THIRD_QTR) {
state.value -= state.FIRST_QTR;
state.low -= state.FIRST_QTR;
state.high -= state.FIRST_QTR;
} else {
break;
}
state.low = state.low * 2;
state.high = state.high * 2 + 1;
state.value = (state.value << 1) | codecArithAdapt_input_bit(state, input);
}
codecArithAdapt_model_update(state, b);
return b;
}
};
// Helper functions
function codecArithAdapt_output_bit_and_pending(state, output, bit) {
codecArithAdapt_output_bit(state, output, bit);
for (; state.pending; state.pending--) {
codecArithAdapt_output_bit(state, output, !bit);
}
}
function codecArithAdapt_output_bit(state, output, value) {
state.byte |= (value << state.bit);
if (8 == ++state.bit) {
output.push(state.byte);
state.byte = 0;
state.bit = 0;
}
}
var consumedBytes = 0;
function codecArithAdapt_input_bit(state, input) {
var res;
state.bit++;
if (8 == state.bit) {
state.byte = input.next().value[1];
consumedBytes++;
state.bit = 0;
}
res = state.byte & 1;
state.byte >>>= 1;
return res;
}
function codecArithAdapt_model_update(state, input) {
var last = 0;
var value = 0;
if (state.cdf[state.ACODER_N] == state.MAX_SCALE) {
for (var i = 0; i < state.ACODER_N; i++) {
value = state.cdf[i + 1] - last;
last = state.cdf[i + 1];
state.cdf[i + 1] = state.cdf[i] + (value + 1) / 2;
}
}
for (var i = input; i < state.ACODER_N; i++) {
state.cdf[i + 1]++;
}
}
Typical usage is like so; variables should be fairly self-explanatory?
...
var codec = codecArithAdapt;
var encodedChars = [];
...
// Blocks, column first
for (var i = 0; i < charStarts.length; i++) {
encodedChars.push([]);
var encoderState = new Object();
codec.encodeStart(encoderState, palette);
for (var x = 0; x < charWidths[i]; x++) {
for (var y = 0; y < imgHeight; y++) {
// encodeStep(input, state, output)
codec.encodeStep(imgData[x + charStarts[i] + y * imgWidth], encoderState, encodedChars[i]);
}
}
codec.encodeEnd(encoderState, encodedChars[i]);
}