rANS compression algorithm - can you identify the bug(s) in this implementation? [closed]

2 weeks ago 18
ARTICLE AD BOX

I have a custom compression library for some proprietary data. Part of what the encoder needs to store for the decoder is a long, sparse bit array (where one-bits are much rarer than zero-bits).

I currently use Arithmetic encoding, which works, but I thought I could do better. AI suggested rANS Duda with the implementation below. However, it doesn't work. The paper is beyond my level of math comprehension, can someone point out the errors?

//Common declarations: static class Rans { public const uint L = 1u << 23; // Lower bound of state public const int SCALE_BITS = 12; // TOTAL = 4096 public const uint TOTAL = 1u << 12; // 4096 } struct BitModel { public readonly uint freq0; public readonly uint freq1; public BitModel(uint count0, uint count1) { double p0 = (double)count0 / (count0 + count1); freq0 = Math.Max(1, (uint)(p0 * Rans.TOTAL)); freq1 = Rans.TOTAL - freq0; } } //Encoder: static class RansEncoder { public static byte[] Encode(string bits, uint count0, uint count1) { var model = new BitModel(count0, count1); uint x = Rans.L; var output = new List<byte>(); for (int i = bits.Length - 1; i >= 0; i--) { char s = bits[i]; uint freq = (s == '0') ? model.freq0 : model.freq1; uint cum = (s == '0') ? 0u : model.freq0; while (x >= Rans.L * model.total) { output.Add((byte)(x & 0xFF)); x >>= 8; } x = (x / freq) * model.total + (x % freq) + cum; } for (int i = 0; i < 4; i++) { output.Add((byte)(x & 0xFF)); x >>= 8; } return output.ToArray(); } } //Decoder: static class RansDecoder { public static string Decode(byte[] data, int bitCount, uint count0, uint count1) { var model = new BitModel(count0, count1); int idx = data.Length - 1; uint x = 0; while (idx >= 0 && x < Rans.L) { x = (x << 8) | data[idx--]; } char[] output = new char[bitCount]; for (int i = 0; i < bitCount; i++) { uint v = x & (Rans.TOTAL - 1); bool isZero; uint freq, cum; if (v < model.freq0) { isZero = true; freq = model.freq0; cum = 0; } else { isZero = false; freq = model.freq1; cum = model.freq0; } output[i] = isZero ? '0' : '1'; x = freq * (x >> Rans.SCALE_BITS) + (v - cum); while (x < Rans.L && idx >= 0) { x = (x << 8) | data[idx--]; } } return new string(output); } } //Driver code: var bits = "0110001000100010001000100010001000100010001000100010001000100010001000100010"; uint count0 = (uint)bits.Count(c => c == '0'); uint count1 = (uint)bits.Length - count0; var e = RansEncoder.Encode(bits, count0, count1); var d = RansDecoder.Decode(e, bits.Length, count0, count1); Console.WriteLine(bits); Console.WriteLine(d);
Read Entire Article