|
const Decompress = @This(); const std = @import("std"); const assert = std.debug.assert; const Reader = std.io.Reader; const Limit = std.io.Limit; const zstd = @import("../zstd.zig"); const Writer = std.io.Writer; |
OptionsThe output buffer is asserted to have capacity for |
|
init()When connecting |
/// Underlying compressed byte source.
input: *Reader,
/// Decompressed-side stream interface; its vtable is installed by `init`.
reader: Reader,
/// Current position in the per-frame state machine.
state: State,
/// Mirrors `Options.verify_checksum`. Not implemented yet; see `Options`.
verify_checksum: bool,
/// Mirrors `Options.window_len`; used as the preserve length when rebasing
/// the buffer so back-references can still reach recent output.
window_len: u32,
/// Holds the diagnostic error after `reader` reports `error.ReadFailed`.
err: ?Error = null,
init()This could be improved so that when an amount is discarded that includes an entire frame, skip decoding that frame. |
/// Frame-level decoding state machine.
const State = union(enum) {
    /// Waiting to read the next frame's magic number.
    new_frame,
    /// Currently decoding the blocks of a Zstandard frame.
    in_frame: InFrame,
    /// Discarding the remaining payload bytes of a skippable frame.
    skipping_frame: usize,
    /// `stream` returns `error.EndOfStream` in this state.
    end,

    const InFrame = struct {
        /// Validated per-frame parameters (window size, checksum, limits).
        frame: Frame,
        checksum: ?u32,
        /// Total bytes decompressed so far in this frame.
        decompressed_size: usize,
        /// Block-level decoding state (FSE tables, literal streams, Huffman tree).
        decode: Frame.Zstandard.Decode,
    };
};
kind()
|
/// Configuration for `init`.
pub const Options = struct {
    /// Verifying checksums is not implemented yet and will cause a panic if
    /// you set this to true.
    verify_checksum: bool = false,
    /// The output buffer is asserted to have capacity for `window_len` plus
    /// `zstd.block_size_max`.
    ///
    /// If `window_len` is too small, then some streams will fail to decompress
    /// with `error.OutputBufferUndersize`.
    window_len: u32 = zstd.default_window_len,
};
Zstandardfailed decoding an FSE table |
/// Diagnostic errors stored in the `err` field when the `Reader` interface
/// surfaces `error.ReadFailed`.
pub const Error = error{
    BadMagic,
    BlockOversize,
    ChecksumFailure,
    ContentOversize,
    DictionaryIdFlagUnsupported,
    EndOfStream,
    HuffmanTreeIncomplete,
    InvalidBitStream,
    MalformedAccuracyLog,
    MalformedBlock,
    MalformedCompressedBlock,
    MalformedFrame,
    MalformedFseBits,
    MalformedFseTable,
    MalformedHuffmanTree,
    MalformedLiteralsHeader,
    MalformedLiteralsLength,
    MalformedLiteralsSection,
    MalformedSequence,
    MissingStartBit,
    OutputBufferUndersize,
    InputBufferUndersize,
    ReadFailed,
    RepeatModeFirst,
    ReservedBitSet,
    ReservedBlock,
    SequenceBufferUndersize,
    TreelessLiteralsFirst,
    UnexpectedEndOfLiteralStream,
    WindowOversize,
    WindowSizeUnknown,
};
HeaderPrepare the decoder to decode a compressed block. Loads the
literals stream and Huffman tree from |
/// When connecting `reader` to a `Writer`, `buffer` should be empty, and
/// `Writer.buffer` capacity has requirements based on `Options.window_len`.
///
/// Otherwise, `buffer` has those requirements.
pub fn init(input: *Reader, buffer: []u8, options: Options) Decompress {
    return .{
        .input = input,
        // Decoding always begins at a frame boundary.
        .state = .new_frame,
        .verify_checksum = options.verify_checksum,
        .window_len = options.window_len,
        .reader = .{
            // All four Reader entry points are implemented below.
            .vtable = &.{
                .stream = stream,
                .rebase = rebase,
                .discard = discard,
                .readVec = readVec,
            },
            .buffer = buffer,
            .seek = 0,
            .end = 0,
        },
    };
}
DecodeErrorTODO: don't use |
/// `Reader` vtable implementation: slide buffered data down so `capacity`
/// bytes can be appended, while preserving the most recent `window_len`
/// bytes of decompressed history in the buffer.
fn rebase(r: *Reader, capacity: usize) Reader.RebaseError!void {
    const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
    // Caller must have sized the buffer for window_len plus the request.
    assert(capacity <= r.buffer.len - d.window_len);
    // Only reached when the request cannot be satisfied in place.
    assert(r.end + capacity > r.buffer.len);
    const discard_n = r.end - d.window_len;
    const keep = r.buffer[discard_n..r.end];
    @memmove(r.buffer[0..keep.len], keep);
    r.end = keep.len;
    r.seek -= discard_n;
}
windowSize()Decode one sequence from |
/// `Reader` vtable implementation: decompress and throw away up to `limit`
/// bytes.
///
/// This could be improved so that when an amount is discarded that includes an
/// entire frame, skip decoding that frame.
fn discard(r: *Reader, limit: std.Io.Limit) Reader.Error!usize {
    const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
    r.rebase(d.window_len) catch unreachable;
    // Decompress into a discarding writer; it still tracks `end` so the
    // reader's position can be advanced past the produced bytes.
    var writer: Writer = .{
        .vtable = &.{
            .drain = std.Io.Writer.Discarding.drain,
            .sendFile = std.Io.Writer.Discarding.sendFile,
        },
        .buffer = r.buffer,
        .end = r.end,
    };
    // Runs on both success and error paths so positions stay consistent.
    defer {
        r.end = writer.end;
        r.seek = r.end;
    }
    const n = r.stream(&writer, limit) catch |err| switch (err) {
        // A discarding writer cannot fail to accept bytes.
        error.WriteFailed => unreachable,
        error.ReadFailed => return error.ReadFailed,
        error.EndOfStream => return error.EndOfStream,
    };
    assert(n <= @intFromEnum(limit));
    return n;
}
HeaderTODO: don't use |
/// `Reader` vtable implementation: `data` is intentionally ignored; output
/// is produced into the reader's own buffer and `0` directly-read bytes are
/// reported.
/// NOTE(review): assumes the `Reader.readVec` contract permits satisfying a
/// read by growing the internal buffer instead of filling `data` — confirm
/// against `std.io.Reader` documentation.
fn readVec(r: *Reader, data: [][]u8) Reader.Error!usize {
    _ = data;
    const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
    assert(r.seek == r.end);
    r.rebase(d.window_len) catch unreachable;
    // Fixed (non-growing) writer over the reader's buffer.
    var writer: Writer = .{
        .buffer = r.buffer,
        .end = r.end,
        .vtable = &.{ .drain = Writer.fixedDrain },
    };
    r.end += r.vtable.stream(r, &writer, .limited(writer.buffer.len - writer.end)) catch |err| switch (err) {
        // The writer was sized to the remaining capacity, so it cannot fail.
        error.WriteFailed => unreachable,
        else => |e| return e,
    };
    return 0;
}
DecodeFrame does not have a valid window size. |
/// `Reader` vtable implementation: top-level dispatch over the frame state
/// machine. Decode errors are recorded in `d.err` and surfaced to the
/// `Reader` interface as `error.ReadFailed`.
fn stream(r: *Reader, w: *Writer, limit: Limit) Reader.StreamError!usize {
    const d: *Decompress = @alignCast(@fieldParentPtr("reader", r));
    const in = d.input;

    switch (d.state) {
        .new_frame => {
            // Allow error.EndOfStream only on the frame magic.
            const magic = try in.takeEnumNonexhaustive(Frame.Magic, .little);
            initFrame(d, w.buffer.len, magic) catch |err| {
                d.err = err;
                return error.ReadFailed;
            };
            // A skippable frame leaves state != .in_frame; this access is
            // only reached for Zstandard frames set up by initFrame.
            return readInFrame(d, w, limit, &d.state.in_frame) catch |err| switch (err) {
                error.ReadFailed => return error.ReadFailed,
                error.WriteFailed => return error.WriteFailed,
                else => |e| {
                    d.err = e;
                    return error.ReadFailed;
                },
            };
        },
        .in_frame => |*in_frame| {
            return readInFrame(d, w, limit, in_frame) catch |err| switch (err) {
                error.ReadFailed => return error.ReadFailed,
                error.WriteFailed => return error.WriteFailed,
                else => |e| {
                    d.err = e;
                    return error.ReadFailed;
                },
            };
        },
        .skipping_frame => |*remaining| {
            // Skippable frames produce no output; just consume input.
            const n = in.discard(.limited(remaining.*)) catch |err| {
                d.err = err;
                return error.ReadFailed;
            };
            remaining.* -= n;
            if (remaining.* == 0) d.state = .new_frame;
            return 0;
        },
        .end => return error.EndOfStream,
    }
}
readInitialFseState()Validates |
/// Transition out of `.new_frame`: classify `magic` and either set up a
/// Zstandard frame's decoding state or arrange to skip a skippable frame.
/// `window_size_max` bounds the acceptable window (the output buffer size).
fn initFrame(d: *Decompress, window_size_max: usize, magic: Frame.Magic) !void {
    const in = d.input;
    const frame_kind = magic.kind() orelse return error.BadMagic;
    switch (frame_kind) {
        .skippable => {
            // Payload length follows the magic; the payload is discarded
            // incrementally by `stream`.
            const skip_len = try in.takeInt(u32, .little);
            d.state = .{ .skipping_frame = skip_len };
        },
        .zstandard => {
            const header = try Frame.Zstandard.Header.decode(in);
            const frame = try Frame.init(header, window_size_max, d.verify_checksum);
            d.state = .{ .in_frame = .{
                .frame = frame,
                .checksum = null,
                .decompressed_size = 0,
                .decode = .init,
            } };
        },
    }
}
magic_min:Invalid header. |
/// Decode one block of the current Zstandard frame into `w`, returning the
/// number of bytes written. After the frame's last block, verifies the
/// optional checksum and content size and resets `d.state` to `.new_frame`.
fn readInFrame(d: *Decompress, w: *Writer, limit: Limit, state: *State.InFrame) !usize {
    const in = d.input;
    const window_len = d.window_len;

    const block_header = try in.takeStruct(Frame.Zstandard.Block.Header, .little);
    const block_size = block_header.size;
    const frame_block_size_max = state.frame.block_size_max;
    if (frame_block_size_max < block_size) return error.BlockOversize;
    if (@intFromEnum(limit) < block_size) return error.OutputBufferUndersize;
    var bytes_written: usize = 0;
    switch (block_header.type) {
        .raw => {
            // Stored block: copy straight through, preserving `window_len`
            // bytes of history in the writer's buffer for back-references.
            try in.streamExactPreserve(w, window_len, block_size);
            bytes_written = block_size;
        },
        .rle => {
            // One byte repeated `block_size` times.
            const byte = try in.takeByte();
            try w.splatBytePreserve(window_len, byte, block_size);
            bytes_written = block_size;
        },
        .compressed => {
            var literals_buffer: [zstd.block_size_max]u8 = undefined;
            var sequence_buffer: [zstd.block_size_max]u8 = undefined;
            // `remaining` tracks unread bytes of this block's payload.
            var remaining: Limit = .limited(block_size);
            const literals = try LiteralsSection.decode(in, &remaining, &literals_buffer);
            const sequences_header = try SequencesSection.Header.decode(in, &remaining);

            // Load literal streams / Huffman tree and read the FSE tables.
            const decode = &state.decode;
            try decode.prepare(in, &remaining, literals, sequences_header);

            {
                // Everything left in the block is the sequence bitstream.
                if (sequence_buffer.len < @intFromEnum(remaining)) return error.SequenceBufferUndersize;
                const seq_slice = remaining.slice(&sequence_buffer);
                try in.readSliceAll(seq_slice);
                var bit_stream = try ReverseBitReader.init(seq_slice);

                if (sequences_header.sequence_count > 0) {
                    try decode.readInitialFseState(&bit_stream);

                    // Ensures the following calls to `decodeSequence` will not flush.
                    if (window_len + frame_block_size_max > w.buffer.len) return error.OutputBufferUndersize;
                    const dest = (try w.writableSliceGreedyPreserve(window_len, frame_block_size_max))[0..frame_block_size_max];
                    const write_pos = dest.ptr - w.buffer.ptr;
                    // All sequences except the last also advance the three
                    // FSE states.
                    for (0..sequences_header.sequence_count - 1) |_| {
                        bytes_written += try decode.decodeSequence(w.buffer, write_pos + bytes_written, &bit_stream);
                        try decode.updateState(.literal, &bit_stream);
                        try decode.updateState(.match, &bit_stream);
                        try decode.updateState(.offset, &bit_stream);
                    }
                    bytes_written += try decode.decodeSequence(w.buffer, write_pos + bytes_written, &bit_stream);
                    if (bytes_written > dest.len) return error.MalformedSequence;
                    w.advance(bytes_written);
                }

                // The sequence bitstream must be fully consumed.
                if (!bit_stream.isEmpty()) {
                    return error.MalformedCompressedBlock;
                }
            }

            // Flush literals that follow the final sequence.
            if (decode.literal_written_count < literals.header.regenerated_size) {
                const len = literals.header.regenerated_size - decode.literal_written_count;
                try decode.decodeLiterals(w, len);
                decode.literal_written_count += len;
                bytes_written += len;
            }

            // Huffman-coded literal streams must also be fully consumed.
            switch (decode.literal_header.block_type) {
                .treeless, .compressed => {
                    if (!decode.isLiteralStreamEmpty()) return error.MalformedCompressedBlock;
                },
                .raw, .rle => {},
            }

            if (bytes_written > frame_block_size_max) return error.BlockOversize;
        },
        .reserved => return error.ReservedBlock,
    }

    if (state.frame.hasher_opt) |*hasher| {
        if (bytes_written > 0) {
            _ = hasher;
            @panic("TODO all those bytes written needed to go through the hasher too");
        }
    }

    state.decompressed_size += bytes_written;

    if (block_header.last) {
        if (state.frame.has_checksum) {
            // The checksum trailer is always present when flagged, even if
            // verification is disabled; it must be consumed either way.
            const expected_checksum = try in.takeInt(u32, .little);
            if (state.frame.hasher_opt) |*hasher| {
                const actual_checksum: u32 = @truncate(hasher.final());
                if (expected_checksum != actual_checksum) return error.ChecksumFailure;
            }
        }
        if (state.frame.content_size) |content_size| {
            if (content_size != state.decompressed_size) {
                return error.MalformedFrame;
            }
        }
        d.state = .new_frame;
    } else if (state.frame.content_size) |content_size| {
        // Mid-frame overrun of the declared content size is malformed.
        if (state.decompressed_size > content_size) return error.MalformedFrame;
    }

    return bytes_written;
}
decode() |
/// Validated per-frame parameters, derived from a frame header by `Frame.init`.
pub const Frame = struct {
    /// Present only when the frame has a checksum and verification was requested.
    hasher_opt: ?std.hash.XxHash64,
    window_size: usize,
    has_checksum: bool,
    /// Upper bound for any block in this frame:
    /// `@min(zstd.block_size_max, window_size)`.
    block_size_max: usize,
    /// Decompressed size declared by the header, when known.
    content_size: ?usize,
StreamCount |
/// Little-endian magic number that begins every frame.
pub const Magic = enum(u32) {
    zstandard = 0xFD2FB528,
    _,

    /// Classify this magic number, or return `null` if it is not a
    /// recognized frame magic.
    pub fn kind(m: Magic) ?Kind {
        const value = @intFromEnum(m);
        if (value == @intFromEnum(Magic.zstandard)) return .zstandard;
        if (value >= @intFromEnum(Skippable.magic_min) and
            value <= @intFromEnum(Skippable.magic_max)) return .skippable;
        return null;
    }

    /// True when the magic falls in the skippable-frame range.
    pub fn isSkippable(m: Magic) bool {
        const value = @intFromEnum(m);
        return value >= @intFromEnum(Skippable.magic_min) and
            value <= @intFromEnum(Skippable.magic_max);
    }
};

/// Frame categories distinguished by `Magic.kind`.
pub const Kind = enum { zstandard, skippable };
SequencesSection |
/// A Zstandard (non-skippable) frame.
pub const Zstandard = struct {
    pub const magic: Magic = .zstandard;

    header: Header,
    data_blocks: []Block,
    checksum: ?u32,

    pub const Header = struct {
        descriptor: Descriptor,
        /// Present unless the single-segment flag is set.
        window_descriptor: ?u8,
        /// Present when `dictionary_id_flag` is nonzero.
        dictionary_id: ?u32,
        content_size: ?u64,

        /// The frame header descriptor byte; fields are LSB-first per
        /// `packed struct` layout.
        pub const Descriptor = packed struct {
            dictionary_id_flag: u2,
            content_checksum_flag: bool,
            reserved: bool,
            unused: bool,
            single_segment_flag: bool,
            content_size_flag: u2,
        };

        pub const DecodeError = Reader.Error || error{ReservedBitSet};

        /// Read and parse a frame header from `in`: the descriptor byte
        /// followed by its variable-length optional fields.
        pub fn decode(in: *Reader) DecodeError!Header {
            const descriptor: Descriptor = @bitCast(try in.takeByte());

            if (descriptor.reserved) return error.ReservedBitSet;

            const window_descriptor: ?u8 = if (descriptor.single_segment_flag)
                null
            else
                try in.takeByte();

            const dictionary_id: ?u32 = if (descriptor.dictionary_id_flag > 0) d: {
                // if flag is 3 then field_size = 4, else field_size = flag
                const field_size = (@as(u4, 1) << descriptor.dictionary_id_flag) >> 1;
                break :d try in.takeVarInt(u32, .little, field_size);
            } else null;

            const content_size: ?u64 = if (descriptor.single_segment_flag or descriptor.content_size_flag > 0) c: {
                const field_size = @as(u4, 1) << descriptor.content_size_flag;
                const content_size = try in.takeVarInt(u64, .little, field_size);
                // The 2-byte encoding is biased by 256 (hence the `+ 256`).
                break :c if (field_size == 2) content_size + 256 else content_size;
            } else null;

            return .{
                .descriptor = descriptor,
                .window_descriptor = window_descriptor,
                .dictionary_id = dictionary_id,
                .content_size = content_size,
            };
        }

        /// Returns the window size required to decompress a frame, or `null` if it
        /// cannot be determined (which indicates a malformed frame header).
        pub fn windowSize(header: Header) ?u64 {
            if (header.window_descriptor) |descriptor| {
                const exponent = (descriptor & 0b11111000) >> 3;
                const mantissa = descriptor & 0b00000111;
                const window_log = 10 + exponent;
                const window_base = @as(u64, 1) << @as(u6, @intCast(window_log));
                const window_add = (window_base / 8) * mantissa;
                return window_base + window_add;
            } else return header.content_size;
        }
    };
predefined_offset: |
pub const Block = struct { pub const Header = packed struct(u24) { last: bool, type: Type, size: u21, }; pub const Type = enum(u2) { raw, rle, compressed, reserved, }; }; pub const Decode = struct { repeat_offsets: [3]u32, offset: StateData(8), match: StateData(9), literal: StateData(9), literal_fse_buffer: [zstd.table_size_max.literal]Table.Fse, match_fse_buffer: [zstd.table_size_max.match]Table.Fse, offset_fse_buffer: [zstd.table_size_max.offset]Table.Fse, fse_tables_undefined: bool, literal_stream_reader: ReverseBitReader, literal_stream_index: usize, literal_streams: LiteralsSection.Streams, literal_header: LiteralsSection.Header, huffman_tree: ?LiteralsSection.HuffmanTree, literal_written_count: usize, fn StateData(comptime max_accuracy_log: comptime_int) type { return struct { state: @This().State, table: Table, accuracy_log: u8, const State = std.meta.Int(.unsigned, max_accuracy_log); }; } const init: Decode = .{ .repeat_offsets = .{ zstd.start_repeated_offset_1, zstd.start_repeated_offset_2, zstd.start_repeated_offset_3, }, .offset = undefined, .match = undefined, .literal = undefined, .literal_fse_buffer = undefined, .match_fse_buffer = undefined, .offset_fse_buffer = undefined, .fse_tables_undefined = true, .literal_written_count = 0, .literal_header = undefined, .literal_streams = undefined, .literal_stream_reader = undefined, .literal_stream_index = undefined, .huffman_tree = null, }; pub const PrepareError = error{ /// the (reversed) literal bitstream's first byte does not have any bits set MissingStartBit, /// `literals` is a treeless literals section and the decode state does not /// have a Huffman tree from a previous block TreelessLiteralsFirst, /// on the first call if one of the sequence FSE tables is set to repeat mode RepeatModeFirst, /// an FSE table has an invalid accuracy MalformedAccuracyLog, /// failed decoding an FSE table MalformedFseTable, /// input stream ends before all FSE tables are read EndOfStream, ReadFailed, 
InputBufferUndersize, }; /// Prepare the decoder to decode a compressed block. Loads the /// literals stream and Huffman tree from `literals` and reads the /// FSE tables from `in`. pub fn prepare( self: *Decode, in: *Reader, remaining: *Limit, literals: LiteralsSection, sequences_header: SequencesSection.Header, ) PrepareError!void { self.literal_written_count = 0; self.literal_header = literals.header; self.literal_streams = literals.streams; if (literals.huffman_tree) |tree| { self.huffman_tree = tree; } else if (literals.header.block_type == .treeless and self.huffman_tree == null) { return error.TreelessLiteralsFirst; } switch (literals.header.block_type) { .raw, .rle => {}, .compressed, .treeless => { self.literal_stream_index = 0; switch (literals.streams) { .one => |slice| try self.initLiteralStream(slice), .four => |streams| try self.initLiteralStream(streams[0]), } }, } if (sequences_header.sequence_count > 0) { try self.updateFseTable(in, remaining, .literal, sequences_header.literal_lengths); try self.updateFseTable(in, remaining, .offset, sequences_header.offsets); try self.updateFseTable(in, remaining, .match, sequences_header.match_lengths); self.fse_tables_undefined = false; } } /// Read initial FSE states for sequence decoding. 
pub fn readInitialFseState(self: *Decode, bit_reader: *ReverseBitReader) error{EndOfStream}!void { self.literal.state = try bit_reader.readBitsNoEof(u9, self.literal.accuracy_log); self.offset.state = try bit_reader.readBitsNoEof(u8, self.offset.accuracy_log); self.match.state = try bit_reader.readBitsNoEof(u9, self.match.accuracy_log); } fn updateRepeatOffset(self: *Decode, offset: u32) void { self.repeat_offsets[2] = self.repeat_offsets[1]; self.repeat_offsets[1] = self.repeat_offsets[0]; self.repeat_offsets[0] = offset; } fn useRepeatOffset(self: *Decode, index: usize) u32 { if (index == 1) std.mem.swap(u32, &self.repeat_offsets[0], &self.repeat_offsets[1]) else if (index == 2) { std.mem.swap(u32, &self.repeat_offsets[0], &self.repeat_offsets[2]); std.mem.swap(u32, &self.repeat_offsets[1], &self.repeat_offsets[2]); } return self.repeat_offsets[0]; } const WhichFse = enum { offset, match, literal }; /// TODO: don't use `@field` fn updateState( self: *Decode, comptime choice: WhichFse, bit_reader: *ReverseBitReader, ) error{ MalformedFseBits, EndOfStream }!void { switch (@field(self, @tagName(choice)).table) { .rle => {}, .fse => |table| { const data = table[@field(self, @tagName(choice)).state]; const T = @TypeOf(@field(self, @tagName(choice))).State; const bits_summand = try bit_reader.readBitsNoEof(T, data.bits); const next_state = std.math.cast( @TypeOf(@field(self, @tagName(choice))).State, data.baseline + bits_summand, ) orelse return error.MalformedFseBits; @field(self, @tagName(choice)).state = next_state; }, } } const FseTableError = error{ MalformedFseTable, MalformedAccuracyLog, RepeatModeFirst, EndOfStream, }; /// TODO: don't use `@field` fn updateFseTable( self: *Decode, in: *Reader, remaining: *Limit, comptime choice: WhichFse, mode: SequencesSection.Header.Mode, ) !void { const field_name = @tagName(choice); switch (mode) { .predefined => { @field(self, field_name).accuracy_log = @field(zstd.default_accuracy_log, field_name); @field(self, 
field_name).table = @field(Table, "predefined_" ++ field_name); }, .rle => { @field(self, field_name).accuracy_log = 0; remaining.* = remaining.subtract(1) orelse return error.EndOfStream; @field(self, field_name).table = .{ .rle = try in.takeByte() }; }, .fse => { const max_table_size = 2048; const peek_len: usize = remaining.minInt(max_table_size); if (in.buffer.len < peek_len) return error.InputBufferUndersize; const limited_buffer = try in.peek(peek_len); var bit_reader: BitReader = .{ .bytes = limited_buffer }; const table_size = try Table.decode( &bit_reader, @field(zstd.table_symbol_count_max, field_name), @field(zstd.table_accuracy_log_max, field_name), &@field(self, field_name ++ "_fse_buffer"), ); @field(self, field_name).table = .{ .fse = (&@field(self, field_name ++ "_fse_buffer"))[0..table_size], }; @field(self, field_name).accuracy_log = std.math.log2_int_ceil(usize, table_size); in.toss(bit_reader.index); remaining.* = remaining.subtract(bit_reader.index).?; }, .repeat => if (self.fse_tables_undefined) return error.RepeatModeFirst, } } const Sequence = struct { literal_length: u32, match_length: u32, offset: u32, }; fn nextSequence( self: *Decode, bit_reader: *ReverseBitReader, ) error{ InvalidBitStream, EndOfStream }!Sequence { const raw_code = self.getCode(.offset); const offset_code = std.math.cast(u5, raw_code) orelse { return error.InvalidBitStream; }; const offset_value = (@as(u32, 1) << offset_code) + try bit_reader.readBitsNoEof(u32, offset_code); const match_code = self.getCode(.match); if (match_code >= zstd.match_length_code_table.len) return error.InvalidBitStream; const match = zstd.match_length_code_table[match_code]; const match_length = match[0] + try bit_reader.readBitsNoEof(u32, match[1]); const literal_code = self.getCode(.literal); if (literal_code >= zstd.literals_length_code_table.len) return error.InvalidBitStream; const literal = zstd.literals_length_code_table[literal_code]; const literal_length = literal[0] + try 
bit_reader.readBitsNoEof(u32, literal[1]); const offset = if (offset_value > 3) offset: { const offset = offset_value - 3; self.updateRepeatOffset(offset); break :offset offset; } else offset: { if (literal_length == 0) { if (offset_value == 3) { const offset = self.repeat_offsets[0] - 1; self.updateRepeatOffset(offset); break :offset offset; } break :offset self.useRepeatOffset(offset_value); } break :offset self.useRepeatOffset(offset_value - 1); }; if (offset == 0) return error.InvalidBitStream; return .{ .literal_length = literal_length, .match_length = match_length, .offset = offset, }; } /// Decode one sequence from `bit_reader` into `dest`. Updates FSE states /// if `last_sequence` is `false`. Assumes `prepare` called for the block /// before attempting to decode sequences. fn decodeSequence( decode: *Decode, dest: []u8, write_pos: usize, bit_reader: *ReverseBitReader, ) !usize { const sequence = try decode.nextSequence(bit_reader); const literal_length: usize = sequence.literal_length; const match_length: usize = sequence.match_length; const sequence_length = literal_length + match_length; const copy_start = std.math.sub(usize, write_pos + sequence.literal_length, sequence.offset) catch return error.MalformedSequence; if (decode.literal_written_count + literal_length > decode.literal_header.regenerated_size) return error.MalformedLiteralsLength; var sub_bw: Writer = .fixed(dest[write_pos..]); try decodeLiterals(decode, &sub_bw, literal_length); decode.literal_written_count += literal_length; // This is not a @memmove; it intentionally repeats patterns // caused by iterating one byte at a time. 
for ( dest[write_pos + literal_length ..][0..match_length], dest[copy_start..][0..match_length], ) |*d, s| d.* = s; return sequence_length; } fn nextLiteralMultiStream(self: *Decode) error{MissingStartBit}!void { self.literal_stream_index += 1; try self.initLiteralStream(self.literal_streams.four[self.literal_stream_index]); } fn initLiteralStream(self: *Decode, bytes: []const u8) error{MissingStartBit}!void { self.literal_stream_reader = try ReverseBitReader.init(bytes); } fn isLiteralStreamEmpty(self: *Decode) bool { switch (self.literal_streams) { .one => return self.literal_stream_reader.isEmpty(), .four => return self.literal_stream_index == 3 and self.literal_stream_reader.isEmpty(), } } const LiteralBitsError = error{ MissingStartBit, UnexpectedEndOfLiteralStream, }; fn readLiteralsBits( self: *Decode, bit_count_to_read: u16, ) LiteralBitsError!u16 { return self.literal_stream_reader.readBitsNoEof(u16, bit_count_to_read) catch bits: { if (self.literal_streams == .four and self.literal_stream_index < 3) { try self.nextLiteralMultiStream(); break :bits self.literal_stream_reader.readBitsNoEof(u16, bit_count_to_read) catch return error.UnexpectedEndOfLiteralStream; } else { return error.UnexpectedEndOfLiteralStream; } }; } /// Decode `len` bytes of literals into `w`. 
fn decodeLiterals(d: *Decode, w: *Writer, len: usize) !void { switch (d.literal_header.block_type) { .raw => { try w.writeAll(d.literal_streams.one[d.literal_written_count..][0..len]); }, .rle => { try w.splatByteAll(d.literal_streams.one[0], len); }, .compressed, .treeless => { if (len > w.buffer.len) return error.OutputBufferUndersize; const buf = try w.writableSlice(len); const huffman_tree = d.huffman_tree.?; const max_bit_count = huffman_tree.max_bit_count; const starting_bit_count = LiteralsSection.HuffmanTree.weightToBitCount( huffman_tree.nodes[huffman_tree.symbol_count_minus_one].weight, max_bit_count, ); var bits_read: u4 = 0; var huffman_tree_index: usize = huffman_tree.symbol_count_minus_one; var bit_count_to_read: u4 = starting_bit_count; for (buf) |*out| { var prefix: u16 = 0; while (true) { const new_bits = try d.readLiteralsBits(bit_count_to_read); prefix <<= bit_count_to_read; prefix |= new_bits; bits_read += bit_count_to_read; const result = try huffman_tree.query(huffman_tree_index, prefix); switch (result) { .symbol => |sym| { out.* = sym; bit_count_to_read = starting_bit_count; bits_read = 0; huffman_tree_index = huffman_tree.symbol_count_minus_one; break; }, .index => |index| { huffman_tree_index = index; const bit_count = LiteralsSection.HuffmanTree.weightToBitCount( huffman_tree.nodes[index].weight, max_bit_count, ); bit_count_to_read = bit_count - bits_read; }, } } } }, } } /// TODO: don't use `@field` fn getCode(self: *Decode, comptime choice: WhichFse) u32 { return switch (@field(self, @tagName(choice)).table) { .rle => |value| value, .fse => |table| table[@field(self, @tagName(choice)).state].symbol, }; } }; }; pub const Skippable = struct { pub const magic_min: Magic = @enumFromInt(0x184D2A50); pub const magic_max: Magic = @enumFromInt(0x184D2A5F); pub const Header = struct { magic_number: u32, frame_size: u32, }; }; const InitError = error{ /// Frame uses a dictionary. 
DictionaryIdFlagUnsupported, /// Frame does not have a valid window size. WindowSizeUnknown, /// Window size exceeds `window_size_max` or max `usize` value. WindowOversize, /// Frame header indicates a content size exceeding max `usize` value. ContentOversize, }; /// Validates `frame_header` and returns the associated `Frame`. pub fn init( frame_header: Frame.Zstandard.Header, window_size_max: usize, verify_checksum: bool, ) InitError!Frame { if (frame_header.descriptor.dictionary_id_flag != 0) return error.DictionaryIdFlagUnsupported; const window_size_raw = frame_header.windowSize() orelse return error.WindowSizeUnknown; const window_size = if (window_size_raw > window_size_max) return error.WindowOversize else std.math.cast(usize, window_size_raw) orelse return error.WindowOversize; const should_compute_checksum = frame_header.descriptor.content_checksum_flag and verify_checksum; const content_size = if (frame_header.content_size) |size| std.math.cast(usize, size) orelse return error.ContentOversize else null; return .{ .hasher_opt = if (should_compute_checksum) std.hash.XxHash64.init(0) else null, .window_size = window_size, .has_checksum = frame_header.descriptor.content_checksum_flag, .block_size_max = @min(zstd.block_size_max, window_size), .content_size = content_size, }; } }; pub const LiteralsSection = struct { header: Header, huffman_tree: ?HuffmanTree, streams: Streams, pub const Streams = union(enum) { one: []const u8, four: [4][]const u8, fn decode(size_format: u2, stream_data: []const u8) !Streams { if (size_format == 0) { return .{ .one = stream_data }; } if (stream_data.len < 6) return error.MalformedLiteralsSection; const stream_1_length: usize = std.mem.readInt(u16, stream_data[0..2], .little); const stream_2_length: usize = std.mem.readInt(u16, stream_data[2..4], .little); const stream_3_length: usize = std.mem.readInt(u16, stream_data[4..6], .little); const stream_1_start = 6; const stream_2_start = stream_1_start + stream_1_length; const 
stream_3_start = stream_2_start + stream_2_length; const stream_4_start = stream_3_start + stream_3_length; if (stream_data.len < stream_4_start) return error.MalformedLiteralsSection; return .{ .four = .{ stream_data[stream_1_start .. stream_1_start + stream_1_length], stream_data[stream_2_start .. stream_2_start + stream_2_length], stream_data[stream_3_start .. stream_3_start + stream_3_length], stream_data[stream_4_start..], } }; } }; pub const Header = struct { block_type: BlockType, size_format: u2, regenerated_size: u20, compressed_size: ?u18, /// Decode a literals section header. pub fn decode(in: *Reader, remaining: *Limit) !Header { remaining.* = remaining.subtract(1) orelse return error.EndOfStream; const byte0 = try in.takeByte(); const block_type: BlockType = @enumFromInt(byte0 & 0b11); const size_format: u2 = @intCast((byte0 & 0b1100) >> 2); var regenerated_size: u20 = undefined; var compressed_size: ?u18 = null; switch (block_type) { .raw, .rle => { switch (size_format) { 0, 2 => { regenerated_size = byte0 >> 3; }, 1 => { remaining.* = remaining.subtract(1) orelse return error.EndOfStream; regenerated_size = (byte0 >> 4) + (@as(u20, try in.takeByte()) << 4); }, 3 => { remaining.* = remaining.subtract(2) orelse return error.EndOfStream; regenerated_size = (byte0 >> 4) + (@as(u20, try in.takeByte()) << 4) + (@as(u20, try in.takeByte()) << 12); }, } }, .compressed, .treeless => { remaining.* = remaining.subtract(2) orelse return error.EndOfStream; const byte1 = try in.takeByte(); const byte2 = try in.takeByte(); switch (size_format) { 0, 1 => { regenerated_size = (byte0 >> 4) + ((@as(u20, byte1) & 0b00111111) << 4); compressed_size = ((byte1 & 0b11000000) >> 6) + (@as(u18, byte2) << 2); }, 2 => { remaining.* = remaining.subtract(1) orelse return error.EndOfStream; const byte3 = try in.takeByte(); regenerated_size = (byte0 >> 4) + (@as(u20, byte1) << 4) + ((@as(u20, byte2) & 0b00000011) << 12); compressed_size = ((byte2 & 0b11111100) >> 2) + (@as(u18, 
byte3) << 6); }, 3 => { remaining.* = remaining.subtract(2) orelse return error.EndOfStream; const byte3 = try in.takeByte(); const byte4 = try in.takeByte(); regenerated_size = (byte0 >> 4) + (@as(u20, byte1) << 4) + ((@as(u20, byte2) & 0b00111111) << 12); compressed_size = ((byte2 & 0b11000000) >> 6) + (@as(u18, byte3) << 2) + (@as(u18, byte4) << 10); }, } }, } return .{ .block_type = block_type, .size_format = size_format, .regenerated_size = regenerated_size, .compressed_size = compressed_size, }; } }; pub const BlockType = enum(u2) { raw, rle, compressed, treeless, }; pub const HuffmanTree = struct { max_bit_count: u4, symbol_count_minus_one: u8, nodes: [256]PrefixedSymbol, pub const PrefixedSymbol = struct { symbol: u8, prefix: u16, weight: u4, }; pub const Result = union(enum) { symbol: u8, index: usize, }; pub fn query(self: HuffmanTree, index: usize, prefix: u16) error{HuffmanTreeIncomplete}!Result { var node = self.nodes[index]; const weight = node.weight; var i: usize = index; while (node.weight == weight) { if (node.prefix == prefix) return .{ .symbol = node.symbol }; if (i == 0) return error.HuffmanTreeIncomplete; i -= 1; node = self.nodes[i]; } return .{ .index = i }; } pub fn weightToBitCount(weight: u4, max_bit_count: u4) u4 { return if (weight == 0) 0 else ((max_bit_count + 1) - weight); } pub const DecodeError = Reader.Error || error{ MalformedHuffmanTree, MalformedFseTable, MalformedAccuracyLog, EndOfStream, MissingStartBit, }; pub fn decode(in: *Reader, remaining: *Limit) HuffmanTree.DecodeError!HuffmanTree { remaining.* = remaining.subtract(1) orelse return error.EndOfStream; const header = try in.takeByte(); if (header < 128) { return decodeFse(in, remaining, header); } else { return decodeDirect(in, remaining, header - 127); } } fn decodeDirect( in: *Reader, remaining: *Limit, encoded_symbol_count: usize, ) HuffmanTree.DecodeError!HuffmanTree { var weights: [256]u4 = undefined; const weights_byte_count = (encoded_symbol_count + 1) / 2; 
remaining.* = remaining.subtract(weights_byte_count) orelse return error.EndOfStream; for (0..weights_byte_count) |i| { const byte = try in.takeByte(); weights[2 * i] = @as(u4, @intCast(byte >> 4)); weights[2 * i + 1] = @as(u4, @intCast(byte & 0xF)); } const symbol_count = encoded_symbol_count + 1; return build(&weights, symbol_count); } fn decodeFse( in: *Reader, remaining: *Limit, compressed_size: usize, ) HuffmanTree.DecodeError!HuffmanTree { var weights: [256]u4 = undefined; remaining.* = remaining.subtract(compressed_size) orelse return error.EndOfStream; const compressed_buffer = try in.take(compressed_size); var bit_reader: BitReader = .{ .bytes = compressed_buffer }; var entries: [1 << 6]Table.Fse = undefined; const table_size = try Table.decode(&bit_reader, 256, 6, &entries); const accuracy_log = std.math.log2_int_ceil(usize, table_size); const remaining_buffer = bit_reader.bytes[bit_reader.index..]; const symbol_count = try assignWeights(remaining_buffer, accuracy_log, &entries, &weights); return build(&weights, symbol_count); } fn assignWeights( huff_bits_buffer: []const u8, accuracy_log: u16, entries: *[1 << 6]Table.Fse, weights: *[256]u4, ) !usize { var huff_bits = try ReverseBitReader.init(huff_bits_buffer); var i: usize = 0; var even_state: u32 = try huff_bits.readBitsNoEof(u32, accuracy_log); var odd_state: u32 = try huff_bits.readBitsNoEof(u32, accuracy_log); while (i < 254) { const even_data = entries[even_state]; var read_bits: u16 = 0; const even_bits = huff_bits.readBits(u32, even_data.bits, &read_bits) catch unreachable; weights[i] = std.math.cast(u4, even_data.symbol) orelse return error.MalformedHuffmanTree; i += 1; if (read_bits < even_data.bits) { weights[i] = std.math.cast(u4, entries[odd_state].symbol) orelse return error.MalformedHuffmanTree; i += 1; break; } even_state = even_data.baseline + even_bits; read_bits = 0; const odd_data = entries[odd_state]; const odd_bits = huff_bits.readBits(u32, odd_data.bits, &read_bits) catch 
unreachable; weights[i] = std.math.cast(u4, odd_data.symbol) orelse return error.MalformedHuffmanTree; i += 1; if (read_bits < odd_data.bits) { if (i == 255) return error.MalformedHuffmanTree; weights[i] = std.math.cast(u4, entries[even_state].symbol) orelse return error.MalformedHuffmanTree; i += 1; break; } odd_state = odd_data.baseline + odd_bits; } else return error.MalformedHuffmanTree; if (!huff_bits.isEmpty()) { return error.MalformedHuffmanTree; } return i + 1; // stream contains all but the last symbol } fn assignSymbols(weight_sorted_prefixed_symbols: []PrefixedSymbol, weights: [256]u4) usize { for (0..weight_sorted_prefixed_symbols.len) |i| { weight_sorted_prefixed_symbols[i] = .{ .symbol = @as(u8, @intCast(i)), .weight = undefined, .prefix = undefined, }; } std.mem.sort( PrefixedSymbol, weight_sorted_prefixed_symbols, weights, lessThanByWeight, ); var prefix: u16 = 0; var prefixed_symbol_count: usize = 0; var sorted_index: usize = 0; const symbol_count = weight_sorted_prefixed_symbols.len; while (sorted_index < symbol_count) { var symbol = weight_sorted_prefixed_symbols[sorted_index].symbol; const weight = weights[symbol]; if (weight == 0) { sorted_index += 1; continue; } while (sorted_index < symbol_count) : ({ sorted_index += 1; prefixed_symbol_count += 1; prefix += 1; }) { symbol = weight_sorted_prefixed_symbols[sorted_index].symbol; if (weights[symbol] != weight) { prefix = ((prefix - 1) >> (weights[symbol] - weight)) + 1; break; } weight_sorted_prefixed_symbols[prefixed_symbol_count].symbol = symbol; weight_sorted_prefixed_symbols[prefixed_symbol_count].prefix = prefix; weight_sorted_prefixed_symbols[prefixed_symbol_count].weight = weight; } } return prefixed_symbol_count; } fn build(weights: *[256]u4, symbol_count: usize) error{MalformedHuffmanTree}!HuffmanTree { var weight_power_sum_big: u32 = 0; for (weights[0 .. 
symbol_count - 1]) |value| { weight_power_sum_big += (@as(u16, 1) << value) >> 1; } if (weight_power_sum_big >= 1 << 11) return error.MalformedHuffmanTree; const weight_power_sum = @as(u16, @intCast(weight_power_sum_big)); // advance to next power of two (even if weight_power_sum is a power of 2) // TODO: is it valid to have weight_power_sum == 0? const max_number_of_bits = if (weight_power_sum == 0) 1 else std.math.log2_int(u16, weight_power_sum) + 1; const next_power_of_two = @as(u16, 1) << max_number_of_bits; weights[symbol_count - 1] = std.math.log2_int(u16, next_power_of_two - weight_power_sum) + 1; var weight_sorted_prefixed_symbols: [256]PrefixedSymbol = undefined; const prefixed_symbol_count = assignSymbols(weight_sorted_prefixed_symbols[0..symbol_count], weights.*); const tree: HuffmanTree = .{ .max_bit_count = max_number_of_bits, .symbol_count_minus_one = @as(u8, @intCast(prefixed_symbol_count - 1)), .nodes = weight_sorted_prefixed_symbols, }; return tree; } fn lessThanByWeight( weights: [256]u4, lhs: PrefixedSymbol, rhs: PrefixedSymbol, ) bool { // NOTE: this function relies on the use of a stable sorting algorithm, // otherwise a special case of if (weights[lhs] == weights[rhs]) return lhs < rhs; // should be added return weights[lhs.symbol] < weights[rhs.symbol]; } }; pub const StreamCount = enum { one, four }; pub fn streamCount(size_format: u2, block_type: BlockType) StreamCount { return switch (block_type) { .raw, .rle => .one, .compressed, .treeless => if (size_format == 0) .one else .four, }; } pub const DecodeError = error{ /// Invalid header. MalformedLiteralsHeader, /// Decoding errors. MalformedLiteralsSection, /// Compressed literals have invalid accuracy. MalformedAccuracyLog, /// Compressed literals have invalid FSE table. MalformedFseTable, /// Failed decoding a Huffamn tree. MalformedHuffmanTree, /// Not enough bytes to complete the section. 
EndOfStream, ReadFailed, MissingStartBit, }; pub fn decode(in: *Reader, remaining: *Limit, buffer: []u8) DecodeError!LiteralsSection { const header = try Header.decode(in, remaining); switch (header.block_type) { .raw => { if (buffer.len < header.regenerated_size) return error.MalformedLiteralsSection; remaining.* = remaining.subtract(header.regenerated_size) orelse return error.EndOfStream; try in.readSliceAll(buffer[0..header.regenerated_size]); return .{ .header = header, .huffman_tree = null, .streams = .{ .one = buffer }, }; }, .rle => { remaining.* = remaining.subtract(1) orelse return error.EndOfStream; buffer[0] = try in.takeByte(); return .{ .header = header, .huffman_tree = null, .streams = .{ .one = buffer[0..1] }, }; }, .compressed, .treeless => { const before_remaining = remaining.*; const huffman_tree = if (header.block_type == .compressed) try HuffmanTree.decode(in, remaining) else null; const huffman_tree_size = @intFromEnum(before_remaining) - @intFromEnum(remaining.*); const total_streams_size = std.math.sub(usize, header.compressed_size.?, huffman_tree_size) catch return error.MalformedLiteralsSection; if (total_streams_size > buffer.len) return error.MalformedLiteralsSection; remaining.* = remaining.subtract(total_streams_size) orelse return error.EndOfStream; try in.readSliceAll(buffer[0..total_streams_size]); const stream_data = buffer[0..total_streams_size]; const streams = try Streams.decode(header.size_format, stream_data); return .{ .header = header, .huffman_tree = huffman_tree, .streams = streams, }; }, } } }; pub const SequencesSection = struct { header: Header, literals_length_table: Table, offset_table: Table, match_length_table: Table, pub const Header = struct { sequence_count: u24, match_lengths: Mode, offsets: Mode, literal_lengths: Mode, pub const Mode = enum(u2) { predefined, rle, fse, repeat, }; pub const DecodeError = error{ ReservedBitSet, EndOfStream, ReadFailed, }; pub fn decode(in: *Reader, remaining: *Limit) 
DecodeError!Header { var sequence_count: u24 = undefined; remaining.* = remaining.subtract(1) orelse return error.EndOfStream; const byte0 = try in.takeByte(); if (byte0 == 0) { return .{ .sequence_count = 0, .offsets = undefined, .match_lengths = undefined, .literal_lengths = undefined, }; } else if (byte0 < 128) { remaining.* = remaining.subtract(1) orelse return error.EndOfStream; sequence_count = byte0; } else if (byte0 < 255) { remaining.* = remaining.subtract(2) orelse return error.EndOfStream; sequence_count = (@as(u24, (byte0 - 128)) << 8) + try in.takeByte(); } else { remaining.* = remaining.subtract(3) orelse return error.EndOfStream; sequence_count = (try in.takeByte()) + (@as(u24, try in.takeByte()) << 8) + 0x7F00; } const compression_modes = try in.takeByte(); const matches_mode: Header.Mode = @enumFromInt((compression_modes & 0b00001100) >> 2); const offsets_mode: Header.Mode = @enumFromInt((compression_modes & 0b00110000) >> 4); const literal_mode: Header.Mode = @enumFromInt((compression_modes & 0b11000000) >> 6); if (compression_modes & 0b11 != 0) return error.ReservedBitSet; return .{ .sequence_count = sequence_count, .offsets = offsets_mode, .match_lengths = matches_mode, .literal_lengths = literal_mode, }; } }; }; pub const Table = union(enum) { fse: []const Fse, rle: u8, pub const Fse = struct { symbol: u8, baseline: u16, bits: u8, }; pub fn decode( bit_reader: *BitReader, expected_symbol_count: usize, max_accuracy_log: u4, entries: []Table.Fse, ) !usize { const accuracy_log_biased = try bit_reader.readBitsNoEof(u4, 4); if (accuracy_log_biased > max_accuracy_log -| 5) return error.MalformedAccuracyLog; const accuracy_log = accuracy_log_biased + 5; var values: [256]u16 = undefined; var value_count: usize = 0; const total_probability = @as(u16, 1) << accuracy_log; var accumulated_probability: u16 = 0; while (accumulated_probability < total_probability) { // WARNING: The RFC is poorly worded, and would suggest std.math.log2_int_ceil is correct 
here, // but power of two (remaining probabilities + 1) need max bits set to 1 more. const max_bits = std.math.log2_int(u16, total_probability - accumulated_probability + 1) + 1; const small = try bit_reader.readBitsNoEof(u16, max_bits - 1); const cutoff = (@as(u16, 1) << max_bits) - 1 - (total_probability - accumulated_probability + 1); const value = if (small < cutoff) small else value: { const value_read = small + (try bit_reader.readBitsNoEof(u16, 1) << (max_bits - 1)); break :value if (value_read < @as(u16, 1) << (max_bits - 1)) value_read else value_read - cutoff; }; accumulated_probability += if (value != 0) value - 1 else 1; values[value_count] = value; value_count += 1; if (value == 1) { while (true) { const repeat_flag = try bit_reader.readBitsNoEof(u2, 2); if (repeat_flag + value_count > 256) return error.MalformedFseTable; for (0..repeat_flag) |_| { values[value_count] = 1; value_count += 1; } if (repeat_flag < 3) break; } } if (value_count == 256) break; } bit_reader.alignToByte(); if (value_count < 2) return error.MalformedFseTable; if (accumulated_probability != total_probability) return error.MalformedFseTable; if (value_count > expected_symbol_count) return error.MalformedFseTable; const table_size = total_probability; try build(values[0..value_count], entries[0..table_size]); return table_size; } pub fn build(values: []const u16, entries: []Table.Fse) !void { const total_probability = @as(u16, @intCast(entries.len)); const accuracy_log = std.math.log2_int(u16, total_probability); assert(total_probability <= 1 << 9); var less_than_one_count: usize = 0; for (values, 0..) |value, i| { if (value == 0) { entries[entries.len - 1 - less_than_one_count] = Table.Fse{ .symbol = @as(u8, @intCast(i)), .baseline = 0, .bits = accuracy_log, }; less_than_one_count += 1; } } var position: usize = 0; var temp_states: [1 << 9]u16 = undefined; for (values, 0..) 
|value, symbol| { if (value == 0 or value == 1) continue; const probability = value - 1; const state_share_dividend = std.math.ceilPowerOfTwo(u16, probability) catch return error.MalformedFseTable; const share_size = @divExact(total_probability, state_share_dividend); const double_state_count = state_share_dividend - probability; const single_state_count = probability - double_state_count; const share_size_log = std.math.log2_int(u16, share_size); for (0..probability) |i| { temp_states[i] = @as(u16, @intCast(position)); position += (entries.len >> 1) + (entries.len >> 3) + 3; position &= entries.len - 1; while (position >= entries.len - less_than_one_count) { position += (entries.len >> 1) + (entries.len >> 3) + 3; position &= entries.len - 1; } } std.mem.sort(u16, temp_states[0..probability], {}, std.sort.asc(u16)); for (0..probability) |i| { entries[temp_states[i]] = if (i < double_state_count) Table.Fse{ .symbol = @as(u8, @intCast(symbol)), .bits = share_size_log + 1, .baseline = single_state_count * share_size + @as(u16, @intCast(i)) * 2 * share_size, } else Table.Fse{ .symbol = @as(u8, @intCast(symbol)), .bits = share_size_log, .baseline = (@as(u16, @intCast(i)) - double_state_count) * share_size, }; } } } test build { const literals_length_default_values = [36]u16{ 5, 4, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 4, 3, 2, 2, 2, 2, 2, 0, 0, 0, 0, }; const match_lengths_default_values = [53]u16{ 2, 5, 4, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, 0, 0, }; const offset_codes_default_values = [29]u16{ 2, 2, 2, 2, 2, 2, 3, 3, 3, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 0, 0, 0, 0, 0, }; var entries: [64]Table.Fse = undefined; try build(&literals_length_default_values, &entries); try std.testing.expectEqualSlices(Table.Fse, Table.predefined_literal.fse, &entries); try build(&match_lengths_default_values, &entries); try 
std.testing.expectEqualSlices(Table.Fse, Table.predefined_match.fse, &entries);

        try build(&offset_codes_default_values, entries[0..32]);
        try std.testing.expectEqualSlices(Table.Fse, Table.predefined_offset.fse, entries[0..32]);
    }

    /// Predefined literal-lengths FSE table (built from the default
    /// distribution; see `test build` above).
    pub const predefined_literal: Table = .{
        .fse = &[64]Table.Fse{
            .{ .symbol = 0, .bits = 4, .baseline = 0 },
            .{ .symbol = 0, .bits = 4, .baseline = 16 },
            .{ .symbol = 1, .bits = 5, .baseline = 32 },
            .{ .symbol = 3, .bits = 5, .baseline = 0 },
            .{ .symbol = 4, .bits = 5, .baseline = 0 },
            .{ .symbol = 6, .bits = 5, .baseline = 0 },
            .{ .symbol = 7, .bits = 5, .baseline = 0 },
            .{ .symbol = 9, .bits = 5, .baseline = 0 },
            .{ .symbol = 10, .bits = 5, .baseline = 0 },
            .{ .symbol = 12, .bits = 5, .baseline = 0 },
            .{ .symbol = 14, .bits = 6, .baseline = 0 },
            .{ .symbol = 16, .bits = 5, .baseline = 0 },
            .{ .symbol = 18, .bits = 5, .baseline = 0 },
            .{ .symbol = 19, .bits = 5, .baseline = 0 },
            .{ .symbol = 21, .bits = 5, .baseline = 0 },
            .{ .symbol = 22, .bits = 5, .baseline = 0 },
            .{ .symbol = 24, .bits = 5, .baseline = 0 },
            .{ .symbol = 25, .bits = 5, .baseline = 32 },
            .{ .symbol = 26, .bits = 5, .baseline = 0 },
            .{ .symbol = 27, .bits = 6, .baseline = 0 },
            .{ .symbol = 29, .bits = 6, .baseline = 0 },
            .{ .symbol = 31, .bits = 6, .baseline = 0 },
            .{ .symbol = 0, .bits = 4, .baseline = 32 },
            .{ .symbol = 1, .bits = 4, .baseline = 0 },
            .{ .symbol = 2, .bits = 5, .baseline = 0 },
            .{ .symbol = 4, .bits = 5, .baseline = 32 },
            .{ .symbol = 5, .bits = 5, .baseline = 0 },
            .{ .symbol = 7, .bits = 5, .baseline = 32 },
            .{ .symbol = 8, .bits = 5, .baseline = 0 },
            .{ .symbol = 10, .bits = 5, .baseline = 32 },
            .{ .symbol = 11, .bits = 5, .baseline = 0 },
            .{ .symbol = 13, .bits = 6, .baseline = 0 },
            .{ .symbol = 16, .bits = 5, .baseline = 32 },
            .{ .symbol = 17, .bits = 5, .baseline = 0 },
            .{ .symbol = 19, .bits = 5, .baseline = 32 },
            .{ .symbol = 20, .bits = 5, .baseline = 0 },
            .{ .symbol = 22, .bits = 5, .baseline = 32 },
            .{ .symbol = 23, .bits = 5, .baseline = 0 },
            .{ .symbol = 25, .bits = 4, .baseline = 0 },
            .{ .symbol = 25, .bits = 4, .baseline = 16 },
            .{ .symbol = 26, .bits = 5, .baseline = 32 },
            .{ .symbol = 28, .bits = 6, .baseline = 0 },
            .{ .symbol = 30, .bits = 6, .baseline = 0 },
            .{ .symbol = 0, .bits = 4, .baseline = 48 },
            .{ .symbol = 1, .bits = 4, .baseline = 16 },
            .{ .symbol = 2, .bits = 5, .baseline = 32 },
            .{ .symbol = 3, .bits = 5, .baseline = 32 },
            .{ .symbol = 5, .bits = 5, .baseline = 32 },
            .{ .symbol = 6, .bits = 5, .baseline = 32 },
            .{ .symbol = 8, .bits = 5, .baseline = 32 },
            .{ .symbol = 9, .bits = 5, .baseline = 32 },
            .{ .symbol = 11, .bits = 5, .baseline = 32 },
            .{ .symbol = 12, .bits = 5, .baseline = 32 },
            .{ .symbol = 15, .bits = 6, .baseline = 0 },
            .{ .symbol = 17, .bits = 5, .baseline = 32 },
            .{ .symbol = 18, .bits = 5, .baseline = 32 },
            .{ .symbol = 20, .bits = 5, .baseline = 32 },
            .{ .symbol = 21, .bits = 5, .baseline = 32 },
            .{ .symbol = 23, .bits = 5, .baseline = 32 },
            .{ .symbol = 24, .bits = 5, .baseline = 32 },
            .{ .symbol = 35, .bits = 6, .baseline = 0 },
            .{ .symbol = 34, .bits = 6, .baseline = 0 },
            .{ .symbol = 33, .bits = 6, .baseline = 0 },
            .{ .symbol = 32, .bits = 6, .baseline = 0 },
        },
    };

    /// Predefined match-lengths FSE table (built from the default
    /// distribution; see `test build` above).
    pub const predefined_match: Table = .{
        .fse = &[64]Table.Fse{
            .{ .symbol = 0, .bits = 6, .baseline = 0 },
            .{ .symbol = 1, .bits = 4, .baseline = 0 },
            .{ .symbol = 2, .bits = 5, .baseline = 32 },
            .{ .symbol = 3, .bits = 5, .baseline = 0 },
            .{ .symbol = 5, .bits = 5, .baseline = 0 },
            .{ .symbol = 6, .bits = 5, .baseline = 0 },
            .{ .symbol = 8, .bits = 5, .baseline = 0 },
            .{ .symbol = 10, .bits = 6, .baseline = 0 },
            .{ .symbol = 13, .bits = 6, .baseline = 0 },
            .{ .symbol = 16, .bits = 6, .baseline = 0 },
            .{ .symbol = 19, .bits = 6, .baseline = 0 },
            .{ .symbol = 22, .bits = 6, .baseline = 0 },
            .{ .symbol = 25, .bits = 6, .baseline = 0 },
            .{ .symbol = 28, .bits = 6, .baseline = 0 },
            .{ .symbol = 31, .bits = 6, .baseline = 0 },
            .{ .symbol = 33, .bits = 6, .baseline = 0 },
            .{ .symbol = 35, .bits = 6, .baseline = 0 },
            .{ .symbol = 37, .bits = 6, .baseline = 0 },
            .{ .symbol = 39, .bits = 6, .baseline = 0 },
            .{ .symbol = 41, .bits = 6, .baseline = 0 },
            .{ .symbol = 43, .bits = 6, .baseline = 0 },
            .{ .symbol = 45, .bits = 6, .baseline = 0 },
            .{ .symbol = 1, .bits = 4, .baseline = 16 },
            .{ .symbol = 2, .bits = 4, .baseline = 0 },
            .{ .symbol = 3, .bits = 5, .baseline = 32 },
            .{ .symbol = 4, .bits = 5, .baseline = 0 },
            .{ .symbol = 6, .bits = 5, .baseline = 32 },
            .{ .symbol = 7, .bits = 5, .baseline = 0 },
            .{ .symbol = 9, .bits = 6, .baseline = 0 },
            .{ .symbol = 12, .bits = 6, .baseline = 0 },
            .{ .symbol = 15, .bits = 6, .baseline = 0 },
            .{ .symbol = 18, .bits = 6, .baseline = 0 },
            .{ .symbol = 21, .bits = 6, .baseline = 0 },
            .{ .symbol = 24, .bits = 6, .baseline = 0 },
            .{ .symbol = 27, .bits = 6, .baseline = 0 },
            .{ .symbol = 30, .bits = 6, .baseline = 0 },
            .{ .symbol = 32, .bits = 6, .baseline = 0 },
            .{ .symbol = 34, .bits = 6, .baseline = 0 },
            .{ .symbol = 36, .bits = 6, .baseline = 0 },
            .{ .symbol = 38, .bits = 6, .baseline = 0 },
            .{ .symbol = 40, .bits = 6, .baseline = 0 },
            .{ .symbol = 42, .bits = 6, .baseline = 0 },
            .{ .symbol = 44, .bits = 6, .baseline = 0 },
            .{ .symbol = 1, .bits = 4, .baseline = 32 },
            .{ .symbol = 1, .bits = 4, .baseline = 48 },
            .{ .symbol = 2, .bits = 4, .baseline = 16 },
            .{ .symbol = 4, .bits = 5, .baseline = 32 },
            .{ .symbol = 5, .bits = 5, .baseline = 32 },
            .{ .symbol = 7, .bits = 5, .baseline = 32 },
            .{ .symbol = 8, .bits = 5, .baseline = 32 },
            .{ .symbol = 11, .bits = 6, .baseline = 0 },
            .{ .symbol = 14, .bits = 6, .baseline = 0 },
            .{ .symbol = 17, .bits = 6, .baseline = 0 },
            .{ .symbol = 20, .bits = 6, .baseline = 0 },
            .{ .symbol = 23, .bits = 6, .baseline = 0 },
            .{ .symbol = 26, .bits = 6, .baseline = 0 },
            .{ .symbol = 29, .bits = 6, .baseline = 0 },
            .{ .symbol = 52, .bits = 6, .baseline = 0 },
            .{ .symbol = 51, .bits = 6, .baseline = 0 },
            .{ .symbol = 50, .bits = 6, .baseline = 0 },
            .{ .symbol = 49, .bits = 6, .baseline = 0 },
            .{ .symbol = 48, .bits = 6, .baseline = 0 },
            .{ .symbol = 47, .bits = 6, .baseline = 0 },
            .{ .symbol = 46, .bits = 6, .baseline = 0 },
        },
    };

    /// Predefined offset-codes FSE table (built from the default
    /// distribution; see `test build` above).
    pub const predefined_offset: Table = .{
        .fse = &[32]Table.Fse{
            .{ .symbol = 0, .bits = 5, .baseline = 0 },
            .{ .symbol = 6, .bits = 4, .baseline = 0 },
            .{ .symbol = 9, .bits = 5, .baseline = 0 },
            .{ .symbol = 15, .bits = 5, .baseline = 0 },
            .{ .symbol = 21, .bits = 5, .baseline = 0 },
            .{ .symbol = 3, .bits = 5, .baseline = 0 },
            .{ .symbol = 7, .bits = 4, .baseline = 0 },
            .{ .symbol = 12, .bits = 5, .baseline = 0 },
            .{ .symbol = 18, .bits = 5, .baseline = 0 },
            .{ .symbol = 23, .bits = 5, .baseline = 0 },
            .{ .symbol = 5, .bits = 5, .baseline = 0 },
            .{ .symbol = 8, .bits = 4, .baseline = 0 },
            .{ .symbol = 14, .bits = 5, .baseline = 0 },
            .{ .symbol = 20, .bits = 5, .baseline = 0 },
            .{ .symbol = 2, .bits = 5, .baseline = 0 },
            .{ .symbol = 7, .bits = 4, .baseline = 16 },
            .{ .symbol = 11, .bits = 5, .baseline = 0 },
            .{ .symbol = 17, .bits = 5, .baseline = 0 },
            .{ .symbol = 22, .bits = 5, .baseline = 0 },
            .{ .symbol = 4, .bits = 5, .baseline = 0 },
            .{ .symbol = 8, .bits = 4, .baseline = 16 },
            .{ .symbol = 13, .bits = 5, .baseline = 0 },
            .{ .symbol = 19, .bits = 5, .baseline = 0 },
            .{ .symbol = 1, .bits = 5, .baseline = 0 },
            .{ .symbol = 6, .bits = 4, .baseline = 16 },
            .{ .symbol = 10, .bits = 5, .baseline = 0 },
            .{ .symbol = 16, .bits = 5, .baseline = 0 },
            .{ .symbol = 28, .bits = 5, .baseline = 0 },
            .{ .symbol = 27, .bits = 5, .baseline = 0 },
            .{ .symbol = 26, .bits = 5, .baseline = 0 },
            .{ .symbol = 25, .bits = 5, .baseline = 0 },
            .{ .symbol = 24, .bits = 5, .baseline = 0 },
        },
    };
};

// low_bit_mask[n] masks the low n bits of a byte.
const low_bit_mask = [9]u8{
    0b00000000, 0b00000001, 0b00000011, 0b00000111,
    0b00001111, 0b00011111, 0b00111111, 0b01111111,
    0b11111111,
};

// Tuple of (value read, number of bits actually read).
fn Bits(comptime T: type) type {
    return struct { T, u16 };
}

/// For reading the reversed bit streams used to encode FSE compressed data.
/// Bytes are consumed from the END of `bytes` toward the start, and bits are
/// assembled most-significant-first.
const ReverseBitReader = struct {
    /// Backing buffer; never mutated, only indexed from the back.
    bytes: []const u8,
    /// Number of bytes not yet consumed; the next byte read is bytes[remaining - 1].
    remaining: usize,
    /// Buffered bits not yet handed out (low `count` bits are valid).
    bits: u8,
    /// Number of valid bits in `bits` (0..8).
    count: u4,

    /// Positions the reader just past the mandatory start bit: skips zero bits
    /// until the first set bit is consumed. Fails with `MissingStartBit` if no
    /// set bit appears within the first 8 bits. An empty buffer is accepted
    /// as-is (the reader is immediately empty).
    fn init(bytes: []const u8) error{MissingStartBit}!ReverseBitReader {
        var result: ReverseBitReader = .{
            .bytes = bytes,
            .remaining = bytes.len,
            .bits = 0,
            .count = 0,
        };
        if (bytes.len == 0) return result;
        // At least one byte exists, so 8 single-bit reads cannot hit EOF.
        for (0..8) |_| if (0 != (result.readBitsNoEof(u1, 1) catch unreachable)) return result;
        return error.MissingStartBit;
    }

    /// Packages an accumulator and a bit count as a `Bits(T)` result,
    /// bit-casting through the unsigned integer of T's width.
    fn initBits(comptime T: type, out: anytype, num: u16) Bits(T) {
        const UT = std.meta.Int(.unsigned, @bitSizeOf(T));
        return .{
            @bitCast(@as(UT, @intCast(out))),
            num,
        };
    }

    /// Reads exactly `num` bits, or fails with `EndOfStream` if the stream
    /// holds fewer.
    fn readBitsNoEof(self: *ReverseBitReader, comptime T: type, num: u16) error{EndOfStream}!T {
        const b, const c = try self.readBitsTuple(T, num);
        if (c < num) return error.EndOfStream;
        return b;
    }

    /// Reads up to `num` bits; reports the count actually read via `out_bits`.
    fn readBits(self: *ReverseBitReader, comptime T: type, num: u16, out_bits: *u16) !T {
        const b, const c = try self.readBitsTuple(T, num);
        out_bits.* = c;
        return b;
    }

    /// Core read: returns (value, bits read). Drains the buffered bits first,
    /// then whole bytes, then a final partial byte whose leftover low bits are
    /// stashed back into `bits`/`count`. On EOF, returns whatever was read so
    /// far instead of erroring.
    fn readBitsTuple(self: *ReverseBitReader, comptime T: type, num: u16) !Bits(T) {
        const UT = std.meta.Int(.unsigned, @bitSizeOf(T));
        // Accumulate in at least a u8 so sub-byte result types can hold a full byte.
        const U = if (@bitSizeOf(T) < 8) u8 else UT;

        // Fast path: the request is satisfied entirely from the bit buffer.
        if (num <= self.count) return initBits(T, self.removeBits(@intCast(num)), num);

        var out_count: u16 = self.count;
        var out: U = self.removeBits(self.count);

        const full_bytes_left = (num - out_count) / 8;

        for (0..full_bytes_left) |_| {
            const byte = takeByte(self) catch |err| switch (err) {
                error.EndOfStream => return initBits(T, out, out_count),
            };
            // MSB-first accumulation; when U is u8 a full-byte read replaces
            // the (necessarily empty) accumulator outright.
            if (U == u8) out = 0 else out <<= 8;
            out |= byte;
            out_count += 8;
        }

        const bits_left = num - out_count;
        const keep = 8 - bits_left;

        if (bits_left == 0) return initBits(T, out, out_count);

        const final_byte = takeByte(self) catch |err| switch (err) {
            error.EndOfStream => return initBits(T, out, out_count),
        };

        // Take the high `bits_left` bits of the final byte; buffer its low
        // `keep` bits for the next read.
        out <<= @intCast(bits_left);
        out |= final_byte >> @intCast(keep);
        self.bits = final_byte & low_bit_mask[keep];
        self.count = @intCast(keep);

        return initBits(T, out, num);
    }

    /// Consumes the next byte from the back of the buffer.
    fn takeByte(rbr: *ReverseBitReader) error{EndOfStream}!u8 {
        if (rbr.remaining == 0) return error.EndOfStream;
        rbr.remaining -= 1;
        return rbr.bytes[rbr.remaining];
    }

    /// True when no bytes and no buffered bits remain.
    fn isEmpty(self: *const ReverseBitReader) bool {
        return self.remaining == 0 and self.count == 0;
    }

    /// Pops the top `num` buffered bits. Callers must guarantee
    /// num <= self.count (otherwise `keep` would underflow).
    fn removeBits(self: *ReverseBitReader, num: u4) u8 {
        if (num == 8) {
            self.count = 0;
            return self.bits;
        }
        const keep = self.count - num;
        const bits = self.bits >> @intCast(keep);
        self.bits &= low_bit_mask[keep];
        self.count = keep;
        return bits;
    }
};

/// Forward bit reader: bytes are consumed front-to-back and bits are
/// assembled least-significant-first (the complement of ReverseBitReader).
const BitReader = struct {
    bytes: []const u8,
    /// Index of the next byte to consume.
    index: usize = 0,
    /// Buffered bits not yet handed out (low `count` bits are valid).
    bits: u8 = 0,
    count: u4 = 0,

    /// Packages an accumulator and a bit count as a `Bits(T)` result.
    fn initBits(comptime T: type, out: anytype, num: u16) Bits(T) {
        const UT = std.meta.Int(.unsigned, @bitSizeOf(T));
        return .{
            @bitCast(@as(UT, @intCast(out))),
            num,
        };
    }

    /// Reads exactly `num` bits, or fails with `EndOfStream`.
    fn readBitsNoEof(self: *@This(), comptime T: type, num: u16) !T {
        const b, const c = try self.readBitsTuple(T, num);
        if (c < num) return error.EndOfStream;
        return b;
    }

    /// Reads up to `num` bits; reports the count actually read via `out_bits`.
    fn readBits(self: *@This(), comptime T: type, num: u16, out_bits: *u16) !T {
        const b, const c = try self.readBitsTuple(T, num);
        out_bits.* = c;
        return b;
    }

    /// Core read: returns (value, bits read), LSB-first. Mirrors
    /// ReverseBitReader.readBitsTuple but ORs each new byte above the bits
    /// already collected instead of shifting the accumulator up.
    fn readBitsTuple(self: *@This(), comptime T: type, num: u16) !Bits(T) {
        const UT = std.meta.Int(.unsigned, @bitSizeOf(T));
        const U = if (@bitSizeOf(T) < 8) u8 else UT;

        if (num <= self.count) return initBits(T, self.removeBits(@intCast(num)), num);

        var out_count: u16 = self.count;
        var out: U = self.removeBits(self.count);

        const full_bytes_left = (num - out_count) / 8;

        for (0..full_bytes_left) |_| {
            const byte = takeByte(self) catch |err| switch (err) {
                error.EndOfStream => return initBits(T, out, out_count),
            };
            const pos = @as(U, byte) << @intCast(out_count);
            out |= pos;
            out_count += 8;
        }

        const bits_left = num - out_count;
        const keep = 8 - bits_left;

        if (bits_left == 0) return initBits(T, out, out_count);

        const final_byte = takeByte(self) catch |err| switch (err) {
            error.EndOfStream => return initBits(T, out, out_count),
        };

        // Take the low `bits_left` bits of the final byte; buffer its high
        // `keep` bits (shifted down) for the next read.
        const pos = @as(U, final_byte & low_bit_mask[bits_left]) << @intCast(out_count);
        out |= pos;
        self.bits = final_byte >> @intCast(bits_left);
        self.count = @intCast(keep);

        return initBits(T, out, num);
    }

    /// Consumes the next byte from the front of the buffer.
    fn takeByte(br: *BitReader) error{EndOfStream}!u8 {
        if (br.bytes.len - br.index == 0) return error.EndOfStream;
        const result = br.bytes[br.index];
        br.index += 1;
        return result;
    }

    /// Pops the low `num` buffered bits. Callers must guarantee
    /// num <= self.count (otherwise `keep` would underflow).
    fn removeBits(self: *@This(), num: u4) u8 {
        if (num == 8) {
            self.count = 0;
            return self.bits;
        }
        const keep = self.count - num;
        const bits = self.bits & low_bit_mask[num];
        self.bits >>= @intCast(num);
        self.count = keep;
        return bits;
    }

    /// Discards any partially-consumed byte so the next read starts on a
    /// byte boundary.
    fn alignToByte(self: *@This()) void {
        self.bits = 0;
        self.count = 0;
    }
};

// Reference nested declarations so their tests are included.
test {
    _ = Table;
}
// Generated by zstd-live on 2025-08-13 02:35:12 UTC.