zig/lib/std/compress/flate.zig

When decompressing, the output buffer is used as the history window, so less than this may result in failure to decompress streams that were compressed with a larger window.

const std = @import("../std.zig");

max_window_len

Deflate is a lossless data compression file format that uses a combination of LZ77 and Huffman coding.


/// Twice `history_len`.
///
/// During decompression the output buffer doubles as the history window, so a
/// buffer smaller than this may fail to decompress streams that were
/// compressed with a larger window.
pub const max_window_len = 2 * history_len;

history_len

Inflate is the decoding process that consumes a Deflate bitstream and produces the original full-size data.


/// Equals 32768 (32 * 1024); `max_window_len` is defined as twice this value.
/// NOTE(review): presumably the maximum Deflate back-reference distance
/// (RFC 1951) — confirm.
pub const history_len = 32 * 1024;

Compress

flate/Compress.zig

Compression without Lempel-Ziv match searching. Faster compression, less memory requirements but bigger compressed sizes.


/// Deflate is a lossless data compression file format that uses a combination
/// of LZ77 and Huffman coding.
pub const Compress = @import("flate/Compress.zig");

Decompress

flate/Decompress.zig

Container of the deflate bit stream body. Container adds header before deflate bit stream and footer after. It can be gzip, zlib or raw (no header, no footer, raw bit stream). Zlib format is defined in rfc 1950. Header has 2 bytes and footer 4 bytes Adler-32 checksum. Gzip format is defined in rfc 1952. Header has 10+ bytes and footer 4 bytes crc32 checksum and 4 bytes of uncompressed data length. rfc 1950: https://datatracker.ietf.org/doc/html/rfc1950#page-4 rfc 1952: https://datatracker.ietf.org/doc/html/rfc1952#page-5


/// Inflate is the decoding process that consumes a Deflate bitstream and
/// produces the original full-size data.
pub const Decompress = @import("flate/Decompress.zig");

HuffmanEncoder

flate/HuffmanEncoder.zig

/// Compression without Lempel-Ziv match searching. Faster compression, less
/// memory requirements but bigger compressed sizes.
pub const HuffmanEncoder = @import("flate/HuffmanEncoder.zig");

Container


/// Container of the deflate bit stream body. Container adds header before
/// deflate bit stream and footer after. It can be gzip, zlib or raw (no header,
/// no footer, raw bit stream).
///
/// Zlib format is defined in rfc 1950. Header has 2 bytes and footer 4 bytes
/// Adler-32 checksum.
///
/// Gzip format is defined in rfc 1952. Header has 10+ bytes and footer 4 bytes
/// crc32 checksum and 4 bytes of uncompressed data length.
///
/// rfc 1950: https://datatracker.ietf.org/doc/html/rfc1950#page-4
/// rfc 1952: https://datatracker.ietf.org/doc/html/rfc1952#page-5
pub const Container = enum {
    raw, // no header or footer
    gzip, // gzip header and footer
    zlib, // zlib header and footer

size()


    /// Total number of framing bytes for this container: `headerSize` plus
    /// `footerSize`.
    pub fn size(w: Container) usize {
        const header_len = w.headerSize();
        const footer_len = w.footerSize();
        return header_len + footer_len;
    }

headerSize()


    /// Length in bytes of the slice returned by `header` for this container.
    pub fn headerSize(w: Container) usize {
        const bytes = w.header();
        return bytes.len;
    }

footerSize()


    /// Footer length in bytes for this container:
    ///  - `.raw`: 0 (no footer)
    ///  - `.zlib`: 4 (checksum)
    ///  - `.gzip`: 8 (4-byte checksum followed by 4-byte uncompressed length)
    pub fn footerSize(w: Container) usize {
        return switch (w) {
            .raw => 0,
            .zlib => 4,
            .gzip => 8,
        };
    }

list


    /// Every `Container` variant, in declaration order.
    pub const list: [3]Container = .{ .raw, .gzip, .zlib };

Error


    /// Container-level failures, named after the gzip/zlib header and footer
    /// checks (bad header, wrong checksum, wrong size).
    /// NOTE(review): nothing visible in this part of the file returns these;
    /// presumably they are produced while decompressing — confirm against the
    /// users of this error set.
    pub const Error = error{
        BadGzipHeader,
        BadZlibHeader,
        WrongGzipChecksum,
        WrongGzipSize,
        WrongZlibChecksum,
    };

header()


    /// Returns the fixed header bytes for `container`. `.raw` has no header,
    /// so it yields an empty slice.
    pub fn header(container: Container) []const u8 {
        return switch (container) {
            .raw => &.{},
            // gzip: 10-byte header (https://datatracker.ietf.org/doc/html/rfc1952#page-5)
            //  - ID1 = 0x1f and ID2 = 0x8b (identification bytes)
            //  - CM = 8 (compression method: deflate)
            //  - FLG = 0 (no flags set)
            //  - MTIME = 0, 4 bytes (modification time, not used)
            //  - XFL = 0 (no extra flags)
            //  - OS = 3 (Unix)
            .gzip => &[_]u8{ 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03 },
            // zlib: 2-byte header (https://datatracker.ietf.org/doc/html/rfc1950#page-4)
            //  byte 0: CINFO (compression info) = 7, the default deflate
            //          window size, followed by CM (compression method) = 8
            //          for deflate -> 0x78
            //  byte 1: FLEVEL (compression level; 0=fastest, 1=fast,
            //          2=default, 3=best) = 0b10,
            //          FDICT (set if a dictionary is given) = 0,
            //          FCHECK (five bits forming a mod-31 checksum) = 0b11100
            .zlib => &[_]u8{ 0x78, 0b10_0_11100 },
        };
    }

Hasher


    pub const Hasher = union(Container) {
        raw: void,
        gzip: struct {
            crc: std.hash.Crc32 = .init(),
            count: u32 = 0,
        },
        zlib: std.hash.Adler32,

init()


        /// Creates the hasher variant matching `containter`, with the payload
        /// left at its default field values.
        ///
        /// The parameter cannot be spelled `container`: that name would shadow
        /// the `container` method declared in this union.
        pub fn init(containter: Container) Hasher {
            switch (containter) {
                .raw => return .raw,
                .gzip => return .{ .gzip = .{} },
                .zlib => return .{ .zlib = .{} },
            }
        }

container()


        /// Returns the active tag of `h`, i.e. the `Container` variant this
        /// hasher currently holds.
        pub fn container(h: Hasher) Container {
            return @as(Container, h);
        }

update()


        /// Feeds `buf` into the running checksum state of `h`.
        ///
        ///  - `.raw`: nothing is tracked.
        ///  - `.gzip`: updates the CRC-32 and adds `buf.len` to `count`,
        ///    wrapping modulo 2^32.
        ///  - `.zlib`: updates the Adler-32 state.
        pub fn update(h: *Hasher, buf: []const u8) void {
            switch (h.*) {
                .raw => {},
                .gzip => |*gzip| {
                    // The payload is a plain struct; the hasher lives in `crc`.
                    gzip.crc.update(buf);
                    // `count` is a u32 that wraps modulo 2^32, so truncating
                    // the usize length before the wrapping add is lossless
                    // with respect to the final value.
                    const len: u32 = @truncate(buf.len);
                    gzip.count +%= len;
                },
                .zlib => |*zlib| zlib.update(buf),
            }
        }

writeFooter()


        /// Writes the container footer for the data hashed so far to `writer`.
        ///
        ///  - `.gzip`: 8 bytes, the CRC-32 followed by `count`, both
        ///    little-endian.
        ///  - `.zlib`: 4 bytes, the Adler-32 value, big-endian.
        ///  - `.raw`: nothing is written.
        ///
        /// Returns any error reported by `writer.writeAll`.
        pub fn writeFooter(hasher: *Hasher, writer: *std.Io.Writer) std.Io.Writer.Error!void {
            // Scratch buffer reused for each 32-bit footer field.
            var bits: [4]u8 = undefined;
            switch (hasher.*) {
                .gzip => |*gzip| {
                    // GZIP 8 bytes footer
                    //  - 4 bytes, CRC32 (CRC-32)
                    //  - 4 bytes, ISIZE (Input SIZE) - size of the original (uncompressed) input data modulo 2^32
                    std.mem.writeInt(u32, &bits, gzip.crc.final(), .little);
                    try writer.writeAll(&bits);

                    std.mem.writeInt(u32, &bits, gzip.count, .little);
                    try writer.writeAll(&bits);
                },
                .zlib => |*zlib| {
                    // ZLIB (RFC 1950) is big-endian, unlike GZIP (RFC 1952).
                    // 4 bytes of ADLER32 (Adler-32 checksum)
                    // Checksum value of the uncompressed data (excluding any
                    // dictionary data) computed according to Adler-32
                    // algorithm.
                    std.mem.writeInt(u32, &bits, zlib.adler, .big);
                    try writer.writeAll(&bits);
                },
                .raw => {},
            }
        }
    };

init()


    pub const Metadata = union(Container) {
        raw: void,
        gzip: struct {
            crc: u32 = 0,
            count: u32 = 0,
        },
        zlib: struct {
            adler: u32 = 0,
        },

container()


        /// Creates the metadata variant matching `containter`; the gzip and
        /// zlib payload fields start at their defaults (all zero).
        ///
        /// The parameter cannot be spelled `container`: that name would shadow
        /// the `container` method declared in this union.
        pub fn init(containter: Container) Metadata {
            switch (containter) {
                .raw => return .raw,
                .gzip => return .{ .gzip = .{} },
                .zlib => return .{ .zlib = .{} },
            }
        }

        /// Returns the active tag of `m`, i.e. the `Container` variant this
        /// metadata value currently holds.
        pub fn container(m: Metadata) Container {
            return @as(Container, m);
        }
    };
};

test {
    // Reference each imported file so that it is analyzed, and its own test
    // blocks are included, when this file is tested.
    _ = Compress;
    _ = Decompress;
    _ = HuffmanEncoder;
}