1 files changed, 697 insertions, 0 deletions
diff --git a/lzx.c b/lzx.c
new file mode 100644
index 0000000..3c404b9
--- /dev/null
+++ b/lzx.c
@@ -0,0 +1,697 @@
+#include <assert.h>
+#include <stddef.h>
+
+#include "halibut.h"
+#include "huffman.h"
+#include "lz77.h"
+#include "lzx.h"
+
+#define OUR_LZX_WINSIZE 0x10000
+#define LZX_MINMATCHLEN 2
+#define LZX_MAXMATCHLEN 257
+
+int lzx_compute_position_slot(int pos, int *footer_bits)
+{
+    if (pos < 4) {
+        /* The bottom four position slots cover one value each. */
+        *footer_bits = 0;
+        return pos;
+    } else if (pos >= 0x40000) {
+        /* _All_ slots from 36 onwards are 2^17 values wide. */
+        *footer_bits = 17;
+        return 34 + (pos >> 17);
+    } else {
+        /* In between, there are two slots for each power-of-2 size,
+         * so that slots 4,5 have width 2^1, 6,7 have width 2^2, 8,9
+         * have width 2^3, ..., and 34,35 have width 2^16. */
+        int bits = 16;
+        int shifted = pos;
+        if (shifted < (1<<(18-8))) shifted <<= 8, bits -= 8;
+        if (shifted < (1<<(18-4))) shifted <<= 4, bits -= 4;
+        if (shifted < (1<<(18-2))) shifted <<= 2, bits -= 2;
+        if (shifted < (1<<(18-1))) shifted <<= 1, bits -= 1;
+        *footer_bits = bits;
+        return 2 + 2*bits + ((shifted >> 16) & 1);
+    }
+}
+
+typedef enum LZXSymType {
+    LST_MAINTREE, LST_LENTREE, LST_ALIGNOFFTREE,
+    LST_MAINTREE_PRETREE_1, LST_MAINTREE_PRETREE_2, LST_LENTREE_PRETREE,
+    LST_NTREES, dummy_enum_const = LST_NTREES-1,
+    LST_REALIGN_BITSTREAM,
+    LST_RAWBITS_BASE /* add the number of actual bits to this code */
+} LZXSymType;
+
+typedef struct LZXSym {
+    LZXSymType type;
+    int value;
+} LZXSym;
+
+typedef struct LZXBuffer {
+    LZXSym *syms;
+    int nsyms, symsize;
+} LZXBuffer;
+
+typedef struct LZXInfo {
+    LZXBuffer *buf;
+    int r0, r1, r2;                    /* saved match offsets */
+} LZXInfo;
+
+static void lzx_buffer_init(LZXBuffer *buf)
+{
+    buf->syms = NULL;
+    buf->nsyms = buf->symsize = 0;
+}
+
+static void lzx_addsym(LZXBuffer *buf, LZXSymType type, int value)
+{
+    if (buf->nsyms >= buf->symsize) {
+        assert(buf->nsyms == buf->symsize);
+        buf->symsize = buf->nsyms * 5 / 4 + 16384;
+        buf->syms = sresize(buf->syms, buf->symsize, LZXSym);
+    }
+    buf->syms[buf->nsyms].type = type;
+    buf->syms[buf->nsyms].value = value;
+    buf->nsyms++;
+}
+
+static void lzx_literal(struct LZ77Context *ctx, unsigned char c)
+{
+    LZXBuffer *buf = ((LZXInfo *)ctx->userdata)->buf;
+    lzx_addsym(buf, LST_MAINTREE, c);
+}
+
+static void lzx_match(struct LZ77Context *ctx, int match_offset, int totallen)
+{
+    LZXInfo *info = (LZXInfo *)ctx->userdata;
+    LZXBuffer *buf = info->buf;
+
+    /*
+     * First, this variant of LZX has a maximum match length of 257
+     * bytes, so if lz77.c reports a longer match than that, we must
+     * break it up.
+     */
+    while (totallen > 0) {
+        int len, length_header, length_footer, len_pos_header;
+        int formatted_offset, position_slot, position_verbatim_bits;
+        int position_verbatim_value, position_aligned_offset;
+
+        if (totallen <= LZX_MAXMATCHLEN) {
+            /* We can emit all of the (remaining) match length in one go. */
+            len = totallen;
+        } else if (totallen >= LZX_MAXMATCHLEN+LZX_MINMATCHLEN) {
+            /* There's enough match left that we can emit a
+             * maximum-length chunk and still be assured of being able
+             * to emit what's left as a viable followup match. */
+            len = LZX_MAXMATCHLEN;
+        } else {
+            /* The in-between case, where we have _only just_ too long
+             * a match to emit in one go, so that if we emitted a
+             * max-size chunk then what's left would be under the min
+             * size and we couldn't emit it. */
+            len = totallen - LZX_MINMATCHLEN;
+        }
+        totallen -= len;
+
+        /*
+         * Now we're outputting a single LZX-level match of length
+         * 'len'. Break the length up into a 'header' (included in the
+         * starting LST_MAINTREE symbol) and a 'footer' (tacked on
+         * afterwards using LST_LENTREE).
+         */
+        if (len < 9) {
+            length_header = len - 2;   /* in the range {0,...,6} */
+            length_footer = -1;        /* not transmitted at all */
+        } else {
+            length_header = 7;         /* header indicates more to come */
+            length_footer = len - 9;   /* in the range {0,...,248} */
+        }
+
+        /*
+         * Meanwhile, the raw backward distance is first transformed
+         * into the 'formatted offset', by either adding 2 or using
+         * one of the low-numbered special codes meaning to use one of
+         * the three most recent match distances.
+         */
+        if (match_offset == info->r0) {
+            /* Reuse the most recent distance */
+            formatted_offset = 0;
+        } else if (match_offset == info->r1) {
+            /* Reuse the 2nd most recent, and swap it into first place */
+            int tmp = info->r1;
+            info->r1 = info->r0;
+            info->r0 = tmp;
+            formatted_offset = 1;
+        } else if (match_offset == info->r2) {
+            /* Reuse the 3rd most recent and swap it to first place.
+             * This is intentionally not quite a move-to-front
+             * shuffle, which would permute (r0,r1,r2)->(r2,r0,r1); MS
+             * decided that just swapping r0 with r2 was a better
+             * performance tradeoff. */
+            int tmp = info->r2;
+            info->r2 = info->r0;
+            info->r0 = tmp;
+            formatted_offset = 2;
+        } else {
+            /* This offset matches none of the three saved values.
+             * Put it in r0, and move up the rest of the list. */
+            info->r2 = info->r1;
+            info->r1 = info->r0;
+            info->r0 = match_offset;
+            formatted_offset = match_offset + 2;
+        }
+
+        /*
+         * The formatted offset now breaks up into a 'position slot'
+         * (encoded as part of the starting symbol) and an offset from
+         * the smallest position value covered by that slot. The
+         * system of slots is designed so that every slot's width is a
+         * power of two and its base value is a multiple of its width,
+         * so we can get the offset just by taking the bottom n bits
+         * of the full formatted offset, once the choice of position
+         * slot tells us what n is.
+         */
+        position_slot = lzx_compute_position_slot(
+            formatted_offset, &position_verbatim_bits);
+        position_verbatim_value = formatted_offset &
+            ((1 << position_verbatim_bits)-1);
+
+        /*
+         * If there are three or more additional bits, then the last 3
+         * of them are (potentially, depending on block type which we
+         * haven't decided about yet) transmitted using the aligned
+         * offset tree. The rest are sent verbatim.
+         */
+        if (position_verbatim_bits >= 3) {
+            position_aligned_offset = position_verbatim_value & 7;
+            position_verbatim_bits -= 3;
+            position_verbatim_value >>= 3;
+        } else {
+            position_aligned_offset = -1; /* not transmitted */
+        }
+
+        /*
+         * Combine the length header and position slot into the full
+         * set of information encoded by the starting symbol.
+         */
+        len_pos_header = position_slot * 8 + length_header;
+
+        /*
+         * And now we've finished figuring out _what_ to output, so
+         * output it.
+         */
+        lzx_addsym(buf, LST_MAINTREE, 256 + len_pos_header);
+        if (length_footer >= 0)
+            lzx_addsym(buf, LST_LENTREE, length_footer);
+        if (position_verbatim_bits > 0)
+            lzx_addsym(buf, LST_RAWBITS_BASE + position_verbatim_bits,
+                       position_verbatim_value);
+        if (position_aligned_offset >= 0)
+            lzx_addsym(buf, LST_ALIGNOFFTREE, position_aligned_offset);
+    }
+}
+
+void lzx_lz77_inner(LZXInfo *info, const unsigned char *data, int len)
+{
+    struct LZ77Context lz77c;
+    lz77_init(&lz77c, OUR_LZX_WINSIZE);
+    lz77c.literal = lzx_literal;
+    lz77c.match = lzx_match;
+    lz77c.userdata = info;
+    lz77_compress(&lz77c, data, len, TRUE);
+    lz77_cleanup(&lz77c);
+}
+
+void lzx_lz77(LZXBuffer *buf, const unsigned char *data,
+              int totallen, int realign_interval)
+{
+    LZXInfo info;
+
+    info.r0 = info.r1 = info.r2 = 1;
+    info.buf = buf;
+
+    while (totallen > 0) {
+        int thislen =
+            totallen < realign_interval ? totallen : realign_interval;
+        lzx_lz77_inner(&info, data, thislen);
+        data += thislen;
+        totallen -= thislen;
+        if (totallen > 0)
+            lzx_addsym(info.buf, LST_REALIGN_BITSTREAM, 0);
+    }
+}
+
+typedef struct LZXHuf {
+    int nsyms;
+    unsigned char *lengths;
+    unsigned char *oldlengths; /* for pretree encoding to diff against */
+    int *codes;
+} LZXHuf;
+
+typedef struct LZXHufs {
+    LZXHuf hufs[LST_NTREES];
+} LZXHufs;
+
+void lzx_build_tree(LZXSym *syms, int nsyms, LZXSymType which, LZXHufs *hufs)
+{
+    int i, max_code_len;
+    int *freqs;
+    LZXHuf *huf = &hufs->hufs[which];
+
+    switch (which) {
+      default:
+        assert(0 && "Bad lzx_build_tree tree type");
+      case LST_MAINTREE:
+        /*
+         * Trees encoded via a pretree have a max code length of 16,
+         * because that's the limit of what the pretree alphabet can
+         * represent.
+         */
+        max_code_len = 16;
+
+        /*
+         * Number of symbols in the main tree is 256 literals, plus 8n
+         * match header symbols where n is the largest position slot
+         * number that might be needed to address any offset in the
+         * window.
+         */
+        {
+            int ignored, last_slot;
+            last_slot = lzx_compute_position_slot(OUR_LZX_WINSIZE-1, &ignored);
+            huf->nsyms = 8 * (last_slot+1) + 256;
+        }
+        break;
+      case LST_LENTREE:
+        max_code_len = 16;             /* pretree again */
+        huf->nsyms = 249;              /* a fixed value in the spec */
+        break;
+      case LST_MAINTREE_PRETREE_1:
+      case LST_MAINTREE_PRETREE_2:
+      case LST_LENTREE_PRETREE:
+        /* Pretree code lengths are stored in 4-bit fields, so they
+         * can't go above 15. There are a standard 20 symbols in the
+         * pretree alphabet. */
+        max_code_len = 15;
+        huf->nsyms = 20;
+        break;
+      case LST_ALIGNOFFTREE:
+        /* The aligned-offset tree has 8 elements stored in 3-bit
+         * fields. */
+        max_code_len = 7;
+        huf->nsyms = 8;
+        break;
+    }
+
+    freqs = snewn(huf->nsyms, int);
+
+    /*
+     * Count up the symbol frequencies.
+     */
+    for (i = 0; i < huf->nsyms; i++)
+        freqs[i] = 0;
+    for (i = 0; i < nsyms; i++)
+        if (syms[i].type == which)
+            freqs[syms[i].value]++;
+
+    /*
+     * Build the Huffman table.
+     */
+    huf->lengths = snewn(huf->nsyms, unsigned char);
+    build_huffman_tree(freqs, huf->lengths, huf->nsyms, max_code_len);
+    huf->codes = snewn(huf->nsyms, int);
+    compute_huffman_codes(huf->lengths, huf->codes, huf->nsyms);
+
+    /*
+     * Cleanup.
+     */
+    sfree(freqs);
+}
+
+void lzx_tree_with_pretree(LZXHuf *huf, int symoffset, int symlimit,
+                           LZXBuffer *buf, LZXSymType pretree_symtype)
+{
+    int i, r;
+
+    if (!huf->oldlengths) {
+        huf->oldlengths = snewn(huf->nsyms, unsigned char);
+        for (i = 0; i < huf->nsyms; i++)
+            huf->oldlengths[i] = 0;
+    }
+
+    for (i = symoffset; i < symlimit; i++) {
+        for (r = 1; i+r < symlimit; r++)
+            if (huf->lengths[i+r] != huf->lengths[i])
+                break;
+
+        if (r >= 4) {
+            /*
+             * We have at least one run of the same code length long
+             * enough to use one of the run-length encoding symbols.
+             */
+            while (r >= 4) {
+                int thisrun;
+                if (huf->lengths[i] == 0) {
+                    thisrun = r > 20+31 ? 20+31 : r;
+                    if (thisrun >= 20) {
+                        lzx_addsym(buf, pretree_symtype, 18);
+                        lzx_addsym(buf, LST_RAWBITS_BASE + 5, thisrun - 20);
+                    } else {
+                        lzx_addsym(buf, pretree_symtype, 17);
+                        lzx_addsym(buf, LST_RAWBITS_BASE + 4, thisrun - 4);
+                    }
+                } else {
+                    thisrun = r > 5 ? 5 : r;
+                    lzx_addsym(buf, pretree_symtype, 19);
+                    lzx_addsym(buf, LST_RAWBITS_BASE + 1, thisrun - 4);
+                    lzx_addsym(buf, pretree_symtype,
+                               (huf->oldlengths[i]-huf->lengths[i] + 17) % 17);
+                }
+                r -= thisrun;
+                i += thisrun;
+            }
+
+            if (r == 0) {
+                i--;        /* compensate for normal loop increment */
+                continue;
+            }
+        }
+
+        /*
+         * Otherwise, emit a normal non-encoded symbol.
+         */
+        lzx_addsym(buf, pretree_symtype,
+                   (huf->oldlengths[i]-huf->lengths[i] + 17) % 17);
+    }
+}
+
+void lzx_tree_simple(LZXHuf *huf, LZXBuffer *buf, int bits)
+{
+    int i;
+    for (i = 0; i < huf->nsyms; i++)
+        lzx_addsym(buf, LST_RAWBITS_BASE + bits, huf->lengths[i]);
+}
+
+typedef struct LZXBitstream {
+    struct LZXEncodedFile *ef;
+    size_t data_size, resets_size;
+    unsigned short bitbuffer;
+    int nbits;
+    int first_block;
+} LZXBitstream;
+
+void lzx_write_bits(LZXBitstream *bs, int value, int bits)
+{
+    while (bs->nbits + bits >= 16) {
+        int thisbits = 16 - bs->nbits;
+        bs->bitbuffer = (bs->bitbuffer << thisbits) |
+            (value >> (bits-thisbits));
+
+        if (bs->ef->data_len+2 > bs->data_size) {
+            bs->data_size = bs->ef->data_len * 5 / 4 + 65536;
+            bs->ef->data = sresize(bs->ef->data, bs->data_size,
+                                   unsigned char);
+        }
+        bs->ef->data[bs->ef->data_len++] = bs->bitbuffer;
+        bs->ef->data[bs->ef->data_len++] = bs->bitbuffer >> 8;
+
+        bs->bitbuffer = 0;
+        bs->nbits = 0;
+
+        bits -= thisbits;
+        value &= (1<<bits) - 1;
+    }
+
+    bs->bitbuffer = (bs->bitbuffer << bits) | value;
+    bs->nbits += bits;
+}
+
+void lzx_realign(LZXBitstream *bs)
+{
+    lzx_write_bits(bs, 0, 15 & -(unsigned)bs->nbits);
+}
+
+void lzx_write_reset_table_entry(LZXBitstream *bs)
+{
+    lzx_write_bits(bs, 0, 15 & -(unsigned)bs->nbits);
+
+    if (bs->ef->n_resets >= bs->resets_size) {
+        bs->resets_size = bs->ef->n_resets * 5 / 4 + 256;
+        bs->ef->reset_byte_offsets = sresize(bs->ef->reset_byte_offsets,
+                                             bs->resets_size, size_t);
+    }
+    bs->ef->reset_byte_offsets[bs->ef->n_resets++] = bs->ef->data_len;
+}
+
+void lzx_huf_encode(LZXSym *syms, int nsyms, LZXHufs *hufs, LZXBitstream *bs)
+{
+    int i;
+    for (i = 0; i < nsyms; i++) {
+        LZXSymType type = syms[i].type;
+        int value = syms[i].value;
+
+        if (type >= LST_RAWBITS_BASE) {
+            lzx_write_bits(bs, value, type - LST_RAWBITS_BASE);
+        } else if (type == LST_REALIGN_BITSTREAM) {
+            /* Realign the bitstream to a 16-bit boundary, and write a
+             * reset table entry giving the resulting byte offset. */
+            lzx_realign(bs);
+            lzx_write_reset_table_entry(bs);
+        } else {
+            lzx_write_bits(bs, hufs->hufs[type].codes[value],
+                           hufs->hufs[type].lengths[value]);
+        }
+    }
+}
+
+void lzx_encode_block(LZXSym *syms, int nsyms, int blocksize,
+                      LZXHufs *hufs, LZXBitstream *bs)
+{
+    LZXBuffer header[8];
+    int i, blocktype;
+
+    for (i = 0; i < (int)lenof(header); i++)
+        lzx_buffer_init(&header[i]);
+
+    /*
+     * Build the Huffman trees for the main alphabets used in the
+     * block.
+     */
+    lzx_build_tree(syms, nsyms, LST_MAINTREE, hufs);
+    lzx_build_tree(syms, nsyms, LST_LENTREE, hufs);
+    lzx_build_tree(syms, nsyms, LST_ALIGNOFFTREE, hufs);
+
+    /*
+     * Encode each of those as a sequence of pretree symbols.
+     */
+    lzx_tree_with_pretree(&hufs->hufs[LST_MAINTREE], 0, 256,
+                          &header[3], LST_MAINTREE_PRETREE_1);
+    lzx_tree_with_pretree(&hufs->hufs[LST_MAINTREE], 256,
+                          hufs->hufs[LST_MAINTREE].nsyms,
+                          &header[5], LST_MAINTREE_PRETREE_2);
+    lzx_tree_with_pretree(&hufs->hufs[LST_LENTREE], 0,
+                          hufs->hufs[LST_LENTREE].nsyms,
+                          &header[7], LST_LENTREE_PRETREE);
+
+    /*
+     * Build the pretree for each of those encodings.
+     */
+    lzx_build_tree(header[3].syms, header[3].nsyms,
+                   LST_MAINTREE_PRETREE_1, hufs);
+    lzx_build_tree(header[5].syms, header[5].nsyms,
+                   LST_MAINTREE_PRETREE_2, hufs);
+    lzx_build_tree(header[7].syms, header[7].nsyms,
+                   LST_LENTREE_PRETREE, hufs);
+
+    /*
+     * Decide whether we're keeping the aligned offset tree or not.
+     */
+    {
+        int with, without;
+
+        with = 3*8;                    /* cost of transmitting tree */
+        without = 0;                   /* or not */
+
+        for (i = 0; i < nsyms; i++)
+            if (syms[i].type == LST_ALIGNOFFTREE) {
+                with += hufs->hufs[LST_ALIGNOFFTREE].lengths[syms[i].value];
+                without += 3;
+            }
+
+        if (with < without) {
+            /* Yes, it's a win to use the aligned offset tree. */
+            blocktype = 2;
+        } else {
+            /* No, we do better by throwing it away. */
+            blocktype = 1;
+
+            /* Easiest way to simulate that is to pretend we're still
+             * using an aligned offset tree in the encoding, but to
+             * chuck away our code lengths and replace them with the
+             * fixed-length trivial tree. */
+            for (i = 0; i < 8; i++) {
+                hufs->hufs[LST_ALIGNOFFTREE].lengths[i] = 3;
+                hufs->hufs[LST_ALIGNOFFTREE].codes[i] = i;
+            }
+        }
+    }
+
+    /*
+     * Encode all the simply encoded trees (the three pretrees and the
+     * aligned offset tree).
+     */
+    lzx_tree_simple(&hufs->hufs[LST_MAINTREE_PRETREE_1], &header[2], 4);
+    lzx_tree_simple(&hufs->hufs[LST_MAINTREE_PRETREE_2], &header[4], 4);
+    lzx_tree_simple(&hufs->hufs[LST_LENTREE_PRETREE], &header[6], 4);
+    if (blocktype == 2)
+        lzx_tree_simple(&hufs->hufs[LST_ALIGNOFFTREE], &header[1], 3);
+
+    /*
+     * Top-level block header.
+     */
+    if (bs->first_block) {
+        /*
+         * Also include the whole-file header which says whether E8
+         * call translation is on. We never turn it on, because we
+         * don't support it (since in this use case it doesn't seem
+         * likely to be particularly useful anyway).
+         *
+         * It looks like a layer violation to put the output of this
+         * whole-file header inside the per-block function like this,
+         * but in fact it has to be done here because the first reset
+         * table entry really is supposed to point to the _start_ of
+         * the whole-file header.
+         */
+        lzx_addsym(&header[0], LST_RAWBITS_BASE + 1, 0);
+        bs->first_block = FALSE;
+    }
+    lzx_addsym(&header[0], LST_RAWBITS_BASE + 3, blocktype);
+    lzx_addsym(&header[0], LST_RAWBITS_BASE + 24, blocksize);
+
+    /*
+     * Ensure the bit stream starts off aligned, and output an initial
+     * reset-table entry.
+     */
+    lzx_realign(bs);
+    lzx_write_reset_table_entry(bs);
+
+    /*
+     * Write out all of our symbol sequences in order: all of those
+     * assorted header fragments, then the main LZ77 token sequence.
+     */
+    for (i = 0; i < (int)lenof(header); i++)
+        lzx_huf_encode(header[i].syms, header[i].nsyms, hufs, bs);
+    lzx_huf_encode(syms, nsyms, hufs, bs);
+
+    /*
+     * Clean up.
+     */
+    for (i = 0; i < (int)lenof(header); i++)
+        sfree(header[i].syms);
+    for (i = 0; i < (int)lenof(hufs->hufs); i++) {
+        sfree(hufs->hufs[i].codes);
+        sfree(hufs->hufs[i].lengths);
+    }
+}
+
+struct LZXEncodedFile *lzx(const void *vdata, int totallen,
+                           int realign_interval, int reset_interval)
+{
+    const unsigned char *data = (const unsigned char *)vdata;
+    LZXBitstream bs;
+    LZXHufs hufs;
+    int i;
+
+    bs.ef = snew(struct LZXEncodedFile);
+    bs.ef->data = NULL;
+    bs.ef->reset_byte_offsets = NULL;
+    bs.ef->data_len = bs.data_size = 0;
+    bs.ef->n_resets = bs.resets_size = 0;
+    bs.bitbuffer = 0;
+    bs.nbits = 0;
+
+    for (i = 0; i < (int)lenof(hufs.hufs); i++)
+        hufs.hufs[i].oldlengths = NULL;
+
+    while (totallen > 0) {
+        int thislen =
+            totallen < reset_interval ? totallen : reset_interval;
+        LZXBuffer buf;
+
+        lzx_buffer_init(&buf);
+
+        lzx_lz77(&buf, data, thislen, realign_interval);
+        data += thislen;
+        totallen -= thislen;
+
+        /*
+         * Block boundaries are chosen completely trivially: since we
+         * have to terminate a block every time we reach the (fairly
+         * short) reset interval in any case, it doesn't hurt us much
+         * to just fix the assumption that every (reset_interval)
+         * bytes of the input turn into exactly one block, i.e. the
+         * whole of buf.syms that we just constructed is output in one
+         * go. We _could_ try improving on this by clever
+         * block-boundary heuristics, but I don't really think it's
+         * worth it.
+         */
+        bs.first_block = TRUE; /* reset every time we reset the LZ state */
+        lzx_encode_block(buf.syms, buf.nsyms, thislen, &hufs, &bs);
+
+        sfree(buf.syms);
+    }
+
+    for (i = 0; i < (int)lenof(hufs.hufs); i++)
+        sfree(hufs.hufs[i].oldlengths);
+
+    /* Realign to a 16-bit boundary, i.e. flush out any last few
+     * unwritten bits. */
+    lzx_realign(&bs);
+
+    return bs.ef;
+}
+
+#ifdef LZX_TEST
+/*
+gcc -g -O0 -DLZX_TEST -o lzxtest -Icharset lzx.c lz77.c huffman.c malloc.c
+*/
+#include <err.h>
+int main(int argc, char **argv)
+{
+    FILE *fp;
+    long insize;
+    unsigned char *inbuf;
+    struct LZXEncodedFile *ef;
+
+    if (argc != 3)
+        errx(1, "expected infile and outfile arguments");
+
+    fp = fopen(argv[1], "rb");
+    if (!fp)
+        err(1, "%s: open", argv[1]);
+    fseek(fp, 0, SEEK_END);
+    insize = ftell(fp);
+    rewind(fp);
+    inbuf = snewn(insize, unsigned char);
+    fread(inbuf, 1, insize, fp);
+    fclose(fp);
+
+    ef = lzx(inbuf, insize, 0x8000, 0x10000);
+
+    fp = fopen(argv[2], "wb");
+    if (!fp)
+        err(1, "%s: open", argv[2]);
+    fwrite(ef->data, 1, ef->data_len, fp);
+    fclose(fp);
+
+    sfree(ef->data);
+    sfree(ef->reset_byte_offsets);
+    sfree(ef);
+    sfree(inbuf);
+
+    return 0;
+}
+
+wchar_t *ustrdup(wchar_t const *s) { assert(0 && "should be unused"); }
+void fatalerr_nomemory(void) { errx(1, "out of memory"); }
+#endif