Factor LZ77 and Huffman routines out of deflate.c.

The general routines for analysing a buffer into an LZ77ish stream of literals and matches, and for constructing a Huffman tree in canonical format, now live in their own source files so that they can be reused for other similar compression formats. Deflate-specific details like the exact file encoding are left in deflate.c.
author: Simon Tatham <anakin@pobox.com> 2017-05-10 07:10:14 +0100
committer: Simon Tatham <anakin@pobox.com> 2017-05-13 18:22:09 +0100
commit: 715a3bef377aeee898c427be99b1acf440b4a5e5 (patch)
tree: 6ff1d6eecb42df91a4e174644adb0954da44d9fa
parent: e446ba3cf1f72dca390e9c9a5fe987f3dcccd440 (diff)
download: halibut-715a3bef377aeee898c427be99b1acf440b4a5e5.zip
halibut-715a3bef377aeee898c427be99b1acf440b4a5e5.tar.gz
halibut-715a3bef377aeee898c427be99b1acf440b4a5e5.tar.bz2
halibut-715a3bef377aeee898c427be99b1acf440b4a5e5.tar.xz
6 files changed, 702 insertions, 620 deletions
diff --git a/Makefile b/Makefile
index a60bac7..6264624 100644
--- a/Makefile
+++ b/Makefile
@@ -95,7 +95,7 @@ include $(LIBCHARSET_SRCDIR)Makefile
 MODULES := main malloc ustring error help licence version misc tree234
 MODULES += input in_afm in_pf in_sfnt keywords contents index biblio
 MODULES += bk_text bk_html bk_whlp bk_man bk_info bk_paper bk_ps bk_pdf
-MODULES += winhelp deflate psdata wcwidth
+MODULES += winhelp deflate lz77 huffman psdata wcwidth
 
 OBJECTS := $(addsuffix .o,$(MODULES)) $(LIBCHARSET_OBJS)
 DEPS := $(addsuffix .d,$(MODULES))
diff --git a/deflate.c b/deflate.c
index 7c3f3bc..6619c2c 100644
--- a/deflate.c
+++ b/deflate.c
@@ -61,20 +61,11 @@
 #include <stdlib.h>
 #include <assert.h>
 
+#include "halibut.h"
+#include "huffman.h"
+#include "lz77.h"
 #include "deflate.h"
 
-#define snew(type) ( (type *) malloc(sizeof(type)) )
-#define snewn(n, type) ( (type *) malloc((n) * sizeof(type)) )
-#define sresize(x, n, type) ( (type *) realloc((x), (n) * sizeof(type)) )
-#define sfree(x) ( free((x)) )
-
-#define lenof(x) (sizeof((x)) / sizeof(*(x)))
-
-#ifndef FALSE
-#define FALSE 0
-#define TRUE (!FALSE)
-#endif
-
 /* ----------------------------------------------------------------------
  * This file can be compiled in a number of modes.
  * 
@@ -107,330 +98,28 @@ int analyse_level = 0;
 #endif
 
 /* ----------------------------------------------------------------------
- * Basic LZ77 code. This bit is designed modularly, so it could be
- * ripped out and used in a different LZ77 compressor. Go to it,
- * and good luck :-)
- */
-
-struct LZ77InternalContext;
-struct LZ77Context {
-    struct LZ77InternalContext *ictx;
-    void *userdata;
-    void (*literal) (struct LZ77Context * ctx, unsigned char c);
-    void (*match) (struct LZ77Context * ctx, int distance, int len);
-};
-
-/*
- * Initialise the private fields of an LZ77Context. It's up to the
- * user to initialise the public fields.
- */
-static int lz77_init(struct LZ77Context *ctx);
-
-/*
- * Supply data to be compressed. Will update the private fields of
- * the LZ77Context, and will call literal() and match() to output.
- * If `compress' is FALSE, it will never emit a match, but will
- * instead call literal() for everything.
- */
-static void lz77_compress(struct LZ77Context *ctx,
-			  const unsigned char *data, int len, int compress);
-
-/*
- * Modifiable parameters.
- */
-#define WINSIZE 32768		       /* window size. Must be power of 2! */
-#define HASHMAX 2039		       /* one more than max hash value */
-#define MAXMATCH 32		       /* how many matches we track */
-#define HASHCHARS 3		       /* how many chars make a hash */
-
-/*
- * This compressor takes a less slapdash approach than the
- * gzip/zlib one. Rather than allowing our hash chains to fall into
- * disuse near the far end, we keep them doubly linked so we can
- * _find_ the far end, and then every time we add a new byte to the
- * window (thus rolling round by one and removing the previous
- * byte), we can carefully remove the hash chain entry.
- */
-
-#define INVALID -1		       /* invalid hash _and_ invalid offset */
-struct WindowEntry {
-    short next, prev;		       /* array indices within the window */
-    short hashval;
-};
-
-struct HashEntry {
-    short first;		       /* window index of first in chain */
-};
-
-struct Match {
-    int distance, len;
-};
-
-struct LZ77InternalContext {
-    struct WindowEntry win[WINSIZE];
-    unsigned char data[WINSIZE];
-    int winpos;
-    struct HashEntry hashtab[HASHMAX];
-    unsigned char pending[HASHCHARS];
-    int npending;
-};
-
-static int lz77_hash(const unsigned char *data)
-{
-    return (257 * data[0] + 263 * data[1] + 269 * data[2]) % HASHMAX;
-}
-
-static int lz77_init(struct LZ77Context *ctx)
-{
-    struct LZ77InternalContext *st;
-    int i;
-
-    st = snew(struct LZ77InternalContext);
-    if (!st)
-	return 0;
-
-    ctx->ictx = st;
-
-    for (i = 0; i < WINSIZE; i++)
-	st->win[i].next = st->win[i].prev = st->win[i].hashval = INVALID;
-    for (i = 0; i < HASHMAX; i++)
-	st->hashtab[i].first = INVALID;
-    st->winpos = 0;
-
-    st->npending = 0;
-
-    return 1;
-}
-
-static void lz77_advance(struct LZ77InternalContext *st,
-			 unsigned char c, int hash)
-{
-    int off;
-
-    /*
-     * Remove the hash entry at winpos from the tail of its chain,
-     * or empty the chain if it's the only thing on the chain.
-     */
-    if (st->win[st->winpos].prev != INVALID) {
-	st->win[st->win[st->winpos].prev].next = INVALID;
-    } else if (st->win[st->winpos].hashval != INVALID) {
-	st->hashtab[st->win[st->winpos].hashval].first = INVALID;
-    }
-
-    /*
-     * Create a new entry at winpos and add it to the head of its
-     * hash chain.
-     */
-    st->win[st->winpos].hashval = hash;
-    st->win[st->winpos].prev = INVALID;
-    off = st->win[st->winpos].next = st->hashtab[hash].first;
-    st->hashtab[hash].first = st->winpos;
-    if (off != INVALID)
-	st->win[off].prev = st->winpos;
-    st->data[st->winpos] = c;
-
-    /*
-     * Advance the window pointer.
-     */
-    st->winpos = (st->winpos + 1) & (WINSIZE - 1);
-}
-
-#define CHARAT(k) ( (k)<0 ? st->data[(st->winpos+k)&(WINSIZE-1)] : data[k] )
-
-static void lz77_compress(struct LZ77Context *ctx,
-			  const unsigned char *data, int len, int compress)
-{
-    struct LZ77InternalContext *st = ctx->ictx;
-    int i, hash, distance, off, nmatch, matchlen, advance;
-    struct Match defermatch, matches[MAXMATCH];
-    int deferchr;
-
-    /*
-     * Add any pending characters from last time to the window. (We
-     * might not be able to.)
-     */
-    for (i = 0; i < st->npending; i++) {
-	unsigned char foo[HASHCHARS];
-	int j;
-	if (len + st->npending - i < HASHCHARS) {
-	    /* Update the pending array. */
-	    for (j = i; j < st->npending; j++)
-		st->pending[j - i] = st->pending[j];
-	    break;
-	}
-	for (j = 0; j < HASHCHARS; j++)
-	    foo[j] = (i + j < st->npending ? st->pending[i + j] :
-		      data[i + j - st->npending]);
-	lz77_advance(st, foo[0], lz77_hash(foo));
-    }
-    st->npending -= i;
-
-    defermatch.len = 0;
-    deferchr = '\0';
-    while (len > 0) {
-
-	/* Don't even look for a match, if we're not compressing. */
-	if (compress && len >= HASHCHARS) {
-	    /*
-	     * Hash the next few characters.
-	     */
-	    hash = lz77_hash(data);
-
-	    /*
-	     * Look the hash up in the corresponding hash chain and see
-	     * what we can find.
-	     */
-	    nmatch = 0;
-	    for (off = st->hashtab[hash].first;
-		 off != INVALID; off = st->win[off].next) {
-		/* distance = 1       if off == st->winpos-1 */
-		/* distance = WINSIZE if off == st->winpos   */
-		distance =
-		    WINSIZE - (off + WINSIZE - st->winpos) % WINSIZE;
-		for (i = 0; i < HASHCHARS; i++)
-		    if (CHARAT(i) != CHARAT(i - distance))
-			break;
-		if (i == HASHCHARS) {
-		    matches[nmatch].distance = distance;
-		    matches[nmatch].len = 3;
-		    if (++nmatch >= MAXMATCH)
-			break;
-		}
-	    }
-	} else {
-	    nmatch = 0;
-	    hash = INVALID;
-	}
-
-	if (nmatch > 0) {
-	    /*
-	     * We've now filled up matches[] with nmatch potential
-	     * matches. Follow them down to find the longest. (We
-	     * assume here that it's always worth favouring a
-	     * longer match over a shorter one.)
-	     */
-	    matchlen = HASHCHARS;
-	    while (matchlen < len) {
-		int j;
-		for (i = j = 0; i < nmatch; i++) {
-		    if (CHARAT(matchlen) ==
-			CHARAT(matchlen - matches[i].distance)) {
-			matches[j++] = matches[i];
-		    }
-		}
-		if (j == 0)
-		    break;
-		matchlen++;
-		nmatch = j;
-	    }
-
-	    /*
-	     * We've now got all the longest matches. We favour the
-	     * shorter distances, which means we go with matches[0].
-	     * So see if we want to defer it or throw it away.
-	     */
-	    matches[0].len = matchlen;
-	    if (defermatch.len > 0) {
-		if (matches[0].len > defermatch.len + 1) {
-		    /* We have a better match. Emit the deferred char,
-		     * and defer this match. */
-		    ctx->literal(ctx, (unsigned char) deferchr);
-		    defermatch = matches[0];
-		    deferchr = data[0];
-		    advance = 1;
-		} else {
-		    /* We don't have a better match. Do the deferred one. */
-		    ctx->match(ctx, defermatch.distance, defermatch.len);
-		    advance = defermatch.len - 1;
-		    defermatch.len = 0;
-		}
-	    } else {
-		/* There was no deferred match. Defer this one. */
-		defermatch = matches[0];
-		deferchr = data[0];
-		advance = 1;
-	    }
-	} else {
-	    /*
-	     * We found no matches. Emit the deferred match, if
-	     * any; otherwise emit a literal.
-	     */
-	    if (defermatch.len > 0) {
-		ctx->match(ctx, defermatch.distance, defermatch.len);
-		advance = defermatch.len - 1;
-		defermatch.len = 0;
-	    } else {
-		ctx->literal(ctx, data[0]);
-		advance = 1;
-	    }
-	}
-
-	/*
-	 * Now advance the position by `advance' characters,
-	 * keeping the window and hash chains consistent.
-	 */
-	while (advance > 0) {
-	    if (len >= HASHCHARS) {
-		lz77_advance(st, *data, lz77_hash(data));
-	    } else {
-		st->pending[st->npending++] = *data;
-	    }
-	    data++;
-	    len--;
-	    advance--;
-	}
-    }
-}
-
-/* ----------------------------------------------------------------------
  * Deflate functionality common to both compression and decompression.
  */
 
+#define DWINSIZE 32768
+
 static const unsigned char lenlenmap[] = {
     16, 17, 18, 0, 8, 7, 9, 6, 10, 5, 11, 4, 12, 3, 13, 2, 14, 1, 15
 };
 
-#define MAXCODELEN 16
-
 /*
- * Given a sequence of Huffman code lengths, compute the actual
- * codes, in the final form suitable for feeding to outbits (i.e.
- * already bit-mirrored).
- *
- * Returns the maximum code length found. Can also return -1 to
- * indicate the table was overcommitted (too many or too short
- * codes to exactly cover the possible space), or -2 to indicate it
- * was undercommitted (too few or too long codes).
+ * Given a sequence of Huffman code lengths, compute the actual codes
+ * in the final form suitable for feeding to outbits (i.e. already
+ * bit-mirrored). Returns the same as compute_huffman_codes.
  */
-static int hufcodes(const unsigned char *lengths, int *codes, int nsyms)
+static int deflate_hufcodes(const unsigned char *lengths,
+                            int *codes, int nsyms)
 {
-    int count[MAXCODELEN], startcode[MAXCODELEN];
-    int code, maxlen;
+    int maxlen = compute_huffman_codes(lengths, codes, nsyms);
     int i, j;
 
-    /* Count the codes of each length. */
-    maxlen = 0;
-    for (i = 1; i < MAXCODELEN; i++)
-	count[i] = 0;
-    for (i = 0; i < nsyms; i++) {
-	count[lengths[i]]++;
-	if (maxlen < lengths[i])
-	    maxlen = lengths[i];
-    }
-    /* Determine the starting code for each length block. */
-    code = 0;
-    for (i = 1; i < MAXCODELEN; i++) {
-	startcode[i] = code;
-	code += count[i];
-	if (code > (1 << i))
-	    maxlen = -1;	       /* overcommitted */
-	code <<= 1;
-    }
-    if (code < (1 << MAXCODELEN))
-	maxlen = -2;		       /* undercommitted */
-    /* Determine the code for each symbol. Mirrored, of course. */
     for (i = 0; i < nsyms; i++) {
-	code = startcode[lengths[i]]++;
+        int code = codes[i];
 	codes[i] = 0;
 	for (j = 0; j < lengths[i]; j++) {
 	    codes[i] = (codes[i] << 1) | (code & 1);
@@ -680,289 +369,6 @@ static void outbits(deflate_compress_ctx *out,
 }
 
 /*
- * Binary heap functions used by buildhuf(). Each one assumes the
- * heap to be stored in an array of ints, with two ints per node
- * (user data and key). They take in the old heap length, and
- * return the new one.
- */
-#define HEAPPARENT(x) (((x)-2)/4*2)
-#define HEAPLEFT(x) ((x)*2+2)
-#define HEAPRIGHT(x) ((x)*2+4)
-static int addheap(int *heap, int len, int userdata, int key)
-{
-    int me, dad, tmp;
-
-    me = len;
-    heap[len++] = userdata;
-    heap[len++] = key;
-
-    while (me > 0) {
-	dad = HEAPPARENT(me);
-	if (heap[me+1] < heap[dad+1]) {
-	    tmp = heap[me]; heap[me] = heap[dad]; heap[dad] = tmp;
-	    tmp = heap[me+1]; heap[me+1] = heap[dad+1]; heap[dad+1] = tmp;
-	    me = dad;
-	} else
-	    break;
-    }
-
-    return len;
-}
-static int rmheap(int *heap, int len, int *userdata, int *key)
-{
-    int me, lc, rc, c, tmp;
-
-    len -= 2;
-    *userdata = heap[0];
-    *key = heap[1];
-    heap[0] = heap[len];
-    heap[1] = heap[len+1];
-
-    me = 0;
-
-    while (1) {
-	lc = HEAPLEFT(me);
-	rc = HEAPRIGHT(me);
-	if (lc >= len)
-	    break;
-	else if (rc >= len || heap[lc+1] < heap[rc+1])
-	    c = lc;
-	else
-	    c = rc;
-	if (heap[me+1] > heap[c+1]) {
-	    tmp = heap[me]; heap[me] = heap[c]; heap[c] = tmp;
-	    tmp = heap[me+1]; heap[me+1] = heap[c+1]; heap[c+1] = tmp;
-	} else
-	    break;
-	me = c;
-    }
-
-    return len;
-}
-
-/*
- * The core of the Huffman algorithm: takes an input array of
- * symbol frequencies, and produces an output array of code
- * lengths.
- *
- * This is basically a generic Huffman implementation, but it has
- * one zlib-related quirk which is that it caps the output code
- * lengths to fit in an unsigned char (which is safe since Deflate
- * will reject anything longer than 15 anyway). Anyone wanting to
- * rip it out and use it in another context should find that easy
- * to remove.
- */
-#define HUFMAX 286
-static void buildhuf(const int *freqs, unsigned char *lengths, int nsyms)
-{
-    int parent[2*HUFMAX-1];
-    int length[2*HUFMAX-1];
-    int heap[2*HUFMAX];
-    int heapsize;
-    int i, j, n;
-    int si, sj;
-
-    assert(nsyms <= HUFMAX);
-
-    memset(parent, 0, sizeof(parent));
-
-    /*
-     * Begin by building the heap.
-     */
-    heapsize = 0;
-    for (i = 0; i < nsyms; i++)
-	if (freqs[i] > 0)	       /* leave unused symbols out totally */
-	    heapsize = addheap(heap, heapsize, i, freqs[i]);
-
-    /*
-     * Now repeatedly take two elements off the heap and merge
-     * them.
-     */
-    n = HUFMAX;
-    while (heapsize > 2) {
-	heapsize = rmheap(heap, heapsize, &i, &si);
-	heapsize = rmheap(heap, heapsize, &j, &sj);
-	parent[i] = n;
-	parent[j] = n;
-	heapsize = addheap(heap, heapsize, n, si + sj);
-	n++;
-    }
-
-    /*
-     * Now we have our tree, in the form of a link from each node
-     * to the index of its parent. Count back down the tree to
-     * determine the code lengths.
-     */
-    memset(length, 0, sizeof(length));
-    /* The tree root has length 0 after that, which is correct. */
-    for (i = n-1; i-- ;)
-	if (parent[i] > 0)
-	    length[i] = 1 + length[parent[i]];
-
-    /*
-     * And that's it. (Simple, wasn't it?) Copy the lengths into
-     * the output array and leave.
-     * 
-     * Here we cap lengths to fit in unsigned char.
-     */
-    for (i = 0; i < nsyms; i++)
-	lengths[i] = (length[i] > 255 ? 255 : length[i]);
-}
-
-/*
- * Wrapper around buildhuf() which enforces the Deflate restriction
- * that no code length may exceed 15 bits, or 7 for the auxiliary
- * code length alphabet. This function has the same calling
- * semantics as buildhuf(), except that it might modify the freqs
- * array.
- */
-static void deflate_buildhuf(int *freqs, unsigned char *lengths,
-			     int nsyms, int limit)
-{
-    int smallestfreq, totalfreq, nactivesyms;
-    int num, denom, adjust;
-    int i;
-    int maxprob;
-
-    /*
-     * Nasty special case: if the frequency table has fewer than
-     * two non-zero elements, we must invent some, because we can't
-     * have fewer than one bit encoding a symbol.
-     */
-    assert(nsyms >= 2);
-    {
-	int count = 0;
-	for (i = 0; i < nsyms; i++)
-	    if (freqs[i] > 0)
-		count++;
-	if (count < 2) {
-	    for (i = 0; i < nsyms && count > 0; i++)
-		if (freqs[i] == 0) {
-		    freqs[i] = 1;
-		    count--;
-		}
-	}
-    }
-
-    /*
-     * First, try building the Huffman table the normal way. If
-     * this works, it's optimal, so we don't want to mess with it.
-     */
-    buildhuf(freqs, lengths, nsyms);
-
-    for (i = 0; i < nsyms; i++)
-	if (lengths[i] > limit)
-	    break;
-
-    if (i == nsyms)
-	return;			       /* OK */
-
-    /*
-     * The Huffman algorithm can only ever generate a code length
-     * of N bits or more if there is a symbol whose probability is
-     * less than the reciprocal of the (N+2)th Fibonacci number
-     * (counting from F_0=0 and F_1=1), i.e. 1/2584 for N=16, or
-     * 1/55 for N=8. (This is a necessary though not sufficient
-     * condition.)
-     *
-     * Why is this? Well, consider the input symbol with the
-     * smallest probability. Let that probability be x. In order
-     * for this symbol to have a code length of at least 1, the
-     * Huffman algorithm will have to merge it with some other
-     * node; and since x is the smallest probability, the node it
-     * gets merged with must be at least x. Thus, the probability
-     * of the resulting combined node will be at least 2x. Now in
-     * order for our node to reach depth 2, this 2x-node must be
-     * merged again. But what with? We can't assume the node it
-     * merges with is at least 2x, because this one might only be
-     * the _second_ smallest remaining node. But we do know the
-     * node it merges with must be at least x, so our order-2
-     * internal node is at least 3x.
-     *
-     * How small a node can merge with _that_ to get an order-3
-     * internal node? Well, it must be at least 2x, because if it
-     * was smaller than that then it would have been one of the two
-     * smallest nodes in the previous step and been merged at that
-     * point. So at least 3x, plus at least 2x, comes to at least
-     * 5x for an order-3 node.
-     *
-     * And so it goes on: at every stage we must merge our current
-     * node with a node at least as big as the bigger of this one's
-     * two parents, and from this starting point that gives rise to
-     * the Fibonacci sequence. So we find that in order to have a
-     * node n levels deep (i.e. a maximum code length of n), the
-     * overall probability of the root of the entire tree must be
-     * at least F_{n+2} times the probability of the rarest symbol.
-     * In other words, since the overall probability is 1, it is a
-     * necessary condition for a code length of 16 or more that
-     * there must be at least one symbol with probability <=
-     * 1/F_18.
-     *
-     * (To demonstrate that a probability this big really can give
-     * rise to a code length of 16, consider the set of input
-     * frequencies { 1-epsilon, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55,
-     * 89, 144, 233, 377, 610, 987 }, for arbitrarily small
-     * epsilon.)
-     *
-     * So here buildhuf() has returned us an overlong code. So to
-     * ensure it doesn't do it again, we add a constant to all the
-     * (non-zero) symbol frequencies, causing them to become more
-     * balanced and removing the danger. We can then feed the
-     * results back to the standard buildhuf() and be
-     * assert()-level confident that the resulting code lengths
-     * contain nothing outside the permitted range.
-     */
-    assert(limit == 15 || limit == 7);
-    maxprob = (limit == 15 ? 2584 : 55);   /* no point in computing full F_n */
-    totalfreq = nactivesyms = 0;
-    smallestfreq = -1;
-    for (i = 0; i < nsyms; i++) {
-	if (freqs[i] == 0)
-	    continue;
-	if (smallestfreq < 0 || smallestfreq > freqs[i])
-	    smallestfreq = freqs[i];
-	totalfreq += freqs[i];
-	nactivesyms++;
-    }
-    assert(smallestfreq <= totalfreq / maxprob);
-
-    /*
-     * We want to find the smallest integer `adjust' such that
-     * (totalfreq + nactivesyms * adjust) / (smallestfreq +
-     * adjust) is less than maxprob. A bit of algebra tells us
-     * that the threshold value is equal to
-     *
-     *   totalfreq - maxprob * smallestfreq
-     *   ----------------------------------
-     *          maxprob - nactivesyms
-     *
-     * rounded up, of course. And we'll only even be trying
-     * this if
-     */
-    num = totalfreq - smallestfreq * maxprob;
-    denom = maxprob - nactivesyms;
-    adjust = (num + denom - 1) / denom;
-
-    /*
-     * Now add `adjust' to all the input symbol frequencies.
-     */
-    for (i = 0; i < nsyms; i++)
-	if (freqs[i] != 0)
-	    freqs[i] += adjust;
-
-    /*
-     * Rebuild the Huffman tree...
-     */
-    buildhuf(freqs, lengths, nsyms);
-
-    /*
-     * ... and this time it ought to be OK.
-     */
-    for (i = 0; i < nsyms; i++)
-	assert(lengths[i] <= limit);
-}
-
-/*
  * Compute the bit length of a symbol, given the three Huffman
  * trees.
  */
@@ -1079,10 +485,10 @@ static void outblock(deflate_compress_ctx *out,
 	    freqs2[sym]++;
 	}
     }
-    deflate_buildhuf(freqs1, len1, lenof(freqs1), 15);
-    deflate_buildhuf(freqs2, len2, lenof(freqs2), 15);
-    hufcodes(len1, code1, lenof(freqs1));
-    hufcodes(len2, code2, lenof(freqs2));
+    build_huffman_tree(freqs1, len1, lenof(freqs1), 15);
+    build_huffman_tree(freqs2, len2, lenof(freqs2), 15);
+    deflate_hufcodes(len1, code1, lenof(freqs1));
+    deflate_hufcodes(len2, code2, lenof(freqs2));
 
     /*
      * Determine HLIT and HDIST.
@@ -1194,8 +600,8 @@ static void outblock(deflate_compress_ctx *out,
 	    freqs3[sym]++;
 	}
     }
-    deflate_buildhuf(freqs3, len3, lenof(freqs3), 7);
-    hufcodes(len3, code3, lenof(freqs3));
+    build_huffman_tree(freqs3, len3, lenof(freqs3), 7);
+    deflate_hufcodes(len3, code3, lenof(freqs3));
 
     /*
      * Reorder the code length codes into transmission order, and
@@ -1550,7 +956,7 @@ deflate_compress_ctx *deflate_compress_new(int type)
     deflate_compress_ctx *out;
     struct LZ77Context *ectx = snew(struct LZ77Context);
 
-    lz77_init(ectx);
+    lz77_init(ectx, DWINSIZE);
     ectx->literal = literal;
     ectx->match = match;
 
@@ -1584,8 +990,10 @@ deflate_compress_ctx *deflate_compress_new(int type)
 	for (i = 0; i < (int)lenof(out->static_len2); i++)
 	    out->static_len2[i] = 5;
     }
-    hufcodes(out->static_len1, out->static_code1, lenof(out->static_code1));
-    hufcodes(out->static_len2, out->static_code2, lenof(out->static_code2));
+    deflate_hufcodes(out->static_len1, out->static_code1,
+                     lenof(out->static_code1));
+    deflate_hufcodes(out->static_len2, out->static_code2,
+                     lenof(out->static_code2));
     out->sht.len_litlen = out->static_len1;
     out->sht.len_dist = out->static_len2;
     out->sht.len_codelen = NULL;
@@ -1604,7 +1012,7 @@ void deflate_compress_free(deflate_compress_ctx *out)
     struct LZ77Context *ectx = out->lzc;
 
     sfree(out->syms);
-    sfree(ectx->ictx);
+    lz77_cleanup(ectx);
     sfree(ectx);
     sfree(out);
 }
@@ -1773,8 +1181,6 @@ struct table {
 
 #define MAXSYMS 288
 
-#define DWINSIZE 32768
-
 /*
  * Build a single-level decode table for elements
  * [minlength,maxlength) of the provided code/length tables, and
@@ -1852,7 +1258,7 @@ static struct table *mktable(unsigned char *lengths, int nlengths,
     }
 #endif
 
-    maxlen = hufcodes(lengths, codes, nlengths);
+    maxlen = deflate_hufcodes(lengths, codes, nlengths);
 
     if (maxlen < 0) {
 	*error = (maxlen == -1 ? DEFLATE_ERR_LARGE_HUFTABLE :
diff --git a/huffman.c b/huffman.c
new file mode 100644
index 0000000..3fada10
--- /dev/null
+++ b/huffman.c
@@ -0,0 +1,345 @@
+/*
+ * huffman.c: implementation of huffman.h.
+ */
+
+#include <assert.h>
+
+#include "halibut.h"
+#include "huffman.h"
+
+static const unsigned fibonacci[] = {
+    0, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55, 89, 144, 233, 377, 610, 987, 1597,
+    2584, 4181, 6765, 10946, 17711, 28657, 46368, 75025, 121393, 196418,
+    317811, 514229, 832040, 1346269, 2178309, 3524578, 5702887, 9227465,
+    14930352, 24157817, 39088169, 63245986, 102334155, 165580141, 267914296,
+    433494437, 701408733, 1134903170, 1836311903, 2971215073,
+};
+
+/*
+ * Binary heap functions used by buildhuf(). Each one assumes the
+ * heap to be stored in an array of ints, with two ints per node
+ * (user data and key). They take in the old heap length, and
+ * return the new one.
+ */
+#define HEAPPARENT(x) (((x)-2)/4*2)
+#define HEAPLEFT(x) ((x)*2+2)
+#define HEAPRIGHT(x) ((x)*2+4)
+static int addheap(int *heap, int len, int userdata, int key)
+{
+    int me, dad, tmp;
+
+    me = len;
+    heap[len++] = userdata;
+    heap[len++] = key;
+
+    while (me > 0) {
+	dad = HEAPPARENT(me);
+	if (heap[me+1] < heap[dad+1]) {
+	    tmp = heap[me]; heap[me] = heap[dad]; heap[dad] = tmp;
+	    tmp = heap[me+1]; heap[me+1] = heap[dad+1]; heap[dad+1] = tmp;
+	    me = dad;
+	} else
+	    break;
+    }
+
+    return len;
+}
+static int rmheap(int *heap, int len, int *userdata, int *key)
+{
+    int me, lc, rc, c, tmp;
+
+    len -= 2;
+    *userdata = heap[0];
+    *key = heap[1];
+    heap[0] = heap[len];
+    heap[1] = heap[len+1];
+
+    me = 0;
+
+    while (1) {
+	lc = HEAPLEFT(me);
+	rc = HEAPRIGHT(me);
+	if (lc >= len)
+	    break;
+	else if (rc >= len || heap[lc+1] < heap[rc+1])
+	    c = lc;
+	else
+	    c = rc;
+	if (heap[me+1] > heap[c+1]) {
+	    tmp = heap[me]; heap[me] = heap[c]; heap[c] = tmp;
+	    tmp = heap[me+1]; heap[me+1] = heap[c+1]; heap[c+1] = tmp;
+	} else
+	    break;
+	me = c;
+    }
+
+    return len;
+}
+
+struct hufscratch {
+    int *parent, *length, *heap;
+};
+
+/*
+ * The core of the Huffman algorithm: takes an input array of
+ * symbol frequencies, and produces an output array of code
+ * lengths.
+ *
+ * We cap the output code lengths to fit in an unsigned char (which is
+ * safe since our clients will impose some smaller limit on code
+ * length anyway). So if you see 255 in the output, it means '255 or
+ * more' and is a sign that whatever limit you really wanted has
+ * certainly been overflowed.
+ */
+static void buildhuf(struct hufscratch *sc, const int *freqs,
+                     unsigned char *lengths, int nsyms)
+{
+    int heapsize;
+    int i, j, n;
+    int si, sj;
+
+    for (i = 0; i < nsyms; i++)
+        sc->parent[i] = 0;
+
+    /*
+     * Begin by building the heap.
+     */
+    heapsize = 0;
+    for (i = 0; i < nsyms; i++)
+	if (freqs[i] > 0)	       /* leave unused symbols out totally */
+	    heapsize = addheap(sc->heap, heapsize, i, freqs[i]);
+
+    /*
+     * Now repeatedly take two elements off the heap and merge
+     * them.
+     */
+    n = nsyms;
+    while (heapsize > 2) {
+	heapsize = rmheap(sc->heap, heapsize, &i, &si);
+	heapsize = rmheap(sc->heap, heapsize, &j, &sj);
+	sc->parent[i] = n;
+	sc->parent[j] = n;
+	heapsize = addheap(sc->heap, heapsize, n, si + sj);
+	n++;
+    }
+
+    /*
+     * Now we have our tree, in the form of a link from each node
+     * to the index of its parent. Count back down the tree to
+     * determine the code lengths.
+     */
+    for (i = 0; i < 2*nsyms+1; i++)
+        sc->length[i] = 0;
+    /* The tree root has length 0 after that, which is correct. */
+    for (i = n-1; i-- ;)
+	if (sc->parent[i] > 0)
+	    sc->length[i] = 1 + sc->length[sc->parent[i]];
+
+    /*
+     * And that's it. (Simple, wasn't it?) Copy the lengths into
+     * the output array and leave.
+     * 
+     * Here we cap lengths to fit in unsigned char.
+     */
+    for (i = 0; i < nsyms; i++)
+	lengths[i] = (sc->length[i] > 255 ? 255 : sc->length[i]);
+}
+
+/*
+ * Wrapper around buildhuf() which enforces the restriction on code
+ * length.
+ */
+void build_huffman_tree(int *freqs, unsigned char *lengths,
+                        int nsyms, int limit)
+{
+    struct hufscratch hsc, *sc = &hsc;
+    int smallestfreq, totalfreq, nactivesyms;
+    int num, denom, adjust;
+    int i;
+    int maxprob;
+
+    sc->parent = snewn(2*nsyms+1, int);
+    sc->length = snewn(2*nsyms+1, int);
+    sc->heap = snewn(2*nsyms, int);
+
+    /*
+     * Nasty special case: if the frequency table has fewer than
+     * two non-zero elements, we must invent some, because we can't
+     * have fewer than one bit encoding a symbol.
+     */
+    assert(nsyms >= 2);
+    {
+	int count = 0;
+	for (i = 0; i < nsyms; i++)
+	    if (freqs[i] > 0)
+		count++;
+	if (count < 2) {
+	    for (i = 0; i < nsyms && count > 0; i++)
+		if (freqs[i] == 0) {
+		    freqs[i] = 1;
+		    count--;
+		}
+	}
+    }
+
+    /*
+     * First, try building the Huffman table the normal way. If
+     * this works, it's optimal, so we don't want to mess with it.
+     */
+    buildhuf(sc, freqs, lengths, nsyms);
+
+    for (i = 0; i < nsyms; i++)
+	if (lengths[i] > limit)
+	    break;
+
+    if (i == nsyms)
+        goto cleanup;                  /* OK */
+
+    /*
+     * The Huffman algorithm can only ever generate a code length
+     * of N bits or more if there is a symbol whose probability is
+     * less than the reciprocal of the (N+2)th Fibonacci number
+     * (counting from F_0=0 and F_1=1), i.e. 1/2584 for N=16, or
+     * 1/55 for N=8. (This is a necessary though not sufficient
+     * condition.)
+     *
+     * Why is this? Well, consider the input symbol with the
+     * smallest probability. Let that probability be x. In order
+     * for this symbol to have a code length of at least 1, the
+     * Huffman algorithm will have to merge it with some other
+     * node; and since x is the smallest probability, the node it
+     * gets merged with must be at least x. Thus, the probability
+     * of the resulting combined node will be at least 2x. Now in
+     * order for our node to reach depth 2, this 2x-node must be
+     * merged again. But what with? We can't assume the node it
+     * merges with is at least 2x, because this one might only be
+     * the _second_ smallest remaining node. But we do know the
+     * node it merges with must be at least x, so our order-2
+     * internal node is at least 3x.
+     *
+     * How small a node can merge with _that_ to get an order-3
+     * internal node? Well, it must be at least 2x, because if it
+     * was smaller than that then it would have been one of the two
+     * smallest nodes in the previous step and been merged at that
+     * point. So at least 3x, plus at least 2x, comes to at least
+     * 5x for an order-3 node.
+     *
+     * And so it goes on: at every stage we must merge our current
+     * node with a node at least as big as the bigger of this one's
+     * two parents, and from this starting point that gives rise to
+     * the Fibonacci sequence. So we find that in order to have a
+     * node n levels deep (i.e. a maximum code length of n), the
+     * overall probability of the root of the entire tree must be
+     * at least F_{n+2} times the probability of the rarest symbol.
+     * In other words, since the overall probability is 1, it is a
+     * necessary condition for a code length of 16 or more that
+     * there must be at least one symbol with probability <=
+     * 1/F_18.
+     *
+     * (To demonstrate that a probability this big really can give
+     * rise to a code length of 16, consider the set of input
+     * frequencies { 1-epsilon, 1, 1, 2, 3, 5, 8, 13, 21, 34, 55,
+     * 89, 144, 233, 377, 610, 987 }, for arbitrarily small
+     * epsilon.)
+     *
+     * So here buildhuf() has returned us an overlong code. So to
+     * ensure it doesn't do it again, we add a constant to all the
+     * (non-zero) symbol frequencies, causing them to become more
+     * balanced and removing the danger. We can then feed the
+     * results back to the standard buildhuf() and be
+     * assert()-level confident that the resulting code lengths
+     * contain nothing outside the permitted range.
+     */
+    assert(limit+3 < (int)lenof(fibonacci));
+    maxprob = fibonacci[limit+3];
+    totalfreq = nactivesyms = 0;
+    smallestfreq = -1;
+    for (i = 0; i < nsyms; i++) {
+	if (freqs[i] == 0)
+	    continue;
+	if (smallestfreq < 0 || smallestfreq > freqs[i])
+	    smallestfreq = freqs[i];
+	totalfreq += freqs[i];
+	nactivesyms++;
+    }
+    assert(smallestfreq <= totalfreq / maxprob);
+
+    /*
+     * We want to find the smallest integer `adjust' such that
+     * (totalfreq + nactivesyms * adjust) / (smallestfreq +
+     * adjust) is less than maxprob. A bit of algebra tells us
+     * that the threshold value is equal to
+     *
+     *   totalfreq - maxprob * smallestfreq
+     *   ----------------------------------
+     *          maxprob - nactivesyms
+     *
+     * rounded up, of course. And we'll only even be trying
+     * this if
+     */
+    num = totalfreq - smallestfreq * maxprob;
+    denom = maxprob - nactivesyms;
+    adjust = (num + denom - 1) / denom;
+
+    /*
+     * Now add `adjust' to all the input symbol frequencies.
+     */
+    for (i = 0; i < nsyms; i++)
+	if (freqs[i] != 0)
+	    freqs[i] += adjust;
+
+    /*
+     * Rebuild the Huffman tree...
+     */
+    buildhuf(sc, freqs, lengths, nsyms);
+
+    /*
+     * ... and this time it ought to be OK.
+     */
+    for (i = 0; i < nsyms; i++)
+	assert(lengths[i] <= limit);
+
+  cleanup:
+    /*
+     * Finally, free our scratch space.
+     */
+    sfree(sc->parent);
+    sfree(sc->length);
+    sfree(sc->heap);
+}
+
+#define MAXCODELEN 31                  /* codes must fit in an int */
+
+int compute_huffman_codes(const unsigned char *lengths, int *codes, int nsyms)
+{
+    unsigned count[MAXCODELEN], startcode[MAXCODELEN], code;
+    int maxlen, i;
+
+    /* Count the codes of each length. */
+    maxlen = 0;
+    for (i = 1; i < MAXCODELEN; i++)
+	count[i] = 0;
+    for (i = 0; i < nsyms; i++) {
+	count[lengths[i]]++;
+	if (maxlen < lengths[i])
+	    maxlen = lengths[i];
+    }
+
+    /* Determine the starting code for each length block. */
+    code = 0;
+    for (i = 1; i < MAXCODELEN; i++) {
+	startcode[i] = code;
+	code += count[i];
+	if (code > (1U << i))
+	    maxlen = -1;	       /* overcommitted */
+	code <<= 1;
+    }
+    if (code < (1U << MAXCODELEN))
+	maxlen = -2;		       /* undercommitted */
+
+    /* Determine the code for each symbol. */
+    for (i = 0; i < nsyms; i++)
+	codes[i] = startcode[lengths[i]]++;
+
+    return maxlen;
+}
diff --git a/huffman.h b/huffman.h
new file mode 100644
index 0000000..91faa8e
--- /dev/null
+++ b/huffman.h
@@ -0,0 +1,33 @@
+/*
+ * huffman.h: Huffman tree-building routines common to Deflate and LZX.
+ */
+
+/*
+ * Take an input array 'freqs' of size 'nsyms' giving each symbol's
+ * frequency. Return an output array 'lengths' of the same size giving
+ * each symbol's code length. Enforce during construction that no code
+ * length is greater than 'limit'.
+ *
+ * The 'freqs' array may be modified, as a side effect of the limit
+ * enforcement.
+ */
+void build_huffman_tree(int *freqs, unsigned char *lengths,
+                        int nsyms, int limit);
+
+/*
+ * Given a sequence of Huffman code lengths, compute the actual code
+ * values. Each one is returned in codes[i] and occupies the bottom
+ * lengths[i] bits of the integer. They are in natural big-endian
+ * format, i.e. the initial bit of the code, governing the choice of
+ * direction at the root node of the notional decode tree, is in the
+ * most significant bit position.
+ *
+ * Returns the maximum code length found. Can also return -1 to
+ * indicate the table was overcommitted (too many or too short codes
+ * to exactly cover the possible space), which is a fatal error (the
+ * output codes will not form a usable Huffman tree), or -2 to
+ * indicate it was undercommitted (too few or too long codes), which
+ * is a non-fatal error (the resulting tree would be usable, just
+ * inefficient).
+ */
+int compute_huffman_codes(const unsigned char *lengths, int *codes, int nsyms);
diff --git a/lz77.c b/lz77.c
new file mode 100644
index 0000000..c0528a1
--- /dev/null
+++ b/lz77.c
@@ -0,0 +1,263 @@
+/*
+ * lz77.c: common LZ77 compression code between Deflate and LZX.
+ */
+
+#include "halibut.h" /* only for snew, sfree etc */
+#include "lz77.h"
+
+/*
+ * Modifiable parameters.
+ */
+#define HASHMAX 2039		       /* one more than max hash value */
+#define MAXMATCH 32		       /* how many matches we track */
+#define HASHCHARS 3		       /* how many chars make a hash */
+
+/*
+ * This compressor takes a less slapdash approach than the
+ * gzip/zlib one. Rather than allowing our hash chains to fall into
+ * disuse near the far end, we keep them doubly linked so we can
+ * _find_ the far end, and then every time we add a new byte to the
+ * window (thus rolling round by one and removing the previous
+ * byte), we can carefully remove the hash chain entry.
+ */
+
+#define INVALID -1		       /* invalid hash _and_ invalid offset */
+struct WindowEntry {
+    short next, prev;		       /* array indices within the window */
+    short hashval;
+};
+
+struct HashEntry {
+    short first;		       /* window index of first in chain */
+};
+
+struct Match {
+    int distance, len;
+};
+
+struct LZ77InternalContext {
+    int winsize;
+    struct WindowEntry *win; /* [winsize] */
+    unsigned char *data; /* [winsize] */
+
+    int winpos;
+    struct HashEntry hashtab[HASHMAX];
+    unsigned char pending[HASHCHARS];
+    int npending;
+};
+
+static int lz77_hash(const unsigned char *data)
+{
+    return (257 * data[0] + 263 * data[1] + 269 * data[2]) % HASHMAX;
+}
+
+void lz77_init(struct LZ77Context *ctx, int winsize)
+{
+    struct LZ77InternalContext *st;
+    int i;
+
+    st = snew(struct LZ77InternalContext);
+
+    ctx->ictx = st;
+
+    st->winsize = winsize;
+    st->win = snewn(st->winsize, struct WindowEntry);
+    st->data = snewn(st->winsize, unsigned char);
+
+    for (i = 0; i < st->winsize; i++)
+	st->win[i].next = st->win[i].prev = st->win[i].hashval = INVALID;
+    for (i = 0; i < HASHMAX; i++)
+	st->hashtab[i].first = INVALID;
+    st->winpos = 0;
+
+    st->npending = 0;
+}
+
+void lz77_cleanup(struct LZ77Context *ctx)
+{
+    struct LZ77InternalContext *st = ctx->ictx;
+    sfree(st->win);
+    sfree(st->data);
+    sfree(st);
+}
+
+static void lz77_advance(struct LZ77InternalContext *st,
+			 unsigned char c, int hash)
+{
+    int off;
+
+    /*
+     * Remove the hash entry at winpos from the tail of its chain,
+     * or empty the chain if it's the only thing on the chain.
+     */
+    if (st->win[st->winpos].prev != INVALID) {
+	st->win[st->win[st->winpos].prev].next = INVALID;
+    } else if (st->win[st->winpos].hashval != INVALID) {
+	st->hashtab[st->win[st->winpos].hashval].first = INVALID;
+    }
+
+    /*
+     * Create a new entry at winpos and add it to the head of its
+     * hash chain.
+     */
+    st->win[st->winpos].hashval = hash;
+    st->win[st->winpos].prev = INVALID;
+    off = st->win[st->winpos].next = st->hashtab[hash].first;
+    st->hashtab[hash].first = st->winpos;
+    if (off != INVALID)
+	st->win[off].prev = st->winpos;
+    st->data[st->winpos] = c;
+
+    /*
+     * Advance the window pointer.
+     */
+    st->winpos = (st->winpos + 1) % st->winsize;
+}
+
+#define CHARAT(k) ( (k)<0 ? st->data[(st->winpos+k)%st->winsize] : data[k] )
+
+void lz77_compress(struct LZ77Context *ctx,
+                   const unsigned char *data, int len, int compress)
+{
+    struct LZ77InternalContext *st = ctx->ictx;
+    int i, hash, distance, off, nmatch, matchlen, advance;
+    struct Match defermatch, matches[MAXMATCH];
+    int deferchr;
+
+    /*
+     * Add any pending characters from last time to the window. (We
+     * might not be able to.)
+     */
+    for (i = 0; i < st->npending; i++) {
+	unsigned char foo[HASHCHARS];
+	int j;
+	if (len + st->npending - i < HASHCHARS) {
+	    /* Update the pending array. */
+	    for (j = i; j < st->npending; j++)
+		st->pending[j - i] = st->pending[j];
+	    break;
+	}
+	for (j = 0; j < HASHCHARS; j++)
+	    foo[j] = (i + j < st->npending ? st->pending[i + j] :
+		      data[i + j - st->npending]);
+	lz77_advance(st, foo[0], lz77_hash(foo));
+    }
+    st->npending -= i;
+
+    defermatch.len = 0;
+    deferchr = '\0';
+    while (len > 0) {
+
+	/* Don't even look for a match, if we're not compressing. */
+	if (compress && len >= HASHCHARS) {
+	    /*
+	     * Hash the next few characters.
+	     */
+	    hash = lz77_hash(data);
+
+	    /*
+	     * Look the hash up in the corresponding hash chain and see
+	     * what we can find.
+	     */
+	    nmatch = 0;
+	    for (off = st->hashtab[hash].first;
+		 off != INVALID; off = st->win[off].next) {
+		/* distance = 1       if off == st->winpos-1 */
+		/* distance = winsize if off == st->winpos   */
+		distance = st->winsize -
+                    (off + st->winsize - st->winpos) % st->winsize;
+		for (i = 0; i < HASHCHARS; i++)
+		    if (CHARAT(i) != CHARAT(i - distance))
+			break;
+		if (i == HASHCHARS) {
+		    matches[nmatch].distance = distance;
+		    matches[nmatch].len = 3;
+		    if (++nmatch >= MAXMATCH)
+			break;
+		}
+	    }
+	} else {
+	    nmatch = 0;
+	    hash = INVALID;
+	}
+
+	if (nmatch > 0) {
+	    /*
+	     * We've now filled up matches[] with nmatch potential
+	     * matches. Follow them down to find the longest. (We
+	     * assume here that it's always worth favouring a
+	     * longer match over a shorter one.)
+	     */
+	    matchlen = HASHCHARS;
+	    while (matchlen < len) {
+		int j;
+		for (i = j = 0; i < nmatch; i++) {
+		    if (CHARAT(matchlen) ==
+			CHARAT(matchlen - matches[i].distance)) {
+			matches[j++] = matches[i];
+		    }
+		}
+		if (j == 0)
+		    break;
+		matchlen++;
+		nmatch = j;
+	    }
+
+	    /*
+	     * We've now got all the longest matches. We favour the
+	     * shorter distances, which means we go with matches[0].
+	     * So see if we want to defer it or throw it away.
+	     */
+	    matches[0].len = matchlen;
+	    if (defermatch.len > 0) {
+		if (matches[0].len > defermatch.len + 1) {
+		    /* We have a better match. Emit the deferred char,
+		     * and defer this match. */
+		    ctx->literal(ctx, (unsigned char) deferchr);
+		    defermatch = matches[0];
+		    deferchr = data[0];
+		    advance = 1;
+		} else {
+		    /* We don't have a better match. Do the deferred one. */
+		    ctx->match(ctx, defermatch.distance, defermatch.len);
+		    advance = defermatch.len - 1;
+		    defermatch.len = 0;
+		}
+	    } else {
+		/* There was no deferred match. Defer this one. */
+		defermatch = matches[0];
+		deferchr = data[0];
+		advance = 1;
+	    }
+	} else {
+	    /*
+	     * We found no matches. Emit the deferred match, if
+	     * any; otherwise emit a literal.
+	     */
+	    if (defermatch.len > 0) {
+		ctx->match(ctx, defermatch.distance, defermatch.len);
+		advance = defermatch.len - 1;
+		defermatch.len = 0;
+	    } else {
+		ctx->literal(ctx, data[0]);
+		advance = 1;
+	    }
+	}
+
+	/*
+	 * Now advance the position by `advance' characters,
+	 * keeping the window and hash chains consistent.
+	 */
+	while (advance > 0) {
+	    if (len >= HASHCHARS) {
+		lz77_advance(st, *data, lz77_hash(data));
+	    } else {
+		st->pending[st->npending++] = *data;
+	    }
+	    data++;
+	    len--;
+	    advance--;
+	}
+    }
+}
+
diff --git a/lz77.h b/lz77.h
new file mode 100644
index 0000000..58cb9bb
--- /dev/null
+++ b/lz77.h
@@ -0,0 +1,35 @@
+/*
+ * lz77.h: common LZ77 compression code between Deflate and LZX.
+ */
+
+/*
+ * The parameter structure you pass to the lz77.c routines to give
+ * them a way to return the compressed output stream.
+ */
+struct LZ77InternalContext;
+struct LZ77Context {
+    struct LZ77InternalContext *ictx;
+    void *userdata;
+    void (*literal) (struct LZ77Context *ctx, unsigned char c);
+    void (*match) (struct LZ77Context *ctx, int distance, int len);
+};
+
+/*
+ * Initialise the private fields of an LZ77Context. It's up to the
+ * user to initialise the public fields.
+ */
+void lz77_init(struct LZ77Context *ctx, int winsize);
+
+/*
+ * Clean up the private fields of an LZ77Context.
+ */
+void lz77_cleanup(struct LZ77Context *ctx);
+
+/*
+ * Supply data to be compressed. Will update the private fields of
+ * the LZ77Context, and will call literal() and match() to output.
+ * If `compress' is FALSE, it will never emit a match, but will
+ * instead call literal() for everything.
+ */
+void lz77_compress(struct LZ77Context *ctx,
+                   const unsigned char *data, int len, int compress);
author	Simon Tatham <anakin@pobox.com>	2017-05-10 07:10:14 +0100
committer	Simon Tatham <anakin@pobox.com>	2017-05-13 18:22:09 +0100
commit	715a3bef377aeee898c427be99b1acf440b4a5e5 (patch)
tree	6ff1d6eecb42df91a4e174644adb0954da44d9fa
parent	e446ba3cf1f72dca390e9c9a5fe987f3dcccd440 (diff)
download	halibut-715a3bef377aeee898c427be99b1acf440b4a5e5.zip halibut-715a3bef377aeee898c427be99b1acf440b4a5e5.tar.gz halibut-715a3bef377aeee898c427be99b1acf440b4a5e5.tar.bz2 halibut-715a3bef377aeee898c427be99b1acf440b4a5e5.tar.xz