16 files changed, 2768 insertions, 180 deletions
diff --git a/Buildscr b/Buildscr
index d954d1b..4ddbce9 100644
--- a/Buildscr
+++ b/Buildscr
@@ -35,6 +35,7 @@ in halibut/doc do make
 deliver halibut/*.tar.gz $@
 deliver halibut/doc/halibut.pdf $@
 deliver halibut/doc/halibut.txt $@
+deliver halibut/doc/halibut.chm $@
 deliver halibut/doc/*.html $@
 
 # FIXME: it'd be nice to add a Windows delegation here so we can
diff --git a/Makefile b/Makefile
index 6264624..c9499e4 100644
--- a/Makefile
+++ b/Makefile
@@ -95,7 +95,7 @@ include $(LIBCHARSET_SRCDIR)Makefile
 MODULES := main malloc ustring error help licence version misc tree234
 MODULES += input in_afm in_pf in_sfnt keywords contents index biblio
 MODULES += bk_text bk_html bk_whlp bk_man bk_info bk_paper bk_ps bk_pdf
-MODULES += winhelp deflate lz77 huffman psdata wcwidth
+MODULES += winhelp winchm deflate lzx lz77 huffman psdata wcwidth
 
 OBJECTS := $(addsuffix .o,$(MODULES)) $(LIBCHARSET_OBJS)
 DEPS := $(addsuffix .d,$(MODULES))
diff --git a/bk_html.c b/bk_html.c
index 9a08fa2..784e3ca 100644
--- a/bk_html.c
+++ b/bk_html.c
@@ -24,6 +24,7 @@
 #include <assert.h>
 #include <limits.h>
 #include "halibut.h"
+#include "winchm.h"
 
 #define is_heading_type(type) ( (type) == para_Title || \
 				(type) == para_Chapter || \
@@ -57,6 +58,8 @@ typedef struct {
     char *chm_filename, *hhp_filename, *hhc_filename, *hhk_filename;
     char **template_fragments;
     int ntfragments;
+    char **chm_extrafiles, **chm_extranames;
+    int nchmextrafiles, chmextrafilesize;
     char *head_end, *body_start, *body_end, *addr_start, *addr_end;
     char *body_tag, *nav_attr;
     wchar_t *author, *description;
@@ -94,6 +97,10 @@ struct htmlfile {
      * more than once.
      */
     int temp;
+    /*
+     * CHM section structure, if we're generating a CHM.
+     */
+    struct chm_section *chmsect;
 };
 
 struct htmlsect {
@@ -193,6 +200,48 @@ void ho_setup_stdio(htmloutput *ho, FILE *fp)
     ho->write = ho_write_stdio;
     ho->write_ctx = fp;
 }
+
+struct chm_output {
+    struct chm *chm;
+    char *filename;
+    rdstringc rs;
+};
+void ho_write_chm(void *write_ctx, const char *data, int len)
+{
+    struct chm_output *co = (struct chm_output *)write_ctx;
+    if (len == -1) {
+        chm_add_file(co->chm, co->filename, co->rs.text, co->rs.pos);
+        sfree(co->filename);
+        sfree(co->rs.text);
+        sfree(co);
+    } else {
+        rdaddsn(&co->rs, data, len);
+    }
+}
+void ho_setup_chm(htmloutput *ho, struct chm *chm, const char *filename)
+{
+    struct chm_output *co = snew(struct chm_output);
+
+    co->chm = chm;
+    co->rs = empty_rdstringc;
+    co->filename = dupstr(filename);
+
+    ho->write_ctx = co;
+    ho->write = ho_write_chm;
+}
+
+void ho_write_rdstringc(void *write_ctx, const char *data, int len)
+{
+    rdstringc *rs = (rdstringc *)write_ctx;
+    if (len > 0)
+        rdaddsn(rs, data, len);
+}
+void ho_setup_rdstringc(htmloutput *ho, rdstringc *rs)
+{
+    ho->write_ctx = rs;
+    ho->write = ho_write_rdstringc;
+}
+
 void ho_string(htmloutput *ho, const char *string)
 {
     ho->write(ho->write_ctx, string, strlen(string));
@@ -286,14 +335,15 @@ static void html_section_title(htmloutput *ho, htmlsect *s,
 			       htmlfile *thisfile, keywordlist *keywords,
 			       htmlconfig *cfg, int real);
 
-static htmlconfig html_configure(paragraph *source) {
+static htmlconfig html_configure(paragraph *source, int chm_mode)
+{
     htmlconfig ret;
     paragraph *p;
 
     /*
      * Defaults.
      */
-    ret.leaf_level = 2;
+    ret.leaf_level = chm_mode ? -1 /* infinite */ : 2;
     ret.achapter.just_numbers = FALSE;
     ret.achapter.number_at_all = TRUE;
     ret.achapter.number_suffix = L": ";
@@ -305,20 +355,29 @@ static htmlconfig html_configure(paragraph *source) {
     ret.ncdepths = 0;
     ret.contents_depths = 0;
     ret.visible_version_id = TRUE;
-    ret.address_section = TRUE;
+    ret.address_section = chm_mode ? FALSE : TRUE;
     ret.leaf_contains_contents = FALSE;
     ret.leaf_smallest_contents = 4;
-    ret.navlinks = TRUE;
+    ret.navlinks = chm_mode ? FALSE : TRUE;
     ret.rellinks = TRUE;
     ret.single_filename = dupstr("Manual.html");
     ret.contents_filename = dupstr("Contents.html");
     ret.index_filename = dupstr("IndexPage.html");
     ret.template_filename = dupstr("%n.html");
-    ret.chm_filename = ret.hhp_filename = NULL;
-    ret.hhc_filename = ret.hhk_filename = NULL;
+    if (chm_mode) {
+        ret.chm_filename = dupstr("output.chm");
+        ret.hhc_filename = dupstr("contents.hhc");
+        ret.hhk_filename = dupstr("index.hhk");
+        ret.hhp_filename = NULL;
+    } else {
+        ret.chm_filename = ret.hhp_filename = NULL;
+        ret.hhc_filename = ret.hhk_filename = NULL;
+    }
     ret.ntfragments = 1;
     ret.template_fragments = snewn(ret.ntfragments, char *);
     ret.template_fragments[0] = dupstr("%b");
+    ret.chm_extrafiles = ret.chm_extranames = NULL;
+    ret.nchmextrafiles = ret.chmextrafilesize = 0;
     ret.head_end = ret.body_tag = ret.body_start = ret.body_end =
 	ret.addr_start = ret.addr_end = ret.nav_attr = NULL;
     ret.author = ret.description = NULL;
@@ -368,11 +427,20 @@ static htmlconfig html_configure(paragraph *source) {
     for (p = source; p; p = p->next) {
 	if (p->type == para_Config) {
 	    wchar_t *k = p->keyword;
+            int generic = FALSE;
 
-            if (!ustrnicmp(k, L"html-", 5)) {
+            if (!chm_mode && !ustrnicmp(k, L"html-", 5)) {
                 k += 5;
-            } else if (!ustrnicmp(k, L"xhtml-", 6)) {
+            } else if (!chm_mode && !ustrnicmp(k, L"xhtml-", 6)) {
                 k += 6;
+            } else if (chm_mode && !ustrnicmp(k, L"chm-", 4)) {
+                k += 4;
+            } else if (!ustrnicmp(k, L"htmlall-", 8)) {
+                k += 8;
+                /* In this mode, only accept directives that don't
+                 * vary completely between the HTML and CHM output
+                 * types. */
+                generic = TRUE;
             } else {
                 continue;
             }
@@ -578,39 +646,78 @@ static htmlconfig html_configure(paragraph *source) {
 		ret.pre_versionid = uadv(k);
 	    } else if (!ustricmp(k, L"post-versionid")) {
 		ret.post_versionid = uadv(k);
-	    } else if (!ustricmp(k, L"mshtmlhelp-chm")) {
+	    } else if (!generic && !ustricmp(
+                           k, chm_mode ? L"filename" : L"mshtmlhelp-chm")) {
 		sfree(ret.chm_filename);
 		ret.chm_filename = dupstr(adv(p->origkeyword));
-	    } else if (!ustricmp(k, L"mshtmlhelp-project")) {
-		sfree(ret.hhp_filename);
-		ret.hhp_filename = dupstr(adv(p->origkeyword));
-	    } else if (!ustricmp(k, L"mshtmlhelp-contents")) {
+	    } else if (!generic && !ustricmp(
+                           k, chm_mode ? L"contents-name" :
+                           L"mshtmlhelp-contents")) {
 		sfree(ret.hhc_filename);
 		ret.hhc_filename = dupstr(adv(p->origkeyword));
-	    } else if (!ustricmp(k, L"mshtmlhelp-index")) {
+	    } else if (!generic && !ustricmp(
+                           k, chm_mode ? L"index-name" :
+                           L"mshtmlhelp-index")) {
 		sfree(ret.hhk_filename);
 		ret.hhk_filename = dupstr(adv(p->origkeyword));
+	    } else if (!generic && !chm_mode &&
+                       !ustricmp(k, L"mshtmlhelp-project")) {
+		sfree(ret.hhp_filename);
+		ret.hhp_filename = dupstr(adv(p->origkeyword));
+	    } else if (!generic && chm_mode &&
+                       !ustricmp(k, L"extra-file")) {
+                char *diskname, *chmname;
+
+                diskname = adv(p->origkeyword);
+                if (*diskname) {
+                    chmname = adv(diskname);
+                    if (!*chmname)
+                        chmname = diskname;
+
+                    if (chmname[0] == '#' || chmname[0] == '$')
+                        err_chm_badname(&p->fpos, chmname);
+
+                    if (ret.nchmextrafiles >= ret.chmextrafilesize) {
+                        ret.chmextrafilesize = ret.nchmextrafiles * 5 / 4 + 32;
+                        ret.chm_extrafiles = sresize(
+                            ret.chm_extrafiles, ret.chmextrafilesize, char *);
+                        ret.chm_extranames = sresize(
+                            ret.chm_extranames, ret.chmextrafilesize, char *);
+                    }
+                    ret.chm_extrafiles[ret.nchmextrafiles] = dupstr(diskname);
+                    ret.chm_extranames[ret.nchmextrafiles] =
+                        dupstr(chmname);
+                    ret.nchmextrafiles++;
+                }
 	    }
 	}
     }
 
-    /*
-     * Enforce that the CHM and HHP filenames must either be both
-     * present or both absent. If one is present but not the other,
-     * turn both off.
-     */
-    if (!ret.chm_filename ^ !ret.hhp_filename) {
-	err_chmnames();
-	sfree(ret.chm_filename); ret.chm_filename = NULL;
-	sfree(ret.hhp_filename); ret.hhp_filename = NULL;
-    }
-    /*
-     * And if we're not generating an HHP, there's no need for HHC
-     * or HHK.
-     */
-    if (!ret.hhp_filename) {
-	sfree(ret.hhc_filename); ret.hhc_filename = NULL;
-	sfree(ret.hhk_filename); ret.hhk_filename = NULL;
+    if (!chm_mode) {
+        /*
+         * If we're in HTML mode but using the old-style options to
+         * output HTML Help Workshop auxiliary files, do some
+         * consistency checking.
+         */
+
+        /*
+         * Enforce that the CHM and HHP filenames must either be both
+         * present or both absent. If one is present but not the other,
+         * turn both off.
+         */
+        if (!ret.chm_filename ^ !ret.hhp_filename) {
+            err_chmnames();
+            sfree(ret.chm_filename); ret.chm_filename = NULL;
+            sfree(ret.hhp_filename); ret.hhp_filename = NULL;
+        }
+        /*
+         * And if we're not generating an HHP, there's no need for HHC
+         * or HHK.
+         */
+        if (!ret.hhp_filename) {
+            sfree(ret.hhc_filename); ret.hhc_filename = NULL;
+            sfree(ret.hhk_filename); ret.hhk_filename = NULL;
+        }
     }
 
     /*
@@ -644,20 +751,23 @@ paragraph *html_config_filename(char *filename)
     return p;
 }
 
-void html_backend(paragraph *sourceform, keywordlist *keywords,
-		  indexdata *idx, void *unused)
+paragraph *chm_config_filename(char *filename)
+{
+    return cmdline_cfg_simple("chm-filename", filename, NULL);
+}
+
+static void html_backend_common(paragraph *sourceform, keywordlist *keywords,
+                                indexdata *idx, int chm_mode)
 {
     paragraph *p;
     htmlsect *topsect;
     htmlconfig conf;
     htmlfilelist files = { NULL, NULL, NULL, NULL, NULL, NULL };
     htmlsectlist sects = { NULL, NULL }, nonsects = { NULL, NULL };
-    char *hhk_filename;
-    int has_index;
+    struct chm *chm = NULL;
+    int has_index, hhk_needed = FALSE;
 
-    IGNORE(unused);
-
-    conf = html_configure(sourceform);
+    conf = html_configure(sourceform, chm_mode);
 
     /*
      * We're going to make heavy use of paragraphs' private data
@@ -732,10 +842,10 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 	/*
 	 * And the index, if we have one. Note that we don't output
 	 * an index as an HTML file if we're outputting one as a
-	 * .HHK.
+	 * .HHK (in either of the HTML or CHM output modes).
 	 */
 	has_index = (count234(idx->entries) > 0);
-	if (has_index && !conf.hhk_filename) {
+	if (has_index && !chm_mode && !conf.hhk_filename) {
 	    sect = html_new_sect(&sects, NULL, &conf);
 	    sect->text = NULL;
 	    sect->type = INDEX;
@@ -901,6 +1011,9 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 	}
     }
 
+    if (chm_mode)
+        chm = chm_new();
+
     /*
      * Now we're ready to write out the actual HTML files.
      * 
@@ -936,7 +1049,9 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 #define listname(lt) ( (lt)==UL ? "ul" : (lt)==OL ? "ol" : "dl" )
 #define itemname(lt) ( (lt)==LI ? "li" : (lt)==DT ? "dt" : "dd" )
 
-	    if (!strcmp(f->filename, "-"))
+            if (chm)
+                ho_setup_chm(&ho, chm, f->filename);
+	    else if (!strcmp(f->filename, "-"))
                 ho_setup_stdio(&ho, stdout);
             else
                 ho_setup_file(&ho, f->filename);
@@ -1728,8 +1843,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
      * whether there's even going to _be_ an index file: we omit it
      * if the index contains nothing.
      */
-    hhk_filename = conf.hhk_filename;
-    if (hhk_filename) {
+    if (chm_mode || conf.hhk_filename) {
 	int ok = FALSE;
 	int i;
 	indexentry *entry;
@@ -1743,8 +1857,138 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 	    }
 	}
 
-	if (!ok)
-	    hhk_filename = NULL;
+	if (ok)
+	    hhk_needed = TRUE;
+    }
+
+    /*
+     * If we're doing direct CHM output, tell winchm.c all the things
+     * it will need to know aside from the various HTML files'
+     * contents.
+     */
+    if (chm) {
+        chm_contents_filename(chm, conf.hhc_filename);
+        if (has_index)
+            chm_index_filename(chm, conf.hhk_filename);
+        chm_default_window(chm, "main");
+
+        {
+            htmloutput ho;
+            rdstringc rs = {0, 0, NULL};
+
+            ho.charset = CS_CP1252; /* as far as I know, CHM is */
+            ho.restrict_charset = CS_CP1252; /* hardwired to this charset */
+            ho.cstate = charset_init_state;
+            ho.ver = HTML_4;	       /* *shrug* */
+            ho.state = HO_NEUTRAL;
+            ho.contents_level = 0;
+            ho.hackflags = HO_HACK_QUOTENOTHING;
+
+            ho_setup_rdstringc(&ho, &rs);
+
+            ho.hacklimit = 255;
+            html_words(&ho, topsect->title->words, NOTHING,
+                       NULL, keywords, &conf);
+
+            rdaddc(&rs, '\0');
+            chm_title(chm, rs.text);
+
+            chm_default_topic(chm, files.head->filename);
+
+            chm_add_window(chm, "main", rs.text,
+                           conf.hhc_filename, conf.hhk_filename,
+                           files.head->filename,
+                           /* This first magic number is
+                            * fsWinProperties, controlling Navigation
+                            * Pane options and the like. Constants
+                            * HHWIN_PROP_* in htmlhelp.h. */
+                           0x62520,
+                           /* This second number is fsToolBarFlags,
+                            * mainly controlling toolbar buttons.
+                            * Constants HHWIN_BUTTON_*. NOTE: there
+                            * are two pairs of bits for Next/Previous
+                            * buttons: 7/8 (which do nothing useful),
+                            * and 21/22 (which work). (Neither of
+                            * these are exposed in the HHW UI, but
+                            * they work fine in HH.) We use the
+                            * latter. */
+                           0x70304e);
+
+            sfree(rs.text);
+        }
+
+        {
+            htmlfile *f;
+
+            for (f = files.head; f; f = f->next)
+                f->chmsect = NULL;
+            for (f = files.head; f; f = f->next) {
+                htmlsect *s = f->first;
+                htmloutput ho;
+                rdstringc rs = {0, 0, NULL};
+
+                ho.charset = CS_CP1252;
+                ho.restrict_charset = CS_CP1252;
+                ho.cstate = charset_init_state;
+                ho.ver = HTML_4;	       /* *shrug* */
+                ho.state = HO_NEUTRAL;
+                ho.contents_level = 0;
+                ho.hackflags = HO_HACK_QUOTENOTHING;
+
+                ho_setup_rdstringc(&ho, &rs);
+                ho.hacklimit = 255;
+
+                if (f->first->title)
+                    html_words(&ho, f->first->title->words, NOTHING,
+                               NULL, keywords, &conf);
+                else if (f->first->type == INDEX)
+                    html_text(&ho, conf.index_text);
+                rdaddc(&rs, '\0');
+                
+                while (s && s->file == f)
+                    s = s->parent;
+
+                /*
+                 * Special case, as below: the TOP file is not
+                 * considered to be the parent of everything else.
+                 */
+                if (s && s->type == TOP)
+                    s = NULL;
+
+                f->chmsect = chm_add_section(chm, s ? s->file->chmsect : NULL,
+                                             rs.text, f->filename);
+
+                sfree(rs.text);
+            }
+        }
+
+        {
+            int i;
+
+            for (i = 0; i < conf.nchmextrafiles; i++) {
+                const char *fname = conf.chm_extrafiles[i];
+                FILE *fp;
+                long size;
+                char *data;
+
+                fp = fopen(fname, "rb");
+                if (!fp) {
+                    err_cantopen(fname);
+                    continue;
+                }
+
+                fseek(fp, 0, SEEK_END);
+                size = ftell(fp);
+                rewind(fp);
+
+                data = snewn(size, char);
+                size = fread(data, 1, size, fp);
+                fclose(fp);
+
+                chm_add_file(chm, conf.chm_extranames[i], data, size);
+                sfree(data);
+            }
+        }
     }
 
     /*
@@ -1800,7 +2044,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
             ho_string(&ho, conf.hhc_filename);
             ho_string(&ho, "\n");
         }
-	if (hhk_filename) {
+	if (hhk_needed) {
             ho_string(&ho, "Index file=");
             ho_string(&ho, conf.hhk_filename);
             ho_string(&ho, "\n");
@@ -1817,8 +2061,8 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
         if (conf.hhc_filename)
             ho_string(&ho, conf.hhc_filename);
         ho_string(&ho, "\",\"");
-        if (hhk_filename)
-            ho_string(&ho, hhk_filename);
+        if (hhk_needed)
+            ho_string(&ho, conf.hhk_filename);
         ho_string(&ho, "\",\"");
         ho_string(&ho, files.head->filename);
         ho_string(&ho, "\",,,,,,"
@@ -1848,7 +2092,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 
         ho_finish(&ho);
     }
-    if (conf.hhc_filename) {
+    if (chm || conf.hhc_filename) {
 	htmlfile *f;
 	htmlsect *s, *a;
 	htmloutput ho;
@@ -1862,7 +2106,10 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 	ho.contents_level = 0;
 	ho.hackflags = HO_HACK_QUOTEQUOTES;
 
-	ho_setup_file(&ho, conf.hhc_filename);
+        if (chm)
+            ho_setup_chm(&ho, chm, conf.hhc_filename);
+        else
+            ho_setup_file(&ho, conf.hhc_filename);
 
 	/*
 	 * Magic DOCTYPE which seems to work for .HHC files. I'm
@@ -1955,7 +2202,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 
 	cleanup(&ho);
     }
-    if (hhk_filename) {
+    if (hhk_needed) {
 	htmlfile *f;
 	htmloutput ho;
 	indexentry *entry;
@@ -1976,7 +2223,10 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 	ho.contents_level = 0;
 	ho.hackflags = HO_HACK_QUOTEQUOTES;
 
-	ho_setup_file(&ho, hhk_filename);
+        if (chm)
+            ho_setup_chm(&ho, chm, conf.hhk_filename);
+        else
+            ho_setup_file(&ho, conf.hhk_filename);
 
 	/*
 	 * Magic DOCTYPE which seems to work for .HHK files. I'm
@@ -2041,6 +2291,26 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 	cleanup(&ho);
     }
 
+    if (chm) {
+        /*
+         * Finalise and write out the CHM file.
+         */
+        const char *data;
+        int len;
+        FILE *fp;
+
+        fp = fopen(conf.chm_filename, "wb");
+        if (!fp) {
+            err_cantopenw(conf.chm_filename);
+        } else {
+            data = chm_build(chm, &len);
+            fwrite(data, 1, len, fp);
+            fclose(fp);
+        }
+
+        chm_free(chm);
+    }
+
     /*
      * Go through and check that no index fragments were referenced
      * without being generated, or indeed vice versa.
@@ -2139,6 +2409,25 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
     while (conf.ntfragments--)
 	sfree(conf.template_fragments[conf.ntfragments]);
     sfree(conf.template_fragments);
+    while (conf.nchmextrafiles--) {
+	sfree(conf.chm_extrafiles[conf.nchmextrafiles]);
+	sfree(conf.chm_extranames[conf.nchmextrafiles]);
+    }
+    sfree(conf.chm_extrafiles); sfree(conf.chm_extranames); /* both arrays */
+}
+
+void html_backend(paragraph *sourceform, keywordlist *keywords,
+                  indexdata *idx, void *unused)
+{
+    IGNORE(unused);
+    html_backend_common(sourceform, keywords, idx, FALSE);
+}
+
+void chm_backend(paragraph *sourceform, keywordlist *keywords,
+                 indexdata *idx, void *unused)
+{
+    IGNORE(unused);
+    html_backend_common(sourceform, keywords, idx, TRUE);
 }
 
 static void html_file_section(htmlconfig *cfg, htmlfilelist *files,
diff --git a/doc/Makefile b/doc/Makefile
index 81a1fd8..e0cc27a 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -11,7 +11,7 @@ all: index.html halibut.1
 
 index.html: $(INPUTS) $(HALIBUT)
 	$(HALIBUT) --text=halibut.txt --html --info=halibut.info \
-		--ps=halibut.ps --pdf=halibut.pdf $(INPUTS)
+		--ps=halibut.ps --pdf=halibut.pdf --chm=halibut.chm $(INPUTS)
 
 halibut.1: manpage.but
 	$(HALIBUT) --man=halibut.1 manpage.but
@@ -21,8 +21,4 @@ install:
 	$(INSTALL) -m 644 halibut.1 $(man1dir)/halibut.1
 
 clean:
-	rm -f *.html *.txt *.hlp *.cnt *.1 *.info* *.ps *.pdf *.hh* *.chm
-
-chm: halibut.hhp
-halibut.hhp: $(INPUTS) $(HALIBUT) chm.but
-	$(HALIBUT) --html $(INPUTS) chm.but
+	rm -f *.html *.txt *.hlp *.cnt *.1 *.info* *.ps *.pdf *.chm
diff --git a/doc/chm.but b/doc/chm.but
deleted file mode 100644
index ef21ecc..0000000
--- a/doc/chm.but
+++ /dev/null
@@ -1,17 +0,0 @@
-\# File containing the magic HTML configuration directives to create
-\# an MS HTML Help project. We put this on the end of the Halibut
-\# docs build command line to build the HHP and friends.
-
-\cfg{html-leaf-level}{infinite}
-\cfg{html-leaf-contains-contents}{false}
-\cfg{html-suppress-navlinks}{true}
-\cfg{html-suppress-address}{true}
-
-\cfg{html-contents-filename}{index.html}
-\cfg{html-template-filename}{%k.html}
-\cfg{html-template-fragment}{%k}
-
-\cfg{html-mshtmlhelp-chm}{halibut.chm}
-\cfg{html-mshtmlhelp-project}{halibut.hhp}
-\cfg{html-mshtmlhelp-contents}{halibut.hhc}
-\cfg{html-mshtmlhelp-index}{halibut.hhk}
diff --git a/doc/intro.but b/doc/intro.but
index 2e5ada1..ce1668c 100644
--- a/doc/intro.but
+++ b/doc/intro.but
@@ -25,10 +25,9 @@ Currently Halibut supports the following output formats:
 
 \b PostScript.
 
-\b Old-style Windows Help (\cw{.HLP}).
+\b Windows HTML Help (\cw{.CHM}).
 
-(By setting suitable options, the HTML output can also be made
-suitable for feeding to the newer-style Windows HTML Help compiler.)
+\b Old-style Windows Help (\cw{.HLP}).
 
 \H{intro-features} Features supported by Halibut
 
diff --git a/doc/manpage.but b/doc/manpage.but
index 56048f6..a13b195 100644
--- a/doc/manpage.but
+++ b/doc/manpage.but
@@ -43,13 +43,21 @@ produced as output; this, and the file names, will be as specified
 in the input files, or given a set of default names starting with
 \c{Contents.html} if none is specified at all.
 
+\dt \cw{--chm}[\cw{=}\e{filename}]
+
+\dd Makes Halibut generate an output file in Windows HTML Help
+format. If the optional \e{filename} parameter is supplied, the output
+help file will be given that name. Otherwise, the name of the output
+help file will be as specified in the input files, or \c{output.chm}
+if none is specified at all.
+
 \dt \cw{--winhelp}[\cw{=}\e{filename}]
 
-\dd Makes Halibut generate an output file in Windows Help format. If
-the optional \e{filename} parameter is supplied, the output help
-file will be given that name. Otherwise, the name of the output help
-file will be as specified in the input files, or \c{output.hlp} if
-none is specified at all.
+\dd Makes Halibut generate an output file in old-style Windows Help
+format. If the optional \e{filename} parameter is supplied, the output
+help file will be given that name. Otherwise, the name of the output
+help file will be as specified in the input files, or \c{output.hlp}
+if none is specified at all.
 
 \lcont{
 The output help file must have a name ending in \c{.hlp}; if it does
diff --git a/doc/output.but b/doc/output.but
index 9309b82..ccb99df 100644
--- a/doc/output.but
+++ b/doc/output.but
@@ -858,13 +858,202 @@ name="description">} tag in the output HTML files, so that browsers
 which support this can easily pick out a brief \I{description, of
 document}description of the document.
 
-\S{output-html-mshtmlhelp} Generating MS Windows \i{HTML Help}
+\S{output-html-defaults} Default settings
+
+The \i{default settings} for Halibut's HTML output format are:
+
+\c \cfg{html-contents-filename}{Contents.html}
+\c \cfg{html-index-filename}{IndexPage.html}
+\c \cfg{html-template-filename}{%n.html}
+\c \cfg{html-single-filename}{Manual.html}
+\c
+\c \cfg{html-leaf-level}{2}
+\c \cfg{html-leaf-contains-contents}{false}
+\c \cfg{html-leaf-smallest-contents}{4}
+\c \cfg{html-contents-depth}{0}{2}
+\c \cfg{html-contents-depth}{1}{3}
+\c ... and so on for all section levels below this ...
+\e iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
+\c
+\c \cfg{html-head-end}{}
+\c \cfg{html-body-tag}{<body>}
+\c \cfg{html-body-start}{}
+\c \cfg{html-body-end}{}
+\c \cfg{html-address-start}{}
+\c \cfg{html-address-end}{}
+\c \cfg{html-navigation-attributes}{}
+\c
+\c \cfg{html-chapter-numeric}{false}
+\c \cfg{html-chapter-shownumber}{true}
+\c \cfg{html-chapter-suffix}{: }
+\c
+\c \cfg{html-section-numeric}{0}{true}
+\c \cfg{html-section-shownumber}{0}{true}
+\c \cfg{html-section-suffix}{0}{ }
+\c
+\c \cfg{html-section-numeric}{1}{true}
+\c \cfg{html-section-shownumber}{1}{true}
+\c \cfg{html-section-suffix}{1}{ }
+\c
+\c ... and so on for all section levels below this ...
+\e iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
+\c
+\c \cfg{html-preamble-text}{Preamble}
+\c \cfg{html-contents-text}{Contents}
+\c \cfg{html-index-text}{Index}
+\c \cfg{html-title-separator}{ - }
+\c \cfg{html-index-main-separator}{: }
+\c \cfg{html-index-multiple-separator}{, }
+\c \cfg{html-pre-versionid}{[}
+\c \cfg{html-post-versionid}{]}
+\c \cfg{html-nav-prev-text}{Previous}
+\c \cfg{html-nav-next-text}{Next}
+\c \cfg{html-nav-up-text}{Up}
+\c \cfg{html-nav-separator}{ | }
+\c
+\c \cfg{html-output-charset}{ASCII}
+\c \cfg{html-restrict-charset}{UTF-8}
+\c \cfg{html-quotes}{\u2018}{\u2019}{"}{"}
+\c
+\c \cfg{html-version}{html4}
+\c \cfg{html-template-fragment}{%b}
+\c \cfg{html-versionid}{true}
+\c \cfg{html-rellinks}{true}
+\c \cfg{html-suppress-navlinks}{false}
+\c \cfg{html-suppress-address}{false}
+\c \cfg{html-author}{}
+\c \cfg{html-description}{}
+
+\H{output-chm} Windows \i{HTML Help}
+
+This output format generates a \c{.chm} file suitable for use with the
+Windows HTML Help system.
+
+Older versions of Halibut could only generate HTML Help by writing out
+a set of source files acceptable to the MS help compiler. Nowadays
+Halibut can generate CHM directly, so that's no longer necessary.
+However, the legacy method is still available if you need it; see
+\k{output-html-mshtmlhelp} for details.
+
+\S{output-chm-file} Output file name
+
+\dt \I{\cw{\\cfg\{chm-filename\}}}\cw{\\cfg\{chm-filename\}\{}\e{filename}\cw{\}}
+
+\dd Sets the \i{output file name} in which to store the HTML Help
+file. This directive is implicitly generated if you provide a file
+name parameter after the command-line option \i\c{--chm} (see
+\k{running-options}).
+
+\S{output-chm-mostconfig} Configuration shared with the HTML back end
+
+As the name suggests, an HTML Help file is mostly a compressed
+container for HTML files. So the CHM back end shares a great deal of
+its code with the HTML back end, and as a result, it supports the same
+range of format configuration options.
+
+(One exception to this general rule is that the configuration options
+relating to generating \e{HTML Help compiler input} are not supported
+in CHM mode, because they wouldn't make any sense! The
+\cw{html-mshtmlhelp-*} options described in \k{output-html-mshtmlhelp}
+have no analogue starting \cw{chm-}.)
+
+However, because HTML and CHM are used in different ways, you may need
+to configure the two back ends differently. So in CHM mode, Halibut
+supports all the same configuration directives described in
+\k{output-html}, but with their names changed so that they begin with
+\cq{chm-} in place of \cq{html-}. This lets you maintain two sets of
+configuration independently; for example, you could specify
+\c{\\cfg\{html-chapter-numeric\}\{true\}} and
+\c{\\cfg\{chm-chapter-numeric\}\{false\}} in the same source file, and
+then when you ran Halibut with both the \c{--html} and \c{--chm}
+options, it would produce purely numeric chapter titles in the HTML
+output but not in the CHM file.
+
+If you do decide to apply a piece of configuration across both these
+back ends, you can prefix it with \cq{htmlall-} instead of \cq{html-}
+or \cq{chm-}. For example,
+\c{\\cfg\{htmlall-chapter-numeric\}\{true\}} will enable purely
+numeric chapter titles in \e{both} the HTML and CHM output.
+
+\S{output-chm-extra} Including extra files in the CHM
+
+CHM files are mostly a container for HTML, and the HTML files inside
+them are allowed to cross-refer to all the usual other kinds of file
+that HTML might refer to, such as images, stylesheets and even
+Javascript. If you want to make use of this capability, you need to
+tell Halibut what extra files it needs to incorporate into the CHM
+container.
+
+\dt \I{\cw{\\cfg\{chm-extra-file\}}}\cw{\\cfg\{chm-extra-file\}\{}\e{filename}\cw{\}}
+
+\dt \I{\cw{\\cfg\{chm-extra-file\}}}\cw{\\cfg\{chm-extra-file\}\{}\e{filename}\cw{\}\{}\e{name inside CHM}\cw{\}}
+
+\dd Tells Halibut to read an additional input file from \e{filename}
+and incorporate it into the CHM.
+
+\lcont{
+
+In the first form of the directive, the file will be given the same
+name within the CHM's internal namespace (i.e. for the purposes of
+linking to it from HTML files) as Halibut used to load it from disk.
+If you need to include the file with a different internal name, you
+can use the second form of the directive, which separately specifies
+the name under which Halibut should look for the input file and the
+name it should give it inside the CHM.
+
+You can specify this directive multiple times, to include more than
+one file.
+
+}
+
+\S{output-chm-internalnames} Renaming the CHM internal support files
+
+As well as ordinary HTML, there are also two special files inside a
+CHM, containing the table of contents and the index. Halibut generates
+these automatically, and you normally don't have to worry about them.
+However, it is \e{just} possible (though very unlikely!) that you
+might find they conflict with the name of some file you wanted to
+include in the CHM yourself, and hence, Halibut provides configuration
+options to change them if you need to.
+
+\dt \I{\cw{\\cfg\{chm-contents-name\}}}\cw{\\cfg\{chm-contents-name\}\{}\e{filename}\cw{\}}
+
+\dd Controls the name of the internal contents file in the CHM.
+
+\dt \I{\cw{\\cfg\{chm-index-name\}}}\cw{\\cfg\{chm-index-name\}\{}\e{filename}\cw{\}}
+
+\dd Controls the name of the internal index file in the CHM.
+
+\S{output-chm-defaults} Default settings
+
+The \i{default settings} for Halibut's CHM output format are mostly
+the same as for the standard HTML output. However, a few defaults are
+changed to be more in line with the way CHM wants to do things.
+
+\c \cfg{chm-filename}{output.chm}
+\c \cfg{chm-contents-name}{contents.hhc}
+\c \cfg{chm-index-name}{index.hhk}
+\c \cfg{chm-leaf-level}{infinite}
+\c \cfg{chm-suppress-navlinks}{true}
+\c \cfg{chm-suppress-address}{true}
 
-The HTML files output from Halibut's HTML back end can be used as
-input to the MS Windows HTML Help compiler. In order to do this, you
-also need some auxiliary files: a project file, and (probably) a
-contents file and an index file. Halibut can optionally generate
-those as well.
+\S{output-html-mshtmlhelp} Generating input to the MS Windows \i{HTML
+Help compiler}
+
+Before Halibut gained the ability to write out CHM files directly, it
+used a more cumbersome system in which you could run it in HTML mode
+and enable some extra options that would write out supporting files
+needed by the official Windows HTML Help compiler, so that you could
+still generate a CHM file from your Halibut source in multiple build
+steps.
+
+This legacy system for HTML Help generation is still supported, partly
+to avoid backwards-compatibility breakage for anyone already using it,
+and also because it permits more flexibility in the resulting CHM
+files: Halibut's own CHM file generation makes some fixed decisions
+about window layout and styling, whereas if you use the official help
+compiler you can start from Halibut's default project file and make
+whatever manual changes you like to that sort of thing.
 
 To enable the generation of MS HTML Help auxiliary files, use the
 following configuration directives:
@@ -940,18 +1129,16 @@ MS HTML Help compiler (\cw{HHC.EXE}), or load into the MS HTML Help
 Workshop (\cw{HHW.EXE}).
 
 You may also wish to alter other HTML configuration options to make
-the resulting help file look more like a help file and less like a
-web page. A suggested set of additional configuration options for
-HTML Help is as follows:
+the resulting help file look more like a help file and less like a web
+page. If you use Halibut's direct CHM output, this is done for you
+automatically (see \k{output-chm-defaults}); but if you're using the
+HTML output mode then I recommend the following changes.
 
 \b \cw{\\cfg\{html-leaf-level\}\{infinite\}}, because HTML Help
 works best with lots of small files (\q{topics}) rather than a few
 large ones. In particular, the contents and index mechanisms can
 only reference files, not subsections within files.
 
-\b \cw{\\cfg\{html-leaf-contains-contents\}\{false\}}, to suppress
-the contents list above the main text of each bottom-level file.
-
 \b \cw{\\cfg\{html-suppress-navlinks\}\{true\}}, because HTML Help
 has its own navigation facilities and it looks a bit strange to
 duplicate them.
@@ -960,83 +1147,15 @@ duplicate them.
 \cw{<ADDRESS>} section makes less sense in a help file than it does
 on a web page.
 
-\S{output-html-defaults} Default settings
-
-The \i{default settings} for Halibut's HTML output format are:
-
-\c \cfg{html-contents-filename}{Contents.html}
-\c \cfg{html-index-filename}{IndexPage.html}
-\c \cfg{html-template-filename}{%n.html}
-\c \cfg{html-single-filename}{Manual.html}
-\c
-\c \cfg{html-leaf-level}{2}
-\c \cfg{html-leaf-contains-contents}{false}
-\c \cfg{html-leaf-smallest-contents}{4}
-\c \cfg{html-contents-depth}{0}{2}
-\c \cfg{html-contents-depth}{1}{3}
-\c ... and so on for all section levels below this ...
-\e iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
-\c
-\c \cfg{html-head-end}{}
-\c \cfg{html-body-tag}{<body>}
-\c \cfg{html-body-start}{}
-\c \cfg{html-body-end}{}
-\c \cfg{html-address-start}{}
-\c \cfg{html-address-end}{}
-\c \cfg{html-navigation-attributes}{}
-\c
-\c \cfg{html-chapter-numeric}{false}
-\c \cfg{html-chapter-shownumber}{true}
-\c \cfg{html-chapter-suffix}{: }
-\c
-\c \cfg{html-section-numeric}{0}{true}
-\c \cfg{html-section-shownumber}{0}{true}
-\c \cfg{html-section-suffix}{0}{ }
-\c
-\c \cfg{html-section-numeric}{1}{true}
-\c \cfg{html-section-shownumber}{1}{true}
-\c \cfg{html-section-suffix}{1}{ }
-\c
-\c ... and so on for all section levels below this ...
-\e iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
-\c
-\c \cfg{html-preamble-text}{Preamble}
-\c \cfg{html-contents-text}{Contents}
-\c \cfg{html-index-text}{Index}
-\c \cfg{html-title-separator}{ - }
-\c \cfg{html-index-main-separator}{: }
-\c \cfg{html-index-multiple-separator}{, }
-\c \cfg{html-pre-versionid}{[}
-\c \cfg{html-post-versionid}{]}
-\c \cfg{html-nav-prev-text}{Previous}
-\c \cfg{html-nav-next-text}{Next}
-\c \cfg{html-nav-up-text}{Up}
-\c \cfg{html-nav-separator}{ | }
-\c
-\c \cfg{html-output-charset}{ASCII}
-\c \cfg{html-restrict-charset}{UTF-8}
-\c \cfg{html-quotes}{\u2018}{\u2019}{"}{"}
-\c
-\c \cfg{html-version}{html4}
-\c \cfg{html-template-fragment}{%b}
-\c \cfg{html-versionid}{true}
-\c \cfg{html-rellinks}{true}
-\c \cfg{html-suppress-navlinks{false}
-\c \cfg{html-suppress-address}{false}
-\c \cfg{html-author}{}
-\c \cfg{html-description}{}
-
-\H{output-whlp} Windows Help
+\H{output-whlp} Legacy Windows Help
 
-This output format generates data that can be used by the \i{Windows
-Help} program \cw{WINHLP32.EXE}. There are two actual files
+This output format generates data that can be used by the legacy
+\i{Windows Help} program \cw{WINHLP32.EXE}. There are two actual files
 generated, one ending in \c{.hlp} and the other ending in \c{.cnt}.
 
-Note that as of 2006, MS is discontinuing the Windows Help format in
-favour of the newer HTML Help format (\c{.chm} files). Halibut is
-not currently able to generate \c{.chm} files directly, but its HTML
-back end can write out project files suitable for use as input to
-the MS HTML Help compiler. See \k{output-html-mshtmlhelp} for more
+This legacy Windows Help format was discontinued in 2006 in favour of
+HTML Help, which Halibut can also generate. You probably want to use
+that instead for any new project. See \k{output-chm} for more
 information on this.
 
 Currently, the Windows Help output is hardcoded to be in the
diff --git a/doc/running.but b/doc/running.but
index 39e1715..6c2b6c6 100644
--- a/doc/running.but
+++ b/doc/running.but
@@ -12,22 +12,19 @@ This will generate a large set of \i{output files}:
 \b \i\c{output.txt} will be a \i{plain text} version of the input
 document.
 
+\b \i\c{output.chm} will be a Windows \i{HTML Help} version of the
+same thing. (Note that to do this Halibut does not require any
+external software such as a \i{Help compiler}. It \e{directly}
+generates Windows HTML Help files, and therefore it doesn't need to be
+run on Windows to do so: it can generate them even when run from an
+automated script on a Unix machine.)
+
 \b \i\c{output.hlp} and \i\c{output.cnt} will be an old-style
 \i{Windows Help} version of the same thing. (Most of the text is in
 \c{output.hlp}; \c{output.cnt} contains additional contents data
 used by the Windows help topic selector. If you lose the latter, the
 former should still be usable, but it will look less modern.)
 
-\lcont{
-
-Note that to do this Halibut does not require any external software
-such as a \i{Help compiler}. It \e{directly} generates old-style
-Windows Help files, and therefore it doesn't need to be run on
-Windows to do so: it can generate them even when run from an
-automated script on a Unix machine.
-
-}
-
 \b \c{output.1} will be a Unix \i{\cw{man} page}.
 
 \b The set of files \c{*.html} will contain an \i{HTML} version of
@@ -79,6 +76,13 @@ line, using the \c{-C} option).
 
 \dd Synonym for \c{--html}.
 
+\dt \i\cw{--chm}[\cw{=}\e{filename}]
+
+\dd Specifies that you want to generate Windows HTML Help
+output. You can optionally specify a file name (e.g.
+\c{\-\-chm=myfile.chm}), in which case Halibut will change the
+name of the output file as well.
+
 \dt \i\cw{--winhelp}[\cw{=}\e{filename}]
 
 \dd Specifies that you want to generate old-style Windows Help
diff --git a/error.c b/error.c
index 8aa83b5..7589175 100644
--- a/error.c
+++ b/error.c
@@ -363,3 +363,9 @@ void err_sfntbadglyph(const filepos *fpos, unsigned wc)
              "warning: character U+%04X references a non-existent glyph",
              wc);
 }
+
+void err_chm_badname(const filepos *fpos, const char *sp)
+{
+    do_error(fpos, "CHM internal file name `%s' begins with"
+             " a reserved character", sp);   /* sp: the rejected name */
+}
diff --git a/halibut.h b/halibut.h
index ec0ce34..327562e 100644
--- a/halibut.h
+++ b/halibut.h
@@ -322,6 +322,8 @@ void err_sfnttablevers(const filepos *fpos, const char *sp);
 void err_sfntbadhdr(const filepos *fpos);
 /* sfnt cmap references bad glyph */
 void err_sfntbadglyph(const filepos *fpos, unsigned wc);
+/* CHM internal file names can't start with # or $ */
+void err_chm_badname(const filepos *fpos, const char *sp);
 
 /*
  * malloc.c
@@ -563,7 +565,9 @@ paragraph *text_config_filename(char *filename);
  * bk_html.c
  */
 void html_backend(paragraph *, keywordlist *, indexdata *, void *);
+void chm_backend(paragraph *, keywordlist *, indexdata *, void *);
 paragraph *html_config_filename(char *filename);
+paragraph *chm_config_filename(char *filename);
 
 /*
  * bk_whlp.c
diff --git a/lzx.c b/lzx.c
new file mode 100644
index 0000000..3c404b9
--- /dev/null
+++ b/lzx.c
@@ -0,0 +1,697 @@
+#include <assert.h>
+#include <stddef.h>
+
+#include "halibut.h"
+#include "huffman.h"
+#include "lz77.h"
+#include "lzx.h"
+
+#define OUR_LZX_WINSIZE 0x10000
+#define LZX_MINMATCHLEN 2
+#define LZX_MAXMATCHLEN 257
+
+int lzx_compute_position_slot(int pos, int *footer_bits)
+{   /* Returns the slot covering pos; *footer_bits gets that slot's bit width. */
+    if (pos < 4) {
+        /* The bottom four position slots cover one value each, so
+         * there is no footer at all. */
+        *footer_bits = 0;
+        return pos;
+    } else if (pos >= 0x40000) {
+        /* _All_ slots from 36 onwards are 2^17 values wide. */
+        *footer_bits = 17;
+        return 34 + (pos >> 17);
+    } else {
+        /* In between, there are two slots for each power-of-2 size,
+         * so that slots 4,5 have width 2^1, 6,7 have width 2^2, 8,9
+         * have width 2^3, ..., and 34,35 have width 2^16. */
+        int bits = 16;                 /* reduced as pos is normalised */
+        int shifted = pos;             /* shifted left until bit 17 is set */
+        if (shifted < (1<<(18-8))) shifted <<= 8, bits -= 8;
+        if (shifted < (1<<(18-4))) shifted <<= 4, bits -= 4;
+        if (shifted < (1<<(18-2))) shifted <<= 2, bits -= 2;
+        if (shifted < (1<<(18-1))) shifted <<= 1, bits -= 1;
+        *footer_bits = bits;
+        return 2 + 2*bits + ((shifted >> 16) & 1); /* bit 16: which of the pair */
+    }
+}
+
+typedef enum LZXSymType {
+    LST_MAINTREE, LST_LENTREE, LST_ALIGNOFFTREE,
+    LST_MAINTREE_PRETREE_1, LST_MAINTREE_PRETREE_2, LST_LENTREE_PRETREE,
+    LST_NTREES, dummy_enum_const = LST_NTREES-1, /* next value == LST_NTREES */
+    LST_REALIGN_BITSTREAM, /* pad to 16 bits and record a reset-table entry */
+    LST_RAWBITS_BASE /* add the number of actual bits to this code */
+} LZXSymType;
+
+typedef struct LZXSym {
+    LZXSymType type;   /* tree that encodes it, or a realign/raw-bits marker */
+    int value;         /* symbol index in that tree, or the raw bits to write */
+} LZXSym;
+
+typedef struct LZXBuffer {
+    LZXSym *syms;                      /* growable array, see lzx_addsym */
+    int nsyms, symsize;                /* entries in use / entries allocated */
+} LZXBuffer;
+
+typedef struct LZXInfo {
+    LZXBuffer *buf;                    /* where the LZ77 callbacks append */
+    int r0, r1, r2;                    /* saved match offsets, r0 most recent */
+} LZXInfo;
+
+static void lzx_buffer_init(LZXBuffer *buf)
+{
+    buf->syms = NULL;                  /* allocated on demand by lzx_addsym */
+    buf->nsyms = buf->symsize = 0;
+}
+
+static void lzx_addsym(LZXBuffer *buf, LZXSymType type, int value)
+{
+    if (buf->nsyms >= buf->symsize) {  /* array full, or not yet allocated */
+        assert(buf->nsyms == buf->symsize);
+        buf->symsize = buf->nsyms * 5 / 4 + 16384; /* grow by 25% + 16384 */
+        buf->syms = sresize(buf->syms, buf->symsize, LZXSym);
+    }
+    buf->syms[buf->nsyms].type = type; /* append the new symbol at the end */
+    buf->syms[buf->nsyms].value = value;
+    buf->nsyms++;
+}
+
+static void lzx_literal(struct LZ77Context *ctx, unsigned char c)
+{
+    LZXBuffer *buf = ((LZXInfo *)ctx->userdata)->buf;
+    lzx_addsym(buf, LST_MAINTREE, c);  /* main-tree values 0..255 are literals */
+}
+
+static void lzx_match(struct LZ77Context *ctx, int match_offset, int totallen)
+{
+    LZXInfo *info = (LZXInfo *)ctx->userdata;
+    LZXBuffer *buf = info->buf;
+
+    /*
+     * Match callback for lz77.c: append the symbols for this match to
+     * info->buf. This variant of LZX has a maximum match length of 257
+     * bytes, so a longer match reported by lz77.c must be broken up.
+     */
+    while (totallen > 0) {
+        int len, length_header, length_footer, len_pos_header;
+        int formatted_offset, position_slot, position_verbatim_bits;
+        int position_verbatim_value, position_aligned_offset;
+
+        if (totallen <= LZX_MAXMATCHLEN) {
+            /* We can emit all of the (remaining) match length in one go. */
+            len = totallen;
+        } else if (totallen >= LZX_MAXMATCHLEN+LZX_MINMATCHLEN) {
+            /* There's enough match left that we can emit a
+             * maximum-length chunk and still be assured of being able
+             * to emit what's left as a viable followup match. */
+            len = LZX_MAXMATCHLEN;
+        } else {
+            /* The in-between case, where we have _only just_ too long
+             * a match to emit in one go, so that if we emitted a
+             * max-size chunk then what's left would be under the min
+             * size and we couldn't emit it. */
+            len = totallen - LZX_MINMATCHLEN;
+        }
+        totallen -= len;       /* what remains is 0 or >= LZX_MINMATCHLEN */
+
+        /*
+         * Now we're outputting a single LZX-level match of length
+         * 'len'. Break the length up into a 'header' (included in the
+         * starting LST_MAINTREE symbol) and a 'footer' (tacked on
+         * afterwards using LST_LENTREE).
+         */
+        if (len < 9) {
+            length_header = len - 2;   /* in the range {0,...,6} */
+            length_footer = -1;        /* not transmitted at all */
+        } else {
+            length_header = 7;         /* header indicates more to come */
+            length_footer = len - 9;   /* in the range {0,...,248} */
+        }
+
+        /*
+         * Meanwhile, the raw backward distance is first transformed
+         * into the 'formatted offset', by either adding 2 or using
+         * one of the low-numbered special codes meaning to use one of
+         * the three most recent match distances.
+         */
+        if (match_offset == info->r0) {
+            /* Reuse the most recent distance (later chunks land here) */
+            formatted_offset = 0;
+        } else if (match_offset == info->r1) {
+            /* Reuse the 2nd most recent, and swap it into first place */
+            int tmp = info->r1;
+            info->r1 = info->r0;
+            info->r0 = tmp;
+            formatted_offset = 1;
+        } else if (match_offset == info->r2) {
+            /* Reuse the 3rd most recent and swap it to first place.
+             * This is intentionally not quite a move-to-front
+             * shuffle, which would permute (r0,r1,r2)->(r2,r0,r1); MS
+             * decided that just swapping r0 with r2 was a better
+             * performance tradeoff. */
+            int tmp = info->r2;
+            info->r2 = info->r0;
+            info->r0 = tmp;
+            formatted_offset = 2;
+        } else {
+            /* This offset matches none of the three saved values.
+             * Put it in r0, and move up the rest of the list. */
+            info->r2 = info->r1;
+            info->r1 = info->r0;
+            info->r0 = match_offset;
+            formatted_offset = match_offset + 2;
+        }
+
+        /*
+         * The formatted offset now breaks up into a 'position slot'
+         * (encoded as part of the starting symbol) and an offset from
+         * the smallest position value covered by that slot. The
+         * system of slots is designed so that every slot's width is a
+         * power of two and its base value is a multiple of its width,
+         * so we can get the offset just by taking the bottom n bits
+         * of the full formatted offset, once the choice of position
+         * slot tells us what n is.
+         */
+        position_slot = lzx_compute_position_slot(
+            formatted_offset, &position_verbatim_bits);
+        position_verbatim_value = formatted_offset &
+            ((1 << position_verbatim_bits)-1);
+
+        /*
+         * If there are three or more additional bits, then the last 3
+         * of them are (potentially, depending on block type which we
+         * haven't decided about yet) transmitted using the aligned
+         * offset tree. The rest are sent verbatim.
+         */
+        if (position_verbatim_bits >= 3) {
+            position_aligned_offset = position_verbatim_value & 7;
+            position_verbatim_bits -= 3;
+            position_verbatim_value >>= 3;
+        } else {
+            position_aligned_offset = -1; /* not transmitted */
+        }
+
+        /*
+         * Combine the length header and position slot into the full
+         * set of information encoded by the starting symbol.
+         */
+        len_pos_header = position_slot * 8 + length_header;
+
+        /*
+         * And now we've finished figuring out _what_ to output, so
+         * output it. Main-tree values from 256 up are match headers.
+         */
+        lzx_addsym(buf, LST_MAINTREE, 256 + len_pos_header);
+        if (length_footer >= 0)
+            lzx_addsym(buf, LST_LENTREE, length_footer);
+        if (position_verbatim_bits > 0)
+            lzx_addsym(buf, LST_RAWBITS_BASE + position_verbatim_bits,
+                       position_verbatim_value);
+        if (position_aligned_offset >= 0)
+            lzx_addsym(buf, LST_ALIGNOFFTREE, position_aligned_offset);
+    }
+}
+
+void lzx_lz77_inner(LZXInfo *info, const unsigned char *data, int len)
+{
+    struct LZ77Context lz77c;          /* set up and torn down on every call */
+    lz77_init(&lz77c, OUR_LZX_WINSIZE);
+    lz77c.literal = lzx_literal;
+    lz77c.match = lzx_match;
+    lz77c.userdata = info;             /* callbacks read it as ctx->userdata */
+    lz77_compress(&lz77c, data, len, TRUE);
+    lz77_cleanup(&lz77c);
+}
+
+void lzx_lz77(LZXBuffer *buf, const unsigned char *data,
+              int totallen, int realign_interval)
+{
+    LZXInfo info;
+
+    info.r0 = info.r1 = info.r2 = 1;   /* all saved offsets start out as 1 */
+    info.buf = buf;
+
+    while (totallen > 0) {             /* one pass per realign_interval bytes */
+        int thislen =
+            totallen < realign_interval ? totallen : realign_interval;
+        lzx_lz77_inner(&info, data, thislen); /* r0..r2 persist across passes */
+        data += thislen;
+        totallen -= thislen;
+        if (totallen > 0)              /* more to come: mark a realign point */
+            lzx_addsym(info.buf, LST_REALIGN_BITSTREAM, 0);
+    }
+}
+
+typedef struct LZXHuf {
+    int nsyms;                 /* alphabet size, set by lzx_build_tree */
+    unsigned char *lengths;    /* code length of each symbol */
+    unsigned char *oldlengths; /* for pretree encoding to diff against */
+    int *codes;                /* code bits of each symbol */
+} LZXHuf;
+
+typedef struct LZXHufs {
+    LZXHuf hufs[LST_NTREES];   /* indexed by the tree values of LZXSymType */
+} LZXHufs;
+
+void lzx_build_tree(LZXSym *syms, int nsyms, LZXSymType which, LZXHufs *hufs)
+{
+    int i, max_code_len;
+    int *freqs;
+    LZXHuf *huf = &hufs->hufs[which];
+
+    switch (which) {
+      default:
+        assert(0 && "Bad lzx_build_tree tree type"); /* else falls through */
+      case LST_MAINTREE:
+        /*
+         * Trees encoded via a pretree have a max code length of 16,
+         * because that's the limit of what the pretree alphabet can
+         * represent.
+         */
+        max_code_len = 16;
+
+        /*
+         * Number of symbols in the main tree is 256 literals, plus 8n
+         * match header symbols where n is the largest position slot
+         * number that might be needed to address any offset in the
+         * window.
+         */
+        {
+            int ignored, last_slot;
+            last_slot = lzx_compute_position_slot(OUR_LZX_WINSIZE-1, &ignored);
+            huf->nsyms = 8 * (last_slot+1) + 256;
+        }
+        break;
+      case LST_LENTREE:
+        max_code_len = 16;             /* pretree again */
+        huf->nsyms = 249;              /* a fixed value in the spec */
+        break;
+      case LST_MAINTREE_PRETREE_1:
+      case LST_MAINTREE_PRETREE_2:
+      case LST_LENTREE_PRETREE:
+        /* Pretree code lengths are stored in 4-bit fields, so they
+         * can't go above 15. There are a standard 20 symbols in the
+         * pretree alphabet. */
+        max_code_len = 15;
+        huf->nsyms = 20;
+        break;
+      case LST_ALIGNOFFTREE:
+        /* The aligned-offset tree has 8 elements stored in 3-bit
+         * fields. */
+        max_code_len = 7;
+        huf->nsyms = 8;
+        break;
+    }
+
+    freqs = snewn(huf->nsyms, int);
+
+    /*
+     * Count up the symbol frequencies, looking only at the symbols
+     * in the input sequence that belong to the tree being built.
+     */
+    for (i = 0; i < huf->nsyms; i++)
+        freqs[i] = 0;
+    for (i = 0; i < nsyms; i++)
+        if (syms[i].type == which)
+            freqs[syms[i].value]++;
+
+    /*
+     * Build the Huffman table. The lengths and codes arrays are
+     * freshly allocated here and left in *huf for the caller.
+     */
+    huf->lengths = snewn(huf->nsyms, unsigned char);
+    build_huffman_tree(freqs, huf->lengths, huf->nsyms, max_code_len);
+    huf->codes = snewn(huf->nsyms, int);
+    compute_huffman_codes(huf->lengths, huf->codes, huf->nsyms);
+
+    /*
+     * Cleanup.
+     */
+    sfree(freqs);
+}
+
+void lzx_tree_with_pretree(LZXHuf *huf, int symoffset, int symlimit,
+                           LZXBuffer *buf, LZXSymType pretree_symtype)
+{
+    int i, r;
+
+    if (!huf->oldlengths) {            /* first use: diff against all zeroes */
+        huf->oldlengths = snewn(huf->nsyms, unsigned char);
+        for (i = 0; i < huf->nsyms; i++)
+            huf->oldlengths[i] = 0;
+    }
+
+    for (i = symoffset; i < symlimit; i++) {
+        for (r = 1; i+r < symlimit; r++) /* r = run of equal lengths from i */
+            if (huf->lengths[i+r] != huf->lengths[i])
+                break;
+
+        if (r >= 4) {
+            /*
+             * We have at least one run of the same code length long
+             * enough to use one of the run-length encoding symbols.
+             */
+            while (r >= 4) {
+                int thisrun;
+                if (huf->lengths[i] == 0) {
+                    thisrun = r > 20+31 ? 20+31 : r;
+                    if (thisrun >= 20) { /* 18: 20..51 zeroes, 5-bit count */
+                        lzx_addsym(buf, pretree_symtype, 18);
+                        lzx_addsym(buf, LST_RAWBITS_BASE + 5, thisrun - 20);
+                    } else {             /* 17: 4..19 zeroes, 4-bit count */
+                        lzx_addsym(buf, pretree_symtype, 17);
+                        lzx_addsym(buf, LST_RAWBITS_BASE + 4, thisrun - 4);
+                    }
+                } else {                 /* 19: 4 or 5 equal nonzero lengths */
+                    thisrun = r > 5 ? 5 : r;
+                    lzx_addsym(buf, pretree_symtype, 19);
+                    lzx_addsym(buf, LST_RAWBITS_BASE + 1, thisrun - 4);
+                    lzx_addsym(buf, pretree_symtype,
+                               (huf->oldlengths[i]-huf->lengths[i] + 17) % 17);
+                }
+                r -= thisrun;
+                i += thisrun;
+            }
+
+            if (r == 0) {
+                i--;        /* compensate for normal loop increment */
+                continue;
+            }
+        }
+
+        /*
+         * Otherwise, emit a normal non-encoded symbol: the difference
+         * between the old and new code length, reduced mod 17.
+         */
+        lzx_addsym(buf, pretree_symtype,
+                   (huf->oldlengths[i]-huf->lengths[i] + 17) % 17);
+    }
+}
+
+void lzx_tree_simple(LZXHuf *huf, LZXBuffer *buf, int bits)
+{
+    int i;
+    for (i = 0; i < huf->nsyms; i++)   /* each length as a 'bits'-wide field */
+        lzx_addsym(buf, LST_RAWBITS_BASE + bits, huf->lengths[i]);
+}
+
+typedef struct LZXBitstream {
+    struct LZXEncodedFile *ef;         /* output being accumulated */
+    size_t data_size, resets_size;     /* allocated sizes of ef's two arrays */
+    unsigned short bitbuffer;          /* bits not yet written to ef->data */
+    int nbits;                         /* how many bits bitbuffer holds */
+    int first_block;                   /* set: emit the whole-file header bit */
+} LZXBitstream;
+
+void lzx_write_bits(LZXBitstream *bs, int value, int bits)
+{
+    while (bs->nbits + bits >= 16) {   /* enough to complete a 16-bit word */
+        int thisbits = 16 - bs->nbits; /* room left in the current word */
+        bs->bitbuffer = (bs->bitbuffer << thisbits) |
+            (value >> (bits-thisbits)); /* top bits of value go out first */
+
+        if (bs->ef->data_len+2 > bs->data_size) {
+            bs->data_size = bs->ef->data_len * 5 / 4 + 65536;
+            bs->ef->data = sresize(bs->ef->data, bs->data_size,
+                                   unsigned char);
+        }
+        bs->ef->data[bs->ef->data_len++] = bs->bitbuffer;      /* low byte */
+        bs->ef->data[bs->ef->data_len++] = bs->bitbuffer >> 8; /* high byte */
+
+        bs->bitbuffer = 0;
+        bs->nbits = 0;
+
+        bits -= thisbits;
+        value &= (1<<bits) - 1;        /* keep only the bits still unwritten */
+    }
+
+    bs->bitbuffer = (bs->bitbuffer << bits) | value;
+    bs->nbits += bits;
+}
+
+void lzx_realign(LZXBitstream *bs)
+{
+    lzx_write_bits(bs, 0, 15 & -(unsigned)bs->nbits); /* zero-pad to 16 bits */
+}
+
+void lzx_write_reset_table_entry(LZXBitstream *bs)
+{
+    lzx_realign(bs);   /* flush pending bits so data_len is the true offset */
+
+    if (bs->ef->n_resets >= bs->resets_size) {   /* table full: grow it */
+        bs->resets_size = bs->ef->n_resets * 5 / 4 + 256;
+        bs->ef->reset_byte_offsets = sresize(bs->ef->reset_byte_offsets,
+                                             bs->resets_size, size_t);
+    }
+    bs->ef->reset_byte_offsets[bs->ef->n_resets++] = bs->ef->data_len;
+}
+
+void lzx_huf_encode(LZXSym *syms, int nsyms, LZXHufs *hufs, LZXBitstream *bs)
+{
+    int i;
+    for (i = 0; i < nsyms; i++) {
+        LZXSymType type = syms[i].type;
+        int value = syms[i].value;
+
+        if (type >= LST_RAWBITS_BASE) { /* raw field: width is in the type */
+            lzx_write_bits(bs, value, type - LST_RAWBITS_BASE);
+        } else if (type == LST_REALIGN_BITSTREAM) {
+            /* Realign the bitstream to a 16-bit boundary, and write a
+             * reset table entry giving the resulting byte offset. */
+            lzx_realign(bs);
+            lzx_write_reset_table_entry(bs);
+        } else {                        /* Huffman-coded via the type's tree */
+            lzx_write_bits(bs, hufs->hufs[type].codes[value],
+                           hufs->hufs[type].lengths[value]);
+        }
+    }
+}
+
+void lzx_encode_block(LZXSym *syms, int nsyms, int blocksize,
+                      LZXHufs *hufs, LZXBitstream *bs)
+{
+    LZXBuffer header[8];       /* header fragments, output in index order */
+    int i, blocktype;
+
+    for (i = 0; i < (int)lenof(header); i++)
+        lzx_buffer_init(&header[i]);
+
+    /*
+     * Build the Huffman trees for the main alphabets used in the
+     * block.
+     */
+    lzx_build_tree(syms, nsyms, LST_MAINTREE, hufs);
+    lzx_build_tree(syms, nsyms, LST_LENTREE, hufs);
+    lzx_build_tree(syms, nsyms, LST_ALIGNOFFTREE, hufs);
+
+    /*
+     * Encode each of those as a sequence of pretree symbols. The
+     * main tree is sent in two parts: literals, then match headers.
+     */
+    lzx_tree_with_pretree(&hufs->hufs[LST_MAINTREE], 0, 256,
+                          &header[3], LST_MAINTREE_PRETREE_1);
+    lzx_tree_with_pretree(&hufs->hufs[LST_MAINTREE], 256,
+                          hufs->hufs[LST_MAINTREE].nsyms,
+                          &header[5], LST_MAINTREE_PRETREE_2);
+    lzx_tree_with_pretree(&hufs->hufs[LST_LENTREE], 0,
+                          hufs->hufs[LST_LENTREE].nsyms,
+                          &header[7], LST_LENTREE_PRETREE);
+
+    /*
+     * Build the pretree for each of those encodings.
+     */
+    lzx_build_tree(header[3].syms, header[3].nsyms,
+                   LST_MAINTREE_PRETREE_1, hufs);
+    lzx_build_tree(header[5].syms, header[5].nsyms,
+                   LST_MAINTREE_PRETREE_2, hufs);
+    lzx_build_tree(header[7].syms, header[7].nsyms,
+                   LST_LENTREE_PRETREE, hufs);
+
+    /*
+     * Decide whether we're keeping the aligned offset tree or not,
+     * by comparing the total bit cost of the block's aligned-offset
+     * symbols with and without it.
+     */
+    {
+        int with, without;
+
+        with = 3*8;                    /* cost of transmitting tree */
+        without = 0;                   /* or not */
+
+        for (i = 0; i < nsyms; i++)
+            if (syms[i].type == LST_ALIGNOFFTREE) {
+                with += hufs->hufs[LST_ALIGNOFFTREE].lengths[syms[i].value];
+                without += 3;
+            }
+
+        if (with < without) {
+            /* Yes, it's a win to use the aligned offset tree. */
+            blocktype = 2;
+        } else {
+            /* No, we do better by throwing it away. */
+            blocktype = 1;
+
+            /* Easiest way to simulate that is to pretend we're still
+             * using an aligned offset tree in the encoding, but to
+             * chuck away our code lengths and replace them with the
+             * fixed-length trivial tree. */
+            for (i = 0; i < 8; i++) {
+                hufs->hufs[LST_ALIGNOFFTREE].lengths[i] = 3;
+                hufs->hufs[LST_ALIGNOFFTREE].codes[i] = i;
+            }
+        }
+    }
+
+    /*
+     * Encode all the simply encoded trees (the three pretrees and the
+     * aligned offset tree).
+     */
+    lzx_tree_simple(&hufs->hufs[LST_MAINTREE_PRETREE_1], &header[2], 4);
+    lzx_tree_simple(&hufs->hufs[LST_MAINTREE_PRETREE_2], &header[4], 4);
+    lzx_tree_simple(&hufs->hufs[LST_LENTREE_PRETREE], &header[6], 4);
+    if (blocktype == 2)
+        lzx_tree_simple(&hufs->hufs[LST_ALIGNOFFTREE], &header[1], 3);
+
+    /*
+     * Top-level block header.
+     */
+    if (bs->first_block) {
+        /*
+         * Also include the whole-file header which says whether E8
+         * call translation is on. We never turn it on, because we
+         * don't support it (since in this use case it doesn't seem
+         * likely to be particularly useful anyway).
+         *
+         * It looks like a layer violation to put the output of this
+         * whole-file header inside the per-block function like this,
+         * but in fact it has to be done here because the first reset
+         * table entry really is supposed to point to the _start_ of
+         * the whole-file header.
+         */
+        lzx_addsym(&header[0], LST_RAWBITS_BASE + 1, 0);
+        bs->first_block = FALSE;
+    }
+    lzx_addsym(&header[0], LST_RAWBITS_BASE + 3, blocktype);
+    lzx_addsym(&header[0], LST_RAWBITS_BASE + 24, blocksize);
+
+    /*
+     * Ensure the bit stream starts off aligned, and output an initial
+     * reset-table entry.
+     */
+    lzx_realign(bs);
+    lzx_write_reset_table_entry(bs);
+
+    /*
+     * Write out all of our symbol sequences in order: all of those
+     * assorted header fragments, then the main LZ77 token sequence.
+     */
+    for (i = 0; i < (int)lenof(header); i++)
+        lzx_huf_encode(header[i].syms, header[i].nsyms, hufs, bs);
+    lzx_huf_encode(syms, nsyms, hufs, bs);
+
+    /*
+     * Clean up: the header fragments, and every tree built above.
+     */
+    for (i = 0; i < (int)lenof(header); i++)
+        sfree(header[i].syms);
+    for (i = 0; i < (int)lenof(hufs->hufs); i++) {
+        sfree(hufs->hufs[i].codes);
+        sfree(hufs->hufs[i].lengths);
+    }
+}
+
+struct LZXEncodedFile *lzx(const void *vdata, int totallen,
+                           int realign_interval, int reset_interval)
+{
+    const unsigned char *data = (const unsigned char *)vdata;
+    LZXBitstream bs;
+    LZXHufs hufs;
+    int i;
+
+    bs.ef = snew(struct LZXEncodedFile); /* returned; the caller frees it */
+    bs.ef->data = NULL;
+    bs.ef->reset_byte_offsets = NULL;
+    bs.ef->data_len = bs.data_size = 0;
+    bs.ef->n_resets = bs.resets_size = 0;
+    bs.bitbuffer = 0;
+    bs.nbits = 0;
+
+    for (i = 0; i < (int)lenof(hufs.hufs); i++)
+        hufs.hufs[i].oldlengths = NULL; /* allocated lazily when first needed */
+
+    while (totallen > 0) {              /* one block per reset_interval bytes */
+        int thislen =
+            totallen < reset_interval ? totallen : reset_interval;
+        LZXBuffer buf;
+
+        lzx_buffer_init(&buf);
+
+        lzx_lz77(&buf, data, thislen, realign_interval);
+        data += thislen;
+        totallen -= thislen;
+
+        /*
+         * Block boundaries are chosen completely trivially: since we
+         * have to terminate a block every time we reach the (fairly
+         * short) reset interval in any case, it doesn't hurt us much
+         * to just fix the assumption that every (reset_interval)
+         * bytes of the input turn into exactly one block, i.e. the
+         * whole of buf.syms that we just constructed is output in one
+         * go. We _could_ try improving on this by clever
+         * block-boundary heuristics, but I don't really think it's
+         * worth it.
+         */
+        bs.first_block = TRUE; /* reset every time we reset the LZ state */
+        lzx_encode_block(buf.syms, buf.nsyms, thislen, &hufs, &bs);
+
+        sfree(buf.syms);
+    }
+
+    for (i = 0; i < (int)lenof(hufs.hufs); i++)
+        sfree(hufs.hufs[i].oldlengths);
+
+    /* Realign to a 16-bit boundary, i.e. flush out any last few
+     * unwritten bits. */
+    lzx_realign(&bs);
+
+    return bs.ef;
+}
+
+#ifdef LZX_TEST
+/*
+gcc -g -O0 -DLZX_TEST -o lzxtest -Icharset lzx.c lz77.c huffman.c malloc.c
+*/
+#include <err.h>
+int main(int argc, char **argv)
+{   /* Test driver: LZX-compress the file argv[1] and write it to argv[2]. */
+    FILE *fp;
+    long insize;
+    unsigned char *inbuf;
+    struct LZXEncodedFile *ef;
+
+    if (argc != 3)
+        errx(1, "expected infile and outfile arguments");
+
+    fp = fopen(argv[1], "rb");
+    if (!fp)
+        err(1, "%s: open", argv[1]);
+    if (fseek(fp, 0, SEEK_END) < 0 || (insize = ftell(fp)) < 0)
+        err(1, "%s: seek", argv[1]);   /* cannot find the file's size */
+    rewind(fp);
+    inbuf = snewn(insize, unsigned char);
+    if (fread(inbuf, 1, insize, fp) != (size_t)insize)
+        errx(1, "%s: short read", argv[1]);
+    fclose(fp);
+    ef = lzx(inbuf, insize, 0x8000, 0x10000); /* realign, reset intervals */
+
+    fp = fopen(argv[2], "wb");
+    if (!fp)
+        err(1, "%s: open", argv[2]);
+    if (fwrite(ef->data, 1, ef->data_len, fp) != ef->data_len || fclose(fp))
+        err(1, "%s: write", argv[2]);  /* fclose flushes, so check it too */
+
+    sfree(ef->data);
+    sfree(ef->reset_byte_offsets);
+    sfree(ef);
+    sfree(inbuf);
+
+    return 0;
+}
+
+wchar_t *ustrdup(wchar_t const *s) { assert(0 && "should be unused"); return NULL; } /* link stub */
+void fatalerr_nomemory(void) { errx(1, "out of memory"); }
+#endif
diff --git a/lzx.h b/lzx.h
new file mode 100644
index 0000000..ff78f5d
--- /dev/null
+++ b/lzx.h
@@ -0,0 +1,24 @@
+/*
+ * lzx.h: LZX encoder for Windows CHM files.
+ */
+
+struct LZXEncodedFile {
+    unsigned char *data;               /* the compressed byte stream */
+    size_t data_len;                   /* number of bytes stored in data */
+
+    size_t *reset_byte_offsets;        /* offsets into data, see lzx() below */
+    size_t n_resets;                   /* number of entries in that array */
+};
+
+/*
+ * Produce an LZX-compressed encoding of an input data block. Return
+ * it, along with a list of byte offsets where the data stream is
+ * realigned to a 16-bit boundary because one of realign_interval and
+ * reset_interval has run out.
+ *
+ * The output structure and its fields 'data' and 'reset_byte_offsets'
+ * are all dynamically allocated, and need freeing by the receiver
+ * when finished with.
+ */
+struct LZXEncodedFile *lzx(const void *data, int len,
+                           int realign_interval, int reset_interval);
diff --git a/main.c b/main.c
index 405e6ef..535f024 100644
--- a/main.c
+++ b/main.c
@@ -35,6 +35,7 @@ static const struct backend {
     {"info", info_backend, info_config_filename, 0x0010, 0},
     {"ps", ps_backend, ps_config_filename, 0x0020, 0x0001},
     {"pdf", pdf_backend, pdf_config_filename, 0x0040, 0x0001},
+    {"chm", chm_backend, chm_config_filename, 0x0080, 0},
 };
 
 int main(int argc, char **argv) {
diff --git a/winchm.c b/winchm.c
new file mode 100644
index 0000000..cb21715
--- /dev/null
+++ b/winchm.c
@@ -0,0 +1,1436 @@
+/*
+ * winchm.c: direct output of .CHM files.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+
+#include "halibut.h"
+#include "tree234.h"
+#include "lzx.h"
+/* Store an integer at byte address 'cp' (any pointer expression) in the named byte order; 'value' is evaluated repeatedly. */
+#define PUT_32BIT_LSB_FIRST(cp, value) do { \
+  ((unsigned char *)(cp))[0] = 0xFF & (value);      \
+  ((unsigned char *)(cp))[1] = 0xFF & ((value) >> 8); \
+  ((unsigned char *)(cp))[2] = 0xFF & ((value) >> 16); \
+  ((unsigned char *)(cp))[3] = 0xFF & ((value) >> 24); } while (0)
+
+#define PUT_32BIT_MSB_FIRST(cp, value) do { \
+  ((unsigned char *)(cp))[3] = 0xFF & (value); \
+  ((unsigned char *)(cp))[2] = 0xFF & ((value) >> 8); \
+  ((unsigned char *)(cp))[1] = 0xFF & ((value) >> 16); \
+  ((unsigned char *)(cp))[0] = 0xFF & ((value) >> 24); } while (0)
+
+#define PUT_16BIT_LSB_FIRST(cp, value) do { \
+  ((unsigned char *)(cp))[0] = 0xFF & (value); \
+  ((unsigned char *)(cp))[1] = 0xFF & ((value) >> 8); } while (0)
+/* Append an integer to the rdstringc 'rs' in the named byte order, staging it in a small local buffer. */
+#define RDADD_32BIT_LSB_FIRST(rs, value) do { \
+        unsigned char out[4]; \
+        PUT_32BIT_LSB_FIRST(out, value); \
+        rdaddsn(rs, (void *)out, sizeof(out));  \
+    } while (0)
+
+#define RDADD_32BIT_MSB_FIRST(rs, value) do { \
+        unsigned char out[4]; \
+        PUT_32BIT_MSB_FIRST(out, value); \
+        rdaddsn(rs, (void *)out, sizeof(out)); \
+    } while (0)
+
+#define RDADD_16BIT_LSB_FIRST(rs, value) do { \
+        unsigned char out[2]; \
+        PUT_16BIT_LSB_FIRST(out, value); \
+        rdaddsn(rs, (void *)out, sizeof(out)); \
+    } while (0)
+/* Append a 16-byte GUID to rs: w0, h0 and h1 little-endian, then b0..b7 in argument order. */
+static void guid(rdstringc *rs, unsigned long w0,
+                 unsigned short h0, unsigned short h1,
+                 unsigned char b0, unsigned char b1,
+                 unsigned char b2, unsigned char b3,
+                 unsigned char b4, unsigned char b5,
+                 unsigned char b6, unsigned char b7)
+{
+    unsigned char tail[8];
+
+    /* The three fixed-width little-endian fields come first... */
+    RDADD_32BIT_LSB_FIRST(rs, w0);
+    RDADD_16BIT_LSB_FIRST(rs, h0);
+    RDADD_16BIT_LSB_FIRST(rs, h1);
+
+    /* ...followed by the eight single bytes, gathered and emitted in one go. */
+    tail[0] = b0; tail[1] = b1; tail[2] = b2; tail[3] = b3;
+    tail[4] = b4; tail[5] = b5; tail[6] = b6; tail[7] = b7;
+    rdaddsn(rs, (void *)tail, sizeof(tail));
+}
+/* Assemble the whole file image in rs: ITSF header, 'header section', then directory and content0 verbatim. */
+static void itsf(rdstringc *rs,
+                 const rdstringc *directory, const rdstringc *content0)
+{
+    int headersize_field;
+    int headersect_off, headersect_off_field, headersect_size_field;
+    int directory_off_field, content0_off_field, filesize_field;
+    /* Each *_field variable is the offset in rs of a placeholder that gets back-patched once known. */
+    /* Main file header */
+    rdaddsc(rs, "ITSF");               /* main file magic number */
+    RDADD_32BIT_LSB_FIRST(rs, 3);      /* file format version */
+    headersize_field = rs->pos;
+    RDADD_32BIT_LSB_FIRST(rs, 0);      /* size of main header; fill in later */
+    RDADD_32BIT_LSB_FIRST(rs, 1);      /* unknown, always observed to be 1 */
+    RDADD_32BIT_MSB_FIRST(rs, 0x12345678); /* timestamp (FIXME) */
+    RDADD_32BIT_LSB_FIRST(rs, 0x809); /* language code (FIXME: configurable) */
+    guid(rs,0x7C01FD10,0x7BAA,0x11D0,0x9E,0x0C,0x00,0xA0,0xC9,0x22,0xE6,0xEC);
+    guid(rs,0x7C01FD11,0x7BAA,0x11D0,0x9E,0x0C,0x00,0xA0,0xC9,0x22,0xE6,0xEC);
+    headersect_off_field = rs->pos;
+    RDADD_32BIT_LSB_FIRST(rs, 0); /* header section offset; fill in later */
+    RDADD_32BIT_LSB_FIRST(rs, 0); /* MSW of 64-bit field */
+    headersect_size_field = rs->pos;
+    RDADD_32BIT_LSB_FIRST(rs, 0); /* header section size; fill in later */
+    RDADD_32BIT_LSB_FIRST(rs, 0); /* MSW of 64-bit field */
+    directory_off_field = rs->pos;
+    RDADD_32BIT_LSB_FIRST(rs, 0); /* directory offset; fill in later */
+    RDADD_32BIT_LSB_FIRST(rs, 0); /* MSW of 64-bit field */
+    RDADD_32BIT_LSB_FIRST(rs, directory->pos); /* directory length in bytes (already known) */
+    RDADD_32BIT_LSB_FIRST(rs, 0); /* MSW of 64-bit field */
+    content0_off_field = rs->pos;
+    RDADD_32BIT_LSB_FIRST(rs, 0); /* content section 0 offset; fill in later */
+    RDADD_32BIT_LSB_FIRST(rs, 0); /* MSW of 64-bit field */
+    PUT_32BIT_LSB_FIRST(rs->text + headersize_field, rs->pos);
+
+    /* 'Header section' */
+    headersect_off = rs->pos;
+    PUT_32BIT_LSB_FIRST(rs->text + headersect_off_field, rs->pos);
+    RDADD_32BIT_LSB_FIRST(rs, 0x1FE); /* magic number */
+    RDADD_32BIT_LSB_FIRST(rs, 0); /* unknown, always observed to be 0 */
+    filesize_field = rs->pos;
+    RDADD_32BIT_LSB_FIRST(rs, 0); /* file size; fill in later */
+    RDADD_32BIT_LSB_FIRST(rs, 0); /* MSW of 64-bit field */
+    RDADD_32BIT_LSB_FIRST(rs, 0); /* unknown, always observed to be 0 */
+    RDADD_32BIT_LSB_FIRST(rs, 0); /* unknown, always observed to be 0 */
+    PUT_32BIT_LSB_FIRST(rs->text + headersect_size_field,
+                        rs->pos - headersect_off);
+
+    PUT_32BIT_LSB_FIRST(rs->text + directory_off_field, rs->pos);
+    rdaddsn(rs, directory->text, directory->pos);
+
+    PUT_32BIT_LSB_FIRST(rs->text + content0_off_field, rs->pos);
+    rdaddsn(rs, content0->text, content0->pos);
+
+    PUT_32BIT_LSB_FIRST(rs->text + filesize_field, rs->pos); /* total size, now everything is appended */
+}
+/* Append val to rs as a CHM ENCINT: base-128 groups, most significant first, high bit set on all but the last byte. */
+static void encint(rdstringc *rs, unsigned val)
+{
+    int lo, hi, more;
+
+    /* The stored form is big-endian, but it is simpler to emit the
+     * 7-bit groups least-significant first and then reverse the
+     * bytes in place. */
+
+    lo = rs->pos;                      /* index of the first byte written */
+
+    more = 0;                          /* continuation bit; clear only for the lowest group */
+    for (; val >= 0x80; val >>= 7) {
+        rdaddc(rs, (val & 0x7F) | more);
+        more = 0x80;
+    }
+
+    hi = rs->pos;                      /* index of the last byte written */
+    rdaddc(rs, val | more);
+
+    /* Swap inwards from both ends, which restores most-significant-
+     * first order. */
+    for (; hi > lo; lo++, hi--) {
+        char swap = rs->text[lo];
+        rs->text[lo] = rs->text[hi];
+        rs->text[hi] = swap;
+    }
+}
+/* One file recorded in the CHM internal directory. */
+struct chm_directory_entry {
+    char *filename;                    /* free this when done */
+    int which_content_section;         /* 0 = content0 (uncompressed), 1 = content1 */
+    int offset_in_content_section;     /* byte position of the data within that section */
+    int file_size;                     /* length of the data in bytes */
+};
+/* Three-way comparison (-1, 0 or +1) of two NUL-terminated names in CHM directory order. */
+static int strcmp_chm(const char *a, const char *b)
+{
+    /*
+     * The CHM directory appears to be sorted case-insensitively, by
+     * comparing the _lowercased_ text. (One consequence is that '_'
+     * sorts before every alphabetic character.)
+     */
+    for (; *a != '\0' || *b != '\0'; a++, b++) {
+        char lhs = *a, rhs = *b;
+        if (lhs >= 'A' && lhs <= 'Z')
+            lhs += 'a' - 'A';
+        if (rhs >= 'A' && rhs <= 'Z')
+            rhs += 'a' - 'A';
+        if (lhs != rhs)
+            return lhs < rhs ? -1 : +1;
+    }
+
+    return 0;
+}
+/* tree234 ordering callback: compare two directory entries by filename, in CHM order. */
+int chm_directory_entry_cmp(void *av, void *bv)
+{
+    const struct chm_directory_entry *lhs = av;
+    const struct chm_directory_entry *rhs = bv;
+
+    return strcmp_chm(lhs->filename, rhs->filename);
+}
+/* find234 search callback: av is a bare filename string, bv a directory entry. */
+int chm_directory_entry_find(void *av, void *bv)
+{
+    const struct chm_directory_entry *entry = bv;
+    const char *wanted = av;
+
+    return strcmp_chm(wanted, entry->filename);
+}
+/* Describes one already-written directory chunk, for use by the index level above it. */
+struct chm_index_entry {
+    char *first_filename; /* shared pointer with some chm_directory_entry */
+    int chunk_index;      /* number of the chunk, counted in the order chunks are written */
+};
+/* Build the ITSP directory in rs (which must be empty): header, PMGL listing chunks, then PMGI index levels. */
+static void directory(rdstringc *rs, tree234 *files)
+{
+    const int chunksize = 4096;
+    const int encoded_density = 2;
+    const int useful_density = 1 + (1 << encoded_density);
+    int dirhdr_size_field, dirhdr_size2_field, dirhdr_depth_field;
+    int dirhdr_root_field, dirhdr_tail_field, dirhdr_nchunks_field;
+    int curr_chunk, depth, filename_index;
+    tree234 *index;
+
+    assert(rs->pos == 0);
+    assert(count234(files) > 0);
+
+    /* Directory header */
+    rdaddsc(rs, "ITSP");               /* directory header magic number */
+    RDADD_32BIT_LSB_FIRST(rs, 1);      /* format version */
+    dirhdr_size_field = rs->pos;
+    RDADD_32BIT_LSB_FIRST(rs, 0); /* directory header size; fill in later */
+    RDADD_32BIT_LSB_FIRST(rs, 10);     /* unknown; observed to be 10 */
+    RDADD_32BIT_LSB_FIRST(rs, chunksize);
+    RDADD_32BIT_LSB_FIRST(rs, encoded_density);
+    dirhdr_depth_field = rs->pos;
+    RDADD_32BIT_LSB_FIRST(rs, 0); /* B-tree depth; fill in later */
+    dirhdr_root_field = rs->pos;
+    RDADD_32BIT_LSB_FIRST(rs, 0); /* root chunk index; fill in later */
+    RDADD_32BIT_LSB_FIRST(rs, 0); /* head of PMGL chunk list; always 0 here */
+    dirhdr_tail_field = rs->pos;
+    RDADD_32BIT_LSB_FIRST(rs, 0); /* tail of PMGL chunk list; fill in later */
+    RDADD_32BIT_LSB_FIRST(rs, 0xFFFFFFFFU); /* unknown; observed to be -1 */
+    dirhdr_nchunks_field = rs->pos;
+    RDADD_32BIT_LSB_FIRST(rs, 0); /* total number of chunks; fill in later */
+    RDADD_32BIT_LSB_FIRST(rs, 0x409);  /* language (FIXME) */
+    guid(rs,0x5D02926A,0x212E,0x11D0,0x9D,0xF9,0x00,0xA0,0xC9,0x22,0xE6,0xEC);
+    dirhdr_size2_field = rs->pos;
+    RDADD_32BIT_LSB_FIRST(rs, 0); /* directory header size; fill in later */
+    RDADD_32BIT_LSB_FIRST(rs, 0xFFFFFFFFU); /* unknown; observed to be -1 */
+    RDADD_32BIT_LSB_FIRST(rs, 0xFFFFFFFFU); /* unknown; observed to be -1 */
+    RDADD_32BIT_LSB_FIRST(rs, 0xFFFFFFFFU); /* unknown; observed to be -1 */
+    PUT_32BIT_LSB_FIRST(rs->text + dirhdr_size_field, rs->pos);
+    PUT_32BIT_LSB_FIRST(rs->text + dirhdr_size2_field, rs->pos);
+
+    index = newtree234(NULL);
+    curr_chunk = 0;
+    depth = 1;
+    /* Write out lowest-level PMGL chunks full of actual directory entries */
+    filename_index = 0;
+    while (filename_index < count234(files)) {
+        rdstringc chunk = {0, 0, NULL};
+        rdstringc reversed_quickref = {0, 0, NULL};
+        int chunk_endlen_field, chunk_nextptr_field;
+        int n_entries, offset_of_first_entry;
+        int saved_pos, saved_rq_pos, i;
+
+        rdaddsc(&chunk, "PMGL");
+        chunk_endlen_field = chunk.pos;
+        RDADD_32BIT_LSB_FIRST(&chunk, 0); /* space at end; fill in later */
+        RDADD_32BIT_LSB_FIRST(&chunk, 0); /* unknown; observed to be 0 */
+        if (curr_chunk == 0) {
+            RDADD_32BIT_LSB_FIRST(&chunk, 0xFFFFFFFF); /* 'null' prev ptr */
+        } else {
+            RDADD_32BIT_LSB_FIRST(&chunk, curr_chunk - 1);
+        }
+        chunk_nextptr_field = chunk.pos; /* may overwrite 'next' ptr later */
+        RDADD_32BIT_LSB_FIRST(&chunk, curr_chunk + 1);
+
+        /* Enter this chunk in our index for the next level of the
+         * B-tree (if we end up needing one). */
+        {
+            struct chm_directory_entry *ent = (struct chm_directory_entry *)
+                index234(files, filename_index);
+            struct chm_index_entry *ient = snew(struct chm_index_entry);
+            assert(ent);
+            ient->first_filename = ent->filename;
+            ient->chunk_index = curr_chunk;
+            addpos234(index, ient, count234(index));
+        }
+
+        /* Start accumulating the quick-reference index at the end of this
+         * chunk. We'll build it up backwards, and reverse it halfwordwise
+         * when we copy it into the end of our output chunk. */
+        RDADD_16BIT_LSB_FIRST(&reversed_quickref, 0);
+        offset_of_first_entry = chunk.pos;
+
+        n_entries = 0;
+        /* Write filenames into this chunk until it's full, or until
+         * we run out of filenames. */
+        while (1) {
+            struct chm_directory_entry *ent = (struct chm_directory_entry *)
+                index234(files, filename_index++);
+            if (!ent) {
+                /* Run out of filenames, so this is the last PMGL chunk.
+                 * Reset its 'next' pointer to the 'null' -1 value. */
+                PUT_32BIT_LSB_FIRST(chunk.text + chunk_nextptr_field,
+                                    0xFFFFFFFFU);
+                /* And point the directory header's tail pointer at
+                 * this chunk. */
+                PUT_32BIT_LSB_FIRST(rs->text + dirhdr_tail_field, curr_chunk);
+                break;
+            }
+
+            /* Save the sizes of stuff in this chunk, so we can put
+             * them back if this entry turns out to overflow. */
+            saved_pos = chunk.pos;
+            saved_rq_pos = reversed_quickref.pos;
+
+            if (n_entries > 0 && n_entries % useful_density == 0) {
+                /* Add a quick-reference index pointer. */
+                RDADD_16BIT_LSB_FIRST(&reversed_quickref, chunk.pos -
+                                      offset_of_first_entry);
+            }
+
+            encint(&chunk, strlen(ent->filename));
+            rdaddsc(&chunk, ent->filename);
+            encint(&chunk, ent->which_content_section);
+            encint(&chunk, ent->offset_in_content_section);
+            encint(&chunk, ent->file_size);
+            if (chunk.pos + reversed_quickref.pos > chunksize) {
+                filename_index--;
+                chunk.pos = saved_pos;
+                reversed_quickref.pos = saved_rq_pos;
+                break;
+            }
+
+            /* If we didn't overflow, then commit to this entry and
+             * loop round for the next one. */
+            n_entries++;
+        }
+        assert(n_entries > 0); /* an entry too big for an empty chunk would make this loop spin forever */
+        /* Finalise the chunk. */
+        assert(chunk.pos + reversed_quickref.pos <= chunksize);
+        PUT_32BIT_LSB_FIRST(chunk.text + chunk_endlen_field,
+                            chunksize - chunk.pos);
+        PUT_16BIT_LSB_FIRST(reversed_quickref.text, n_entries);
+        while (chunk.pos + reversed_quickref.pos < chunksize)
+            rdaddc(&chunk, 0);         /* zero-pad */
+        for (i = reversed_quickref.pos - 2; i >= 0; i -= 2)
+            rdaddsn(&chunk, reversed_quickref.text+i, 2);
+
+        assert(chunk.pos == chunksize);
+        rdaddsn(rs, chunk.text, chunk.pos);
+        sfree(chunk.text);
+        sfree(reversed_quickref.text);
+        curr_chunk++;
+    }
+
+    /* Write out as many layers of PMGI index chunks as it takes to
+     * reduce the total number of chunks at the current level to 1. */
+    while (count234(index) > 1) {
+        tree234 *prev_index;
+        int index_index = 0;
+
+        prev_index = index;
+        index = newtree234(NULL);
+        depth++;
+
+        while (index_index < count234(prev_index)) {
+            rdstringc chunk = {0, 0, NULL};
+            rdstringc reversed_quickref = {0, 0, NULL};
+            int chunk_endlen_field;
+            int n_entries, offset_of_first_entry;
+            int saved_pos, saved_rq_pos, i;
+
+            rdaddsc(&chunk, "PMGI");
+            chunk_endlen_field = chunk.pos;
+            RDADD_32BIT_LSB_FIRST(&chunk, 0); /* space at end; fill in later */
+
+            /* Enter this chunk in our index for the next level of the
+             * B-tree (if we end up needing one). */
+            {
+                struct chm_index_entry *ent = (struct chm_index_entry *)
+                    index234(prev_index, index_index);
+                struct chm_index_entry *ient = snew(struct chm_index_entry);
+                assert(ent);
+                ient->first_filename = ent->first_filename;
+                ient->chunk_index = curr_chunk;
+                addpos234(index, ient, count234(index));
+            }
+
+            /* Start accumulating the quick-reference index at the end
+             * of this chunk, as above. */
+            RDADD_16BIT_LSB_FIRST(&reversed_quickref, 0);
+            offset_of_first_entry = chunk.pos;
+
+            n_entries = 0;
+            /* Write index entries into this chunk until it's full, or
+             * until we run out of chunks at the previous level. */
+            while (1) {
+                struct chm_index_entry *ent = (struct chm_index_entry *)
+                index234(prev_index, index_index++);
+                if (!ent)
+                    break;
+
+                /* Save the sizes of stuff in this chunk, so we can put
+                 * them back if this entry turns out to overflow. */
+                saved_pos = chunk.pos;
+                saved_rq_pos = reversed_quickref.pos;
+
+                if (n_entries > 0 && n_entries % useful_density == 0) {
+                    /* Add a quick-reference index pointer. */
+                    RDADD_16BIT_LSB_FIRST(&reversed_quickref, chunk.pos -
+                                          offset_of_first_entry);
+                }
+
+                encint(&chunk, strlen(ent->first_filename));
+                rdaddsc(&chunk, ent->first_filename);
+                encint(&chunk, ent->chunk_index);
+                if (chunk.pos + reversed_quickref.pos > chunksize) {
+                    index_index--;
+                    chunk.pos = saved_pos;
+                    reversed_quickref.pos = saved_rq_pos;
+                    break;
+                }
+
+                /* If we didn't overflow, then commit to this entry and
+                 * loop round for the next one. */
+                n_entries++;
+            }
+            assert(n_entries > 0); /* likewise: every PMGI chunk must consume at least one entry */
+            /* Finalise the chunk. */
+            assert(chunk.pos + reversed_quickref.pos <= chunksize);
+            PUT_32BIT_LSB_FIRST(chunk.text + chunk_endlen_field,
+                                chunksize - chunk.pos);
+            PUT_16BIT_LSB_FIRST(reversed_quickref.text, n_entries);
+            while (chunk.pos + reversed_quickref.pos < chunksize)
+                rdaddc(&chunk, 0);         /* zero-pad */
+            for (i = reversed_quickref.pos - 2; i >= 0; i -= 2)
+                rdaddsn(&chunk, reversed_quickref.text+i, 2);
+
+            assert(chunk.pos == chunksize);
+            rdaddsn(rs, chunk.text, chunk.pos);
+            sfree(chunk.text);
+            sfree(reversed_quickref.text);
+            curr_chunk++;
+        }
+
+        /*
+         * Now free the old index.
+         */
+        while (1) {
+            struct chm_index_entry *ent = (struct chm_index_entry *)
+                delpos234(prev_index, 0);
+            if (!ent)
+                break;
+            sfree(ent);
+        }
+        freetree234(prev_index);
+    }
+
+    /*
+     * Finished! We've reduced to a single chunk. Free the remaining
+     * index (which must have size 1).
+     */
+    assert(count234(index) == 1);
+    sfree(delpos234(index, 0));
+    freetree234(index);
+
+    /* Fill in the deferred fields in the main header. */
+    PUT_32BIT_LSB_FIRST(rs->text + dirhdr_depth_field, depth);
+    PUT_32BIT_LSB_FIRST(rs->text + dirhdr_root_field, curr_chunk-1);
+    PUT_32BIT_LSB_FIRST(rs->text + dirhdr_nchunks_field, curr_chunk);
+}
+/* Open a #SYSTEM record of the given code; returns its start offset, to be handed to sys_end(). */
+static int sys_start(rdstringc *rs, int code)
+{
+    const int record_offset = rs->pos;
+    RDADD_16BIT_LSB_FIRST(rs, code);   /* record type */
+    RDADD_16BIT_LSB_FIRST(rs, 0);      /* length placeholder; sys_end() patches it */
+    return record_offset;
+}
+static void sys_end(rdstringc *rs, int recstart)
+{   /* Back-patch the record's length field with the payload size, i.e. excluding the 4-byte code+length header. */
+    PUT_16BIT_LSB_FIRST(rs->text + recstart+2, rs->pos - (recstart+4));
+}
+/* One help-window definition, as recorded by chm_add_window(). The strings are heap copies. */
+struct chm_window {
+    char *name;
+    char *title;
+    char *contentsfile;                /* NULL if none was given */
+    char *indexfile;                   /* NULL if none was given */
+    char *rootfile;
+    int navpaneflags;                  /* nav pane style bits, written out by chm_build() */
+    int toolbarflags;
+};
+/* State of one CHM file under construction; created by chm_new(), destroyed by chm_free(). */
+struct chm {
+    tree234 *files;                    /* chm_directory_entry, sorted by chm_directory_entry_cmp */
+    tree234 *windows;                  /* chm_window, unsorted, in order of addition */
+    tree234 *stringtab;                /* chm_stringtab_entry, sorted by chm_stringtab_cmp */
+    rdstringc content0;                /* outer uncompressed container */
+    rdstringc content1;                /* compressed subfile */
+    rdstringc outfile;
+    rdstringc stringsfile;             /* interned strings; see chm_intern_string() */
+    char *title, *contents_filename, *index_filename, *default_topic;
+    char *default_window;
+    struct chm_section *rootsecthead, *rootsecttail; /* top-level sections, via nextsibling */
+    struct chm_section *allsecthead, *allsecttail;   /* every section, via next */
+};
+/* One node of the contents tree, created by chm_add_section(). */
+struct chm_section {
+    /* Logical links within the section tree structure */
+    struct chm_section *firstchild, *lastchild, *nextsibling, *parent;
+    /* Link all chm_sections together into one big list, in a
+     * topological order (i.e. every section comes after its
+     * parent) */
+    struct chm_section *next;
+
+    char *title, *url;                 /* heap copies, released in chm_free() */
+    int tocidx_offset_1, tocidx_offset_2;
+    int topic_index, urltbl_offset, urlstr_offset;
+};
+/* Index record for one interned string; the text itself lives in chm->stringsfile. */
+struct chm_stringtab_entry {
+    struct chm *chm;                   /* back-pointer, so the comparators can reach the text */
+    int strtab_offset;                 /* offset of the string within stringsfile.text */
+};
+/* tree234 ordering callback: plain strcmp order on the interned text of two entries. */
+static int chm_stringtab_cmp(void *av, void *bv)
+{
+    const struct chm_stringtab_entry *lhs = av, *rhs = bv;
+    const char *ltext = lhs->chm->stringsfile.text + lhs->strtab_offset;
+    const char *rtext = rhs->chm->stringsfile.text + rhs->strtab_offset;
+
+    return strcmp(ltext, rtext);
+}
+/* find234 search callback: compare a bare C string (av) with the interned text of an entry (bv). */
+static int chm_stringtab_find(void *av, void *bv)
+{
+    const struct chm_stringtab_entry *entry = bv;
+    const char *interned = entry->chm->stringsfile.text + entry->strtab_offset;
+
+    return strcmp((const char *)av, interned);
+}
+/* Return the offset of 'string' in the strings table, appending it first if absent. NULL yields 0. */
+int chm_intern_string(struct chm *chm, const char *string)
+{
+    struct chm_stringtab_entry *ent;
+    int size;
+
+    if (!string)
+        return 0;
+
+    ent = (struct chm_stringtab_entry *)find234(
+        chm->stringtab, (void *)string, chm_stringtab_find);
+    if (ent != NULL)
+        return ent->strtab_offset;     /* already interned */
+
+    /* Pad to ensure the string doesn't cross a page boundary. */
+    size = strlen(string) + 1;         /* include the NUL terminator */
+    assert(size < 0x1000);             /* avoid really serious trouble */
+    while ((chm->stringsfile.pos ^ (chm->stringsfile.pos + size-1)) >> 12)
+        rdaddc(&chm->stringsfile, 0);
+    ent = snew(struct chm_stringtab_entry);
+    ent->chm = chm;
+    ent->strtab_offset = chm->stringsfile.pos;
+    rdaddsc(&chm->stringsfile, string);
+    rdaddc(&chm->stringsfile, '\0');
+    add234(chm->stringtab, ent);
+    return ent->strtab_offset;
+}
+/* Allocate an empty CHM builder; release it with chm_free(). */
+struct chm *chm_new(void)
+{
+    struct chm *fresh = snew(struct chm);
+
+    /* Lookup structures. */
+    fresh->files = newtree234(chm_directory_entry_cmp);
+    fresh->windows = newtree234(NULL);
+    fresh->stringtab = newtree234(chm_stringtab_cmp);
+
+    /* Output buffers all start empty. */
+    fresh->content0 = fresh->content1 = empty_rdstringc;
+    fresh->outfile = fresh->stringsfile = empty_rdstringc;
+    /* Nothing configured and no sections yet. */
+    fresh->title = fresh->contents_filename = fresh->index_filename = NULL;
+    fresh->default_topic = fresh->default_window = NULL;
+    fresh->rootsecthead = fresh->rootsecttail = NULL;
+    fresh->allsecthead = fresh->allsecttail = NULL;
+    chm_intern_string(fresh, "");      /* preinitialise the strings table */
+    return fresh;
+}
+/* Destroy a CHM builder and everything it owns: directory entries, windows, strings, sections, buffers. */
+void chm_free(struct chm *chm)
+{
+    struct chm_directory_entry *file;
+    struct chm_window *window;
+    struct chm_stringtab_entry *interned;
+    struct chm_section *sect, *following;
+
+    /* Empty each tree from the front, releasing whatever its elements own. */
+    for (file = delpos234(chm->files, 0); file; file = delpos234(chm->files, 0)) {
+        sfree(file->filename);
+        sfree(file);
+    }
+    freetree234(chm->files);
+
+    for (window = delpos234(chm->windows, 0); window; window = delpos234(chm->windows, 0)) {
+        sfree(window->name);
+        sfree(window->title);
+        sfree(window->contentsfile);
+        sfree(window->indexfile);
+        sfree(window->rootfile);
+        sfree(window);
+    }
+    freetree234(chm->windows);
+
+    for (interned = delpos234(chm->stringtab, 0); interned; interned = delpos234(chm->stringtab, 0))
+        sfree(interned);
+    freetree234(chm->stringtab);
+
+    /* Walk the flat list, which reaches every section exactly once. */
+    for (sect = chm->allsecthead; sect != NULL; sect = following) {
+        following = sect->next;        /* fetch the link before the node goes away */
+        sfree(sect->title);
+        sfree(sect->url);
+        sfree(sect);
+    }
+
+    sfree(chm->content0.text);
+    sfree(chm->content1.text);
+    sfree(chm->outfile.text);
+    sfree(chm->stringsfile.text);
+
+    sfree(chm->title);
+    sfree(chm->contents_filename);
+    sfree(chm->index_filename);
+    sfree(chm->default_topic);
+    sfree(chm->default_window);
+
+    sfree(chm);
+}
+/* Record 'name' in the directory as 'len' bytes at the current end of 'sect', then append the data there. */
+static void chm_add_file_internal(struct chm *chm, const char *name,
+                                  const char *data, int len,
+                                  rdstringc *sect, int which_sect)
+{
+    struct chm_directory_entry *entry = snew(struct chm_directory_entry);
+    entry->file_size = len;
+    entry->offset_in_content_section = sect->pos;  /* must be read before the append below */
+    entry->which_content_section = which_sect;
+    entry->filename = dupstr(name);
+    rdaddsn(sect, data, len);
+    add234(chm->files, entry);
+}
+/* Look up a directory entry by its full internal name; NULL if there is none. */
+static struct chm_directory_entry *chm_find_file(struct chm *chm, const char *name)
+{
+    void *key = (void *)name;          /* find234 takes a non-const key */
+    return find234(chm->files, key, chm_directory_entry_find);
+}
+/* Return a newly allocated copy of str with '/' prepended; the caller releases it with sfree(). */
+static char *add_leading_slash(const char *str)
+{
+    char *slashed = snewn(strlen(str) + 2, char);  /* '/' + text + NUL */
+    *slashed = '/';
+    strcpy(slashed + 1, str);
+    return slashed;
+}
+/* Store a file under "/<name>" in content section 1 (the one that gets compressed). */
+void chm_add_file(struct chm *chm, const char *name, const char *data, int len)
+{
+    char *internal_name = add_leading_slash(name);
+    chm_add_file_internal(chm, internal_name, data, len, &chm->content1, 1);
+    sfree(internal_name);              /* the directory entry keeps its own copy */
+}
+void chm_title(struct chm *chm, const char *title)
+{   /* Set the document title (a private copy is kept). A previous value is freed rather than leaked. */
+    sfree(chm->title);
+    chm->title = dupstr(title);
+}
+void chm_contents_filename(struct chm *chm, const char *name)
+{   /* Set the contents file name (copied); replaces and frees any earlier setting. */
+    sfree(chm->contents_filename);
+    chm->contents_filename = dupstr(name);
+}
+void chm_index_filename(struct chm *chm, const char *name)
+{   /* Set the index file name (copied); replaces and frees any earlier setting. */
+    sfree(chm->index_filename);
+    chm->index_filename = dupstr(name);
+}
+void chm_default_topic(struct chm *chm, const char *name)
+{   /* Set the default topic (copied); replaces and frees any earlier setting. */
+    sfree(chm->default_topic);
+    chm->default_topic = dupstr(name);
+}
+void chm_default_window(struct chm *chm, const char *name)
+{   /* Set the default window name (copied); replaces and frees any earlier setting. */
+    sfree(chm->default_window);
+    chm->default_window = dupstr(name);
+}
+/* Append a window definition; every string is copied. contentsfile and indexfile may be NULL. */
+void chm_add_window(struct chm *chm, const char *winname, const char *title,
+                    const char *contentsfile, const char *indexfile,
+                    const char *rootfile, int navpaneflags, int toolbarflags)
+{
+    struct chm_window *w = snew(struct chm_window);
+    w->navpaneflags = navpaneflags;
+    w->toolbarflags = toolbarflags;
+    w->name = dupstr(winname);
+    w->title = dupstr(title);
+    w->rootfile = dupstr(rootfile);
+    w->contentsfile = contentsfile ? dupstr(contentsfile) : NULL;
+    w->indexfile = indexfile ? dupstr(indexfile) : NULL;
+    addpos234(chm->windows, w, count234(chm->windows));  /* append: keeps insertion order */
+}
+/* Create a contents-tree section under 'parent' (NULL = top level), placed after its existing siblings. */
+struct chm_section *chm_add_section(struct chm *chm,
+                                    struct chm_section *parent,
+                                    const char *title, const char *url)
+{
+    struct chm_section *sect = snew(struct chm_section);
+    struct chm_section **sibling_head, **sibling_tail;
+
+    /* Private copies of both strings; the new node has no children or successors yet. */
+    sect->title = dupstr(title);
+    sect->url = dupstr(url);
+    sect->parent = parent;
+    sect->firstchild = sect->lastchild = sect->nextsibling = sect->next = NULL;
+
+    /* Pick the sibling list this section joins: its parent's
+     * children, or the list of root sections. */
+    sibling_head = parent ? &parent->firstchild : &chm->rootsecthead;
+    sibling_tail = parent ? &parent->lastchild : &chm->rootsecttail;
+    if (*sibling_tail)
+        (*sibling_tail)->nextsibling = sect;
+    else
+        *sibling_head = sect;
+    *sibling_tail = sect;
+
+    /* Also append to the flat list of all sections, which therefore
+     * stays in creation order. */
+    if (chm->allsecttail)
+        chm->allsecttail->next = sect;
+    else
+        chm->allsecthead = sect;
+    chm->allsecttail = sect;
+
+    return sect;
+}
+/* In-memory form of one #URLTBL record, followed by the comparison that defines the records' sort order. */
+struct chm_urltbl_entry {
+    /*
+     * Records of #URLTBL, before their order is finalised.
+     *
+     * The first word of this record is listed as 'unknown, perhaps
+     * some kind of unique ID' in chmspec. But my observation in HTML
+     * Help Workshop's output is that it's actually a hash of the
+     * target URL, and the file is sorted by them. chm_url_hash()
+     * below implements the hash algorithm.
+     */
+    unsigned long hash;
+    int topic_index;
+    int urlstr_pos;
+    int topics_offset_to_update;
+};
+/* Three-way comparison by hash, then by topic_index; same (void *, void *) form as the other tree callbacks here. */
+int chm_urltbl_entry_cmp(void *av, void *bv)
+{
+    const struct chm_urltbl_entry
+        *a = (const struct chm_urltbl_entry *)av,
+        *b = (const struct chm_urltbl_entry *)bv;
+    if (a->hash < b->hash) return -1;
+    if (a->hash > b->hash) return +1;
+    if (a->topic_index < b->topic_index) return -1;
+    if (a->topic_index > b->topic_index) return +1; /* must mirror the '<' case for a consistent order */
+    return 0;
+}
+/* Compute the 32-bit URL hash used for #URLTBL records (see struct chm_urltbl_entry). Never returns 0. */
+static unsigned long chm_url_hash(const char *str)
+{
+    const char *p;
+    unsigned long hash;
+
+    hash = 0;
+    for (p = str; *p; p++) {
+        /*
+         * Multiply `hash' by 43.
+         */
+        {
+            unsigned long bottom, top;
+            bottom = (hash & 0xFFFFUL) * 43;          /* done in 16-bit halves... */
+            top = ((hash >> 16) & 0xFFFFUL) * 43;
+            top += (bottom >> 16);                    /* ...with the carry moved up by hand */
+            bottom &= 0xFFFFUL;
+            top &= 0xFFFFUL;                          /* discard anything beyond 32 bits */
+            hash = (top << 16) | bottom;
+        }
+
+        /*
+         * Add the mapping value for this byte to `hash'.
+         */
+        {
+            int c = (signed char)*p;   /* signed view of the byte, whatever plain char's signedness */
+
+            /*
+             * Translation rule determined by getting hhc.exe to hash
+             * a lot of strings and analysing the results. I was able
+             * to confirm this mapping rule for all byte values except
+             * for NUL, CR, LF, ^Z and backslash: the first four of
+             * those I couldn't find any way to get hhc to insert into
+             * a URL, and the last one is automatically translated
+             * into '/', presumably for reasons of Windows vs URI path
+             * syntax normalisation.
+             */
+            int val = (c == '/' ? 0x2c : c <= 'Z' ? c-0x30 : c-0x50);
+
+            if (val > 0 && hash > (0xFFFFFFFFUL - val)) {
+                hash -= (0xFFFFFFFFUL - val) + 1;     /* sum would exceed 2^32-1: wrap round */
+            } else if (val < 0 && hash < (unsigned long)-val) {
+                hash += (0xFFFFFFFFUL + val) + 1;     /* sum would go below 0: wrap round */
+            } else
+                hash += val;
+        }
+    }
+
+    /*
+     * Special case: an output hash of 0 is turned into 1, which I
+     * conjecture is so that in some context or other 0 can be
+     * reserved to mean something like 'null' or 'no hash value
+     * available'.
+     */
+    if (hash == 0)
+        hash = 1;
+
+    return hash;
+}
+
+const char *chm_build(struct chm *chm, int *outlen)
+{
+    rdstringc dir = {0, 0, NULL};
+    rdstringc sysfile = {0, 0, NULL};
+    struct LZXEncodedFile *ef;
+    int rec;
+
+    chm_add_file_internal(chm, "/", "", 0, &chm->content0, 0);
+
+    RDADD_32BIT_LSB_FIRST(&sysfile, 3); /* #SYSTEM file version */
+
+    rec = sys_start(&sysfile, 9);  /* identify CHM-producing tool */
+    rdaddsc(&sysfile, "Halibut, ");
+    rdaddsc(&sysfile, version);
+    rdaddc(&sysfile, '\0');
+    sys_end(&sysfile, rec);
+
+    rec = sys_start(&sysfile, 12);  /* number of 'information types' */
+    RDADD_32BIT_LSB_FIRST(&sysfile, 0);
+    sys_end(&sysfile, rec);
+    rec = sys_start(&sysfile, 15);  /* checksum of 'information types' */
+    RDADD_32BIT_LSB_FIRST(&sysfile, 0);
+    sys_end(&sysfile, rec);
+    /* actual section of 'information types', whatever those might be */
+    chm_add_file_internal(chm, "/#ITBITS", "", 0, &chm->content0, 0);
+
+    if (chm->title) {
+        rec = sys_start(&sysfile, 3);  /* document title */
+        rdaddsc(&sysfile, chm->title);
+        rdaddc(&sysfile, '\0');
+        sys_end(&sysfile, rec);
+    }
+
+    if (chm->default_topic) {
+        rec = sys_start(&sysfile, 2);
+        rdaddsc(&sysfile, chm->default_topic);
+        rdaddc(&sysfile, '\0');
+        sys_end(&sysfile, rec);
+    }
+
+    if (chm->contents_filename) {
+        rec = sys_start(&sysfile, 0);
+        rdaddsc(&sysfile, chm->contents_filename);
+        rdaddc(&sysfile, '\0');
+        sys_end(&sysfile, rec);
+    }
+
+    if (chm->index_filename) {
+        rec = sys_start(&sysfile, 1);
+        rdaddsc(&sysfile, chm->index_filename);
+        rdaddc(&sysfile, '\0');
+        sys_end(&sysfile, rec);
+    }
+
+    if (chm->default_window) {
+        rec = sys_start(&sysfile, 5);
+        rdaddsc(&sysfile, chm->default_window);
+        rdaddc(&sysfile, '\0');
+        sys_end(&sysfile, rec);
+    }
+
+    rec = sys_start(&sysfile, 4);
+    RDADD_32BIT_LSB_FIRST(&sysfile, 0x809); /* language again (FIXME) */
+    RDADD_32BIT_LSB_FIRST(&sysfile, 0);     /* DBCS: off */
+    RDADD_32BIT_LSB_FIRST(&sysfile, 1);     /* full-text search: on */
+    RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no KLinks (whatever they are) */
+    RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no ALinks (whatever they are) */
+    RDADD_32BIT_LSB_FIRST(&sysfile, 0x11223344); /* timestamp LSW (FIXME) */
+    RDADD_32BIT_LSB_FIRST(&sysfile, 0x55667788); /* timestamp MSW (FIXME) */
+    RDADD_32BIT_LSB_FIRST(&sysfile, 0);          /* unknown */
+    RDADD_32BIT_LSB_FIRST(&sysfile, 0);          /* unknown */
+    sys_end(&sysfile, rec);
+
+    {
+        rdstringc winfile = {0, 0, NULL};
+        int i, j, s;
+        struct chm_window *win;
+
+        RDADD_32BIT_LSB_FIRST(&winfile, count234(chm->windows));
+        RDADD_32BIT_LSB_FIRST(&winfile, 196); /* size of each entry */
+        for (i = 0;
+             (win = (struct chm_window *)index234(chm->windows, i)) != NULL;
+             i++) {
+            RDADD_32BIT_LSB_FIRST(&winfile, 196); /* size of entry */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* not Unicode */
+            s = chm_intern_string(chm, win->name);
+            RDADD_32BIT_LSB_FIRST(&winfile, s);
+            /* Bitmap of which fields are used: 2 means nav pane
+             * style, 0x200 means whether nav pane is initially
+             * closed, 0x400 means tab position */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0x502);
+            /* Nav pane styles:
+             *  0x40000 = user can control window size/pos
+             *  0x20000 = advanced full-text search UI
+             *  0x00400 = include a search tab
+             *  0x00100 = keep contents/index in sync with current topic
+             *  0x00020 = three-pane window */
+            RDADD_32BIT_LSB_FIRST(&winfile, win->navpaneflags);
+            s = chm_intern_string(chm, win->title);
+            RDADD_32BIT_LSB_FIRST(&winfile, s);
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window styles */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window ex styles */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window rect.left */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window rect.top */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window rect.right */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window rect.bottom */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window show state */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* nav pane width */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* topic rect.left */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* topic rect.top */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* topic rect.right */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* topic rect.bottom */
+            s = chm_intern_string(chm, win->contentsfile);
+            RDADD_32BIT_LSB_FIRST(&winfile, s);
+            s = chm_intern_string(chm, win->indexfile);
+            RDADD_32BIT_LSB_FIRST(&winfile, s);
+            s = chm_intern_string(chm, win->rootfile);
+            RDADD_32BIT_LSB_FIRST(&winfile, s);
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Home button target */
+            RDADD_32BIT_LSB_FIRST(&winfile, win->toolbarflags);
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* nav pane initially open */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* default nav pane = TOC */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* nav pane tabs at top */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* WM_NOTIFY id */
+            for (j = 0; j < 20; j++)
+                rdaddc(&winfile, 0);            /* tab order block */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* history to keep */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 1 button target */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 2 button target */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 1 button text */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 2 button text */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window min rect.left */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window min rect.top */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window min rect.right */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window min rect.bottom */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no information types */
+            RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no custom tabs */
+        }
+        assert(winfile.pos == 8 + 196 * count234(chm->windows));
+        chm_add_file_internal(chm, "/#WINDOWS", winfile.text, winfile.pos,
+                              &chm->content1, 1);
+        sfree(winfile.text);
+    }
+
+    {
+        struct chm_section *sect;
+        rdstringc tocidx = {0, 0, NULL};
+        rdstringc topics = {0, 0, NULL};
+        rdstringc urltbl = {0, 0, NULL};
+        rdstringc urlstr = {0, 0, NULL};
+        int i, index, s, n_tocidx_3;
+        struct chm_directory_entry *contentsfile = NULL, *indexfile = NULL;
+        tree234 *urltbl_pre;
+        struct chm_urltbl_entry *urltbl_entry;
+
+        urltbl_pre = newtree234(chm_urltbl_entry_cmp);
+
+        for (i = 0; i < 0x1000; i++)
+            rdaddc(&tocidx, 0);
+
+        /* Write a header of one zero byte at the start of #URLSTR.
+         * chmspec says this doesn't always appear, and is unclear on
+         * what this is for, but I suspect it serves the same purpose
+         * as the zero byte at the start of #STRINGS, namely that it
+         * arranges that an absent string in the following records can
+         * be represented by an offset of zero which will
+         * automatically point to this byte and hence indicate the
+         * empty string. */
+        rdaddc(&urlstr, 0);
+
+        if (chm->contents_filename) {
+            char *withslash = add_leading_slash(chm->contents_filename);
+            contentsfile = chm_find_file(chm, withslash);
+            sfree(withslash);
+            assert(contentsfile);
+        }
+        if (chm->index_filename) {
+            char *withslash = add_leading_slash(chm->index_filename);
+            indexfile = chm_find_file(chm, withslash);
+            sfree(withslash);
+            assert(indexfile);
+        }
+
+        index = 0;
+
+        /* #TOCIDX header field pointing at start of type-1 records */
+        PUT_32BIT_LSB_FIRST(tocidx.text + 0, tocidx.pos);
+
+        /*
+         * First pass over the section structure, generating in
+         * parallel one of the multiple structure types in #TOCIDX and
+         * the sole record in all the other files.
+         */
+        for (sect = chm->allsecthead; sect; sect = sect->next) {
+            /* Size of the first kind of #TOCIDX record varies between
+             * leaf and internal nodes */
+            int tocidx_size_1 = (sect->firstchild ? 0x1c : 0x14);
+
+            /*
+             * Flags:
+             *  - 8 means there's a local filename, which in _our_ CHM
+             *    files there always is. If you unset this flag, you
+             *    get a node in the contents treeview which doesn't
+             *    open any page when clicked, and exists solely to
+             *    contain children; in that situation the topic index
+             *    field at position 0x08 in this record also stops
+             *    being an index into #TOPICS and instead becomes an
+             *    index into #STRINGS giving the node's title.
+             *  - 4 apparently means the node should have the 'book'
+             *    rather than 'page' icon in the TOC tree view in the
+             *    help viewer
+             *  - 1 means the node has a subtree in the tree view,
+             *    which I take to mean (contrary to chmspec) that
+             *    _this_ is the flag that means this node is a
+             *    non-leaf node and hence has the two extra fields for
+             *    first-child and whatever the other one means
+             */
+            unsigned tocidx_1_flags = (sect->firstchild ? 0x5 : 0) | 8;
+
+            int urlstr_size;
+
+            /* Pad to ensure the record isn't split between
+             * 0x1000-byte pages of the file */
+            while ((tocidx.pos ^ (tocidx.pos + tocidx_size_1 - 1)) >> 12)
+                RDADD_32BIT_LSB_FIRST(&tocidx, 0);
+
+            sect->topic_index = index++;
+
+            /* Write the type-1 record in #TOCIDX */
+            sect->tocidx_offset_1 = tocidx.pos;
+            RDADD_16BIT_LSB_FIRST(&tocidx, 0); /* unknown */
+            /* chmspec thinks this 16-bit field is 'unknown', but in
+             * my observations it appears to be the index of an entry
+             * in the #TOCIDX type-3 region. But I still don't know
+             * what those are really for. */
+            RDADD_16BIT_LSB_FIRST(&tocidx, sect->topic_index);
+            RDADD_32BIT_LSB_FIRST(&tocidx, tocidx_1_flags);
+            RDADD_32BIT_LSB_FIRST(&tocidx, sect->topic_index);
+            RDADD_32BIT_LSB_FIRST(&tocidx, sect->parent ?
+                                  sect->parent->tocidx_offset_1 : 0);
+            RDADD_32BIT_LSB_FIRST(&tocidx, 0); /* 'next' ptr; fill in later */
+            if (sect->firstchild) {
+                RDADD_32BIT_LSB_FIRST(&tocidx, 0); /* child; fill in later */
+                RDADD_32BIT_LSB_FIRST(&tocidx, 0); /* unknown */
+            }
+            assert(tocidx.pos == sect->tocidx_offset_1 + tocidx_size_1);
+
+            /* Figure out our offset in #URLSTR, by ensuring we're not
+             * going to overrun a page boundary (as usual). For this
+             * we need our record length, which is two 32-bit fields
+             * plus a NUL-terminated copy of the target file name / URL. */
+            urlstr_size = 8 + strlen(sect->url) + 1;
+            assert(urlstr_size < 0x1000); /* must _fit_ in a page! */
+            while ((urlstr.pos ^ (urlstr.pos + urlstr_size - 1)) >> 12)
+                rdaddc(&urlstr, 0);
+
+            /*
+             * Save everything we know so far about the #URLTBL record
+             * we'll need to write.
+             */
+            urltbl_entry = snew(struct chm_urltbl_entry);
+            urltbl_entry->hash = chm_url_hash(sect->url);
+            urltbl_entry->topic_index = sect->topic_index;
+            urltbl_entry->urlstr_pos = urlstr.pos;
+            add234(urltbl_pre, urltbl_entry);
+
+            /* Write the #TOPICS entry */
+            RDADD_32BIT_LSB_FIRST(&topics, sect->tocidx_offset_1);
+            s = chm_intern_string(chm, sect->title);
+            RDADD_32BIT_LSB_FIRST(&topics, s);
+            urltbl_entry->topics_offset_to_update = topics.pos;
+            RDADD_32BIT_LSB_FIRST(&topics, 0); /* fill in later */
+            RDADD_16BIT_LSB_FIRST(&topics, 6); /* flag as 'in contents' */
+            RDADD_16BIT_LSB_FIRST(&topics, 0); /* unknown */
+
+            /*
+             * Write the #URLSTR entry.
+             */
+            RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* URL string (null) */
+            RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* FrameName location (null) */
+            rdaddsc(&urlstr, sect->url);       /* 'Local' */
+            rdaddc(&urlstr, '\0');
+        }
+
+        /*
+         * Add entries in #URLTBL, #URLSTR and #TOPICS for the
+         * contents and index files. They don't form part of the tree
+         * in #TOCIDX, though.
+         */
+        if (chm->contents_filename) {
+            urltbl_entry = snew(struct chm_urltbl_entry);
+            urltbl_entry->hash = chm_url_hash(chm->contents_filename);
+            urltbl_entry->topic_index = index;
+            urltbl_entry->urlstr_pos = urlstr.pos;
+            add234(urltbl_pre, urltbl_entry);
+
+            /* #TOPICS entry */
+            RDADD_32BIT_LSB_FIRST(&topics, 0); /* no #TOCIDX entry */
+            RDADD_32BIT_LSB_FIRST(&topics, 0xFFFFFFFFU); /* no title either */
+            urltbl_entry->topics_offset_to_update = topics.pos;
+            RDADD_32BIT_LSB_FIRST(&topics, 0); /* fill in later */
+            RDADD_16BIT_LSB_FIRST(&topics, 2); /* flag as 'not in contents' */
+            RDADD_16BIT_LSB_FIRST(&topics, 0); /* unknown */
+
+            /* #URLSTR entry */
+            RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* URL string (null) */
+            RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* FrameName location (null) */
+            rdaddsc(&urlstr, chm->contents_filename); /* 'Local' */
+            rdaddc(&urlstr, '\0');
+
+            /* And add the entry in #SYSTEM that cites the hash of the
+             * #URLTBL entry. */
+            rec = sys_start(&sysfile, 11);
+            RDADD_32BIT_LSB_FIRST(&sysfile, urltbl_entry->hash);
+            sys_end(&sysfile, rec);
+
+            index++;
+        }
+        if (chm->index_filename) {
+            urltbl_entry = snew(struct chm_urltbl_entry);
+            urltbl_entry->hash = chm_url_hash(chm->index_filename);
+            urltbl_entry->topic_index = index;
+            urltbl_entry->urlstr_pos = urlstr.pos;
+            add234(urltbl_pre, urltbl_entry);
+
+            /* #TOPICS entry */
+            RDADD_32BIT_LSB_FIRST(&topics, 0); /* no #TOCIDX entry */
+            RDADD_32BIT_LSB_FIRST(&topics, 0xFFFFFFFFU); /* no title either */
+            urltbl_entry->topics_offset_to_update = topics.pos;
+            RDADD_32BIT_LSB_FIRST(&topics, 0); /* fill in later */
+            RDADD_16BIT_LSB_FIRST(&topics, 2); /* flag as 'not in contents' */
+            RDADD_16BIT_LSB_FIRST(&topics, 0); /* unknown */
+
+            /* #URLSTR entry */
+            RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* URL string (null) */
+            RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* FrameName location (null) */
+            rdaddsc(&urlstr, chm->index_filename); /* 'Local' */
+            rdaddc(&urlstr, '\0');
+
+            /* And add the entry in #SYSTEM that cites the hash of the
+             * #URLTBL entry. */
+            rec = sys_start(&sysfile, 7);
+            RDADD_32BIT_LSB_FIRST(&sysfile, urltbl_entry->hash);
+            sys_end(&sysfile, rec);
+
+            index++;
+        }
+
+        /*
+         * Now we've got all our #URLTBL entries, so we can write out
+         * #URLTBL itself.
+         */
+        while ((urltbl_entry = delpos234(urltbl_pre, 0)) != NULL) {
+            /* Pad #URLTBL to the beginning of this section's entry.
+             * Entries are all 12 bytes long, but again there's some
+             * padding to ensure that they don't cross a page
+             * boundary. */
+            while ((urltbl.pos ^ (urltbl.pos + 12 - 1)) >> 12)
+                RDADD_32BIT_LSB_FIRST(&urltbl, 0);
+
+            /* Fill in the link from #TOPICS to this entry's offset */
+            PUT_32BIT_LSB_FIRST(topics.text +
+                                urltbl_entry->topics_offset_to_update,
+                                urltbl.pos);
+
+            /* Write the entry itself. */
+            RDADD_32BIT_LSB_FIRST(&urltbl, urltbl_entry->hash);
+            RDADD_32BIT_LSB_FIRST(&urltbl, urltbl_entry->topic_index);
+            RDADD_32BIT_LSB_FIRST(&urltbl, urltbl_entry->urlstr_pos);
+
+            sfree(urltbl_entry);
+        }
+        freetree234(urltbl_pre);
+
+        /*
+         * Small follow-up pass filling in forward-pointing offset
+         * fields in the #TOCIDX type-1 records which the previous
+         * pass didn't know yet.
+         */
+        for (sect = chm->allsecthead; sect; sect = sect->next) {
+            if (sect->nextsibling)
+                PUT_32BIT_LSB_FIRST(tocidx.text + sect->tocidx_offset_1 + 0x10,
+                                    sect->nextsibling->tocidx_offset_1);
+            if (sect->firstchild)
+                PUT_32BIT_LSB_FIRST(tocidx.text + sect->tocidx_offset_1 + 0x14,
+                                    sect->firstchild->tocidx_offset_1);
+        }
+
+        /* #TOCIDX header field pointing at start of type-2 records */
+        PUT_32BIT_LSB_FIRST(tocidx.text + 0xC, tocidx.pos);
+
+        /*
+         * Write the #TOCIDX type-2 records, which are just 4 bytes
+         * long and just contain another copy of each topic's index,
+         * but we need to have them there so that the type-3 records
+         * can refer to them by offset.
+         */
+        for (sect = chm->allsecthead; sect; sect = sect->next) {
+            sect->tocidx_offset_2 = tocidx.pos;
+            RDADD_32BIT_LSB_FIRST(&tocidx, sect->topic_index);
+        }
+
+        /* Align the current #TOCIDX offset to 16 bytes */
+        while (tocidx.pos & 0xF)
+            rdaddc(&tocidx, 0);
+
+        /* #TOCIDX header field pointing at start of type-3 records */
+        PUT_32BIT_LSB_FIRST(tocidx.text + 0x4, tocidx.pos);
+
+        /*
+         * Write the #TOCIDX type-3 records.
+         *
+         * In help files I've examined, there are fewer of these than
+         * you might expect; apparently not all sections rate one for
+         * some reason. For the moment I'm just writing out one for
+         * every section.
+         */
+        n_tocidx_3 = 0;
+        for (sect = chm->allsecthead; sect; sect = sect->next) {
+            RDADD_32BIT_LSB_FIRST(&tocidx, sect->tocidx_offset_1);
+            RDADD_32BIT_LSB_FIRST(&tocidx, sect->topic_index + 666); /* ?! */
+            RDADD_32BIT_LSB_FIRST(&tocidx, sect->tocidx_offset_2);
+            RDADD_32BIT_LSB_FIRST(&tocidx, sect->topic_index);
+            n_tocidx_3++;
+        }
+
+        /* #TOCIDX header field giving number of type-3 records */
+        PUT_32BIT_LSB_FIRST(tocidx.text + 0x8, n_tocidx_3);
+
+        chm_add_file_internal(chm, "/#TOCIDX", tocidx.text, tocidx.pos,
+                              &chm->content1, 1);
+        chm_add_file_internal(chm, "/#TOPICS", topics.text, topics.pos,
+                              &chm->content1, 1);
+        chm_add_file_internal(chm, "/#URLTBL", urltbl.text, urltbl.pos,
+                              &chm->content1, 1);
+        chm_add_file_internal(chm, "/#URLSTR", urlstr.text, urlstr.pos,
+                              &chm->content1, 1);
+
+        /*
+         * Write #IDXHDR (and its mirror in #SYSTEM), which we
+         * couldn't do until we knew how many topic nodes there were.
+         */
+        {
+            int idxhdr_start;
+
+            rec = sys_start(&sysfile, 13);
+            idxhdr_start = sysfile.pos;
+
+            rdaddsc(&sysfile, "T#SM");     /* #IDXHDR magic */
+            RDADD_32BIT_LSB_FIRST(&sysfile, 0x12345678); /* checksum? FIXME */
+            RDADD_32BIT_LSB_FIRST(&sysfile, 1); /* unknown */
+            RDADD_32BIT_LSB_FIRST(&sysfile, index); /* number of topic nodes */
+            RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */
+            RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no image list */
+            RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */
+            RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* top-level node is
+                                                 * not a folder */
+            RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no bg colour */
+            RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no fg colour */
+            RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no font spec */
+            RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no window style */
+            RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no ex win style */
+            RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* unknown */
+            RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no frame name */
+            RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no window name */
+            RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no information types */
+            RDADD_32BIT_LSB_FIRST(&sysfile, 1); /* unknown */
+            RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no merge files */
+            RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */
+            while (sysfile.pos - idxhdr_start < 4096)
+                rdaddc(&sysfile, 0);
+
+            chm_add_file_internal(chm, "/#IDXHDR", sysfile.text + idxhdr_start,
+                                  sysfile.pos - idxhdr_start,
+                                  &chm->content1, 1);
+            sys_end(&sysfile, rec);
+        }
+
+        sfree(tocidx.text);
+        sfree(topics.text);
+        sfree(urltbl.text);
+        sfree(urlstr.text);
+    }
+
+    /* Missing from #SYSTEM: */
+    /* 10 (4-byte timestamp) */
+    /* 6 (logical file name) */
+
+    chm_add_file_internal(chm, "/#SYSTEM", sysfile.text, sysfile.pos,
+                          &chm->content0, 0);
+    sfree(sysfile.text);
+
+    chm_add_file_internal(chm, "/#STRINGS", chm->stringsfile.text,
+                          chm->stringsfile.pos, &chm->content1, 1);
+
+    /*
+     * ::DataSpace/NameList, giving the names of the two content sections.
+     */
+    {
+        rdstringc dsnl = {0, 0, NULL};
+        const char *p;
+        int stringstart;
+
+        RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* total file size; fill in later */
+        RDADD_16BIT_LSB_FIRST(&dsnl, 2); /* number of names */
+
+        RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* string length; fill in later */
+        stringstart = dsnl.pos;
+        for (p = "Uncompressed"; *p; p++)
+            RDADD_16BIT_LSB_FIRST(&dsnl, *p);
+        PUT_16BIT_LSB_FIRST(dsnl.text + stringstart - 2,
+                            (dsnl.pos - stringstart) / 2);
+        RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* NUL terminator */
+
+        RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* string length; fill in later */
+        stringstart = dsnl.pos;
+        for (p = "MSCompressed"; *p; p++)
+            RDADD_16BIT_LSB_FIRST(&dsnl, *p);
+        PUT_16BIT_LSB_FIRST(dsnl.text + stringstart - 2,
+                            (dsnl.pos - stringstart) / 2);
+        RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* NUL terminator */
+
+        PUT_16BIT_LSB_FIRST(dsnl.text, dsnl.pos / 2);
+
+        chm_add_file_internal(chm, "::DataSpace/NameList", dsnl.text, dsnl.pos,
+                              &chm->content0, 0);
+
+        sfree(dsnl.text);
+    }
+
+    /*
+     * Actually compress the compressed-data section, load the
+     * compressed version of it into the containing uncompressed
+     * section, and write the auxiliary files describing it.
+     */
+    {
+        rdstringc rs = {0, 0, NULL};
+        const char *p;
+        int orig_decomp_size = chm->content1.pos;
+        size_t i;
+
+        while (chm->content1.pos & 0x7FFF)
+            rdaddc(&chm->content1, 0); /* pad to a realign-interval boundary */
+        ef = lzx(chm->content1.text, chm->content1.pos, 0x8000, 0x10000);
+        chm_add_file_internal(
+            chm, "::DataSpace/Storage/MSCompressed/Content",
+            (char *)ef->data, ef->data_len, &chm->content0, 0);
+
+        for (p = "{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}"; *p; p++)
+            RDADD_16BIT_LSB_FIRST(&rs, *p);
+        rs.pos = 0x26; /* this file is always written truncated :-) */
+        chm_add_file_internal(
+            chm, "::DataSpace/Storage/MSCompressed/Transform/List",
+            rs.text, rs.pos, &chm->content0, 0);
+        rs.pos = 0;
+
+        RDADD_32BIT_LSB_FIRST(&rs, orig_decomp_size);
+        RDADD_32BIT_LSB_FIRST(&rs, 0); /* high word of 64-bit size */
+        chm_add_file_internal(
+            chm, "::DataSpace/Storage/MSCompressed/SpanInfo",
+            rs.text, rs.pos, &chm->content0, 0);
+        rs.pos = 0;
+
+        RDADD_32BIT_LSB_FIRST(&rs, 6); /* file size */
+        rdaddsc(&rs, "LZXC");          /* compression type identifier */
+        RDADD_32BIT_LSB_FIRST(&rs, 2); /* version */
+        RDADD_32BIT_LSB_FIRST(&rs, 2); /* reset interval in units of 2^15 */
+        RDADD_32BIT_LSB_FIRST(&rs, 2); /* window size in units of 2^15 */
+        RDADD_32BIT_LSB_FIRST(&rs, 1); /* reset interval multiplier */
+        RDADD_32BIT_LSB_FIRST(&rs, 0); /* unknown */
+        chm_add_file_internal(
+            chm, "::DataSpace/Storage/MSCompressed/ControlData",
+            rs.text, rs.pos, &chm->content0, 0);
+        rs.pos = 0;
+
+        RDADD_32BIT_LSB_FIRST(&rs, 2); /* unknown (version number?) */
+        RDADD_32BIT_LSB_FIRST(&rs, ef->n_resets); /* reset table length */
+        RDADD_32BIT_LSB_FIRST(&rs, 8); /* reset table entry size */
+        RDADD_32BIT_LSB_FIRST(&rs, 0x28); /* reset table offset */
+        RDADD_32BIT_LSB_FIRST(&rs, orig_decomp_size); /* uncompressed len */
+        RDADD_32BIT_LSB_FIRST(&rs, 0); /* MSW */
+        RDADD_32BIT_LSB_FIRST(&rs, ef->data_len); /* compressed len */
+        RDADD_32BIT_LSB_FIRST(&rs, 0); /* MSW */
+        RDADD_32BIT_LSB_FIRST(&rs, 0x8000); /* realign interval */
+        RDADD_32BIT_LSB_FIRST(&rs, 0); /* MSW */
+        for (i = 0; i < ef->n_resets; i++) {
+            RDADD_32BIT_LSB_FIRST(&rs, ef->reset_byte_offsets[i]);
+            RDADD_32BIT_LSB_FIRST(&rs, 0); /* MSW */
+        }
+        chm_add_file_internal(
+            chm, "::DataSpace/Storage/MSCompressed/Transform/"
+            "{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable",
+            rs.text, rs.pos, &chm->content0, 0);
+        rs.pos = 0;
+    }
+
+    sfree(ef->data);
+    sfree(ef->reset_byte_offsets);
+    sfree(ef);
+
+    directory(&dir, chm->files);
+    itsf(&chm->outfile, &dir, &chm->content0);
+    sfree(dir.text);
+
+    assert(outlen);
+    *outlen = chm->outfile.pos;
+    return chm->outfile.text;
+}
diff --git a/winchm.h b/winchm.h
new file mode 100644
index 0000000..caee3fc
--- /dev/null
+++ b/winchm.h
@@ -0,0 +1,21 @@
+struct chm;             /* forward declaration only; not defined in this header */
+/* NOTE(review): notes saying "presumably" are unverified; the bodies are elsewhere. */
+struct chm *chm_new(void);       /* presumably creates an empty CHM description */
+void chm_free(struct chm *chm);  /* presumably releases what chm_new() returned */
+void chm_add_file(struct chm *chm, const char *name,
+                  const char *data, int len); /* presumably len bytes at data */
+void chm_title(struct chm *chm, const char *title); /* presumably document title */
+void chm_contents_filename(struct chm *chm, const char *name); /* presumably TOC file */
+void chm_index_filename(struct chm *chm, const char *name);    /* presumably index file */
+void chm_default_topic(struct chm *chm, const char *name);     /* presumably start page */
+void chm_default_window(struct chm *chm, const char *name);    /* presumably a winname */
+void chm_add_window(struct chm *chm, const char *winname, const char *title,
+                    const char *contentsfile, const char *indexfile, /* presumably */
+                    const char *rootfile, int navpaneflags, int toolbarflags); /* one window */
+/* Sections: parent is presumably NULL for a top-level section - TODO confirm. */
+struct chm_section;     /* forward declaration only; callers can hold pointers to it */
+struct chm_section *chm_add_section(struct chm *chm,
+                                    struct chm_section *parent,
+                                    const char *title, const char *url); /* presumably TOC text + link */
+/* chm_build: outlen must be non-NULL (asserted); *outlen receives the output length. */
+const char *chm_build(struct chm *chm, int *outlen); /* returns chm's own output buffer */