summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Buildscr1
-rw-r--r--Makefile2
-rw-r--r--bk_html.c393
-rw-r--r--doc/Makefile8
-rw-r--r--doc/chm.but17
-rw-r--r--doc/intro.but5
-rw-r--r--doc/manpage.but18
-rw-r--r--doc/output.but291
-rw-r--r--doc/running.but24
-rw-r--r--error.c6
-rw-r--r--halibut.h4
-rw-r--r--lzx.c697
-rw-r--r--lzx.h24
-rw-r--r--main.c1
-rw-r--r--winchm.c1436
-rw-r--r--winchm.h21
16 files changed, 2768 insertions, 180 deletions
diff --git a/Buildscr b/Buildscr
index d954d1b..4ddbce9 100644
--- a/Buildscr
+++ b/Buildscr
@@ -35,6 +35,7 @@ in halibut/doc do make
deliver halibut/*.tar.gz $@
deliver halibut/doc/halibut.pdf $@
deliver halibut/doc/halibut.txt $@
+deliver halibut/doc/halibut.chm $@
deliver halibut/doc/*.html $@
# FIXME: it'd be nice to add a Windows delegation here so we can
diff --git a/Makefile b/Makefile
index 6264624..c9499e4 100644
--- a/Makefile
+++ b/Makefile
@@ -95,7 +95,7 @@ include $(LIBCHARSET_SRCDIR)Makefile
MODULES := main malloc ustring error help licence version misc tree234
MODULES += input in_afm in_pf in_sfnt keywords contents index biblio
MODULES += bk_text bk_html bk_whlp bk_man bk_info bk_paper bk_ps bk_pdf
-MODULES += winhelp deflate lz77 huffman psdata wcwidth
+MODULES += winhelp winchm deflate lzx lz77 huffman psdata wcwidth
OBJECTS := $(addsuffix .o,$(MODULES)) $(LIBCHARSET_OBJS)
DEPS := $(addsuffix .d,$(MODULES))
diff --git a/bk_html.c b/bk_html.c
index 9a08fa2..784e3ca 100644
--- a/bk_html.c
+++ b/bk_html.c
@@ -24,6 +24,7 @@
#include <assert.h>
#include <limits.h>
#include "halibut.h"
+#include "winchm.h"
#define is_heading_type(type) ( (type) == para_Title || \
(type) == para_Chapter || \
@@ -57,6 +58,8 @@ typedef struct {
char *chm_filename, *hhp_filename, *hhc_filename, *hhk_filename;
char **template_fragments;
int ntfragments;
+ char **chm_extrafiles, **chm_extranames;
+ int nchmextrafiles, chmextrafilesize;
char *head_end, *body_start, *body_end, *addr_start, *addr_end;
char *body_tag, *nav_attr;
wchar_t *author, *description;
@@ -94,6 +97,10 @@ struct htmlfile {
* more than once.
*/
int temp;
+ /*
+ * CHM section structure, if we're generating a CHM.
+ */
+ struct chm_section *chmsect;
};
struct htmlsect {
@@ -193,6 +200,48 @@ void ho_setup_stdio(htmloutput *ho, FILE *fp)
ho->write = ho_write_stdio;
ho->write_ctx = fp;
}
+
+struct chm_output {
+ struct chm *chm;
+ char *filename;
+ rdstringc rs;
+};
+void ho_write_chm(void *write_ctx, const char *data, int len)
+{
+ struct chm_output *co = (struct chm_output *)write_ctx;
+ if (len == -1) {
+ chm_add_file(co->chm, co->filename, co->rs.text, co->rs.pos);
+ sfree(co->filename);
+ sfree(co->rs.text);
+ sfree(co);
+ } else {
+ rdaddsn(&co->rs, data, len);
+ }
+}
+void ho_setup_chm(htmloutput *ho, struct chm *chm, const char *filename)
+{
+ struct chm_output *co = snew(struct chm_output);
+
+ co->chm = chm;
+ co->rs = empty_rdstringc;
+ co->filename = dupstr(filename);
+
+ ho->write_ctx = co;
+ ho->write = ho_write_chm;
+}
+
+void ho_write_rdstringc(void *write_ctx, const char *data, int len)
+{
+ rdstringc *rs = (rdstringc *)write_ctx;
+ if (len > 0)
+ rdaddsn(rs, data, len);
+}
+void ho_setup_rdstringc(htmloutput *ho, rdstringc *rs)
+{
+ ho->write_ctx = rs;
+ ho->write = ho_write_rdstringc;
+}
+
void ho_string(htmloutput *ho, const char *string)
{
ho->write(ho->write_ctx, string, strlen(string));
@@ -286,14 +335,15 @@ static void html_section_title(htmloutput *ho, htmlsect *s,
htmlfile *thisfile, keywordlist *keywords,
htmlconfig *cfg, int real);
-static htmlconfig html_configure(paragraph *source) {
+static htmlconfig html_configure(paragraph *source, int chm_mode)
+{
htmlconfig ret;
paragraph *p;
/*
* Defaults.
*/
- ret.leaf_level = 2;
+ ret.leaf_level = chm_mode ? -1 /* infinite */ : 2;
ret.achapter.just_numbers = FALSE;
ret.achapter.number_at_all = TRUE;
ret.achapter.number_suffix = L": ";
@@ -305,20 +355,29 @@ static htmlconfig html_configure(paragraph *source) {
ret.ncdepths = 0;
ret.contents_depths = 0;
ret.visible_version_id = TRUE;
- ret.address_section = TRUE;
+ ret.address_section = chm_mode ? FALSE : TRUE;
ret.leaf_contains_contents = FALSE;
ret.leaf_smallest_contents = 4;
- ret.navlinks = TRUE;
+ ret.navlinks = chm_mode ? FALSE : TRUE;
ret.rellinks = TRUE;
ret.single_filename = dupstr("Manual.html");
ret.contents_filename = dupstr("Contents.html");
ret.index_filename = dupstr("IndexPage.html");
ret.template_filename = dupstr("%n.html");
- ret.chm_filename = ret.hhp_filename = NULL;
- ret.hhc_filename = ret.hhk_filename = NULL;
+ if (chm_mode) {
+ ret.chm_filename = dupstr("output.chm");
+ ret.hhc_filename = dupstr("contents.hhc");
+ ret.hhk_filename = dupstr("index.hhk");
+ ret.hhp_filename = NULL;
+ } else {
+ ret.chm_filename = ret.hhp_filename = NULL;
+ ret.hhc_filename = ret.hhk_filename = NULL;
+ }
ret.ntfragments = 1;
ret.template_fragments = snewn(ret.ntfragments, char *);
ret.template_fragments[0] = dupstr("%b");
+ ret.chm_extrafiles = ret.chm_extranames = NULL;
+ ret.nchmextrafiles = ret.chmextrafilesize = 0;
ret.head_end = ret.body_tag = ret.body_start = ret.body_end =
ret.addr_start = ret.addr_end = ret.nav_attr = NULL;
ret.author = ret.description = NULL;
@@ -368,11 +427,20 @@ static htmlconfig html_configure(paragraph *source) {
for (p = source; p; p = p->next) {
if (p->type == para_Config) {
wchar_t *k = p->keyword;
+ int generic = FALSE;
- if (!ustrnicmp(k, L"html-", 5)) {
+ if (!chm_mode && !ustrnicmp(k, L"html-", 5)) {
k += 5;
- } else if (!ustrnicmp(k, L"xhtml-", 6)) {
+ } else if (!chm_mode && !ustrnicmp(k, L"xhtml-", 6)) {
k += 6;
+ } else if (chm_mode && !ustrnicmp(k, L"chm-", 4)) {
+ k += 4;
+ } else if (!ustrnicmp(k, L"htmlall-", 8)) {
+ k += 8;
+ /* In this mode, only accept directives that don't
+ * vary completely between the HTML and CHM output
+ * types. */
+ generic = TRUE;
} else {
continue;
}
@@ -578,39 +646,78 @@ static htmlconfig html_configure(paragraph *source) {
ret.pre_versionid = uadv(k);
} else if (!ustricmp(k, L"post-versionid")) {
ret.post_versionid = uadv(k);
- } else if (!ustricmp(k, L"mshtmlhelp-chm")) {
+ } else if (!generic && !ustricmp(
+ k, chm_mode ? L"filename" : L"mshtmlhelp-chm")) {
sfree(ret.chm_filename);
ret.chm_filename = dupstr(adv(p->origkeyword));
- } else if (!ustricmp(k, L"mshtmlhelp-project")) {
- sfree(ret.hhp_filename);
- ret.hhp_filename = dupstr(adv(p->origkeyword));
- } else if (!ustricmp(k, L"mshtmlhelp-contents")) {
+ } else if (!generic && !ustricmp(
+ k, chm_mode ? L"contents-name" :
+ L"mshtmlhelp-contents")) {
sfree(ret.hhc_filename);
ret.hhc_filename = dupstr(adv(p->origkeyword));
- } else if (!ustricmp(k, L"mshtmlhelp-index")) {
+ } else if (!generic && !ustricmp(
+ k, chm_mode ? L"index-name" :
+ L"mshtmlhelp-index")) {
sfree(ret.hhk_filename);
ret.hhk_filename = dupstr(adv(p->origkeyword));
+ } else if (!generic && !chm_mode &&
+ !ustricmp(k, L"mshtmlhelp-project")) {
+ sfree(ret.hhp_filename);
+ ret.hhp_filename = dupstr(adv(p->origkeyword));
+ } else if (!generic && chm_mode &&
+ !ustricmp(k, L"extra-file")) {
+ char *diskname, *chmname;
+
+ diskname = adv(p->origkeyword);
+ if (*diskname) {
+ chmname = adv(diskname);
+ if (!*chmname)
+ chmname = diskname;
+
+ if (chmname[0] == '#' || chmname[0] == '$')
+ err_chm_badname(&p->fpos, chmname);
+
+ if (ret.nchmextrafiles >= ret.chmextrafilesize) {
+ ret.chmextrafilesize = ret.nchmextrafiles * 5 / 4 + 32;
+ ret.chm_extrafiles = sresize(
+ ret.chm_extrafiles, ret.chmextrafilesize, char *);
+ ret.chm_extranames = sresize(
+ ret.chm_extranames, ret.chmextrafilesize, char *);
+ }
+ ret.chm_extrafiles[ret.nchmextrafiles] = dupstr(diskname);
+ ret.chm_extranames[ret.nchmextrafiles] =
+ dupstr(chmname);
+ ret.nchmextrafiles++;
+ }
}
}
}
- /*
- * Enforce that the CHM and HHP filenames must either be both
- * present or both absent. If one is present but not the other,
- * turn both off.
- */
- if (!ret.chm_filename ^ !ret.hhp_filename) {
- err_chmnames();
- sfree(ret.chm_filename); ret.chm_filename = NULL;
- sfree(ret.hhp_filename); ret.hhp_filename = NULL;
- }
- /*
- * And if we're not generating an HHP, there's no need for HHC
- * or HHK.
- */
- if (!ret.hhp_filename) {
- sfree(ret.hhc_filename); ret.hhc_filename = NULL;
- sfree(ret.hhk_filename); ret.hhk_filename = NULL;
+ if (!chm_mode) {
+ /*
+ * If we're in HTML mode but using the old-style options to
+ * output HTML Help Workshop auxiliary files, do some
+ * consistency checking.
+ */
+
+ /*
+ * Enforce that the CHM and HHP filenames must either be both
+ * present or both absent. If one is present but not the other,
+ * turn both off.
+ */
+ if (!ret.chm_filename ^ !ret.hhp_filename) {
+ err_chmnames();
+ sfree(ret.chm_filename); ret.chm_filename = NULL;
+ sfree(ret.hhp_filename); ret.hhp_filename = NULL;
+ }
+ /*
+ * And if we're not generating an HHP, there's no need for HHC
+ * or HHK.
+ */
+ if (!ret.hhp_filename) {
+ sfree(ret.hhc_filename); ret.hhc_filename = NULL;
+ sfree(ret.hhk_filename); ret.hhk_filename = NULL;
+ }
}
/*
@@ -644,20 +751,23 @@ paragraph *html_config_filename(char *filename)
return p;
}
-void html_backend(paragraph *sourceform, keywordlist *keywords,
- indexdata *idx, void *unused)
+paragraph *chm_config_filename(char *filename)
+{
+ return cmdline_cfg_simple("chm-filename", filename, NULL);
+}
+
+static void html_backend_common(paragraph *sourceform, keywordlist *keywords,
+ indexdata *idx, int chm_mode)
{
paragraph *p;
htmlsect *topsect;
htmlconfig conf;
htmlfilelist files = { NULL, NULL, NULL, NULL, NULL, NULL };
htmlsectlist sects = { NULL, NULL }, nonsects = { NULL, NULL };
- char *hhk_filename;
- int has_index;
+ struct chm *chm = NULL;
+ int has_index, hhk_needed = FALSE;
- IGNORE(unused);
-
- conf = html_configure(sourceform);
+ conf = html_configure(sourceform, chm_mode);
/*
* We're going to make heavy use of paragraphs' private data
@@ -732,10 +842,10 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
/*
* And the index, if we have one. Note that we don't output
* an index as an HTML file if we're outputting one as a
- * .HHK.
+ * .HHK (in either of the HTML or CHM output modes).
*/
has_index = (count234(idx->entries) > 0);
- if (has_index && !conf.hhk_filename) {
+ if (has_index && !chm_mode && !conf.hhk_filename) {
sect = html_new_sect(&sects, NULL, &conf);
sect->text = NULL;
sect->type = INDEX;
@@ -901,6 +1011,9 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
}
}
+ if (chm_mode)
+ chm = chm_new();
+
/*
* Now we're ready to write out the actual HTML files.
*
@@ -936,7 +1049,9 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
#define listname(lt) ( (lt)==UL ? "ul" : (lt)==OL ? "ol" : "dl" )
#define itemname(lt) ( (lt)==LI ? "li" : (lt)==DT ? "dt" : "dd" )
- if (!strcmp(f->filename, "-"))
+ if (chm)
+ ho_setup_chm(&ho, chm, f->filename);
+ else if (!strcmp(f->filename, "-"))
ho_setup_stdio(&ho, stdout);
else
ho_setup_file(&ho, f->filename);
@@ -1728,8 +1843,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
* whether there's even going to _be_ an index file: we omit it
* if the index contains nothing.
*/
- hhk_filename = conf.hhk_filename;
- if (hhk_filename) {
+ if (chm_mode || conf.hhk_filename) {
int ok = FALSE;
int i;
indexentry *entry;
@@ -1743,8 +1857,138 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
}
}
- if (!ok)
- hhk_filename = NULL;
+ if (ok)
+ hhk_needed = TRUE;
+ }
+
+ /*
+ * If we're doing direct CHM output, tell winchm.c all the things
+ * it will need to know aside from the various HTML files'
+ * contents.
+ */
+ if (chm) {
+ chm_contents_filename(chm, conf.hhc_filename);
+ if (has_index)
+ chm_index_filename(chm, conf.hhk_filename);
+ chm_default_window(chm, "main");
+
+ {
+ htmloutput ho;
+ rdstringc rs = {0, 0, NULL};
+
+ ho.charset = CS_CP1252; /* as far as I know, CHM is */
+ ho.restrict_charset = CS_CP1252; /* hardwired to this charset */
+ ho.cstate = charset_init_state;
+ ho.ver = HTML_4; /* *shrug* */
+ ho.state = HO_NEUTRAL;
+ ho.contents_level = 0;
+ ho.hackflags = HO_HACK_QUOTENOTHING;
+
+ ho_setup_rdstringc(&ho, &rs);
+
+ ho.hacklimit = 255;
+ html_words(&ho, topsect->title->words, NOTHING,
+ NULL, keywords, &conf);
+
+ rdaddc(&rs, '\0');
+ chm_title(chm, rs.text);
+
+ chm_default_topic(chm, files.head->filename);
+
+ chm_add_window(chm, "main", rs.text,
+ conf.hhc_filename, conf.hhk_filename,
+ files.head->filename,
+ /* This first magic number is
+ * fsWinProperties, controlling Navigation
+ * Pane options and the like. Constants
+ * HHWIN_PROP_* in htmlhelp.h. */
+ 0x62520,
+ /* This second number is fsToolBarFlags,
+ * mainly controlling toolbar buttons.
+ * Constants HHWIN_BUTTON_*. NOTE: there
+ * are two pairs of bits for Next/Previous
+ * buttons: 7/8 (which do nothing useful),
+ * and 21/22 (which work). (Neither of
+ * these are exposed in the HHW UI, but
+ * they work fine in HH.) We use the
+ * latter. */
+ 0x70304e);
+
+ sfree(rs.text);
+ }
+
+ {
+ htmlfile *f;
+
+ for (f = files.head; f; f = f->next)
+ f->chmsect = NULL;
+ for (f = files.head; f; f = f->next) {
+ htmlsect *s = f->first;
+ htmloutput ho;
+ rdstringc rs = {0, 0, NULL};
+
+ ho.charset = CS_CP1252;
+ ho.restrict_charset = CS_CP1252;
+ ho.cstate = charset_init_state;
+ ho.ver = HTML_4; /* *shrug* */
+ ho.state = HO_NEUTRAL;
+ ho.contents_level = 0;
+ ho.hackflags = HO_HACK_QUOTENOTHING;
+
+ ho_setup_rdstringc(&ho, &rs);
+ ho.hacklimit = 255;
+
+ if (f->first->title)
+ html_words(&ho, f->first->title->words, NOTHING,
+ NULL, keywords, &conf);
+ else if (f->first->type == INDEX)
+ html_text(&ho, conf.index_text);
+ rdaddc(&rs, '\0');
+
+ while (s && s->file == f)
+ s = s->parent;
+
+ /*
+ * Special case, as below: the TOP file is not
+ * considered to be the parent of everything else.
+ */
+ if (s && s->type == TOP)
+ s = NULL;
+
+ f->chmsect = chm_add_section(chm, s ? s->file->chmsect : NULL,
+ rs.text, f->filename);
+
+ sfree(rs.text);
+ }
+ }
+
+ {
+ int i;
+
+ for (i = 0; i < conf.nchmextrafiles; i++) {
+ const char *fname = conf.chm_extrafiles[i];
+ FILE *fp;
+ long size;
+ char *data;
+
+ fp = fopen(fname, "rb");
+ if (!fp) {
+ err_cantopen(fname);
+ continue;
+ }
+
+ fseek(fp, 0, SEEK_END);
+ size = ftell(fp);
+ rewind(fp);
+
+ data = snewn(size, char);
+ size = fread(data, 1, size, fp);
+ fclose(fp);
+
+ chm_add_file(chm, conf.chm_extranames[i], data, size);
+ sfree(data);
+ }
+ }
}
/*
@@ -1800,7 +2044,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
ho_string(&ho, conf.hhc_filename);
ho_string(&ho, "\n");
}
- if (hhk_filename) {
+ if (hhk_needed) {
ho_string(&ho, "Index file=");
ho_string(&ho, conf.hhk_filename);
ho_string(&ho, "\n");
@@ -1817,8 +2061,8 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
if (conf.hhc_filename)
ho_string(&ho, conf.hhc_filename);
ho_string(&ho, "\",\"");
- if (hhk_filename)
- ho_string(&ho, hhk_filename);
+ if (hhk_needed)
+ ho_string(&ho, conf.hhk_filename);
ho_string(&ho, "\",\"");
ho_string(&ho, files.head->filename);
ho_string(&ho, "\",,,,,,"
@@ -1848,7 +2092,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
ho_finish(&ho);
}
- if (conf.hhc_filename) {
+ if (chm || conf.hhc_filename) {
htmlfile *f;
htmlsect *s, *a;
htmloutput ho;
@@ -1862,7 +2106,10 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
ho.contents_level = 0;
ho.hackflags = HO_HACK_QUOTEQUOTES;
- ho_setup_file(&ho, conf.hhc_filename);
+ if (chm)
+ ho_setup_chm(&ho, chm, conf.hhc_filename);
+ else
+ ho_setup_file(&ho, conf.hhc_filename);
/*
* Magic DOCTYPE which seems to work for .HHC files. I'm
@@ -1955,7 +2202,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
cleanup(&ho);
}
- if (hhk_filename) {
+ if (hhk_needed) {
htmlfile *f;
htmloutput ho;
indexentry *entry;
@@ -1976,7 +2223,10 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
ho.contents_level = 0;
ho.hackflags = HO_HACK_QUOTEQUOTES;
- ho_setup_file(&ho, hhk_filename);
+ if (chm)
+ ho_setup_chm(&ho, chm, conf.hhk_filename);
+ else
+ ho_setup_file(&ho, conf.hhk_filename);
/*
* Magic DOCTYPE which seems to work for .HHK files. I'm
@@ -2041,6 +2291,26 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
cleanup(&ho);
}
+ if (chm) {
+ /*
+ * Finalise and write out the CHM file.
+ */
+ const char *data;
+ int len;
+ FILE *fp;
+
+ fp = fopen(conf.chm_filename, "wb");
+ if (!fp) {
+ err_cantopenw(conf.chm_filename);
+ } else {
+ data = chm_build(chm, &len);
+ fwrite(data, 1, len, fp);
+ fclose(fp);
+ }
+
+ chm_free(chm);
+ }
+
/*
* Go through and check that no index fragments were referenced
* without being generated, or indeed vice versa.
@@ -2139,6 +2409,25 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
while (conf.ntfragments--)
sfree(conf.template_fragments[conf.ntfragments]);
sfree(conf.template_fragments);
+ while (conf.nchmextrafiles--) {
+ sfree(conf.chm_extrafiles[conf.nchmextrafiles]);
+ sfree(conf.chm_extranames[conf.nchmextrafiles]);
+ }
+ sfree(conf.chm_extrafiles);
+}
+
+void html_backend(paragraph *sourceform, keywordlist *keywords,
+ indexdata *idx, void *unused)
+{
+ IGNORE(unused);
+ html_backend_common(sourceform, keywords, idx, FALSE);
+}
+
+void chm_backend(paragraph *sourceform, keywordlist *keywords,
+ indexdata *idx, void *unused)
+{
+ IGNORE(unused);
+ html_backend_common(sourceform, keywords, idx, TRUE);
}
static void html_file_section(htmlconfig *cfg, htmlfilelist *files,
diff --git a/doc/Makefile b/doc/Makefile
index 81a1fd8..e0cc27a 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -11,7 +11,7 @@ all: index.html halibut.1
index.html: $(INPUTS) $(HALIBUT)
$(HALIBUT) --text=halibut.txt --html --info=halibut.info \
- --ps=halibut.ps --pdf=halibut.pdf $(INPUTS)
+ --ps=halibut.ps --pdf=halibut.pdf --chm=halibut.chm $(INPUTS)
halibut.1: manpage.but
$(HALIBUT) --man=halibut.1 manpage.but
@@ -21,8 +21,4 @@ install:
$(INSTALL) -m 644 halibut.1 $(man1dir)/halibut.1
clean:
- rm -f *.html *.txt *.hlp *.cnt *.1 *.info* *.ps *.pdf *.hh* *.chm
-
-chm: halibut.hhp
-halibut.hhp: $(INPUTS) $(HALIBUT) chm.but
- $(HALIBUT) --html $(INPUTS) chm.but
+ rm -f *.html *.txt *.hlp *.cnt *.1 *.info* *.ps *.pdf *.chm
diff --git a/doc/chm.but b/doc/chm.but
deleted file mode 100644
index ef21ecc..0000000
--- a/doc/chm.but
+++ /dev/null
@@ -1,17 +0,0 @@
-\# File containing the magic HTML configuration directives to create
-\# an MS HTML Help project. We put this on the end of the Halibut
-\# docs build command line to build the HHP and friends.
-
-\cfg{html-leaf-level}{infinite}
-\cfg{html-leaf-contains-contents}{false}
-\cfg{html-suppress-navlinks}{true}
-\cfg{html-suppress-address}{true}
-
-\cfg{html-contents-filename}{index.html}
-\cfg{html-template-filename}{%k.html}
-\cfg{html-template-fragment}{%k}
-
-\cfg{html-mshtmlhelp-chm}{halibut.chm}
-\cfg{html-mshtmlhelp-project}{halibut.hhp}
-\cfg{html-mshtmlhelp-contents}{halibut.hhc}
-\cfg{html-mshtmlhelp-index}{halibut.hhk}
diff --git a/doc/intro.but b/doc/intro.but
index 2e5ada1..ce1668c 100644
--- a/doc/intro.but
+++ b/doc/intro.but
@@ -25,10 +25,9 @@ Currently Halibut supports the following output formats:
\b PostScript.
-\b Old-style Windows Help (\cw{.HLP}).
+\b Windows HTML Help (\cw{.CHM}).
-(By setting suitable options, the HTML output can also be made
-suitable for feeding to the newer-style Windows HTML Help compiler.)
+\b Old-style Windows Help (\cw{.HLP}).
\H{intro-features} Features supported by Halibut
diff --git a/doc/manpage.but b/doc/manpage.but
index 56048f6..a13b195 100644
--- a/doc/manpage.but
+++ b/doc/manpage.but
@@ -43,13 +43,21 @@ produced as output; this, and the file names, will be as specified
in the input files, or given a set of default names starting with
\c{Contents.html} if none is specified at all.
+\dt \cw{--chm}[\cw{=}\e{filename}]
+
+\dd Makes Halibut generate an output file in Windows HTML Help
+format. If the optional \e{filename} parameter is supplied, the output
+help file will be given that name. Otherwise, the name of the output
+help file will be as specified in the input files, or \c{output.chm}
+if none is specified at all.
+
\dt \cw{--winhelp}[\cw{=}\e{filename}]
-\dd Makes Halibut generate an output file in Windows Help format. If
-the optional \e{filename} parameter is supplied, the output help
-file will be given that name. Otherwise, the name of the output help
-file will be as specified in the input files, or \c{output.hlp} if
-none is specified at all.
+\dd Makes Halibut generate an output file in old-style Windows Help
+format. If the optional \e{filename} parameter is supplied, the output
+help file will be given that name. Otherwise, the name of the output
+help file will be as specified in the input files, or \c{output.hlp}
+if none is specified at all.
\lcont{
The output help file must have a name ending in \c{.hlp}; if it does
diff --git a/doc/output.but b/doc/output.but
index 9309b82..ccb99df 100644
--- a/doc/output.but
+++ b/doc/output.but
@@ -858,13 +858,202 @@ name="description">} tag in the output HTML files, so that browsers
which support this can easily pick out a brief \I{description, of
document}description of the document.
-\S{output-html-mshtmlhelp} Generating MS Windows \i{HTML Help}
+\S{output-html-defaults} Default settings
+
+The \i{default settings} for Halibut's HTML output format are:
+
+\c \cfg{html-contents-filename}{Contents.html}
+\c \cfg{html-index-filename}{IndexPage.html}
+\c \cfg{html-template-filename}{%n.html}
+\c \cfg{html-single-filename}{Manual.html}
+\c
+\c \cfg{html-leaf-level}{2}
+\c \cfg{html-leaf-contains-contents}{false}
+\c \cfg{html-leaf-smallest-contents}{4}
+\c \cfg{html-contents-depth}{0}{2}
+\c \cfg{html-contents-depth}{1}{3}
+\c ... and so on for all section levels below this ...
+\e iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
+\c
+\c \cfg{html-head-end}{}
+\c \cfg{html-body-tag}{<body>}
+\c \cfg{html-body-start}{}
+\c \cfg{html-body-end}{}
+\c \cfg{html-address-start}{}
+\c \cfg{html-address-end}{}
+\c \cfg{html-navigation-attributes}{}
+\c
+\c \cfg{html-chapter-numeric}{false}
+\c \cfg{html-chapter-shownumber}{true}
+\c \cfg{html-chapter-suffix}{: }
+\c
+\c \cfg{html-section-numeric}{0}{true}
+\c \cfg{html-section-shownumber}{0}{true}
+\c \cfg{html-section-suffix}{0}{ }
+\c
+\c \cfg{html-section-numeric}{1}{true}
+\c \cfg{html-section-shownumber}{1}{true}
+\c \cfg{html-section-suffix}{1}{ }
+\c
+\c ... and so on for all section levels below this ...
+\e iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
+\c
+\c \cfg{html-preamble-text}{Preamble}
+\c \cfg{html-contents-text}{Contents}
+\c \cfg{html-index-text}{Index}
+\c \cfg{html-title-separator}{ - }
+\c \cfg{html-index-main-separator}{: }
+\c \cfg{html-index-multiple-separator}{, }
+\c \cfg{html-pre-versionid}{[}
+\c \cfg{html-post-versionid}{]}
+\c \cfg{html-nav-prev-text}{Previous}
+\c \cfg{html-nav-next-text}{Next}
+\c \cfg{html-nav-up-text}{Up}
+\c \cfg{html-nav-separator}{ | }
+\c
+\c \cfg{html-output-charset}{ASCII}
+\c \cfg{html-restrict-charset}{UTF-8}
+\c \cfg{html-quotes}{\u2018}{\u2019}{"}{"}
+\c
+\c \cfg{html-version}{html4}
+\c \cfg{html-template-fragment}{%b}
+\c \cfg{html-versionid}{true}
+\c \cfg{html-rellinks}{true}
+\c \cfg{html-suppress-navlinks}{false}
+\c \cfg{html-suppress-address}{false}
+\c \cfg{html-author}{}
+\c \cfg{html-description}{}
+
+\H{output-chm} Windows \i{HTML Help}
+
+This output format generates a \c{.chm} file suitable for use with the
+Windows HTML Help system.
+
+Older versions of Halibut could only generate HTML Help by writing out
+a set of source files acceptable to the MS help compiler. Nowadays
+Halibut can generate CHM directly, so that's no longer necessary.
+However, the legacy method is still available if you need it; see
+\k{output-html-mshtmlhelp} for details.
+
+\S{output-chm-file} Output file name
+
+\dt \I{\cw{\\cfg\{chm-filename\}}}\cw{\\cfg\{chm-filename\}\{}\e{filename}\cw{\}}
+
+\dd Sets the \i{output file name} in which to store the HTML Help
+file. This directive is implicitly generated if you provide a file
+name parameter after the command-line option \i\c{--chm} (see
+\k{running-options}).
+
+\S{output-chm-mostconfig} Configuration shared with the HTML back end
+
+As the name suggests, an HTML Help file is mostly a compressed
+container for HTML files. So the CHM back end shares a great deal of
+its code with the HTML back end, and as a result, it supports the same
+range of format configuration options.
+
+(One exception to this general rule is that the configuration options
+relating to generating \e{HTML Help compiler input} are not supported
+in CHM mode, because they wouldn't make any sense! The
+\cw{html-mshtmlhelp-*} options described in \k{output-html-mshtmlhelp}
+have no analogue starting \cw{chm-}.)
+
+However, because HTML and CHM are used in different ways, you may need
+to configure the two back ends differently. So in CHM mode, Halibut
+supports all the same configuration directives described in
+\k{output-html}, but with their names changed so that they begin with
+\cq{chm-} in place of \cq{html-}. This lets you maintain two sets of
+configuration independently; for example, you could specify
+\c{\\cfg\{html-chapter-numeric\}\{true\}} and
+\c{\\cfg\{chm-chapter-numeric\}\{false\}} in the same source file, and
+then when you ran Halibut with both the \c{--html} and \c{--chm}
+options, it would produce purely numeric chapter titles in the HTML
+output but not in the CHM file.
+
+If you do decide to apply a piece of configuration across both these
+back ends, you can prefix it with \cq{htmlall-} instead of \cq{html-}
+or \cq{chm-}. For example,
+\c{\\cfg\{htmlall-chapter-numeric\}\{true\}} will enable purely
+numeric chapter titles in \e{both} the HTML and CHM output.
+
+\S{output-chm-extra} Including extra files in the CHM
+
+CHM files are mostly a container for HTML, and the HTML files inside
+them are allowed to cross-refer to all the usual other kinds of file
+that HTML might refer to, such as images, stylesheets and even
+Javascript. If you want to make use of this capability, you need to
+tell Halibut what extra files it needs to incorporate into the CHM
+container.
+
+\dt \I{\cw{\\cfg\{chm-extra-file\}}}\cw{\\cfg\{chm-extra-file\}\{}\e{filename}\cw{\}}
+
+\dt \I{\cw{\\cfg\{chm-extra-file\}}}\cw{\\cfg\{chm-extra-file\}\{}\e{filename}\cw{\}\{}\e{name inside CHM}\cw{\}}
+
+\dd Tells Halibut to read an additional input file from \e{filename}
+and incorporate it into the CHM.
+
+\lcont{
+
+In the first form of the directive, the file will be given the same
+name within the CHM's internal namespace (i.e. for the purposes of
+linking to it from HTML files) as Halibut used to load it from disk.
+If you need to include the file with a different internal name, you
+can use the second form of the directive, which separately specifies
+the name under which Halibut should look for the input file and the
+name it should give it inside the CHM.
+
+You can specify this directive multiple times, to include more than
+one file.
+
+}
+
+\S{output-chm-internalnames} Renaming the CHM internal support files
+
+As well as ordinary HTML, there are also two special files inside a
+CHM, containing the table of contents and the index. Halibut generates
+these automatically, and you normally don't have to worry about them.
+However, it is \e{just} possible (though very unlikely!) that you
+might find they conflict with the name of some file you wanted to
+include in the CHM yourself, and hence, Halibut provides configuration
+options to change them if you need to.
+
+\dt \I{\cw{\\cfg\{chm-contents-name\}}}\cw{\\cfg\{chm-contents-name\}\{}\e{filename}\cw{\}}
+
+\dd Controls the name of the internal contents file in the CHM.
+
+\dt \I{\cw{\\cfg\{chm-index-name\}}}\cw{\\cfg\{chm-index-name\}\{}\e{filename}\cw{\}}
+
+\dd Controls the name of the internal index file in the CHM.
+
+\S{output-chm-defaults} Default settings
+
+The \i{default settings} for Halibut's CHM output format are mostly
+the same as for the standard HTML output. However, a few defaults are
+changed to be more in line with the way CHM wants to do things.
+
+\c \cfg{chm-filename}{output.chm}
+\c \cfg{chm-contents-name}{contents.hhc}
+\c \cfg{chm-index-name}{index.hhk}
+\c \cfg{chm-leaf-level}{infinite}
+\c \cfg{chm-suppress-navlinks}{true}
+\c \cfg{chm-suppress-address}{true}
-The HTML files output from Halibut's HTML back end can be used as
-input to the MS Windows HTML Help compiler. In order to do this, you
-also need some auxiliary files: a project file, and (probably) a
-contents file and an index file. Halibut can optionally generate
-those as well.
+\S{output-html-mshtmlhelp} Generating input to the MS Windows \i{HTML
+Help compiler}
+
+Before Halibut gained the ability to write out CHM files directly, it
+used a more cumbersome system in which you could run it in HTML mode
+and enable some extra options that would write out supporting files
+needed by the official Windows HTML Help compiler, so that you could
+still generate a CHM file from your Halibut source in multiple build
+steps.
+
+This legacy system for HTML Help generation is still supported, partly
+to avoid backwards-compatibility breakage for anyone already using it,
+and also because it permits more flexibility in the resulting CHM
+files: Halibut's own CHM file generation makes some fixed decisions
+about window layout and styling, whereas if you use the official help
+compiler you can start from Halibut's default project file and make
+whatever manual changes you like to that sort of thing.
To enable the generation of MS HTML Help auxiliary files, use the
following configuration directives:
@@ -940,18 +1129,16 @@ MS HTML Help compiler (\cw{HHC.EXE}), or load into the MS HTML Help
Workshop (\cw{HHW.EXE}).
You may also wish to alter other HTML configuration options to make
-the resulting help file look more like a help file and less like a
-web page. A suggested set of additional configuration options for
-HTML Help is as follows:
+the resulting help file look more like a help file and less like a web
+page. If you use Halibut's direct CHM output, this is done for you
+automatically (see \k{output-chm-defaults}); but if you're using the
+HTML output mode then I recommend the following changes.
\b \cw{\\cfg\{html-leaf-level\}\{infinite\}}, because HTML Help
works best with lots of small files (\q{topics}) rather than a few
large ones. In particular, the contents and index mechanisms can
only reference files, not subsections within files.
-\b \cw{\\cfg\{html-leaf-contains-contents\}\{false\}}, to suppress
-the contents list above the main text of each bottom-level file.
-
\b \cw{\\cfg\{html-suppress-navlinks\}\{true\}}, because HTML Help
has its own navigation facilities and it looks a bit strange to
duplicate them.
@@ -960,83 +1147,15 @@ duplicate them.
\cw{<ADDRESS>} section makes less sense in a help file than it does
on a web page.
-\S{output-html-defaults} Default settings
-
-The \i{default settings} for Halibut's HTML output format are:
-
-\c \cfg{html-contents-filename}{Contents.html}
-\c \cfg{html-index-filename}{IndexPage.html}
-\c \cfg{html-template-filename}{%n.html}
-\c \cfg{html-single-filename}{Manual.html}
-\c
-\c \cfg{html-leaf-level}{2}
-\c \cfg{html-leaf-contains-contents}{false}
-\c \cfg{html-leaf-smallest-contents}{4}
-\c \cfg{html-contents-depth}{0}{2}
-\c \cfg{html-contents-depth}{1}{3}
-\c ... and so on for all section levels below this ...
-\e iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
-\c
-\c \cfg{html-head-end}{}
-\c \cfg{html-body-tag}{<body>}
-\c \cfg{html-body-start}{}
-\c \cfg{html-body-end}{}
-\c \cfg{html-address-start}{}
-\c \cfg{html-address-end}{}
-\c \cfg{html-navigation-attributes}{}
-\c
-\c \cfg{html-chapter-numeric}{false}
-\c \cfg{html-chapter-shownumber}{true}
-\c \cfg{html-chapter-suffix}{: }
-\c
-\c \cfg{html-section-numeric}{0}{true}
-\c \cfg{html-section-shownumber}{0}{true}
-\c \cfg{html-section-suffix}{0}{ }
-\c
-\c \cfg{html-section-numeric}{1}{true}
-\c \cfg{html-section-shownumber}{1}{true}
-\c \cfg{html-section-suffix}{1}{ }
-\c
-\c ... and so on for all section levels below this ...
-\e iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii
-\c
-\c \cfg{html-preamble-text}{Preamble}
-\c \cfg{html-contents-text}{Contents}
-\c \cfg{html-index-text}{Index}
-\c \cfg{html-title-separator}{ - }
-\c \cfg{html-index-main-separator}{: }
-\c \cfg{html-index-multiple-separator}{, }
-\c \cfg{html-pre-versionid}{[}
-\c \cfg{html-post-versionid}{]}
-\c \cfg{html-nav-prev-text}{Previous}
-\c \cfg{html-nav-next-text}{Next}
-\c \cfg{html-nav-up-text}{Up}
-\c \cfg{html-nav-separator}{ | }
-\c
-\c \cfg{html-output-charset}{ASCII}
-\c \cfg{html-restrict-charset}{UTF-8}
-\c \cfg{html-quotes}{\u2018}{\u2019}{"}{"}
-\c
-\c \cfg{html-version}{html4}
-\c \cfg{html-template-fragment}{%b}
-\c \cfg{html-versionid}{true}
-\c \cfg{html-rellinks}{true}
-\c \cfg{html-suppress-navlinks{false}
-\c \cfg{html-suppress-address}{false}
-\c \cfg{html-author}{}
-\c \cfg{html-description}{}
-
-\H{output-whlp} Windows Help
+\H{output-whlp} Legacy Windows Help
-This output format generates data that can be used by the \i{Windows
-Help} program \cw{WINHLP32.EXE}. There are two actual files
+This output format generates data that can be used by the legacy
+\i{Windows Help} program \cw{WINHLP32.EXE}. There are two actual files
generated, one ending in \c{.hlp} and the other ending in \c{.cnt}.
-Note that as of 2006, MS is discontinuing the Windows Help format in
-favour of the newer HTML Help format (\c{.chm} files). Halibut is
-not currently able to generate \c{.chm} files directly, but its HTML
-back end can write out project files suitable for use as input to
-the MS HTML Help compiler. See \k{output-html-mshtmlhelp} for more
+This legacy Windows Help format was discontinued in 2006 in favour of
+HTML Help, which Halibut can also generate. You probably want to use
+that instead for any new project. See \k{output-chm} for more
information on this.
Currently, the Windows Help output is hardcoded to be in the
diff --git a/doc/running.but b/doc/running.but
index 39e1715..6c2b6c6 100644
--- a/doc/running.but
+++ b/doc/running.but
@@ -12,22 +12,19 @@ This will generate a large set of \i{output files}:
\b \i\c{output.txt} will be a \i{plain text} version of the input
document.
+\b \i\c{output.chm} will be a Windows \i{HTML Help} version of the
+same thing. (Note that to do this Halibut does not require any
+external software such as a \i{Help compiler}. It \e{directly}
+generates Windows HTML Help files, and therefore it doesn't need to be
+run on Windows to do so: it can generate them even when run from an
+automated script on a Unix machine.)
+
\b \i\c{output.hlp} and \i\c{output.cnt} will be an old-style
\i{Windows Help} version of the same thing. (Most of the text is in
\c{output.hlp}; \c{output.cnt} contains additional contents data
used by the Windows help topic selector. If you lose the latter, the
former should still be usable, but it will look less modern.)
-\lcont{
-
-Note that to do this Halibut does not require any external software
-such as a \i{Help compiler}. It \e{directly} generates old-style
-Windows Help files, and therefore it doesn't need to be run on
-Windows to do so: it can generate them even when run from an
-automated script on a Unix machine.
-
-}
-
\b \c{output.1} will be a Unix \i{\cw{man} page}.
\b The set of files \c{*.html} will contain an \i{HTML} version of
@@ -79,6 +76,13 @@ line, using the \c{-C} option).
\dd Synonym for \c{--html}.
+\dt \i\cw{--chm}[\cw{=}\e{filename}]
+
+\dd Specifies that you want to generate Windows HTML Help
+output. You can optionally specify a file name (e.g.
+\c{\-\-chm=myfile.chm}), in which case Halibut will change the
+name of the output file as well.
+
\dt \i\cw{--winhelp}[\cw{=}\e{filename}]
\dd Specifies that you want to generate old-style Windows Help
diff --git a/error.c b/error.c
index 8aa83b5..7589175 100644
--- a/error.c
+++ b/error.c
@@ -363,3 +363,9 @@ void err_sfntbadglyph(const filepos *fpos, unsigned wc)
"warning: character U+%04X references a non-existent glyph",
wc);
}
+
+/*
+ * Report a CHM internal file name that begins with a reserved
+ * character. 'sp' is the offending name, quoted in the message.
+ */
+void err_chm_badname(const filepos *fpos, const char *sp)
+{
+    do_error(fpos,
+             "CHM internal file name `%s' begins with a reserved character",
+             sp);
+}
diff --git a/halibut.h b/halibut.h
index ec0ce34..327562e 100644
--- a/halibut.h
+++ b/halibut.h
@@ -322,6 +322,8 @@ void err_sfnttablevers(const filepos *fpos, const char *sp);
void err_sfntbadhdr(const filepos *fpos);
/* sfnt cmap references bad glyph */
void err_sfntbadglyph(const filepos *fpos, unsigned wc);
+/* CHM internal file names can't start with # or $ */
+void err_chm_badname(const filepos *fpos, const char *sp);
/*
* malloc.c
@@ -563,7 +565,9 @@ paragraph *text_config_filename(char *filename);
* bk_html.c
*/
void html_backend(paragraph *, keywordlist *, indexdata *, void *);
+void chm_backend(paragraph *, keywordlist *, indexdata *, void *);
paragraph *html_config_filename(char *filename);
+paragraph *chm_config_filename(char *filename);
/*
* bk_whlp.c
diff --git a/lzx.c b/lzx.c
new file mode 100644
index 0000000..3c404b9
--- /dev/null
+++ b/lzx.c
@@ -0,0 +1,697 @@
+#include <assert.h>
+#include <stddef.h>
+
+#include "halibut.h"
+#include "huffman.h"
+#include "lz77.h"
+#include "lzx.h"
+
+#define OUR_LZX_WINSIZE 0x10000
+#define LZX_MINMATCHLEN 2
+#define LZX_MAXMATCHLEN 257
+
+/*
+ * Map a formatted match offset 'pos' to its LZX position slot.
+ *
+ * Returns the slot number, and stores in *footer_bits the number of
+ * low-order bits of 'pos' needed to pick out a value within that
+ * slot.
+ */
+int lzx_compute_position_slot(int pos, int *footer_bits)
+{
+    int top;
+
+    /* The bottom four position slots cover one value each. */
+    if (pos < 4) {
+        *footer_bits = 0;
+        return pos;
+    }
+
+    /* _All_ slots from 36 onwards are 2^17 values wide. */
+    if (pos >= 0x40000) {
+        *footer_bits = 17;
+        return 34 + (pos >> 17);
+    }
+
+    /*
+     * In between, there are two slots for each power-of-2 size, so
+     * that slots 4,5 have width 2^1, 6,7 have width 2^2, 8,9 have
+     * width 2^3, ..., and 34,35 have width 2^16.
+     *
+     * Find the index of the highest set bit of pos; here pos is in
+     * [4, 0x40000), so that index ends up somewhere in 2..17. The
+     * slot width is one bit less than that, and the bit just below
+     * the top one chooses between the two slots of that width.
+     */
+    top = 17;
+    while (!(pos >> top))
+        top--;
+
+    *footer_bits = top - 1;
+    return 2 * top + ((pos >> (top - 1)) & 1);
+}
+
+/*
+ * Kinds of symbol that can be stored in an LZXBuffer. The values
+ * below LST_NTREES each name a Huffman tree and are used as indices
+ * into LZXHufs.hufs[]; the values from LST_REALIGN_BITSTREAM upwards
+ * are pseudo-symbols that lzx_huf_encode() handles without a tree.
+ */
+typedef enum LZXSymType {
+ LST_MAINTREE, LST_LENTREE, LST_ALIGNOFFTREE,
+ LST_MAINTREE_PRETREE_1, LST_MAINTREE_PRETREE_2, LST_LENTREE_PRETREE,
+ /* dummy_enum_const steps the implicit counter back by one, so that
+ * LST_REALIGN_BITSTREAM ends up with the same value as LST_NTREES */
+ LST_NTREES, dummy_enum_const = LST_NTREES-1,
+ LST_REALIGN_BITSTREAM,
+ LST_RAWBITS_BASE /* add the number of actual bits to this code */
+} LZXSymType;
+
+/*
+ * One output symbol. For tree types, 'value' is the symbol's index
+ * in that tree's alphabet; for LST_RAWBITS_BASE+n types it is the
+ * n-bit quantity to be written verbatim.
+ */
+typedef struct LZXSym {
+ LZXSymType type;
+ int value;
+} LZXSym;
+
+/*
+ * A growable array of symbols, appended to by lzx_addsym().
+ */
+typedef struct LZXBuffer {
+ LZXSym *syms; /* dynamically allocated; NULL while empty */
+ int nsyms, symsize; /* entries in use / entries allocated */
+} LZXBuffer;
+
+/*
+ * State shared with the lz77.c callbacks through ctx->userdata.
+ */
+typedef struct LZXInfo {
+ LZXBuffer *buf; /* where lzx_literal/lzx_match put their symbols */
+ int r0, r1, r2; /* saved match offsets */
+} LZXInfo;
+
+/*
+ * Set up 'buf' as an empty symbol buffer with no storage allocated.
+ */
+static void lzx_buffer_init(LZXBuffer *buf)
+{
+    buf->nsyms = 0;
+    buf->symsize = 0;
+    buf->syms = NULL;
+}
+
+/*
+ * Append the symbol (type, value) to 'buf', enlarging the array
+ * first if it is already full.
+ */
+static void lzx_addsym(LZXBuffer *buf, LZXSymType type, int value)
+{
+    LZXSym *slot;
+
+    if (buf->nsyms >= buf->symsize) {
+        assert(buf->nsyms == buf->symsize);
+        buf->symsize = buf->nsyms * 5 / 4 + 16384;
+        buf->syms = sresize(buf->syms, buf->symsize, LZXSym);
+    }
+
+    slot = &buf->syms[buf->nsyms++];
+    slot->type = type;
+    slot->value = value;
+}
+
+/*
+ * lz77.c literal callback: record the byte 'c' as a main-tree symbol
+ * in the buffer belonging to the LZXInfo in ctx->userdata.
+ */
+static void lzx_literal(struct LZ77Context *ctx, unsigned char c)
+{
+    LZXInfo *info = (LZXInfo *)ctx->userdata;
+
+    lzx_addsym(info->buf, LST_MAINTREE, c);
+}
+
+/*
+ * lz77.c match callback: record a back-reference of 'totallen' bytes
+ * at backward distance 'match_offset' as LZX symbols in the buffer
+ * belonging to the LZXInfo in ctx->userdata.
+ *
+ * A match longer than LZX_MAXMATCHLEN is split into several LZX
+ * matches. Each emitted match also updates the saved recent-offset
+ * list (r0,r1,r2) held in the LZXInfo.
+ */
+static void lzx_match(struct LZ77Context *ctx, int match_offset, int totallen)
+{
+ LZXInfo *info = (LZXInfo *)ctx->userdata;
+ LZXBuffer *buf = info->buf;
+
+ /*
+ * First, this variant of LZX has a maximum match length of 257
+ * bytes, so if lz77.c reports a longer match than that, we must
+ * break it up.
+ */
+ while (totallen > 0) {
+ int len, length_header, length_footer, len_pos_header;
+ int formatted_offset, position_slot, position_verbatim_bits;
+ int position_verbatim_value, position_aligned_offset;
+
+ if (totallen <= LZX_MAXMATCHLEN) {
+ /* We can emit all of the (remaining) match length in one go. */
+ len = totallen;
+ } else if (totallen >= LZX_MAXMATCHLEN+LZX_MINMATCHLEN) {
+ /* There's enough match left that we can emit a
+ * maximum-length chunk and still be assured of being able
+ * to emit what's left as a viable followup match. */
+ len = LZX_MAXMATCHLEN;
+ } else {
+ /* The in-between case, where we have _only just_ too long
+ * a match to emit in one go, so that if we emitted a
+ * max-size chunk then what's left would be under the min
+ * size and we couldn't emit it. */
+ len = totallen - LZX_MINMATCHLEN;
+ }
+ totallen -= len;
+
+ /*
+ * Now we're outputting a single LZX-level match of length
+ * 'len'. Break the length up into a 'header' (included in the
+ * starting LST_MAINTREE symbol) and a 'footer' (tacked on
+ * afterwards using LST_LENTREE).
+ */
+ if (len < 9) {
+ length_header = len - 2; /* in the range {0,...,6} */
+ length_footer = -1; /* not transmitted at all */
+ } else {
+ length_header = 7; /* header indicates more to come */
+ length_footer = len - 9; /* in the range {0,...,248} */
+ }
+
+ /*
+ * Meanwhile, the raw backward distance is first transformed
+ * into the 'formatted offset', by either adding 2 or using
+ * one of the low-numbered special codes meaning to use one of
+ * the three most recent match distances.
+ */
+ if (match_offset == info->r0) {
+ /* Reuse the most recent distance */
+ formatted_offset = 0;
+ } else if (match_offset == info->r1) {
+ /* Reuse the 2nd most recent, and swap it into first place */
+ int tmp = info->r1;
+ info->r1 = info->r0;
+ info->r0 = tmp;
+ formatted_offset = 1;
+ } else if (match_offset == info->r2) {
+ /* Reuse the 3rd most recent and swap it to first place.
+ * This is intentionally not quite a move-to-front
+ * shuffle, which would permute (r0,r1,r2)->(r2,r0,r1); MS
+ * decided that just swapping r0 with r2 was a better
+ * performance tradeoff. */
+ int tmp = info->r2;
+ info->r2 = info->r0;
+ info->r0 = tmp;
+ formatted_offset = 2;
+ } else {
+ /* This offset matches none of the three saved values.
+ * Put it in r0, and move up the rest of the list. */
+ info->r2 = info->r1;
+ info->r1 = info->r0;
+ info->r0 = match_offset;
+ formatted_offset = match_offset + 2;
+ }
+
+ /*
+ * The formatted offset now breaks up into a 'position slot'
+ * (encoded as part of the starting symbol) and an offset from
+ * the smallest position value covered by that slot. The
+ * system of slots is designed so that every slot's width is a
+ * power of two and its base value is a multiple of its width,
+ * so we can get the offset just by taking the bottom n bits
+ * of the full formatted offset, once the choice of position
+ * slot tells us what n is.
+ */
+ position_slot = lzx_compute_position_slot(
+ formatted_offset, &position_verbatim_bits);
+ position_verbatim_value = formatted_offset &
+ ((1 << position_verbatim_bits)-1);
+
+ /*
+ * If there are three or more additional bits, then the last 3
+ * of them are (potentially, depending on block type which we
+ * haven't decided about yet) transmitted using the aligned
+ * offset tree. The rest are sent verbatim.
+ */
+ if (position_verbatim_bits >= 3) {
+ position_aligned_offset = position_verbatim_value & 7;
+ position_verbatim_bits -= 3;
+ position_verbatim_value >>= 3;
+ } else {
+ position_aligned_offset = -1; /* not transmitted */
+ }
+
+ /*
+ * Combine the length header and position slot into the full
+ * set of information encoded by the starting symbol.
+ */
+ len_pos_header = position_slot * 8 + length_header;
+
+ /*
+ * And now we've finished figuring out _what_ to output, so
+ * output it.
+ */
+ lzx_addsym(buf, LST_MAINTREE, 256 + len_pos_header);
+ if (length_footer >= 0)
+ lzx_addsym(buf, LST_LENTREE, length_footer);
+ if (position_verbatim_bits > 0)
+ lzx_addsym(buf, LST_RAWBITS_BASE + position_verbatim_bits,
+ position_verbatim_value);
+ if (position_aligned_offset >= 0)
+ lzx_addsym(buf, LST_ALIGNOFFTREE, position_aligned_offset);
+ }
+}
+
+/*
+ * Run lz77.c over 'len' bytes at 'data' using a freshly initialised
+ * LZ77 context, delivering the resulting literals and matches to
+ * lzx_literal() and lzx_match() with 'info' as their user data.
+ */
+void lzx_lz77_inner(LZXInfo *info, const unsigned char *data, int len)
+{
+    struct LZ77Context matcher;
+
+    lz77_init(&matcher, OUR_LZX_WINSIZE);
+    matcher.userdata = info;
+    matcher.match = lzx_match;
+    matcher.literal = lzx_literal;
+
+    lz77_compress(&matcher, data, len, TRUE);
+    lz77_cleanup(&matcher);
+}
+
+/*
+ * Convert 'totallen' bytes at 'data' into LZX symbols appended to
+ * 'buf'.
+ *
+ * The input is processed in chunks of at most 'realign_interval'
+ * bytes, each through its own lzx_lz77_inner() call, with an
+ * LST_REALIGN_BITSTREAM marker added between consecutive chunks (but
+ * not after the last one). The saved match offsets start out as
+ * (1,1,1) and carry over from one chunk to the next.
+ *
+ * 'realign_interval' must be positive.
+ */
+void lzx_lz77(LZXBuffer *buf, const unsigned char *data,
+              int totallen, int realign_interval)
+{
+    LZXInfo info;
+
+    /* A non-positive interval would make every chunk empty, so the
+     * loop below would never terminate and would keep appending
+     * realign markers until memory ran out. */
+    assert(realign_interval > 0);
+
+    info.r0 = info.r1 = info.r2 = 1;
+    info.buf = buf;
+
+    while (totallen > 0) {
+        int thislen =
+            totallen < realign_interval ? totallen : realign_interval;
+
+        lzx_lz77_inner(&info, data, thislen);
+        data += thislen;
+        totallen -= thislen;
+
+        if (totallen > 0)
+            lzx_addsym(info.buf, LST_REALIGN_BITSTREAM, 0);
+    }
+}
+
+/*
+ * One Huffman tree, as filled in by lzx_build_tree(). Each of the
+ * arrays, once allocated, has 'nsyms' entries.
+ */
+typedef struct LZXHuf {
+ int nsyms; /* size of this tree's alphabet */
+ unsigned char *lengths; /* code length of each symbol */
+ unsigned char *oldlengths; /* for pretree encoding to diff against */
+ int *codes; /* code assigned to each symbol */
+} LZXHuf;
+
+/*
+ * The full set of Huffman trees, indexed by the tree-valued members
+ * of LZXSymType.
+ */
+typedef struct LZXHufs {
+ LZXHuf hufs[LST_NTREES];
+} LZXHufs;
+
+/*
+ * Build the Huffman tree 'which' from the symbols of that type found
+ * in syms[0..nsyms-1].
+ *
+ * Sets hufs->hufs[which].nsyms, and replaces its 'lengths' and
+ * 'codes' fields with newly allocated arrays of that many entries;
+ * the caller is responsible for freeing them.
+ *
+ * Every symbol of type 'which' must have a value inside the tree's
+ * alphabet; this is checked by assertion.
+ */
+void lzx_build_tree(LZXSym *syms, int nsyms, LZXSymType which, LZXHufs *hufs)
+{
+    int i, max_code_len;
+    int *freqs;
+    LZXHuf *huf = &hufs->hufs[which];
+
+    switch (which) {
+      default:
+        assert(0 && "Bad lzx_build_tree tree type");
+        /* fall through: only reachable with assertions compiled out */
+      case LST_MAINTREE:
+        /*
+         * Trees encoded via a pretree have a max code length of 16,
+         * because that's the limit of what the pretree alphabet can
+         * represent.
+         */
+        max_code_len = 16;
+
+        /*
+         * Number of symbols in the main tree is 256 literals, plus 8n
+         * match header symbols where n is the largest position slot
+         * number that might be needed to address any offset in the
+         * window.
+         */
+        {
+            int ignored, last_slot;
+            last_slot = lzx_compute_position_slot(OUR_LZX_WINSIZE-1, &ignored);
+            huf->nsyms = 8 * (last_slot+1) + 256;
+        }
+        break;
+      case LST_LENTREE:
+        max_code_len = 16;             /* pretree again */
+        huf->nsyms = 249;              /* a fixed value in the spec */
+        break;
+      case LST_MAINTREE_PRETREE_1:
+      case LST_MAINTREE_PRETREE_2:
+      case LST_LENTREE_PRETREE:
+        /* Pretree code lengths are stored in 4-bit fields, so they
+         * can't go above 15. There are a standard 20 symbols in the
+         * pretree alphabet. */
+        max_code_len = 15;
+        huf->nsyms = 20;
+        break;
+      case LST_ALIGNOFFTREE:
+        /* The aligned-offset tree has 8 elements stored in 3-bit
+         * fields. */
+        max_code_len = 7;
+        huf->nsyms = 8;
+        break;
+    }
+
+    freqs = snewn(huf->nsyms, int);
+
+    /*
+     * Count up the symbol frequencies.
+     */
+    for (i = 0; i < huf->nsyms; i++)
+        freqs[i] = 0;
+    for (i = 0; i < nsyms; i++) {
+        if (syms[i].type != which)
+            continue;
+
+        /*
+         * Guard the array index. In particular, the main tree's
+         * alphabet above is sized from OUR_LZX_WINSIZE, so a match
+         * whose formatted offset needed a higher position slot than
+         * that allows for would otherwise write past the end of
+         * freqs[].
+         */
+        assert(syms[i].value >= 0 && syms[i].value < huf->nsyms);
+        freqs[syms[i].value]++;
+    }
+
+    /*
+     * Build the Huffman table.
+     */
+    huf->lengths = snewn(huf->nsyms, unsigned char);
+    build_huffman_tree(freqs, huf->lengths, huf->nsyms, max_code_len);
+    huf->codes = snewn(huf->nsyms, int);
+    compute_huffman_codes(huf->lengths, huf->codes, huf->nsyms);
+
+    /*
+     * Cleanup.
+     */
+    sfree(freqs);
+}
+
+/*
+ * Describe the code lengths huf->lengths[symoffset..symlimit-1] as a
+ * sequence of pretree symbols (of type 'pretree_symtype') and raw-bit
+ * fields, appended to 'buf'.
+ *
+ * A single length is sent as (oldlength - length) mod 17. A run of
+ * four or more equal lengths is sent using the run-length symbols:
+ * 17 plus 4 raw bits for 4-19 zeroes, 18 plus 5 raw bits for 20-51
+ * zeroes, and 19 plus 1 raw bit for 4 or 5 copies of a nonzero
+ * length, which is then followed by the symbol for that length.
+ *
+ * huf->oldlengths is allocated and zero-filled here if it is still
+ * NULL.
+ */
+void lzx_tree_with_pretree(LZXHuf *huf, int symoffset, int symlimit,
+ LZXBuffer *buf, LZXSymType pretree_symtype)
+{
+ int i, r;
+
+ if (!huf->oldlengths) {
+ huf->oldlengths = snewn(huf->nsyms, unsigned char);
+ for (i = 0; i < huf->nsyms; i++)
+ huf->oldlengths[i] = 0;
+ }
+
+ for (i = symoffset; i < symlimit; i++) {
+ /* r = number of consecutive entries, starting at i, that share
+ * the code length of entry i */
+ for (r = 1; i+r < symlimit; r++)
+ if (huf->lengths[i+r] != huf->lengths[i])
+ break;
+
+ if (r >= 4) {
+ /*
+ * We have at least one run of the same code length long
+ * enough to use one of the run-length encoding symbols.
+ */
+ while (r >= 4) {
+ int thisrun;
+ if (huf->lengths[i] == 0) {
+ thisrun = r > 20+31 ? 20+31 : r;
+ if (thisrun >= 20) {
+ lzx_addsym(buf, pretree_symtype, 18);
+ lzx_addsym(buf, LST_RAWBITS_BASE + 5, thisrun - 20);
+ } else {
+ lzx_addsym(buf, pretree_symtype, 17);
+ lzx_addsym(buf, LST_RAWBITS_BASE + 4, thisrun - 4);
+ }
+ } else {
+ thisrun = r > 5 ? 5 : r;
+ lzx_addsym(buf, pretree_symtype, 19);
+ lzx_addsym(buf, LST_RAWBITS_BASE + 1, thisrun - 4);
+ lzx_addsym(buf, pretree_symtype,
+ (huf->oldlengths[i]-huf->lengths[i] + 17) % 17);
+ }
+ r -= thisrun;
+ i += thisrun;
+ }
+
+ if (r == 0) {
+ i--; /* compensate for normal loop increment */
+ continue;
+ }
+ }
+
+ /*
+ * Otherwise, emit a normal non-encoded symbol.
+ */
+ lzx_addsym(buf, pretree_symtype,
+ (huf->oldlengths[i]-huf->lengths[i] + 17) % 17);
+ }
+}
+
+/*
+ * Describe a tree directly: append each of its code lengths to 'buf'
+ * as a raw field 'bits' bits wide.
+ */
+void lzx_tree_simple(LZXHuf *huf, LZXBuffer *buf, int bits)
+{
+    const LZXSymType rawtype = (LZXSymType)(LST_RAWBITS_BASE + bits);
+    int sym;
+
+    for (sym = 0; sym < huf->nsyms; sym++)
+        lzx_addsym(buf, rawtype, huf->lengths[sym]);
+}
+
+/*
+ * State of the output bit stream while an LZXEncodedFile is being
+ * built up.
+ */
+typedef struct LZXBitstream {
+ struct LZXEncodedFile *ef; /* the output under construction */
+ size_t data_size, resets_size; /* allocated sizes of ef's two arrays */
+ unsigned short bitbuffer; /* bits not yet written out to ef->data */
+ int nbits; /* number of bits held in bitbuffer */
+ int first_block; /* if set, next block emits the whole-file header */
+} LZXBitstream;
+
+/*
+ * Append the low 'bits' bits of 'value' to the bit stream, most
+ * significant bit first. Each time 16 bits have accumulated they are
+ * written to the output data as two bytes, low byte first.
+ */
+void lzx_write_bits(LZXBitstream *bs, int value, int bits)
+{
+    struct LZXEncodedFile *ef = bs->ef;
+
+    while (bs->nbits + bits >= 16) {
+        /* Number of bits needed to top the buffer up to 16. */
+        int fill = 16 - bs->nbits;
+
+        bs->bitbuffer = (bs->bitbuffer << fill) | (value >> (bits - fill));
+
+        if (ef->data_len + 2 > bs->data_size) {
+            bs->data_size = ef->data_len * 5 / 4 + 65536;
+            ef->data = sresize(ef->data, bs->data_size, unsigned char);
+        }
+        ef->data[ef->data_len] = bs->bitbuffer;
+        ef->data[ef->data_len + 1] = bs->bitbuffer >> 8;
+        ef->data_len += 2;
+
+        bs->bitbuffer = 0;
+        bs->nbits = 0;
+
+        /* Keep only the part of 'value' not yet written. */
+        bits -= fill;
+        value &= (1 << bits) - 1;
+    }
+
+    bs->bitbuffer = (bs->bitbuffer << bits) | value;
+    bs->nbits += bits;
+}
+
+/*
+ * Pad the bit stream with zero bits up to the next 16-bit boundary
+ * (writing nothing if it is already on one).
+ */
+void lzx_realign(LZXBitstream *bs)
+{
+    /* -nbits modulo 16: how many bits short of a full word we are */
+    unsigned padding = 15 & -(unsigned)bs->nbits;
+
+    lzx_write_bits(bs, 0, padding);
+}
+
+/*
+ * Pad the bit stream to a 16-bit boundary, then append the current
+ * length of the output data to the list of reset byte offsets.
+ */
+void lzx_write_reset_table_entry(LZXBitstream *bs)
+{
+    struct LZXEncodedFile *ef = bs->ef;
+
+    lzx_realign(bs);
+
+    if (ef->n_resets >= bs->resets_size) {
+        bs->resets_size = ef->n_resets * 5 / 4 + 256;
+        ef->reset_byte_offsets = sresize(ef->reset_byte_offsets,
+                                         bs->resets_size, size_t);
+    }
+
+    ef->reset_byte_offsets[ef->n_resets] = ef->data_len;
+    ef->n_resets++;
+}
+
+/*
+ * Write syms[0..nsyms-1] to the bit stream: raw-bit symbols are
+ * written verbatim, a realign marker pads the stream and records a
+ * reset table entry, and everything else is written as its code from
+ * the corresponding tree in 'hufs'.
+ */
+void lzx_huf_encode(LZXSym *syms, int nsyms, LZXHufs *hufs, LZXBitstream *bs)
+{
+    int k;
+
+    for (k = 0; k < nsyms; k++) {
+        const LZXSym *sym = &syms[k];
+        const LZXHuf *huf;
+
+        if (sym->type >= LST_RAWBITS_BASE) {
+            lzx_write_bits(bs, sym->value, sym->type - LST_RAWBITS_BASE);
+            continue;
+        }
+
+        if (sym->type == LST_REALIGN_BITSTREAM) {
+            /* Realign the bitstream to a 16-bit boundary, and write a
+             * reset table entry giving the resulting byte offset. */
+            lzx_realign(bs);
+            lzx_write_reset_table_entry(bs);
+            continue;
+        }
+
+        huf = &hufs->hufs[sym->type];
+        lzx_write_bits(bs, huf->codes[sym->value], huf->lengths[sym->value]);
+    }
+}
+
+/*
+ * Encode syms[0..nsyms-1] as one LZX block and write it to 'bs'.
+ *
+ * Builds the main, length and aligned-offset trees from the symbols,
+ * works out the header describing them, chooses between block types
+ * 1 and 2, and then writes the header followed by the symbols.
+ * 'blocksize' is the value written to the 24-bit size field of the
+ * block header.
+ *
+ * The header is assembled as eight separate symbol buffers which are
+ * written out in index order: [0] the top-level block header, [1]
+ * the aligned offset tree (block type 2 only), then for each of the
+ * main tree's first 256 symbols ([2],[3]), the rest of the main tree
+ * ([4],[5]) and the length tree ([6],[7]), a pretree followed by the
+ * tree encoded using it.
+ *
+ * On return, the 'codes' and 'lengths' arrays of every tree in
+ * 'hufs' have been freed.
+ */
+void lzx_encode_block(LZXSym *syms, int nsyms, int blocksize,
+ LZXHufs *hufs, LZXBitstream *bs)
+{
+ LZXBuffer header[8];
+ int i, blocktype;
+
+ for (i = 0; i < (int)lenof(header); i++)
+ lzx_buffer_init(&header[i]);
+
+ /*
+ * Build the Huffman trees for the main alphabets used in the
+ * block.
+ */
+ lzx_build_tree(syms, nsyms, LST_MAINTREE, hufs);
+ lzx_build_tree(syms, nsyms, LST_LENTREE, hufs);
+ lzx_build_tree(syms, nsyms, LST_ALIGNOFFTREE, hufs);
+
+ /*
+ * Encode each of those as a sequence of pretree symbols.
+ */
+ lzx_tree_with_pretree(&hufs->hufs[LST_MAINTREE], 0, 256,
+ &header[3], LST_MAINTREE_PRETREE_1);
+ lzx_tree_with_pretree(&hufs->hufs[LST_MAINTREE], 256,
+ hufs->hufs[LST_MAINTREE].nsyms,
+ &header[5], LST_MAINTREE_PRETREE_2);
+ lzx_tree_with_pretree(&hufs->hufs[LST_LENTREE], 0,
+ hufs->hufs[LST_LENTREE].nsyms,
+ &header[7], LST_LENTREE_PRETREE);
+
+ /*
+ * Build the pretree for each of those encodings.
+ */
+ lzx_build_tree(header[3].syms, header[3].nsyms,
+ LST_MAINTREE_PRETREE_1, hufs);
+ lzx_build_tree(header[5].syms, header[5].nsyms,
+ LST_MAINTREE_PRETREE_2, hufs);
+ lzx_build_tree(header[7].syms, header[7].nsyms,
+ LST_LENTREE_PRETREE, hufs);
+
+ /*
+ * Decide whether we're keeping the aligned offset tree or not.
+ */
+ {
+ int with, without;
+
+ with = 3*8; /* cost of transmitting tree */
+ without = 0; /* or not */
+
+ for (i = 0; i < nsyms; i++)
+ if (syms[i].type == LST_ALIGNOFFTREE) {
+ with += hufs->hufs[LST_ALIGNOFFTREE].lengths[syms[i].value];
+ without += 3;
+ }
+
+ if (with < without) {
+ /* Yes, it's a win to use the aligned offset tree. */
+ blocktype = 2;
+ } else {
+ /* No, we do better by throwing it away. */
+ blocktype = 1;
+
+ /* Easiest way to simulate that is to pretend we're still
+ * using an aligned offset tree in the encoding, but to
+ * chuck away our code lengths and replace them with the
+ * fixed-length trivial tree. */
+ for (i = 0; i < 8; i++) {
+ hufs->hufs[LST_ALIGNOFFTREE].lengths[i] = 3;
+ hufs->hufs[LST_ALIGNOFFTREE].codes[i] = i;
+ }
+ }
+ }
+
+ /*
+ * Encode all the simply encoded trees (the three pretrees and the
+ * aligned offset tree).
+ */
+ lzx_tree_simple(&hufs->hufs[LST_MAINTREE_PRETREE_1], &header[2], 4);
+ lzx_tree_simple(&hufs->hufs[LST_MAINTREE_PRETREE_2], &header[4], 4);
+ lzx_tree_simple(&hufs->hufs[LST_LENTREE_PRETREE], &header[6], 4);
+ if (blocktype == 2)
+ lzx_tree_simple(&hufs->hufs[LST_ALIGNOFFTREE], &header[1], 3);
+
+ /*
+ * Top-level block header.
+ */
+ if (bs->first_block) {
+ /*
+ * Also include the whole-file header which says whether E8
+ * call translation is on. We never turn it on, because we
+ * don't support it (since in this use case it doesn't seem
+ * likely to be particularly useful anyway).
+ *
+ * It looks like a layer violation to put the output of this
+ * whole-file header inside the per-block function like this,
+ * but in fact it has to be done here because the first reset
+ * table entry really is supposed to point to the _start_ of
+ * the whole-file header.
+ */
+ lzx_addsym(&header[0], LST_RAWBITS_BASE + 1, 0);
+ bs->first_block = FALSE;
+ }
+ lzx_addsym(&header[0], LST_RAWBITS_BASE + 3, blocktype);
+ lzx_addsym(&header[0], LST_RAWBITS_BASE + 24, blocksize);
+
+ /*
+ * Ensure the bit stream starts off aligned, and output an initial
+ * reset-table entry.
+ */
+ lzx_realign(bs);
+ lzx_write_reset_table_entry(bs);
+
+ /*
+ * Write out all of our symbol sequences in order: all of those
+ * assorted header fragments, then the main LZ77 token sequence.
+ */
+ for (i = 0; i < (int)lenof(header); i++)
+ lzx_huf_encode(header[i].syms, header[i].nsyms, hufs, bs);
+ lzx_huf_encode(syms, nsyms, hufs, bs);
+
+ /*
+ * Clean up.
+ */
+ for (i = 0; i < (int)lenof(header); i++)
+ sfree(header[i].syms);
+ for (i = 0; i < (int)lenof(hufs->hufs); i++) {
+ sfree(hufs->hufs[i].codes);
+ sfree(hufs->hufs[i].lengths);
+ }
+}
+
+/*
+ * LZX-compress 'totallen' bytes at 'vdata'.
+ *
+ * The input is cut into pieces of at most 'reset_interval' bytes.
+ * Each piece is converted to symbols by lzx_lz77() (which realigns
+ * the bit stream every 'realign_interval' bytes) and written out as
+ * a single block preceded by the whole-file header.
+ *
+ * Both intervals must be positive, and 'reset_interval' must be
+ * small enough for a block size to fit in the block header's 24-bit
+ * size field.
+ *
+ * Returns a newly allocated LZXEncodedFile. If 'totallen' is not
+ * positive, no blocks are written and both of its arrays are left
+ * NULL with zero counts.
+ */
+struct LZXEncodedFile *lzx(const void *vdata, int totallen,
+                           int realign_interval, int reset_interval)
+{
+    const unsigned char *data = (const unsigned char *)vdata;
+    LZXBitstream bs;
+    LZXHufs hufs;
+    int i;
+
+    /*
+     * A non-positive interval would make every piece empty, so the
+     * chunking loops here and in lzx_lz77() would never finish. And
+     * lzx_encode_block() writes the block size (which is at most
+     * reset_interval) as a 24-bit field.
+     */
+    assert(realign_interval > 0);
+    assert(reset_interval > 0 && reset_interval < (1 << 24));
+
+    bs.ef = snew(struct LZXEncodedFile);
+    bs.ef->data = NULL;
+    bs.ef->reset_byte_offsets = NULL;
+    bs.ef->data_len = bs.data_size = 0;
+    bs.ef->n_resets = bs.resets_size = 0;
+    bs.bitbuffer = 0;
+    bs.nbits = 0;
+    bs.first_block = TRUE;       /* don't leave any field uninitialised */
+
+    for (i = 0; i < (int)lenof(hufs.hufs); i++)
+        hufs.hufs[i].oldlengths = NULL;
+
+    while (totallen > 0) {
+        int thislen =
+            totallen < reset_interval ? totallen : reset_interval;
+        LZXBuffer buf;
+
+        lzx_buffer_init(&buf);
+
+        lzx_lz77(&buf, data, thislen, realign_interval);
+        data += thislen;
+        totallen -= thislen;
+
+        /*
+         * Block boundaries are chosen completely trivially: since we
+         * have to terminate a block every time we reach the (fairly
+         * short) reset interval in any case, it doesn't hurt us much
+         * to just fix the assumption that every (reset_interval)
+         * bytes of the input turn into exactly one block, i.e. the
+         * whole of buf.syms that we just constructed is output in one
+         * go. We _could_ try improving on this by clever
+         * block-boundary heuristics, but I don't really think it's
+         * worth it.
+         */
+        bs.first_block = TRUE; /* reset every time we reset the LZ state */
+        lzx_encode_block(buf.syms, buf.nsyms, thislen, &hufs, &bs);
+
+        sfree(buf.syms);
+    }
+
+    for (i = 0; i < (int)lenof(hufs.hufs); i++)
+        sfree(hufs.hufs[i].oldlengths);
+
+    /* Realign to a 16-bit boundary, i.e. flush out any last few
+     * unwritten bits. */
+    lzx_realign(&bs);
+
+    return bs.ef;
+}
+
+#ifdef LZX_TEST
+/*
+gcc -g -O0 -DLZX_TEST -o lzxtest -Icharset lzx.c lz77.c huffman.c malloc.c
+*/
+#include <err.h>
+/*
+ * Test driver: LZX-compress the file named by argv[1] and write the
+ * raw compressed data to the file named by argv[2]. Any I/O failure
+ * is reported and exits with status 1.
+ */
+int main(int argc, char **argv)
+{
+    FILE *fp;
+    long insize;
+    unsigned char *inbuf;
+    struct LZXEncodedFile *ef;
+
+    if (argc != 3)
+        errx(1, "expected infile and outfile arguments");
+
+    fp = fopen(argv[1], "rb");
+    if (!fp)
+        err(1, "%s: open", argv[1]);
+    if (fseek(fp, 0, SEEK_END) != 0)
+        err(1, "%s: seek", argv[1]);
+    insize = ftell(fp);
+    if (insize < 0)
+        err(1, "%s: tell", argv[1]);
+    rewind(fp);
+    inbuf = snewn(insize, unsigned char);
+    /* A short read would otherwise leave part of inbuf uninitialised
+     * and get it compressed anyway. */
+    if (fread(inbuf, 1, insize, fp) != (size_t)insize)
+        errx(1, "%s: short read", argv[1]);
+    fclose(fp);
+
+    ef = lzx(inbuf, insize, 0x8000, 0x10000);
+
+    fp = fopen(argv[2], "wb");
+    if (!fp)
+        err(1, "%s: open", argv[2]);
+    if (fwrite(ef->data, 1, ef->data_len, fp) != ef->data_len)
+        err(1, "%s: write", argv[2]);
+    /* fclose flushes buffered output, so it can fail too. */
+    if (fclose(fp) != 0)
+        err(1, "%s: close", argv[2]);
+
+    sfree(ef->data);
+    sfree(ef->reset_byte_offsets);
+    sfree(ef);
+    sfree(inbuf);
+
+    return 0;
+}
+
+/* Link-time stub for the test build; it is not expected to be called. */
+wchar_t *ustrdup(wchar_t const *s)
+{
+    assert(0 && "should be unused");
+    return NULL;       /* keep the function well-defined without assertions */
+}
+/* Out-of-memory handler for the test build: report and exit. */
+void fatalerr_nomemory(void)
+{
+    errx(1, "out of memory");
+}
+#endif
diff --git a/lzx.h b/lzx.h
new file mode 100644
index 0000000..ff78f5d
--- /dev/null
+++ b/lzx.h
@@ -0,0 +1,24 @@
+/*
+ * lzx.h: LZX encoder for Windows CHM files.
+ */
+
+#ifndef HALIBUT_LZX_H
+#define HALIBUT_LZX_H
+
+#include <stddef.h>                    /* for size_t */
+
+/*
+ * The result of an LZX compression run.
+ */
+struct LZXEncodedFile {
+    unsigned char *data;               /* the compressed data */
+    size_t data_len;                   /* number of bytes in 'data' */
+
+    size_t *reset_byte_offsets;        /* offsets into 'data'; see below */
+    size_t n_resets;                   /* entries in reset_byte_offsets */
+};
+
+/*
+ * Produce an LZX-compressed encoding of an input data block. Return
+ * it, along with a list of byte offsets where the data stream is
+ * realigned to a 16-bit boundary because one of realign_interval and
+ * reset_interval has run out.
+ *
+ * The output structure and its fields 'data' and 'reset_byte_offsets'
+ * are all dynamically allocated, and need freeing by the receiver
+ * when finished with.
+ */
+struct LZXEncodedFile *lzx(const void *data, int len,
+                           int realign_interval, int reset_interval);
+
+#endif /* HALIBUT_LZX_H */
diff --git a/main.c b/main.c
index 405e6ef..535f024 100644
--- a/main.c
+++ b/main.c
@@ -35,6 +35,7 @@ static const struct backend {
{"info", info_backend, info_config_filename, 0x0010, 0},
{"ps", ps_backend, ps_config_filename, 0x0020, 0x0001},
{"pdf", pdf_backend, pdf_config_filename, 0x0040, 0x0001},
+ {"chm", chm_backend, chm_config_filename, 0x0080, 0},
};
int main(int argc, char **argv) {
diff --git a/winchm.c b/winchm.c
new file mode 100644
index 0000000..cb21715
--- /dev/null
+++ b/winchm.c
@@ -0,0 +1,1436 @@
+/*
+ * winchm.c: direct output of .CHM files.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+
+#include "halibut.h"
+#include "tree234.h"
+#include "lzx.h"
+
+/*
+ * Fixed-width integer serialisation helpers.
+ *
+ * PUT_* store 'value' at the address 'cp', one byte at a time, in the
+ * stated byte order. 'cp' may be any pointer expression: it is
+ * parenthesised before the cast, so pointer arithmetic in the
+ * argument is done in the argument's own type. Both arguments are
+ * evaluated more than once, so they must be free of side effects.
+ */
+#define PUT_32BIT_LSB_FIRST(cp, value) do { \
+    ((unsigned char *)(cp))[0] = 0xFF & (value); \
+    ((unsigned char *)(cp))[1] = 0xFF & ((value) >> 8); \
+    ((unsigned char *)(cp))[2] = 0xFF & ((value) >> 16); \
+    ((unsigned char *)(cp))[3] = 0xFF & ((value) >> 24); } while (0)
+
+#define PUT_32BIT_MSB_FIRST(cp, value) do { \
+    ((unsigned char *)(cp))[3] = 0xFF & (value); \
+    ((unsigned char *)(cp))[2] = 0xFF & ((value) >> 8); \
+    ((unsigned char *)(cp))[1] = 0xFF & ((value) >> 16); \
+    ((unsigned char *)(cp))[0] = 0xFF & ((value) >> 24); } while (0)
+
+#define PUT_16BIT_LSB_FIRST(cp, value) do { \
+    ((unsigned char *)(cp))[0] = 0xFF & (value); \
+    ((unsigned char *)(cp))[1] = 0xFF & ((value) >> 8); } while (0)
+
+/*
+ * RDADD_* append the serialised form of 'value' to the rdstringc
+ * 'rs'. The scratch buffer has a deliberately unusual name so that an
+ * identifier appearing in 'value' cannot be captured by it.
+ */
+#define RDADD_32BIT_LSB_FIRST(rs, value) do { \
+    unsigned char rdadd_bytes_[4]; \
+    PUT_32BIT_LSB_FIRST(rdadd_bytes_, value); \
+    rdaddsn(rs, (void *)rdadd_bytes_, sizeof(rdadd_bytes_)); \
+    } while (0)
+
+#define RDADD_32BIT_MSB_FIRST(rs, value) do { \
+    unsigned char rdadd_bytes_[4]; \
+    PUT_32BIT_MSB_FIRST(rdadd_bytes_, value); \
+    rdaddsn(rs, (void *)rdadd_bytes_, sizeof(rdadd_bytes_)); \
+    } while (0)
+
+#define RDADD_16BIT_LSB_FIRST(rs, value) do { \
+    unsigned char rdadd_bytes_[2]; \
+    PUT_16BIT_LSB_FIRST(rdadd_bytes_, value); \
+    rdaddsn(rs, (void *)rdadd_bytes_, sizeof(rdadd_bytes_)); \
+    } while (0)
+
+/*
+ * Append a 16-byte GUID to 'rs': one 32-bit word and two 16-bit
+ * halfwords, each least-significant byte first, followed by the
+ * eight trailing bytes b0..b7 in the order given.
+ */
+static void guid(rdstringc *rs, unsigned long w0,
+                 unsigned short h0, unsigned short h1,
+                 unsigned char b0, unsigned char b1,
+                 unsigned char b2, unsigned char b3,
+                 unsigned char b4, unsigned char b5,
+                 unsigned char b6, unsigned char b7)
+{
+    unsigned char tail[8];
+    int k;
+
+    tail[0] = b0;
+    tail[1] = b1;
+    tail[2] = b2;
+    tail[3] = b3;
+    tail[4] = b4;
+    tail[5] = b5;
+    tail[6] = b6;
+    tail[7] = b7;
+
+    RDADD_32BIT_LSB_FIRST(rs, w0);
+    RDADD_16BIT_LSB_FIRST(rs, h0);
+    RDADD_16BIT_LSB_FIRST(rs, h1);
+    for (k = 0; k < 8; k++)
+        rdaddc(rs, tail[k]);
+}
+
+/*
+ * Assemble the outer CHM container into 'rs': the ITSF file header,
+ * the short 'header section', then the already-built directory and
+ * content section 0 verbatim.
+ *
+ * Any field whose value depends on layout that comes later is first
+ * written as zero, with its position remembered in a *_at variable,
+ * and patched in place once the value is known.
+ */
+static void itsf(rdstringc *rs,
+                 const rdstringc *directory, const rdstringc *content0)
+{
+    int hdr_len_at;
+    int sect_start, sect_off_at, sect_len_at;
+    int dir_off_at, c0_off_at, total_len_at;
+
+    /* ---- ITSF file header ---- */
+    rdaddsc(rs, "ITSF");                   /* magic number of the file */
+    RDADD_32BIT_LSB_FIRST(rs, 3);          /* format version */
+    hdr_len_at = rs->pos;
+    RDADD_32BIT_LSB_FIRST(rs, 0);          /* header length (patched) */
+    RDADD_32BIT_LSB_FIRST(rs, 1);          /* unknown; always seen as 1 */
+    RDADD_32BIT_MSB_FIRST(rs, 0x12345678); /* timestamp (FIXME) */
+    RDADD_32BIT_LSB_FIRST(rs, 0x809);      /* language code (FIXME: configurable) */
+    guid(rs,0x7C01FD10,0x7BAA,0x11D0,0x9E,0x0C,0x00,0xA0,0xC9,0x22,0xE6,0xEC);
+    guid(rs,0x7C01FD11,0x7BAA,0x11D0,0x9E,0x0C,0x00,0xA0,0xC9,0x22,0xE6,0xEC);
+
+    /* Each of the following is a 64-bit quantity stored as two 32-bit
+     * words, low word first; the high word is always zero here. */
+    sect_off_at = rs->pos;
+    RDADD_32BIT_LSB_FIRST(rs, 0);          /* header section offset (patched) */
+    RDADD_32BIT_LSB_FIRST(rs, 0);          /* ... high word */
+    sect_len_at = rs->pos;
+    RDADD_32BIT_LSB_FIRST(rs, 0);          /* header section size (patched) */
+    RDADD_32BIT_LSB_FIRST(rs, 0);          /* ... high word */
+    dir_off_at = rs->pos;
+    RDADD_32BIT_LSB_FIRST(rs, 0);          /* directory offset (patched) */
+    RDADD_32BIT_LSB_FIRST(rs, 0);          /* ... high word */
+    RDADD_32BIT_LSB_FIRST(rs, directory->pos); /* directory size, known now */
+    RDADD_32BIT_LSB_FIRST(rs, 0);          /* ... high word */
+    c0_off_at = rs->pos;
+    RDADD_32BIT_LSB_FIRST(rs, 0);          /* content section 0 offset (patched) */
+    RDADD_32BIT_LSB_FIRST(rs, 0);          /* ... high word */
+
+    /* The file header ends here, so its length is now known. */
+    PUT_32BIT_LSB_FIRST(rs->text + hdr_len_at, rs->pos);
+
+    /* ---- 'header section' ---- */
+    sect_start = rs->pos;
+    PUT_32BIT_LSB_FIRST(rs->text + sect_off_at, rs->pos);
+    RDADD_32BIT_LSB_FIRST(rs, 0x1FE);      /* magic number */
+    RDADD_32BIT_LSB_FIRST(rs, 0);          /* unknown; always seen as 0 */
+    total_len_at = rs->pos;
+    RDADD_32BIT_LSB_FIRST(rs, 0);          /* total file size (patched) */
+    RDADD_32BIT_LSB_FIRST(rs, 0);          /* ... high word */
+    RDADD_32BIT_LSB_FIRST(rs, 0);          /* unknown; always seen as 0 */
+    RDADD_32BIT_LSB_FIRST(rs, 0);          /* unknown; always seen as 0 */
+    PUT_32BIT_LSB_FIRST(rs->text + sect_len_at,
+                        rs->pos - sect_start);
+
+    /* ---- directory, copied in as-is ---- */
+    PUT_32BIT_LSB_FIRST(rs->text + dir_off_at, rs->pos);
+    rdaddsn(rs, directory->text, directory->pos);
+
+    /* ---- content section 0, copied in as-is ---- */
+    PUT_32BIT_LSB_FIRST(rs->text + c0_off_at, rs->pos);
+    rdaddsn(rs, content0->text, content0->pos);
+
+    /* Everything has been written, so the file size is final. */
+    PUT_32BIT_LSB_FIRST(rs->text + total_len_at, rs->pos);
+}
+
+/*
+ * Append 'val' to 'rs' as a CHM ENCINT: a big-endian sequence of
+ * 7-bit groups in which every byte except the last has its top bit
+ * set.
+ */
+static void encint(rdstringc *rs, unsigned val)
+{
+    int shift = 0;
+
+    /* Find the bit position of the most significant 7-bit group that
+     * is needed; everything above it is zero. */
+    while ((val >> shift) >= 0x80)
+        shift += 7;
+
+    /* Emit the leading groups, most significant first, each flagged
+     * as 'more bytes follow'... */
+    for (; shift > 0; shift -= 7)
+        rdaddc(rs, ((val >> shift) & 0x7F) | 0x80);
+
+    /* ...and then the final group with the top bit clear. */
+    rdaddc(rs, val & 0x7F);
+}
+
+struct chm_directory_entry { /* one named file recorded in the CHM directory */
+ char *filename; /* free this when done */
+ int which_content_section; /* number of the content section holding the data (callers in this file pass 0 or 1) */
+ int offset_in_content_section; /* write position in that section's buffer at the time the file was added */
+ int file_size; /* length of the file's data in bytes */
+};
+
+/*
+ * Compare two file names the way the CHM directory appears to order
+ * them: case-insensitively, by comparing the text after ASCII
+ * upper-case letters have been folded to lower case. (One visible
+ * consequence is that '_' sorts before every letter.)
+ *
+ * Returns -1, 0 or +1.
+ *
+ * NOTE(review): the comparison is done in plain 'char', so the
+ * relative order of bytes >= 0x80 depends on the platform's char
+ * signedness -- confirm whether non-ASCII names can occur.
+ */
+static int strcmp_chm(const char *a, const char *b)
+{
+    for (;; a++, b++) {
+        char ca = *a, cb = *b;
+
+        if (ca == '\0' && cb == '\0')
+            return 0;                  /* both strings ended together */
+
+        if ('A' <= ca && ca <= 'Z')
+            ca = ca - 'A' + 'a';
+        if ('A' <= cb && cb <= 'Z')
+            cb = cb - 'A' + 'a';
+
+        if (ca < cb)
+            return -1;
+        if (ca > cb)
+            return +1;
+    }
+}
+
+/*
+ * tree234 ordering function for directory entries: both arguments are
+ * struct chm_directory_entry pointers, ordered by file name under
+ * strcmp_chm().
+ */
+int chm_directory_entry_cmp(void *av, void *bv)
+{
+    const struct chm_directory_entry *lhs = av;
+    const struct chm_directory_entry *rhs = bv;
+
+    return strcmp_chm(lhs->filename, rhs->filename);
+}
+
+/*
+ * tree234 search function for directory entries: 'av' is a bare file
+ * name string (the search key), 'bv' is a struct chm_directory_entry
+ * from the tree. Uses the same ordering as chm_directory_entry_cmp().
+ */
+int chm_directory_entry_find(void *av, void *bv)
+{
+    const char *wanted = av;
+    const struct chm_directory_entry *candidate = bv;
+
+    return strcmp_chm(wanted, candidate->filename);
+}
+
+struct chm_index_entry { /* one record for the next B-tree level up: identifies a chunk by the first name it covers */
+ char *first_filename; /* shared pointer with some chm_directory_entry */
+ int chunk_index; /* number of the chunk this record refers to */
+};
+
+/*
+ * Complete one directory chunk (PMGL or PMGI) and append it to 'rs'.
+ *
+ * 'chunk' holds the chunk header and entries written so far, and
+ * 'reversed_quickref' holds the quick-reference index accumulated
+ * back to front, with its first halfword reserved for the entry
+ * count. This fills in the 'space at end' field at
+ * 'chunk_endlen_field', stores 'n_entries', zero-pads the chunk, and
+ * copies the quick-reference halfwords in reverse order so that the
+ * entry count ends up as the very last halfword of the chunk.
+ *
+ * Both temporary buffers are freed before returning.
+ */
+static void directory_finish_chunk(rdstringc *rs, rdstringc *chunk,
+                                   rdstringc *reversed_quickref,
+                                   int chunk_endlen_field, int n_entries,
+                                   int chunksize)
+{
+    int i;
+
+    assert(chunk->pos + reversed_quickref->pos <= chunksize);
+    PUT_32BIT_LSB_FIRST(chunk->text + chunk_endlen_field,
+                        chunksize - chunk->pos);
+    PUT_16BIT_LSB_FIRST(reversed_quickref->text, n_entries);
+    while (chunk->pos + reversed_quickref->pos < chunksize)
+        rdaddc(chunk, 0);              /* zero-pad */
+    for (i = reversed_quickref->pos - 2; i >= 0; i -= 2)
+        rdaddsn(chunk, reversed_quickref->text + i, 2);
+
+    assert(chunk->pos == chunksize);
+    rdaddsn(rs, chunk->text, chunk->pos);
+    sfree(chunk->text);
+    sfree(reversed_quickref->text);
+}
+
+/*
+ * Build the CHM directory into 'rs', which must be empty on entry.
+ *
+ * 'files' is a non-empty tree234 of struct chm_directory_entry; its
+ * entries are written out in index order, so it is presumably the
+ * tree sorted by chm_directory_entry_cmp().
+ *
+ * Output layout: an ITSP header, then the lowest-level PMGL chunks
+ * holding the actual directory entries, then as many layers of PMGI
+ * index chunks as it takes to get down to a single root chunk. All
+ * chunks are exactly 'chunksize' bytes.
+ *
+ * An entry that cannot fit even into an otherwise empty chunk is
+ * treated as a fatal internal error (assertion failure), because it
+ * could never be placed.
+ */
+static void directory(rdstringc *rs, tree234 *files)
+{
+    const int chunksize = 4096;
+    const int encoded_density = 2;
+    /* One quick-reference pointer is emitted per this many entries. */
+    const int useful_density = 1 + (1 << encoded_density);
+    int dirhdr_size_field, dirhdr_size2_field, dirhdr_depth_field;
+    int dirhdr_root_field, dirhdr_tail_field, dirhdr_nchunks_field;
+    int curr_chunk, depth, filename_index;
+    tree234 *index;
+
+    assert(rs->pos == 0);
+    assert(count234(files) > 0);
+
+    /* Directory header */
+    rdaddsc(rs, "ITSP");              /* directory header magic number */
+    RDADD_32BIT_LSB_FIRST(rs, 1);     /* format version */
+    dirhdr_size_field = rs->pos;
+    RDADD_32BIT_LSB_FIRST(rs, 0);     /* directory header size; fill in later */
+    RDADD_32BIT_LSB_FIRST(rs, 10);    /* unknown; observed to be 10 */
+    RDADD_32BIT_LSB_FIRST(rs, chunksize);
+    RDADD_32BIT_LSB_FIRST(rs, encoded_density);
+    dirhdr_depth_field = rs->pos;
+    RDADD_32BIT_LSB_FIRST(rs, 0);     /* B-tree depth; fill in later */
+    dirhdr_root_field = rs->pos;
+    RDADD_32BIT_LSB_FIRST(rs, 0);     /* root chunk index; fill in later */
+    RDADD_32BIT_LSB_FIRST(rs, 0);     /* head of PMGL chunk list; always 0 here */
+    dirhdr_tail_field = rs->pos;
+    RDADD_32BIT_LSB_FIRST(rs, 0);     /* tail of PMGL chunk list; fill in later */
+    RDADD_32BIT_LSB_FIRST(rs, 0xFFFFFFFFU); /* unknown; observed to be -1 */
+    dirhdr_nchunks_field = rs->pos;
+    RDADD_32BIT_LSB_FIRST(rs, 0);     /* total number of chunks; fill in later */
+    RDADD_32BIT_LSB_FIRST(rs, 0x409); /* language (FIXME) */
+    guid(rs,0x5D02926A,0x212E,0x11D0,0x9D,0xF9,0x00,0xA0,0xC9,0x22,0xE6,0xEC);
+    dirhdr_size2_field = rs->pos;
+    RDADD_32BIT_LSB_FIRST(rs, 0);     /* directory header size; fill in later */
+    RDADD_32BIT_LSB_FIRST(rs, 0xFFFFFFFFU); /* unknown; observed to be -1 */
+    RDADD_32BIT_LSB_FIRST(rs, 0xFFFFFFFFU); /* unknown; observed to be -1 */
+    RDADD_32BIT_LSB_FIRST(rs, 0xFFFFFFFFU); /* unknown; observed to be -1 */
+    PUT_32BIT_LSB_FIRST(rs->text + dirhdr_size_field, rs->pos);
+    PUT_32BIT_LSB_FIRST(rs->text + dirhdr_size2_field, rs->pos);
+
+    index = newtree234(NULL);
+    curr_chunk = 0;
+    depth = 1;
+    /* Write out lowest-level PMGL chunks full of actual directory entries */
+    filename_index = 0;
+    while (filename_index < count234(files)) {
+        rdstringc chunk = {0, 0, NULL};
+        rdstringc reversed_quickref = {0, 0, NULL};
+        int chunk_endlen_field, chunk_nextptr_field;
+        int n_entries, offset_of_first_entry;
+        int saved_pos, saved_rq_pos;
+
+        rdaddsc(&chunk, "PMGL");
+        chunk_endlen_field = chunk.pos;
+        RDADD_32BIT_LSB_FIRST(&chunk, 0); /* space at end; fill in later */
+        RDADD_32BIT_LSB_FIRST(&chunk, 0); /* unknown; observed to be 0 */
+        if (curr_chunk == 0) {
+            RDADD_32BIT_LSB_FIRST(&chunk, 0xFFFFFFFF); /* 'null' prev ptr */
+        } else {
+            RDADD_32BIT_LSB_FIRST(&chunk, curr_chunk - 1);
+        }
+        chunk_nextptr_field = chunk.pos; /* may overwrite 'next' ptr later */
+        RDADD_32BIT_LSB_FIRST(&chunk, curr_chunk + 1);
+
+        /* Enter this chunk in our index for the next level of the
+         * B-tree (if we end up needing one). */
+        {
+            struct chm_directory_entry *ent = (struct chm_directory_entry *)
+                index234(files, filename_index);
+            struct chm_index_entry *ient = snew(struct chm_index_entry);
+            assert(ent);
+            ient->first_filename = ent->filename;
+            ient->chunk_index = curr_chunk;
+            addpos234(index, ient, count234(index));
+        }
+
+        /* Start accumulating the quick-reference index at the end of this
+         * chunk. We'll build it up backwards, and reverse it halfwordwise
+         * when we copy it into the end of our output chunk. */
+        RDADD_16BIT_LSB_FIRST(&reversed_quickref, 0);
+        offset_of_first_entry = chunk.pos;
+
+        n_entries = 0;
+        /* Write filenames into this chunk until it's full, or until
+         * we run out of filenames. */
+        while (1) {
+            struct chm_directory_entry *ent = (struct chm_directory_entry *)
+                index234(files, filename_index++);
+            if (!ent) {
+                /* Run out of filenames, so this is the last PMGL chunk.
+                 * Reset its 'next' pointer to the 'null' -1 value. */
+                PUT_32BIT_LSB_FIRST(chunk.text + chunk_nextptr_field,
+                                    0xFFFFFFFFU);
+                /* And point the directory header's tail pointer at
+                 * this chunk. */
+                PUT_32BIT_LSB_FIRST(rs->text + dirhdr_tail_field, curr_chunk);
+                break;
+            }
+
+            /* Save the sizes of stuff in this chunk, so we can put
+             * them back if this entry turns out to overflow. */
+            saved_pos = chunk.pos;
+            saved_rq_pos = reversed_quickref.pos;
+
+            if (n_entries > 0 && n_entries % useful_density == 0) {
+                /* Add a quick-reference index pointer. */
+                RDADD_16BIT_LSB_FIRST(&reversed_quickref, chunk.pos -
+                                      offset_of_first_entry);
+            }
+
+            encint(&chunk, strlen(ent->filename));
+            rdaddsc(&chunk, ent->filename);
+            encint(&chunk, ent->which_content_section);
+            encint(&chunk, ent->offset_in_content_section);
+            encint(&chunk, ent->file_size);
+            if (chunk.pos + reversed_quickref.pos > chunksize) {
+                /* If even the first entry of a chunk overflows, it
+                 * will never fit anywhere, and retrying it in a fresh
+                 * chunk would loop for ever emitting empty chunks. */
+                assert(n_entries > 0);
+                filename_index--;
+                chunk.pos = saved_pos;
+                reversed_quickref.pos = saved_rq_pos;
+                break;
+            }
+
+            /* If we didn't overflow, then commit to this entry and
+             * loop round for the next one. */
+            n_entries++;
+        }
+
+        /* Finalise the chunk (this also frees both buffers). */
+        directory_finish_chunk(rs, &chunk, &reversed_quickref,
+                               chunk_endlen_field, n_entries, chunksize);
+        curr_chunk++;
+    }
+
+    /* Write out as many layers of PMGI index chunks as it takes to
+     * reduce the total number of chunks at the current level to 1. */
+    while (count234(index) > 1) {
+        tree234 *prev_index;
+        int index_index = 0;
+
+        prev_index = index;
+        index = newtree234(NULL);
+        depth++;
+
+        while (index_index < count234(prev_index)) {
+            rdstringc chunk = {0, 0, NULL};
+            rdstringc reversed_quickref = {0, 0, NULL};
+            int chunk_endlen_field;
+            int n_entries, offset_of_first_entry;
+            int saved_pos, saved_rq_pos;
+
+            rdaddsc(&chunk, "PMGI");
+            chunk_endlen_field = chunk.pos;
+            RDADD_32BIT_LSB_FIRST(&chunk, 0); /* space at end; fill in later */
+
+            /* Enter this chunk in our index for the next level of the
+             * B-tree (if we end up needing one). */
+            {
+                struct chm_index_entry *ent = (struct chm_index_entry *)
+                    index234(prev_index, index_index);
+                struct chm_index_entry *ient = snew(struct chm_index_entry);
+                assert(ent);
+                ient->first_filename = ent->first_filename;
+                ient->chunk_index = curr_chunk;
+                addpos234(index, ient, count234(index));
+            }
+
+            /* Start accumulating the quick-reference index at the end
+             * of this chunk, as above. */
+            RDADD_16BIT_LSB_FIRST(&reversed_quickref, 0);
+            offset_of_first_entry = chunk.pos;
+
+            n_entries = 0;
+            /* Write index entries into this chunk until it's full, or
+             * until we run out of chunks at the previous level. */
+            while (1) {
+                struct chm_index_entry *ent = (struct chm_index_entry *)
+                    index234(prev_index, index_index++);
+                if (!ent)
+                    break;
+
+                /* Save the sizes of stuff in this chunk, so we can put
+                 * them back if this entry turns out to overflow. */
+                saved_pos = chunk.pos;
+                saved_rq_pos = reversed_quickref.pos;
+
+                if (n_entries > 0 && n_entries % useful_density == 0) {
+                    /* Add a quick-reference index pointer. */
+                    RDADD_16BIT_LSB_FIRST(&reversed_quickref, chunk.pos -
+                                          offset_of_first_entry);
+                }
+
+                encint(&chunk, strlen(ent->first_filename));
+                rdaddsc(&chunk, ent->first_filename);
+                encint(&chunk, ent->chunk_index);
+                if (chunk.pos + reversed_quickref.pos > chunksize) {
+                    /* Same guard as in the PMGL loop: an entry that
+                     * overflows an empty chunk can never be placed. */
+                    assert(n_entries > 0);
+                    index_index--;
+                    chunk.pos = saved_pos;
+                    reversed_quickref.pos = saved_rq_pos;
+                    break;
+                }
+
+                /* If we didn't overflow, then commit to this entry and
+                 * loop round for the next one. */
+                n_entries++;
+            }
+
+            /* Finalise the chunk (this also frees both buffers). */
+            directory_finish_chunk(rs, &chunk, &reversed_quickref,
+                                   chunk_endlen_field, n_entries, chunksize);
+            curr_chunk++;
+        }
+
+        /*
+         * Now free the old index.
+         */
+        while (1) {
+            struct chm_index_entry *ent = (struct chm_index_entry *)
+                delpos234(prev_index, 0);
+            if (!ent)
+                break;
+            sfree(ent);
+        }
+        freetree234(prev_index);
+    }
+
+    /*
+     * Finished! We've reduced to a single chunk. Free the remaining
+     * index (which must have size 1).
+     */
+    assert(count234(index) == 1);
+    sfree(delpos234(index, 0));
+    freetree234(index);
+
+    /* Fill in the deferred fields in the main header. The root is the
+     * chunk written last. */
+    PUT_32BIT_LSB_FIRST(rs->text + dirhdr_depth_field, depth);
+    PUT_32BIT_LSB_FIRST(rs->text + dirhdr_root_field, curr_chunk-1);
+    PUT_32BIT_LSB_FIRST(rs->text + dirhdr_nchunks_field, curr_chunk);
+}
+
+/*
+ * Begin a record in the #SYSTEM file: write the 16-bit record code
+ * and a 16-bit length placeholder. Returns the position at which the
+ * record starts, to be handed to sys_end() once the payload has been
+ * appended.
+ */
+static int sys_start(rdstringc *rs, int code)
+{
+    const int record_at = rs->pos;
+
+    RDADD_16BIT_LSB_FIRST(rs, code);
+    RDADD_16BIT_LSB_FIRST(rs, 0);      /* length; sys_end() overwrites it */
+
+    return record_at;
+}
+/*
+ * Finish the record begun at 'recstart' by sys_start(): store, in its
+ * length field, the number of bytes appended after the 4-byte record
+ * header.
+ */
+static void sys_end(rdstringc *rs, int recstart)
+{
+    /* Record layout: 2 bytes of code, 2 bytes of length, payload. */
+    PUT_16BIT_LSB_FIRST(rs->text + recstart + 2,
+                        rs->pos - (recstart + 4));
+}
+
+struct chm_window { /* one help-viewer window definition, as registered through chm_add_window() */
+ char *name; /* owned copy of the window name */
+ char *title; /* owned copy of the window title */
+ char *contentsfile; /* owned copy, or NULL if none was supplied */
+ char *indexfile; /* owned copy, or NULL if none was supplied */
+ char *rootfile; /* owned copy of the root file name */
+ int navpaneflags; /* nav pane style bits, stored exactly as supplied */
+ int toolbarflags; /* toolbar flag bits, stored exactly as supplied */
+};
+
+/*
+ * All the state for one CHM file under construction. Created by
+ * chm_new(), populated through the chm_* functions and released by
+ * chm_free().
+ */
+struct chm {
+    /* struct chm_directory_entry, sorted by chm_directory_entry_cmp */
+    tree234 *files;
+    /* struct chm_window, unsorted, kept in the order they were added */
+    tree234 *windows;
+    /* struct chm_stringtab_entry, sorted by chm_stringtab_cmp */
+    tree234 *stringtab;
+
+    rdstringc content0;                /* outer uncompressed container */
+    rdstringc content1;                /* compressed subfile */
+    rdstringc outfile;                 /* NOTE(review): use not visible in this part of the file */
+    rdstringc stringsfile;             /* text of the interned strings table */
+
+    /* Optional settings; each is NULL or an owned copy. */
+    char *title;
+    char *contents_filename;
+    char *index_filename;
+    char *default_topic;
+    char *default_window;
+
+    /* Top-level sections, chained through 'nextsibling'. */
+    struct chm_section *rootsecthead;
+    struct chm_section *rootsecttail;
+    /* Every section, chained through 'next'. */
+    struct chm_section *allsecthead;
+    struct chm_section *allsecttail;
+};
+
+/*
+ * One node of the section tree, created by chm_add_section().
+ */
+struct chm_section {
+    /* Logical links within the section tree structure */
+    struct chm_section *firstchild;
+    struct chm_section *lastchild;
+    struct chm_section *nextsibling;
+    struct chm_section *parent;
+
+    /* Link all chm_sections together into one big list, in a
+     * topological order (i.e. every section comes after its
+     * parent) */
+    struct chm_section *next;
+
+    /* Owned copies of the strings given to chm_add_section() */
+    char *title;
+    char *url;
+
+    /* Bookkeeping left unset by chm_add_section(); presumably filled
+     * in while the output files are generated */
+    int tocidx_offset_1;
+    int tocidx_offset_2;
+    int topic_index;
+    int urltbl_offset;
+    int urlstr_offset;
+};
+
+struct chm_stringtab_entry { /* one interned string, identified by position rather than by pointer */
+ struct chm *chm; /* the chm whose stringsfile buffer holds the text */
+ int strtab_offset; /* offset of the NUL-terminated string within chm->stringsfile */
+};
+
+/*
+ * tree234 ordering function for the string table: compares the text
+ * two struct chm_stringtab_entry refer to. The text is looked up
+ * through the owning chm's stringsfile buffer on every call.
+ */
+static int chm_stringtab_cmp(void *av, void *bv)
+{
+    const struct chm_stringtab_entry *lhs = av;
+    const struct chm_stringtab_entry *rhs = bv;
+    const char *ltext = lhs->chm->stringsfile.text + lhs->strtab_offset;
+    const char *rtext = rhs->chm->stringsfile.text + rhs->strtab_offset;
+
+    return strcmp(ltext, rtext);
+}
+
+/*
+ * tree234 search function for the string table: 'av' is a plain C
+ * string (the search key), 'bv' is a struct chm_stringtab_entry from
+ * the tree.
+ */
+static int chm_stringtab_find(void *av, void *bv)
+{
+    const char *wanted = av;
+    const struct chm_stringtab_entry *candidate = bv;
+    const char *stored =
+        candidate->chm->stringsfile.text + candidate->strtab_offset;
+
+    return strcmp(wanted, stored);
+}
+
+/*
+ * Return the offset of 'string' within the strings table, adding it
+ * first if it is not already there. A NULL 'string' yields offset 0.
+ *
+ * The string, including its terminating NUL, must be shorter than
+ * 0x1000 bytes.
+ */
+int chm_intern_string(struct chm *chm, const char *string)
+{
+    struct chm_stringtab_entry *ent;
+    int size;
+
+    if (!string)
+        return 0;
+
+    ent = (struct chm_stringtab_entry *)find234(
+        chm->stringtab, (void *)string, chm_stringtab_find);
+    if (ent != NULL)
+        return ent->strtab_offset;     /* already interned */
+
+    ent = snew(struct chm_stringtab_entry);
+    ent->chm = chm;
+
+    size = strlen(string) + 1;         /* include the NUL terminator */
+    assert(size < 0x1000);             /* avoid really serious trouble */
+
+    /* Pad with zero bytes until the first and last byte of the string
+     * would land in the same 0x1000-byte page. */
+    while ((chm->stringsfile.pos >> 12) !=
+           ((chm->stringsfile.pos + size-1) >> 12))
+        rdaddc(&chm->stringsfile, 0);
+
+    ent->strtab_offset = chm->stringsfile.pos;
+    rdaddsc(&chm->stringsfile, string);
+    rdaddc(&chm->stringsfile, '\0');
+    add234(chm->stringtab, ent);
+
+    return ent->strtab_offset;
+}
+
+/*
+ * Allocate and initialise an empty CHM builder. The caller releases
+ * it with chm_free().
+ */
+struct chm *chm_new(void)
+{
+    struct chm *chm = snew(struct chm);
+
+    /* Containers */
+    chm->files = newtree234(chm_directory_entry_cmp);
+    chm->windows = newtree234(NULL);
+    chm->stringtab = newtree234(chm_stringtab_cmp);
+
+    /* Output buffers */
+    chm->content0 = empty_rdstringc;
+    chm->content1 = empty_rdstringc;
+    chm->outfile = empty_rdstringc;
+    chm->stringsfile = empty_rdstringc;
+
+    /* Optional settings: nothing configured yet */
+    chm->title = NULL;
+    chm->contents_filename = NULL;
+    chm->index_filename = NULL;
+    chm->default_topic = NULL;
+    chm->default_window = NULL;
+
+    /* Section lists: empty */
+    chm->rootsecthead = NULL;
+    chm->rootsecttail = NULL;
+    chm->allsecthead = NULL;
+    chm->allsecttail = NULL;
+
+    /* Preinitialise the strings table with the empty string. This has
+     * to come last: it uses the tree and buffer set up above. */
+    chm_intern_string(chm, "");
+
+    return chm;
+}
+
+/*
+ * Release a CHM builder and everything it owns: directory entries,
+ * window definitions, string table entries, sections, the output
+ * buffers and the optional setting strings.
+ */
+void chm_free(struct chm *chm)
+{
+    struct chm_section *sect;
+
+    /* Directory entries own their filename. */
+    for (;;) {
+        struct chm_directory_entry *file = delpos234(chm->files, 0);
+        if (file == NULL)
+            break;
+        sfree(file->filename);
+        sfree(file);
+    }
+    freetree234(chm->files);
+
+    /* Window definitions own all five of their strings. */
+    for (;;) {
+        struct chm_window *window = delpos234(chm->windows, 0);
+        if (window == NULL)
+            break;
+        sfree(window->name);
+        sfree(window->title);
+        sfree(window->contentsfile);
+        sfree(window->indexfile);
+        sfree(window->rootfile);
+        sfree(window);
+    }
+    freetree234(chm->windows);
+
+    /* String table entries hold only an offset, no storage. */
+    for (;;) {
+        struct chm_stringtab_entry *interned = delpos234(chm->stringtab, 0);
+        if (interned == NULL)
+            break;
+        sfree(interned);
+    }
+    freetree234(chm->stringtab);
+
+    /* Walk the all-sections list, picking up the successor before
+     * each node is freed. */
+    sect = chm->allsecthead;
+    while (sect) {
+        struct chm_section *following = sect->next;
+        sfree(sect->title);
+        sfree(sect->url);
+        sfree(sect);
+        sect = following;
+    }
+
+    sfree(chm->content0.text);
+    sfree(chm->content1.text);
+    sfree(chm->outfile.text);
+    sfree(chm->stringsfile.text);
+
+    sfree(chm->title);
+    sfree(chm->contents_filename);
+    sfree(chm->index_filename);
+    sfree(chm->default_topic);
+    sfree(chm->default_window);
+
+    sfree(chm);
+}
+
+/*
+ * Record a file called 'name' (used verbatim as the directory name)
+ * and append its 'len' bytes of 'data' to the content buffer 'sect'.
+ * 'which_sect' is the content section number stored in the directory
+ * entry for that buffer.
+ */
+static void chm_add_file_internal(struct chm *chm, const char *name,
+                                  const char *data, int len,
+                                  rdstringc *sect, int which_sect)
+{
+    struct chm_directory_entry *entry;
+
+    entry = snew(struct chm_directory_entry);
+    entry->file_size = len;
+    /* The data goes at the current end of the section buffer, so note
+     * that position before appending. */
+    entry->offset_in_content_section = sect->pos;
+    entry->which_content_section = which_sect;
+    entry->filename = dupstr(name);
+
+    add234(chm->files, entry);
+    rdaddsn(sect, data, len);
+}
+
+/*
+ * Look up the directory entry whose name matches 'name' under the
+ * directory's own ordering (see chm_directory_entry_find). Returns
+ * whatever find234() reports for that search.
+ */
+static struct chm_directory_entry *chm_find_file(
+    struct chm *chm, const char *name)
+{
+    struct chm_directory_entry *found;
+
+    found = find234(chm->files, (void *)name, chm_directory_entry_find);
+    return found;
+}
+
+/*
+ * Return a newly allocated copy of 'str' with '/' put in front of it.
+ * The caller frees the result.
+ */
+static char *add_leading_slash(const char *str)
+{
+    size_t len = strlen(str);
+    char *slashed = snewn(2 + len, char);  /* '/' + text + NUL */
+
+    slashed[0] = '/';
+    memcpy(slashed + 1, str, len + 1);     /* text and its NUL */
+
+    return slashed;
+}
+
+/*
+ * Add a file to the CHM. 'name' is given without a leading slash;
+ * the directory name stored is "/" followed by 'name'. The data goes
+ * into content section 1.
+ */
+void chm_add_file(struct chm *chm, const char *name, const char *data, int len)
+{
+    char *dirname = add_leading_slash(name);
+
+    chm_add_file_internal(chm, dirname, data, len, &chm->content1, 1);
+
+    /* chm_add_file_internal() keeps its own copy of the name. */
+    sfree(dirname);
+}
+
+/*
+ * Set the document title. The string is copied; setting it again
+ * replaces the earlier value.
+ */
+void chm_title(struct chm *chm, const char *title)
+{
+    /* Copy before freeing, in case the caller passes the current value. */
+    char *copy = dupstr(title);
+
+    sfree(chm->title);                 /* don't leak an earlier setting */
+    chm->title = copy;
+}
+
+/*
+ * Set the name of the contents file. The string is copied; setting it
+ * again replaces the earlier value.
+ */
+void chm_contents_filename(struct chm *chm, const char *name)
+{
+    /* Copy before freeing, in case the caller passes the current value. */
+    char *copy = dupstr(name);
+
+    sfree(chm->contents_filename);     /* don't leak an earlier setting */
+    chm->contents_filename = copy;
+}
+
+/*
+ * Set the name of the index file. The string is copied; setting it
+ * again replaces the earlier value.
+ */
+void chm_index_filename(struct chm *chm, const char *name)
+{
+    /* Copy before freeing, in case the caller passes the current value. */
+    char *copy = dupstr(name);
+
+    sfree(chm->index_filename);        /* don't leak an earlier setting */
+    chm->index_filename = copy;
+}
+
+/*
+ * Set the default topic. The string is copied; setting it again
+ * replaces the earlier value.
+ */
+void chm_default_topic(struct chm *chm, const char *name)
+{
+    /* Copy before freeing, in case the caller passes the current value. */
+    char *copy = dupstr(name);
+
+    sfree(chm->default_topic);         /* don't leak an earlier setting */
+    chm->default_topic = copy;
+}
+
+/*
+ * Set the name of the default window. The string is copied; setting
+ * it again replaces the earlier value.
+ */
+void chm_default_window(struct chm *chm, const char *name)
+{
+    /* Copy before freeing, in case the caller passes the current value. */
+    char *copy = dupstr(name);
+
+    sfree(chm->default_window);        /* don't leak an earlier setting */
+    chm->default_window = copy;
+}
+
+/*
+ * Register a window definition. All strings are copied.
+ * 'contentsfile' and 'indexfile' may be NULL, meaning the window has
+ * none; the other strings are duplicated unconditionally. Windows are
+ * kept in the order they are added.
+ */
+void chm_add_window(struct chm *chm, const char *winname, const char *title,
+                    const char *contentsfile, const char *indexfile,
+                    const char *rootfile, int navpaneflags, int toolbarflags)
+{
+    struct chm_window *window = snew(struct chm_window);
+
+    window->navpaneflags = navpaneflags;
+    window->toolbarflags = toolbarflags;
+
+    window->name = dupstr(winname);
+    window->title = dupstr(title);
+    window->rootfile = dupstr(rootfile);
+
+    /* The two optional files stay NULL when absent. */
+    window->contentsfile = NULL;
+    if (contentsfile)
+        window->contentsfile = dupstr(contentsfile);
+    window->indexfile = NULL;
+    if (indexfile)
+        window->indexfile = dupstr(indexfile);
+
+    /* Append at the end of the (unsorted) window list. */
+    addpos234(chm->windows, window, count234(chm->windows));
+}
+
+/*
+ * Create a section with the given title and URL (both copied) and
+ * attach it as the last child of 'parent', or as the last top-level
+ * section if 'parent' is NULL. The new section is also appended to
+ * the list of all sections.
+ *
+ * Returns the new section, which remains owned by 'chm'.
+ */
+struct chm_section *chm_add_section(struct chm *chm,
+                                    struct chm_section *parent,
+                                    const char *title, const char *url)
+{
+    /* The sibling list this section joins: the parent's children, or
+     * the top-level list when there is no parent. */
+    struct chm_section **sib_head =
+        parent ? &parent->firstchild : &chm->rootsecthead;
+    struct chm_section **sib_tail =
+        parent ? &parent->lastchild : &chm->rootsecttail;
+    struct chm_section *sect = snew(struct chm_section);
+
+    sect->title = dupstr(title);
+    sect->url = dupstr(url);
+    sect->firstchild = NULL;
+    sect->lastchild = NULL;
+    sect->nextsibling = NULL;
+    sect->next = NULL;
+    sect->parent = parent;
+
+    /* Append to the sibling list. */
+    if (*sib_tail)
+        (*sib_tail)->nextsibling = sect;
+    else
+        *sib_head = sect;
+    *sib_tail = sect;
+
+    /* Append to the list of all sections. */
+    if (chm->allsecttail)
+        chm->allsecttail->next = sect;
+    else
+        chm->allsecthead = sect;
+    chm->allsecttail = sect;
+
+    return sect;
+}
+
+struct chm_urltbl_entry {
+    /*
+     * Records of #URLTBL, before their order is finalised.
+     *
+     * The first word of this record is listed as 'unknown, perhaps
+     * some kind of unique ID' in chmspec. But my observation in HTML
+     * Help Workshop's output is that it's actually a hash of the
+     * target URL, and the file is sorted by them. chm_url_hash()
+     * below implements the hash algorithm.
+     */
+    unsigned long hash;
+    int topic_index;
+    int urlstr_pos;
+    int topics_offset_to_update;
+};
+
+/*
+ * tree234 ordering function for struct chm_urltbl_entry: order by
+ * hash, and break ties between equal hashes by topic_index.
+ *
+ * Returns -1, 0 or +1. The result must be antisymmetric (swapping
+ * the arguments flips the sign), or a sorted tree cannot keep a
+ * consistent order.
+ */
+int chm_urltbl_entry_cmp(void *av, void *bv)
+{
+    const struct chm_urltbl_entry
+        *a = (const struct chm_urltbl_entry *)av,
+        *b = (const struct chm_urltbl_entry *)bv;
+    if (a->hash < b->hash) return -1;
+    if (a->hash > b->hash) return +1;
+    if (a->topic_index < b->topic_index) return -1;
+    if (a->topic_index > b->topic_index) return +1;
+    return 0;
+}
+
+/*
+ * Hash a URL the way the first word of each #URLTBL record is
+ * computed. Starting from 0, for every byte of the string the running
+ * value is multiplied by 43 and a per-byte mapping value is added,
+ * all in 32-bit wraparound arithmetic. A final result of 0 is
+ * replaced by 1.
+ */
+static unsigned long chm_url_hash(const char *str)
+{
+    /* unsigned long may be wider than 32 bits, so every step is
+     * reduced explicitly. */
+    const unsigned long mask32 = 0xFFFFFFFFUL;
+    unsigned long hash = 0;
+    const char *p;
+
+    for (p = str; *p; p++) {
+        int c = (signed char)*p;
+        int val;
+
+        /*
+         * Translation rule determined by getting hhc.exe to hash
+         * a lot of strings and analysing the results. I was able
+         * to confirm this mapping rule for all byte values except
+         * for NUL, CR, LF, ^Z and backslash: the first four of
+         * those I couldn't find any way to get hhc to insert into
+         * a URL, and the last one is automatically translated
+         * into '/', presumably for reasons of Windows vs URI path
+         * syntax normalisation.
+         */
+        if (c == '/')
+            val = 0x2c;
+        else if (c <= 'Z')
+            val = c - 0x30;
+        else
+            val = c - 0x50;
+
+        /* hash = hash * 43 + val, modulo 2^32. Converting a negative
+         * 'val' to unsigned long is itself a modular conversion, so
+         * one masked addition covers both signs. */
+        hash = (hash * 43UL) & mask32;
+        hash = (hash + (unsigned long)val) & mask32;
+    }
+
+    /*
+     * Special case: an output hash of 0 is turned into 1, which I
+     * conjecture is so that in some context or other 0 can be
+     * reserved to mean something like 'null' or 'no hash value
+     * available'.
+     */
+    return hash ? hash : 1;
+}
+
+const char *chm_build(struct chm *chm, int *outlen)
+{
+ rdstringc dir = {0, 0, NULL};
+ rdstringc sysfile = {0, 0, NULL};
+ struct LZXEncodedFile *ef;
+ int rec;
+
+ chm_add_file_internal(chm, "/", "", 0, &chm->content0, 0);
+
+ RDADD_32BIT_LSB_FIRST(&sysfile, 3); /* #SYSTEM file version */
+
+ rec = sys_start(&sysfile, 9); /* identify CHM-producing tool */
+ rdaddsc(&sysfile, "Halibut, ");
+ rdaddsc(&sysfile, version);
+ rdaddc(&sysfile, '\0');
+ sys_end(&sysfile, rec);
+
+ rec = sys_start(&sysfile, 12); /* number of 'information types' */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0);
+ sys_end(&sysfile, rec);
+ rec = sys_start(&sysfile, 15); /* checksum of 'information types' */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0);
+ sys_end(&sysfile, rec);
+ /* actual section of 'information types', whatever those might be */
+ chm_add_file_internal(chm, "/#ITBITS", "", 0, &chm->content0, 0);
+
+ if (chm->title) {
+ rec = sys_start(&sysfile, 3); /* document title */
+ rdaddsc(&sysfile, chm->title);
+ rdaddc(&sysfile, '\0');
+ sys_end(&sysfile, rec);
+ }
+
+ if (chm->default_topic) {
+ rec = sys_start(&sysfile, 2);
+ rdaddsc(&sysfile, chm->default_topic);
+ rdaddc(&sysfile, '\0');
+ sys_end(&sysfile, rec);
+ }
+
+ if (chm->contents_filename) {
+ rec = sys_start(&sysfile, 0);
+ rdaddsc(&sysfile, chm->contents_filename);
+ rdaddc(&sysfile, '\0');
+ sys_end(&sysfile, rec);
+ }
+
+ if (chm->index_filename) {
+ rec = sys_start(&sysfile, 1);
+ rdaddsc(&sysfile, chm->index_filename);
+ rdaddc(&sysfile, '\0');
+ sys_end(&sysfile, rec);
+ }
+
+ if (chm->default_window) {
+ rec = sys_start(&sysfile, 5);
+ rdaddsc(&sysfile, chm->default_window);
+ rdaddc(&sysfile, '\0');
+ sys_end(&sysfile, rec);
+ }
+
+ rec = sys_start(&sysfile, 4);
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0x809); /* language again (FIXME) */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* DBCS: off */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 1); /* full-text search: on */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no KLinks (whatever they are) */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no ALinks (whatever they are) */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0x11223344); /* timestamp LSW (FIXME) */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0x55667788); /* timestamp MSW (FIXME) */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */
+ sys_end(&sysfile, rec);
+
+ {
+ rdstringc winfile = {0, 0, NULL};
+ int i, j, s;
+ struct chm_window *win;
+
+ RDADD_32BIT_LSB_FIRST(&winfile, count234(chm->windows));
+ RDADD_32BIT_LSB_FIRST(&winfile, 196); /* size of each entry */
+ for (i = 0;
+ (win = (struct chm_window *)index234(chm->windows, i)) != NULL;
+ i++) {
+ RDADD_32BIT_LSB_FIRST(&winfile, 196); /* size of entry */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* not Unicode */
+ s = chm_intern_string(chm, win->name);
+ RDADD_32BIT_LSB_FIRST(&winfile, s);
+ /* Bitmap of which fields are used: 2 means nav pane
+ * style, 0x200 means whether nav pane is initially
+ * closed, 0x400 means tab position */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0x502);
+ /* Nav pane styles:
+ * 0x40000 = user can control window size/pos
+ * 0x20000 = advanced full-text search UI
+ * 0x00400 = include a search tab
+ * 0x00100 = keep contents/index in sync with current topic
+ * 0x00020 = three-pane window */
+ RDADD_32BIT_LSB_FIRST(&winfile, win->navpaneflags);
+ s = chm_intern_string(chm, win->title);
+ RDADD_32BIT_LSB_FIRST(&winfile, s);
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window styles */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window ex styles */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window rect.left */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window rect.top */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window rect.right */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window rect.bottom */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window show state */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* nav pane width */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* topic rect.left */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* topic rect.top */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* topic rect.right */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* topic rect.bottom */
+ s = chm_intern_string(chm, win->contentsfile);
+ RDADD_32BIT_LSB_FIRST(&winfile, s);
+ s = chm_intern_string(chm, win->indexfile);
+ RDADD_32BIT_LSB_FIRST(&winfile, s);
+ s = chm_intern_string(chm, win->rootfile);
+ RDADD_32BIT_LSB_FIRST(&winfile, s);
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Home button target */
+ RDADD_32BIT_LSB_FIRST(&winfile, win->toolbarflags);
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* nav pane initially open */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* default nav pane = TOC */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* nav pane tabs at top */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* WM_NOTIFY id */
+ for (j = 0; j < 20; j++)
+ rdaddc(&winfile, 0); /* tab order block */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* history to keep */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 1 button target */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 2 button target */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 1 button text */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 2 button text */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window min rect.left */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window min rect.top */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window min rect.right */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window min rect.bottom */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no information types */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no custom tabs */
+ }
+ assert(winfile.pos == 8 + 196 * count234(chm->windows));
+ chm_add_file_internal(chm, "/#WINDOWS", winfile.text, winfile.pos,
+ &chm->content1, 1);
+ sfree(winfile.text);
+ }
+
+ {
+ struct chm_section *sect;
+ rdstringc tocidx = {0, 0, NULL};
+ rdstringc topics = {0, 0, NULL};
+ rdstringc urltbl = {0, 0, NULL};
+ rdstringc urlstr = {0, 0, NULL};
+ int i, index, s, n_tocidx_3;
+ struct chm_directory_entry *contentsfile = NULL, *indexfile = NULL;
+ tree234 *urltbl_pre;
+ struct chm_urltbl_entry *urltbl_entry;
+
+ urltbl_pre = newtree234(chm_urltbl_entry_cmp);
+
+ for (i = 0; i < 0x1000; i++)
+ rdaddc(&tocidx, 0);
+
+ /* Write a header of one zero byte at the start of #URLSTR.
+ * chmspec says this doesn't always appear, and is unclear on
+ * what this is for, but I suspect it serves the same purpose
+ * as the zero byte at the start of #STRINGS, namely that it
+ * arranges that an absent string in the following records can
+ * be represented by an offset of zero which will
+ * automatically point to this byte and hence indicate the
+ * empty string. */
+ rdaddc(&urlstr, 0);
+
+ if (chm->contents_filename) {
+ char *withslash = add_leading_slash(chm->contents_filename);
+ contentsfile = chm_find_file(chm, withslash);
+ sfree(withslash);
+ assert(contentsfile);
+ }
+ if (chm->index_filename) {
+ char *withslash = add_leading_slash(chm->index_filename);
+ indexfile = chm_find_file(chm, withslash);
+ sfree(withslash);
+ assert(indexfile);
+ }
+
+ index = 0;
+
+ /* #TOCIDX header field pointing at start of type-1 records */
+ PUT_32BIT_LSB_FIRST(tocidx.text + 0, tocidx.pos);
+
+ /*
+ * First pass over the section structure, generating in
+ * parallel one of the multiple structure types in #TOCIDX and
+ * the sole record in all the other files.
+ */
+ for (sect = chm->allsecthead; sect; sect = sect->next) {
+ /* Size of the first kind of #TOCIDX record varies between
+ * leaf and internal nodes */
+ int tocidx_size_1 = (sect->firstchild ? 0x1c : 0x14);
+
+ /*
+ * Flags:
+ * - 8 means there's a local filename, which in _our_ CHM
+ * files there always is. If you unset this flag, you
+ * get a node in the contents treeview which doesn't
+ * open any page when clicked, and exists solely to
+ * contain children; in that situation the topic index
+ * field at position 0x08 in this record also stops
+ * being an index into #TOPICS and instead becomes an
+ * index into #STRINGS giving the node's title.
+ * - 4 apparently means the node should have the 'book'
+ * rather than 'page' icon in the TOC tree view in the
+ * help viewer
+ * - 1 means the node has a subtree in the tree view,
+ * which I take to mean (contrary to chmspec) that
+ * _this_ is the flag that means this node is a
+ * non-leaf node and hence has the two extra fields for
+ * first-child and whatever the other one means
+ */
+ unsigned tocidx_1_flags = (sect->firstchild ? 0x5 : 0) | 8;
+
+ int urlstr_size;
+
+ /* Pad to ensure the record isn't split between
+ * 0x1000-byte pages of the file */
+ while ((tocidx.pos ^ (tocidx.pos + tocidx_size_1 - 1)) >> 12)
+ RDADD_32BIT_LSB_FIRST(&tocidx, 0);
+
+ sect->topic_index = index++;
+
+ /* Write the type-1 record in #TOCIDX */
+ sect->tocidx_offset_1 = tocidx.pos;
+ RDADD_16BIT_LSB_FIRST(&tocidx, 0); /* unknown */
+ /* chmspec thinks this 16-bit field is 'unknown', but in
+ * my observations it appears to be the index of an entry
+ * in the #TOCIDX type-3 region. But I still don't know
+ * what those are really for. */
+ RDADD_16BIT_LSB_FIRST(&tocidx, sect->topic_index);
+ RDADD_32BIT_LSB_FIRST(&tocidx, tocidx_1_flags);
+ RDADD_32BIT_LSB_FIRST(&tocidx, sect->topic_index);
+ RDADD_32BIT_LSB_FIRST(&tocidx, sect->parent ?
+ sect->parent->tocidx_offset_1 : 0);
+ RDADD_32BIT_LSB_FIRST(&tocidx, 0); /* 'next' ptr; fill in later */
+ if (sect->firstchild) {
+ RDADD_32BIT_LSB_FIRST(&tocidx, 0); /* child; fill in later */
+ RDADD_32BIT_LSB_FIRST(&tocidx, 0); /* unknown */
+ }
+ assert(tocidx.pos == sect->tocidx_offset_1 + tocidx_size_1);
+
+ /* Figure out our offset in #URLSTR, by ensuring we're not
+ * going to overrun a page boundary (as usual). For this
+ * we need our record length, which is two 32-bit fields
+ * plus a NUL-terminated copy of the target file name / URL. */
+ urlstr_size = 8 + strlen(sect->url) + 1;
+ assert(urlstr_size < 0x1000); /* must _fit_ in a page! */
+ while ((urlstr.pos ^ (urlstr.pos + urlstr_size - 1)) >> 12)
+ rdaddc(&urlstr, 0);
+
+ /*
+ * Save everything we know so far about the #URLTBL record
+ * we'll need to write.
+ */
+ urltbl_entry = snew(struct chm_urltbl_entry);
+ urltbl_entry->hash = chm_url_hash(sect->url);
+ urltbl_entry->topic_index = sect->topic_index;
+ urltbl_entry->urlstr_pos = urlstr.pos;
+ add234(urltbl_pre, urltbl_entry);
+
+ /* Write the #TOPICS entry */
+ RDADD_32BIT_LSB_FIRST(&topics, sect->tocidx_offset_1);
+ s = chm_intern_string(chm, sect->title);
+ RDADD_32BIT_LSB_FIRST(&topics, s);
+ urltbl_entry->topics_offset_to_update = topics.pos;
+ RDADD_32BIT_LSB_FIRST(&topics, 0); /* fill in later */
+ RDADD_16BIT_LSB_FIRST(&topics, 6); /* flag as 'in contents' */
+ RDADD_16BIT_LSB_FIRST(&topics, 0); /* unknown */
+
+ /*
+ * Write the #URLSTR entry.
+ */
+ RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* URL string (null) */
+ RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* FrameName location (null) */
+ rdaddsc(&urlstr, sect->url); /* 'Local' */
+ rdaddc(&urlstr, '\0');
+ }
+
+ /*
+ * Add entries in #URLTBL, #URLSTR and #TOPICS for the
+ * contents and index files. They don't form part of the tree
+ * in #TOCIDX, though.
+ */
+ if (chm->contents_filename) {
+ urltbl_entry = snew(struct chm_urltbl_entry);
+ urltbl_entry->hash = chm_url_hash(chm->contents_filename);
+ urltbl_entry->topic_index = index;
+ urltbl_entry->urlstr_pos = urlstr.pos;
+ add234(urltbl_pre, urltbl_entry);
+
+ /* #TOPICS entry */
+ RDADD_32BIT_LSB_FIRST(&topics, 0); /* no #TOCIDX entry */
+ RDADD_32BIT_LSB_FIRST(&topics, 0xFFFFFFFFU); /* no title either */
+ urltbl_entry->topics_offset_to_update = topics.pos;
+ RDADD_32BIT_LSB_FIRST(&topics, 0); /* fill in later */
+ RDADD_16BIT_LSB_FIRST(&topics, 2); /* flag as 'not in contents' */
+ RDADD_16BIT_LSB_FIRST(&topics, 0); /* unknown */
+
+ /* #URLSTR entry */
+ RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* URL string (null) */
+ RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* FrameName location (null) */
+ rdaddsc(&urlstr, chm->contents_filename); /* 'Local' */
+ rdaddc(&urlstr, '\0');
+
+ /* And add the entry in #SYSTEM that cites the hash of the
+ * #URLTBL entry. */
+ rec = sys_start(&sysfile, 11);
+ RDADD_32BIT_LSB_FIRST(&sysfile, urltbl_entry->hash);
+ sys_end(&sysfile, rec);
+
+ index++;
+ }
+ if (chm->index_filename) {
+ urltbl_entry = snew(struct chm_urltbl_entry);
+ urltbl_entry->hash = chm_url_hash(chm->index_filename);
+ urltbl_entry->topic_index = index;
+ urltbl_entry->urlstr_pos = urlstr.pos;
+ add234(urltbl_pre, urltbl_entry);
+
+ /* #TOPICS entry */
+ RDADD_32BIT_LSB_FIRST(&topics, 0); /* no #TOCIDX entry */
+ RDADD_32BIT_LSB_FIRST(&topics, 0xFFFFFFFFU); /* no title either */
+ urltbl_entry->topics_offset_to_update = topics.pos;
+ RDADD_32BIT_LSB_FIRST(&topics, 0); /* fill in later */
+ RDADD_16BIT_LSB_FIRST(&topics, 2); /* flag as 'not in contents' */
+ RDADD_16BIT_LSB_FIRST(&topics, 0); /* unknown */
+
+ /* #URLSTR entry */
+ RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* URL string (null) */
+ RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* FrameName location (null) */
+ rdaddsc(&urlstr, chm->index_filename); /* 'Local' */
+ rdaddc(&urlstr, '\0');
+
+ /* And add the entry in #SYSTEM that cites the hash of the
+ * #URLTBL entry. */
+ rec = sys_start(&sysfile, 7);
+ RDADD_32BIT_LSB_FIRST(&sysfile, urltbl_entry->hash);
+ sys_end(&sysfile, rec);
+
+ index++;
+ }
+
+ /*
+ * Now we've got all our #URLTBL entries, so we can write out
+ * #URLTBL itself.
+ */
+ while ((urltbl_entry = delpos234(urltbl_pre, 0)) != NULL) {
+ /* Pad #URLTBL to the beginning of this section's entry.
+ * Entries are all 12 bytes long, but again there's some
+ * padding to ensure that they don't cross a page
+ * boundary. */
+ while ((urltbl.pos ^ (urltbl.pos + 12 - 1)) >> 12)
+ RDADD_32BIT_LSB_FIRST(&urltbl, 0);
+
+ /* Fill in the link from #TOPICS to this entry's offset */
+ PUT_32BIT_LSB_FIRST(topics.text +
+ urltbl_entry->topics_offset_to_update,
+ urltbl.pos);
+
+ /* Write the entry itself. */
+ RDADD_32BIT_LSB_FIRST(&urltbl, urltbl_entry->hash);
+ RDADD_32BIT_LSB_FIRST(&urltbl, urltbl_entry->topic_index);
+ RDADD_32BIT_LSB_FIRST(&urltbl, urltbl_entry->urlstr_pos);
+
+ sfree(urltbl_entry);
+ }
+ freetree234(urltbl_pre);
+
+ /*
+ * Small follow-up pass filling in forward-pointing offset
+ * fields in the #TOCIDX type-1 records which the previous
+ * pass didn't know yet.
+ */
+ for (sect = chm->allsecthead; sect; sect = sect->next) {
+ if (sect->nextsibling)
+ PUT_32BIT_LSB_FIRST(tocidx.text + sect->tocidx_offset_1 + 0x10,
+ sect->nextsibling->tocidx_offset_1);
+ if (sect->firstchild)
+ PUT_32BIT_LSB_FIRST(tocidx.text + sect->tocidx_offset_1 + 0x14,
+ sect->firstchild->tocidx_offset_1);
+ }
+
+ /* #TOCIDX header field pointing at start of type-2 records */
+ PUT_32BIT_LSB_FIRST(tocidx.text + 0xC, tocidx.pos);
+
+ /*
+ * Write the #TOCIDX type-2 records, which are just 4 bytes
+ * long and just contain another copy of each topic's index,
+ * but we need to have them there so that the type-3 records
+ * can refer to them by offset.
+ */
+ for (sect = chm->allsecthead; sect; sect = sect->next) {
+ sect->tocidx_offset_2 = tocidx.pos;
+ RDADD_32BIT_LSB_FIRST(&tocidx, sect->topic_index);
+ }
+
+ /* Align the current #TOCIDX offset to 16 bytes */
+ while (tocidx.pos & 0xF)
+ rdaddc(&tocidx, 0);
+
+ /* #TOCIDX header field pointing at start of type-3 records */
+ PUT_32BIT_LSB_FIRST(tocidx.text + 0x4, tocidx.pos);
+
+ /*
+ * Write the #TOCIDX type-3 records.
+ *
+ * In help files I've examined, there are fewer of these than
+ * you might expect; apparently not all sections rate one for
+ * some reason. For the moment I'm just writing out one for
+ * every section.
+ */
+ n_tocidx_3 = 0;
+ for (sect = chm->allsecthead; sect; sect = sect->next) {
+ RDADD_32BIT_LSB_FIRST(&tocidx, sect->tocidx_offset_1);
+ RDADD_32BIT_LSB_FIRST(&tocidx, sect->topic_index + 666); /* ?! */
+ RDADD_32BIT_LSB_FIRST(&tocidx, sect->tocidx_offset_2);
+ RDADD_32BIT_LSB_FIRST(&tocidx, sect->topic_index);
+ n_tocidx_3++;
+ }
+
+ /* #TOCIDX header field giving number of type-3 records */
+ PUT_32BIT_LSB_FIRST(tocidx.text + 0x8, n_tocidx_3);
+
+ chm_add_file_internal(chm, "/#TOCIDX", tocidx.text, tocidx.pos,
+ &chm->content1, 1);
+ chm_add_file_internal(chm, "/#TOPICS", topics.text, topics.pos,
+ &chm->content1, 1);
+ chm_add_file_internal(chm, "/#URLTBL", urltbl.text, urltbl.pos,
+ &chm->content1, 1);
+ chm_add_file_internal(chm, "/#URLSTR", urlstr.text, urlstr.pos,
+ &chm->content1, 1);
+
+ /*
+ * Write #IDXHDR (and its mirror in #SYSTEM), which we
+ * couldn't do until we knew how many topic nodes there were.
+ */
+ {
+ int idxhdr_start;
+
+ rec = sys_start(&sysfile, 13);
+ idxhdr_start = sysfile.pos;
+
+ rdaddsc(&sysfile, "T#SM"); /* #IDXHDR magic */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0x12345678); /* checksum? FIXME */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 1); /* unknown */
+ RDADD_32BIT_LSB_FIRST(&sysfile, index); /* number of topic nodes */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no image list */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* top-level node is
+ * not a folder */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no bg colour */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no fg colour */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no font spec */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no window style */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no ex win style */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* unknown */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no frame name */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no window name */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no information types */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 1); /* unknown */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no merge files */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */
+ while (sysfile.pos - idxhdr_start < 4096)
+ rdaddc(&sysfile, 0);
+
+ chm_add_file_internal(chm, "/#IDXHDR", sysfile.text + idxhdr_start,
+ sysfile.pos - idxhdr_start,
+ &chm->content1, 1);
+ sys_end(&sysfile, rec);
+ }
+
+ sfree(tocidx.text);
+ sfree(topics.text);
+ sfree(urltbl.text);
+ sfree(urlstr.text);
+ }
+
+ /* Missing from #SYSTEM: */
+ /* 10 (4-byte timestamp) */
+ /* 6 (logical file name) */
+
+ chm_add_file_internal(chm, "/#SYSTEM", sysfile.text, sysfile.pos,
+ &chm->content0, 0);
+ sfree(sysfile.text);
+
+ chm_add_file_internal(chm, "/#STRINGS", chm->stringsfile.text,
+ chm->stringsfile.pos, &chm->content1, 1);
+
+ /*
+ * ::DataSpace/NameList, giving the names of the two content sections.
+ */
+ {
+ rdstringc dsnl = {0, 0, NULL};
+ const char *p;
+ int stringstart;
+
+ RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* total file size; fill in later */
+ RDADD_16BIT_LSB_FIRST(&dsnl, 2); /* number of names */
+
+ RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* string length; fill in later */
+ stringstart = dsnl.pos;
+ for (p = "Uncompressed"; *p; p++)
+ RDADD_16BIT_LSB_FIRST(&dsnl, *p);
+ PUT_16BIT_LSB_FIRST(dsnl.text + stringstart - 2,
+ (dsnl.pos - stringstart) / 2);
+ RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* NUL terminator */
+
+ RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* string length; fill in later */
+ stringstart = dsnl.pos;
+ for (p = "MSCompressed"; *p; p++)
+ RDADD_16BIT_LSB_FIRST(&dsnl, *p);
+ PUT_16BIT_LSB_FIRST(dsnl.text + stringstart - 2,
+ (dsnl.pos - stringstart) / 2);
+ RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* NUL terminator */
+
+ PUT_16BIT_LSB_FIRST(dsnl.text, dsnl.pos / 2);
+
+ chm_add_file_internal(chm, "::DataSpace/NameList", dsnl.text, dsnl.pos,
+ &chm->content0, 0);
+
+ sfree(dsnl.text);
+ }
+
+ /*
+ * Actually compress the compressed-data section, load the
+ * compressed version of it into the containing uncompressed
+ * section, and write the auxiliary files describing it.
+ */
+ {
+ rdstringc rs = {0, 0, NULL};
+ const char *p;
+ int orig_decomp_size = chm->content1.pos;
+ size_t i;
+
+ while (chm->content1.pos & 0x7FFF)
+ rdaddc(&chm->content1, 0); /* pad to a realign-interval boundary */
+ ef = lzx(chm->content1.text, chm->content1.pos, 0x8000, 0x10000);
+ chm_add_file_internal(
+ chm, "::DataSpace/Storage/MSCompressed/Content",
+ (char *)ef->data, ef->data_len, &chm->content0, 0);
+
+ for (p = "{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}"; *p; p++)
+ RDADD_16BIT_LSB_FIRST(&rs, *p);
+ rs.pos = 0x26; /* this file is always written truncated :-) */
+ chm_add_file_internal(
+ chm, "::DataSpace/Storage/MSCompressed/Transform/List",
+ rs.text, rs.pos, &chm->content0, 0);
+ rs.pos = 0;
+
+ RDADD_32BIT_LSB_FIRST(&rs, orig_decomp_size);
+ RDADD_32BIT_LSB_FIRST(&rs, 0); /* high word of 64-bit size */
+ chm_add_file_internal(
+ chm, "::DataSpace/Storage/MSCompressed/SpanInfo",
+ rs.text, rs.pos, &chm->content0, 0);
+ rs.pos = 0;
+
+ RDADD_32BIT_LSB_FIRST(&rs, 6); /* file size */
+ rdaddsc(&rs, "LZXC"); /* compression type identifier */
+ RDADD_32BIT_LSB_FIRST(&rs, 2); /* version */
+ RDADD_32BIT_LSB_FIRST(&rs, 2); /* reset interval in units of 2^15 */
+ RDADD_32BIT_LSB_FIRST(&rs, 2); /* window size in units of 2^15 */
+ RDADD_32BIT_LSB_FIRST(&rs, 1); /* reset interval multiplier */
+ RDADD_32BIT_LSB_FIRST(&rs, 0); /* unknown */
+ chm_add_file_internal(
+ chm, "::DataSpace/Storage/MSCompressed/ControlData",
+ rs.text, rs.pos, &chm->content0, 0);
+ rs.pos = 0;
+
+ RDADD_32BIT_LSB_FIRST(&rs, 2); /* unknown (version number?) */
+ RDADD_32BIT_LSB_FIRST(&rs, ef->n_resets); /* reset table length */
+ RDADD_32BIT_LSB_FIRST(&rs, 8); /* reset table entry size */
+ RDADD_32BIT_LSB_FIRST(&rs, 0x28); /* reset table offset */
+ RDADD_32BIT_LSB_FIRST(&rs, orig_decomp_size); /* uncompressed len */
+ RDADD_32BIT_LSB_FIRST(&rs, 0); /* MSW */
+ RDADD_32BIT_LSB_FIRST(&rs, ef->data_len); /* compressed len */
+ RDADD_32BIT_LSB_FIRST(&rs, 0); /* MSW */
+ RDADD_32BIT_LSB_FIRST(&rs, 0x8000); /* realign interval */
+ RDADD_32BIT_LSB_FIRST(&rs, 0); /* MSW */
+ for (i = 0; i < ef->n_resets; i++) {
+ RDADD_32BIT_LSB_FIRST(&rs, ef->reset_byte_offsets[i]);
+ RDADD_32BIT_LSB_FIRST(&rs, 0); /* MSW */
+ }
+ chm_add_file_internal(
+ chm, "::DataSpace/Storage/MSCompressed/Transform/"
+ "{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable",
+ rs.text, rs.pos, &chm->content0, 0);
+ rs.pos = 0;
+ }
+
+ sfree(ef->data);
+ sfree(ef->reset_byte_offsets);
+ sfree(ef);
+
+ directory(&dir, chm->files);
+ itsf(&chm->outfile, &dir, &chm->content0);
+ sfree(dir.text);
+
+ assert(outlen);
+ *outlen = chm->outfile.pos;
+ return chm->outfile.text;
+}
diff --git a/winchm.h b/winchm.h
new file mode 100644
index 0000000..caee3fc
--- /dev/null
+++ b/winchm.h
@@ -0,0 +1,21 @@
+struct chm; /* opaque to includers: only the tag is declared in this header */
+/* NOTE(review): function bodies are not in this header; "presumably" marks name-based guesses -- confirm. */
+struct chm *chm_new(void); /* presumably allocates an empty CHM builder */
+void chm_free(struct chm *chm); /* presumably destroys a builder obtained from chm_new() */
+void chm_add_file(struct chm *chm, const char *name,
+ const char *data, int len); /* presumably adds 'len' bytes at 'data' as file 'name' */
+void chm_title(struct chm *chm, const char *title); /* presumably sets the help file's title */
+void chm_contents_filename(struct chm *chm, const char *name); /* presumably sets chm->contents_filename, which chm_build() asserts it can find */
+void chm_index_filename(struct chm *chm, const char *name); /* presumably sets chm->index_filename, which chm_build() asserts it can find */
+void chm_default_topic(struct chm *chm, const char *name); /* presumably names the page shown first */
+void chm_default_window(struct chm *chm, const char *name); /* presumably names the window used by default */
+void chm_add_window(struct chm *chm, const char *winname, const char *title,
+ const char *contentsfile, const char *indexfile,
+ const char *rootfile, int navpaneflags, int toolbarflags); /* presumably adds to chm->windows; chm_build() writes 196 bytes of #WINDOWS per entry */
+/* Opaque contents-tree node; chm_build() reads each node's title, url, parent, firstchild and nextsibling. */
+struct chm_section;
+struct chm_section *chm_add_section(struct chm *chm,
+ struct chm_section *parent,
+ const char *title, const char *url); /* chm_build() asserts 8 + strlen(url) + 1 < 0x1000 for each section's url */
+/* chm_build() asserts outlen is non-NULL, stores chm->outfile.pos in *outlen and returns chm->outfile.text itself (not a copy). */
+const char *chm_build(struct chm *chm, int *outlen);