diff options
| -rw-r--r-- | Buildscr | 1 | ||||
| -rw-r--r-- | Makefile | 2 | ||||
| -rw-r--r-- | bk_html.c | 393 | ||||
| -rw-r--r-- | doc/Makefile | 8 | ||||
| -rw-r--r-- | doc/chm.but | 17 | ||||
| -rw-r--r-- | doc/intro.but | 5 | ||||
| -rw-r--r-- | doc/manpage.but | 18 | ||||
| -rw-r--r-- | doc/output.but | 291 | ||||
| -rw-r--r-- | doc/running.but | 24 | ||||
| -rw-r--r-- | error.c | 6 | ||||
| -rw-r--r-- | halibut.h | 4 | ||||
| -rw-r--r-- | lzx.c | 697 | ||||
| -rw-r--r-- | lzx.h | 24 | ||||
| -rw-r--r-- | main.c | 1 | ||||
| -rw-r--r-- | winchm.c | 1436 | ||||
| -rw-r--r-- | winchm.h | 21 |
16 files changed, 2768 insertions, 180 deletions
@@ -35,6 +35,7 @@ in halibut/doc do make deliver halibut/*.tar.gz $@ deliver halibut/doc/halibut.pdf $@ deliver halibut/doc/halibut.txt $@ +deliver halibut/doc/halibut.chm $@ deliver halibut/doc/*.html $@ # FIXME: it'd be nice to add a Windows delegation here so we can @@ -95,7 +95,7 @@ include $(LIBCHARSET_SRCDIR)Makefile MODULES := main malloc ustring error help licence version misc tree234 MODULES += input in_afm in_pf in_sfnt keywords contents index biblio MODULES += bk_text bk_html bk_whlp bk_man bk_info bk_paper bk_ps bk_pdf -MODULES += winhelp deflate lz77 huffman psdata wcwidth +MODULES += winhelp winchm deflate lzx lz77 huffman psdata wcwidth OBJECTS := $(addsuffix .o,$(MODULES)) $(LIBCHARSET_OBJS) DEPS := $(addsuffix .d,$(MODULES)) @@ -24,6 +24,7 @@ #include <assert.h> #include <limits.h> #include "halibut.h" +#include "winchm.h" #define is_heading_type(type) ( (type) == para_Title || \ (type) == para_Chapter || \ @@ -57,6 +58,8 @@ typedef struct { char *chm_filename, *hhp_filename, *hhc_filename, *hhk_filename; char **template_fragments; int ntfragments; + char **chm_extrafiles, **chm_extranames; + int nchmextrafiles, chmextrafilesize; char *head_end, *body_start, *body_end, *addr_start, *addr_end; char *body_tag, *nav_attr; wchar_t *author, *description; @@ -94,6 +97,10 @@ struct htmlfile { * more than once. */ int temp; + /* + * CHM section structure, if we're generating a CHM. 
+ */ + struct chm_section *chmsect; }; struct htmlsect { @@ -193,6 +200,48 @@ void ho_setup_stdio(htmloutput *ho, FILE *fp) ho->write = ho_write_stdio; ho->write_ctx = fp; } + +struct chm_output { + struct chm *chm; + char *filename; + rdstringc rs; +}; +void ho_write_chm(void *write_ctx, const char *data, int len) +{ + struct chm_output *co = (struct chm_output *)write_ctx; + if (len == -1) { + chm_add_file(co->chm, co->filename, co->rs.text, co->rs.pos); + sfree(co->filename); + sfree(co->rs.text); + sfree(co); + } else { + rdaddsn(&co->rs, data, len); + } +} +void ho_setup_chm(htmloutput *ho, struct chm *chm, const char *filename) +{ + struct chm_output *co = snew(struct chm_output); + + co->chm = chm; + co->rs = empty_rdstringc; + co->filename = dupstr(filename); + + ho->write_ctx = co; + ho->write = ho_write_chm; +} + +void ho_write_rdstringc(void *write_ctx, const char *data, int len) +{ + rdstringc *rs = (rdstringc *)write_ctx; + if (len > 0) + rdaddsn(rs, data, len); +} +void ho_setup_rdstringc(htmloutput *ho, rdstringc *rs) +{ + ho->write_ctx = rs; + ho->write = ho_write_rdstringc; +} + void ho_string(htmloutput *ho, const char *string) { ho->write(ho->write_ctx, string, strlen(string)); @@ -286,14 +335,15 @@ static void html_section_title(htmloutput *ho, htmlsect *s, htmlfile *thisfile, keywordlist *keywords, htmlconfig *cfg, int real); -static htmlconfig html_configure(paragraph *source) { +static htmlconfig html_configure(paragraph *source, int chm_mode) +{ htmlconfig ret; paragraph *p; /* * Defaults. */ - ret.leaf_level = 2; + ret.leaf_level = chm_mode ? -1 /* infinite */ : 2; ret.achapter.just_numbers = FALSE; ret.achapter.number_at_all = TRUE; ret.achapter.number_suffix = L": "; @@ -305,20 +355,29 @@ static htmlconfig html_configure(paragraph *source) { ret.ncdepths = 0; ret.contents_depths = 0; ret.visible_version_id = TRUE; - ret.address_section = TRUE; + ret.address_section = chm_mode ? 
FALSE : TRUE; ret.leaf_contains_contents = FALSE; ret.leaf_smallest_contents = 4; - ret.navlinks = TRUE; + ret.navlinks = chm_mode ? FALSE : TRUE; ret.rellinks = TRUE; ret.single_filename = dupstr("Manual.html"); ret.contents_filename = dupstr("Contents.html"); ret.index_filename = dupstr("IndexPage.html"); ret.template_filename = dupstr("%n.html"); - ret.chm_filename = ret.hhp_filename = NULL; - ret.hhc_filename = ret.hhk_filename = NULL; + if (chm_mode) { + ret.chm_filename = dupstr("output.chm"); + ret.hhc_filename = dupstr("contents.hhc"); + ret.hhk_filename = dupstr("index.hhk"); + ret.hhp_filename = NULL; + } else { + ret.chm_filename = ret.hhp_filename = NULL; + ret.hhc_filename = ret.hhk_filename = NULL; + } ret.ntfragments = 1; ret.template_fragments = snewn(ret.ntfragments, char *); ret.template_fragments[0] = dupstr("%b"); + ret.chm_extrafiles = ret.chm_extranames = NULL; + ret.nchmextrafiles = ret.chmextrafilesize = 0; ret.head_end = ret.body_tag = ret.body_start = ret.body_end = ret.addr_start = ret.addr_end = ret.nav_attr = NULL; ret.author = ret.description = NULL; @@ -368,11 +427,20 @@ static htmlconfig html_configure(paragraph *source) { for (p = source; p; p = p->next) { if (p->type == para_Config) { wchar_t *k = p->keyword; + int generic = FALSE; - if (!ustrnicmp(k, L"html-", 5)) { + if (!chm_mode && !ustrnicmp(k, L"html-", 5)) { k += 5; - } else if (!ustrnicmp(k, L"xhtml-", 6)) { + } else if (!chm_mode && !ustrnicmp(k, L"xhtml-", 6)) { k += 6; + } else if (chm_mode && !ustrnicmp(k, L"chm-", 4)) { + k += 4; + } else if (!ustrnicmp(k, L"htmlall-", 8)) { + k += 8; + /* In this mode, only accept directives that don't + * vary completely between the HTML and CHM output + * types. 
*/ + generic = TRUE; } else { continue; } @@ -578,39 +646,78 @@ static htmlconfig html_configure(paragraph *source) { ret.pre_versionid = uadv(k); } else if (!ustricmp(k, L"post-versionid")) { ret.post_versionid = uadv(k); - } else if (!ustricmp(k, L"mshtmlhelp-chm")) { + } else if (!generic && !ustricmp( + k, chm_mode ? L"filename" : L"mshtmlhelp-chm")) { sfree(ret.chm_filename); ret.chm_filename = dupstr(adv(p->origkeyword)); - } else if (!ustricmp(k, L"mshtmlhelp-project")) { - sfree(ret.hhp_filename); - ret.hhp_filename = dupstr(adv(p->origkeyword)); - } else if (!ustricmp(k, L"mshtmlhelp-contents")) { + } else if (!generic && !ustricmp( + k, chm_mode ? L"contents-name" : + L"mshtmlhelp-contents")) { sfree(ret.hhc_filename); ret.hhc_filename = dupstr(adv(p->origkeyword)); - } else if (!ustricmp(k, L"mshtmlhelp-index")) { + } else if (!generic && !ustricmp( + k, chm_mode ? L"index-name" : + L"mshtmlhelp-index")) { sfree(ret.hhk_filename); ret.hhk_filename = dupstr(adv(p->origkeyword)); + } else if (!generic && !chm_mode && + !ustricmp(k, L"mshtmlhelp-project")) { + sfree(ret.hhp_filename); + ret.hhp_filename = dupstr(adv(p->origkeyword)); + } else if (!generic && chm_mode && + !ustricmp(k, L"extra-file")) { + char *diskname, *chmname; + + diskname = adv(p->origkeyword); + if (*diskname) { + chmname = adv(diskname); + if (!*chmname) + chmname = diskname; + + if (chmname[0] == '#' || chmname[0] == '$') + err_chm_badname(&p->fpos, chmname); + + if (ret.nchmextrafiles >= ret.chmextrafilesize) { + ret.chmextrafilesize = ret.nchmextrafiles * 5 / 4 + 32; + ret.chm_extrafiles = sresize( + ret.chm_extrafiles, ret.chmextrafilesize, char *); + ret.chm_extranames = sresize( + ret.chm_extranames, ret.chmextrafilesize, char *); + } + ret.chm_extrafiles[ret.nchmextrafiles] = dupstr(diskname); + ret.chm_extranames[ret.nchmextrafiles] = + dupstr(chmname); + ret.nchmextrafiles++; + } } } } - /* - * Enforce that the CHM and HHP filenames must either be both - * present or both 
absent. If one is present but not the other, - * turn both off. - */ - if (!ret.chm_filename ^ !ret.hhp_filename) { - err_chmnames(); - sfree(ret.chm_filename); ret.chm_filename = NULL; - sfree(ret.hhp_filename); ret.hhp_filename = NULL; - } - /* - * And if we're not generating an HHP, there's no need for HHC - * or HHK. - */ - if (!ret.hhp_filename) { - sfree(ret.hhc_filename); ret.hhc_filename = NULL; - sfree(ret.hhk_filename); ret.hhk_filename = NULL; + if (!chm_mode) { + /* + * If we're in HTML mode but using the old-style options to + * output HTML Help Workshop auxiliary files, do some + * consistency checking. + */ + + /* + * Enforce that the CHM and HHP filenames must either be both + * present or both absent. If one is present but not the other, + * turn both off. + */ + if (!ret.chm_filename ^ !ret.hhp_filename) { + err_chmnames(); + sfree(ret.chm_filename); ret.chm_filename = NULL; + sfree(ret.hhp_filename); ret.hhp_filename = NULL; + } + /* + * And if we're not generating an HHP, there's no need for HHC + * or HHK. 
+ */ + if (!ret.hhp_filename) { + sfree(ret.hhc_filename); ret.hhc_filename = NULL; + sfree(ret.hhk_filename); ret.hhk_filename = NULL; + } } /* @@ -644,20 +751,23 @@ paragraph *html_config_filename(char *filename) return p; } -void html_backend(paragraph *sourceform, keywordlist *keywords, - indexdata *idx, void *unused) +paragraph *chm_config_filename(char *filename) +{ + return cmdline_cfg_simple("chm-filename", filename, NULL); +} + +static void html_backend_common(paragraph *sourceform, keywordlist *keywords, + indexdata *idx, int chm_mode) { paragraph *p; htmlsect *topsect; htmlconfig conf; htmlfilelist files = { NULL, NULL, NULL, NULL, NULL, NULL }; htmlsectlist sects = { NULL, NULL }, nonsects = { NULL, NULL }; - char *hhk_filename; - int has_index; + struct chm *chm = NULL; + int has_index, hhk_needed = FALSE; - IGNORE(unused); - - conf = html_configure(sourceform); + conf = html_configure(sourceform, chm_mode); /* * We're going to make heavy use of paragraphs' private data @@ -732,10 +842,10 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, /* * And the index, if we have one. Note that we don't output * an index as an HTML file if we're outputting one as a - * .HHK. + * .HHK (in either of the HTML or CHM output modes). */ has_index = (count234(idx->entries) > 0); - if (has_index && !conf.hhk_filename) { + if (has_index && !chm_mode && !conf.hhk_filename) { sect = html_new_sect(§s, NULL, &conf); sect->text = NULL; sect->type = INDEX; @@ -901,6 +1011,9 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, } } + if (chm_mode) + chm = chm_new(); + /* * Now we're ready to write out the actual HTML files. * @@ -936,7 +1049,9 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, #define listname(lt) ( (lt)==UL ? "ul" : (lt)==OL ? "ol" : "dl" ) #define itemname(lt) ( (lt)==LI ? "li" : (lt)==DT ? 
"dt" : "dd" ) - if (!strcmp(f->filename, "-")) + if (chm) + ho_setup_chm(&ho, chm, f->filename); + else if (!strcmp(f->filename, "-")) ho_setup_stdio(&ho, stdout); else ho_setup_file(&ho, f->filename); @@ -1728,8 +1843,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, * whether there's even going to _be_ an index file: we omit it * if the index contains nothing. */ - hhk_filename = conf.hhk_filename; - if (hhk_filename) { + if (chm_mode || conf.hhk_filename) { int ok = FALSE; int i; indexentry *entry; @@ -1743,8 +1857,138 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, } } - if (!ok) - hhk_filename = NULL; + if (ok) + hhk_needed = TRUE; + } + + /* + * If we're doing direct CHM output, tell winchm.c all the things + * it will need to know aside from the various HTML files' + * contents. + */ + if (chm) { + chm_contents_filename(chm, conf.hhc_filename); + if (has_index) + chm_index_filename(chm, conf.hhk_filename); + chm_default_window(chm, "main"); + + { + htmloutput ho; + rdstringc rs = {0, 0, NULL}; + + ho.charset = CS_CP1252; /* as far as I know, CHM is */ + ho.restrict_charset = CS_CP1252; /* hardwired to this charset */ + ho.cstate = charset_init_state; + ho.ver = HTML_4; /* *shrug* */ + ho.state = HO_NEUTRAL; + ho.contents_level = 0; + ho.hackflags = HO_HACK_QUOTENOTHING; + + ho_setup_rdstringc(&ho, &rs); + + ho.hacklimit = 255; + html_words(&ho, topsect->title->words, NOTHING, + NULL, keywords, &conf); + + rdaddc(&rs, '\0'); + chm_title(chm, rs.text); + + chm_default_topic(chm, files.head->filename); + + chm_add_window(chm, "main", rs.text, + conf.hhc_filename, conf.hhk_filename, + files.head->filename, + /* This first magic number is + * fsWinProperties, controlling Navigation + * Pane options and the like. Constants + * HHWIN_PROP_* in htmlhelp.h. */ + 0x62520, + /* This second number is fsToolBarFlags, + * mainly controlling toolbar buttons. + * Constants HHWIN_BUTTON_*. 
NOTE: there + * are two pairs of bits for Next/Previous + * buttons: 7/8 (which do nothing useful), + * and 21/22 (which work). (Neither of + * these are exposed in the HHW UI, but + * they work fine in HH.) We use the + * latter. */ + 0x70304e); + + sfree(rs.text); + } + + { + htmlfile *f; + + for (f = files.head; f; f = f->next) + f->chmsect = NULL; + for (f = files.head; f; f = f->next) { + htmlsect *s = f->first; + htmloutput ho; + rdstringc rs = {0, 0, NULL}; + + ho.charset = CS_CP1252; + ho.restrict_charset = CS_CP1252; + ho.cstate = charset_init_state; + ho.ver = HTML_4; /* *shrug* */ + ho.state = HO_NEUTRAL; + ho.contents_level = 0; + ho.hackflags = HO_HACK_QUOTENOTHING; + + ho_setup_rdstringc(&ho, &rs); + ho.hacklimit = 255; + + if (f->first->title) + html_words(&ho, f->first->title->words, NOTHING, + NULL, keywords, &conf); + else if (f->first->type == INDEX) + html_text(&ho, conf.index_text); + rdaddc(&rs, '\0'); + + while (s && s->file == f) + s = s->parent; + + /* + * Special case, as below: the TOP file is not + * considered to be the parent of everything else. + */ + if (s && s->type == TOP) + s = NULL; + + f->chmsect = chm_add_section(chm, s ? 
s->file->chmsect : NULL, + rs.text, f->filename); + + sfree(rs.text); + } + } + + { + int i; + + for (i = 0; i < conf.nchmextrafiles; i++) { + const char *fname = conf.chm_extrafiles[i]; + FILE *fp; + long size; + char *data; + + fp = fopen(fname, "rb"); + if (!fp) { + err_cantopen(fname); + continue; + } + + fseek(fp, 0, SEEK_END); + size = ftell(fp); + rewind(fp); + + data = snewn(size, char); + size = fread(data, 1, size, fp); + fclose(fp); + + chm_add_file(chm, conf.chm_extranames[i], data, size); + sfree(data); + } + } } /* @@ -1800,7 +2044,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, ho_string(&ho, conf.hhc_filename); ho_string(&ho, "\n"); } - if (hhk_filename) { + if (hhk_needed) { ho_string(&ho, "Index file="); ho_string(&ho, conf.hhk_filename); ho_string(&ho, "\n"); @@ -1817,8 +2061,8 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, if (conf.hhc_filename) ho_string(&ho, conf.hhc_filename); ho_string(&ho, "\",\""); - if (hhk_filename) - ho_string(&ho, hhk_filename); + if (hhk_needed) + ho_string(&ho, conf.hhk_filename); ho_string(&ho, "\",\""); ho_string(&ho, files.head->filename); ho_string(&ho, "\",,,,,," @@ -1848,7 +2092,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, ho_finish(&ho); } - if (conf.hhc_filename) { + if (chm || conf.hhc_filename) { htmlfile *f; htmlsect *s, *a; htmloutput ho; @@ -1862,7 +2106,10 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, ho.contents_level = 0; ho.hackflags = HO_HACK_QUOTEQUOTES; - ho_setup_file(&ho, conf.hhc_filename); + if (chm) + ho_setup_chm(&ho, chm, conf.hhc_filename); + else + ho_setup_file(&ho, conf.hhc_filename); /* * Magic DOCTYPE which seems to work for .HHC files. 
I'm @@ -1955,7 +2202,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, cleanup(&ho); } - if (hhk_filename) { + if (hhk_needed) { htmlfile *f; htmloutput ho; indexentry *entry; @@ -1976,7 +2223,10 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, ho.contents_level = 0; ho.hackflags = HO_HACK_QUOTEQUOTES; - ho_setup_file(&ho, hhk_filename); + if (chm) + ho_setup_chm(&ho, chm, conf.hhk_filename); + else + ho_setup_file(&ho, conf.hhk_filename); /* * Magic DOCTYPE which seems to work for .HHK files. I'm @@ -2041,6 +2291,26 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, cleanup(&ho); } + if (chm) { + /* + * Finalise and write out the CHM file. + */ + const char *data; + int len; + FILE *fp; + + fp = fopen(conf.chm_filename, "wb"); + if (!fp) { + err_cantopenw(conf.chm_filename); + } else { + data = chm_build(chm, &len); + fwrite(data, 1, len, fp); + fclose(fp); + } + + chm_free(chm); + } + /* * Go through and check that no index fragments were referenced * without being generated, or indeed vice versa. 
@@ -2139,6 +2409,25 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, while (conf.ntfragments--) sfree(conf.template_fragments[conf.ntfragments]); sfree(conf.template_fragments); + while (conf.nchmextrafiles--) { + sfree(conf.chm_extrafiles[conf.nchmextrafiles]); + sfree(conf.chm_extranames[conf.nchmextrafiles]); + } + sfree(conf.chm_extrafiles); +} + +void html_backend(paragraph *sourceform, keywordlist *keywords, + indexdata *idx, void *unused) +{ + IGNORE(unused); + html_backend_common(sourceform, keywords, idx, FALSE); +} + +void chm_backend(paragraph *sourceform, keywordlist *keywords, + indexdata *idx, void *unused) +{ + IGNORE(unused); + html_backend_common(sourceform, keywords, idx, TRUE); } static void html_file_section(htmlconfig *cfg, htmlfilelist *files, diff --git a/doc/Makefile b/doc/Makefile index 81a1fd8..e0cc27a 100644 --- a/doc/Makefile +++ b/doc/Makefile @@ -11,7 +11,7 @@ all: index.html halibut.1 index.html: $(INPUTS) $(HALIBUT) $(HALIBUT) --text=halibut.txt --html --info=halibut.info \ - --ps=halibut.ps --pdf=halibut.pdf $(INPUTS) + --ps=halibut.ps --pdf=halibut.pdf --chm=halibut.chm $(INPUTS) halibut.1: manpage.but $(HALIBUT) --man=halibut.1 manpage.but @@ -21,8 +21,4 @@ install: $(INSTALL) -m 644 halibut.1 $(man1dir)/halibut.1 clean: - rm -f *.html *.txt *.hlp *.cnt *.1 *.info* *.ps *.pdf *.hh* *.chm - -chm: halibut.hhp -halibut.hhp: $(INPUTS) $(HALIBUT) chm.but - $(HALIBUT) --html $(INPUTS) chm.but + rm -f *.html *.txt *.hlp *.cnt *.1 *.info* *.ps *.pdf *.chm diff --git a/doc/chm.but b/doc/chm.but deleted file mode 100644 index ef21ecc..0000000 --- a/doc/chm.but +++ /dev/null @@ -1,17 +0,0 @@ -\# File containing the magic HTML configuration directives to create -\# an MS HTML Help project. We put this on the end of the Halibut -\# docs build command line to build the HHP and friends. 
- -\cfg{html-leaf-level}{infinite} -\cfg{html-leaf-contains-contents}{false} -\cfg{html-suppress-navlinks}{true} -\cfg{html-suppress-address}{true} - -\cfg{html-contents-filename}{index.html} -\cfg{html-template-filename}{%k.html} -\cfg{html-template-fragment}{%k} - -\cfg{html-mshtmlhelp-chm}{halibut.chm} -\cfg{html-mshtmlhelp-project}{halibut.hhp} -\cfg{html-mshtmlhelp-contents}{halibut.hhc} -\cfg{html-mshtmlhelp-index}{halibut.hhk} diff --git a/doc/intro.but b/doc/intro.but index 2e5ada1..ce1668c 100644 --- a/doc/intro.but +++ b/doc/intro.but @@ -25,10 +25,9 @@ Currently Halibut supports the following output formats: \b PostScript. -\b Old-style Windows Help (\cw{.HLP}). +\b Windows HTML Help (\cw{.CHM}). -(By setting suitable options, the HTML output can also be made -suitable for feeding to the newer-style Windows HTML Help compiler.) +\b Old-style Windows Help (\cw{.HLP}). \H{intro-features} Features supported by Halibut diff --git a/doc/manpage.but b/doc/manpage.but index 56048f6..a13b195 100644 --- a/doc/manpage.but +++ b/doc/manpage.but @@ -43,13 +43,21 @@ produced as output; this, and the file names, will be as specified in the input files, or given a set of default names starting with \c{Contents.html} if none is specified at all. +\dt \cw{--chm}[\cw{=}\e{filename}] + +\dd Makes Halibut generate an output file in Windows HTML Help +format. If the optional \e{filename} parameter is supplied, the output +help file will be given that name. Otherwise, the name of the output +help file will be as specified in the input files, or \c{output.chm} +if none is specified at all. + \dt \cw{--winhelp}[\cw{=}\e{filename}] -\dd Makes Halibut generate an output file in Windows Help format. If -the optional \e{filename} parameter is supplied, the output help -file will be given that name. Otherwise, the name of the output help -file will be as specified in the input files, or \c{output.hlp} if -none is specified at all. 
+\dd Makes Halibut generate an output file in old-style Windows Help +format. If the optional \e{filename} parameter is supplied, the output +help file will be given that name. Otherwise, the name of the output +help file will be as specified in the input files, or \c{output.hlp} +if none is specified at all. \lcont{ The output help file must have a name ending in \c{.hlp}; if it does diff --git a/doc/output.but b/doc/output.but index 9309b82..ccb99df 100644 --- a/doc/output.but +++ b/doc/output.but @@ -858,13 +858,202 @@ name="description">} tag in the output HTML files, so that browsers which support this can easily pick out a brief \I{description, of document}description of the document. -\S{output-html-mshtmlhelp} Generating MS Windows \i{HTML Help} +\S{output-html-defaults} Default settings + +The \i{default settings} for Halibut's HTML output format are: + +\c \cfg{html-contents-filename}{Contents.html} +\c \cfg{html-index-filename}{IndexPage.html} +\c \cfg{html-template-filename}{%n.html} +\c \cfg{html-single-filename}{Manual.html} +\c +\c \cfg{html-leaf-level}{2} +\c \cfg{html-leaf-contains-contents}{false} +\c \cfg{html-leaf-smallest-contents}{4} +\c \cfg{html-contents-depth}{0}{2} +\c \cfg{html-contents-depth}{1}{3} +\c ... and so on for all section levels below this ... +\e iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +\c +\c \cfg{html-head-end}{} +\c \cfg{html-body-tag}{<body>} +\c \cfg{html-body-start}{} +\c \cfg{html-body-end}{} +\c \cfg{html-address-start}{} +\c \cfg{html-address-end}{} +\c \cfg{html-navigation-attributes}{} +\c +\c \cfg{html-chapter-numeric}{false} +\c \cfg{html-chapter-shownumber}{true} +\c \cfg{html-chapter-suffix}{: } +\c +\c \cfg{html-section-numeric}{0}{true} +\c \cfg{html-section-shownumber}{0}{true} +\c \cfg{html-section-suffix}{0}{ } +\c +\c \cfg{html-section-numeric}{1}{true} +\c \cfg{html-section-shownumber}{1}{true} +\c \cfg{html-section-suffix}{1}{ } +\c +\c ... and so on for all section levels below this ... 
+\e iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii +\c +\c \cfg{html-preamble-text}{Preamble} +\c \cfg{html-contents-text}{Contents} +\c \cfg{html-index-text}{Index} +\c \cfg{html-title-separator}{ - } +\c \cfg{html-index-main-separator}{: } +\c \cfg{html-index-multiple-separator}{, } +\c \cfg{html-pre-versionid}{[} +\c \cfg{html-post-versionid}{]} +\c \cfg{html-nav-prev-text}{Previous} +\c \cfg{html-nav-next-text}{Next} +\c \cfg{html-nav-up-text}{Up} +\c \cfg{html-nav-separator}{ | } +\c +\c \cfg{html-output-charset}{ASCII} +\c \cfg{html-restrict-charset}{UTF-8} +\c \cfg{html-quotes}{\u2018}{\u2019}{"}{"} +\c +\c \cfg{html-version}{html4} +\c \cfg{html-template-fragment}{%b} +\c \cfg{html-versionid}{true} +\c \cfg{html-rellinks}{true} +\c \cfg{html-suppress-navlinks}{false} +\c \cfg{html-suppress-address}{false} +\c \cfg{html-author}{} +\c \cfg{html-description}{} + +\H{output-chm} Windows \i{HTML Help} + +This output format generates a \c{.chm} file suitable for use with the +Windows HTML Help system. + +Older versions of Halibut could only generate HTML Help by writing out +a set of source files acceptable to the MS help compiler. Nowadays +Halibut can generate CHM directly, so that's no longer necessary. +However, the legacy method is still available if you need it; see +\k{output-html-mshtmlhelp} for details. + +\S{output-chm-file} Output file name + +\dt \I{\cw{\\cfg\{chm-filename\}}}\cw{\\cfg\{chm-filename\}\{}\e{filename}\cw{\}} + +\dd Sets the \i{output file name} in which to store the HTML Help +file. This directive is implicitly generated if you provide a file +name parameter after the command-line option \i\c{--chm} (see +\k{running-options}). + +\S{output-chm-mostconfig} Configuration shared with the HTML back end + +As the name suggests, an HTML Help file is mostly a compressed +container for HTML files. 
So the CHM back end shares a great deal of +its code with the HTML back end, and as a result, it supports the same +range of format configuration options. + +(One exception to this general rule is that the configuration options +relating to generating \e{HTML Help compiler input} are not supported +in CHM mode, because they wouldn't make any sense! The +\cw{html-mshtmlhelp-*} options described in \k{output-html-mshtmlhelp} +have no analogue starting \cw{chm-}.) + +However, because HTML and CHM are used in different ways, you may need +to configure the two back ends differently. So in CHM mode, Halibut +supports all the same configuration directives described in +\k{output-html}, but with their names changed so that they begin with +\cq{chm-} in place of \cq{html-}. This lets you maintain two sets of +configuration independently; for example, you could specify +\c{\\cfg\{html-chapter-numeric\}\{true\}} and +\c{\\cfg\{chm-chapter-numeric\}\{false\}} in the same source file, and +then when you ran Halibut with both the \c{--html} and \c{--chm} +options, it would produce purely numeric chapter titles in the HTML +output but not in the CHM file. + +If you do decide to apply a piece of configuration across both these +back ends, you can prefix it with \cq{htmlall-} instead of \cq{html-} +or \cq{chm-}. For example, +\c{\\cfg\{htmlall-chapter-numeric\}\{true\}} will enable purely +numeric chapter titles in \e{both} the HTML and CHM output. + +\S{output-chm-extra} Including extra files in the CHM + +CHM files are mostly a container for HTML, and the HTML files inside +them are allowed to cross-refer to all the usual other kinds of file +that HTML might refer to, such as images, stylesheets and even +Javascript. If you want to make use of this capability, you need to +tell Halibut what extra files it needs to incorporate into the CHM +container. 
+ +\dt \I{\cw{\\cfg\{chm-extra-file\}}}\cw{\\cfg\{chm-extra-file\}\{}\e{filename}\cw{\}} + +\dt \I{\cw{\\cfg\{chm-extra-file\}}}\cw{\\cfg\{chm-extra-file\}\{}\e{filename}\cw{\}\{}\e{name inside CHM}\cw{\}} + +\dd Tells Halibut to read an additional input file from \e{filename} +and incorporate it into the CHM. + +\lcont{ + +In the first form of the directive, the file will be given the same +name within the CHM's internal namespace (i.e. for the purposes of +linking to it from HTML files) as Halibut used to load it from disk. +If you need to include the file with a different internal name, you +can use the second form of the directive, which separately specifies +the name under which Halibut should look for the input file and the +name it should give it inside the CHM. + +You can specify this directive multiple times, to include more than +one file. + +} + +\S{output-chm-internalnames} Renaming the CHM internal support files + +As well as ordinary HTML, there are also two special files inside a +CHM, containing the table of contents and the index. Halibut generates +these automatically, and you normally don't have to worry about them. +However, it is \e{just} possible (though very unlikely!) that you +might find they conflict with the name of some file you wanted to +include in the CHM yourself, and hence, Halibut provides configuration +options to change them if you need to. + +\dt \I{\cw{\\cfg\{chm-contents-name\}}}\cw{\\cfg\{chm-contents-name\}\{}\e{filename}\cw{\}} + +\dd Controls the name of the internal contents file in the CHM. + +\dt \I{\cw{\\cfg\{chm-index-name\}}}\cw{\\cfg\{chm-index-name\}\{}\e{filename}\cw{\}} + +\dd Controls the name of the internal index file in the CHM. + +\S{output-chm-defaults} Default settings + +The \i{default settings} for Halibut's CHM output format are mostly +the same as for the standard HTML output. However, a few defaults are +changed to be more in line with the way CHM wants to do things. 
+\c \cfg{chm-filename}{output.chm} +\c \cfg{chm-contents-name}{contents.hhc} +\c \cfg{chm-index-name}{index.hhk} +\c \cfg{chm-leaf-level}{infinite} +\c \cfg{chm-suppress-navlinks}{true} +\c \cfg{chm-suppress-address}{true} -The HTML files output from Halibut's HTML back end can be used as -input to the MS Windows HTML Help compiler. In order to do this, you -also need some auxiliary files: a project file, and (probably) a -contents file and an index file. Halibut can optionally generate -those as well. +\S{output-html-mshtmlhelp} Generating input to the MS Windows \i{HTML +Help compiler} + +Before Halibut gained the ability to write out CHM files directly, it +used a more cumbersome system in which you could run it in HTML mode +and enable some extra options that would write out supporting files +needed by the official Windows HTML Help compiler, so that you could +still generate a CHM file from your Halibut source in multiple build +steps. + +This legacy system for HTML Help generation is still supported, partly +to avoid backwards-compatibility breakage for anyone already using it, +and also because it permits more flexibility in the resulting CHM +files: Halibut's own CHM file generation makes some fixed decisions +about window layout and styling, whereas if you use the official help +compiler you can start from Halibut's default project file and make +whatever manual changes you like to that sort of thing. To enable the generation of MS HTML Help auxiliary files, use the following configuration directives: @@ -940,18 +1129,16 @@ MS HTML Help compiler (\cw{HHC.EXE}), or load into the MS HTML Help Workshop (\cw{HHW.EXE}). You may also wish to alter other HTML configuration options to make -the resulting help file look more like a help file and less like a -web page. A suggested set of additional configuration options for -HTML Help is as follows: +the resulting help file look more like a help file and less like a web +page. 
If you use Halibut's direct CHM output, this is done for you +automatically (see \k{output-chm-defaults}); but if you're using the +HTML output mode then I recommend the following changes. \b \cw{\\cfg\{html-leaf-level\}\{infinite\}}, because HTML Help works best with lots of small files (\q{topics}) rather than a few large ones. In particular, the contents and index mechanisms can only reference files, not subsections within files. -\b \cw{\\cfg\{html-leaf-contains-contents\}\{false\}}, to suppress -the contents list above the main text of each bottom-level file. - \b \cw{\\cfg\{html-suppress-navlinks\}\{true\}}, because HTML Help has its own navigation facilities and it looks a bit strange to duplicate them. @@ -960,83 +1147,15 @@ duplicate them. \cw{<ADDRESS>} section makes less sense in a help file than it does on a web page. -\S{output-html-defaults} Default settings - -The \i{default settings} for Halibut's HTML output format are: - -\c \cfg{html-contents-filename}{Contents.html} -\c \cfg{html-index-filename}{IndexPage.html} -\c \cfg{html-template-filename}{%n.html} -\c \cfg{html-single-filename}{Manual.html} -\c -\c \cfg{html-leaf-level}{2} -\c \cfg{html-leaf-contains-contents}{false} -\c \cfg{html-leaf-smallest-contents}{4} -\c \cfg{html-contents-depth}{0}{2} -\c \cfg{html-contents-depth}{1}{3} -\c ... and so on for all section levels below this ... 
-\e iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii -\c -\c \cfg{html-head-end}{} -\c \cfg{html-body-tag}{<body>} -\c \cfg{html-body-start}{} -\c \cfg{html-body-end}{} -\c \cfg{html-address-start}{} -\c \cfg{html-address-end}{} -\c \cfg{html-navigation-attributes}{} -\c -\c \cfg{html-chapter-numeric}{false} -\c \cfg{html-chapter-shownumber}{true} -\c \cfg{html-chapter-suffix}{: } -\c -\c \cfg{html-section-numeric}{0}{true} -\c \cfg{html-section-shownumber}{0}{true} -\c \cfg{html-section-suffix}{0}{ } -\c -\c \cfg{html-section-numeric}{1}{true} -\c \cfg{html-section-shownumber}{1}{true} -\c \cfg{html-section-suffix}{1}{ } -\c -\c ... and so on for all section levels below this ... -\e iiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiiii -\c -\c \cfg{html-preamble-text}{Preamble} -\c \cfg{html-contents-text}{Contents} -\c \cfg{html-index-text}{Index} -\c \cfg{html-title-separator}{ - } -\c \cfg{html-index-main-separator}{: } -\c \cfg{html-index-multiple-separator}{, } -\c \cfg{html-pre-versionid}{[} -\c \cfg{html-post-versionid}{]} -\c \cfg{html-nav-prev-text}{Previous} -\c \cfg{html-nav-next-text}{Next} -\c \cfg{html-nav-up-text}{Up} -\c \cfg{html-nav-separator}{ | } -\c -\c \cfg{html-output-charset}{ASCII} -\c \cfg{html-restrict-charset}{UTF-8} -\c \cfg{html-quotes}{\u2018}{\u2019}{"}{"} -\c -\c \cfg{html-version}{html4} -\c \cfg{html-template-fragment}{%b} -\c \cfg{html-versionid}{true} -\c \cfg{html-rellinks}{true} -\c \cfg{html-suppress-navlinks{false} -\c \cfg{html-suppress-address}{false} -\c \cfg{html-author}{} -\c \cfg{html-description}{} - -\H{output-whlp} Windows Help +\H{output-whlp} Legacy Windows Help -This output format generates data that can be used by the \i{Windows -Help} program \cw{WINHLP32.EXE}. There are two actual files +This output format generates data that can be used by the legacy +\i{Windows Help} program \cw{WINHLP32.EXE}. There are two actual files generated, one ending in \c{.hlp} and the other ending in \c{.cnt}. 
-Note that as of 2006, MS is discontinuing the Windows Help format in -favour of the newer HTML Help format (\c{.chm} files). Halibut is -not currently able to generate \c{.chm} files directly, but its HTML -back end can write out project files suitable for use as input to -the MS HTML Help compiler. See \k{output-html-mshtmlhelp} for more +This legacy Windows Help format was discontinued in 2006 in favour of +HTML Help, which Halibut can also generate. You probably want to use +that instead for any new project. See \k{output-chm} for more information on this. Currently, the Windows Help output is hardcoded to be in the diff --git a/doc/running.but b/doc/running.but index 39e1715..6c2b6c6 100644 --- a/doc/running.but +++ b/doc/running.but @@ -12,22 +12,19 @@ This will generate a large set of \i{output files}: \b \i\c{output.txt} will be a \i{plain text} version of the input document. +\b \i\c{output.chm} will be a Windows \i{HTML Help} version of the +same thing. (Note that to do this Halibut does not require any +external software such as a \i{Help compiler}. It \e{directly} +generates Windows HTML Help files, and therefore it doesn't need to be +run on Windows to do so: it can generate them even when run from an +automated script on a Unix machine.) + \b \i\c{output.hlp} and \i\c{output.cnt} will be an old-style \i{Windows Help} version of the same thing. (Most of the text is in \c{output.hlp}; \c{output.cnt} contains additional contents data used by the Windows help topic selector. If you lose the latter, the former should still be usable, but it will look less modern.) -\lcont{ - -Note that to do this Halibut does not require any external software -such as a \i{Help compiler}. It \e{directly} generates old-style -Windows Help files, and therefore it doesn't need to be run on -Windows to do so: it can generate them even when run from an -automated script on a Unix machine. - -} - \b \c{output.1} will be a Unix \i{\cw{man} page}. 
\b The set of files \c{*.html} will contain an \i{HTML} version of @@ -79,6 +76,13 @@ line, using the \c{-C} option). \dd Synonym for \c{--html}. +\dt \i\cw{--chm}[\cw{=}\e{filename}] + +\dd Specifies that you want to generate Windows HTML Help +output. You can optionally specify a file name (e.g. +\c{\-\-chm=myfile.chm}), in which case Halibut will change the +name of the output file as well. + \dt \i\cw{--winhelp}[\cw{=}\e{filename}] \dd Specifies that you want to generate old-style Windows Help @@ -363,3 +363,9 @@ void err_sfntbadglyph(const filepos *fpos, unsigned wc) "warning: character U+%04X references a non-existent glyph", wc); } + +void err_chm_badname(const filepos *fpos, const char *sp) +{ + do_error(fpos, "CHM internal file name `%s' begins with" + " a reserved character", sp); +} @@ -322,6 +322,8 @@ void err_sfnttablevers(const filepos *fpos, const char *sp); void err_sfntbadhdr(const filepos *fpos); /* sfnt cmap references bad glyph */ void err_sfntbadglyph(const filepos *fpos, unsigned wc); +/* CHM internal file names can't start with # or $ */ +void err_chm_badname(const filepos *fpos, const char *sp); /* * malloc.c @@ -563,7 +565,9 @@ paragraph *text_config_filename(char *filename); * bk_html.c */ void html_backend(paragraph *, keywordlist *, indexdata *, void *); +void chm_backend(paragraph *, keywordlist *, indexdata *, void *); paragraph *html_config_filename(char *filename); +paragraph *chm_config_filename(char *filename); /* * bk_whlp.c @@ -0,0 +1,697 @@ +#include <assert.h> +#include <stddef.h> + +#include "halibut.h" +#include "huffman.h" +#include "lz77.h" +#include "lzx.h" + +#define OUR_LZX_WINSIZE 0x10000 +#define LZX_MINMATCHLEN 2 +#define LZX_MAXMATCHLEN 257 + +int lzx_compute_position_slot(int pos, int *footer_bits) +{ + if (pos < 4) { + /* The bottom four position slots cover one value each. */ + *footer_bits = 0; + return pos; + } else if (pos >= 0x40000) { + /* _All_ slots from 36 onwards are 2^17 values wide. 
*/ + *footer_bits = 17; + return 34 + (pos >> 17); + } else { + /* In between, there are two slots for each power-of-2 size, + * so that slots 4,5 have width 2^1, 6,7 have width 2^2, 8,9 + * have width 2^3, ..., and 34,35 have width 2^16. */ + int bits = 16; + int shifted = pos; + if (shifted < (1<<(18-8))) shifted <<= 8, bits -= 8; + if (shifted < (1<<(18-4))) shifted <<= 4, bits -= 4; + if (shifted < (1<<(18-2))) shifted <<= 2, bits -= 2; + if (shifted < (1<<(18-1))) shifted <<= 1, bits -= 1; + *footer_bits = bits; + return 2 + 2*bits + ((shifted >> 16) & 1); + } +} + +typedef enum LZXSymType { + LST_MAINTREE, LST_LENTREE, LST_ALIGNOFFTREE, + LST_MAINTREE_PRETREE_1, LST_MAINTREE_PRETREE_2, LST_LENTREE_PRETREE, + LST_NTREES, dummy_enum_const = LST_NTREES-1, + LST_REALIGN_BITSTREAM, + LST_RAWBITS_BASE /* add the number of actual bits to this code */ +} LZXSymType; + +typedef struct LZXSym { + LZXSymType type; + int value; +} LZXSym; + +typedef struct LZXBuffer { + LZXSym *syms; + int nsyms, symsize; +} LZXBuffer; + +typedef struct LZXInfo { + LZXBuffer *buf; + int r0, r1, r2; /* saved match offsets */ +} LZXInfo; + +static void lzx_buffer_init(LZXBuffer *buf) +{ + buf->syms = NULL; + buf->nsyms = buf->symsize = 0; +} + +static void lzx_addsym(LZXBuffer *buf, LZXSymType type, int value) +{ + if (buf->nsyms >= buf->symsize) { + assert(buf->nsyms == buf->symsize); + buf->symsize = buf->nsyms * 5 / 4 + 16384; + buf->syms = sresize(buf->syms, buf->symsize, LZXSym); + } + buf->syms[buf->nsyms].type = type; + buf->syms[buf->nsyms].value = value; + buf->nsyms++; +} + +static void lzx_literal(struct LZ77Context *ctx, unsigned char c) +{ + LZXBuffer *buf = ((LZXInfo *)ctx->userdata)->buf; + lzx_addsym(buf, LST_MAINTREE, c); +} + +static void lzx_match(struct LZ77Context *ctx, int match_offset, int totallen) +{ + LZXInfo *info = (LZXInfo *)ctx->userdata; + LZXBuffer *buf = info->buf; + + /* + * First, this variant of LZX has a maximum match length of 257 + * bytes, so if 
lz77.c reports a longer match than that, we must + * break it up. + */ + while (totallen > 0) { + int len, length_header, length_footer, len_pos_header; + int formatted_offset, position_slot, position_verbatim_bits; + int position_verbatim_value, position_aligned_offset; + + if (totallen <= LZX_MAXMATCHLEN) { + /* We can emit all of the (remaining) match length in one go. */ + len = totallen; + } else if (totallen >= LZX_MAXMATCHLEN+LZX_MINMATCHLEN) { + /* There's enough match left that we can emit a + * maximum-length chunk and still be assured of being able + * to emit what's left as a viable followup match. */ + len = LZX_MAXMATCHLEN; + } else { + /* The in-between case, where we have _only just_ too long + * a match to emit in one go, so that if we emitted a + * max-size chunk then what's left would be under the min + * size and we couldn't emit it. */ + len = totallen - LZX_MINMATCHLEN; + } + totallen -= len; + + /* + * Now we're outputting a single LZX-level match of length + * 'len'. Break the length up into a 'header' (included in the + * starting LST_MAINTREE symbol) and a 'footer' (tacked on + * afterwards using LST_LENTREE). + */ + if (len < 9) { + length_header = len - 2; /* in the range {0,...,6} */ + length_footer = -1; /* not transmitted at all */ + } else { + length_header = 7; /* header indicates more to come */ + length_footer = len - 9; /* in the range {0,...,248} */ + } + + /* + * Meanwhile, the raw backward distance is first transformed + * into the 'formatted offset', by either adding 2 or using + * one of the low-numbered special codes meaning to use one of + * the three most recent match distances. 
+ */ + if (match_offset == info->r0) { + /* Reuse the most recent distance */ + formatted_offset = 0; + } else if (match_offset == info->r1) { + /* Reuse the 2nd most recent, and swap it into first place */ + int tmp = info->r1; + info->r1 = info->r0; + info->r0 = tmp; + formatted_offset = 1; + } else if (match_offset == info->r2) { + /* Reuse the 3rd most recent and swap it to first place. + * This is intentionally not quite a move-to-front + * shuffle, which would permute (r0,r1,r2)->(r2,r0,r1); MS + * decided that just swapping r0 with r2 was a better + * performance tradeoff. */ + int tmp = info->r2; + info->r2 = info->r0; + info->r0 = tmp; + formatted_offset = 2; + } else { + /* This offset matches none of the three saved values. + * Put it in r0, and move up the rest of the list. */ + info->r2 = info->r1; + info->r1 = info->r0; + info->r0 = match_offset; + formatted_offset = match_offset + 2; + } + + /* + * The formatted offset now breaks up into a 'position slot' + * (encoded as part of the starting symbol) and an offset from + * the smallest position value covered by that slot. The + * system of slots is designed so that every slot's width is a + * power of two and its base value is a multiple of its width, + * so we can get the offset just by taking the bottom n bits + * of the full formatted offset, once the choice of position + * slot tells us what n is. + */ + position_slot = lzx_compute_position_slot( + formatted_offset, &position_verbatim_bits); + position_verbatim_value = formatted_offset & + ((1 << position_verbatim_bits)-1); + + /* + * If there are three or more additional bits, then the last 3 + * of them are (potentially, depending on block type which we + * haven't decided about yet) transmitted using the aligned + * offset tree. The rest are sent verbatim. 
+ */ + if (position_verbatim_bits >= 3) { + position_aligned_offset = position_verbatim_value & 7; + position_verbatim_bits -= 3; + position_verbatim_value >>= 3; + } else { + position_aligned_offset = -1; /* not transmitted */ + } + + /* + * Combine the length header and position slot into the full + * set of information encoded by the starting symbol. + */ + len_pos_header = position_slot * 8 + length_header; + + /* + * And now we've finished figuring out _what_ to output, so + * output it. + */ + lzx_addsym(buf, LST_MAINTREE, 256 + len_pos_header); + if (length_footer >= 0) + lzx_addsym(buf, LST_LENTREE, length_footer); + if (position_verbatim_bits > 0) + lzx_addsym(buf, LST_RAWBITS_BASE + position_verbatim_bits, + position_verbatim_value); + if (position_aligned_offset >= 0) + lzx_addsym(buf, LST_ALIGNOFFTREE, position_aligned_offset); + } +} + +void lzx_lz77_inner(LZXInfo *info, const unsigned char *data, int len) +{ + struct LZ77Context lz77c; + lz77_init(&lz77c, OUR_LZX_WINSIZE); + lz77c.literal = lzx_literal; + lz77c.match = lzx_match; + lz77c.userdata = info; + lz77_compress(&lz77c, data, len, TRUE); + lz77_cleanup(&lz77c); +} + +void lzx_lz77(LZXBuffer *buf, const unsigned char *data, + int totallen, int realign_interval) +{ + LZXInfo info; + + info.r0 = info.r1 = info.r2 = 1; + info.buf = buf; + + while (totallen > 0) { + int thislen = + totallen < realign_interval ? 
totallen : realign_interval; + lzx_lz77_inner(&info, data, thislen); + data += thislen; + totallen -= thislen; + if (totallen > 0) + lzx_addsym(info.buf, LST_REALIGN_BITSTREAM, 0); + } +} + +typedef struct LZXHuf { + int nsyms; + unsigned char *lengths; + unsigned char *oldlengths; /* for pretree encoding to diff against */ + int *codes; +} LZXHuf; + +typedef struct LZXHufs { + LZXHuf hufs[LST_NTREES]; +} LZXHufs; + +void lzx_build_tree(LZXSym *syms, int nsyms, LZXSymType which, LZXHufs *hufs) +{ + int i, max_code_len; + int *freqs; + LZXHuf *huf = &hufs->hufs[which]; + + switch (which) { + default: + assert(0 && "Bad lzx_build_tree tree type"); + case LST_MAINTREE: + /* + * Trees encoded via a pretree have a max code length of 16, + * because that's the limit of what the pretree alphabet can + * represent. + */ + max_code_len = 16; + + /* + * Number of symbols in the main tree is 256 literals, plus 8n + * match header symbols where n is the largest position slot + * number that might be needed to address any offset in the + * window. + */ + { + int ignored, last_slot; + last_slot = lzx_compute_position_slot(OUR_LZX_WINSIZE-1, &ignored); + huf->nsyms = 8 * (last_slot+1) + 256; + } + break; + case LST_LENTREE: + max_code_len = 16; /* pretree again */ + huf->nsyms = 249; /* a fixed value in the spec */ + break; + case LST_MAINTREE_PRETREE_1: + case LST_MAINTREE_PRETREE_2: + case LST_LENTREE_PRETREE: + /* Pretree code lengths are stored in 4-bit fields, so they + * can't go above 15. There are a standard 20 symbols in the + * pretree alphabet. */ + max_code_len = 15; + huf->nsyms = 20; + break; + case LST_ALIGNOFFTREE: + /* The aligned-offset tree has 8 elements stored in 3-bit + * fields. */ + max_code_len = 7; + huf->nsyms = 8; + break; + } + + freqs = snewn(huf->nsyms, int); + + /* + * Count up the symbol frequencies. 
+ */ + for (i = 0; i < huf->nsyms; i++) + freqs[i] = 0; + for (i = 0; i < nsyms; i++) + if (syms[i].type == which) + freqs[syms[i].value]++; + + /* + * Build the Huffman table. + */ + huf->lengths = snewn(huf->nsyms, unsigned char); + build_huffman_tree(freqs, huf->lengths, huf->nsyms, max_code_len); + huf->codes = snewn(huf->nsyms, int); + compute_huffman_codes(huf->lengths, huf->codes, huf->nsyms); + + /* + * Cleanup. + */ + sfree(freqs); +} + +void lzx_tree_with_pretree(LZXHuf *huf, int symoffset, int symlimit, + LZXBuffer *buf, LZXSymType pretree_symtype) +{ + int i, r; + + if (!huf->oldlengths) { + huf->oldlengths = snewn(huf->nsyms, unsigned char); + for (i = 0; i < huf->nsyms; i++) + huf->oldlengths[i] = 0; + } + + for (i = symoffset; i < symlimit; i++) { + for (r = 1; i+r < symlimit; r++) + if (huf->lengths[i+r] != huf->lengths[i]) + break; + + if (r >= 4) { + /* + * We have at least one run of the same code length long + * enough to use one of the run-length encoding symbols. + */ + while (r >= 4) { + int thisrun; + if (huf->lengths[i] == 0) { + thisrun = r > 20+31 ? 20+31 : r; + if (thisrun >= 20) { + lzx_addsym(buf, pretree_symtype, 18); + lzx_addsym(buf, LST_RAWBITS_BASE + 5, thisrun - 20); + } else { + lzx_addsym(buf, pretree_symtype, 17); + lzx_addsym(buf, LST_RAWBITS_BASE + 4, thisrun - 4); + } + } else { + thisrun = r > 5 ? 5 : r; + lzx_addsym(buf, pretree_symtype, 19); + lzx_addsym(buf, LST_RAWBITS_BASE + 1, thisrun - 4); + lzx_addsym(buf, pretree_symtype, + (huf->oldlengths[i]-huf->lengths[i] + 17) % 17); + } + r -= thisrun; + i += thisrun; + } + + if (r == 0) { + i--; /* compensate for normal loop increment */ + continue; + } + } + + /* + * Otherwise, emit a normal non-encoded symbol. 
+ */ + lzx_addsym(buf, pretree_symtype, + (huf->oldlengths[i]-huf->lengths[i] + 17) % 17); + } +} + +void lzx_tree_simple(LZXHuf *huf, LZXBuffer *buf, int bits) +{ + int i; + for (i = 0; i < huf->nsyms; i++) + lzx_addsym(buf, LST_RAWBITS_BASE + bits, huf->lengths[i]); +} + +typedef struct LZXBitstream { + struct LZXEncodedFile *ef; + size_t data_size, resets_size; + unsigned short bitbuffer; + int nbits; + int first_block; +} LZXBitstream; + +void lzx_write_bits(LZXBitstream *bs, int value, int bits) +{ + while (bs->nbits + bits >= 16) { + int thisbits = 16 - bs->nbits; + bs->bitbuffer = (bs->bitbuffer << thisbits) | + (value >> (bits-thisbits)); + + if (bs->ef->data_len+2 > bs->data_size) { + bs->data_size = bs->ef->data_len * 5 / 4 + 65536; + bs->ef->data = sresize(bs->ef->data, bs->data_size, + unsigned char); + } + bs->ef->data[bs->ef->data_len++] = bs->bitbuffer; + bs->ef->data[bs->ef->data_len++] = bs->bitbuffer >> 8; + + bs->bitbuffer = 0; + bs->nbits = 0; + + bits -= thisbits; + value &= (1<<bits) - 1; + } + + bs->bitbuffer = (bs->bitbuffer << bits) | value; + bs->nbits += bits; +} + +void lzx_realign(LZXBitstream *bs) +{ + lzx_write_bits(bs, 0, 15 & -(unsigned)bs->nbits); +} + +void lzx_write_reset_table_entry(LZXBitstream *bs) +{ + lzx_write_bits(bs, 0, 15 & -(unsigned)bs->nbits); + + if (bs->ef->n_resets >= bs->resets_size) { + bs->resets_size = bs->ef->n_resets * 5 / 4 + 256; + bs->ef->reset_byte_offsets = sresize(bs->ef->reset_byte_offsets, + bs->resets_size, size_t); + } + bs->ef->reset_byte_offsets[bs->ef->n_resets++] = bs->ef->data_len; +} + +void lzx_huf_encode(LZXSym *syms, int nsyms, LZXHufs *hufs, LZXBitstream *bs) +{ + int i; + for (i = 0; i < nsyms; i++) { + LZXSymType type = syms[i].type; + int value = syms[i].value; + + if (type >= LST_RAWBITS_BASE) { + lzx_write_bits(bs, value, type - LST_RAWBITS_BASE); + } else if (type == LST_REALIGN_BITSTREAM) { + /* Realign the bitstream to a 16-bit boundary, and write a + * reset table entry giving the 
resulting byte offset. */ + lzx_realign(bs); + lzx_write_reset_table_entry(bs); + } else { + lzx_write_bits(bs, hufs->hufs[type].codes[value], + hufs->hufs[type].lengths[value]); + } + } +} + +void lzx_encode_block(LZXSym *syms, int nsyms, int blocksize, + LZXHufs *hufs, LZXBitstream *bs) +{ + LZXBuffer header[8]; + int i, blocktype; + + for (i = 0; i < (int)lenof(header); i++) + lzx_buffer_init(&header[i]); + + /* + * Build the Huffman trees for the main alphabets used in the + * block. + */ + lzx_build_tree(syms, nsyms, LST_MAINTREE, hufs); + lzx_build_tree(syms, nsyms, LST_LENTREE, hufs); + lzx_build_tree(syms, nsyms, LST_ALIGNOFFTREE, hufs); + + /* + * Encode each of those as a sequence of pretree symbols. + */ + lzx_tree_with_pretree(&hufs->hufs[LST_MAINTREE], 0, 256, + &header[3], LST_MAINTREE_PRETREE_1); + lzx_tree_with_pretree(&hufs->hufs[LST_MAINTREE], 256, + hufs->hufs[LST_MAINTREE].nsyms, + &header[5], LST_MAINTREE_PRETREE_2); + lzx_tree_with_pretree(&hufs->hufs[LST_LENTREE], 0, + hufs->hufs[LST_LENTREE].nsyms, + &header[7], LST_LENTREE_PRETREE); + + /* + * Build the pretree for each of those encodings. + */ + lzx_build_tree(header[3].syms, header[3].nsyms, + LST_MAINTREE_PRETREE_1, hufs); + lzx_build_tree(header[5].syms, header[5].nsyms, + LST_MAINTREE_PRETREE_2, hufs); + lzx_build_tree(header[7].syms, header[7].nsyms, + LST_LENTREE_PRETREE, hufs); + + /* + * Decide whether we're keeping the aligned offset tree or not. + */ + { + int with, without; + + with = 3*8; /* cost of transmitting tree */ + without = 0; /* or not */ + + for (i = 0; i < nsyms; i++) + if (syms[i].type == LST_ALIGNOFFTREE) { + with += hufs->hufs[LST_ALIGNOFFTREE].lengths[syms[i].value]; + without += 3; + } + + if (with < without) { + /* Yes, it's a win to use the aligned offset tree. */ + blocktype = 2; + } else { + /* No, we do better by throwing it away. 
*/ + blocktype = 1; + + /* Easiest way to simulate that is to pretend we're still + * using an aligned offset tree in the encoding, but to + * chuck away our code lengths and replace them with the + * fixed-length trivial tree. */ + for (i = 0; i < 8; i++) { + hufs->hufs[LST_ALIGNOFFTREE].lengths[i] = 3; + hufs->hufs[LST_ALIGNOFFTREE].codes[i] = i; + } + } + } + + /* + * Encode all the simply encoded trees (the three pretrees and the + * aligned offset tree). + */ + lzx_tree_simple(&hufs->hufs[LST_MAINTREE_PRETREE_1], &header[2], 4); + lzx_tree_simple(&hufs->hufs[LST_MAINTREE_PRETREE_2], &header[4], 4); + lzx_tree_simple(&hufs->hufs[LST_LENTREE_PRETREE], &header[6], 4); + if (blocktype == 2) + lzx_tree_simple(&hufs->hufs[LST_ALIGNOFFTREE], &header[1], 3); + + /* + * Top-level block header. + */ + if (bs->first_block) { + /* + * Also include the whole-file header which says whether E8 + * call translation is on. We never turn it on, because we + * don't support it (since in this use case it doesn't seem + * likely to be particularly useful anyway). + * + * It looks like a layer violation to put the output of this + * whole-file header inside the per-block function like this, + * but in fact it has to be done here because the first reset + * table entry really is supposed to point to the _start_ of + * the whole-file header. + */ + lzx_addsym(&header[0], LST_RAWBITS_BASE + 1, 0); + bs->first_block = FALSE; + } + lzx_addsym(&header[0], LST_RAWBITS_BASE + 3, blocktype); + lzx_addsym(&header[0], LST_RAWBITS_BASE + 24, blocksize); + + /* + * Ensure the bit stream starts off aligned, and output an initial + * reset-table entry. + */ + lzx_realign(bs); + lzx_write_reset_table_entry(bs); + + /* + * Write out all of our symbol sequences in order: all of those + * assorted header fragments, then the main LZ77 token sequence. 
+ */ + for (i = 0; i < (int)lenof(header); i++) + lzx_huf_encode(header[i].syms, header[i].nsyms, hufs, bs); + lzx_huf_encode(syms, nsyms, hufs, bs); + + /* + * Clean up. + */ + for (i = 0; i < (int)lenof(header); i++) + sfree(header[i].syms); + for (i = 0; i < (int)lenof(hufs->hufs); i++) { + sfree(hufs->hufs[i].codes); + sfree(hufs->hufs[i].lengths); + } +} + +struct LZXEncodedFile *lzx(const void *vdata, int totallen, + int realign_interval, int reset_interval) +{ + const unsigned char *data = (const unsigned char *)vdata; + LZXBitstream bs; + LZXHufs hufs; + int i; + + bs.ef = snew(struct LZXEncodedFile); + bs.ef->data = NULL; + bs.ef->reset_byte_offsets = NULL; + bs.ef->data_len = bs.data_size = 0; + bs.ef->n_resets = bs.resets_size = 0; + bs.bitbuffer = 0; + bs.nbits = 0; + + for (i = 0; i < (int)lenof(hufs.hufs); i++) + hufs.hufs[i].oldlengths = NULL; + + while (totallen > 0) { + int thislen = + totallen < reset_interval ? totallen : reset_interval; + LZXBuffer buf; + + lzx_buffer_init(&buf); + + lzx_lz77(&buf, data, thislen, realign_interval); + data += thislen; + totallen -= thislen; + + /* + * Block boundaries are chosen completely trivially: since we + * have to terminate a block every time we reach the (fairly + * short) reset interval in any case, it doesn't hurt us much + * to just fix the assumption that every (reset_interval) + * bytes of the input turn into exactly one block, i.e. the + * whole of buf.syms that we just constructed is output in one + * go. We _could_ try improving on this by clever + * block-boundary heuristics, but I don't really think it's + * worth it. + */ + bs.first_block = TRUE; /* reset every time we reset the LZ state */ + lzx_encode_block(buf.syms, buf.nsyms, thislen, &hufs, &bs); + + sfree(buf.syms); + } + + for (i = 0; i < (int)lenof(hufs.hufs); i++) + sfree(hufs.hufs[i].oldlengths); + + /* Realign to a 16-bit boundary, i.e. flush out any last few + * unwritten bits. 
*/ + lzx_realign(&bs); + + return bs.ef; +} + +#ifdef LZX_TEST +/* +gcc -g -O0 -DLZX_TEST -o lzxtest -Icharset lzx.c lz77.c huffman.c malloc.c +*/ +#include <err.h> +int main(int argc, char **argv) +{ + FILE *fp; + long insize; + unsigned char *inbuf; + struct LZXEncodedFile *ef; + + if (argc != 3) + errx(1, "expected infile and outfile arguments"); + + fp = fopen(argv[1], "rb"); + if (!fp) + err(1, "%s: open", argv[1]); + fseek(fp, 0, SEEK_END); + insize = ftell(fp); + rewind(fp); + inbuf = snewn(insize, unsigned char); + fread(inbuf, 1, insize, fp); + fclose(fp); + + ef = lzx(inbuf, insize, 0x8000, 0x10000); + + fp = fopen(argv[2], "wb"); + if (!fp) + err(1, "%s: open", argv[2]); + fwrite(ef->data, 1, ef->data_len, fp); + fclose(fp); + + sfree(ef->data); + sfree(ef->reset_byte_offsets); + sfree(ef); + sfree(inbuf); + + return 0; +} + +wchar_t *ustrdup(wchar_t const *s) { assert(0 && "should be unused"); } +void fatalerr_nomemory(void) { errx(1, "out of memory"); } +#endif @@ -0,0 +1,24 @@ +/* + * lzx.h: LZX encoder for Windows CHM files. + */ + +struct LZXEncodedFile { + unsigned char *data; + size_t data_len; + + size_t *reset_byte_offsets; + size_t n_resets; +}; + +/* + * Produce an LZX-compressed encoding of an input data block. Return + * it, along with a list of byte offsets where the data stream is + * realigned to a 16-bit boundary because one of realign_interval and + * reset_interval has run out. + * + * The output structure and its fields 'data' and 'reset_byte_offsets' + * are all dynamically allocated, and need freeing by the receiver + * when finished with. 
+ */ +struct LZXEncodedFile *lzx(const void *data, int len, + int realign_interval, int reset_interval); @@ -35,6 +35,7 @@ static const struct backend { {"info", info_backend, info_config_filename, 0x0010, 0}, {"ps", ps_backend, ps_config_filename, 0x0020, 0x0001}, {"pdf", pdf_backend, pdf_config_filename, 0x0040, 0x0001}, + {"chm", chm_backend, chm_config_filename, 0x0080, 0}, }; int main(int argc, char **argv) { diff --git a/winchm.c b/winchm.c new file mode 100644 index 0000000..cb21715 --- /dev/null +++ b/winchm.c @@ -0,0 +1,1436 @@ +/* + * winchm.c: direct output of .CHM files. + */ + +#include <assert.h> +#include <stdio.h> + +#include "halibut.h" +#include "tree234.h" +#include "lzx.h" + +#define PUT_32BIT_LSB_FIRST(cp, value) do { \ + ((unsigned char *)cp)[0] = 0xFF & (value); \ + ((unsigned char *)cp)[1] = 0xFF & ((value) >> 8); \ + ((unsigned char *)cp)[2] = 0xFF & ((value) >> 16); \ + ((unsigned char *)cp)[3] = 0xFF & ((value) >> 24); } while (0) + +#define PUT_32BIT_MSB_FIRST(cp, value) do { \ + ((unsigned char *)cp)[3] = 0xFF & (value); \ + ((unsigned char *)cp)[2] = 0xFF & ((value) >> 8); \ + ((unsigned char *)cp)[1] = 0xFF & ((value) >> 16); \ + ((unsigned char *)cp)[0] = 0xFF & ((value) >> 24); } while (0) + +#define PUT_16BIT_LSB_FIRST(cp, value) do { \ + ((unsigned char *)cp)[0] = 0xFF & (value); \ + ((unsigned char *)cp)[1] = 0xFF & ((value) >> 8); } while (0) + +#define RDADD_32BIT_LSB_FIRST(rs, value) do { \ + unsigned char out[4]; \ + PUT_32BIT_LSB_FIRST(out, value); \ + rdaddsn(rs, (void *)out, sizeof(out)); \ + } while (0) + +#define RDADD_32BIT_MSB_FIRST(rs, value) do { \ + unsigned char out[4]; \ + PUT_32BIT_MSB_FIRST(out, value); \ + rdaddsn(rs, (void *)out, sizeof(out)); \ + } while (0) + +#define RDADD_16BIT_LSB_FIRST(rs, value) do { \ + unsigned char out[2]; \ + PUT_16BIT_LSB_FIRST(out, value); \ + rdaddsn(rs, (void *)out, sizeof(out)); \ + } while (0) + +static void guid(rdstringc *rs, unsigned long w0, + unsigned short h0, unsigned 
short h1, + unsigned char b0, unsigned char b1, + unsigned char b2, unsigned char b3, + unsigned char b4, unsigned char b5, + unsigned char b6, unsigned char b7) +{ + RDADD_32BIT_LSB_FIRST(rs, w0); + RDADD_16BIT_LSB_FIRST(rs, h0); + RDADD_16BIT_LSB_FIRST(rs, h1); + rdaddc(rs, b0); + rdaddc(rs, b1); + rdaddc(rs, b2); + rdaddc(rs, b3); + rdaddc(rs, b4); + rdaddc(rs, b5); + rdaddc(rs, b6); + rdaddc(rs, b7); +} + +static void itsf(rdstringc *rs, + const rdstringc *directory, const rdstringc *content0) +{ + int headersize_field; + int headersect_off, headersect_off_field, headersect_size_field; + int directory_off_field, content0_off_field, filesize_field; + + /* Main file header */ + rdaddsc(rs, "ITSF"); /* main file magic number */ + RDADD_32BIT_LSB_FIRST(rs, 3); /* file format version */ + headersize_field = rs->pos; + RDADD_32BIT_LSB_FIRST(rs, 0); /* size of main header; fill in later */ + RDADD_32BIT_LSB_FIRST(rs, 1); /* unknown, always observed to be 1 */ + RDADD_32BIT_MSB_FIRST(rs, 0x12345678); /* timestamp (FIXME) */ + RDADD_32BIT_LSB_FIRST(rs, 0x809); /* language code (FIXME: configurable) */ + guid(rs,0x7C01FD10,0x7BAA,0x11D0,0x9E,0x0C,0x00,0xA0,0xC9,0x22,0xE6,0xEC); + guid(rs,0x7C01FD11,0x7BAA,0x11D0,0x9E,0x0C,0x00,0xA0,0xC9,0x22,0xE6,0xEC); + headersect_off_field = rs->pos; + RDADD_32BIT_LSB_FIRST(rs, 0); /* header section offset; fill in later */ + RDADD_32BIT_LSB_FIRST(rs, 0); /* MSW of 64-bit field */ + headersect_size_field = rs->pos; + RDADD_32BIT_LSB_FIRST(rs, 0); /* header section size; fill in later */ + RDADD_32BIT_LSB_FIRST(rs, 0); /* MSW of 64-bit field */ + directory_off_field = rs->pos; + RDADD_32BIT_LSB_FIRST(rs, 0); /* directory offset; fill in later */ + RDADD_32BIT_LSB_FIRST(rs, 0); /* MSW of 64-bit field */ + RDADD_32BIT_LSB_FIRST(rs, directory->pos); + RDADD_32BIT_LSB_FIRST(rs, 0); /* MSW of 64-bit field */ + content0_off_field = rs->pos; + RDADD_32BIT_LSB_FIRST(rs, 0); /* content section 0 offset; fill in later */ + 
RDADD_32BIT_LSB_FIRST(rs, 0); /* MSW of 64-bit field */ + PUT_32BIT_LSB_FIRST(rs->text + headersize_field, rs->pos); + + /* 'Header section' */ + headersect_off = rs->pos; + PUT_32BIT_LSB_FIRST(rs->text + headersect_off_field, rs->pos); + RDADD_32BIT_LSB_FIRST(rs, 0x1FE); /* magic number */ + RDADD_32BIT_LSB_FIRST(rs, 0); /* unknown, always observed to be 0 */ + filesize_field = rs->pos; + RDADD_32BIT_LSB_FIRST(rs, 0); /* file size; fill in later */ + RDADD_32BIT_LSB_FIRST(rs, 0); /* MSW of 64-bit field */ + RDADD_32BIT_LSB_FIRST(rs, 0); /* unknown, always observed to be 0 */ + RDADD_32BIT_LSB_FIRST(rs, 0); /* unknown, always observed to be 0 */ + PUT_32BIT_LSB_FIRST(rs->text + headersect_size_field, + rs->pos - headersect_off); + + PUT_32BIT_LSB_FIRST(rs->text + directory_off_field, rs->pos); + rdaddsn(rs, directory->text, directory->pos); + + PUT_32BIT_LSB_FIRST(rs->text + content0_off_field, rs->pos); + rdaddsn(rs, content0->text, content0->pos); + + PUT_32BIT_LSB_FIRST(rs->text + filesize_field, rs->pos); +} + +static void encint(rdstringc *rs, unsigned val) +{ + int i, j, topbit; + + /* ENCINT in the CHM format is big-endian, but it's easier to + * write little-endian and byte-reverse afterwards. */ + + i = rs->pos; /* first byte index */ + + topbit = 0; + while (val >= 0x80) { + rdaddc(rs, (val & 0x7F) | topbit); + val >>= 7; + topbit = 0x80; + } + + j = rs->pos; /* last byte index */ + rdaddc(rs, val | topbit); + + while (j > i) { + char tmp = rs->text[i]; + rs->text[i] = rs->text[j]; + rs->text[j] = tmp; + i++; + j--; + } +} + +struct chm_directory_entry { + char *filename; /* free this when done */ + int which_content_section; + int offset_in_content_section; + int file_size; +}; + +static int strcmp_chm(const char *a, const char *b) +{ + /* + * CHM directory sorting criterion appears to be case-insensitive, + * and based on sorting the _lowercased_ text. (Hence, in + * particular, '_' sorts before any alphabetic character.) 
+ */ + while (*a || *b) { + char ac = *a, bc = *b; + if (ac >= 'A' && ac <= 'Z') ac += 'a'-'A'; + if (bc >= 'A' && bc <= 'Z') bc += 'a'-'A'; + if (ac != bc) + return ac < bc ? -1 : +1; + a++; + b++; + } + + return 0; +} + +int chm_directory_entry_cmp(void *av, void *bv) +{ + const struct chm_directory_entry + *a = (const struct chm_directory_entry *)av, + *b = (const struct chm_directory_entry *)bv; + return strcmp_chm(a->filename, b->filename); +} + +int chm_directory_entry_find(void *av, void *bv) +{ + const char *a = (const char *)av; + const struct chm_directory_entry + *b = (const struct chm_directory_entry *)bv; + return strcmp_chm(a, b->filename); +} + +struct chm_index_entry { + char *first_filename; /* shared pointer with some chm_directory_entry */ + int chunk_index; +}; + +static void directory(rdstringc *rs, tree234 *files) +{ + const int chunksize = 4096; + const int encoded_density = 2; + const int useful_density = 1 + (1 << encoded_density); + int dirhdr_size_field, dirhdr_size2_field, dirhdr_depth_field; + int dirhdr_root_field, dirhdr_tail_field, dirhdr_nchunks_field; + int curr_chunk, depth, filename_index; + tree234 *index; + + assert(rs->pos == 0); + assert(count234(files) > 0); + + /* Directory header */ + rdaddsc(rs, "ITSP"); /* directory header magic number */ + RDADD_32BIT_LSB_FIRST(rs, 1); /* format version */ + dirhdr_size_field = rs->pos; + RDADD_32BIT_LSB_FIRST(rs, 0); /* directory header size; fill in later */ + RDADD_32BIT_LSB_FIRST(rs, 10); /* unknown; observed to be 10 */ + RDADD_32BIT_LSB_FIRST(rs, chunksize); + RDADD_32BIT_LSB_FIRST(rs, encoded_density); + dirhdr_depth_field = rs->pos; + RDADD_32BIT_LSB_FIRST(rs, 0); /* B-tree depth; fill in later */ + dirhdr_root_field = rs->pos; + RDADD_32BIT_LSB_FIRST(rs, 0); /* root chunk index; fill in later */ + RDADD_32BIT_LSB_FIRST(rs, 0); /* head of PMGL chunk list; always 0 here */ + dirhdr_tail_field = rs->pos; + RDADD_32BIT_LSB_FIRST(rs, 0); /* tail of PMGL chunk list; fill in later */ 
+ RDADD_32BIT_LSB_FIRST(rs, 0xFFFFFFFFU); /* unknown; observed to be -1 */ + dirhdr_nchunks_field = rs->pos; + RDADD_32BIT_LSB_FIRST(rs, 0); /* total number of chunks; fill in later */ + RDADD_32BIT_LSB_FIRST(rs, 0x409); /* language (FIXME) */ + guid(rs,0x5D02926A,0x212E,0x11D0,0x9D,0xF9,0x00,0xA0,0xC9,0x22,0xE6,0xEC); + dirhdr_size2_field = rs->pos; + RDADD_32BIT_LSB_FIRST(rs, 0); /* directory header size; fill in later */ + RDADD_32BIT_LSB_FIRST(rs, 0xFFFFFFFFU); /* unknown; observed to be -1 */ + RDADD_32BIT_LSB_FIRST(rs, 0xFFFFFFFFU); /* unknown; observed to be -1 */ + RDADD_32BIT_LSB_FIRST(rs, 0xFFFFFFFFU); /* unknown; observed to be -1 */ + PUT_32BIT_LSB_FIRST(rs->text + dirhdr_size_field, rs->pos); + PUT_32BIT_LSB_FIRST(rs->text + dirhdr_size2_field, rs->pos); + + index = newtree234(NULL); + curr_chunk = 0; + depth = 1; + /* Write out lowest-level PMGL chunks full of actual directory entries */ + filename_index = 0; + while (filename_index < count234(files)) { + rdstringc chunk = {0, 0, NULL}; + rdstringc reversed_quickref = {0, 0, NULL}; + int chunk_endlen_field, chunk_nextptr_field; + int n_entries, offset_of_first_entry; + int saved_pos, saved_rq_pos, i; + + rdaddsc(&chunk, "PMGL"); + chunk_endlen_field = chunk.pos; + RDADD_32BIT_LSB_FIRST(&chunk, 0); /* space at end; fill in later */ + RDADD_32BIT_LSB_FIRST(&chunk, 0); /* unknown; observed to be 0 */ + if (curr_chunk == 0) { + RDADD_32BIT_LSB_FIRST(&chunk, 0xFFFFFFFF); /* 'null' prev ptr */ + } else { + RDADD_32BIT_LSB_FIRST(&chunk, curr_chunk - 1); + } + chunk_nextptr_field = chunk.pos; /* may overwrite 'next' ptr later */ + RDADD_32BIT_LSB_FIRST(&chunk, curr_chunk + 1); + + /* Enter this chunk in our index for the next level of the + * B-tree (if we end up needing one). 
*/ + { + struct chm_directory_entry *ent = (struct chm_directory_entry *) + index234(files, filename_index); + struct chm_index_entry *ient = snew(struct chm_index_entry); + assert(ent); + ient->first_filename = ent->filename; + ient->chunk_index = curr_chunk; + addpos234(index, ient, count234(index)); + } + + /* Start accumulating the quick-reference index at the end of this + * chunk. We'll build it up backwards, and reverse it halfwordwise + * when we copy it into the end of our output chunk. */ + RDADD_16BIT_LSB_FIRST(&reversed_quickref, 0); + offset_of_first_entry = chunk.pos; + + n_entries = 0; + /* Write filenames into this chunk until it's full, or until + * we run out of filenames. */ + while (1) { + struct chm_directory_entry *ent = (struct chm_directory_entry *) + index234(files, filename_index++); + if (!ent) { + /* Run out of filenames, so this is the last PMGL chunk. + * Reset its 'next' pointer to the 'null' -1 value. */ + PUT_32BIT_LSB_FIRST(chunk.text + chunk_nextptr_field, + 0xFFFFFFFFU); + /* And point the directory header's tail pointer at + * this chunk. */ + PUT_32BIT_LSB_FIRST(rs->text + dirhdr_tail_field, curr_chunk); + break; + } + + /* Save the sizes of stuff in this chunk, so we can put + * them back if this entry turns out to overflow. */ + saved_pos = chunk.pos; + saved_rq_pos = reversed_quickref.pos; + + if (n_entries > 0 && n_entries % useful_density == 0) { + /* Add a quick-reference index pointer. */ + RDADD_16BIT_LSB_FIRST(&reversed_quickref, chunk.pos - + offset_of_first_entry); + } + + encint(&chunk, strlen(ent->filename)); + rdaddsc(&chunk, ent->filename); + encint(&chunk, ent->which_content_section); + encint(&chunk, ent->offset_in_content_section); + encint(&chunk, ent->file_size); + if (chunk.pos + reversed_quickref.pos > chunksize) { + filename_index--; + chunk.pos = saved_pos; + reversed_quickref.pos = saved_rq_pos; + break; + } + + /* If we didn't overflow, then commit to this entry and + * loop round for the next one. 
*/ + n_entries++; + } + + /* Finalise the chunk. */ + assert(chunk.pos + reversed_quickref.pos <= chunksize); + PUT_32BIT_LSB_FIRST(chunk.text + chunk_endlen_field, + chunksize - chunk.pos); + PUT_16BIT_LSB_FIRST(reversed_quickref.text, n_entries); + while (chunk.pos + reversed_quickref.pos < chunksize) + rdaddc(&chunk, 0); /* zero-pad */ + for (i = reversed_quickref.pos - 2; i >= 0; i -= 2) + rdaddsn(&chunk, reversed_quickref.text+i, 2); + + assert(chunk.pos == chunksize); + rdaddsn(rs, chunk.text, chunk.pos); + sfree(chunk.text); + sfree(reversed_quickref.text); + curr_chunk++; + } + + /* Write out as many layers of PMGI index chunks as it takes to + * reduce the total number of chunks at the current level to 1. */ + while (count234(index) > 1) { + tree234 *prev_index; + int index_index = 0; + + prev_index = index; + index = newtree234(NULL); + depth++; + + while (index_index < count234(prev_index)) { + rdstringc chunk = {0, 0, NULL}; + rdstringc reversed_quickref = {0, 0, NULL}; + int chunk_endlen_field; + int n_entries, offset_of_first_entry; + int saved_pos, saved_rq_pos, i; + + rdaddsc(&chunk, "PMGI"); + chunk_endlen_field = chunk.pos; + RDADD_32BIT_LSB_FIRST(&chunk, 0); /* space at end; fill in later */ + + /* Enter this chunk in our index for the next level of the + * B-tree (if we end up needing one). */ + { + struct chm_index_entry *ent = (struct chm_index_entry *) + index234(prev_index, index_index); + struct chm_index_entry *ient = snew(struct chm_index_entry); + assert(ent); + ient->first_filename = ent->first_filename; + ient->chunk_index = curr_chunk; + addpos234(index, ient, count234(index)); + } + + /* Start accumulating the quick-reference index at the end + * of this chunk, as above. */ + RDADD_16BIT_LSB_FIRST(&reversed_quickref, 0); + offset_of_first_entry = chunk.pos; + + n_entries = 0; + /* Write index entries into this chunk until it's full, or + * until we run out of chunks at the previous level. 
*/ + while (1) { + struct chm_index_entry *ent = (struct chm_index_entry *) + index234(prev_index, index_index++); + if (!ent) + break; + + /* Save the sizes of stuff in this chunk, so we can put + * them back if this entry turns out to overflow. */ + saved_pos = chunk.pos; + saved_rq_pos = reversed_quickref.pos; + + if (n_entries > 0 && n_entries % useful_density == 0) { + /* Add a quick-reference index pointer. */ + RDADD_16BIT_LSB_FIRST(&reversed_quickref, chunk.pos - + offset_of_first_entry); + } + + encint(&chunk, strlen(ent->first_filename)); + rdaddsc(&chunk, ent->first_filename); + encint(&chunk, ent->chunk_index); + if (chunk.pos + reversed_quickref.pos > chunksize) { + index_index--; + chunk.pos = saved_pos; + reversed_quickref.pos = saved_rq_pos; + break; + } + + /* If we didn't overflow, then commit to this entry and + * loop round for the next one. */ + n_entries++; + } + + /* Finalise the chunk. */ + assert(chunk.pos + reversed_quickref.pos <= chunksize); + PUT_32BIT_LSB_FIRST(chunk.text + chunk_endlen_field, + chunksize - chunk.pos); + PUT_16BIT_LSB_FIRST(reversed_quickref.text, n_entries); + while (chunk.pos + reversed_quickref.pos < chunksize) + rdaddc(&chunk, 0); /* zero-pad */ + for (i = reversed_quickref.pos - 2; i >= 0; i -= 2) + rdaddsn(&chunk, reversed_quickref.text+i, 2); + + assert(chunk.pos == chunksize); + rdaddsn(rs, chunk.text, chunk.pos); + sfree(chunk.text); + sfree(reversed_quickref.text); + curr_chunk++; + } + + /* + * Now free the old index. + */ + while (1) { + struct chm_index_entry *ent = (struct chm_index_entry *) + delpos234(prev_index, 0); + if (!ent) + break; + sfree(ent); + } + freetree234(prev_index); + } + + /* + * Finished! We've reduced to a single chunk. Free the remaining + * index (which must have size 1). + */ + assert(count234(index) == 1); + sfree(delpos234(index, 0)); + freetree234(index); + + /* Fill in the deferred fields in the main header. 
*/ + PUT_32BIT_LSB_FIRST(rs->text + dirhdr_depth_field, depth); + PUT_32BIT_LSB_FIRST(rs->text + dirhdr_root_field, curr_chunk-1); + PUT_32BIT_LSB_FIRST(rs->text + dirhdr_nchunks_field, curr_chunk); +} + +static int sys_start(rdstringc *rs, int code) +{ + int toret = rs->pos; + RDADD_16BIT_LSB_FIRST(rs, code); + RDADD_16BIT_LSB_FIRST(rs, 0); /* length; overwrite later */ + return toret; +} +static void sys_end(rdstringc *rs, int recstart) +{ + PUT_16BIT_LSB_FIRST(rs->text + recstart+2, rs->pos - (recstart+4)); +} + +struct chm_window { + char *name; + char *title; + char *contentsfile; + char *indexfile; + char *rootfile; + int navpaneflags; + int toolbarflags; +}; + +struct chm { + tree234 *files; + tree234 *windows; + tree234 *stringtab; + rdstringc content0; /* outer uncompressed container */ + rdstringc content1; /* compressed subfile */ + rdstringc outfile; + rdstringc stringsfile; + char *title, *contents_filename, *index_filename, *default_topic; + char *default_window; + struct chm_section *rootsecthead, *rootsecttail; + struct chm_section *allsecthead, *allsecttail; +}; + +struct chm_section { + /* Logical links within the section tree structure */ + struct chm_section *firstchild, *lastchild, *nextsibling, *parent; + /* Link all chm_sections together into one big list, in a + * topological order (i.e. 
every section comes after its + * parent) */ + struct chm_section *next; + + char *title, *url; + int tocidx_offset_1, tocidx_offset_2; + int topic_index, urltbl_offset, urlstr_offset; +}; + +struct chm_stringtab_entry { + struct chm *chm; + int strtab_offset; +}; + +static int chm_stringtab_cmp(void *av, void *bv) +{ + const struct chm_stringtab_entry + *a = (const struct chm_stringtab_entry *)av, + *b = (const struct chm_stringtab_entry *)bv; + return strcmp(a->chm->stringsfile.text + a->strtab_offset, + b->chm->stringsfile.text + b->strtab_offset); +} + +static int chm_stringtab_find(void *av, void *bv) +{ + const char *a = (const char *)av; + const struct chm_stringtab_entry + *b = (const struct chm_stringtab_entry *)bv; + return strcmp(a, b->chm->stringsfile.text + b->strtab_offset); +} + +int chm_intern_string(struct chm *chm, const char *string) +{ + struct chm_stringtab_entry *ent; + int size; + + if (!string) + return 0; + + if ((ent = (struct chm_stringtab_entry *)find234( + chm->stringtab, (void *)string, chm_stringtab_find)) == NULL) { + ent = snew(struct chm_stringtab_entry); + ent->chm = chm; + + /* Pad to ensure the string doesn't cross a page boundary. 
*/ + size = strlen(string) + 1; /* include the NUL terminator */ + assert(size < 0x1000); /* avoid really serious trouble */ + while ((chm->stringsfile.pos ^ (chm->stringsfile.pos + size-1)) >> 12) + rdaddc(&chm->stringsfile, 0); + + ent->strtab_offset = chm->stringsfile.pos; + rdaddsc(&chm->stringsfile, string); + rdaddc(&chm->stringsfile, '\0'); + add234(chm->stringtab, ent); + } + return ent->strtab_offset; +} + +struct chm *chm_new(void) +{ + struct chm *chm = snew(struct chm); + chm->files = newtree234(chm_directory_entry_cmp); + chm->windows = newtree234(NULL); + chm->stringtab = newtree234(chm_stringtab_cmp); + chm->content0 = empty_rdstringc; + chm->content1 = empty_rdstringc; + chm->outfile = empty_rdstringc; + chm->stringsfile = empty_rdstringc; + chm->title = NULL; + chm->contents_filename = NULL; + chm->index_filename = NULL; + chm->default_topic = NULL; + chm->default_window = NULL; + chm->rootsecthead = chm->rootsecttail = NULL; + chm->allsecthead = chm->allsecttail = NULL; + chm_intern_string(chm, ""); /* preinitialise the strings table */ + return chm; +} + +void chm_free(struct chm *chm) +{ + struct chm_directory_entry *ent; + struct chm_window *win; + struct chm_stringtab_entry *str; + struct chm_section *sect; + + while ((ent = delpos234(chm->files, 0)) != NULL) { + sfree(ent->filename); + sfree(ent); + } + freetree234(chm->files); + + while ((win = delpos234(chm->windows, 0)) != NULL) { + sfree(win->name); + sfree(win->title); + sfree(win->contentsfile); + sfree(win->indexfile); + sfree(win->rootfile); + sfree(win); + } + freetree234(chm->windows); + + while ((str = delpos234(chm->stringtab, 0)) != NULL) { + sfree(str); + } + freetree234(chm->stringtab); + + for (sect = chm->allsecthead; sect ;) { + struct chm_section *tmp = sect->next; + sfree(sect->title); + sfree(sect->url); + sfree(sect); + sect = tmp; + } + + sfree(chm->content0.text); + sfree(chm->content1.text); + sfree(chm->outfile.text); + sfree(chm->stringsfile.text); + + 
sfree(chm->title); + sfree(chm->contents_filename); + sfree(chm->index_filename); + sfree(chm->default_topic); + sfree(chm->default_window); + + sfree(chm); +} + +static void chm_add_file_internal(struct chm *chm, const char *name, + const char *data, int len, + rdstringc *sect, int which_sect) +{ + struct chm_directory_entry *ent = snew(struct chm_directory_entry); + ent->filename = dupstr(name); + ent->which_content_section = which_sect; + ent->offset_in_content_section = sect->pos; + ent->file_size = len; + add234(chm->files, ent); + rdaddsn(sect, data, len); +} + +static struct chm_directory_entry *chm_find_file( + struct chm *chm, const char *name) +{ + return find234(chm->files, (void *)name, chm_directory_entry_find); +} + +static char *add_leading_slash(const char *str) +{ + char *toret = snewn(2 + strlen(str), char); + toret[0] = '/'; + strcpy(toret+1, str); + return toret; +} + +void chm_add_file(struct chm *chm, const char *name, const char *data, int len) +{ + char *name_with_slash = add_leading_slash(name); + chm_add_file_internal(chm, name_with_slash, data, len, &chm->content1, 1); + sfree(name_with_slash); +} + +/* + * Setters for the string properties of a struct chm. Each one stores + * a private dupstr() copy of its argument, which chm_free() releases. + * Calling a setter again replaces the stored value and frees the old + * one; the copy is taken before the free so that passing the + * currently stored string back in is harmless. + */ +void chm_title(struct chm *chm, const char *title) +{ + char *copy = dupstr(title); + sfree(chm->title); /* release any earlier value */ + chm->title = copy; +} + +void chm_contents_filename(struct chm *chm, const char *name) +{ + char *copy = dupstr(name); + sfree(chm->contents_filename); /* release any earlier value */ + chm->contents_filename = copy; +} + +void chm_index_filename(struct chm *chm, const char *name) +{ + char *copy = dupstr(name); + sfree(chm->index_filename); /* release any earlier value */ + chm->index_filename = copy; +} + +void chm_default_topic(struct chm *chm, const char *name) +{ + char *copy = dupstr(name); + sfree(chm->default_topic); /* release any earlier value */ + chm->default_topic = copy; +} + +void chm_default_window(struct chm *chm, const char *name) +{ + char *copy = dupstr(name); + sfree(chm->default_window); /* release any earlier value */ + chm->default_window = copy; +} + +void chm_add_window(struct chm *chm, const char *winname, const char *title, + const char *contentsfile, const char *indexfile, + const char *rootfile, int navpaneflags, int toolbarflags) +{ + struct chm_window *win = snew(struct chm_window); + win->name = dupstr(winname); + win->title = dupstr(title); + win->contentsfile = 
contentsfile ? dupstr(contentsfile) : NULL; + win->indexfile = indexfile ? dupstr(indexfile) : NULL; + win->rootfile = dupstr(rootfile); + win->navpaneflags = navpaneflags; + win->toolbarflags = toolbarflags; + addpos234(chm->windows, win, count234(chm->windows)); +} + +struct chm_section *chm_add_section(struct chm *chm, + struct chm_section *parent, + const char *title, const char *url) +{ + struct chm_section *sect = snew(struct chm_section); + sect->title = dupstr(title); + sect->url = dupstr(url); + sect->firstchild = sect->lastchild = sect->nextsibling = sect->next = NULL; + if (parent) { + sect->parent = parent; + if (parent->lastchild) { + parent->lastchild->nextsibling = sect; + } else { + parent->firstchild = sect; + } + parent->lastchild = sect; + } else { + sect->parent = NULL; + if (chm->rootsecttail) { + chm->rootsecttail->nextsibling = sect; + } else { + chm->rootsecthead = sect; + } + chm->rootsecttail = sect; + } + if (chm->allsecttail) { + chm->allsecttail->next = sect; + } else { + chm->allsecthead = sect; + } + chm->allsecttail = sect; + return sect; +} + +struct chm_urltbl_entry { + /* + * Records of #URLTBL, before their order is finalised. + * + * The first word of this record is listed as 'unknown, perhaps + * some kind of unique ID' in chmspec. But my observation in HTML + * Help Workshop's output is that it's actually a hash of the + * target URL, and the file is sorted by them. chm_url_hash() + * below implements the hash algorithm. 
+ */ + unsigned long hash; + int topic_index; + int urlstr_pos; + int topics_offset_to_update; +}; + +/* + * tree234 comparison function for struct chm_urltbl_entry: orders + * primarily by URL hash, and breaks ties between equal hashes by + * topic_index. Returns -1, 0 or +1; the result must be antisymmetric + * (cmp(a,b) == -cmp(b,a)) for the sorted tree to stay consistent. + */ +int chm_urltbl_entry_cmp(void *av, void *bv) +{ + const struct chm_urltbl_entry + *a = (const struct chm_urltbl_entry *)av, + *b = (const struct chm_urltbl_entry *)bv; + if (a->hash < b->hash) return -1; + if (a->hash > b->hash) return +1; + if (a->topic_index < b->topic_index) return -1; + if (a->topic_index > b->topic_index) return +1; + return 0; +} + +static unsigned long chm_url_hash(const char *str) +{ + const char *p; + unsigned long hash; + + hash = 0; + for (p = str; *p; p++) { + /* + * Multiply `hash' by 43. + */ + { + unsigned long bottom, top; + bottom = (hash & 0xFFFFUL) * 43; + top = ((hash >> 16) & 0xFFFFUL) * 43; + top += (bottom >> 16); + bottom &= 0xFFFFUL; + top &= 0xFFFFUL; + hash = (top << 16) | bottom; + } + + /* + * Add the mapping value for this byte to `hash'. + */ + { + int c = (signed char)*p; + + /* + * Translation rule determined by getting hhc.exe to hash + * a lot of strings and analysing the results. I was able + * to confirm this mapping rule for all byte values except + * for NUL, CR, LF, ^Z and backslash: the first four of + * those I couldn't find any way to get hhc to insert into + * a URL, and the last one is automatically translated + * into '/', presumably for reasons of Windows vs URI path + * syntax normalisation. + */ + int val = (c == '/' ? 0x2c : c <= 'Z' ? c-0x30 : c-0x50); + + if (val > 0 && hash > (0xFFFFFFFFUL - val)) { + hash -= (0xFFFFFFFFUL - val) + 1; + } else if (val < 0 && hash < (unsigned long)-val) { + hash += (0xFFFFFFFFUL + val) + 1; + } else + hash += val; + } + } + + /* + * Special case: an output hash of 0 is turned into 1, which I + * conjecture is so that in some context or other 0 can be + * reserved to mean something like 'null' or 'no hash value + * available'. 
+ */ + if (hash == 0) + hash = 1; + + return hash; +} + +const char *chm_build(struct chm *chm, int *outlen) +{ + rdstringc dir = {0, 0, NULL}; + rdstringc sysfile = {0, 0, NULL}; + struct LZXEncodedFile *ef; + int rec; + + chm_add_file_internal(chm, "/", "", 0, &chm->content0, 0); + + RDADD_32BIT_LSB_FIRST(&sysfile, 3); /* #SYSTEM file version */ + + rec = sys_start(&sysfile, 9); /* identify CHM-producing tool */ + rdaddsc(&sysfile, "Halibut, "); + rdaddsc(&sysfile, version); + rdaddc(&sysfile, '\0'); + sys_end(&sysfile, rec); + + rec = sys_start(&sysfile, 12); /* number of 'information types' */ + RDADD_32BIT_LSB_FIRST(&sysfile, 0); + sys_end(&sysfile, rec); + rec = sys_start(&sysfile, 15); /* checksum of 'information types' */ + RDADD_32BIT_LSB_FIRST(&sysfile, 0); + sys_end(&sysfile, rec); + /* actual section of 'information types', whatever those might be */ + chm_add_file_internal(chm, "/#ITBITS", "", 0, &chm->content0, 0); + + if (chm->title) { + rec = sys_start(&sysfile, 3); /* document title */ + rdaddsc(&sysfile, chm->title); + rdaddc(&sysfile, '\0'); + sys_end(&sysfile, rec); + } + + if (chm->default_topic) { + rec = sys_start(&sysfile, 2); + rdaddsc(&sysfile, chm->default_topic); + rdaddc(&sysfile, '\0'); + sys_end(&sysfile, rec); + } + + if (chm->contents_filename) { + rec = sys_start(&sysfile, 0); + rdaddsc(&sysfile, chm->contents_filename); + rdaddc(&sysfile, '\0'); + sys_end(&sysfile, rec); + } + + if (chm->index_filename) { + rec = sys_start(&sysfile, 1); + rdaddsc(&sysfile, chm->index_filename); + rdaddc(&sysfile, '\0'); + sys_end(&sysfile, rec); + } + + if (chm->default_window) { + rec = sys_start(&sysfile, 5); + rdaddsc(&sysfile, chm->default_window); + rdaddc(&sysfile, '\0'); + sys_end(&sysfile, rec); + } + + rec = sys_start(&sysfile, 4); + RDADD_32BIT_LSB_FIRST(&sysfile, 0x809); /* language again (FIXME) */ + RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* DBCS: off */ + RDADD_32BIT_LSB_FIRST(&sysfile, 1); /* full-text search: on */ + 
RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no KLinks (whatever they are) */ + RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no ALinks (whatever they are) */ + RDADD_32BIT_LSB_FIRST(&sysfile, 0x11223344); /* timestamp LSW (FIXME) */ + RDADD_32BIT_LSB_FIRST(&sysfile, 0x55667788); /* timestamp MSW (FIXME) */ + RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */ + RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */ + sys_end(&sysfile, rec); + + { + rdstringc winfile = {0, 0, NULL}; + int i, j, s; + struct chm_window *win; + + RDADD_32BIT_LSB_FIRST(&winfile, count234(chm->windows)); + RDADD_32BIT_LSB_FIRST(&winfile, 196); /* size of each entry */ + for (i = 0; + (win = (struct chm_window *)index234(chm->windows, i)) != NULL; + i++) { + RDADD_32BIT_LSB_FIRST(&winfile, 196); /* size of entry */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* not Unicode */ + s = chm_intern_string(chm, win->name); + RDADD_32BIT_LSB_FIRST(&winfile, s); + /* Bitmap of which fields are used: 2 means nav pane + * style, 0x200 means whether nav pane is initially + * closed, 0x400 means tab position */ + RDADD_32BIT_LSB_FIRST(&winfile, 0x502); + /* Nav pane styles: + * 0x40000 = user can control window size/pos + * 0x20000 = advanced full-text search UI + * 0x00400 = include a search tab + * 0x00100 = keep contents/index in sync with current topic + * 0x00020 = three-pane window */ + RDADD_32BIT_LSB_FIRST(&winfile, win->navpaneflags); + s = chm_intern_string(chm, win->title); + RDADD_32BIT_LSB_FIRST(&winfile, s); + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window styles */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window ex styles */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window rect.left */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window rect.top */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window rect.right */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window rect.bottom */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window show state */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */ + 
RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* nav pane width */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* topic rect.left */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* topic rect.top */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* topic rect.right */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* topic rect.bottom */ + s = chm_intern_string(chm, win->contentsfile); + RDADD_32BIT_LSB_FIRST(&winfile, s); + s = chm_intern_string(chm, win->indexfile); + RDADD_32BIT_LSB_FIRST(&winfile, s); + s = chm_intern_string(chm, win->rootfile); + RDADD_32BIT_LSB_FIRST(&winfile, s); + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Home button target */ + RDADD_32BIT_LSB_FIRST(&winfile, win->toolbarflags); + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* nav pane initially open */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* default nav pane = TOC */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* nav pane tabs at top */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* WM_NOTIFY id */ + for (j = 0; j < 20; j++) + rdaddc(&winfile, 0); /* tab order block */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* history to keep */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 1 button target */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 2 button target */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 1 button text */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 2 button text */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window min rect.left */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window min rect.top */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window min rect.right */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window min rect.bottom */ + RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no information types */ + 
RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no custom tabs */ + } + assert(winfile.pos == 8 + 196 * count234(chm->windows)); + chm_add_file_internal(chm, "/#WINDOWS", winfile.text, winfile.pos, + &chm->content1, 1); + sfree(winfile.text); + } + + { + struct chm_section *sect; + rdstringc tocidx = {0, 0, NULL}; + rdstringc topics = {0, 0, NULL}; + rdstringc urltbl = {0, 0, NULL}; + rdstringc urlstr = {0, 0, NULL}; + int i, index, s, n_tocidx_3; + struct chm_directory_entry *contentsfile = NULL, *indexfile = NULL; + tree234 *urltbl_pre; + struct chm_urltbl_entry *urltbl_entry; + + urltbl_pre = newtree234(chm_urltbl_entry_cmp); + + for (i = 0; i < 0x1000; i++) + rdaddc(&tocidx, 0); + + /* Write a header of one zero byte at the start of #URLSTR. + * chmspec says this doesn't always appear, and is unclear on + * what this is for, but I suspect it serves the same purpose + * as the zero byte at the start of #STRINGS, namely that it + * arranges that an absent string in the following records can + * be represented by an offset of zero which will + * automatically point to this byte and hence indicate the + * empty string. */ + rdaddc(&urlstr, 0); + + if (chm->contents_filename) { + char *withslash = add_leading_slash(chm->contents_filename); + contentsfile = chm_find_file(chm, withslash); + sfree(withslash); + assert(contentsfile); + } + if (chm->index_filename) { + char *withslash = add_leading_slash(chm->index_filename); + indexfile = chm_find_file(chm, withslash); + sfree(withslash); + assert(indexfile); + } + + index = 0; + + /* #TOCIDX header field pointing at start of type-1 records */ + PUT_32BIT_LSB_FIRST(tocidx.text + 0, tocidx.pos); + + /* + * First pass over the section structure, generating in + * parallel one of the multiple structure types in #TOCIDX and + * the sole record in all the other files. 
+ */ + for (sect = chm->allsecthead; sect; sect = sect->next) { + /* Size of the first kind of #TOCIDX record varies between + * leaf and internal nodes */ + int tocidx_size_1 = (sect->firstchild ? 0x1c : 0x14); + + /* + * Flags: + * - 8 means there's a local filename, which in _our_ CHM + * files there always is. If you unset this flag, you + * get a node in the contents treeview which doesn't + * open any page when clicked, and exists solely to + * contain children; in that situation the topic index + * field at position 0x08 in this record also stops + * being an index into #TOPICS and instead becomes an + * index into #STRINGS giving the node's title. + * - 4 apparently means the node should have the 'book' + * rather than 'page' icon in the TOC tree view in the + * help viewer + * - 1 means the node has a subtree in the tree view, + * which I take to mean (contrary to chmspec) that + * _this_ is the flag that means this node is a + * non-leaf node and hence has the two extra fields for + * first-child and whatever the other one means + */ + unsigned tocidx_1_flags = (sect->firstchild ? 0x5 : 0) | 8; + + int urlstr_size; + + /* Pad to ensure the record isn't split between + * 0x1000-byte pages of the file */ + while ((tocidx.pos ^ (tocidx.pos + tocidx_size_1 - 1)) >> 12) + RDADD_32BIT_LSB_FIRST(&tocidx, 0); + + sect->topic_index = index++; + + /* Write the type-1 record in #TOCIDX */ + sect->tocidx_offset_1 = tocidx.pos; + RDADD_16BIT_LSB_FIRST(&tocidx, 0); /* unknown */ + /* chmspec thinks this 16-bit field is 'unknown', but in + * my observations it appears to be the index of an entry + * in the #TOCIDX type-3 region. But I still don't know + * what those are really for. */ + RDADD_16BIT_LSB_FIRST(&tocidx, sect->topic_index); + RDADD_32BIT_LSB_FIRST(&tocidx, tocidx_1_flags); + RDADD_32BIT_LSB_FIRST(&tocidx, sect->topic_index); + RDADD_32BIT_LSB_FIRST(&tocidx, sect->parent ? 
+ sect->parent->tocidx_offset_1 : 0); + RDADD_32BIT_LSB_FIRST(&tocidx, 0); /* 'next' ptr; fill in later */ + if (sect->firstchild) { + RDADD_32BIT_LSB_FIRST(&tocidx, 0); /* child; fill in later */ + RDADD_32BIT_LSB_FIRST(&tocidx, 0); /* unknown */ + } + assert(tocidx.pos == sect->tocidx_offset_1 + tocidx_size_1); + + /* Figure out our offset in #URLSTR, by ensuring we're not + * going to overrun a page boundary (as usual). For this + * we need our record length, which is two 32-bit fields + * plus a NUL-terminated copy of the target file name / URL. */ + urlstr_size = 8 + strlen(sect->url) + 1; + assert(urlstr_size < 0x1000); /* must _fit_ in a page! */ + while ((urlstr.pos ^ (urlstr.pos + urlstr_size - 1)) >> 12) + rdaddc(&urlstr, 0); + + /* + * Save everything we know so far about the #URLTBL record + * we'll need to write. + */ + urltbl_entry = snew(struct chm_urltbl_entry); + urltbl_entry->hash = chm_url_hash(sect->url); + urltbl_entry->topic_index = sect->topic_index; + urltbl_entry->urlstr_pos = urlstr.pos; + add234(urltbl_pre, urltbl_entry); + + /* Write the #TOPICS entry */ + RDADD_32BIT_LSB_FIRST(&topics, sect->tocidx_offset_1); + s = chm_intern_string(chm, sect->title); + RDADD_32BIT_LSB_FIRST(&topics, s); + urltbl_entry->topics_offset_to_update = topics.pos; + RDADD_32BIT_LSB_FIRST(&topics, 0); /* fill in later */ + RDADD_16BIT_LSB_FIRST(&topics, 6); /* flag as 'in contents' */ + RDADD_16BIT_LSB_FIRST(&topics, 0); /* unknown */ + + /* + * Write the #URLSTR entry. + */ + RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* URL string (null) */ + RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* FrameName location (null) */ + rdaddsc(&urlstr, sect->url); /* 'Local' */ + rdaddc(&urlstr, '\0'); + } + + /* + * Add entries in #URLTBL, #URLSTR and #TOPICS for the + * contents and index files. They don't form part of the tree + * in #TOCIDX, though. 
+ */ + if (chm->contents_filename) { + urltbl_entry = snew(struct chm_urltbl_entry); + urltbl_entry->hash = chm_url_hash(chm->contents_filename); + urltbl_entry->topic_index = index; + urltbl_entry->urlstr_pos = urlstr.pos; + add234(urltbl_pre, urltbl_entry); + + /* #TOPICS entry */ + RDADD_32BIT_LSB_FIRST(&topics, 0); /* no #TOCIDX entry */ + RDADD_32BIT_LSB_FIRST(&topics, 0xFFFFFFFFU); /* no title either */ + urltbl_entry->topics_offset_to_update = topics.pos; + RDADD_32BIT_LSB_FIRST(&topics, 0); /* fill in later */ + RDADD_16BIT_LSB_FIRST(&topics, 2); /* flag as 'not in contents' */ + RDADD_16BIT_LSB_FIRST(&topics, 0); /* unknown */ + + /* #URLSTR entry */ + RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* URL string (null) */ + RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* FrameName location (null) */ + rdaddsc(&urlstr, chm->contents_filename); /* 'Local' */ + rdaddc(&urlstr, '\0'); + + /* And add the entry in #SYSTEM that cites the hash of the + * #URLTBL entry. */ + rec = sys_start(&sysfile, 11); + RDADD_32BIT_LSB_FIRST(&sysfile, urltbl_entry->hash); + sys_end(&sysfile, rec); + + index++; + } + if (chm->index_filename) { + urltbl_entry = snew(struct chm_urltbl_entry); + urltbl_entry->hash = chm_url_hash(chm->index_filename); + urltbl_entry->topic_index = index; + urltbl_entry->urlstr_pos = urlstr.pos; + add234(urltbl_pre, urltbl_entry); + + /* #TOPICS entry */ + RDADD_32BIT_LSB_FIRST(&topics, 0); /* no #TOCIDX entry */ + RDADD_32BIT_LSB_FIRST(&topics, 0xFFFFFFFFU); /* no title either */ + urltbl_entry->topics_offset_to_update = topics.pos; + RDADD_32BIT_LSB_FIRST(&topics, 0); /* fill in later */ + RDADD_16BIT_LSB_FIRST(&topics, 2); /* flag as 'not in contents' */ + RDADD_16BIT_LSB_FIRST(&topics, 0); /* unknown */ + + /* #URLSTR entry */ + RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* URL string (null) */ + RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* FrameName location (null) */ + rdaddsc(&urlstr, chm->index_filename); /* 'Local' */ + rdaddc(&urlstr, '\0'); + + /* And add the entry in #SYSTEM 
that cites the hash of the + * #URLTBL entry. */ + rec = sys_start(&sysfile, 7); + RDADD_32BIT_LSB_FIRST(&sysfile, urltbl_entry->hash); + sys_end(&sysfile, rec); + + index++; + } + + /* + * Now we've got all our #URLTBL entries, so we can write out + * #URLTBL itself. + */ + while ((urltbl_entry = delpos234(urltbl_pre, 0)) != NULL) { + /* Pad #URLTBL to the beginning of this section's entry. + * Entries are all 12 bytes long, but again there's some + * padding to ensure that they don't cross a page + * boundary. */ + while ((urltbl.pos ^ (urltbl.pos + 12 - 1)) >> 12) + RDADD_32BIT_LSB_FIRST(&urltbl, 0); + + /* Fill in the link from #TOPICS to this entry's offset */ + PUT_32BIT_LSB_FIRST(topics.text + + urltbl_entry->topics_offset_to_update, + urltbl.pos); + + /* Write the entry itself. */ + RDADD_32BIT_LSB_FIRST(&urltbl, urltbl_entry->hash); + RDADD_32BIT_LSB_FIRST(&urltbl, urltbl_entry->topic_index); + RDADD_32BIT_LSB_FIRST(&urltbl, urltbl_entry->urlstr_pos); + + sfree(urltbl_entry); + } + freetree234(urltbl_pre); + + /* + * Small follow-up pass filling in forward-pointing offset + * fields in the #TOCIDX type-1 records which the previous + * pass didn't know yet. + */ + for (sect = chm->allsecthead; sect; sect = sect->next) { + if (sect->nextsibling) + PUT_32BIT_LSB_FIRST(tocidx.text + sect->tocidx_offset_1 + 0x10, + sect->nextsibling->tocidx_offset_1); + if (sect->firstchild) + PUT_32BIT_LSB_FIRST(tocidx.text + sect->tocidx_offset_1 + 0x14, + sect->firstchild->tocidx_offset_1); + } + + /* #TOCIDX header field pointing at start of type-2 records */ + PUT_32BIT_LSB_FIRST(tocidx.text + 0xC, tocidx.pos); + + /* + * Write the #TOCIDX type-2 records, which are just 4 bytes + * long and just contain another copy of each topic's index, + * but we need to have them there so that the type-3 records + * can refer to them by offset. 
+ */ + for (sect = chm->allsecthead; sect; sect = sect->next) { + sect->tocidx_offset_2 = tocidx.pos; + RDADD_32BIT_LSB_FIRST(&tocidx, sect->topic_index); + } + + /* Align the current #TOCIDX offset to 16 bytes */ + while (tocidx.pos & 0xF) + rdaddc(&tocidx, 0); + + /* #TOCIDX header field pointing at start of type-3 records */ + PUT_32BIT_LSB_FIRST(tocidx.text + 0x4, tocidx.pos); + + /* + * Write the #TOCIDX type-3 records. + * + * In help files I've examined, there are fewer of these than + * you might expect; apparently not all sections rate one for + * some reason. For the moment I'm just writing out one for + * every section. + */ + n_tocidx_3 = 0; + for (sect = chm->allsecthead; sect; sect = sect->next) { + RDADD_32BIT_LSB_FIRST(&tocidx, sect->tocidx_offset_1); + RDADD_32BIT_LSB_FIRST(&tocidx, sect->topic_index + 666); /* ?! */ + RDADD_32BIT_LSB_FIRST(&tocidx, sect->tocidx_offset_2); + RDADD_32BIT_LSB_FIRST(&tocidx, sect->topic_index); + n_tocidx_3++; + } + + /* #TOCIDX header field giving number of type-3 records */ + PUT_32BIT_LSB_FIRST(tocidx.text + 0x8, n_tocidx_3); + + chm_add_file_internal(chm, "/#TOCIDX", tocidx.text, tocidx.pos, + &chm->content1, 1); + chm_add_file_internal(chm, "/#TOPICS", topics.text, topics.pos, + &chm->content1, 1); + chm_add_file_internal(chm, "/#URLTBL", urltbl.text, urltbl.pos, + &chm->content1, 1); + chm_add_file_internal(chm, "/#URLSTR", urlstr.text, urlstr.pos, + &chm->content1, 1); + + /* + * Write #IDXHDR (and its mirror in #SYSTEM), which we + * couldn't do until we knew how many topic nodes there were. + */ + { + int idxhdr_start; + + rec = sys_start(&sysfile, 13); + idxhdr_start = sysfile.pos; + + rdaddsc(&sysfile, "T#SM"); /* #IDXHDR magic */ + RDADD_32BIT_LSB_FIRST(&sysfile, 0x12345678); /* checksum? 
FIXME */ + RDADD_32BIT_LSB_FIRST(&sysfile, 1); /* unknown */ + RDADD_32BIT_LSB_FIRST(&sysfile, index); /* number of topic nodes */ + RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */ + RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no image list */ + RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */ + RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* top-level node is + * not a folder */ + RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no bg colour */ + RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no fg colour */ + RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no font spec */ + RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no window style */ + RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no ex win style */ + RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* unknown */ + RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no frame name */ + RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no window name */ + RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no information types */ + RDADD_32BIT_LSB_FIRST(&sysfile, 1); /* unknown */ + RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no merge files */ + RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */ + while (sysfile.pos - idxhdr_start < 4096) + rdaddc(&sysfile, 0); + + chm_add_file_internal(chm, "/#IDXHDR", sysfile.text + idxhdr_start, + sysfile.pos - idxhdr_start, + &chm->content1, 1); + sys_end(&sysfile, rec); + } + + sfree(tocidx.text); + sfree(topics.text); + sfree(urltbl.text); + sfree(urlstr.text); + } + + /* Missing from #SYSTEM: */ + /* 10 (4-byte timestamp) */ + /* 6 (logical file name) */ + + chm_add_file_internal(chm, "/#SYSTEM", sysfile.text, sysfile.pos, + &chm->content0, 0); + sfree(sysfile.text); + + chm_add_file_internal(chm, "/#STRINGS", chm->stringsfile.text, + chm->stringsfile.pos, &chm->content1, 1); + + /* + * ::DataSpace/NameList, giving the names of the two content sections. 
+ */ + { + rdstringc dsnl = {0, 0, NULL}; + const char *p; + int stringstart; + + RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* total file size; fill in later */ + RDADD_16BIT_LSB_FIRST(&dsnl, 2); /* number of names */ + + RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* string length; fill in later */ + stringstart = dsnl.pos; + for (p = "Uncompressed"; *p; p++) + RDADD_16BIT_LSB_FIRST(&dsnl, *p); + PUT_16BIT_LSB_FIRST(dsnl.text + stringstart - 2, + (dsnl.pos - stringstart) / 2); + RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* NUL terminator */ + + RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* string length; fill in later */ + stringstart = dsnl.pos; + for (p = "MSCompressed"; *p; p++) + RDADD_16BIT_LSB_FIRST(&dsnl, *p); + PUT_16BIT_LSB_FIRST(dsnl.text + stringstart - 2, + (dsnl.pos - stringstart) / 2); + RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* NUL terminator */ + + PUT_16BIT_LSB_FIRST(dsnl.text, dsnl.pos / 2); + + chm_add_file_internal(chm, "::DataSpace/NameList", dsnl.text, dsnl.pos, + &chm->content0, 0); + + sfree(dsnl.text); + } + + /* + * Actually compress the compressed-data section, load the + * compressed version of it into the containing uncompressed + * section, and write the auxiliary files describing it. 
+ */ + { + rdstringc rs = {0, 0, NULL}; + const char *p; + int orig_decomp_size = chm->content1.pos; + size_t i; + + while (chm->content1.pos & 0x7FFF) + rdaddc(&chm->content1, 0); /* pad to a realign-interval boundary */ + ef = lzx(chm->content1.text, chm->content1.pos, 0x8000, 0x10000); + chm_add_file_internal( + chm, "::DataSpace/Storage/MSCompressed/Content", + (char *)ef->data, ef->data_len, &chm->content0, 0); + + for (p = "{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}"; *p; p++) + RDADD_16BIT_LSB_FIRST(&rs, *p); + rs.pos = 0x26; /* this file is always written truncated :-) */ + chm_add_file_internal( + chm, "::DataSpace/Storage/MSCompressed/Transform/List", + rs.text, rs.pos, &chm->content0, 0); + rs.pos = 0; + + RDADD_32BIT_LSB_FIRST(&rs, orig_decomp_size); + RDADD_32BIT_LSB_FIRST(&rs, 0); /* high word of 64-bit size */ + chm_add_file_internal( + chm, "::DataSpace/Storage/MSCompressed/SpanInfo", + rs.text, rs.pos, &chm->content0, 0); + rs.pos = 0; + + RDADD_32BIT_LSB_FIRST(&rs, 6); /* file size */ + rdaddsc(&rs, "LZXC"); /* compression type identifier */ + RDADD_32BIT_LSB_FIRST(&rs, 2); /* version */ + RDADD_32BIT_LSB_FIRST(&rs, 2); /* reset interval in units of 2^15 */ + RDADD_32BIT_LSB_FIRST(&rs, 2); /* window size in units of 2^15 */ + RDADD_32BIT_LSB_FIRST(&rs, 1); /* reset interval multiplier */ + RDADD_32BIT_LSB_FIRST(&rs, 0); /* unknown */ + chm_add_file_internal( + chm, "::DataSpace/Storage/MSCompressed/ControlData", + rs.text, rs.pos, &chm->content0, 0); + rs.pos = 0; + + RDADD_32BIT_LSB_FIRST(&rs, 2); /* unknown (version number?) 
*/ + RDADD_32BIT_LSB_FIRST(&rs, ef->n_resets); /* reset table length */ + RDADD_32BIT_LSB_FIRST(&rs, 8); /* reset table entry size */ + RDADD_32BIT_LSB_FIRST(&rs, 0x28); /* reset table offset */ + RDADD_32BIT_LSB_FIRST(&rs, orig_decomp_size); /* uncompressed len */ + RDADD_32BIT_LSB_FIRST(&rs, 0); /* MSW */ + RDADD_32BIT_LSB_FIRST(&rs, ef->data_len); /* compressed len */ + RDADD_32BIT_LSB_FIRST(&rs, 0); /* MSW */ + RDADD_32BIT_LSB_FIRST(&rs, 0x8000); /* realign interval */ + RDADD_32BIT_LSB_FIRST(&rs, 0); /* MSW */ + for (i = 0; i < ef->n_resets; i++) { + RDADD_32BIT_LSB_FIRST(&rs, ef->reset_byte_offsets[i]); + RDADD_32BIT_LSB_FIRST(&rs, 0); /* MSW */ + } + chm_add_file_internal( + chm, "::DataSpace/Storage/MSCompressed/Transform/" + "{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable", + rs.text, rs.pos, &chm->content0, 0); + rs.pos = 0; + } + + sfree(ef->data); + sfree(ef->reset_byte_offsets); + sfree(ef); + + directory(&dir, chm->files); + itsf(&chm->outfile, &dir, &chm->content0); + sfree(dir.text); + + assert(outlen); + *outlen = chm->outfile.pos; + return chm->outfile.text; +} diff --git a/winchm.h b/winchm.h new file mode 100644 index 0000000..caee3fc --- /dev/null +++ b/winchm.h @@ -0,0 +1,21 @@ +struct chm; + +struct chm *chm_new(void); +void chm_free(struct chm *chm); +void chm_add_file(struct chm *chm, const char *name, + const char *data, int len); +void chm_title(struct chm *chm, const char *title); +void chm_contents_filename(struct chm *chm, const char *name); +void chm_index_filename(struct chm *chm, const char *name); +void chm_default_topic(struct chm *chm, const char *name); +void chm_default_window(struct chm *chm, const char *name); +void chm_add_window(struct chm *chm, const char *winname, const char *title, + const char *contentsfile, const char *indexfile, + const char *rootfile, int navpaneflags, int toolbarflags); + +struct chm_section; +struct chm_section *chm_add_section(struct chm *chm, + struct chm_section *parent, + const 
char *title, const char *url); + +const char *chm_build(struct chm *chm, int *outlen); |