From 7e330bca5bc45fa7feb6c31da5c3f1b6b44c208c Mon Sep 17 00:00:00 2001 From: Simon Tatham Date: Sat, 13 May 2017 18:33:51 +0100 Subject: New output mode to write CHM files directly. I became aware a few months ago that enough is known about CHM files that free software _can_ write them without benefit of the MS HTML Help compiler - in particular there's a thing called 'chmcmd' in the Free Pascal Compiler software distribution which is more or less a drop-in replacement for hhc.exe itself. But although depending on chmcmd would be a bit nicer than depending on hhc.exe, Halibut has always preferred to do the whole job itself if it can. So here's my own from-scratch code to generate CHM directly from Halibut source. The new output mode is presented as a completely separate top-level thing independent of HTML mode. Of course, in reality, the two back ends share all of the HTML-generation code, differing only in a few configuration defaults and the minor detail of what will be _done_ with each chunk of HTML as it's generated (this is what the recent refactoring in b3db1cce3 was in aid of). But even so, the output modes are properly independent from a user-visible-behaviour perspective: they use parallel sets of config directives rather than sharing the same ones (you can set \cfg{html-foo} and \cfg{chm-foo} independently, for a great many values of 'foo'), and you can run either or neither or both as you choose in a given run of Halibut. The old HTML Help support, in the form of some config directives for HTML mode to output the auxiliary files needed by hhc.exe, is still around and should still work the same as it always did. I have no real intention of removing it, partly for the reasons stated in the manual (someone might find it useful to have Halibut generate the .HHP file once and then make manual adjustments to it, so that they can change styling options that the direct CHM output doesn't permit), and mostly because it wouldn't save a great deal of code or complexity in any case - the big two of the three auxiliary files (the HHC and HHK) have to be generated _anyway_ to go inside the .CHM, so all the code would have to stay around regardless. --- bk_html.c | 393 +++++++++++++++++++++++++++++++++++++++++++++++++++++--------- 1 file changed, 341 insertions(+), 52 deletions(-) (limited to 'bk_html.c') diff --git a/bk_html.c b/bk_html.c index 9a08fa2..784e3ca 100644 --- a/bk_html.c +++ b/bk_html.c @@ -24,6 +24,7 @@ #include #include #include "halibut.h" +#include "winchm.h" #define is_heading_type(type) ( (type) == para_Title || \ (type) == para_Chapter || \ @@ -57,6 +58,8 @@ typedef struct { char *chm_filename, *hhp_filename, *hhc_filename, *hhk_filename; char **template_fragments; int ntfragments; + char **chm_extrafiles, **chm_extranames; + int nchmextrafiles, chmextrafilesize; char *head_end, *body_start, *body_end, *addr_start, *addr_end; char *body_tag, *nav_attr; wchar_t *author, *description; @@ -94,6 +97,10 @@ struct htmlfile { * more than once. */ int temp; + /* + * CHM section structure, if we're generating a CHM. + */ + struct chm_section *chmsect; }; struct htmlsect { @@ -193,6 +200,48 @@ void ho_setup_stdio(htmloutput *ho, FILE *fp) ho->write = ho_write_stdio; ho->write_ctx = fp; } + +struct chm_output { + struct chm *chm; + char *filename; + rdstringc rs; +}; +void ho_write_chm(void *write_ctx, const char *data, int len) +{ + struct chm_output *co = (struct chm_output *)write_ctx; + if (len == -1) { + chm_add_file(co->chm, co->filename, co->rs.text, co->rs.pos); + sfree(co->filename); + sfree(co->rs.text); + sfree(co); + } else { + rdaddsn(&co->rs, data, len); + } +} +void ho_setup_chm(htmloutput *ho, struct chm *chm, const char *filename) +{ + struct chm_output *co = snew(struct chm_output); + + co->chm = chm; + co->rs = empty_rdstringc; + co->filename = dupstr(filename); + + ho->write_ctx = co; + ho->write = ho_write_chm; +} + +void ho_write_rdstringc(void *write_ctx, const char *data, int len) +{ + rdstringc *rs = (rdstringc *)write_ctx; + if (len > 0) + rdaddsn(rs, data, len); +} +void ho_setup_rdstringc(htmloutput *ho, rdstringc *rs) +{ + ho->write_ctx = rs; + ho->write = ho_write_rdstringc; +} + void ho_string(htmloutput *ho, const char *string) { ho->write(ho->write_ctx, string, strlen(string)); @@ -286,14 +335,15 @@ static void html_section_title(htmloutput *ho, htmlsect *s, htmlfile *thisfile, keywordlist *keywords, htmlconfig *cfg, int real); -static htmlconfig html_configure(paragraph *source) { +static htmlconfig html_configure(paragraph *source, int chm_mode) +{ htmlconfig ret; paragraph *p; /* * Defaults. */ - ret.leaf_level = 2; + ret.leaf_level = chm_mode ? -1 /* infinite */ : 2; ret.achapter.just_numbers = FALSE; ret.achapter.number_at_all = TRUE; ret.achapter.number_suffix = L": "; @@ -305,20 +355,29 @@ static htmlconfig html_configure(paragraph *source) { ret.ncdepths = 0; ret.contents_depths = 0; ret.visible_version_id = TRUE; - ret.address_section = TRUE; + ret.address_section = chm_mode ? FALSE : TRUE; ret.leaf_contains_contents = FALSE; ret.leaf_smallest_contents = 4; - ret.navlinks = TRUE; + ret.navlinks = chm_mode ? FALSE : TRUE; ret.rellinks = TRUE; ret.single_filename = dupstr("Manual.html"); ret.contents_filename = dupstr("Contents.html"); ret.index_filename = dupstr("IndexPage.html"); ret.template_filename = dupstr("%n.html"); - ret.chm_filename = ret.hhp_filename = NULL; - ret.hhc_filename = ret.hhk_filename = NULL; + if (chm_mode) { + ret.chm_filename = dupstr("output.chm"); + ret.hhc_filename = dupstr("contents.hhc"); + ret.hhk_filename = dupstr("index.hhk"); + ret.hhp_filename = NULL; + } else { + ret.chm_filename = ret.hhp_filename = NULL; + ret.hhc_filename = ret.hhk_filename = NULL; + } ret.ntfragments = 1; ret.template_fragments = snewn(ret.ntfragments, char *); ret.template_fragments[0] = dupstr("%b"); + ret.chm_extrafiles = ret.chm_extranames = NULL; + ret.nchmextrafiles = ret.chmextrafilesize = 0; ret.head_end = ret.body_tag = ret.body_start = ret.body_end = ret.addr_start = ret.addr_end = ret.nav_attr = NULL; ret.author = ret.description = NULL; @@ -368,11 +427,20 @@ static htmlconfig html_configure(paragraph *source) { for (p = source; p; p = p->next) { if (p->type == para_Config) { wchar_t *k = p->keyword; + int generic = FALSE; - if (!ustrnicmp(k, L"html-", 5)) { + if (!chm_mode && !ustrnicmp(k, L"html-", 5)) { k += 5; - } else if (!ustrnicmp(k, L"xhtml-", 6)) { + } else if (!chm_mode && !ustrnicmp(k, L"xhtml-", 6)) { k += 6; + } else if (chm_mode && !ustrnicmp(k, L"chm-", 4)) { + k += 4; + } else if (!ustrnicmp(k, L"htmlall-", 8)) { + k += 8; + /* In this mode, only accept directives that don't + * vary completely between the HTML and CHM output + * types. */ + generic = TRUE; } else { continue; } @@ -578,39 +646,78 @@ static htmlconfig html_configure(paragraph *source) { ret.pre_versionid = uadv(k); } else if (!ustricmp(k, L"post-versionid")) { ret.post_versionid = uadv(k); - } else if (!ustricmp(k, L"mshtmlhelp-chm")) { + } else if (!generic && !ustricmp( + k, chm_mode ? L"filename" : L"mshtmlhelp-chm")) { sfree(ret.chm_filename); ret.chm_filename = dupstr(adv(p->origkeyword)); - } else if (!ustricmp(k, L"mshtmlhelp-project")) { - sfree(ret.hhp_filename); - ret.hhp_filename = dupstr(adv(p->origkeyword)); - } else if (!ustricmp(k, L"mshtmlhelp-contents")) { + } else if (!generic && !ustricmp( + k, chm_mode ? L"contents-name" : + L"mshtmlhelp-contents")) { sfree(ret.hhc_filename); ret.hhc_filename = dupstr(adv(p->origkeyword)); - } else if (!ustricmp(k, L"mshtmlhelp-index")) { + } else if (!generic && !ustricmp( + k, chm_mode ? L"index-name" : + L"mshtmlhelp-index")) { sfree(ret.hhk_filename); ret.hhk_filename = dupstr(adv(p->origkeyword)); + } else if (!generic && !chm_mode && + !ustricmp(k, L"mshtmlhelp-project")) { + sfree(ret.hhp_filename); + ret.hhp_filename = dupstr(adv(p->origkeyword)); + } else if (!generic && chm_mode && + !ustricmp(k, L"extra-file")) { + char *diskname, *chmname; + + diskname = adv(p->origkeyword); + if (*diskname) { + chmname = adv(diskname); + if (!*chmname) + chmname = diskname; + + if (chmname[0] == '#' || chmname[0] == '$') + err_chm_badname(&p->fpos, chmname); + + if (ret.nchmextrafiles >= ret.chmextrafilesize) { + ret.chmextrafilesize = ret.nchmextrafiles * 5 / 4 + 32; + ret.chm_extrafiles = sresize( + ret.chm_extrafiles, ret.chmextrafilesize, char *); + ret.chm_extranames = sresize( + ret.chm_extranames, ret.chmextrafilesize, char *); + } + ret.chm_extrafiles[ret.nchmextrafiles] = dupstr(diskname); + ret.chm_extranames[ret.nchmextrafiles] = + dupstr(chmname); + ret.nchmextrafiles++; + } } } } - /* - * Enforce that the CHM and HHP filenames must either be both - * present or both absent. If one is present but not the other, - * turn both off. - */ - if (!ret.chm_filename ^ !ret.hhp_filename) { - err_chmnames(); - sfree(ret.chm_filename); ret.chm_filename = NULL; - sfree(ret.hhp_filename); ret.hhp_filename = NULL; - } - /* - * And if we're not generating an HHP, there's no need for HHC - * or HHK. - */ - if (!ret.hhp_filename) { - sfree(ret.hhc_filename); ret.hhc_filename = NULL; - sfree(ret.hhk_filename); ret.hhk_filename = NULL; + if (!chm_mode) { + /* + * If we're in HTML mode but using the old-style options to + * output HTML Help Workshop auxiliary files, do some + * consistency checking. + */ + + /* + * Enforce that the CHM and HHP filenames must either be both + * present or both absent. If one is present but not the other, + * turn both off. + */ + if (!ret.chm_filename ^ !ret.hhp_filename) { + err_chmnames(); + sfree(ret.chm_filename); ret.chm_filename = NULL; + sfree(ret.hhp_filename); ret.hhp_filename = NULL; + } + /* + * And if we're not generating an HHP, there's no need for HHC + * or HHK. + */ + if (!ret.hhp_filename) { + sfree(ret.hhc_filename); ret.hhc_filename = NULL; + sfree(ret.hhk_filename); ret.hhk_filename = NULL; + } } /* @@ -644,20 +751,23 @@ paragraph *html_config_filename(char *filename) return p; } -void html_backend(paragraph *sourceform, keywordlist *keywords, - indexdata *idx, void *unused) +paragraph *chm_config_filename(char *filename) +{ + return cmdline_cfg_simple("chm-filename", filename, NULL); +} + +static void html_backend_common(paragraph *sourceform, keywordlist *keywords, + indexdata *idx, int chm_mode) { paragraph *p; htmlsect *topsect; htmlconfig conf; htmlfilelist files = { NULL, NULL, NULL, NULL, NULL, NULL }; htmlsectlist sects = { NULL, NULL }, nonsects = { NULL, NULL }; - char *hhk_filename; - int has_index; + struct chm *chm = NULL; + int has_index, hhk_needed = FALSE; - IGNORE(unused); - - conf = html_configure(sourceform); + conf = html_configure(sourceform, chm_mode); /* * We're going to make heavy use of paragraphs' private data @@ -732,10 +842,10 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, /* * And the index, if we have one. Note that we don't output * an index as an HTML file if we're outputting one as a - * .HHK. + * .HHK (in either of the HTML or CHM output modes). */ has_index = (count234(idx->entries) > 0); - if (has_index && !conf.hhk_filename) { + if (has_index && !chm_mode && !conf.hhk_filename) { sect = html_new_sect(§s, NULL, &conf); sect->text = NULL; sect->type = INDEX; @@ -901,6 +1011,9 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, } } + if (chm_mode) + chm = chm_new(); + /* * Now we're ready to write out the actual HTML files. * @@ -936,7 +1049,9 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, #define listname(lt) ( (lt)==UL ? "ul" : (lt)==OL ? "ol" : "dl" ) #define itemname(lt) ( (lt)==LI ? "li" : (lt)==DT ? "dt" : "dd" ) - if (!strcmp(f->filename, "-")) + if (chm) + ho_setup_chm(&ho, chm, f->filename); + else if (!strcmp(f->filename, "-")) ho_setup_stdio(&ho, stdout); else ho_setup_file(&ho, f->filename); @@ -1728,8 +1843,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, * whether there's even going to _be_ an index file: we omit it * if the index contains nothing. */ - hhk_filename = conf.hhk_filename; - if (hhk_filename) { + if (chm_mode || conf.hhk_filename) { int ok = FALSE; int i; indexentry *entry; @@ -1743,8 +1857,138 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, } } - if (!ok) - hhk_filename = NULL; + if (ok) + hhk_needed = TRUE; + } + + /* + * If we're doing direct CHM output, tell winchm.c all the things + * it will need to know aside from the various HTML files' + * contents. + */ + if (chm) { + chm_contents_filename(chm, conf.hhc_filename); + if (has_index) + chm_index_filename(chm, conf.hhk_filename); + chm_default_window(chm, "main"); + + { + htmloutput ho; + rdstringc rs = {0, 0, NULL}; + + ho.charset = CS_CP1252; /* as far as I know, CHM is */ + ho.restrict_charset = CS_CP1252; /* hardwired to this charset */ + ho.cstate = charset_init_state; + ho.ver = HTML_4; /* *shrug* */ + ho.state = HO_NEUTRAL; + ho.contents_level = 0; + ho.hackflags = HO_HACK_QUOTENOTHING; + + ho_setup_rdstringc(&ho, &rs); + + ho.hacklimit = 255; + html_words(&ho, topsect->title->words, NOTHING, + NULL, keywords, &conf); + + rdaddc(&rs, '\0'); + chm_title(chm, rs.text); + + chm_default_topic(chm, files.head->filename); + + chm_add_window(chm, "main", rs.text, + conf.hhc_filename, conf.hhk_filename, + files.head->filename, + /* This first magic number is + * fsWinProperties, controlling Navigation + * Pane options and the like. Constants + * HHWIN_PROP_* in htmlhelp.h. */ + 0x62520, + /* This second number is fsToolBarFlags, + * mainly controlling toolbar buttons. + * Constants HHWIN_BUTTON_*. NOTE: there + * are two pairs of bits for Next/Previous + * buttons: 7/8 (which do nothing useful), + * and 21/22 (which work). (Neither of + * these are exposed in the HHW UI, but + * they work fine in HH.) We use the + * latter. */ + 0x70304e); + + sfree(rs.text); + } + + { + htmlfile *f; + + for (f = files.head; f; f = f->next) + f->chmsect = NULL; + for (f = files.head; f; f = f->next) { + htmlsect *s = f->first; + htmloutput ho; + rdstringc rs = {0, 0, NULL}; + + ho.charset = CS_CP1252; + ho.restrict_charset = CS_CP1252; + ho.cstate = charset_init_state; + ho.ver = HTML_4; /* *shrug* */ + ho.state = HO_NEUTRAL; + ho.contents_level = 0; + ho.hackflags = HO_HACK_QUOTENOTHING; + + ho_setup_rdstringc(&ho, &rs); + ho.hacklimit = 255; + + if (f->first->title) + html_words(&ho, f->first->title->words, NOTHING, + NULL, keywords, &conf); + else if (f->first->type == INDEX) + html_text(&ho, conf.index_text); + rdaddc(&rs, '\0'); + + while (s && s->file == f) + s = s->parent; + + /* + * Special case, as below: the TOP file is not + * considered to be the parent of everything else. + */ + if (s && s->type == TOP) + s = NULL; + + f->chmsect = chm_add_section(chm, s ? s->file->chmsect : NULL, + rs.text, f->filename); + + sfree(rs.text); + } + } + + { + int i; + + for (i = 0; i < conf.nchmextrafiles; i++) { + const char *fname = conf.chm_extrafiles[i]; + FILE *fp; + long size; + char *data; + + fp = fopen(fname, "rb"); + if (!fp) { + err_cantopen(fname); + continue; + } + + fseek(fp, 0, SEEK_END); + size = ftell(fp); + rewind(fp); + + data = snewn(size, char); + size = fread(data, 1, size, fp); + fclose(fp); + + chm_add_file(chm, conf.chm_extranames[i], data, size); + sfree(data); + } + } } /* @@ -1800,7 +2044,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, ho_string(&ho, conf.hhc_filename); ho_string(&ho, "\n"); } - if (hhk_filename) { + if (hhk_needed) { ho_string(&ho, "Index file="); ho_string(&ho, conf.hhk_filename); ho_string(&ho, "\n"); @@ -1817,8 +2061,8 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, if (conf.hhc_filename) ho_string(&ho, conf.hhc_filename); ho_string(&ho, "\",\""); - if (hhk_filename) - ho_string(&ho, hhk_filename); + if (hhk_needed) + ho_string(&ho, conf.hhk_filename); ho_string(&ho, "\",\""); ho_string(&ho, files.head->filename); ho_string(&ho, "\",,,,,," @@ -1848,7 +2092,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, ho_finish(&ho); } - if (conf.hhc_filename) { + if (chm || conf.hhc_filename) { htmlfile *f; htmlsect *s, *a; htmloutput ho; @@ -1862,7 +2106,10 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, ho.contents_level = 0; ho.hackflags = HO_HACK_QUOTEQUOTES; - ho_setup_file(&ho, conf.hhc_filename); + if (chm) + ho_setup_chm(&ho, chm, conf.hhc_filename); + else + ho_setup_file(&ho, conf.hhc_filename); /* * Magic DOCTYPE which seems to work for .HHC files. I'm @@ -1955,7 +2202,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, cleanup(&ho); } - if (hhk_filename) { + if (hhk_needed) { htmlfile *f; htmloutput ho; indexentry *entry; @@ -1976,7 +2223,10 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, ho.contents_level = 0; ho.hackflags = HO_HACK_QUOTEQUOTES; - ho_setup_file(&ho, hhk_filename); + if (chm) + ho_setup_chm(&ho, chm, conf.hhk_filename); + else + ho_setup_file(&ho, conf.hhk_filename); /* * Magic DOCTYPE which seems to work for .HHK files. I'm @@ -2041,6 +2291,26 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, cleanup(&ho); } + if (chm) { + /* + * Finalise and write out the CHM file. + */ + const char *data; + int len; + FILE *fp; + + fp = fopen(conf.chm_filename, "wb"); + if (!fp) { + err_cantopenw(conf.chm_filename); + } else { + data = chm_build(chm, &len); + fwrite(data, 1, len, fp); + fclose(fp); + } + + chm_free(chm); + } + /* * Go through and check that no index fragments were referenced * without being generated, or indeed vice versa. @@ -2139,6 +2409,25 @@ void html_backend(paragraph *sourceform, keywordlist *keywords, while (conf.ntfragments--) sfree(conf.template_fragments[conf.ntfragments]); sfree(conf.template_fragments); + while (conf.nchmextrafiles--) { + sfree(conf.chm_extrafiles[conf.nchmextrafiles]); + sfree(conf.chm_extranames[conf.nchmextrafiles]); + } + sfree(conf.chm_extrafiles); +} + +void html_backend(paragraph *sourceform, keywordlist *keywords, + indexdata *idx, void *unused) +{ + IGNORE(unused); + html_backend_common(sourceform, keywords, idx, FALSE); +} + +void chm_backend(paragraph *sourceform, keywordlist *keywords, + indexdata *idx, void *unused) +{ + IGNORE(unused); + html_backend_common(sourceform, keywords, idx, TRUE); } static void html_file_section(htmlconfig *cfg, htmlfilelist *files, -- cgit v1.1