10 files changed, 700 insertions, 44 deletions
diff --git a/bk_html.c b/bk_html.c
index a1b46f6..11d37d3 100644
--- a/bk_html.c
+++ b/bk_html.c
@@ -10,6 +10,13 @@
  *    sensible. Perhaps for the topmost section in the file, no
  *    fragment should be used? (Though it should probably still be
  *    _there_ even if unused.)
+ * 
+ *  - In HHK index mode: subsidiary hhk entries (as in replacing
+ *    `foo, bar' with `foo\n\tbar') can be done by embedding
+ *    sub-<UL>s in the hhk file. This requires me getting round to
+ *    supporting that idiom in the rest of Halibut, but I thought
+ *    I'd record how it's done here in case I turn out to have
+ *    forgotten when I get there.
  */
 
 #include <stdio.h>
@@ -41,10 +48,12 @@ typedef struct {
     int ncdepths;
     int address_section, visible_version_id;
     int leaf_contains_contents, leaf_smallest_contents;
+    int navlinks;
     char *contents_filename;
     char *index_filename;
     char *template_filename;
     char *single_filename;
+    char *chm_filename, *hhp_filename, *hhc_filename, *hhk_filename;
     char **template_fragments;
     int ntfragments;
     char *head_end, *body_start, *body_end, *addr_start, *addr_end;
@@ -77,6 +86,13 @@ struct htmlfile {
     int last_fragment_number;
     int min_heading_depth;
     htmlsect *first, *last;	       /* first/last highest-level sections */
+    /*
+     * The `temp' field is available for use in individual passes
+     * over the file list. For example, the HHK index generation
+     * uses it to ensure no index term references the same file
+     * more than once.
+     */
+    int temp;
 };
 
 struct htmlsect {
@@ -92,6 +108,7 @@ typedef struct {
     htmlfile *head, *tail;
     htmlfile *single, *index;
     tree234 *frags;
+    tree234 *files;
 } htmlfilelist;
 
 typedef struct {
@@ -127,6 +144,8 @@ typedef struct {
     enum {
 	HO_NEUTRAL, HO_IN_TAG, HO_IN_EMPTY_TAG, HO_IN_TEXT
     } state;
+    int hackflags;		       /* used for icky .HH* stuff */
+    int hacklimit;		       /* text size limit, again for .HH* */
     /*
      * Stuff beyond here deals with the higher syntactic level: it
      * tracks how many levels of <ul> are currently open when
@@ -135,6 +154,21 @@ typedef struct {
     int contents_level;
 } htmloutput;
 
+/*
+ * Nasty hacks that modify the behaviour of htmloutput files. All
+ * of these are flag bits which may be ORed into ho.hackflags:
+ *  - HO_HACK_QUOTEQUOTES acts like the `quote_quotes' parameter
+ *    to html_text_limit_internal, but applies globally to an
+ *    entire htmloutput structure.
+ *  - HO_HACK_QUOTENOTHING writes HTML special characters out
+ *    raw instead of as entities (for .HHP files).
+ *  - HO_HACK_OMITQUOTES never emits a double quote: each one is
+ *    written as a single quote, for want of a better idea.
+ */
+#define HO_HACK_QUOTEQUOTES 1
+#define HO_HACK_QUOTENOTHING 2
+#define HO_HACK_OMITQUOTES 4
+
 static int html_fragment_compare(void *av, void *bv)
 {
     htmlfragment *a = (htmlfragment *)av;
@@ -147,6 +181,14 @@ static int html_fragment_compare(void *av, void *bv)
 	return strcmp(a->fragment, b->fragment);
 }
 
+static int html_filename_compare(void *av, void *bv)
+{
+    char *a = (char *)av;
+    char *b = (char *)bv;
+    /* Sort order for the files.files tree: plain strcmp on the names. */
+    return strcmp(a, b);
+}
+
 static void html_file_section(htmlconfig *cfg, htmlfilelist *files,
 			      htmlsect *sect, int depth);
 
@@ -187,6 +229,7 @@ static void html_fragment(htmloutput *ho, char const *fragment);
 static char *html_format(paragraph *p, char *template_string);
 static char *html_sanitise_fragment(htmlfilelist *files, htmlfile *file,
 				    char *text);
+static char *html_sanitise_filename(htmlfilelist *files, char *text);
 
 static void html_contents_entry(htmloutput *ho, int depth, htmlsect *s,
 				htmlfile *thisfile, keywordlist *keywords,
@@ -215,10 +258,13 @@ static htmlconfig html_configure(paragraph *source) {
     ret.address_section = TRUE;
     ret.leaf_contains_contents = FALSE;
     ret.leaf_smallest_contents = 4;
+    ret.navlinks = TRUE;
     ret.single_filename = dupstr("Manual.html");
     ret.contents_filename = dupstr("Contents.html");
     ret.index_filename = dupstr("IndexPage.html");
     ret.template_filename = dupstr("%n.html");
+    ret.chm_filename = ret.hhp_filename = NULL;
+    ret.hhc_filename = ret.hhk_filename = NULL;
     ret.ntfragments = 1;
     ret.template_fragments = snewn(ret.ntfragments, char *);
     ret.template_fragments[0] = dupstr("%b");
@@ -333,10 +379,18 @@ static htmlconfig html_configure(paragraph *source) {
 		    error(err_cfginsufarg, &p->fpos, p->origkeyword, 1);
 	    } else if (!ustricmp(k, L"html-chapter-numeric")) {
 		ret.achapter.just_numbers = utob(uadv(k));
+	    } else if (!ustricmp(k, L"html-suppress-navlinks")) {
+		ret.navlinks = !utob(uadv(k));
 	    } else if (!ustricmp(k, L"html-chapter-suffix")) {
 		ret.achapter.number_suffix = uadv(k);
 	    } else if (!ustricmp(k, L"html-leaf-level")) {
-		ret.leaf_level = utoi(uadv(k));
+		wchar_t *u = uadv(k);
+		if (!ustricmp(u, L"infinite") ||
+		    !ustricmp(u, L"infinity") ||
+		    !ustricmp(u, L"inf"))
+		    ret.leaf_level = -1;   /* represents infinity */
+		else
+		    ret.leaf_level = utoi(u);
 	    } else if (!ustricmp(k, L"html-section-numeric")) {
 		wchar_t *q = uadv(k);
 		int n = 0;
@@ -446,11 +500,42 @@ static htmlconfig html_configure(paragraph *source) {
 		ret.pre_versionid = uadv(k);
 	    } else if (!ustricmp(k, L"html-post-versionid")) {
 		ret.post_versionid = uadv(k);
+	    } else if (!ustricmp(k, L"html-mshtmlhelp-chm")) {
+		sfree(ret.chm_filename);
+		ret.chm_filename = dupstr(adv(p->origkeyword));
+	    } else if (!ustricmp(k, L"html-mshtmlhelp-project")) {
+		sfree(ret.hhp_filename);
+		ret.hhp_filename = dupstr(adv(p->origkeyword));
+	    } else if (!ustricmp(k, L"html-mshtmlhelp-contents")) {
+		sfree(ret.hhc_filename);
+		ret.hhc_filename = dupstr(adv(p->origkeyword));
+	    } else if (!ustricmp(k, L"html-mshtmlhelp-index")) {
+		sfree(ret.hhk_filename);
+		ret.hhk_filename = dupstr(adv(p->origkeyword));
 	    }
 	}
     }
 
     /*
+     * Enforce that the CHM and HHP filenames must either be both
+     * present or both absent. If one is present but not the other,
+     * turn both off.
+     */
+    if (!ret.chm_filename ^ !ret.hhp_filename) {
+	error(err_chmnames);
+	sfree(ret.chm_filename); ret.chm_filename = NULL;
+	sfree(ret.hhp_filename); ret.hhp_filename = NULL;
+    }
+    /*
+     * And if we're not generating an HHP, there's no need for HHC
+     * or HHK.
+     */
+    if (!ret.hhp_filename) {
+	sfree(ret.hhc_filename); ret.hhc_filename = NULL;
+	sfree(ret.hhk_filename); ret.hhk_filename = NULL;
+    }
+
+    /*
      * Now process fallbacks on quote characters.
      */
     while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) &&
@@ -485,9 +570,11 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 		  indexdata *idx, void *unused)
 {
     paragraph *p;
+    htmlsect *topsect;
     htmlconfig conf;
-    htmlfilelist files = { NULL, NULL, NULL, NULL, NULL };
+    htmlfilelist files = { NULL, NULL, NULL, NULL, NULL, NULL };
     htmlsectlist sects = { NULL, NULL }, nonsects = { NULL, NULL };
+    char *hhk_filename;
     int has_index;
 
     IGNORE(unused);
@@ -504,6 +591,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 	p->private_data = NULL;
 
     files.frags = newtree234(html_fragment_compare);
+    files.files = newtree234(html_filename_compare);
 
     /*
      * Start by figuring out into which file each piece of the
@@ -518,7 +606,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
      * for each section.
      */
     {
-	htmlsect *topsect, *sect;
+	htmlsect *sect;
 	int d;
 
 	topsect = html_new_sect(&sects, NULL, &conf);
@@ -563,9 +651,13 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 		}
 	    }
 
-	/* And the index, if we have one. */
+	/*
+	 * And the index, if we have one. Note that we don't output
+	 * an index as an HTML file if we're outputting one as a
+	 * .HHK.
+	 */
 	has_index = (count234(idx->entries) > 0);
-	if (has_index) {
+	if (has_index && !conf.hhk_filename) {
 	    sect = html_new_sect(&sects, NULL, &conf);
 	    sect->text = NULL;
 	    sect->type = INDEX;
@@ -776,6 +868,8 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 	    ho.ver = conf.htmlver;
 	    ho.state = HO_NEUTRAL;
 	    ho.contents_level = 0;
+	    ho.hackflags = 0;	       /* none of these thankyouverymuch */
+	    ho.hacklimit = -1;
 
 	    /* <!DOCTYPE>. */
 	    switch (conf.htmlver) {
@@ -902,7 +996,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 	     * Write out a nav bar. Special case: we don't do this
 	     * if there is only one file.
 	     */
-	    if (files.head != files.tail) {
+	    if (conf.navlinks && files.head != files.tail) {
 		element_open(&ho, "p");
 		if (conf.nav_attr)
 		    html_raw_as_attr(&ho, conf.nav_attr);
@@ -925,7 +1019,7 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 		if (f != files.head)
 		    element_close(&ho, "a");
 
-		if (has_index) {
+		if (has_index && files.index) {
 		    html_text(&ho, conf.nav_separator);
 		    if (f != files.index) {
 			element_open(&ho, "a");
@@ -1379,7 +1473,8 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 		 */
 		int done_version_ids = FALSE;
 
-		element_empty(&ho, "hr");
+		if (conf.address_section)
+		    element_empty(&ho, "hr");
 
 		if (conf.body_end)
 		    html_raw(&ho, conf.body_end);
@@ -1460,6 +1555,295 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
     }
 
     /*
+     * Before we start outputting the HTML Help files, check
+     * whether there's even going to _be_ an index file: we omit it
+     * if the index contains nothing.
+     */
+    hhk_filename = conf.hhk_filename;
+    if (hhk_filename) {
+	int ok = FALSE;
+	int i;
+	indexentry *entry;
+
+	for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
+	    htmlindex *hi = (htmlindex *)entry->backend_data;
+
+	    if (hi->nrefs > 0) {
+		ok = TRUE;	       /* found an index entry */
+		break;
+	    }
+	}
+
+	if (!ok)
+	    hhk_filename = NULL;
+    }
+
+    /*
+     * Output the MS HTML Help supporting files, if requested.
+     */
+    if (conf.hhp_filename) {
+	htmlfile *f;
+	htmloutput ho;
+
+	ho.charset = CS_CP1252;	       /* as far as I know, HHP files are */
+	ho.restrict_charset = CS_CP1252;   /* hardwired to this charset */
+	ho.cstate = charset_init_state;
+	ho.ver = HTML_4;	       /* *shrug* */
+	ho.state = HO_NEUTRAL;
+	ho.contents_level = 0;
+	ho.hackflags = HO_HACK_QUOTENOTHING;
+
+	ho.fp = fopen(conf.hhp_filename, "w");
+	if (!ho.fp)
+	    error(err_cantopenw, conf.hhp_filename);
+
+	fprintf(ho.fp,
+		"[OPTIONS]\n"
+		"Compatibility=1.1 or later\n"
+		"Compiled file=%s\n"
+		"Default Window=main\n"
+		"Default topic=%s\n"
+		"Display compile progress=Yes\n"
+		"Full-text search=Yes\n"
+		"Title=", conf.chm_filename, files.head->filename);
+
+	ho.hacklimit = 255;
+	html_words(&ho, topsect->title->words, NOTHING,
+		   NULL, keywords, &conf);
+
+	fprintf(ho.fp, "\n");
+
+	/*
+	 * These two entries don't seem to be remotely necessary
+	 * for a successful run of the help _compiler_, but
+	 * omitting them causes the GUI Help Workshop to behave
+	 * rather strangely if you try to load the help project
+	 * into that and edit it.
+	 */
+	if (conf.hhc_filename)
+	    fprintf(ho.fp, "Contents file=%s\n", conf.hhc_filename);
+	if (hhk_filename)
+	    fprintf(ho.fp, "Index file=%s\n", hhk_filename);
+
+	fprintf(ho.fp, "\n[WINDOWS]\nmain=\"");
+
+	ho.hackflags |= HO_HACK_OMITQUOTES;
+	ho.hacklimit = 255;
+	html_words(&ho, topsect->title->words, NOTHING,
+		   NULL, keywords, &conf);
+
+	fprintf(ho.fp, "\",\"%s\",\"%s\",\"%s\",,,,,,"
+		"0x42520,,0x3876,[271,372,593,566],,,,,,,0\n",
+		conf.hhc_filename ? conf.hhc_filename : "",
+		hhk_filename ? hhk_filename : "",
+		files.head->filename);
+
+	/*
+	 * The [FILES] section is also not necessary for
+	 * compilation (hhc appears to build up a list of needed
+	 * files just by following links from the given starting
+	 * points), but useful for loading the project into HHW.
+	 */
+	fprintf(ho.fp, "\n[FILES]\n");
+	for (f = files.head; f; f = f->next)
+	    fprintf(ho.fp, "%s\n", f->filename);
+
+	fclose(ho.fp);
+    }
+    if (conf.hhc_filename) {
+	htmlfile *f;
+	htmlsect *s, *a;
+	htmloutput ho;
+	int currdepth = 0;
+
+	ho.fp = fopen(conf.hhc_filename, "w");
+	if (!ho.fp)
+	    error(err_cantopenw, conf.hhc_filename);
+
+	ho.charset = CS_CP1252;	       /* as far as I know, HHC files are */
+	ho.restrict_charset = CS_CP1252;   /* hardwired to this charset */
+	ho.cstate = charset_init_state;
+	ho.ver = HTML_4;	       /* *shrug* */
+	ho.state = HO_NEUTRAL;
+	ho.contents_level = 0;
+	ho.hackflags = HO_HACK_QUOTEQUOTES;
+
+	/*
+	 * Magic DOCTYPE which seems to work for .HHC files. I'm
+	 * wary of trying to change it!
+	 */
+	fprintf(ho.fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n"
+		"<HTML><HEAD>\n"
+		"<META HTTP-EQUIV=\"Content-Type\" "
+		"CONTENT=\"text/html; charset=%s\">\n"
+		"</HEAD><BODY><UL>\n",
+		charset_to_mimeenc(conf.output_charset));
+
+	for (f = files.head; f; f = f->next) {
+	    /*
+	     * For each HTML file, write out a contents entry.
+	     */
+	    int depth, leaf = TRUE;
+
+	    /*
+	     * Determine the depth of this file in the contents
+	     * tree.
+	     * 
+	     * If the file contains no sections, it is assumed to
+	     * have depth zero.
+	     */
+	    depth = 0;
+	    if (f->first)
+		for (a = f->first->parent; a && a->type != TOP; a = a->parent)
+		    depth++;
+
+	    /*
+	     * Determine if this file is a leaf file, by
+	     * trawling the section list to see if there's any
+	     * section with an ancestor in this file but which
+	     * is not itself in this file.
+	     *
+	     * Special case: for contents purposes, the TOP
+	     * file is not considered to be the parent of the
+	     * chapter files, so it's always a leaf.
+	     * 
+	     * A file with no sections in it is also a leaf.
+	     */
+	    if (f->first && f->first->type != TOP) {
+		for (s = f->first; s; s = s->next) {
+		    htmlsect *a;
+
+		    if (leaf && s->file != f) {
+			for (a = s; a; a = a->parent)
+			    if (a->file == f) {
+				leaf = FALSE;
+				break;
+			    }
+		    }
+		}
+	    }
+
+	    /*
+	     * Now write out our contents entry.
+	     */
+	    while (currdepth < depth) {
+		fprintf(ho.fp, "<UL>\n");
+		currdepth++;
+	    }
+	    while (currdepth > depth) {
+		fprintf(ho.fp, "</UL>\n");
+		currdepth--;
+	    }
+	    /* fprintf(ho.fp, "<!-- depth=%d -->", depth); */
+	    fprintf(ho.fp, "<LI><OBJECT TYPE=\"text/sitemap\">"
+		    "<PARAM NAME=\"Name\" VALUE=\"");
+	    ho.hacklimit = 255;
+	    if (f->first->title)
+		html_words(&ho, f->first->title->words, NOTHING,
+			   NULL, keywords, &conf);
+	    else if (f->first->type == INDEX)
+		html_text(&ho, conf.index_text);
+	    fprintf(ho.fp, "\"><PARAM NAME=\"Local\" VALUE=\"%s\">"
+		    "<PARAM NAME=\"ImageNumber\" VALUE=\"%d\"></OBJECT>\n",
+		    f->filename, leaf ? 11 : 1);
+	}
+
+	while (currdepth > 0) {
+	    fprintf(ho.fp, "</UL>\n");
+	    currdepth--;
+	}
+
+	fprintf(ho.fp, "</UL></BODY></HTML>\n");
+
+	cleanup(&ho);
+    }
+    if (hhk_filename) {
+	htmlfile *f;
+	htmloutput ho;
+	indexentry *entry;
+	int i;
+
+	/*
+	 * First make a pass over all HTML files and set their
+	 * `temp' fields to zero, because we're about to use them.
+	 */
+	for (f = files.head; f; f = f->next)
+	    f->temp = 0;
+
+	ho.fp = fopen(hhk_filename, "w");
+	if (!ho.fp)
+	    error(err_cantopenw, hhk_filename);
+
+	ho.charset = CS_CP1252;	       /* as far as I know, HHK files are */
+	ho.restrict_charset = CS_CP1252;   /* hardwired to this charset */
+	ho.cstate = charset_init_state;
+	ho.ver = HTML_4;	       /* *shrug* */
+	ho.state = HO_NEUTRAL;
+	ho.contents_level = 0;
+	ho.hackflags = HO_HACK_QUOTEQUOTES;
+
+	/*
+	 * Magic DOCTYPE which seems to work for .HHK files. I'm
+	 * wary of trying to change it!
+	 */
+	fprintf(ho.fp, "<!DOCTYPE HTML PUBLIC \"-//IETF//DTD HTML//EN\">\n"
+		"<HTML><HEAD>\n"
+		"<META HTTP-EQUIV=\"Content-Type\" "
+		"CONTENT=\"text/html; charset=%s\">\n"
+		"</HEAD><BODY><UL>\n",
+		charset_to_mimeenc(conf.output_charset));
+
+	/*
+	 * Go through the index terms and output each one.
+	 */
+	for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) {
+	    htmlindex *hi = (htmlindex *)entry->backend_data;
+	    int j;
+
+	    if (hi->nrefs > 0) {
+		fprintf(ho.fp, "<LI><OBJECT TYPE=\"text/sitemap\">\n"
+			"<PARAM NAME=\"Name\" VALUE=\"");
+		ho.hacklimit = 255;
+		html_words(&ho, entry->text, NOTHING,
+			   NULL, keywords, &conf);
+		fprintf(ho.fp, "\">\n");
+
+		for (j = 0; j < hi->nrefs; j++) {
+		    htmlindexref *hr =
+			(htmlindexref *)hi->refs[j]->private_data;
+
+		    /*
+		     * Use the temp field to ensure we don't
+		     * reference the same file more than once.
+		     */
+		    if (!hr->section->file->temp) {
+			fprintf(ho.fp, "<PARAM NAME=\"Local\" VALUE=\"%s\">\n",
+				hr->section->file->filename);
+			hr->section->file->temp = 1;
+		    }
+
+		    hr->referenced = TRUE;
+		}
+
+		fprintf(ho.fp, "</OBJECT>\n");
+
+		/*
+		 * Now go through those files and re-clear the temp
+		 * fields ready for the _next_ index term.
+		 */
+		for (j = 0; j < hi->nrefs; j++) {
+		    htmlindexref *hr =
+			(htmlindexref *)hi->refs[j]->private_data;
+		    hr->section->file->temp = 0;
+		}
+	    }
+	}
+
+	fprintf(ho.fp, "</UL></BODY></HTML>\n");
+	cleanup(&ho);
+    }
+
+    /*
      * Go through and check that no index fragments were referenced
      * without being generated, or indeed vice versa.
      * 
@@ -1491,6 +1875,11 @@ void html_backend(paragraph *sourceform, keywordlist *keywords,
 	}
 	freetree234(files.frags);
     }
+    /*
+     * The strings in files.files are all owned by their containing
+     * htmlfile structures, so there's no need to free them here.
+     */
+    freetree234(files.files);
     {
 	htmlsect *sect, *tmp;
 	sect = sects.head;
@@ -1584,8 +1973,12 @@ static void html_file_section(htmlconfig *cfg, htmlfilelist *files,
 	 * we invent a fresh file and put this section at its head.
 	 * Otherwise, we put it in the same file as its parent
 	 * section.
+	 * 
+	 * Another special value of cfg->leaf_level is -1, which
+	 * means infinity (i.e. it's considered to always be
+	 * greater than depth).
 	 */
-	if (ldepth > cfg->leaf_level) {
+	if (cfg->leaf_level > 0 && ldepth > cfg->leaf_level) {
 	    /*
 	     * We know that sect->parent cannot be NULL. The only
 	     * circumstance in which it can be is if sect is at
@@ -1641,7 +2034,8 @@ static htmlfile *html_new_file(htmlfilelist *list, char *filename)
 	list->head = ret;
     list->tail = ret;
 
-    ret->filename = dupstr(filename);
+    ret->filename = html_sanitise_filename(list, dupstr(filename));
+    add234(list->files, ret->filename);
     ret->last_fragment_number = 0;
     ret->min_heading_depth = INT_MAX;
     ret->first = ret->last = NULL;
@@ -1943,8 +2337,16 @@ static void html_text_limit_internal(htmloutput *ho, wchar_t const *text,
     char outbuf[256];
     int bytes, err;
 
+    if (ho->hackflags & (HO_HACK_QUOTEQUOTES | HO_HACK_OMITQUOTES))
+	quote_quotes = TRUE;	       /* override the input value */
+
     if (maxlen > 0 && textlen > maxlen)
 	textlen = maxlen;
+    if (ho->hacklimit >= 0) {
+	if (textlen > ho->hacklimit)
+	    textlen = ho->hacklimit;
+	ho->hacklimit -= textlen;
+    }
 
     while (textlen > 0) {
 	/* Scan ahead for characters we really can't display in HTML. */
@@ -1978,19 +2380,25 @@ static void html_text_limit_internal(htmloutput *ho, wchar_t const *text,
 	     * HTML.
 	     */
 	    if (ho->fp) {
-		if (*text == L'<')
-		    fprintf(ho->fp, "&lt;");
-		else if (*text == L'>')
-		    fprintf(ho->fp, "&gt;");
-		else if (*text == L'&')
-		    fprintf(ho->fp, "&amp;");
-		else if (*text == L'"')
-		    fprintf(ho->fp, "&quot;");
-		else if (*text == L' ') {
-		    assert(nbsp);
-		    fprintf(ho->fp, "&nbsp;");
-		} else
-		    assert(!"Can't happen");
+		if (*text == L'"' && (ho->hackflags & HO_HACK_OMITQUOTES)) {
+		    fputc('\'', ho->fp);
+		} else if (ho->hackflags & HO_HACK_QUOTENOTHING) {
+		    fputc(*text, ho->fp);
+		} else {
+		    if (*text == L'<')
+			fprintf(ho->fp, "&lt;");
+		    else if (*text == L'>')
+			fprintf(ho->fp, "&gt;");
+		    else if (*text == L'&')
+			fprintf(ho->fp, "&amp;");
+		    else if (*text == L'"')
+			fprintf(ho->fp, "&quot;");
+		    else if (*text == L' ') {
+			assert(nbsp);
+			fprintf(ho->fp, "&nbsp;");
+		    } else
+			assert(!"Can't happen");
+		}
 	    }
 	    text++, textlen--;
 	}
@@ -2162,6 +2570,69 @@ static char *html_sanitise_fragment(htmlfilelist *files, htmlfile *file,
     return text;
 }
 
+static char *html_sanitise_filename(htmlfilelist *files, char *text)
+{
+    /*
+     * Returns a filename based on `text' which does not clash
+     * with anything in files->files. `text' must be dynamically
+     * allocated: it is edited in place and may be resized or
+     * replaced (and freed), so use only the returned pointer.
+     * First, rip out any character that might upset some
+     * filesystem: only alphanumerics and +-.=_ survive.
+     */
+    char *p = text, *q = text;
+
+    while (*p) {
+	if ((*p>='A' && *p<='Z') ||
+	    (*p>='a' && *p<='z') ||
+	    (*p>='0' && *p<='9') ||
+	    *p=='-' || *p=='_' || *p=='+' || *p=='.' || *p=='=')
+	    *q++ = *p;
+	p++;
+    }
+    *q = '\0';
+
+    /* If there's nothing left, make something valid up */
+    if (!*text) {
+	static const char anonfrag[] = "anon.html";
+	text = sresize(text, lenof(anonfrag), char);
+	strcpy(text, anonfrag);
+    }
+
+    /*
+     * Now we check for clashes with other filenames, and adjust
+     * this one if necessary by appending a hyphen followed by a
+     * number (from 2 upwards) just before the extension (if any).
+     */
+    {
+	int len, extpos;
+	int suffix = 1;
+	/* p stays NULL until a clash forces text to be replaced. */
+	p = NULL;
+
+	while (find234(files->files, text, NULL)) {
+	    if (!p) {
+		len = strlen(text);
+		p = text;
+		text = snewn(len+20, char);
+		/* Point extpos at the last '.', or at the end if none. */
+		for (extpos = len; extpos > 0 && p[extpos-1] != '.'; extpos--);
+		if (extpos > 0)
+		    extpos--;
+		else
+		    extpos = len;
+	    }
+	    /* Insert "-N" at extpos; the 20 spare bytes hold it and NUL. */
+	    sprintf(text, "%.*s-%d%s", extpos, p, ++suffix, p+extpos);
+	}
+
+	if (p)
+	    sfree(p);
+    }
+
+    return text;
+}
+
 static void html_contents_entry(htmloutput *ho, int depth, htmlsect *s,
 				htmlfile *thisfile, keywordlist *keywords,
 				htmlconfig *cfg)
diff --git a/doc/Makefile b/doc/Makefile
index cfdd943..63299ad 100644
--- a/doc/Makefile
+++ b/doc/Makefile
@@ -20,4 +20,8 @@ install:
 	$(INSTALL) -m 644 halibut.1 $(man1dir)/halibut.1
 
 clean:
-	rm -f *.html *.txt *.hlp *.cnt *.1 *.info* *.ps *.pdf
+	rm -f *.html *.txt *.hlp *.cnt *.1 *.info* *.ps *.pdf *.hh* *.chm
+
+chm: halibut.hhp
+halibut.hhp: $(INPUTS) $(HALIBUT) chm.but
+	$(HALIBUT) --html $(INPUTS) chm.but
diff --git a/doc/chm.but b/doc/chm.but
new file mode 100644
index 0000000..510baf8
--- /dev/null
+++ b/doc/chm.but
@@ -0,0 +1,19 @@
+\# File containing the magic HTML configuration directives to create
+\# an MS HTML Help project. We put this on the end of the Halibut
+\# docs build command line to build the HHP and friends.
+
+\cfg{html-leaf-level}{infinite}
+\cfg{html-leaf-contains-contents}{false}
+\cfg{html-suppress-navlinks}{true}
+\cfg{html-suppress-address}{true}
+
+\cfg{html-contents-filename}{index.html}
+\cfg{html-template-filename}{%k.html}
+\cfg{html-template-fragment}{%k}
+
+\cfg{html-mshtmlhelp-chm}{halibut.chm}
+\cfg{html-mshtmlhelp-project}{halibut.hhp}
+\cfg{html-mshtmlhelp-contents}{halibut.hhc}
+\cfg{html-mshtmlhelp-index}{halibut.hhk}
+
+\versionid $Id$
diff --git a/doc/index.but b/doc/index.but
index b059b47..b8587c7 100644
--- a/doc/index.but
+++ b/doc/index.but
@@ -3,6 +3,12 @@
 \IM{Windows Help} Windows Help
 \IM{Windows Help} Help, Windows
 
+\IM{HTML Help} HTML Help
+\IM{HTML Help} Windows HTML Help
+\IM{HTML Help} MS HTML Help
+\IM{HTML Help} Microsoft HTML Help
+\IM{HTML Help} \cw{.chm} files
+
 \IM{Help compiler} Help compiler, lack of need for
 
 \IM{plain text} plain text
@@ -326,6 +332,11 @@ configuration directive
 \IM{\\cfg\{html-suppress-address\}}
 \cw{\\cfg\{html-suppress-address\}}
 
+\IM{\\cfg\{html-suppress-navlinks\}} \c{html-suppress-navlinks}
+configuration directive
+\IM{\\cfg\{html-suppress-navlinks\}}
+\cw{\\cfg\{html-suppress-navlinks\}}
+
 \IM{\\cfg\{html-author\}} \c{html-author} configuration directive
 \IM{\\cfg\{html-author\}} \cw{\\cfg\{html-author\}}
 
@@ -333,6 +344,22 @@ configuration directive
 directive
 \IM{\\cfg\{html-description\}} \cw{\\cfg\{html-description\}}
 
+\IM{\\cfg\{html-mshtmlhelp-project\}} \c{html-mshtmlhelp-project}
+configuration directive
+\IM{\\cfg\{html-mshtmlhelp-project\}} \cw{\\cfg\{html-mshtmlhelp-project\}}
+
+\IM{\\cfg\{html-mshtmlhelp-chm\}} \c{html-mshtmlhelp-chm}
+configuration directive
+\IM{\\cfg\{html-mshtmlhelp-chm\}} \cw{\\cfg\{html-mshtmlhelp-chm\}}
+
+\IM{\\cfg\{html-mshtmlhelp-contents\}} \c{html-mshtmlhelp-contents}
+configuration directive
+\IM{\\cfg\{html-mshtmlhelp-contents\}} \cw{\\cfg\{html-mshtmlhelp-contents\}}
+
+\IM{\\cfg\{html-mshtmlhelp-index\}} \c{html-mshtmlhelp-index}
+configuration directive
+\IM{\\cfg\{html-mshtmlhelp-index\}} \cw{\\cfg\{html-mshtmlhelp-index\}}
+
 \IM{\\cfg\{winhelp-topic\}} \c{winhelp-topic} configuration directive
 \IM{\\cfg\{winhelp-topic\}} \cw{\\cfg\{winhelp-topic\}}
 
diff --git a/doc/intro.but b/doc/intro.but
index 2898d17..ae63dd3 100644
--- a/doc/intro.but
+++ b/doc/intro.but
@@ -19,8 +19,6 @@ Currently Halibut supports the following output formats:
 
 \b HTML.
 
-\b Windows Help.
-
 \b Unix \cw{man} page format.
 
 \b GNU \c{info} format.
@@ -29,6 +27,11 @@ Currently Halibut supports the following output formats:
 
 \b PostScript.
 
+\b Old-style Windows Help (\cw{.HLP}).
+
+(With suitable options set, the HTML output can also be fed to the
+newer-style Windows HTML Help compiler.)
+
 \H{intro-features} Features supported by Halibut
 
 Here's a list of Halibut's notable features.
diff --git a/doc/output.but b/doc/output.but
index 1145fbe..df4580e 100644
--- a/doc/output.but
+++ b/doc/output.but
@@ -442,6 +442,10 @@ parameter after the command-line option \i\c{--html} (see
 and so Halibut assumes you want the whole document to be placed in
 that file.
 
+You can also specify the special name \c{infinity} (or \c{infinite}
+or \c{inf}) if you want to ensure that \e{every} section and
+subsection ends up in a separate file no matter how deep you go.
+
 }
 
 \dt \I{\cw{\\cfg\{html-contents-depth\}}}\cw{\\cfg\{html-contents-depth\}\{}\e{level}\cw{\}\{}\e{depth}\cw{\}}
@@ -765,6 +769,11 @@ visibly in the \i\cw{<ADDRESS>} section at the bottom of each HTML
 file. If it is set to \c{false}, they will only be included as HTML
 comments.
 
+\dt \I{\cw{\\cfg\{html-suppress-navlinks\}}}\cw{\\cfg\{html-suppress-navlinks\}\{}\e{boolean}\cw{\}}
+
+\dd If this is set to \c{true}, the usual \i{navigation links} at the
+top of each HTML file will be suppressed.
+
 \dt \I{\cw{\\cfg\{html-suppress-address\}}}\cw{\\cfg\{html-suppress-address\}\{}\e{boolean}\cw{\}}
 
 \dd If this is set to \c{true}, the \i\cw{<ADDRESS>} section at the
@@ -784,6 +793,108 @@ name="description">} tag in the output HTML files, so that browsers
 which support this can easily pick out a brief \I{description, of
 document}description of the document.
 
+\S{output-html-mshtmlhelp} Generating MS Windows \i{HTML Help}
+
+The HTML files output from Halibut's HTML back end can be used as
+input to the MS Windows HTML Help compiler. In order to do this, you
+also need some auxiliary files: a project file, and (probably) a
+contents file and an index file. Halibut can optionally generate
+those as well.
+
+To enable the generation of MS HTML Help auxiliary files, use the
+following configuration directives:
+
+\dt \I\cw{\\cfg\{html-mshtmlhelp-project\}}\cw{\\cfg\{html-mshtmlhelp-project\}\{}\e{filename}\cw{\}}
+
+\dd Instructs Halibut to output an HTML Help project file with the
+specified name. You will almost certainly want the filename to end
+in the extension \c{.hhp} (although Halibut will not enforce this).
+If you use this option, you must also use the
+\cw{html-mshtmlhelp-chm} option to specify the desired name of the
+compiled help file.
+
+\dt \I\cw{\\cfg\{html-mshtmlhelp-chm\}}\cw{\\cfg\{html-mshtmlhelp-chm\}\{}\e{filename}\cw{\}}
+
+\dd Specifies the desired name of the compiled HTML Help file. You
+will almost certainly want this to have the extension \c{.chm}
+(although Halibut will not enforce this). The name you specify here
+will be written into the help project file. If you specify this
+option, you must also use the \cw{html-mshtmlhelp-project} option to
+request a help project file in the first place.
+
+\dt \I\cw{\\cfg\{html-mshtmlhelp-contents\}}\cw{\\cfg\{html-mshtmlhelp-contents\}\{}\e{filename}\cw{\}}
+
+\dd Instructs Halibut to output an HTML Help contents file with the
+specified name, and refer to it in the help project file. You will
+almost certainly want the filename to end in the extension \c{.hhc}
+(although Halibut will not enforce this). This option will be
+ignored if you have not also specified a help project file.
+
+\lcont{
+
+Creating a contents file like this causes the HTML Help viewer to
+display a contents tree in the pane to the left of the main text
+window. You can choose to generate an HTML Help project without this
+feature, in which case the user will still be able to navigate
+around the document by using the ordinary internal links in the HTML
+files themselves just as if it were a web page. However, using a
+contents file is recommended.
+
+}
+
+\dt \I\cw{\\cfg\{html-mshtmlhelp-index\}}\cw{\\cfg\{html-mshtmlhelp-index\}\{}\e{filename}\cw{\}}
+
+\dd Instructs Halibut to output an HTML Help index file with the
+specified name, and refer to it in the help project file. You will
+almost certainly want the filename to end in the extension \c{.hhk}
+(although Halibut will not enforce this). This option will be
+ignored if you have not also specified a help project file.
+
+\lcont{
+
+Specifying this option suppresses the generation of an HTML-based
+index file (see \cw{\\cfg\{html-index-filename\}} in
+\k{output-html-file}).
+
+Creating an index file like this causes the HTML Help viewer to
+provide a list of index terms in a pane to the left of the main text
+window. You can choose to generate an HTML Help project without this
+feature, in which case a conventional HTML index will be generated
+instead (assuming you have any index terms at all defined) and the
+user will still be able to use that. However, using an index file is
+recommended.
+
+Halibut will not output an index file at all, or link to one from
+the help project file, if your document contains no index entries.
+
+}
+
+If you use the above options, Halibut will output a help project
+file which you should be able to feed straight to the command-line
+MS HTML Help compiler (\cw{HHC.EXE}), or load into the MS HTML Help
+Workshop (\cw{HHW.EXE}).
+
+You may also wish to alter other HTML configuration options to make
+the resulting help file look more like a help file and less like a
+web page. A suggested set of additional configuration options for
+HTML Help is as follows:
+
+\b \cw{\\cfg\{html-leaf-level\}\{infinite\}}, because HTML Help
+works best with lots of small files (\q{topics}) rather than a few
+large ones. In particular, the contents and index mechanisms can
+only reference files, not subsections within files.
+
+\b \cw{\\cfg\{html-leaf-contains-contents\}\{false\}}, to suppress
+the contents list above the main text of each bottom-level file.
+
+\b \cw{\\cfg\{html-suppress-navlinks\}\{true\}}, because HTML Help
+has its own navigation facilities and it looks a bit strange to
+duplicate them.
+
+\b \cw{\\cfg\{html-suppress-address\}\{true\}}, because the
+\cw{<address>} section makes less sense in a help file than it does
+on a web page.
+
 \S{output-html-defaults} Default settings
 
 The \i{default settings} for Halibut's HTML output format are:
@@ -847,12 +958,19 @@ The \i{default settings} for Halibut's HTML output format are:
 \H{output-whlp} Windows Help
 
 This output format generates data that can be used by the \i{Windows
-Help} program \cw{WINHELP.EXE}. There are two actual files
+Help} program \cw{WINHLP32.EXE}. There are two actual files
 generated, one ending in \c{.hlp} and the other ending in \c{.cnt}.
 
-Currently, the output is hardcoded to be in the \q{\i{Win1252}}
-character set. (If anyone knows how character sets are encoded in
-Windows Help, we'd appreciate help.)
+Note that as of 2006, MS is discontinuing the Windows Help format in
+favour of the newer HTML Help format (\c{.chm} files). Halibut is
+not currently able to generate \c{.chm} files directly, but its HTML
+back end can write out project files suitable for use as input to
+the MS HTML Help compiler. See \k{output-html-mshtmlhelp} for more
+information on this.
+
+Currently, the Windows Help output is hardcoded to be in the
+\q{\i{Win1252}} character set. (If anyone knows how character sets
+are encoded in Windows Help files, we'd appreciate help.)
 
 The Windows Help output format supports the following configuration
 directives:
diff --git a/doc/running.but b/doc/running.but
index 884c33f..8574708 100644
--- a/doc/running.but
+++ b/doc/running.but
@@ -14,18 +14,20 @@ This will generate a large set of \i{output files}:
 \b \i\c{output.txt} will be a \i{plain text} version of the input
 document.
 
-\b \i\c{output.hlp} and \i\c{output.cnt} will be a \i{Windows Help}
-version of the same thing. (Most of the text is in \c{output.hlp};
-\c{output.cnt} contains additional contents data used by the Windows
-help topic selector. If you lose the latter, the former should still
-be usable, but it will look less modern.)
+\b \i\c{output.hlp} and \i\c{output.cnt} will be an old-style
+\i{Windows Help} version of the same thing. (Most of the text is in
+\c{output.hlp}; \c{output.cnt} contains additional contents data
+used by the Windows Help topic selector. If you lose the latter, the
+former should still be usable, but it will look less modern.)
 
 \lcont{
-Note that Halibut does not require any external software such as a
-\i{Help compiler}. It \e{directly} generates Windows Help files, and
-therefore it doesn't need to be run on Windows to do so: it can
-generate them even when run from an automated script on a Unix
-machine.
+
+Note that to do this, Halibut does not require any external software
+such as a \i{Help compiler}. It \e{directly} generates old-style
+Windows Help files, and therefore it doesn't need to be run on
+Windows to do so: it can generate them even when run from an
+automated script on a Unix machine.
+
 }
 
 \b \c{output.1} will be a Unix \i{\cw{man} page}.
@@ -81,9 +83,10 @@ line, using the \c{-C} option).
 
 \dt \i\cw{--winhelp}[\cw{=}\e{filename}]
 
-\dd Specifies that you want to generate Windows Help output. You can
-optionally specify a file name (e.g. \c{--winhelp=myfile.hlp}), in
-which case Halibut will change the name of the output file as well.
+\dd Specifies that you want to generate old-style Windows Help
+output. You can optionally specify a file name (e.g.
+\c{--winhelp=myfile.hlp}), in which case Halibut will change the
+name of the output file as well.
 
 \lcont{
 
diff --git a/error.c b/error.c
index 2dae220..7fe97ec 100644
--- a/error.c
+++ b/error.c
@@ -333,9 +333,14 @@ static void do_error(int code, va_list ap) {
       case err_pfnoafm:
 	fpos = *va_arg(ap, filepos *);
 	sp = va_arg(ap, char *);
-	sprintf(error, "No metrics available for Type 1 font '%.200s'", sp);
+	sprintf(error, "no metrics available for Type 1 font '%.200s'", sp);
 	flags = FILEPOS;
 	break;
+      case err_chmnames:
+	sprintf(error, "only one of html-mshtmlhelp-chm and "
+		"html-mshtmlhelp-hhp found");
+	flags = PREFIX;
+	break;
       case err_whatever:
 	sp = va_arg(ap, char *);
         vsprintf(error, sp, ap);
diff --git a/halibut.h b/halibut.h
index 3cb1ed7..1ff014e 100644
--- a/halibut.h
+++ b/halibut.h
@@ -252,6 +252,7 @@ enum {
     err_pfhead,			       /* bad Type 1 header line */
     err_pfbad,			       /* otherwise invalide Type 1 font */
     err_pfnoafm,		       /* Type 1 font but no AFM */
+    err_chmnames,		       /* need both or neither of hhp+chm */
     err_whatever                       /* random error of another type */
 };
 
diff --git a/inputs/test.but b/inputs/test.but
index f52d4b8..ecd5a2d 100644
--- a/inputs/test.but
+++ b/inputs/test.but
@@ -279,6 +279,11 @@ Umm.
 
 Ahh.
 
+\H{app-\\two} Section with inconvenient keyword
+
+If you apply this file together with \cw{doc/chm.but}, this section
+should test \cw{html_sanitise_filename()}.
+
 \U Bibliography
 
 \B{book} Some text describing a book.