diff options
| -rw-r--r-- | LICENCE | 2 | ||||
| -rw-r--r-- | Makefile | 2 | ||||
| -rw-r--r-- | bk_html.c | 1881 | ||||
| -rw-r--r-- | bk_xhtml.c | 1779 | ||||
| -rw-r--r-- | doc/licence.but | 4 | ||||
| -rw-r--r-- | doc/output.but | 4 | ||||
| -rw-r--r-- | halibut.h | 9 | ||||
| -rw-r--r-- | main.c | 4 | ||||
| -rw-r--r-- | ustring.c | 15 |
9 files changed, 1908 insertions, 1792 deletions
@@ -1,4 +1,4 @@ -Halibut is copyright (c) 1999-2004 Simon Tatham and James Aylett. +Halibut is copyright (c) 1999-2004 Simon Tatham. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files @@ -115,7 +115,7 @@ include $(LIBCHARSET_SRCDIR)Makefile MODULES := main malloc ustring error help licence version misc tree234 MODULES += input keywords contents index style biblio -MODULES += bk_text bk_xhtml bk_whlp bk_man bk_info bk_paper bk_ps bk_pdf +MODULES += bk_text bk_html bk_whlp bk_man bk_info bk_paper bk_ps bk_pdf MODULES += winhelp psdata wcwidth OBJECTS := $(addsuffix .o,$(MODULES)) $(LIBCHARSET_OBJS) diff --git a/bk_html.c b/bk_html.c new file mode 100644 index 0000000..936126d --- /dev/null +++ b/bk_html.c @@ -0,0 +1,1881 @@ +/* + * HTML backend for Halibut + */ + +/* + * TODO: + * + * - I'm never entirely convinced that having a fragment link to + * come in at the start of the real text in the file is + * sensible. Perhaps for the topmost section in the file, no + * fragment should be used? (Though it should probably still be + * _there_ even if unused.) + * + * - new configurability: + * * a few new things explicitly labelled as `FIXME: + * configurable' or similar. + * * HTML flavour. + * * Some means of specifying the distinction between + * restrict-charset and output-charset. It seems to me that + * `html-charset' is output-charset, and that + * restrict-charset usually wants to be either output-charset + * or UTF-8 (the latter indicating that any Unicode character + * is fair game and it will be specified using &#foo; if it + * isn't in output-charset). However, since XHTML defaults to + * UTF-8 and it's fiddly to tell it otherwise, it's just + * possible that some user may need to set restrict-charset + * to their charset of choice while leaving _output_-charset + * at UTF-8. Figure out some configuration, and apply it. 
+ * + * - test all HTML flavours and ensure they validate sensibly. Fix + * remaining confusion issues such as <?xml?> and obsoleteness + * of <a name>. + * + * - proper naming of all fragment IDs. The ones for sections are + * fine; the ones for numbered list and bibliociteds are utter + * crap; the ones for indexes _might_ do but it might be worth + * giving some thought to how to do them better. + * + also set up a mechanism for ensuring that fragment IDs + * never clash. + * + * - nonbreaking spaces? + */ + +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include <limits.h> +#include "halibut.h" + +#define is_heading_type(type) ( (type) == para_Title || \ + (type) == para_Chapter || \ + (type) == para_Appendix || \ + (type) == para_UnnumberedChapter || \ + (type) == para_Heading || \ + (type) == para_Subsect) + +#define heading_depth(p) ( (p)->type == para_Subsect ? (p)->aux + 1 : \ + (p)->type == para_Heading ? 1 : \ + (p)->type == para_Title ? -1 : 0 ) + +typedef struct { + int just_numbers; + wchar_t *number_suffix; +} sectlevel; + +typedef struct { + int nasect; + sectlevel achapter, *asect; + int *contents_depths; /* 0=main, 1=chapter, 2=sect etc */ + int ncdepths; + int address_section, visible_version_id; + int leaf_contains_contents, leaf_smallest_contents; + char *contents_filename; + char *index_filename; + char *template_filename; + char *single_filename; + char *template_fragment; + char *head_end, *body_start, *body_end, *addr_start, *addr_end; + char *body_tag, *nav_attr; + wchar_t *author, *description; + int restrict_charset, output_charset; + enum { + HTML_3_2, HTML_4, + XHTML_1_0_TRANSITIONAL, XHTML_1_0_STRICT + } htmlver; + wchar_t *lquote, *rquote; + int leaf_level; +} htmlconfig; + +#define contents_depth(conf, level) \ + ( (conf).ncdepths > (level) ? 
(conf).contents_depths[level] : (level)+2 ) + +#define is_xhtml(ver) ((ver) >= XHTML_1_0_TRANSITIONAL) + +typedef struct htmlfile htmlfile; +typedef struct htmlsect htmlsect; + +struct htmlfile { + htmlfile *next; + char *filename; + int last_fragment_number; + int min_heading_depth; + htmlsect *first, *last; /* first/last highest-level sections */ +}; + +struct htmlsect { + htmlsect *next, *parent; + htmlfile *file; + paragraph *title, *text; + enum { NORMAL, TOP, INDEX } type; + int contents_depth; + char *fragment; +}; + +typedef struct { + htmlfile *head, *tail; + htmlfile *single, *index; +} htmlfilelist; + +typedef struct { + htmlsect *head, *tail; +} htmlsectlist; + +typedef struct { + int nrefs, refsize; + word **refs; +} htmlindex; + +typedef struct { + htmlsect *section; + char *fragment; +} htmlindexref; + +typedef struct { + /* + * This level deals with charset conversion, starting and + * ending tags, and writing to the file. It's the lexical + * level. + */ + FILE *fp; + int charset; + charset_state cstate; + int ver; + enum { + HO_NEUTRAL, HO_IN_TAG, HO_IN_EMPTY_TAG, HO_IN_TEXT + } state; + /* + * Stuff beyond here deals with the higher syntactic level: it + * tracks how many levels of <ul> are currently open when + * producing a contents list, for example. 
+ */ + int contents_level; +} htmloutput; + +static void html_file_section(htmlconfig *cfg, htmlfilelist *files, + htmlsect *sect, int depth); + +static htmlfile *html_new_file(htmlfilelist *list, char *filename); +static htmlsect *html_new_sect(htmlsectlist *list, paragraph *title); + +/* Flags for html_words() flags parameter */ +#define NOTHING 0x00 +#define MARKUP 0x01 +#define LINKS 0x02 +#define INDEXENTS 0x04 +#define ALL 0x07 +static void html_words(htmloutput *ho, word *words, int flags, + htmlfile *file, keywordlist *keywords, htmlconfig *cfg); +static void html_codepara(htmloutput *ho, word *words); + +static void element_open(htmloutput *ho, char const *name); +static void element_close(htmloutput *ho, char const *name); +static void element_empty(htmloutput *ho, char const *name); +static void element_attr(htmloutput *ho, char const *name, char const *value); +static void element_attr_w(htmloutput *ho, char const *name, + wchar_t const *value); +static void html_text(htmloutput *ho, wchar_t const *str); +static void html_text_limit(htmloutput *ho, wchar_t const *str, int maxlen); +static void html_text_limit_internal(htmloutput *ho, wchar_t const *text, + int maxlen, int quote_quotes); +static void html_nl(htmloutput *ho); +static void html_raw(htmloutput *ho, char *text); +static void html_raw_as_attr(htmloutput *ho, char *text); +static void cleanup(htmloutput *ho); + +static void html_href(htmloutput *ho, htmlfile *thisfile, + htmlfile *targetfile, char *targetfrag); + +static char *html_format(paragraph *p, char *template_string); +static void html_sanitise_fragment(char *text); + +static void html_contents_entry(htmloutput *ho, int depth, htmlsect *s, + htmlfile *thisfile, keywordlist *keywords, + htmlconfig *cfg); +static void html_section_title(htmloutput *ho, htmlsect *s, + htmlfile *thisfile, keywordlist *keywords, + htmlconfig *cfg); + +static htmlconfig html_configure(paragraph *source) { + htmlconfig ret; + paragraph *p; + + /* + * 
Defaults. + */ + ret.leaf_level = 2; + ret.achapter.just_numbers = FALSE; + ret.achapter.number_suffix = L": "; + ret.nasect = 1; + ret.asect = mknewa(sectlevel, ret.nasect); + ret.asect[0].just_numbers = TRUE; + ret.asect[0].number_suffix = L" "; + ret.ncdepths = 0; + ret.contents_depths = 0; + ret.visible_version_id = TRUE; + ret.address_section = TRUE; + ret.leaf_contains_contents = FALSE; + ret.leaf_smallest_contents = 4; + ret.single_filename = dupstr("Manual.html"); + ret.contents_filename = dupstr("Contents.html"); + ret.index_filename = dupstr("IndexPage.html"); + ret.template_filename = dupstr("%n.html"); + ret.template_fragment = dupstr("%b"); + ret.head_end = ret.body_tag = ret.body_start = ret.body_end = + ret.addr_start = ret.addr_end = ret.nav_attr = NULL; + ret.author = ret.description = NULL; + ret.restrict_charset = CS_ASCII; + ret.output_charset = CS_ASCII; + ret.htmlver = HTML_4; + /* + * Default quote characters are Unicode matched single quotes, + * falling back to ordinary ASCII ". + */ + ret.lquote = L"\x2018\0\x2019\0\"\0\"\0\0"; + ret.rquote = uadv(ret.lquote); + + /* + * Two-pass configuration so that we can pick up global config + * (e.g. `quotes') before having it overridden by specific + * config (`html-quotes'), irrespective of the order in which + * they occur. 
+ */ + for (p = source; p; p = p->next) { + if (p->type == para_Config) { + if (!ustricmp(p->keyword, L"quotes")) { + if (*uadv(p->keyword) && *uadv(uadv(p->keyword))) { + ret.lquote = uadv(p->keyword); + ret.rquote = uadv(ret.lquote); + } + } + } + } + + for (p = source; p; p = p->next) { + if (p->type == para_Config) { + wchar_t *k = p->keyword; + + if (!ustrnicmp(k, L"xhtml-", 6)) + k++; /* treat `xhtml-' and `html-' the same */ + + if (!ustricmp(k, L"html-charset")) { + char *csname = utoa_dup(uadv(k), CS_ASCII); + ret.restrict_charset = ret.output_charset = + charset_from_localenc(csname); + sfree(csname); + } else if (!ustricmp(k, L"html-single-filename")) { + sfree(ret.single_filename); + ret.single_filename = dupstr(adv(p->origkeyword)); + } else if (!ustricmp(k, L"html-contents-filename")) { + sfree(ret.contents_filename); + ret.contents_filename = dupstr(adv(p->origkeyword)); + } else if (!ustricmp(k, L"html-index-filename")) { + sfree(ret.index_filename); + ret.index_filename = dupstr(adv(p->origkeyword)); + } else if (!ustricmp(k, L"html-template-filename")) { + sfree(ret.template_filename); + ret.template_filename = dupstr(adv(p->origkeyword)); + } else if (!ustricmp(k, L"html-template-fragment")) { + sfree(ret.template_fragment); + ret.template_fragment = dupstr(adv(p->origkeyword)); + } else if (!ustricmp(k, L"html-chapter-numeric")) { + ret.achapter.just_numbers = utob(uadv(k)); + } else if (!ustricmp(k, L"html-chapter-suffix")) { + ret.achapter.number_suffix = uadv(k); + } else if (!ustricmp(k, L"html-leaf-level")) { + ret.leaf_level = utoi(uadv(k)); + } else if (!ustricmp(k, L"html-section-numeric")) { + wchar_t *q = uadv(k); + int n = 0; + if (uisdigit(*q)) { + n = utoi(q); + q = uadv(q); + } + if (n >= ret.nasect) { + int i; + ret.asect = resize(ret.asect, n+1); + for (i = ret.nasect; i <= n; i++) + ret.asect[i] = ret.asect[ret.nasect-1]; + ret.nasect = n+1; + } + ret.asect[n].just_numbers = utob(q); + } else if (!ustricmp(k, 
L"html-section-suffix")) { + wchar_t *q = uadv(k); + int n = 0; + if (uisdigit(*q)) { + n = utoi(q); + q = uadv(q); + } + if (n >= ret.nasect) { + int i; + ret.asect = resize(ret.asect, n+1); + for (i = ret.nasect; i <= n; i++) { + ret.asect[i] = ret.asect[ret.nasect-1]; + } + ret.nasect = n+1; + } + ret.asect[n].number_suffix = q; + } else if (!ustricmp(k, L"html-contents-depth") || + !ustrnicmp(k, L"html-contents-depth-", 20)) { + /* + * Relic of old implementation: this directive used + * to be written as \cfg{html-contents-depth-3}{2} + * rather than the usual Halibut convention of + * \cfg{html-contents-depth}{3}{2}. We therefore + * support both. + */ + wchar_t *q = k[19] ? k+20 : uadv(k); + int n = 0; + if (uisdigit(*q)) { + n = utoi(q); + q = uadv(q); + } + if (n >= ret.ncdepths) { + int i; + ret.contents_depths = resize(ret.contents_depths, n+1); + for (i = ret.ncdepths; i <= n; i++) { + ret.contents_depths[i] = i+2; + } + ret.ncdepths = n+1; + } + ret.contents_depths[n] = utoi(q); + } else if (!ustricmp(k, L"html-head-end")) { + ret.head_end = adv(p->origkeyword); + } else if (!ustricmp(k, L"html-body-tag")) { + ret.body_tag = adv(p->origkeyword); + } else if (!ustricmp(k, L"html-body-start")) { + ret.body_start = adv(p->origkeyword); + } else if (!ustricmp(k, L"html-body-end")) { + ret.body_end = adv(p->origkeyword); + } else if (!ustricmp(k, L"html-address-start")) { + ret.addr_start = adv(p->origkeyword); + } else if (!ustricmp(k, L"html-address-end")) { + ret.addr_end = adv(p->origkeyword); + } else if (!ustricmp(k, L"html-navigation-attributes")) { + ret.nav_attr = adv(p->origkeyword); + } else if (!ustricmp(k, L"html-author")) { + ret.author = uadv(k); + } else if (!ustricmp(k, L"html-description")) { + ret.description = uadv(k); + } else if (!ustricmp(k, L"html-suppress-address")) { + ret.address_section = !utob(uadv(k)); + } else if (!ustricmp(k, L"html-versionid")) { + ret.visible_version_id = utob(uadv(k)); + } else if (!ustricmp(k, 
L"html-quotes")) { + if (*uadv(k) && *uadv(uadv(k))) { + ret.lquote = uadv(k); + ret.rquote = uadv(ret.lquote); + } + } else if (!ustricmp(k, L"html-leaf-contains-contents")) { + ret.leaf_contains_contents = utob(uadv(k)); + } else if (!ustricmp(k, L"html-leaf-smallest-contents")) { + ret.leaf_smallest_contents = utoi(uadv(k)); + } + } + } + + /* + * Now process fallbacks on quote characters. + */ + while (*uadv(ret.rquote) && *uadv(uadv(ret.rquote)) && + (!cvt_ok(ret.restrict_charset, ret.lquote) || + !cvt_ok(ret.restrict_charset, ret.rquote))) { + ret.lquote = uadv(ret.rquote); + ret.rquote = uadv(ret.lquote); + } + + return ret; +} + +paragraph *html_config_filename(char *filename) +{ + /* + * If the user passes in a single filename as a parameter to + * the `--html' command-line option, then we should assume it + * to imply _two_ config directives: + * \cfg{html-single-filename}{whatever} and + * \cfg{html-leaf-level}{0}; the rationale being that the user + * wants their output _in that file_. + */ + paragraph *p, *q; + + p = cmdline_cfg_simple("html-single-filename", filename, NULL); + q = cmdline_cfg_simple("html-leaf-level", "0", NULL); + p->next = q; + return p; +} + +void html_backend(paragraph *sourceform, keywordlist *keywords, + indexdata *idx, void *unused) { + paragraph *p; + htmlconfig conf; + htmlfilelist files = { NULL, NULL, NULL, NULL }; + htmlsectlist sects = { NULL, NULL }, nonsects = { NULL, NULL }; + + IGNORE(unused); + + conf = html_configure(sourceform); + + /* + * We're going to make heavy use of paragraphs' private data + * fields in the forthcoming code. Clear them first, so we can + * reliably tell whether we have auxiliary data for a + * particular paragraph. + */ + for (p = sourceform; p; p = p->next) + p->private_data = NULL; + + /* + * Start by figuring out into which file each piece of the + * document should be put. 
We'll do this by inventing an + * `htmlsect' structure and stashing it in the private_data + * field of each section paragraph; we also need one additional + * htmlsect for the document index, which won't show up in the + * source form but needs to be consistently mentioned in + * contents links. + * + * While we're here, we'll also invent the HTML fragment name + * for each section. + */ + { + htmlsect *topsect, *sect; + int d; + + topsect = html_new_sect(&sects, p); + topsect->type = TOP; + topsect->title = NULL; + topsect->text = sourceform; + topsect->contents_depth = contents_depth(conf, 0); + html_file_section(&conf, &files, topsect, -1); + topsect->fragment = NULL; + + for (p = sourceform; p; p = p->next) + if (is_heading_type(p->type)) { + d = heading_depth(p); + + if (p->type == para_Title) { + topsect->title = p; + continue; + } + + sect = html_new_sect(&sects, p); + sect->text = p->next; + + sect->contents_depth = contents_depth(conf, d+1) - (d+1); + + if (p->parent) { + sect->parent = (htmlsect *)p->parent->private_data; + assert(sect->parent != NULL); + } else + sect->parent = topsect; + p->private_data = sect; + + html_file_section(&conf, &files, sect, d); + + sect->fragment = html_format(p, conf.template_fragment); + html_sanitise_fragment(sect->fragment); + /* FIXME: clash checking? add to a tree of (file,frag)? */ + } + + /* And the index. */ + sect = html_new_sect(&sects, NULL); + sect->fragment = dupstr("Index"); /* FIXME: this _can't_ be right */ + sect->text = NULL; + sect->type = INDEX; + sect->parent = topsect; + html_file_section(&conf, &files, sect, 0); /* peer of chapters */ + files.index = sect->file; + } + + /* + * Go through the keyword list and sort out fragment IDs for + * all the potentially referenced paragraphs which _aren't_ + * headings. 
+ */ + { + int i; + keyword *kw; + htmlsect *sect; + + for (i = 0; (kw = index234(keywords->keys, i)) != NULL; i++) { + paragraph *q, *p = kw->para; + + if (!is_heading_type(p->type)) { + htmlsect *parent; + + /* + * Find the paragraph's parent htmlsect, to + * determine which file it will end up in. + */ + q = p->parent; + if (!q) { + /* + * Preamble paragraphs have no parent. So if we + * have a non-heading with no parent, it must + * be preamble, and therefore its parent + * htmlsect must be the preamble one. + */ + assert(sects.head && + sects.head->type == TOP); + parent = sects.head; + } else + parent = (htmlsect *)q->private_data; + + /* + * Now we can construct an htmlsect for this + * paragraph itself, taking care to put it in the + * list of non-sections rather than the list of + * sections (so that traverses of the `sects' list + * won't attempt to add it to the contents or + * anything weird like that). + */ + sect = html_new_sect(&nonsects, p); + sect->file = parent->file; + sect->parent = parent; + p->private_data = sect; + + /* + * FIXME: We need a much better means of naming + * these, possibly involving an additional + * configuration template. For the moment I'll just + * invent something completely stupid. + */ + sect->fragment = mknewa(char, 40); + sprintf(sect->fragment, "frag%p", sect); + } + } + } + + /* + * Now sort out the index. This involves: + * + * - For each index term, we set up an htmlindex structure to + * store all the references to that term. + * + * - Then we make a pass over the actual document, finding + * every word_IndexRef; for each one, we actually figure out + * the HTML filename/fragment pair we will use to reference + * it, store that information in the private data field of + * the word_IndexRef itself (so we can recreate it when the + * time comes to output our HTML), and add a reference to it + * to the index term in question. 
+ */ + { + int i; + indexentry *entry; + htmlsect *lastsect; + word *w; + + /* + * Set up the htmlindex structures. + */ + + for (i = 0; (entry = index234(idx->entries, i)) != NULL; i++) { + htmlindex *hi = mknew(htmlindex); + + hi->nrefs = hi->refsize = 0; + hi->refs = NULL; + + entry->backend_data = hi; + } + + /* + * Run over the document inventing fragments. Each fragment + * is of the form `i' followed by an integer. + * + * FIXME: Probably in the file-organisation pass we should + * work out the fragment names of every section, so that we + * could load them all into a tree and hence ensure these + * index fragments don't clash with them. + */ + lastsect = NULL; + for (p = sourceform; p; p = p->next) { + if (is_heading_type(p->type)) + lastsect = (htmlsect *)p->private_data; + + for (w = p->words; w; w = w->next) + if (w->type == word_IndexRef) { + htmlindexref *hr = mknew(htmlindexref); + indextag *tag; + int i; + + hr->section = lastsect; + /* FIXME: clash checking */ + { + char buf[40]; + sprintf(buf, "i%d", + lastsect->file->last_fragment_number++); + hr->fragment = dupstr(buf); + } + w->private_data = hr; + + tag = index_findtag(idx, w->text); + if (!tag) + break; + + for (i = 0; i < tag->nrefs; i++) { + indexentry *entry = tag->refs[i]; + htmlindex *hi = (htmlindex *)entry->backend_data; + + if (hi->nrefs >= hi->refsize) { + hi->refsize += 32; + hi->refs = resize(hi->refs, hi->refsize); + } + + hi->refs[hi->nrefs++] = w; + } + } + } + } + + /* + * Now we're ready to write out the actual HTML files. + * + * For each file: + * + * - we open that file and write its header + * - we run down the list of sections + * - for each section directly contained within that file, we + * output the section text + * - for each section which is not in the file but which has a + * parent that is, we output a contents entry for the + * section if appropriate + * - finally, we output the file trailer and close the file. 
+ */ + { + htmlfile *f, *prevf; + htmlsect *s; + paragraph *p; + + prevf = NULL; + + for (f = files.head; f; f = f->next) { + htmloutput ho; + int displaying; + enum LISTTYPE { NOLIST, UL, OL, DL }; + enum ITEMTYPE { NOITEM, LI, DT, DD }; + struct stackelement { + struct stackelement *next; + enum LISTTYPE listtype; + enum ITEMTYPE itemtype; + } *stackhead; + +#define listname(lt) ( (lt)==UL ? "ul" : (lt)==OL ? "ol" : "dl" ) +#define itemname(lt) ( (lt)==LI ? "li" : (lt)==DT ? "dt" : "dd" ) + + ho.fp = fopen(f->filename, "w"); + ho.charset = conf.output_charset; + ho.cstate = charset_init_state; + ho.ver = conf.htmlver; + ho.state = HO_NEUTRAL; + ho.contents_level = 0; + + /* <!DOCTYPE>. */ + switch (conf.htmlver) { + case HTML_3_2: + fprintf(ho.fp, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD " + "HTML 3.2 Final//EN\">\n"); + break; + case HTML_4: + fprintf(ho.fp, "<!DOCTYPE HTML PUBLIC \"-//W3C//DTD HTML" + " 4.01//EN\"\n\"http://www.w3.org/TR/html4/" + "strict.dtd\">\n"); + break; + case XHTML_1_0_TRANSITIONAL: + /* FIXME: <?xml?> to specify character encoding. + * This breaks HTML backwards compat, so perhaps avoid, or + * perhaps only emit when not using the default UTF-8? */ + fprintf(ho.fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML" + " 1.0 Transitional//EN\"\n\"http://www.w3.org/TR/" + "xhtml1/DTD/xhtml1-transitional.dtd\">\n"); + break; + case XHTML_1_0_STRICT: + /* FIXME: <?xml?> to specify character encoding. 
*/ + fprintf(ho.fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML" + " 1.0 Strict//EN\"\n\"http://www.w3.org/TR/xhtml1/" + "DTD/xhtml1-strict.dtd\">\n"); + break; + } + + element_open(&ho, "html"); + if (is_xhtml(conf.htmlver)) { + element_attr(&ho, "xmlns", "http://www.w3.org/1999/xhtml"); + } + html_nl(&ho); + + element_open(&ho, "head"); + html_nl(&ho); + + element_empty(&ho, "meta"); + element_attr(&ho, "http-equiv", "content-type"); + { + char buf[200]; + sprintf(buf, "text/html; charset=%.150s", + charset_to_mimeenc(conf.output_charset)); + element_attr(&ho, "content", buf); + } + html_nl(&ho); + + if (conf.author) { + element_empty(&ho, "meta"); + element_attr(&ho, "name", "author"); + element_attr_w(&ho, "content", conf.author); + html_nl(&ho); + } + + if (conf.description) { + element_empty(&ho, "meta"); + element_attr(&ho, "name", "description"); + element_attr_w(&ho, "content", conf.description); + html_nl(&ho); + } + + element_open(&ho, "title"); + if (f->first && f->first->title) { + html_words(&ho, f->first->title->words, NOTHING, + f, keywords, &conf); + + assert(f->last); + if (f->last != f->first && f->last->title) { + html_text(&ho, L" - "); /* FIXME: configurable? */ + html_words(&ho, f->last->title->words, NOTHING, + f, keywords, &conf); + } + } + element_close(&ho, "title"); + html_nl(&ho); + + if (conf.head_end) + html_raw(&ho, conf.head_end); + + element_close(&ho, "head"); + html_nl(&ho); + + /* FIXME: need to be able to specify replacement for this */ + if (conf.body_tag) + html_raw(&ho, conf.body_tag); + else + element_open(&ho, "body"); + html_nl(&ho); + + if (conf.body_start) + html_raw(&ho, conf.body_start); + + /* + * Write out a nav bar. Special case: we don't do this + * if there is only one file. 
+ */ + if (files.head != files.tail) { + element_open(&ho, "p"); + if (conf.nav_attr) + html_raw_as_attr(&ho, conf.nav_attr); + + if (prevf) { + element_open(&ho, "a"); + element_attr(&ho, "href", prevf->filename); + } + html_text(&ho, L"Previous");/* FIXME: conf? */ + if (prevf) + element_close(&ho, "a"); + + html_text(&ho, L" | "); /* FIXME: conf? */ + + if (f != files.head) { + element_open(&ho, "a"); + element_attr(&ho, "href", files.head->filename); + } + html_text(&ho, L"Contents");/* FIXME: conf? */ + if (f != files.head) + element_close(&ho, "a"); + + html_text(&ho, L" | "); /* FIXME: conf? */ + + if (f != files.index) { + element_open(&ho, "a"); + element_attr(&ho, "href", files.index->filename); + } + html_text(&ho, L"Index");/* FIXME: conf? */ + if (f != files.index) + element_close(&ho, "a"); + + html_text(&ho, L" | "); /* FIXME: conf? */ + + if (f->next) { + element_open(&ho, "a"); + element_attr(&ho, "href", f->next->filename); + } + html_text(&ho, L"Next"); /* FIXME: conf? */ + if (f->next) + element_close(&ho, "a"); + + element_close(&ho, "p"); + html_nl(&ho); + } + prevf = f; + + /* + * Write out a prefix TOC for the file. + * + * We start by going through the section list and + * collecting the sections which need to be added to + * the contents. On the way, we also test to see if + * this file is a leaf file (defined as one which + * contains all descendants of any section it + * contains), because this will play a part in our + * decision on whether or not to _output_ the TOC. + * + * Special case: we absolutely do not do this if we're + * in single-file mode. + */ + if (files.head != files.tail) { + int ntoc = 0, tocsize = 0; + htmlsect **toc = NULL; + int leaf = TRUE; + + for (s = sects.head; s; s = s->next) { + htmlsect *a, *ac; + int depth, adepth; + + /* + * Search up from this section until we find + * the highest-level one which belongs in this + * file. 
+ */ + depth = adepth = 0; + a = NULL; + for (ac = s; ac; ac = ac->parent) { + if (ac->file == f) { + a = ac; + adepth = depth; + } + depth++; + } + + if (s->file != f && a != NULL) + leaf = FALSE; + + if (a) { + if (adepth <= a->contents_depth) { + if (ntoc >= tocsize) { + tocsize += 64; + toc = resize(toc, tocsize); + } + toc[ntoc++] = s; + } + } + } + + if (leaf && conf.leaf_contains_contents && + ntoc >= conf.leaf_smallest_contents) { + int i; + + for (i = 0; i < ntoc; i++) { + htmlsect *s = toc[i]; + int hlevel = (s->type == TOP ? -1 : + s->type == INDEX ? 0 : + heading_depth(s->title)) + - f->min_heading_depth + 1; + + assert(hlevel >= 1); + html_contents_entry(&ho, hlevel, s, + f, keywords, &conf); + } + html_contents_entry(&ho, 0, NULL, f, keywords, &conf); + } + } + + /* + * Now go through the document and output some real + * text. + */ + displaying = FALSE; + for (s = sects.head; s; s = s->next) { + if (s->file == f) { + /* + * This section belongs in this file. + * Display it. + */ + displaying = TRUE; + } else { + htmlsect *a, *ac; + int depth, adepth; + + displaying = FALSE; + + /* + * Search up from this section until we find + * the highest-level one which belongs in this + * file. + */ + depth = adepth = 0; + a = NULL; + for (ac = s; ac; ac = ac->parent) { + if (ac->file == f) { + a = ac; + adepth = depth; + } + depth++; + } + + if (a != NULL) { + /* + * This section does not belong in this + * file, but an ancestor of it does. Write + * out a contents table entry, if the depth + * doesn't exceed the maximum contents + * depth for the ancestor section. + */ + if (adepth <= a->contents_depth) { + html_contents_entry(&ho, adepth, s, + f, keywords, &conf); + } + } + } + + if (displaying) { + int hlevel; + char htag[3]; + + html_contents_entry(&ho, 0, NULL, f, keywords, &conf); + + /* + * Display the section heading. + */ + + hlevel = (s->type == TOP ? -1 : + s->type == INDEX ? 
0 : + heading_depth(s->title)) + - f->min_heading_depth + 1; + assert(hlevel >= 1); + /* HTML headings only go up to <h6> */ + if (hlevel > 6) + hlevel = 6; + htag[0] = 'h'; + htag[1] = '0' + hlevel; + htag[2] = '\0'; + element_open(&ho, htag); + + /* + * Provide anchor for cross-links to target. + * + * FIXME: AIcurrentlyUI, this needs to be done + * differently in XHTML because <a name> is + * deprecated or obsolete. + * + * (Also we'll have to do this separately in + * other paragraph types - NumberedList and + * BiblioCited.) + */ + element_open(&ho, "a"); + element_attr(&ho, "name", s->fragment); + element_close(&ho, "a"); + + html_section_title(&ho, s, f, keywords, &conf); + + element_close(&ho, htag); + + /* + * Now display the section text. + */ + if (s->text) { + stackhead = mknew(struct stackelement); + stackhead->next = NULL; + stackhead->listtype = NOLIST; + stackhead->itemtype = NOITEM; + + for (p = s->text;; p = p->next) { + enum LISTTYPE listtype; + struct stackelement *se; + + /* + * Preliminary switch to figure out what + * sort of list we expect to be inside at + * this stage. + * + * Since p may still be NULL at this point, + * I invent a harmless paragraph type for + * it if it is. + */ + switch (p ? 
p->type : para_Normal) { + case para_Rule: + case para_Normal: + case para_Copyright: + case para_BiblioCited: + case para_Code: + case para_QuotePush: + case para_QuotePop: + case para_Chapter: + case para_Appendix: + case para_UnnumberedChapter: + case para_Heading: + case para_Subsect: + case para_LcontPop: + listtype = NOLIST; + break; + + case para_Bullet: + listtype = UL; + break; + + case para_NumberedList: + listtype = OL; + break; + + case para_DescribedThing: + case para_Description: + listtype = DL; + break; + + case para_LcontPush: + se = mknew(struct stackelement); + se->next = stackhead; + se->listtype = NOLIST; + se->itemtype = NOITEM; + stackhead = se; + continue; + + default: /* some totally non-printing para */ + continue; + } + + html_nl(&ho); + + /* + * Terminate the most recent list item, if + * any. (We left this until after + * processing LcontPush, since in that case + * the list item won't want to be + * terminated until after the corresponding + * LcontPop.) + */ + if (stackhead->itemtype != NOITEM) { + element_close(&ho, itemname(stackhead->itemtype)); + html_nl(&ho); + } + stackhead->itemtype = NOITEM; + + /* + * Terminate the current list, if it's not + * the one we want to be in. + */ + if (listtype != stackhead->listtype && + stackhead->listtype != NOLIST) { + element_close(&ho, listname(stackhead->listtype)); + html_nl(&ho); + } + + /* + * Leave the loop if our time has come. + */ + if (!p || (is_heading_type(p->type) && + p->type != para_Title)) + break; /* end of section text */ + + /* + * Start a fresh list if necessary. 
+ */ + if (listtype != stackhead->listtype && + listtype != NOLIST) + element_open(&ho, listname(listtype)); + + stackhead->listtype = listtype; + + switch (p->type) { + case para_Rule: + element_empty(&ho, "hr"); + break; + case para_Code: + html_codepara(&ho, p->words); + break; + case para_Normal: + case para_Copyright: + element_open(&ho, "p"); + html_nl(&ho); + html_words(&ho, p->words, ALL, + f, keywords, &conf); + html_nl(&ho); + element_close(&ho, "p"); + break; + case para_BiblioCited: + element_open(&ho, "p"); + if (p->private_data) { + htmlsect *s = (htmlsect *)p->private_data; + element_open(&ho, "a"); + element_attr(&ho, "name", s->fragment); + element_close(&ho, "a"); + } + html_nl(&ho); + html_words(&ho, p->kwtext, ALL, + f, keywords, &conf); + html_text(&ho, L" "); + html_words(&ho, p->words, ALL, + f, keywords, &conf); + html_nl(&ho); + element_close(&ho, "p"); + break; + case para_Bullet: + case para_NumberedList: + element_open(&ho, "li"); + if (p->private_data) { + htmlsect *s = (htmlsect *)p->private_data; + element_open(&ho, "a"); + element_attr(&ho, "name", s->fragment); + element_close(&ho, "a"); + } + html_nl(&ho); + stackhead->itemtype = LI; + html_words(&ho, p->words, ALL, + f, keywords, &conf); + break; + case para_DescribedThing: + element_open(&ho, "dt"); + html_nl(&ho); + stackhead->itemtype = DT; + html_words(&ho, p->words, ALL, + f, keywords, &conf); + break; + case para_Description: + element_open(&ho, "dd"); + html_nl(&ho); + stackhead->itemtype = DD; + html_words(&ho, p->words, ALL, + f, keywords, &conf); + break; + + case para_QuotePush: + element_open(&ho, "blockquote"); + break; + case para_QuotePop: + element_close(&ho, "blockquote"); + break; + + case para_LcontPop: + se = stackhead; + stackhead = stackhead->next; + assert(stackhead); + sfree(se); + break; + } + } + + assert(stackhead && !stackhead->next); + sfree(stackhead); + } + + if (s->type == INDEX) { + indexentry *entry; + int i; + + /* + * This section is the index. 
I'll just + * render it as a single paragraph, with a + * colon between the index term and the + * references, and <br> in between each + * entry. + */ + element_open(&ho, "p"); + + for (i = 0; (entry = + index234(idx->entries, i)) != NULL; i++) { + htmlindex *hi = (htmlindex *)entry->backend_data; + int j; + + if (i > 0) + element_empty(&ho, "br"); + html_nl(&ho); + + html_words(&ho, entry->text, MARKUP|LINKS, + f, keywords, &conf); + + html_text(&ho, L": ");/* FIXME: configurable */ + + for (j = 0; j < hi->nrefs; j++) { + htmlindexref *hr = + (htmlindexref *)hi->refs[j]->private_data; + paragraph *p = hr->section->title; + + if (j > 0) + html_text(&ho, L", "); /* FIXME: conf */ + + html_href(&ho, f, hr->section->file, + hr->fragment); + if (p && p->kwtext) + html_words(&ho, p->kwtext, MARKUP|LINKS, + f, keywords, &conf); + else if (p && p->words) + html_words(&ho, p->words, MARKUP|LINKS, + f, keywords, &conf); + else + html_text(&ho, L"FIXME"); + element_close(&ho, "a"); + } + } + element_close(&ho, "p"); + } + } + } + + html_contents_entry(&ho, 0, NULL, f, keywords, &conf); + html_nl(&ho); + + { + /* + * Footer. + */ + int done_version_ids = FALSE; + + element_empty(&ho, "hr"); + + if (conf.body_end) + html_raw(&ho, conf.body_end); + + if (conf.address_section) { + element_open(&ho, "address"); + if (conf.addr_start) { + html_raw(&ho, conf.addr_start); + html_nl(&ho); + } + if (conf.visible_version_id) { + int started = FALSE; + for (p = sourceform; p; p = p->next) + if (p->type == para_VersionID) { + if (!started) + element_open(&ho, "p"); + else + element_empty(&ho, "br"); + html_nl(&ho); + html_text(&ho, L"["); /* FIXME: conf? */ + html_words(&ho, p->words, NOTHING, + f, keywords, &conf); + html_text(&ho, L"]"); /* FIXME: conf? 
*/ + started = TRUE; + } + if (started) + element_close(&ho, "p"); + done_version_ids = TRUE; + } + if (conf.addr_end) + html_raw(&ho, conf.addr_end); + element_close(&ho, "address"); + } + + if (!done_version_ids) { + /* + * If the user didn't want the version IDs + * visible, I think we still have a duty to put + * them in an HTML comment. + */ + int started = FALSE; + for (p = sourceform; p; p = p->next) + if (p->type == para_VersionID) { + if (!started) { + html_raw(&ho, "<!-- version IDs:\n"); + started = TRUE; + } + html_words(&ho, p->words, NOTHING, + f, keywords, &conf); + html_nl(&ho); + } + if (started) + html_raw(&ho, "-->\n"); + } + } + + element_close(&ho, "body"); + html_nl(&ho); + element_close(&ho, "html"); + html_nl(&ho); + cleanup(&ho); + } + } + + /* + * FIXME: Figure out a way to free the htmlindex and + * htmlindexref structures. + */ +} + +static void html_file_section(htmlconfig *cfg, htmlfilelist *files, + htmlsect *sect, int depth) +{ + htmlfile *file; + int ldepth; + + /* + * `depth' is derived from the heading_depth() macro at the top + * of this file, which counts title as -1, chapter as 0, + * heading as 1 and subsection as 2. However, the semantics of + * cfg->leaf_level are defined to count chapter as 1, heading + * as 2 etc. So first I increment depth :-( + */ + ldepth = depth + 1; + + if (cfg->leaf_level == 0) { + /* + * leaf_level==0 is a special case, in which everything is + * put into a single file. + */ + if (!files->single) + files->single = html_new_file(files, cfg->single_filename); + + file = files->single; + } else { + /* + * If the depth of this section is at or above leaf_level, + * we invent a fresh file and put this section at its head. + * Otherwise, we put it in the same file as its parent + * section. + */ + if (ldepth > cfg->leaf_level) { + /* + * We know that sect->parent cannot be NULL. The only + * circumstance in which it can be is if sect is at + * chapter or appendix level, i.e. 
ldepth==1; and if + * that's the case, then we cannot have entered this + * branch unless cfg->leaf_level==0, in which case we + * would be in the single-file case above and not here + * at all. + */ + assert(sect->parent); + + file = sect->parent->file; + } else { + if (sect->type == TOP) { + file = html_new_file(files, cfg->contents_filename); + } else if (sect->type == INDEX) { + file = html_new_file(files, cfg->index_filename); + } else { + char *title; + + assert(ldepth > 0 && sect->title); + title = html_format(sect->title, cfg->template_filename); + file = html_new_file(files, title); + sfree(title); + } + } + } + + sect->file = file; + + if (file->min_heading_depth > depth) { + /* + * This heading is at a higher level than any heading we + * have so far placed in this file; so we set the `first' + * pointer. + */ + file->min_heading_depth = depth; + file->first = sect; + } + + if (file->min_heading_depth == depth) + file->last = sect; +} + +static htmlfile *html_new_file(htmlfilelist *list, char *filename) +{ + htmlfile *ret = mknew(htmlfile); + + ret->next = NULL; + if (list->tail) + list->tail->next = ret; + else + list->head = ret; + list->tail = ret; + + ret->filename = dupstr(filename); + ret->last_fragment_number = 0; + ret->min_heading_depth = INT_MAX; + ret->first = ret->last = NULL; + + return ret; +} + +static htmlsect *html_new_sect(htmlsectlist *list, paragraph *title) +{ + htmlsect *ret = mknew(htmlsect); + + ret->next = NULL; + if (list->tail) + list->tail->next = ret; + else + list->head = ret; + list->tail = ret; + + ret->title = title; + ret->file = NULL; + ret->parent = NULL; + ret->type = NORMAL; + + return ret; +} + +static void html_words(htmloutput *ho, word *words, int flags, + htmlfile *file, keywordlist *keywords, htmlconfig *cfg) +{ + word *w; + char *c; + int style, type; + + for (w = words; w; w = w->next) switch (w->type) { + case word_HyperLink: + if (flags & LINKS) { + element_open(ho, "a"); + c = utoa_dup(w->text, CS_ASCII); 
+ element_attr(ho, "href", c); + sfree(c); + } + break; + case word_UpperXref: + case word_LowerXref: + if (flags & LINKS) { + keyword *kwl = kw_lookup(keywords, w->text); + paragraph *p = kwl->para; + htmlsect *s = (htmlsect *)p->private_data; + + assert(s); + + html_href(ho, file, s->file, s->fragment); + } + break; + case word_HyperEnd: + case word_XrefEnd: + if (flags & LINKS) + element_close(ho, "a"); + break; + case word_IndexRef: + if (flags & INDEXENTS) { + htmlindexref *hr = (htmlindexref *)w->private_data; + element_open(ho, "a"); + element_attr(ho, "name", hr->fragment); + element_close(ho, "a"); + } + break; + case word_Normal: + case word_Emph: + case word_Code: + case word_WeakCode: + case word_WhiteSpace: + case word_EmphSpace: + case word_CodeSpace: + case word_WkCodeSpace: + case word_Quote: + case word_EmphQuote: + case word_CodeQuote: + case word_WkCodeQuote: + style = towordstyle(w->type); + type = removeattr(w->type); + if (style == word_Emph && + (attraux(w->aux) == attr_First || + attraux(w->aux) == attr_Only) && + (flags & MARKUP)) + element_open(ho, "em"); + else if ((style == word_Code || style == word_WeakCode) && + (attraux(w->aux) == attr_First || + attraux(w->aux) == attr_Only) && + (flags & MARKUP)) + element_open(ho, "code"); + + if (type == word_WhiteSpace) + html_text(ho, L" "); + else if (type == word_Quote) { + if (quoteaux(w->aux) == quote_Open) + html_text(ho, cfg->lquote); + else + html_text(ho, cfg->rquote); + } else { + if (cvt_ok(ho->charset, w->text) || !w->alt) + html_text(ho, w->text); + else + html_words(ho, w->alt, flags, file, keywords, cfg); + } + + if (style == word_Emph && + (attraux(w->aux) == attr_Last || + attraux(w->aux) == attr_Only) && + (flags & MARKUP)) + element_close(ho, "em"); + else if ((style == word_Code || style == word_WeakCode) && + (attraux(w->aux) == attr_Last || + attraux(w->aux) == attr_Only) && + (flags & MARKUP)) + element_close(ho, "code"); + + break; + } +} + +static void 
html_codepara(htmloutput *ho, word *words) +{ + element_open(ho, "pre"); + element_open(ho, "code"); + for (; words; words = words->next) if (words->type == word_WeakCode) { + char *open_tag; + wchar_t *t, *e; + + t = words->text; + if (words->next && words->next->type == word_Emph) { + e = words->next->text; + words = words->next; + } else + e = NULL; + + while (e && *e && *t) { + int n; + int ec = *e; + + for (n = 0; t[n] && e[n] && e[n] == ec; n++); + + open_tag = NULL; + if (ec == 'i') + open_tag = "em"; + else if (ec == 'b') + open_tag = "b"; + if (open_tag) + element_open(ho, open_tag); + + html_text_limit(ho, t, n); + + if (open_tag) + element_close(ho, open_tag); + + t += n; + e += n; + } + html_text(ho, t); + html_nl(ho); + } + element_close(ho, "code"); + element_close(ho, "pre"); +} + +static void html_charset_cleanup(htmloutput *ho) +{ + char outbuf[256]; + int bytes; + + bytes = charset_from_unicode(NULL, NULL, outbuf, lenof(outbuf), + ho->charset, &ho->cstate, NULL); + if (bytes > 0) + fwrite(outbuf, 1, bytes, ho->fp); +} + +static void return_to_neutral(htmloutput *ho) +{ + if (ho->state == HO_IN_TEXT) { + html_charset_cleanup(ho); + } else if (ho->state == HO_IN_EMPTY_TAG && is_xhtml(ho->ver)) { + fprintf(ho->fp, " />"); + } else if (ho->state == HO_IN_EMPTY_TAG || ho->state == HO_IN_TAG) { + fprintf(ho->fp, ">"); + } + + ho->state = HO_NEUTRAL; +} + +static void element_open(htmloutput *ho, char const *name) +{ + return_to_neutral(ho); + fprintf(ho->fp, "<%s", name); + ho->state = HO_IN_TAG; +} + +static void element_close(htmloutput *ho, char const *name) +{ + return_to_neutral(ho); + fprintf(ho->fp, "</%s>", name); + ho->state = HO_NEUTRAL; +} + +static void element_empty(htmloutput *ho, char const *name) +{ + return_to_neutral(ho); + fprintf(ho->fp, "<%s", name); + ho->state = HO_IN_EMPTY_TAG; +} + +static void html_nl(htmloutput *ho) +{ + return_to_neutral(ho); + fputc('\n', ho->fp); +} + +static void html_raw(htmloutput *ho, char *text) +{ + 
return_to_neutral(ho); + fputs(text, ho->fp); +} + +static void html_raw_as_attr(htmloutput *ho, char *text) +{ + assert(ho->state == HO_IN_TAG || ho->state == HO_IN_EMPTY_TAG); + fputc(' ', ho->fp); + fputs(text, ho->fp); +} + +static void element_attr(htmloutput *ho, char const *name, char const *value) +{ + html_charset_cleanup(ho); + assert(ho->state == HO_IN_TAG || ho->state == HO_IN_EMPTY_TAG); + fprintf(ho->fp, " %s=\"%s\"", name, value); +} + +static void element_attr_w(htmloutput *ho, char const *name, + wchar_t const *value) +{ + html_charset_cleanup(ho); + fprintf(ho->fp, " %s=\"", name); + html_text_limit_internal(ho, value, 0, TRUE); + html_charset_cleanup(ho); + fputc('"', ho->fp); +} + +static void html_text(htmloutput *ho, wchar_t const *text) +{ + html_text_limit(ho, text, 0); +} + +static void html_text_limit(htmloutput *ho, wchar_t const *text, int maxlen) +{ + return_to_neutral(ho); + html_text_limit_internal(ho, text, maxlen, FALSE); +} + +static void html_text_limit_internal(htmloutput *ho, wchar_t const *text, + int maxlen, int quote_quotes) +{ + int textlen = ustrlen(text); + char outbuf[256]; + int bytes, err; + + if (maxlen > 0 && textlen > maxlen) + textlen = maxlen; + + while (textlen > 0) { + /* Scan ahead for characters we really can't display in HTML. */ + int lenbefore, lenafter; + for (lenbefore = 0; lenbefore < textlen; lenbefore++) + if (text[lenbefore] == L'<' || + text[lenbefore] == L'>' || + text[lenbefore] == L'&' || + (text[lenbefore] == L'"' && quote_quotes)) + break; + lenafter = lenbefore; + bytes = charset_from_unicode(&text, &lenafter, outbuf, lenof(outbuf), + ho->charset, &ho->cstate, &err); + textlen -= (lenbefore - lenafter); + if (bytes > 0) + fwrite(outbuf, 1, bytes, ho->fp); + if (err) { + /* + * We have encountered a character that cannot be + * displayed in the selected output charset. Therefore, + * we use an HTML numeric entity reference. 
+ */ + assert(textlen > 0); + fprintf(ho->fp, "&#%ld;", (long int)*text); + text++, textlen--; + } else if (lenafter == 0 && textlen > 0) { + /* + * We have encountered a character which is special to + * HTML. + */ + if (*text == L'<') + fprintf(ho->fp, "&lt;"); + else if (*text == L'>') + fprintf(ho->fp, "&gt;"); + else if (*text == L'&') + fprintf(ho->fp, "&amp;"); + else if (*text == L'"') + fprintf(ho->fp, "&quot;"); + else + assert(!"Can't happen"); + text++, textlen--; + } + } +} + +static void cleanup(htmloutput *ho) +{ + return_to_neutral(ho); + fclose(ho->fp); +} + +static void html_href(htmloutput *ho, htmlfile *thisfile, + htmlfile *targetfile, char *targetfrag) +{ + rdstringc rs = { 0, 0, NULL }; + char *url; + + if (targetfile != thisfile) + rdaddsc(&rs, targetfile->filename); + if (targetfrag) { + rdaddc(&rs, '#'); + rdaddsc(&rs, targetfrag); + } + url = rs.text; + + element_open(ho, "a"); + element_attr(ho, "href", url); + sfree(url); +} + +static char *html_format(paragraph *p, char *template_string) +{ + char *c, *t; + word *w; + wchar_t *ws, wsbuf[2]; + rdstringc rs = { 0, 0, NULL }; + + t = template_string; + while (*t) { + if (*t == '%' && t[1]) { + int fmt; + + t++; + fmt = *t++; + + if (fmt == '%') { + rdaddc(&rs, fmt); + continue; + } + + w = NULL; + ws = NULL; + + if (p->kwtext && fmt == 'n') + w = p->kwtext; + else if (p->kwtext2 && fmt == 'b') { + /* + * HTML fragment names must start with a letter, so + * simply `1.2.3' is not adequate. In this case I'm + * going to cheat slightly by prepending the first + * character of the first word of kwtext, so that + * we get `C1' for chapter 1, `S2.3' for section + * 2.3 etc. 
+ */ + if (p->kwtext && p->kwtext->text[0]) { + ws = wsbuf; + wsbuf[1] = '\0'; + wsbuf[0] = p->kwtext->text[0]; + } + w = p->kwtext2; + } else if (p->keyword && *p->keyword && fmt == 'k') + ws = p->keyword; + else + w = p->words; + + if (ws) { + c = utoa_dup(ws, CS_ASCII); + rdaddsc(&rs,c); + sfree(c); + } + + while (w) { + if (removeattr(w->type) == word_Normal) { + c = utoa_dup(w->text, CS_ASCII); + rdaddsc(&rs,c); + sfree(c); + } + w = w->next; + } + } else { + rdaddc(&rs, *t++); + } + } + + return rdtrimc(&rs); +} + +static void html_sanitise_fragment(char *text) +{ + /* + * The HTML 4 spec's strictest definition of fragment names (<a + * name> and "id" attributes) says that they `must begin with a + * letter and may be followed by any number of letters, digits, + * hyphens, underscores, colons, and periods'. + * + * So here we unceremoniously rip out any characters not + * conforming to this limitation. + */ + char *p = text, *q = text; + + while (*p && !((*p>='A' && *p<='Z') || (*p>='a' && *p<='z'))) + p++; + if (!(*q++ = *p++)) + return; + while (*p) { + if ((*p>='A' && *p<='Z') || + (*p>='a' && *p<='z') || + (*p>='0' && *p<='9') || + *p=='-' || *p=='_' || *p==':' || *p=='.') + *q++ = *p; + p++; + } + + *q = '\0'; +} + +static void html_contents_entry(htmloutput *ho, int depth, htmlsect *s, + htmlfile *thisfile, keywordlist *keywords, + htmlconfig *cfg) +{ + while (ho->contents_level > depth) { + element_close(ho, "ul"); + ho->contents_level--; + } + + while (ho->contents_level < depth) { + element_open(ho, "ul"); + ho->contents_level++; + } + + if (!s) + return; + + element_open(ho, "li"); + html_href(ho, thisfile, s->file, s->fragment); + html_section_title(ho, s, thisfile, keywords, cfg); + element_close(ho, "a"); + element_close(ho, "li"); +} + +static void html_section_title(htmloutput *ho, htmlsect *s, htmlfile *thisfile, + keywordlist *keywords, htmlconfig *cfg) +{ + if (s->title) { + sectlevel *sl; + word *number; + int depth = 
heading_depth(s->title); + + if (depth < 0) + sl = NULL; + else if (depth == 0) + sl = &cfg->achapter; + else if (depth <= cfg->nasect) + sl = &cfg->asect[depth-1]; + else + sl = &cfg->asect[cfg->nasect-1]; + + if (!sl) + number = NULL; + else if (sl->just_numbers) + number = s->title->kwtext2; + else + number = s->title->kwtext; + + if (number) { + html_words(ho, number, MARKUP, + thisfile, keywords, cfg); + html_text(ho, sl->number_suffix); + } + + html_words(ho, s->title->words, MARKUP, + thisfile, keywords, cfg); + } else { + assert(s->type != NORMAL); + if (s->type == TOP) + html_text(ho, L"Preamble");/* FIXME: configure */ + else if (s->type == INDEX) + html_text(ho, L"Index");/* FIXME: configure */ + } +} diff --git a/bk_xhtml.c b/bk_xhtml.c deleted file mode 100644 index 5d8e812..0000000 --- a/bk_xhtml.c +++ /dev/null @@ -1,1779 +0,0 @@ -/* - * xhtml backend for Halibut - * (initial implementation by James Aylett) - * - * Still to do: - * - * +++ doesn't handle non-breaking hyphens. Not sure how to yet. - * +++ entity names (from a file -- ideally supply normal SGML files) - * +++ configuration directive to file split where the current layout - * code wouldn't. Needs changes to _ponder_layout() and _do_paras(), - * perhaps others. - * - * Limitations: - * - * +++ biblio/index references target the nearest section marker, rather - * than having a dedicated target themselves. In large bibliographies - * this will cause problems. (The solution is to fake up a response - * from xhtml_find_section(), probably linking it into the sections - * chain just in case we need it again, and to make freeing it up - * easier.) docsrc.pl used to work as we do, however, and SGT agrees that - * this is acceptable for now. - * +++ can't cope with leaf-level == 0. It's all to do with the - * top-level file not being normal, probably not even having a valid - * section level, and stuff like that. 
I question whether this is an - * issue, frankly; small manuals that fit on one page should probably - * not be written in halibut at all. - */ - -#include <stdio.h> -#include <stdlib.h> -#include <string.h> -#include <assert.h> -#include "halibut.h" - -/* - * FILENAME_TEMPLATE (overridable in config of course) allows you - * to choose the general form for your HTML file names. It is - * slightly printf-styled (% followed by a single character is a - * formatting directive, %% is a literal %). Formatting directives - * are: - * - * - %n is the section type-plus-number, minus whitespace (`Chapter1.2'). - * - %b is the section number on its own (`1.2'). - * - %k is the section's _internal_ keyword. - * - %N is the section's visible title in the output, again minus - * whitespace. - * - * %n, %b and %k will all default to %N if the section is - * unnumbered (`Bibliography' is often a good example). - * - * FRAGMENT_TEMPLATE is the same, but defines the <a name="foo"> - * markers used to cross-reference to particular subsections of a - * file. - */ - -#define FILENAME_SINGLE "Manual.html" -#define FILENAME_CONTENTS "Contents.html" -#define FILENAME_INDEX "IndexPage.html" -#define FILENAME_TEMPLATE "%n.html" -#define FRAGMENT_TEMPLATE "%b" - -struct xhtmlsection_Struct { - struct xhtmlsection_Struct *next; /* next sibling (NULL if split across files) */ - struct xhtmlsection_Struct *child; /* NULL if split across files */ - struct xhtmlsection_Struct *parent; /* NULL if split across files */ - struct xhtmlsection_Struct *chain; /* single structure independent of weird trees */ - paragraph *para; - struct xhtmlfile_Struct *file; /* which file is this a part of? 
*/ - char *fragment; /* fragment id within the file */ - int level; -}; - -struct xhtmlfile_Struct { - struct xhtmlfile_Struct *next; - struct xhtmlfile_Struct *child; - struct xhtmlfile_Struct *parent; - char *filename; - struct xhtmlsection_Struct *sections; /* sections within this file (only one for non-leaf) */ - int is_leaf; /* is this file a leaf file, ie does it not have any children? */ -}; - -typedef struct xhtmlsection_Struct xhtmlsection; -typedef struct xhtmlfile_Struct xhtmlfile; -typedef struct xhtmlindex_Struct xhtmlindex; - -struct xhtmlindex_Struct { - int nsection; - int size; - xhtmlsection **sections; -}; - -typedef struct { - int just_numbers; - wchar_t *number_suffix; -} xhtmlheadfmt; - -typedef struct { - int contents_depth[6]; - int leaf_contains_contents; - int leaf_level; - int leaf_smallest_contents; - int include_version_id; - wchar_t *author, *description; - wchar_t *head_end, *body, *body_start, *body_end, *address_start, *address_end, *nav_attrs; - int suppress_address; - xhtmlheadfmt fchapter, *fsect; - int nfsect; - char *contents_filename, *index_filename; - char *single_filename, *template_filename, *template_fragment; -} xhtmlconfig; - -/*static void xhtml_level(paragraph *, int); -static void xhtml_level_0(paragraph *); -static void xhtml_docontents(FILE *, paragraph *, int); -static void xhtml_dosections(FILE *, paragraph *, int); -static void xhtml_dobody(FILE *, paragraph *, int);*/ - -static void xhtml_doheader(FILE *, word *); -static void xhtml_dofooter(FILE *); -static void xhtml_versionid(FILE *, word *, int); - -static void xhtml_utostr(wchar_t *, char **); -static int xhtml_para_level(paragraph *); -static int xhtml_reservedchar(int); - -static int xhtml_convert(wchar_t *, int, char **, int); -static void xhtml_rdaddwc(rdstringc *, word *, word *, int); -static void xhtml_para(FILE *, word *, int); -static void xhtml_codepara(FILE *, word *); -static void xhtml_heading(FILE *, paragraph *, int); - -/* File-global 
variables are much easier than passing these things - * all over the place. Evil, but easier. We can replace this with a single - * structure at some point. - */ -static xhtmlconfig conf; -static keywordlist *keywords; -static indexdata *idx; -static xhtmlfile *topfile; -static xhtmlsection *topsection; -static paragraph *sourceparas; -static xhtmlfile *lastfile; -static xhtmlfile *xhtml_last_file = NULL; -static int last_level=-1, start_level; -static xhtmlsection *currentsection; - -static xhtmlconfig xhtml_configure(paragraph *source) -{ - xhtmlconfig ret; - - /* - * Defaults. - */ - ret.contents_depth[0] = 2; - ret.contents_depth[1] = 3; - ret.contents_depth[2] = 4; - ret.contents_depth[3] = 5; - ret.contents_depth[4] = 6; - ret.contents_depth[5] = 7; - ret.leaf_level = 2; - ret.leaf_smallest_contents = 4; - ret.leaf_contains_contents = FALSE; - ret.include_version_id = TRUE; - ret.author = NULL; - ret.description = NULL; - ret.head_end = NULL; - ret.body = NULL; - ret.body_start = NULL; - ret.body_end = NULL; - ret.address_start = NULL; - ret.address_end = NULL; - ret.nav_attrs = NULL; - ret.suppress_address = FALSE; - - ret.fchapter.just_numbers = FALSE; - ret.fchapter.number_suffix = L": "; - ret.nfsect = 2; - ret.fsect = mknewa(xhtmlheadfmt, ret.nfsect); - ret.fsect[0].just_numbers = FALSE; - ret.fsect[0].number_suffix = L": "; - ret.fsect[1].just_numbers = TRUE; - ret.fsect[1].number_suffix = L" "; - ret.contents_filename = strdup(FILENAME_CONTENTS); - ret.single_filename = strdup(FILENAME_SINGLE); - ret.index_filename = strdup(FILENAME_INDEX); - ret.template_filename = strdup(FILENAME_TEMPLATE); - ret.template_fragment = strdup(FRAGMENT_TEMPLATE); - - for (; source; source = source->next) - { - if (source->type == para_Config) - { - if (!ustricmp(source->keyword, L"xhtml-contents-filename")) { - sfree(ret.contents_filename); - ret.contents_filename = dupstr(adv(source->origkeyword)); - } else if (!ustricmp(source->keyword, L"xhtml-single-filename")) { - 
sfree(ret.single_filename); - ret.single_filename = dupstr(adv(source->origkeyword)); - } else if (!ustricmp(source->keyword, L"xhtml-index-filename")) { - sfree(ret.index_filename); - ret.index_filename = dupstr(adv(source->origkeyword)); - } else if (!ustricmp(source->keyword, L"xhtml-template-filename")) { - sfree(ret.template_filename); - ret.template_filename = dupstr(adv(source->origkeyword)); - } else if (!ustricmp(source->keyword, L"xhtml-template-fragment")) { - sfree(ret.template_fragment); - ret.template_fragment = utoa_dup(uadv(source->keyword), CS_ASCII); - } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-0")) { - ret.contents_depth[0] = utoi(uadv(source->keyword)); - } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-1")) { - ret.contents_depth[1] = utoi(uadv(source->keyword)); - } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-2")) { - ret.contents_depth[2] = utoi(uadv(source->keyword)); - } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-3")) { - ret.contents_depth[3] = utoi(uadv(source->keyword)); - } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-4")) { - ret.contents_depth[4] = utoi(uadv(source->keyword)); - } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-5")) { - ret.contents_depth[5] = utoi(uadv(source->keyword)); - } else if (!ustricmp(source->keyword, L"xhtml-leaf-level")) { - ret.leaf_level = utoi(uadv(source->keyword)); - } else if (!ustricmp(source->keyword, L"xhtml-leaf-smallest-contents")) { - ret.leaf_smallest_contents = utoi(uadv(source->keyword)); - } else if (!ustricmp(source->keyword, L"xhtml-versionid")) { - ret.include_version_id = utob(uadv(source->keyword)); - } else if (!ustricmp(source->keyword, L"xhtml-leaf-contains-contents")) { - ret.leaf_contains_contents = utob(uadv(source->keyword)); - } else if (!ustricmp(source->keyword, L"xhtml-suppress-address")) { - ret.suppress_address = utob(uadv(source->keyword)); - } else if (!ustricmp(source->keyword, 
L"xhtml-author")) { - ret.author = uadv(source->keyword); - } else if (!ustricmp(source->keyword, L"xhtml-description")) { - ret.description = uadv(source->keyword); - } else if (!ustricmp(source->keyword, L"xhtml-head-end")) { - ret.head_end = uadv(source->keyword); - } else if (!ustricmp(source->keyword, L"xhtml-body-start")) { - ret.body_start = uadv(source->keyword); - } else if (!ustricmp(source->keyword, L"xhtml-body-tag")) { - ret.body = uadv(source->keyword); - } else if (!ustricmp(source->keyword, L"xhtml-body-end")) { - ret.body_end = uadv(source->keyword); - } else if (!ustricmp(source->keyword, L"xhtml-address-start")) { - ret.address_start = uadv(source->keyword); - } else if (!ustricmp(source->keyword, L"xhtml-address-end")) { - ret.address_end = uadv(source->keyword); - } else if (!ustricmp(source->keyword, L"xhtml-navigation-attributes")) { - ret.nav_attrs = uadv(source->keyword); - } else if (!ustricmp(source->keyword, L"xhtml-chapter-numeric")) { - ret.fchapter.just_numbers = utob(uadv(source->keyword)); - } else if (!ustricmp(source->keyword, L"xhtml-chapter-suffix")) { - ret.fchapter.number_suffix = uadv(source->keyword); - } else if (!ustricmp(source->keyword, L"xhtml-section-numeric")) { - wchar_t *p = uadv(source->keyword); - int n = 0; - if (uisdigit(*p)) { - n = utoi(p); - p = uadv(p); - } - if (n >= ret.nfsect) { - int i; - ret.fsect = resize(ret.fsect, n+1); - for (i = ret.nfsect; i <= n; i++) - ret.fsect[i] = ret.fsect[ret.nfsect-1]; - ret.nfsect = n+1; - } - ret.fsect[n].just_numbers = utob(p); - } else if (!ustricmp(source->keyword, L"xhtml-section-suffix")) { - wchar_t *p = uadv(source->keyword); - int n = 0; - if (uisdigit(*p)) { - n = utoi(p); - p = uadv(p); - } - if (n >= ret.nfsect) { - int i; - ret.fsect = resize(ret.fsect, n+1); - for (i = ret.nfsect; i <= n; i++) - ret.fsect[i] = ret.fsect[ret.nfsect-1]; - ret.nfsect = n+1; - } - ret.fsect[n].number_suffix = p; - } - } - } - - /* printf(" !!! 
leaf_level = %i\n", ret.leaf_level); - printf(" !!! contentdepth-0 = %i\n", ret.contents_depth[0]); - printf(" !!! contentdepth-1 = %i\n", ret.contents_depth[1]); - printf(" !!! contentdepth-2 = %i\n", ret.contents_depth[2]); - printf(" !!! contentdepth-3 = %i\n", ret.contents_depth[3]); - printf(" !!! contentdepth-4 = %i\n", ret.contents_depth[4]); - printf(" !!! contentdepth-5 = %i\n", ret.contents_depth[5]); - printf(" !!! leaf_contains_contents = %i\n", ret.leaf_contains_contents);*/ - return ret; -} - -paragraph *xhtml_config_filename(char *filename) -{ - /* - * If the user passes in a single filename as a parameter to - * the `--html' command-line option, then we should assume it - * to imply _two_ config directives: - * \cfg{xhtml-single-filename}{whatever} and - * \cfg{xhtml-leaf-level}{0}; the rationale being that the user - * wants their output _in that file_. - */ - paragraph *p, *q; - - p = cmdline_cfg_simple("xhtml-single-filename", filename, NULL); - q = cmdline_cfg_simple("xhtml-leaf-level", "0", NULL); - p->next = q; - return p; -} - -static xhtmlsection *xhtml_new_section(xhtmlsection *last) -{ - xhtmlsection *ret = mknew(xhtmlsection); - ret->next=NULL; - ret->child=NULL; - ret->parent=NULL; - ret->chain=last; - ret->para=NULL; - ret->file=NULL; - ret->fragment=NULL; - ret->level=-1; /* marker: end of chain */ - return ret; -} - -/* Returns NULL or the section that marks that paragraph */ -static xhtmlsection *xhtml_find_section(paragraph *p) -{ - xhtmlsection *ret = topsection; - if (xhtml_para_level(p)==-1) { /* first, we back-track to a section paragraph */ - paragraph *p2 = sourceparas; - paragraph *p3 = NULL; - while (p2 && p2!=p) { - if (xhtml_para_level(p2)!=-1) { - p3 = p2; - } - p2=p2->next; - } - if (p3==NULL) { /* for some reason, we couldn't find a section before this paragraph ... ? */ - /* Note that this can happen, if you have a cross-reference to before the first chapter starts. - * So don't do that, then. 
- */ - return NULL; - } - p=p3; - } - while (ret && ret->para != p) { -/* printf(" xhtml_find_section(): checking %s for para @ %p\n", ret->fragment, p);*/ - ret=ret->chain; - } - return ret; -} - -static void xhtml_format(paragraph *p, char *template_string, rdstringc *r) -{ - char *c, *t; - word *w; - wchar_t *ws; - - t = template_string; - while (*t) { - if (*t == '%' && t[1]) { - int fmt; - - t++; - fmt = *t++; - - if (fmt == '%') { - rdaddc(r, fmt); - continue; - } - - w = NULL; - ws = NULL; - - if (p->kwtext && fmt == 'n') - w = p->kwtext; - else if (p->kwtext2 && fmt == 'b') - w = p->kwtext2; - else if (p->keyword && *p->keyword && fmt == 'k') - ws = p->keyword; - else - w = p->words; - - while (w) { - switch (removeattr(w->type)) - { - case word_Normal: - /*case word_Emph: - case word_Code: - case word_WeakCode:*/ - xhtml_utostr(w->text, &c); - rdaddsc(r,c); - sfree(c); - break; - } - w = w->next; - } - if (ws) { - xhtml_utostr(ws, &c); - rdaddsc(r,c); - sfree(c); - } - } else { - rdaddc(r, *t++); - } - } -} - -static xhtmlfile *xhtml_new_file(xhtmlsection *sect) -{ - xhtmlfile *ret = mknew(xhtmlfile); - - ret->next=NULL; - ret->child=NULL; - ret->parent=NULL; - ret->filename=NULL; - ret->sections=sect; - ret->is_leaf=(sect!=NULL && sect->level==conf.leaf_level); - if (sect==NULL) { - if (conf.leaf_level==0) { /* currently unused */ - ret->filename = smalloc(strlen(conf.single_filename)+1); - sprintf(ret->filename, conf.single_filename); - } else { - ret->filename = smalloc(strlen(conf.contents_filename)+1); - sprintf(ret->filename, conf.contents_filename); - } - } else { - paragraph *p = sect->para; - rdstringc fname_c = { 0, 0, NULL }; - xhtml_format(p, conf.template_filename, &fname_c); - ret->filename = rdtrimc(&fname_c); - } - /* printf(" ! 
new file '%s', is_leaf == %s\n", ret->filename, (ret->is_leaf)?("true"):("false"));*/ - return ret; -} - -/* - * Walk the tree fixing up files which are actually leaf (ie - * have no children) but aren't at leaf level, so they have the - * leaf flag set. - */ -void xhtml_fixup_layout(xhtmlfile* file) -{ - if (file->child==NULL) { - file->is_leaf = TRUE; - } else { - xhtml_fixup_layout(file->child); - } - if (file->next) - xhtml_fixup_layout(file->next); -} - -/* - * Create the tree structure so we know where everything goes. - * Method: - * - * Ignoring file splitting, we have three choices with each new section: - * - * +-----------------+-----------------+ - * | | | - * X +----X----+ (1) - * | | - * Y (2) - * | - * (3) - * - * Y is the last section we added (currentsect). - * If sect is the section we want to add, then: - * - * (1) if sect->level < currentsect->level - * (2) if sect->level == currentsect->level - * (3) if sect->level > currentsect->level - * - * This requires the constraint that you never skip section numbers - * (so you can't have a.b.c.d without all of a, a.b and a.b.c existing). - * - * Note that you _can_ have 1.1.1.1 followed by 1.2 - you can change - * more than one level at a time. Lots of asserts, and probably part of - * the algorithm here, rely on this being true. (It currently isn't - * enforced by halibut, however.) - * - * File splitting makes this harder. For instance, say we added at (3) - * above and now need to add another section. We are splitting at level - * 2, ie the level of Y. Z is the last section we added: - * - * +-----------------+-----------------+ - * | | | - * X +----X----+ (1) - * | | - * +----Y----+ (1) - * | | - * Z (2) - * | - * (3) - * - * The (1) case is now split; we need to search upwards to find where - * to actually link in. The other two cases remain the same (and will - * always be like this). - * - * File splitting makes this harder, however. 
The decision of whether - * to split to a new file is always on the same condition, however (is - * the level of this section higher than the leaf_level configuration - * value or not). - * - * Treating the cases backwards: - * - * (3) same file if sect->level > conf.leaf_level, otherwise new file - * - * if in the same file, currentsect->child points to sect - * otherwise the linking is done through the file tree (which works - * in more or less the same way, ie currentfile->child points to - * the new file) - * - * (2) same file if sect->level > conf.leaf_level, otherwise new file - * - * if in the same file, currentsect->next points to sect - * otherwise file linking and currentfile->next points to the new - * file (we know that Z must have caused a new file to be created) - * - * (1) same file if sect->level > conf.leaf_level, otherwise new file - * - * this is actually effectively the same case as (2) here, - * except that we first have to travel up the sections to figure - * out which section this new one will be a sibling of. In doing - * so, we may disappear off the top of a file and have to go up - * to its parent in the file tree. - * - */ -static void xhtml_ponder_layout(paragraph *p) -{ - xhtmlsection *lastsection; - xhtmlsection *currentsect; - xhtmlfile *currentfile; - - lastfile = NULL; - topsection = xhtml_new_section(NULL); - topfile = xhtml_new_file(NULL); - lastsection = topsection; - currentfile = topfile; - currentsect = topsection; - - if (conf.leaf_level == 0) { - topfile->is_leaf = 1; - topfile->sections = topsection; - topsection->file = topfile; - } - - for (; p; p=p->next) - { - int level = xhtml_para_level(p); - if (level>0) /* actually a section */ - { - xhtmlsection *sect; - rdstringc frag_c = { 0, 0, NULL }; - - sect = xhtml_new_section(lastsection); - lastsection = sect; - sect->para = p; - - xhtml_format(p, conf.template_fragment, &frag_c); - sect->fragment = rdtrimc(&frag_c); - sect->level = level; - /* printf(" ! 
adding para @ %p as sect %s, level %i\n", sect->para, sect->fragment, level);*/ - - if (level>currentsect->level) { /* case (3) */ - if (level>conf.leaf_level) { /* same file */ - assert(currentfile->is_leaf); - currentsect->child = sect; - sect->parent=currentsect; - sect->file=currentfile; - /* printf("connected '%s' to existing file '%s' [I]\n", sect->fragment, currentfile->filename);*/ - currentsect=sect; - } else { /* new file */ - xhtmlfile *file = xhtml_new_file(sect); - assert(!currentfile->is_leaf); - currentfile->child=file; - sect->file=file; - file->parent=currentfile; - /* printf("connected '%s' to new file '%s' [I]\n", sect->fragment, file->filename);*/ - currentfile=file; - currentsect=sect; - } - } else if (level >= currentsect->file->sections->level) { - /* Case (1) or (2) *AND* still under the section that starts - * the current file. - * - * I'm not convinced that this couldn't be rolled in with the - * final else {} leg further down. It seems a lot of effort - * this way. 
- */ - if (level>conf.leaf_level) { /* stick within the same file */ - assert(currentfile->is_leaf); - sect->file = currentfile; - while (currentsect && currentsect->level > level && - currentsect->file==currentsect->parent->file) { - currentsect = currentsect->parent; - } - assert(currentsect); - currentsect->next = sect; - assert(currentsect->level == sect->level); - sect->parent = currentsect->parent; - currentsect = sect; - /* printf("connected '%s' to existing file '%s' [II]\n", sect->fragment, currentfile->filename);*/ - } else { /* new file */ - xhtmlfile *file = xhtml_new_file(sect); - sect->file=file; - currentfile->next=file; - file->parent=currentfile->parent; - file->is_leaf=(level==conf.leaf_level); - file->sections=sect; - /* printf("connected '%s' to new file '%s' [II]\n", sect->fragment, file->filename);*/ - currentfile=file; - currentsect=sect; - } - } else { /* Case (1) or (2) and we must move up the file tree first */ - /* this loop is now probably irrelevant - we know we can't connect - * to anything in the current file */ - while (currentsect && level<currentsect->level) { - currentsect=currentsect->parent; - if (currentsect) { - /* printf(" * up one level to '%s'\n", currentsect->fragment);*/ - } else { - /* printf(" * up one level (off top of current file)\n");*/ - } - } - if (currentsect) { - /* I'm pretty sure this can now never fire */ - assert(currentfile->is_leaf); - /* printf("connected '%s' to existing file '%s' [III]\n", sect->fragment, currentfile->filename);*/ - sect->file = currentfile; - currentsect->next=sect; - currentsect=sect; - } else { /* find a file we can attach to */ - while (currentfile && currentfile->sections && level<currentfile->sections->level) { - currentfile=currentfile->parent; - if (currentfile) { - /* printf(" * up one file level to '%s'\n", currentfile->filename);*/ - } else { - /* printf(" * up one file level (off top of tree)\n");*/ - } - } - if (currentfile) { /* new file (we had to skip up a file to - get 
here, so we must be dealing with a - level no lower than the configured - leaf_level */ - xhtmlfile *file = xhtml_new_file(sect); - currentfile->next=file; - sect->file=file; - file->parent=currentfile->parent; - file->is_leaf=(level==conf.leaf_level); - file->sections=sect; - /* printf("connected '%s' to new file '%s' [III]\n", sect->fragment, file->filename);*/ - currentfile=file; - currentsect=sect; - } else { - fatal(err_whatever, "Ran off the top trying to connect sibling: strange document."); - } - } - } - } - } - topsection = lastsection; /* get correct end of the chain */ - xhtml_fixup_layout(topfile); /* leaf files not at leaf level marked as such */ -} - -static void xhtml_do_index(); -static void xhtml_do_file(xhtmlfile *file); -static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform); -static void xhtml_do_paras(FILE *fp, paragraph *p, paragraph *end, int indexable); -static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit); -static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit); -static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit); -static int xhtml_do_contents(FILE *fp, xhtmlfile *file); -static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file); -static void xhtml_do_sections(FILE *fp, xhtmlsection *sections); - -/* - * Do all the files in this structure. - */ -static void xhtml_do_files(xhtmlfile *file) -{ - xhtml_do_file(file); - if (file->child) - xhtml_do_files(file->child); - if (file->next) - xhtml_do_files(file->next); -} - -/* - * Free up all memory used by the file tree from 'xfile' downwards - */ -static void xhtml_free_file(xhtmlfile* xfile) -{ - if (xfile==NULL) { - return; - } - - if (xfile->filename) { - sfree(xfile->filename); - } - xhtml_free_file(xfile->child); - xhtml_free_file(xfile->next); - sfree(xfile); -} - -/* - * Main function. 
- */ -void xhtml_backend(paragraph *sourceform, keywordlist *in_keywords, - indexdata *in_idx, void *unused) -{ -/* int i;*/ - indexentry *ientry; - int ti; - xhtmlsection *xsect; - - IGNORE(unused); - - sourceparas = sourceform; - conf = xhtml_configure(sourceform); - keywords = in_keywords; - idx = in_idx; - - /* Clear up the index entries backend data pointers */ - for (ti=0; (ientry = (indexentry *)index234(idx->entries, ti))!=NULL; ti++) { - ientry->backend_data=NULL; - } - - xhtml_ponder_layout(sourceform); - - /* old system ... (writes to *.alt, but gets some stuff wrong and is ugly) */ -/* xhtml_level_0(sourceform); - for (i=1; i<=conf.leaf_level; i++) - { - xhtml_level(sourceform, i); - }*/ - - /* new system ... (writes to *.html, but isn't fully trusted) */ - xhtml_do_top_file(topfile, sourceform); - assert(!topfile->next); /* shouldn't have a sibling at all */ - if (topfile->child) { - xhtml_do_files(topfile->child); - xhtml_do_index(); - } - - /* release file, section, index data structures */ - xsect = topsection; - while (xsect) { - xhtmlsection *tmp = xsect->chain; - if (xsect->fragment) { - sfree(xsect->fragment); - } - sfree(xsect); - xsect = tmp; - } - xhtml_free_file(topfile); - for (ti = 0; (ientry=(indexentry *)index234(idx->entries, ti))!=NULL; ti++) { - if (ientry->backend_data!=NULL) { - xhtmlindex *xi = (xhtmlindex*) ientry->backend_data; - if (xi->sections!=NULL) { - sfree(xi->sections); - } - sfree(xi); - } - ientry->backend_data = NULL; - } - sfree(conf.fsect); -} - -static int xhtml_para_level(paragraph *p) -{ - switch (p->type) - { - case para_Title: - return 0; - break; - case para_UnnumberedChapter: - case para_Chapter: - case para_Appendix: - return 1; - break; -/* case para_BiblioCited: - return 2; - break;*/ - case para_Heading: - case para_Subsect: - return p->aux+2; - break; - default: - return -1; - break; - } -} - -/* Output the nav links for the current file. 
- * file == NULL means we're doing the index - */ -static void xhtml_donavlinks(FILE *fp, xhtmlfile *file) -{ - xhtmlfile *xhtml_next_file = NULL; - fprintf(fp, "<p"); - if (conf.nav_attrs!=NULL) { - fprintf(fp, " %ls>", conf.nav_attrs); - } else { - fprintf(fp, ">"); - } - if (xhtml_last_file==NULL) { - fprintf(fp, "Previous | "); - } else { - fprintf(fp, "<a href=\"%s\">Previous</a> | ", xhtml_last_file->filename); - } - fprintf(fp, "<a href=\"%s\">Contents</a> | ", conf.contents_filename); - if (file == NULL) { - fprintf(fp, "Index | "); - } else { - fprintf(fp, "<a href=\"%s\">Index</a> | ", conf.index_filename); - } - if (file != NULL) { /* otherwise we're doing nav links for the index */ - if (xhtml_next_file==NULL) - xhtml_next_file = file->child; - if (xhtml_next_file==NULL) - xhtml_next_file = file->next; - if (xhtml_next_file==NULL) - xhtml_next_file = file->parent->next; - } - if (xhtml_next_file==NULL) { - if (file==NULL) { /* index, so no next file */ - fprintf(fp, "Next "); - } else { - fprintf(fp, "<a href=\"%s\">Next</a>", conf.index_filename); - } - } else { - fprintf(fp, "<a href=\"%s\">Next</a>", xhtml_next_file->filename); - } - fprintf(fp, "</p>\n"); -} - -/* Write out the index file */ -static void xhtml_do_index_body(FILE *fp) -{ - indexentry *y; - int ti; - - if (count234(idx->entries) == 0) - return; /* don't write anything at all */ - - fprintf(fp, "<dl>\n"); - /* iterate over idx->entries using the tree functions and display everything */ - for (ti = 0; (y = (indexentry *)index234(idx->entries, ti)) != NULL; ti++) { - if (y->backend_data) { - int i; - xhtmlindex *xi; - - fprintf(fp, "<dt>"); - xhtml_para(fp, y->text, FALSE); - fprintf(fp, "</dt>\n<dd>"); - - xi = (xhtmlindex*) y->backend_data; - for (i=0; i<xi->nsection; i++) { - xhtmlsection *sect = xi->sections[i]; - if (sect) { - fprintf(fp, "<a href=\"%s#%s\">", sect->file->filename, sect->fragment); - if (sect->para->kwtext) { - xhtml_para(fp, sect->para->kwtext, FALSE); - } else if 
(sect->para->words) { - xhtml_para(fp, sect->para->words, FALSE); - } - fprintf(fp, "</a>"); - if (i+1<xi->nsection) { - fprintf(fp, ", "); - } - } - } - fprintf(fp, "</dd>\n"); - } - } - fprintf(fp, "</dl>\n"); -} -static void xhtml_do_index() -{ - word temp_word = { NULL, NULL, word_Normal, 0, 0, L"Index", - { NULL, 0, 0}, NULL }; - FILE *fp = fopen(conf.index_filename, "w"); - - if (fp==NULL) - fatal(err_cantopenw, conf.index_filename); - xhtml_doheader(fp, &temp_word); - xhtml_donavlinks(fp, NULL); - - xhtml_do_index_body(fp); - - xhtml_donavlinks(fp, NULL); - xhtml_dofooter(fp); - fclose(fp); -} - -/* Output the given file. This includes whatever contents at beginning and end, etc. etc. */ -static void xhtml_do_file(xhtmlfile *file) -{ - FILE *fp = fopen(file->filename, "w"); - if (fp==NULL) - fatal(err_cantopenw, file->filename); - - if (file->sections->para->words) { - xhtml_doheader(fp, file->sections->para->words); - } else if (file->sections->para->kwtext) { - xhtml_doheader(fp, file->sections->para->kwtext); - } else { - xhtml_doheader(fp, NULL); - } - - xhtml_donavlinks(fp, file); - - if (file->is_leaf && conf.leaf_contains_contents && - xhtml_do_contents(NULL, file)>=conf.leaf_smallest_contents) - xhtml_do_contents(fp, file); - xhtml_do_sections(fp, file->sections); - if (!file->is_leaf) - xhtml_do_naked_contents(fp, file); - - xhtml_donavlinks(fp, file); - - xhtml_dofooter(fp); - fclose(fp); - - xhtml_last_file = file; -} - -/* Output the top-level file. */ -static void xhtml_do_top_file(xhtmlfile *file, paragraph *sourceform) -{ - paragraph *p; - int done=FALSE; - FILE *fp = fopen(file->filename, "w"); - if (fp==NULL) - fatal(err_cantopenw, file->filename); - - /* Do the title -- only one allowed */ - for (p = sourceform; p && !done; p = p->next) - { - if (p->type == para_Title) - { - xhtml_doheader(fp, p->words); - done=TRUE; - } - } - if (!done) - xhtml_doheader(fp, NULL /* Eek! */); - - /* - * Display the title. 
- */ - for (p = sourceform; p; p = p->next) - { - if (p->type == para_Title) { - xhtml_heading(fp, p, FALSE); - break; - } - } - - /* Do the preamble */ - for (p = sourceform; p; p = p->next) - { - if (p->type == para_Chapter || p->type == para_Heading || - p->type == para_Subsect || p->type == para_Appendix || - p->type == para_UnnumberedChapter) { - /* - * We've found the end of the preamble. Do every normal - * paragraph up to there. - */ - xhtml_do_paras(fp, sourceform, p, FALSE); - break; - } - } - - xhtml_do_contents(fp, file); - xhtml_do_sections(fp, file->sections); - - /* - * Put the index in the top file if we're in single-file mode - * (leaf-level 0). - */ - if (conf.leaf_level == 0 && count234(idx->entries) > 0) { - fprintf(fp, "<a name=\"index\"></a><h1>Index</h1>\n"); - xhtml_do_index_body(fp); - } - - xhtml_dofooter(fp); - fclose(fp); -} - -/* Convert a Unicode string to an ASCII one. '?' is - * used for unmappable characters. - */ -static void xhtml_utostr(wchar_t *in, char **out) -{ - int l = ustrlen(in); - int i; - *out = smalloc(l+1); - for (i=0; i<l; i++) - { - if (in[i]>=32 && in[i]<=126) - (*out)[i]=(char)in[i]; - else - (*out)[i]='?'; - } - (*out)[i]=0; -} - -/* - * Write contents for the given file, and subfiles, down to - * the appropriate contents depth. Returns the number of - * entries written. - */ -static int xhtml_do_contents(FILE *fp, xhtmlfile *file) -{ - int level, limit, count = 0; - if (!file) - return 0; - - level = (file->sections)?(file->sections->level):(0); - limit = conf.contents_depth[(level>5)?(5):(level)]; - start_level = (file->is_leaf) ? 
(level-1) : (level); - last_level = start_level; - - count += xhtml_do_contents_section_limit(fp, file->sections, limit); - count += xhtml_do_contents_limit(fp, file->child, limit); - if (fp!=NULL) { - while (last_level > start_level) { - last_level--; - fprintf(fp, "</li></ul>\n"); - } - } - return count; -} - -/* As above, but doesn't do anything in the current file */ -static int xhtml_do_naked_contents(FILE *fp, xhtmlfile *file) -{ - int level, limit, start_level, count = 0; - if (!file) - return 0; - - level = (file->sections)?(file->sections->level):(0); - limit = conf.contents_depth[(level>5)?(5):(level)]; - start_level = (file->is_leaf) ? (level-1) : (level); - last_level = start_level; - - count = xhtml_do_contents_limit(fp, file->child, limit); - if (fp!=NULL) { - while (last_level > start_level) { - last_level--; - fprintf(fp, "</li></ul>\n"); - } - } - return count; -} - -/* - * Write contents for the given file, children, and siblings, down to - * given limit contents depth. - */ -static int xhtml_do_contents_limit(FILE *fp, xhtmlfile *file, int limit) -{ - int count = 0; - while (file) { - count += xhtml_do_contents_section_limit(fp, file->sections, limit); - count += xhtml_do_contents_limit(fp, file->child, limit); - file = file->next; - } - return count; -} - -/* - * Write contents entries for the given section tree, down to the - * limit contents depth. - */ -static int xhtml_do_contents_section_deep_limit(FILE *fp, xhtmlsection *section, int limit) -{ - int count = 0; - while (section) { - if (!xhtml_add_contents_entry(fp, section, limit)) - return 0; - else - count++; - count += xhtml_do_contents_section_deep_limit(fp, section->child, limit); - section = section->next; - } - return count; -} - -/* - * Write contents entries for the given section tree, down to the - * limit contents depth. 
- */ -static int xhtml_do_contents_section_limit(FILE *fp, xhtmlsection *section, int limit) -{ - int count = 0; - if (!section) - return 0; - xhtml_add_contents_entry(fp, section, limit); - count=1; - count += xhtml_do_contents_section_deep_limit(fp, section->child, limit); - /* section=section->child; - while (section && xhtml_add_contents_entry(fp, section, limit)) { - section = section->next; - }*/ - return count; -} - -/* - * Add a section entry, unless we're exceeding the limit, in which - * case return FALSE (otherwise return TRUE). - */ -static int xhtml_add_contents_entry(FILE *fp, xhtmlsection *section, int limit) -{ - if (!section || section->level > limit) - return FALSE; - if (fp==NULL || section->level < 0) - return TRUE; - if (last_level > section->level) { - while (last_level > section->level) { - last_level--; - fprintf(fp, "</li></ul>\n"); - } - fprintf(fp, "</li>\n"); - } else if (last_level < section->level) { - assert(last_level == section->level - 1); - last_level++; - fprintf(fp, "<ul>\n"); - } else { - fprintf(fp, "</li>\n"); - } - fprintf(fp, "<li><a href=\"%s#%s\">", section->file->filename, section->fragment); - if (section->para->kwtext) { - xhtml_para(fp, section->para->kwtext, FALSE); - if (section->para->words) { - fprintf(fp, ": "); - } - } - if (section->para->words) { - xhtml_para(fp, section->para->words, FALSE); - } - fprintf(fp, "</a>\n"); - return TRUE; -} - -/* - * Write all the sections in this file. Do all paragraphs in this section, then all - * children (recursively), then go on to the next one (tail recursively). - */ -static void xhtml_do_sections(FILE *fp, xhtmlsection *sections) -{ - while (sections) { - currentsection = sections; - xhtml_do_paras(fp, sections->para, NULL, TRUE); - xhtml_do_sections(fp, sections->child); - sections = sections->next; - } -} - -/* Write this list of paragraphs. Close off all lists at the end. 
*/ -static void xhtml_do_paras(FILE *fp, paragraph *p, paragraph *end, - int indexable) -{ - int last_type = -1, ptype, first=TRUE; - stack lcont_stack = stk_new(); - if (!p) - return; - -/* for (; p && (xhtml_para_level(p)>limit || xhtml_para_level(p)==-1 || first); p=p->next) {*/ - for (; p && p != end && (xhtml_para_level(p)<=0 || first); p=p->next) { - first=FALSE; - switch (ptype = p->type) - { - /* - * Things we ignore because we've already processed them or - * aren't going to touch them in this pass. - */ - case para_IM: - case para_BR: - case para_Biblio: /* only touch BiblioCited */ - case para_VersionID: - case para_NoCite: - case para_Title: - break; - - /* - * Chapter titles. - */ - case para_Chapter: - case para_Appendix: - case para_UnnumberedChapter: - xhtml_heading(fp, p, indexable); - break; - - case para_Heading: - case para_Subsect: - xhtml_heading(fp, p, indexable); - break; - - case para_Rule: - fprintf(fp, "\n<hr />\n"); - break; - - case para_Normal: - case para_Copyright: - fprintf(fp, "\n<p>"); - xhtml_para(fp, p->words, indexable); - fprintf(fp, "</p>\n"); - break; - - case para_LcontPush: - { - int *p; - p = mknew(int); - *p = last_type; - stk_push(lcont_stack, p); - last_type = para_Normal; - } - break; - case para_LcontPop: - { - int *p = stk_pop(lcont_stack); - assert(p); - ptype = last_type = *p; - sfree(p); - goto closeofflist; /* ick */ - } - break; - case para_QuotePush: - fprintf(fp, "<blockquote>\n"); - break; - case para_QuotePop: - fprintf(fp, "</blockquote>\n"); - break; - - case para_Bullet: - case para_NumberedList: - case para_Description: - case para_DescribedThing: - case para_BiblioCited: - if (last_type!=p->type && - !(last_type==para_DescribedThing && p->type==para_Description) && - !(last_type==para_Description && p->type==para_DescribedThing)) { - /* start up list if necessary */ - if (p->type == para_Bullet) { - fprintf(fp, "<ul>\n"); - } else if (p->type == para_NumberedList) { - fprintf(fp, "<ol>\n"); - } else if 
(p->type == para_BiblioCited || - p->type == para_DescribedThing || - p->type == para_Description) { - fprintf(fp, "<dl>\n"); - } - } - if (p->type == para_Bullet || p->type == para_NumberedList) { - fprintf(fp, "<li>"); - } else if (p->type == para_DescribedThing) { - fprintf(fp, "<dt>"); - } else if (p->type == para_Description) { - fprintf(fp, "<dd>"); - } else if (p->type == para_BiblioCited) { - fprintf(fp, "<dt>"); - xhtml_para(fp, p->kwtext, indexable); - fprintf(fp, "</dt>\n<dd>"); - } - xhtml_para(fp, p->words, indexable); - { - paragraph *p2 = p->next; - if (p2 && xhtml_para_level(p2)==-1 && p2->type == para_LcontPush) - break; - } - - closeofflist: - if (ptype == para_BiblioCited) { - fprintf(fp, "</dd>\n"); - } else if (ptype == para_DescribedThing) { - fprintf(fp, "</dt>"); - } else if (ptype == para_Description) { - fprintf(fp, "</dd>"); - } else if (ptype == para_Bullet || ptype == para_NumberedList) { - fprintf(fp, "</li>"); - } - if (ptype == para_Bullet || ptype == para_NumberedList || - ptype == para_BiblioCited || ptype == para_Description || - ptype == para_DescribedThing) - /* close off list if necessary */ - { - paragraph *p2 = p->next; - int close_off=FALSE; -/* if (p2 && (xhtml_para_level(p2)>limit || xhtml_para_level(p2)==-1)) {*/ - if (p2 && xhtml_para_level(p2)==-1) { - if (p2->type != ptype && - !(p2->type==para_DescribedThing && ptype==para_Description) && - !(p2->type==para_Description && ptype==para_DescribedThing) && - p2->type != para_LcontPush) - close_off=TRUE; - } else { - close_off=TRUE; - } - if (close_off) { - if (ptype == para_Bullet) { - fprintf(fp, "</ul>\n"); - } else if (ptype == para_NumberedList) { - fprintf(fp, "</ol>\n"); - } else if (ptype == para_BiblioCited || - ptype == para_Description || - ptype == para_DescribedThing) { - fprintf(fp, "</dl>\n"); - } - } - } - break; - - case para_Code: - xhtml_codepara(fp, p->words); - break; - } - last_type = ptype; - } - - stk_free(lcont_stack); -} - -/* - * Output a header 
for this XHTML file. - */ -static void xhtml_doheader(FILE *fp, word *title) -{ - fprintf(fp, "<!DOCTYPE html PUBLIC \"-//W3C//DTD XHTML 1.0 Transitional//EN\"\n"); - fprintf(fp, "\"http://www.w3.org/TR/xhtml1/DTD/xhtml1-transitional.dtd\">\n"); - fprintf(fp, "<html xmlns=\"http://www.w3.org/1999/xhtml\">\n\n<head>\n<title>"); - if (title==NULL) - fprintf(fp, "The thing with no name!"); - else - xhtml_para(fp, title, FALSE); - fprintf(fp, "</title>\n"); - fprintf(fp, "<meta name=\"generator\" content=\"Halibut %s xhtml-backend\" />\n", version); - if (conf.author) - fprintf(fp, "<meta name=\"author\" content=\"%ls\" />\n", conf.author); - if (conf.description) - fprintf(fp, "<meta name=\"description\" content=\"%ls\" />\n", conf.description); - if (conf.head_end) - fprintf(fp, "%ls\n", conf.head_end); - fprintf(fp, "</head>\n\n"); - if (conf.body) - fprintf(fp, "%ls\n", conf.body); - else - fprintf(fp, "<body>\n"); - if (conf.body_start) - fprintf(fp, "%ls\n", conf.body_start); -} - -/* - * Output a footer for this XHTML file. - */ -static void xhtml_dofooter(FILE *fp) -{ - fprintf(fp, "\n<hr />\n\n"); - if (conf.body_end) - fprintf(fp, "%ls\n", conf.body_end); - if (!conf.suppress_address) { - fprintf(fp,"<address>\n"); - if (conf.address_start) - fprintf(fp, "%ls\n", conf.address_start); - /* Do the version ID */ - if (conf.include_version_id) { - paragraph *p; - int started = 0; - for (p = sourceparas; p; p = p->next) - if (p->type == para_VersionID) { - xhtml_versionid(fp, p->words, started); - started = 1; - } - } - if (conf.address_end) - fprintf(fp, "%ls\n", conf.address_end); - fprintf(fp, "</address>\n"); - } - fprintf(fp, "</body>\n\n</html>\n"); -} - -/* - * Output the versionid paragraph. Typically this is a version control - * ID string (such as $Id...$ in RCS). 
- */ -static void xhtml_versionid(FILE *fp, word *text, int started) -{ - rdstringc t = { 0, 0, NULL }; - - rdaddc(&t, '['); /* FIXME: configurability */ - xhtml_rdaddwc(&t, text, NULL, FALSE); - rdaddc(&t, ']'); /* FIXME: configurability */ - - if (started) - fprintf(fp, "<br />\n"); - fprintf(fp, "%s\n", t.text); - sfree(t.text); -} - -/* Is this an XHTML reserved character? */ -static int xhtml_reservedchar(int c) -{ - if (c=='&' || c=='<' || c=='>' || c=='"') - return TRUE; - else - return FALSE; -} - -/* - * Convert a wide string into valid XHTML: Anything outside ASCII will - * be fixed up as an entity. Currently we don't worry about constraining the - * encoded character set, which we should probably do at some point (we can - * still fix up and return FALSE - see the last comment here). We also don't - * currently - * - * Because this is only used for words, spaces are HARD spaces (any other - * spaces will be word_Whitespace not word_Normal). So they become - * Unless hard_spaces is FALSE, of course (code paragraphs break the above - * rule). - * - * If `result' is non-NULL, mallocs the resulting string and stores a pointer to - * it in `*result'. If `result' is NULL, merely checks whether all - * characters in the string are feasible. - * - * Return is nonzero if all characters are OK. If not all - * characters are OK but `result' is non-NULL, a result _will_ - * still be generated! - */ -static int xhtml_convert(wchar_t *s, int maxlen, char **result, - int hard_spaces) { - int doing = (result != 0); - int ok = TRUE; - char *p = NULL; - int plen = 0, psize = 0; - - if (maxlen <= 0) - maxlen = -1; - - for (; *s && maxlen != 0; s++, maxlen--) { - wchar_t c = *s; - -#define ensure_size(i) if (i>=psize) { psize = i+256; p = resize(p, psize); } - - if (((c == 32 && !hard_spaces) || (c > 32 && c <= 126 && !xhtml_reservedchar(c)))) { - /* Char is OK. */ - if (doing) - { - ensure_size(plen); - p[plen++] = (char)c; - } - } else { - /* Char needs fixing up. 
*/ - /* ok = FALSE; -- currently we never return FALSE; we - * might want to when considering a character set for the - * encoded document. - */ - if (doing) - { - if (c==32) { /* a space in a word is a hard space */ - ensure_size(plen+6); /* includes space for the NUL, which is subsequently stomped on */ - sprintf(p+plen, " "); - plen+=6; - } else { - /* FIXME: entity names! */ - ensure_size(plen+8); /* includes space for the NUL, which is subsequently stomped on */ - plen+=sprintf(p+plen, "&#%04i;", (int)c); - } - } - } - } - if (doing) { - p = resize(p, plen+1); - p[plen] = '\0'; - *result = p; - } - return ok; -} - -/* - * This formats the given words as XHTML. - * - * `indexable', if FALSE, prohibits adding any index references. - * You might use this, for example, if an index reference occurred - * in a section title, to prevent phony index references when the - * section title is processed in strange places such as contents - * sections. - */ -static void xhtml_rdaddwc(rdstringc *rs, word *text, word *end, int indexable) { - char *c; - keyword *kwl; - xhtmlsection *sect; - indextag *itag; - int ti; - - for (; text && text != end; text = text->next) { - switch (text->type) { - case word_HyperLink: - xhtml_utostr(text->text, &c); - rdaddsc(rs, "<a href=\""); - rdaddsc(rs, c); - rdaddsc(rs, "\">"); - sfree(c); - break; - - case word_UpperXref: - case word_LowerXref: - kwl = kw_lookup(keywords, text->text); - if (kwl) { - sect=xhtml_find_section(kwl->para); - if (sect) { - rdaddsc(rs, "<a href=\""); - rdaddsc(rs, sect->file->filename); - rdaddc(rs, '#'); - rdaddsc(rs, sect->fragment); - rdaddsc(rs, "\">"); - } else { - rdaddsc(rs, "<a href=\"Apologies.html\"><!-- probably a bibliography cross reference -->"); - error(err_whatever, "Couldn't locate cross-reference! (Probably a bibliography entry.)"); - } - } else { - rdaddsc(rs, "<a href=\"Apologies.html\"><!-- unknown cross-reference -->"); - error(err_whatever, "Couldn't locate cross-reference! 
(Wasn't in source file.)"); - } - break; - - case word_IndexRef: /* in theory we could make an index target here */ -/* rdaddsc(rs, "<a name=\"idx-"); - xhtml_utostr(text->text, &c); - rdaddsc(rs, c); - sfree(c); - rdaddsc(rs, "\"></a>");*/ - /* what we _do_ need to do is to fix up the backend data - * for any indexentry this points to. - */ - if (!indexable) - break; - - for (ti=0; (itag = (indextag *)index234(idx->tags, ti))!=NULL; ti++) { - /* FIXME: really ustricmp() and not ustrcmp()? */ - if (ustricmp(itag->name, text->text)==0) { - break; - } - } - if (itag!=NULL) { - if (itag->refs!=NULL) { - int i; - for (i=0; i<itag->nrefs; i++) { - xhtmlindex *idx_ref; - indexentry *ientry; - - ientry = itag->refs[i]; - if (ientry->backend_data==NULL) { - idx_ref = (xhtmlindex*) smalloc(sizeof(xhtmlindex)); - if (idx_ref==NULL) - fatal(err_nomemory); - idx_ref->nsection = 0; - idx_ref->size = 4; - idx_ref->sections = (xhtmlsection**) smalloc(idx_ref->size * sizeof(xhtmlsection*)); - if (idx_ref->sections==NULL) - fatal(err_nomemory); - ientry->backend_data = idx_ref; - } else { - idx_ref = ientry->backend_data; - if (idx_ref->nsection+1 > idx_ref->size) { - int new_size = idx_ref->size * 2; - idx_ref->sections = srealloc(idx_ref->sections, new_size * sizeof(xhtmlsection)); - if (idx_ref->sections==NULL) { - fatal(err_nomemory); - } - idx_ref->size = new_size; - } - } - idx_ref->sections[idx_ref->nsection++] = currentsection; -#if 0 -#endif - } - } else { - fatal(err_whatever, "Index tag had no entries!"); - } - } else { - fprintf(stderr, "Looking for index entry '%ls'\n", text->text); - fatal(err_whatever, "Couldn't locate index entry! 
(Wasn't in index.)"); - } - break; - - case word_HyperEnd: - case word_XrefEnd: - rdaddsc(rs, "</a>"); - break; - - case word_Normal: - case word_Emph: - case word_Code: - case word_WeakCode: - case word_WhiteSpace: - case word_EmphSpace: - case word_CodeSpace: - case word_WkCodeSpace: - case word_Quote: - case word_EmphQuote: - case word_CodeQuote: - case word_WkCodeQuote: - assert(text->type != word_CodeQuote && - text->type != word_WkCodeQuote); - if (towordstyle(text->type) == word_Emph && - (attraux(text->aux) == attr_First || - attraux(text->aux) == attr_Only)) - rdaddsc(rs, "<em>"); - else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) && - (attraux(text->aux) == attr_First || - attraux(text->aux) == attr_Only)) - rdaddsc(rs, "<code>"); - - if (removeattr(text->type) == word_Normal) { - if (xhtml_convert(text->text, 0, &c, TRUE) || !text->alt) - /* spaces in the word are hard */ - rdaddsc(rs, c); - else - xhtml_rdaddwc(rs, text->alt, NULL, indexable); - sfree(c); - } else if (removeattr(text->type) == word_WhiteSpace) { - rdaddc(rs, ' '); - } else if (removeattr(text->type) == word_Quote) { - rdaddsc(rs, """); - } - - if (towordstyle(text->type) == word_Emph && - (attraux(text->aux) == attr_Last || - attraux(text->aux) == attr_Only)) - rdaddsc(rs, "</em>"); - else if ((towordstyle(text->type) == word_Code || towordstyle(text->type) == word_WeakCode) && - (attraux(text->aux) == attr_Last || - attraux(text->aux) == attr_Only)) - rdaddsc(rs, "</code>"); - break; - } - } -} - -/* Output a heading, formatted as XHTML. 
- */ -static void xhtml_heading(FILE *fp, paragraph *p, int indexable) -{ - rdstringc t = { 0, 0, NULL }; - word *tprefix = p->kwtext; - word *nprefix = p->kwtext2; - word *text = p->words; - int level = xhtml_para_level(p); - xhtmlsection *sect = xhtml_find_section(p); - xhtmlheadfmt *fmt; - char *fragment; - if (sect) { - fragment = sect->fragment; - } else { - if (p->type == para_Title) - fragment = "title"; - else { - fragment = ""; /* FIXME: what else can we do? */ - error(err_whatever, "Couldn't locate heading cross-reference!"); - } - } - - if (p->type == para_Title) - fmt = NULL; - else if (level == 1) - fmt = &conf.fchapter; - else if (level-1 < conf.nfsect) - fmt = &conf.fsect[level-1]; - else - fmt = &conf.fsect[conf.nfsect-1]; - - if (fmt && fmt->just_numbers && nprefix) { - xhtml_rdaddwc(&t, nprefix, NULL, indexable); - if (fmt) { - char *c; - if (xhtml_convert(fmt->number_suffix, 0, &c, FALSE)) { - rdaddsc(&t, c); - sfree(c); - } - } - } else if (fmt && !fmt->just_numbers && tprefix) { - xhtml_rdaddwc(&t, tprefix, NULL, indexable); - if (fmt) { - char *c; - if (xhtml_convert(fmt->number_suffix, 0, &c, FALSE)) { - rdaddsc(&t, c); - sfree(c); - } - } - } - xhtml_rdaddwc(&t, text, NULL, indexable); - /* - * If we're outputting in single-file mode, we need to lower - * the level of each heading by one, because the overall - * document title will be sitting right at the top as an <h1> - * and so chapters and sections should start at <h2>. - * - * Even if not, the document title will come back from - * xhtml_para_level() as level zero, so we must increment that - * no matter what leaf_level is set to. - */ - if (conf.leaf_level == 0 || level == 0) - level++; - fprintf(fp, "<a name=\"%s\"></a><h%i>%s</h%i>\n", fragment, level, t.text, level); - sfree(t.text); -} - -/* Output a paragraph. Styles are handled by xhtml_rdaddwc(). - * This looks pretty simple; I may have missed something ... 
- */ -static void xhtml_para(FILE *fp, word *text, int indexable) -{ - rdstringc out = { 0, 0, NULL }; - xhtml_rdaddwc(&out, text, NULL, indexable); - fprintf(fp, "%s", out.text); - sfree(out.text); -} - -/* Output a code paragraph. I'm treating this as preformatted, which - * may not be entirely correct. See xhtml_para() for my worries about - * this being overly-simple; however I think that most of the complexity - * of the text backend came entirely out of word wrapping anyway. - */ -static void xhtml_codepara(FILE *fp, word *text) -{ - fprintf(fp, "<pre>"); - for (; text; text = text->next) if (text->type == word_WeakCode) { - word *here, *next; - char *c; - - /* - * See if this WeakCode is followed by an Emph to indicate - * emphasis. - */ - here = text; - if (text->next && text->next->type == word_Emph) { - next = text = text->next; - } else - next = NULL; - - if (next) { - wchar_t *t, *e; - int n; - - t = here->text; - e = next->text; - - while (*e) { - int ec = *e; - - for (n = 0; t[n] && e[n] && e[n] == ec; n++); - xhtml_convert(t, n, &c, FALSE); - fprintf(fp, "%s%s%s", - (ec == 'i' ? "<em>" : ec == 'b' ? "<b>" : ""), - c, - (ec == 'i' ? "</em>" : ec == 'b' ? "</b>" : "")); - sfree(c); - - t += n; - e += n; - } - - xhtml_convert(t, 0, &c, FALSE); - fprintf(fp, "%s\n", c); - sfree(c); - } else { - xhtml_convert(here->text, 0, &c, FALSE); - fprintf(fp, "%s\n", c); - sfree(c); - } - } - fprintf(fp, "</pre>\n"); -} diff --git a/doc/licence.but b/doc/licence.but index 4c01c29..0304992 100644 --- a/doc/licence.but +++ b/doc/licence.but @@ -1,8 +1,8 @@ -\versionid $Id: licence.but,v 1.2 2004/04/01 17:36:42 simon Exp $ +\versionid $Id: licence.but,v 1.3 2004/06/12 20:09:40 simon Exp $ \A{licence} Halibut Licence -Halibut is copyright (c) 1999-2004 Simon Tatham and James Aylett. +Halibut is copyright (c) 1999-2004 Simon Tatham. 
Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files diff --git a/doc/output.but b/doc/output.but index 3eeecda..5ef0170 100644 --- a/doc/output.but +++ b/doc/output.but @@ -565,7 +565,7 @@ number, before displaying the chapter title. For example, if you set this to \q{\cw{:\_}}, then the chapter title might look something like \q{Chapter 2: Doing Things}. -\dt \I{\cw{\\cfg\{xhtml-section-numeric\}}}\cw{\\cfg\{xhtml-section-numeric\}\{}\e{boolean}\cw{\}} +\dt \I{\cw{\\cfg\{xhtml-section-numeric\}}}\cw{\\cfg\{xhtml-section-numeric\}\{}\e{level}\cw{\}\{}\e{boolean}\cw{\}} \dd Specifies whether section headings at a particular level should contain the word \q{Section} or equivalent (if \c{false}), or should @@ -574,7 +574,7 @@ which level of section headings you want to affect: 0 means first-level headings (\c{\\H}), 1 means second-level headings (\c{\\S}), 2 means the level below that (\c{\\S2}), and so on. -\dt \I{\cw{\\cfg\{xhtml-section-suffix\}}}\cw{\\cfg\{xhtml-section-suffix\}\{}\e{text}\cw{\}} +\dt \I{\cw{\\cfg\{xhtml-section-suffix\}}}\cw{\\cfg\{xhtml-section-suffix\}\{}\e{level}\cw{\}\{}\e{text}\cw{\}} \dd Specifies the suffix text to be appended to section numbers at a particular level, before displaying the section title. 
@@ -286,7 +286,8 @@ wchar_t *ustrncpy(wchar_t *dest, wchar_t const *source, int n); wchar_t utolower(wchar_t); int uisalpha(wchar_t); int ustrcmp(wchar_t *lhs, wchar_t *rhs); -int ustricmp(wchar_t *lhs, wchar_t *rhs); +int ustricmp(wchar_t const *lhs, wchar_t const *rhs); +int ustrnicmp(wchar_t const *lhs, wchar_t const *rhs, int maxlen); int utoi(wchar_t const *); double utof(wchar_t const *); int utob(wchar_t const *); @@ -463,10 +464,10 @@ void text_backend(paragraph *, keywordlist *, indexdata *, void *); paragraph *text_config_filename(char *filename); /* - * bk_xhtml.c + * bk_html.c */ -void xhtml_backend(paragraph *, keywordlist *, indexdata *, void *); -paragraph *xhtml_config_filename(char *filename); +void html_backend(paragraph *, keywordlist *, indexdata *, void *); +paragraph *html_config_filename(char *filename); /* * bk_whlp.c @@ -26,8 +26,8 @@ static const struct backend { int bitfield, prebackend_bitfield; } backends[] = { {"text", text_backend, text_config_filename, 0x0001, 0}, - {"xhtml", xhtml_backend, xhtml_config_filename, 0x0002, 0}, - {"html", xhtml_backend, xhtml_config_filename, 0x0002, 0}, + {"xhtml", html_backend, html_config_filename, 0x0002, 0}, + {"html", html_backend, html_config_filename, 0x0002, 0}, {"hlp", whlp_backend, whlp_config_filename, 0x0004, 0}, {"whlp", whlp_backend, whlp_config_filename, 0x0004, 0}, {"winhelp", whlp_backend, whlp_config_filename, 0x0004, 0}, @@ -288,7 +288,7 @@ int uisalpha(wchar_t c) { #endif } -int ustricmp(wchar_t *lhs, wchar_t *rhs) { +int ustricmp(wchar_t const *lhs, wchar_t const *rhs) { wchar_t lc, rc; while ((lc = utolower(*lhs)) == (rc = utolower(*rhs)) && lc && rc) lhs++, rhs++; @@ -300,6 +300,19 @@ int ustricmp(wchar_t *lhs, wchar_t *rhs) { return 1; } +int ustrnicmp(wchar_t const *lhs, wchar_t const *rhs, int maxlen) { + wchar_t lc = 0, rc = 0; + while (maxlen-- > 0 && + (lc = utolower(*lhs)) == (rc = utolower(*rhs)) && lc && rc) + lhs++, rhs++; + if (lc < rc) + return -1; + else if (lc > rc) 
+ return 1; + else + return 0; +} + wchar_t *ustrlow(wchar_t *s) { wchar_t *p = s; while (*p) { |