/* * HTML backend for Halibut */ /* * TODO: * * - I'm never entirely convinced that having a fragment link to * come in at the start of the real text in the file is * sensible. Perhaps for the topmost section in the file, no * fragment should be used? (Though it should probably still be * _there_ even if unused.) * * - new configurability: * * a few new things explicitly labelled as `FIXME: * configurable' or similar. * * HTML flavour. * * Some means of specifying the distinction between * restrict-charset and output-charset. It seems to me that * `html-charset' is output-charset, and that * restrict-charset usually wants to be either output-charset * or UTF-8 (the latter indicating that any Unicode character * is fair game and it will be specified using &#foo; if it * isn't in output-charset). However, since XHTML defaults to * UTF-8 and it's fiddly to tell it otherwise, it's just * possible that some user may need to set restrict-charset * to their charset of choice while leaving _output_-charset * at UTF-8. Figure out some configuration, and apply it. * * - test all HTML flavours and ensure they validate sensibly. Fix * remaining confusion issues such as and obsoleteness * of . * * - proper naming of all fragment IDs. The ones for sections are * fine; the ones for numbered list and bibliociteds are utter * crap; the ones for indexes _might_ do but it might be worth * giving some thought to how to do them better. * + also set up a mechanism for ensuring that fragment IDs * never clash. * * - nonbreaking spaces? */ #include #include #include #include #include "halibut.h" #define is_heading_type(type) ( (type) == para_Title || \ (type) == para_Chapter || \ (type) == para_Appendix || \ (type) == para_UnnumberedChapter || \ (type) == para_Heading || \ (type) == para_Subsect) #define heading_depth(p) ( (p)->type == para_Subsect ? (p)->aux + 1 : \ (p)->type == para_Heading ? 1 : \ (p)->type == para_Title ? -1 : 0 ) typedef struct { int just_numbers; wchar_t *number_suffix; } sectlevel; typedef struct { int nasect; sectlevel achapter, *asect; int *contents_depths; /* 0=main, 1=chapter, 2=sect etc */ int ncdepths; int address_section, visible_version_id; int leaf_contains_contents, leaf_smallest_contents; char *contents_filename; char *index_filename; char *template_filename; char *single_filename; char *template_fragment; char *head_end, *body_start, *body_end, *addr_start, *addr_end; char *body_tag, *nav_attr; wchar_t *author, *description; int restrict_charset, output_charset; enum { HTML_3_2, HTML_4, XHTML_1_0_TRANSITIONAL, XHTML_1_0_STRICT } htmlver; wchar_t *lquote, *rquote; int leaf_level; } htmlconfig; #define contents_depth(conf, level) \ ( (conf).ncdepths > (level) ? (conf).contents_depths[level] : (level)+2 ) #define is_xhtml(ver) ((ver) >= XHTML_1_0_TRANSITIONAL) typedef struct htmlfile htmlfile; typedef struct htmlsect htmlsect; struct htmlfile { htmlfile *next; char *filename; int last_fragment_number; int min_heading_depth; htmlsect *first, *last; /* first/last highest-level sections */ }; struct htmlsect { htmlsect *next, *parent; htmlfile *file; paragraph *title, *text; enum { NORMAL, TOP, INDEX } type; int contents_depth; char *fragment; }; typedef struct { htmlfile *head, *tail; htmlfile *single, *index; } htmlfilelist; typedef struct { htmlsect *head, *tail; } htmlsectlist; typedef struct { int nrefs, refsize; word **refs; } htmlindex; typedef struct { htmlsect *section; char *fragment; } htmlindexref; typedef struct { /* * This level deals with charset conversion, starting and * ending tags, and writing to the file. It's the lexical * level. */ FILE *fp; int charset; charset_state cstate; int ver; enum { HO_NEUTRAL, HO_IN_TAG, HO_IN_EMPTY_TAG, HO_IN_TEXT } state; /* * Stuff beyond here deals with the higher syntactic level: it * tracks how many levels of