summary | refs | log | tree | commit | diff
path: root/winchm.c
diff options
context:
space:
mode:
Diffstat (limited to 'winchm.c')
-rw-r--r-- winchm.c 1436
1 files changed, 1436 insertions, 0 deletions
diff --git a/winchm.c b/winchm.c
new file mode 100644
index 0000000..cb21715
--- /dev/null
+++ b/winchm.c
@@ -0,0 +1,1436 @@
+/*
+ * winchm.c: direct output of .CHM files.
+ */
+
+#include <assert.h>
+#include <stdio.h>
+
+#include "halibut.h"
+#include "tree234.h"
+#include "lzx.h"
+
+/*
+ * Helpers for serialising fixed-width integers.
+ *
+ * PUT_* store `value' into the bytes starting at `cp' in the stated
+ * byte order. RDADD_* append the same encoding to the rdstringc `rs'
+ * by way of rdaddsn().
+ *
+ * Every macro parameter is parenthesised in the expansion, so an
+ * expression such as `ptr + offset' is converted to a byte pointer as
+ * a whole, whatever the type of `ptr'. `value' is evaluated once per
+ * output byte, so it must not have side effects.
+ */
+#define PUT_32BIT_LSB_FIRST(cp, value) do { \
+    ((unsigned char *)(cp))[0] = 0xFF & (value); \
+    ((unsigned char *)(cp))[1] = 0xFF & ((value) >> 8); \
+    ((unsigned char *)(cp))[2] = 0xFF & ((value) >> 16); \
+    ((unsigned char *)(cp))[3] = 0xFF & ((value) >> 24); } while (0)
+
+#define PUT_32BIT_MSB_FIRST(cp, value) do { \
+    ((unsigned char *)(cp))[3] = 0xFF & (value); \
+    ((unsigned char *)(cp))[2] = 0xFF & ((value) >> 8); \
+    ((unsigned char *)(cp))[1] = 0xFF & ((value) >> 16); \
+    ((unsigned char *)(cp))[0] = 0xFF & ((value) >> 24); } while (0)
+
+#define PUT_16BIT_LSB_FIRST(cp, value) do { \
+    ((unsigned char *)(cp))[0] = 0xFF & (value); \
+    ((unsigned char *)(cp))[1] = 0xFF & ((value) >> 8); } while (0)
+
+/*
+ * The scratch buffer has a deliberately unusual name so that it cannot
+ * shadow a caller's variable mentioned in `value'.
+ */
+#define RDADD_32BIT_LSB_FIRST(rs, value) do { \
+    unsigned char rdadd_bytes_[4]; \
+    PUT_32BIT_LSB_FIRST(rdadd_bytes_, value); \
+    rdaddsn((rs), (void *)rdadd_bytes_, sizeof(rdadd_bytes_)); \
+    } while (0)
+
+#define RDADD_32BIT_MSB_FIRST(rs, value) do { \
+    unsigned char rdadd_bytes_[4]; \
+    PUT_32BIT_MSB_FIRST(rdadd_bytes_, value); \
+    rdaddsn((rs), (void *)rdadd_bytes_, sizeof(rdadd_bytes_)); \
+    } while (0)
+
+#define RDADD_16BIT_LSB_FIRST(rs, value) do { \
+    unsigned char rdadd_bytes_[2]; \
+    PUT_16BIT_LSB_FIRST(rdadd_bytes_, value); \
+    rdaddsn((rs), (void *)rdadd_bytes_, sizeof(rdadd_bytes_)); \
+    } while (0)
+
+/*
+ * Append a 16-byte GUID to `rs': a 32-bit word and two 16-bit
+ * halfwords, each written least-significant byte first, followed by
+ * eight individual bytes in the order given.
+ */
+static void guid(rdstringc *rs, unsigned long w0,
+                 unsigned short h0, unsigned short h1,
+                 unsigned char b0, unsigned char b1,
+                 unsigned char b2, unsigned char b3,
+                 unsigned char b4, unsigned char b5,
+                 unsigned char b6, unsigned char b7)
+{
+    unsigned char tail[8];
+    int k;
+
+    tail[0] = b0; tail[1] = b1; tail[2] = b2; tail[3] = b3;
+    tail[4] = b4; tail[5] = b5; tail[6] = b6; tail[7] = b7;
+
+    RDADD_32BIT_LSB_FIRST(rs, w0);
+    RDADD_16BIT_LSB_FIRST(rs, h0);
+    RDADD_16BIT_LSB_FIRST(rs, h1);
+    for (k = 0; k < 8; k++)
+        rdaddc(rs, tail[k]);
+}
+
+/*
+ * Write the complete output file into `rs': the ITSF main header, the
+ * small 'header section', then the contents of `directory' followed by
+ * the contents of `content0'.
+ *
+ * Several header fields hold sizes or offsets that are only known once
+ * later data has been appended. For each of those the position of the
+ * field is remembered, a zero placeholder is written, and the real
+ * value is patched in afterwards with PUT_32BIT_LSB_FIRST. Only the
+ * low 32 bits of each 64-bit field are ever filled in; the high words
+ * are written as zero.
+ */
+static void itsf(rdstringc *rs,
+ const rdstringc *directory, const rdstringc *content0)
+{
+ int headersize_field;
+ int headersect_off, headersect_off_field, headersect_size_field;
+ int directory_off_field, content0_off_field, filesize_field;
+
+ /* Main file header */
+ rdaddsc(rs, "ITSF"); /* main file magic number */
+ RDADD_32BIT_LSB_FIRST(rs, 3); /* file format version */
+ headersize_field = rs->pos;
+ RDADD_32BIT_LSB_FIRST(rs, 0); /* size of main header; fill in later */
+ RDADD_32BIT_LSB_FIRST(rs, 1); /* unknown, always observed to be 1 */
+ RDADD_32BIT_MSB_FIRST(rs, 0x12345678); /* timestamp (FIXME) */
+ RDADD_32BIT_LSB_FIRST(rs, 0x809); /* language code (FIXME: configurable) */
+ guid(rs,0x7C01FD10,0x7BAA,0x11D0,0x9E,0x0C,0x00,0xA0,0xC9,0x22,0xE6,0xEC);
+ guid(rs,0x7C01FD11,0x7BAA,0x11D0,0x9E,0x0C,0x00,0xA0,0xC9,0x22,0xE6,0xEC);
+ headersect_off_field = rs->pos;
+ RDADD_32BIT_LSB_FIRST(rs, 0); /* header section offset; fill in later */
+ RDADD_32BIT_LSB_FIRST(rs, 0); /* MSW of 64-bit field */
+ headersect_size_field = rs->pos;
+ RDADD_32BIT_LSB_FIRST(rs, 0); /* header section size; fill in later */
+ RDADD_32BIT_LSB_FIRST(rs, 0); /* MSW of 64-bit field */
+ directory_off_field = rs->pos;
+ RDADD_32BIT_LSB_FIRST(rs, 0); /* directory offset; fill in later */
+ RDADD_32BIT_LSB_FIRST(rs, 0); /* MSW of 64-bit field */
+ RDADD_32BIT_LSB_FIRST(rs, directory->pos); /* directory length, known already */
+ RDADD_32BIT_LSB_FIRST(rs, 0); /* MSW of 64-bit field */
+ content0_off_field = rs->pos;
+ RDADD_32BIT_LSB_FIRST(rs, 0); /* content section 0 offset; fill in later */
+ RDADD_32BIT_LSB_FIRST(rs, 0); /* MSW of 64-bit field */
+ /* The main header ends here, so its size is the current position. */
+ PUT_32BIT_LSB_FIRST(rs->text + headersize_field, rs->pos);
+
+ /* 'Header section' */
+ headersect_off = rs->pos;
+ PUT_32BIT_LSB_FIRST(rs->text + headersect_off_field, rs->pos);
+ RDADD_32BIT_LSB_FIRST(rs, 0x1FE); /* magic number */
+ RDADD_32BIT_LSB_FIRST(rs, 0); /* unknown, always observed to be 0 */
+ filesize_field = rs->pos;
+ RDADD_32BIT_LSB_FIRST(rs, 0); /* file size; fill in later */
+ RDADD_32BIT_LSB_FIRST(rs, 0); /* MSW of 64-bit field */
+ RDADD_32BIT_LSB_FIRST(rs, 0); /* unknown, always observed to be 0 */
+ RDADD_32BIT_LSB_FIRST(rs, 0); /* unknown, always observed to be 0 */
+ PUT_32BIT_LSB_FIRST(rs->text + headersect_size_field,
+ rs->pos - headersect_off);
+
+ /* Directory data: record where it starts, then append it. */
+ PUT_32BIT_LSB_FIRST(rs->text + directory_off_field, rs->pos);
+ rdaddsn(rs, directory->text, directory->pos);
+
+ /* Content section 0: record where it starts, then append it. */
+ PUT_32BIT_LSB_FIRST(rs->text + content0_off_field, rs->pos);
+ rdaddsn(rs, content0->text, content0->pos);
+
+ /* Everything has been written, so the total file size is now known. */
+ PUT_32BIT_LSB_FIRST(rs->text + filesize_field, rs->pos);
+}
+
+/*
+ * Append `val' to `rs' as a CHM ENCINT: a big-endian sequence of 7-bit
+ * groups in which every byte except the final one has its top bit set.
+ */
+static void encint(rdstringc *rs, unsigned val)
+{
+    unsigned char groups[16];
+    int ngroups = 0;
+
+    /* Split into 7-bit groups, least significant first; zero still
+     * yields one group. */
+    do {
+        groups[ngroups++] = val & 0x7F;
+        val >>= 7;
+    } while (val != 0);
+
+    /* Emit most significant group first, flagging all but the last
+     * byte as 'more to come'. */
+    while (ngroups-- > 0)
+        rdaddc(rs, groups[ngroups] | (ngroups > 0 ? 0x80 : 0));
+}
+
+/*
+ * One file stored in the CHM, as listed in the directory. Entries are
+ * created by chm_add_file_internal() and kept in the `files' tree of
+ * struct chm.
+ */
+struct chm_directory_entry {
+ char *filename; /* free this when done */
+ int which_content_section; /* number of the content section holding the data */
+ int offset_in_content_section; /* where the data starts within that section */
+ int file_size; /* length of the data in bytes */
+};
+
+/*
+ * Compare two filenames using the ordering the CHM directory appears
+ * to require: case-insensitive, comparing the _lowercased_ form of
+ * each character (so, in particular, '_' sorts before every letter).
+ * Returns -1, 0 or +1.
+ */
+static int strcmp_chm(const char *a, const char *b)
+{
+    for (;; a++, b++) {
+        char ac = *a, bc = *b;
+
+        if (ac == '\0' && bc == '\0')
+            return 0;                  /* both strings ended together */
+
+        if (ac >= 'A' && ac <= 'Z')
+            ac += 'a' - 'A';
+        if (bc >= 'A' && bc <= 'Z')
+            bc += 'a' - 'A';
+
+        if (ac < bc)
+            return -1;
+        if (ac != bc)
+            return +1;
+    }
+}
+
+/*
+ * Ordering function for two struct chm_directory_entry pointers:
+ * compares their filenames with strcmp_chm().
+ */
+int chm_directory_entry_cmp(void *av, void *bv)
+{
+    const struct chm_directory_entry *lhs = av;
+    const struct chm_directory_entry *rhs = bv;
+
+    return strcmp_chm(lhs->filename, rhs->filename);
+}
+
+/*
+ * Search function: `av' is a plain filename string and `bv' a struct
+ * chm_directory_entry; the two are compared with strcmp_chm().
+ */
+int chm_directory_entry_find(void *av, void *bv)
+{
+    const char *wanted = av;
+    const struct chm_directory_entry *entry = bv;
+
+    return strcmp_chm(wanted, entry->filename);
+}
+
+/*
+ * One entry in the index built for the next level up of the directory
+ * B-tree: it names the first file covered by a chunk and gives that
+ * chunk's number.
+ */
+struct chm_index_entry {
+ char *first_filename; /* shared pointer with some chm_directory_entry */
+ int chunk_index; /* number of the chunk this entry refers to */
+};
+
+/*
+ * Build the CHM directory into `rs', which must be empty on entry,
+ * from the non-empty tree `files' of struct chm_directory_entry.
+ *
+ * The output is an ITSP header followed by fixed-size chunks:
+ * first the PMGL chunks holding the directory entries themselves, in
+ * the order index234() returns them, and then as many layers of PMGI
+ * index chunks as are needed to get down to a single chunk at the top
+ * level. Header fields whose values are not known up front (sizes,
+ * depth, root chunk, tail chunk, chunk count) are written as zero and
+ * patched afterwards.
+ */
+static void directory(rdstringc *rs, tree234 *files)
+{
+ const int chunksize = 4096;
+ const int encoded_density = 2;
+ /* A quick-reference pointer is emitted every useful_density entries. */
+ const int useful_density = 1 + (1 << encoded_density);
+ int dirhdr_size_field, dirhdr_size2_field, dirhdr_depth_field;
+ int dirhdr_root_field, dirhdr_tail_field, dirhdr_nchunks_field;
+ int curr_chunk, depth, filename_index;
+ tree234 *index;
+
+ assert(rs->pos == 0);
+ assert(count234(files) > 0);
+
+ /* Directory header */
+ rdaddsc(rs, "ITSP"); /* directory header magic number */
+ RDADD_32BIT_LSB_FIRST(rs, 1); /* format version */
+ dirhdr_size_field = rs->pos;
+ RDADD_32BIT_LSB_FIRST(rs, 0); /* directory header size; fill in later */
+ RDADD_32BIT_LSB_FIRST(rs, 10); /* unknown; observed to be 10 */
+ RDADD_32BIT_LSB_FIRST(rs, chunksize);
+ RDADD_32BIT_LSB_FIRST(rs, encoded_density);
+ dirhdr_depth_field = rs->pos;
+ RDADD_32BIT_LSB_FIRST(rs, 0); /* B-tree depth; fill in later */
+ dirhdr_root_field = rs->pos;
+ RDADD_32BIT_LSB_FIRST(rs, 0); /* root chunk index; fill in later */
+ RDADD_32BIT_LSB_FIRST(rs, 0); /* head of PMGL chunk list; always 0 here */
+ dirhdr_tail_field = rs->pos;
+ RDADD_32BIT_LSB_FIRST(rs, 0); /* tail of PMGL chunk list; fill in later */
+ RDADD_32BIT_LSB_FIRST(rs, 0xFFFFFFFFU); /* unknown; observed to be -1 */
+ dirhdr_nchunks_field = rs->pos;
+ RDADD_32BIT_LSB_FIRST(rs, 0); /* total number of chunks; fill in later */
+ RDADD_32BIT_LSB_FIRST(rs, 0x409); /* language (FIXME) */
+ guid(rs,0x5D02926A,0x212E,0x11D0,0x9D,0xF9,0x00,0xA0,0xC9,0x22,0xE6,0xEC);
+ dirhdr_size2_field = rs->pos;
+ RDADD_32BIT_LSB_FIRST(rs, 0); /* directory header size; fill in later */
+ RDADD_32BIT_LSB_FIRST(rs, 0xFFFFFFFFU); /* unknown; observed to be -1 */
+ RDADD_32BIT_LSB_FIRST(rs, 0xFFFFFFFFU); /* unknown; observed to be -1 */
+ RDADD_32BIT_LSB_FIRST(rs, 0xFFFFFFFFU); /* unknown; observed to be -1 */
+ /* The header is complete: both size fields get the current length. */
+ PUT_32BIT_LSB_FIRST(rs->text + dirhdr_size_field, rs->pos);
+ PUT_32BIT_LSB_FIRST(rs->text + dirhdr_size2_field, rs->pos);
+
+ /* `index' is used as a plain ordered list (no comparison function);
+ * it collects one chm_index_entry per chunk written at this level. */
+ index = newtree234(NULL);
+ curr_chunk = 0;
+ depth = 1;
+ /* Write out lowest-level PMGL chunks full of actual directory entries */
+ filename_index = 0;
+ while (filename_index < count234(files)) {
+ rdstringc chunk = {0, 0, NULL};
+ rdstringc reversed_quickref = {0, 0, NULL};
+ int chunk_endlen_field, chunk_nextptr_field;
+ int n_entries, offset_of_first_entry;
+ int saved_pos, saved_rq_pos, i;
+
+ rdaddsc(&chunk, "PMGL");
+ chunk_endlen_field = chunk.pos;
+ RDADD_32BIT_LSB_FIRST(&chunk, 0); /* space at end; fill in later */
+ RDADD_32BIT_LSB_FIRST(&chunk, 0); /* unknown; observed to be 0 */
+ if (curr_chunk == 0) {
+ RDADD_32BIT_LSB_FIRST(&chunk, 0xFFFFFFFF); /* 'null' prev ptr */
+ } else {
+ RDADD_32BIT_LSB_FIRST(&chunk, curr_chunk - 1);
+ }
+ chunk_nextptr_field = chunk.pos; /* may overwrite 'next' ptr later */
+ RDADD_32BIT_LSB_FIRST(&chunk, curr_chunk + 1);
+
+ /* Enter this chunk in our index for the next level of the
+ * B-tree (if we end up needing one). */
+ {
+ struct chm_directory_entry *ent = (struct chm_directory_entry *)
+ index234(files, filename_index);
+ struct chm_index_entry *ient = snew(struct chm_index_entry);
+ assert(ent);
+ ient->first_filename = ent->filename;
+ ient->chunk_index = curr_chunk;
+ addpos234(index, ient, count234(index));
+ }
+
+ /* Start accumulating the quick-reference index at the end of this
+ * chunk. We'll build it up backwards, and reverse it halfwordwise
+ * when we copy it into the end of our output chunk. */
+ RDADD_16BIT_LSB_FIRST(&reversed_quickref, 0);
+ offset_of_first_entry = chunk.pos;
+
+ n_entries = 0;
+ /* Write filenames into this chunk until it's full, or until
+ * we run out of filenames. */
+ while (1) {
+ struct chm_directory_entry *ent = (struct chm_directory_entry *)
+ index234(files, filename_index++);
+ if (!ent) {
+ /* Run out of filenames, so this is the last PMGL chunk.
+ * Reset its 'next' pointer to the 'null' -1 value. */
+ PUT_32BIT_LSB_FIRST(chunk.text + chunk_nextptr_field,
+ 0xFFFFFFFFU);
+ /* And point the directory header's tail pointer at
+ * this chunk. */
+ PUT_32BIT_LSB_FIRST(rs->text + dirhdr_tail_field, curr_chunk);
+ break;
+ }
+
+ /* Save the sizes of stuff in this chunk, so we can put
+ * them back if this entry turns out to overflow. */
+ saved_pos = chunk.pos;
+ saved_rq_pos = reversed_quickref.pos;
+
+ if (n_entries > 0 && n_entries % useful_density == 0) {
+ /* Add a quick-reference index pointer. */
+ RDADD_16BIT_LSB_FIRST(&reversed_quickref, chunk.pos -
+ offset_of_first_entry);
+ }
+
+ encint(&chunk, strlen(ent->filename));
+ rdaddsc(&chunk, ent->filename);
+ encint(&chunk, ent->which_content_section);
+ encint(&chunk, ent->offset_in_content_section);
+ encint(&chunk, ent->file_size);
+ /* NOTE(review): if the very first entry tried in a chunk already
+ * overflows (n_entries == 0), it is rolled back and retried in the
+ * next chunk, where it will presumably overflow again, so this
+ * loop would never advance. Confirm filenames are bounded. */
+ if (chunk.pos + reversed_quickref.pos > chunksize) {
+ filename_index--;
+ chunk.pos = saved_pos;
+ reversed_quickref.pos = saved_rq_pos;
+ break;
+ }
+
+ /* If we didn't overflow, then commit to this entry and
+ * loop round for the next one. */
+ n_entries++;
+ }
+
+ /* Finalise the chunk. */
+ assert(chunk.pos + reversed_quickref.pos <= chunksize);
+ PUT_32BIT_LSB_FIRST(chunk.text + chunk_endlen_field,
+ chunksize - chunk.pos);
+ /* The entry count goes in the first halfword of the reversed
+ * quickref, which becomes the last halfword of the chunk. */
+ PUT_16BIT_LSB_FIRST(reversed_quickref.text, n_entries);
+ while (chunk.pos + reversed_quickref.pos < chunksize)
+ rdaddc(&chunk, 0); /* zero-pad */
+ for (i = reversed_quickref.pos - 2; i >= 0; i -= 2)
+ rdaddsn(&chunk, reversed_quickref.text+i, 2);
+
+ assert(chunk.pos == chunksize);
+ rdaddsn(rs, chunk.text, chunk.pos);
+ sfree(chunk.text);
+ sfree(reversed_quickref.text);
+ curr_chunk++;
+ }
+
+ /* Write out as many layers of PMGI index chunks as it takes to
+ * reduce the total number of chunks at the current level to 1. */
+ while (count234(index) > 1) {
+ tree234 *prev_index;
+ int index_index = 0;
+
+ prev_index = index;
+ index = newtree234(NULL);
+ depth++;
+
+ while (index_index < count234(prev_index)) {
+ rdstringc chunk = {0, 0, NULL};
+ rdstringc reversed_quickref = {0, 0, NULL};
+ int chunk_endlen_field;
+ int n_entries, offset_of_first_entry;
+ int saved_pos, saved_rq_pos, i;
+
+ rdaddsc(&chunk, "PMGI");
+ chunk_endlen_field = chunk.pos;
+ RDADD_32BIT_LSB_FIRST(&chunk, 0); /* space at end; fill in later */
+
+ /* Enter this chunk in our index for the next level of the
+ * B-tree (if we end up needing one). */
+ {
+ struct chm_index_entry *ent = (struct chm_index_entry *)
+ index234(prev_index, index_index);
+ struct chm_index_entry *ient = snew(struct chm_index_entry);
+ assert(ent);
+ ient->first_filename = ent->first_filename;
+ ient->chunk_index = curr_chunk;
+ addpos234(index, ient, count234(index));
+ }
+
+ /* Start accumulating the quick-reference index at the end
+ * of this chunk, as above. */
+ RDADD_16BIT_LSB_FIRST(&reversed_quickref, 0);
+ offset_of_first_entry = chunk.pos;
+
+ n_entries = 0;
+ /* Write index entries into this chunk until it's full, or
+ * until we run out of chunks at the previous level. */
+ while (1) {
+ struct chm_index_entry *ent = (struct chm_index_entry *)
+ index234(prev_index, index_index++);
+ if (!ent)
+ break;
+
+ /* Save the sizes of stuff in this chunk, so we can put
+ * them back if this entry turns out to overflow. */
+ saved_pos = chunk.pos;
+ saved_rq_pos = reversed_quickref.pos;
+
+ if (n_entries > 0 && n_entries % useful_density == 0) {
+ /* Add a quick-reference index pointer. */
+ RDADD_16BIT_LSB_FIRST(&reversed_quickref, chunk.pos -
+ offset_of_first_entry);
+ }
+
+ encint(&chunk, strlen(ent->first_filename));
+ rdaddsc(&chunk, ent->first_filename);
+ encint(&chunk, ent->chunk_index);
+ if (chunk.pos + reversed_quickref.pos > chunksize) {
+ index_index--;
+ chunk.pos = saved_pos;
+ reversed_quickref.pos = saved_rq_pos;
+ break;
+ }
+
+ /* If we didn't overflow, then commit to this entry and
+ * loop round for the next one. */
+ n_entries++;
+ }
+
+ /* Finalise the chunk. */
+ assert(chunk.pos + reversed_quickref.pos <= chunksize);
+ PUT_32BIT_LSB_FIRST(chunk.text + chunk_endlen_field,
+ chunksize - chunk.pos);
+ PUT_16BIT_LSB_FIRST(reversed_quickref.text, n_entries);
+ while (chunk.pos + reversed_quickref.pos < chunksize)
+ rdaddc(&chunk, 0); /* zero-pad */
+ for (i = reversed_quickref.pos - 2; i >= 0; i -= 2)
+ rdaddsn(&chunk, reversed_quickref.text+i, 2);
+
+ assert(chunk.pos == chunksize);
+ rdaddsn(rs, chunk.text, chunk.pos);
+ sfree(chunk.text);
+ sfree(reversed_quickref.text);
+ curr_chunk++;
+ }
+
+ /*
+ * Now free the old index.
+ */
+ while (1) {
+ struct chm_index_entry *ent = (struct chm_index_entry *)
+ delpos234(prev_index, 0);
+ if (!ent)
+ break;
+ sfree(ent);
+ }
+ freetree234(prev_index);
+ }
+
+ /*
+ * Finished! We've reduced to a single chunk. Free the remaining
+ * index (which must have size 1).
+ */
+ assert(count234(index) == 1);
+ sfree(delpos234(index, 0));
+ freetree234(index);
+
+ /* Fill in the deferred fields in the main header. */
+ PUT_32BIT_LSB_FIRST(rs->text + dirhdr_depth_field, depth);
+ /* The root is the last chunk written, i.e. the single top-level one. */
+ PUT_32BIT_LSB_FIRST(rs->text + dirhdr_root_field, curr_chunk-1);
+ PUT_32BIT_LSB_FIRST(rs->text + dirhdr_nchunks_field, curr_chunk);
+}
+
+/*
+ * Begin a record in `rs': write the 16-bit record code and a 16-bit
+ * zero placeholder for the length. Returns the offset at which the
+ * record starts, to be handed to sys_end() once the payload is written.
+ */
+static int sys_start(rdstringc *rs, int code)
+{
+    const int record_offset = rs->pos;
+
+    RDADD_16BIT_LSB_FIRST(rs, code);
+    RDADD_16BIT_LSB_FIRST(rs, 0);      /* length; overwrite later */
+
+    return record_offset;
+}
+/*
+ * Finish the record begun at offset `recstart' by sys_start(): store
+ * the number of payload bytes appended since the 4-byte record header
+ * into the header's length field.
+ */
+static void sys_end(rdstringc *rs, int recstart)
+{
+    const int length_field = recstart + 2;
+    const int payload_start = recstart + 4;
+
+    PUT_16BIT_LSB_FIRST(rs->text + length_field, rs->pos - payload_start);
+}
+
+/*
+ * Description of one help-viewer window, as supplied to
+ * chm_add_window(). All strings are private copies, freed by
+ * chm_free().
+ */
+struct chm_window {
+ char *name;
+ char *title;
+ char *contentsfile; /* NULL if none was given */
+ char *indexfile; /* NULL if none was given */
+ char *rootfile;
+ int navpaneflags;
+ int toolbarflags;
+};
+
+/*
+ * All the state accumulated for one CHM file under construction.
+ * Created by chm_new() and destroyed by chm_free().
+ */
+struct chm {
+ tree234 *files; /* struct chm_directory_entry, sorted by chm_directory_entry_cmp */
+ tree234 *windows; /* struct chm_window, kept in order of addition */
+ tree234 *stringtab; /* struct chm_stringtab_entry, sorted by chm_stringtab_cmp */
+ rdstringc content0; /* outer uncompressed container */
+ rdstringc content1; /* compressed subfile */
+ rdstringc outfile;
+ rdstringc stringsfile; /* text of all strings interned by chm_intern_string() */
+ /* Optional settings; each is NULL until its setter has been called. */
+ char *title, *contents_filename, *index_filename, *default_topic;
+ char *default_window;
+ /* Top-level sections only, chained through their nextsibling fields. */
+ struct chm_section *rootsecthead, *rootsecttail;
+ /* Every section, chained through their next fields. */
+ struct chm_section *allsecthead, *allsecttail;
+};
+
+/*
+ * One node of the document's section tree, created by
+ * chm_add_section().
+ */
+struct chm_section {
+ /* Logical links within the section tree structure */
+ struct chm_section *firstchild, *lastchild, *nextsibling, *parent;
+ /* Link all chm_sections together into one big list, in a
+ * topological order (i.e. every section comes after its
+ * parent) */
+ struct chm_section *next;
+
+ char *title, *url; /* private copies, freed by chm_free() */
+ /* The fields below are not set by chm_add_section(); presumably they
+ * are filled in while the output files are built - verify against
+ * chm_build(). */
+ int tocidx_offset_1, tocidx_offset_2;
+ int topic_index, urltbl_offset, urlstr_offset;
+};
+
+/*
+ * One interned string. The text itself is not stored here: it lives in
+ * chm->stringsfile starting at strtab_offset, and the back-pointer to
+ * the owning struct chm lets the comparison functions find it.
+ */
+struct chm_stringtab_entry {
+ struct chm *chm;
+ int strtab_offset;
+};
+
+/*
+ * Ordering function for two struct chm_stringtab_entry pointers:
+ * strcmp() on the strings they refer to inside their owners'
+ * stringsfile buffers.
+ */
+static int chm_stringtab_cmp(void *av, void *bv)
+{
+    const struct chm_stringtab_entry *lhs = av;
+    const struct chm_stringtab_entry *rhs = bv;
+    const char *lhs_text = lhs->chm->stringsfile.text + lhs->strtab_offset;
+    const char *rhs_text = rhs->chm->stringsfile.text + rhs->strtab_offset;
+
+    return strcmp(lhs_text, rhs_text);
+}
+
+/*
+ * Search function: `av' is a plain C string, `bv' a struct
+ * chm_stringtab_entry whose text is looked up in its owner's
+ * stringsfile buffer; the two are compared with strcmp().
+ */
+static int chm_stringtab_find(void *av, void *bv)
+{
+    const char *wanted = av;
+    const struct chm_stringtab_entry *entry = bv;
+    const char *entry_text = entry->chm->stringsfile.text + entry->strtab_offset;
+
+    return strcmp(wanted, entry_text);
+}
+
+/*
+ * Return the offset of `string' within chm->stringsfile, appending it
+ * (with its NUL terminator) first if it is not already present. A NULL
+ * `string' yields offset 0.
+ */
+int chm_intern_string(struct chm *chm, const char *string)
+{
+    struct chm_stringtab_entry *entry;
+    int nbytes;
+
+    if (!string)
+        return 0;
+
+    entry = (struct chm_stringtab_entry *)
+        find234(chm->stringtab, (void *)string, chm_stringtab_find);
+    if (entry != NULL)
+        return entry->strtab_offset;   /* already interned */
+
+    entry = snew(struct chm_stringtab_entry);
+    entry->chm = chm;
+
+    /* Pad with zero bytes until the string, terminator included, no
+     * longer straddles a 0x1000-byte page boundary. */
+    nbytes = strlen(string) + 1;
+    assert(nbytes < 0x1000);           /* avoid really serious trouble */
+    while (((chm->stringsfile.pos ^
+             (chm->stringsfile.pos + nbytes-1)) >> 12) != 0)
+        rdaddc(&chm->stringsfile, 0);
+
+    /* The text must be in place before the entry goes into the tree,
+     * because the tree's comparison function reads it from there. */
+    entry->strtab_offset = chm->stringsfile.pos;
+    rdaddsc(&chm->stringsfile, string);
+    rdaddc(&chm->stringsfile, '\0');
+    add234(chm->stringtab, entry);
+
+    return entry->strtab_offset;
+}
+
+/*
+ * Allocate an empty CHM-under-construction: empty trees and buffers,
+ * no optional settings, no sections. The empty string is interned
+ * straight away to preinitialise the strings table.
+ */
+struct chm *chm_new(void)
+{
+    struct chm *fresh = snew(struct chm);
+
+    /* Containers */
+    fresh->files = newtree234(chm_directory_entry_cmp);
+    fresh->windows = newtree234(NULL);
+    fresh->stringtab = newtree234(chm_stringtab_cmp);
+
+    /* Output buffers */
+    fresh->content0 = empty_rdstringc;
+    fresh->content1 = empty_rdstringc;
+    fresh->outfile = empty_rdstringc;
+    fresh->stringsfile = empty_rdstringc;
+
+    /* Optional settings, all initially absent */
+    fresh->title = NULL;
+    fresh->contents_filename = NULL;
+    fresh->index_filename = NULL;
+    fresh->default_topic = NULL;
+    fresh->default_window = NULL;
+
+    /* Section lists, initially empty */
+    fresh->rootsecthead = NULL;
+    fresh->rootsecttail = NULL;
+    fresh->allsecthead = NULL;
+    fresh->allsecttail = NULL;
+
+    chm_intern_string(fresh, "");      /* preinitialise the strings table */
+
+    return fresh;
+}
+
+/*
+ * Destroy a struct chm and everything it owns: directory entries,
+ * windows, string-table entries, sections, output buffers and the
+ * optional setting strings.
+ */
+void chm_free(struct chm *chm)
+{
+    void *item;
+    struct chm_section *sect, *following;
+
+    /* Directory entries and their filenames */
+    while ((item = delpos234(chm->files, 0)) != NULL) {
+        struct chm_directory_entry *file = item;
+        sfree(file->filename);
+        sfree(file);
+    }
+    freetree234(chm->files);
+
+    /* Window descriptions and their strings */
+    while ((item = delpos234(chm->windows, 0)) != NULL) {
+        struct chm_window *win = item;
+        sfree(win->name);
+        sfree(win->title);
+        sfree(win->contentsfile);
+        sfree(win->indexfile);
+        sfree(win->rootfile);
+        sfree(win);
+    }
+    freetree234(chm->windows);
+
+    /* String-table entries (the text itself lives in stringsfile) */
+    while ((item = delpos234(chm->stringtab, 0)) != NULL)
+        sfree(item);
+    freetree234(chm->stringtab);
+
+    /* Sections, walked along the all-sections list */
+    sect = chm->allsecthead;
+    while (sect) {
+        following = sect->next;
+        sfree(sect->title);
+        sfree(sect->url);
+        sfree(sect);
+        sect = following;
+    }
+
+    /* Output buffers */
+    sfree(chm->content0.text);
+    sfree(chm->content1.text);
+    sfree(chm->outfile.text);
+    sfree(chm->stringsfile.text);
+
+    /* Optional settings */
+    sfree(chm->title);
+    sfree(chm->contents_filename);
+    sfree(chm->index_filename);
+    sfree(chm->default_topic);
+    sfree(chm->default_window);
+
+    sfree(chm);
+}
+
+/*
+ * Store `len' bytes of `data' as a file called `name': a directory
+ * entry is added to chm->files recording that the data lives in
+ * content section number `which_sect' at the current end of `sect',
+ * and the data is then appended to `sect'. `name' is copied.
+ */
+static void chm_add_file_internal(struct chm *chm, const char *name,
+                                  const char *data, int len,
+                                  rdstringc *sect, int which_sect)
+{
+    struct chm_directory_entry *entry = snew(struct chm_directory_entry);
+
+    entry->filename = dupstr(name);
+    entry->which_content_section = which_sect;
+    entry->file_size = len;
+    /* Take the offset before the data is appended below. */
+    entry->offset_in_content_section = sect->pos;
+
+    add234(chm->files, entry);
+    rdaddsn(sect, data, len);
+}
+
+/*
+ * Look up the directory entry whose filename matches `name' under
+ * chm_directory_entry_find(); returns whatever find234() reports.
+ */
+static struct chm_directory_entry *chm_find_file(
+    struct chm *chm, const char *name)
+{
+    struct chm_directory_entry *found;
+
+    found = find234(chm->files, (void *)name, chm_directory_entry_find);
+    return found;
+}
+
+/*
+ * Return a newly allocated copy of `str' with '/' prepended. The
+ * caller owns the result and must sfree() it.
+ */
+static char *add_leading_slash(const char *str)
+{
+    size_t len = strlen(str);
+    char *result = snewn(2 + len, char);   /* '/' + str + NUL */
+
+    result[0] = '/';
+    memcpy(result + 1, str, len + 1);      /* copies the terminator too */
+    return result;
+}
+
+/*
+ * Add a file to the CHM. `name' is given without a leading slash; the
+ * file is stored as "/name" in content section 1 (chm->content1).
+ */
+void chm_add_file(struct chm *chm, const char *name, const char *data, int len)
+{
+    char *stored_name = add_leading_slash(name);
+
+    chm_add_file_internal(chm, stored_name, data, len, &chm->content1, 1);
+
+    /* chm_add_file_internal() took its own copy of the name. */
+    sfree(stored_name);
+}
+
+/*
+ * Set the document title; a private copy of `title' is stored.
+ *
+ * A title left by an earlier call is freed instead of being leaked.
+ * The copy is taken before the old value is freed, so passing the
+ * currently stored string is harmless.
+ */
+void chm_title(struct chm *chm, const char *title)
+{
+    char *copy = dupstr(title);
+
+    sfree(chm->title);
+    chm->title = copy;
+}
+
+/*
+ * Set the name of the contents file; a private copy of `name' is
+ * stored.
+ *
+ * A name left by an earlier call is freed instead of being leaked.
+ * The copy is taken before the old value is freed, so passing the
+ * currently stored string is harmless.
+ */
+void chm_contents_filename(struct chm *chm, const char *name)
+{
+    char *copy = dupstr(name);
+
+    sfree(chm->contents_filename);
+    chm->contents_filename = copy;
+}
+
+/*
+ * Set the name of the index file; a private copy of `name' is stored.
+ *
+ * A name left by an earlier call is freed instead of being leaked.
+ * The copy is taken before the old value is freed, so passing the
+ * currently stored string is harmless.
+ */
+void chm_index_filename(struct chm *chm, const char *name)
+{
+    char *copy = dupstr(name);
+
+    sfree(chm->index_filename);
+    chm->index_filename = copy;
+}
+
+/*
+ * Set the default topic; a private copy of `name' is stored.
+ *
+ * A value left by an earlier call is freed instead of being leaked.
+ * The copy is taken before the old value is freed, so passing the
+ * currently stored string is harmless.
+ */
+void chm_default_topic(struct chm *chm, const char *name)
+{
+    char *copy = dupstr(name);
+
+    sfree(chm->default_topic);
+    chm->default_topic = copy;
+}
+
+/*
+ * Set the name of the default window; a private copy of `name' is
+ * stored.
+ *
+ * A value left by an earlier call is freed instead of being leaked.
+ * The copy is taken before the old value is freed, so passing the
+ * currently stored string is harmless.
+ */
+void chm_default_window(struct chm *chm, const char *name)
+{
+    char *copy = dupstr(name);
+
+    sfree(chm->default_window);
+    chm->default_window = copy;
+}
+
+/*
+ * Append a window description to chm->windows. Every string argument
+ * is copied; `contentsfile' and `indexfile' may be NULL, in which case
+ * NULL is stored for them.
+ */
+void chm_add_window(struct chm *chm, const char *winname, const char *title,
+                    const char *contentsfile, const char *indexfile,
+                    const char *rootfile, int navpaneflags, int toolbarflags)
+{
+    struct chm_window *w = snew(struct chm_window);
+
+    w->name = dupstr(winname);
+    w->title = dupstr(title);
+
+    if (contentsfile)
+        w->contentsfile = dupstr(contentsfile);
+    else
+        w->contentsfile = NULL;
+
+    if (indexfile)
+        w->indexfile = dupstr(indexfile);
+    else
+        w->indexfile = NULL;
+
+    w->rootfile = dupstr(rootfile);
+    w->navpaneflags = navpaneflags;
+    w->toolbarflags = toolbarflags;
+
+    /* Windows are kept in the order they were added. */
+    addpos234(chm->windows, w, count234(chm->windows));
+}
+
+/*
+ * Create a section with copies of `title' and `url' and link it in as
+ * the last child of `parent', or as the last top-level section when
+ * `parent' is NULL. The section is also appended to the list of all
+ * sections. Returns the new section.
+ */
+struct chm_section *chm_add_section(struct chm *chm,
+                                    struct chm_section *parent,
+                                    const char *title, const char *url)
+{
+    struct chm_section *node = snew(struct chm_section);
+    struct chm_section **sib_head, **sib_tail;
+
+    node->title = dupstr(title);
+    node->url = dupstr(url);
+    node->parent = parent;             /* NULL for a top-level section */
+    node->firstchild = NULL;
+    node->lastchild = NULL;
+    node->nextsibling = NULL;
+    node->next = NULL;
+
+    /* Pick the sibling list this node joins. */
+    if (parent) {
+        sib_head = &parent->firstchild;
+        sib_tail = &parent->lastchild;
+    } else {
+        sib_head = &chm->rootsecthead;
+        sib_tail = &chm->rootsecttail;
+    }
+
+    /* Append to the sibling list. */
+    if (*sib_tail)
+        (*sib_tail)->nextsibling = node;
+    else
+        *sib_head = node;
+    *sib_tail = node;
+
+    /* Append to the list of all sections. */
+    if (chm->allsecttail)
+        chm->allsecttail->next = node;
+    else
+        chm->allsecthead = node;
+    chm->allsecttail = node;
+
+    return node;
+}
+
+struct chm_urltbl_entry {
+    /*
+     * Records of #URLTBL, before their order is finalised.
+     *
+     * The first word of this record is listed as 'unknown, perhaps
+     * some kind of unique ID' in chmspec. But my observation in HTML
+     * Help Workshop's output is that it's actually a hash of the
+     * target URL, and the file is sorted by them. chm_url_hash()
+     * below implements the hash algorithm.
+     */
+    unsigned long hash;
+    int topic_index;
+    int urlstr_pos;
+    int topics_offset_to_update;
+};
+
+/*
+ * Ordering function for struct chm_urltbl_entry: primarily by hash,
+ * with topic_index as the tie-breaker. Returns -1, 0 or +1.
+ *
+ * The result must be antisymmetric (cmp(a,b) == -cmp(b,a)) for a
+ * sorted container to behave, so the larger topic_index has to give
+ * +1; returning -1 in both directions made two entries with equal
+ * hashes each sort before the other.
+ */
+int chm_urltbl_entry_cmp(void *av, void *bv)
+{
+    const struct chm_urltbl_entry
+        *a = (const struct chm_urltbl_entry *)av,
+        *b = (const struct chm_urltbl_entry *)bv;
+    if (a->hash < b->hash) return -1;
+    if (a->hash > b->hash) return +1;
+    if (a->topic_index < b->topic_index) return -1;
+    if (a->topic_index > b->topic_index) return +1;
+    return 0;
+}
+
+/*
+ * Hash a URL string for use as the first word of its #URLTBL record.
+ *
+ * For each byte the running hash is multiplied by 43 and a value
+ * derived from the byte is added. All arithmetic is done modulo 2^32
+ * by hand, so the result is the same whether or not unsigned long is
+ * wider than 32 bits. A final result of 0 is replaced by 1.
+ */
+static unsigned long chm_url_hash(const char *str)
+{
+ const char *p;
+ unsigned long hash;
+
+ hash = 0;
+ for (p = str; *p; p++) {
+ /*
+ * Multiply `hash' by 43.
+ */
+ {
+ /* Work on the two 16-bit halves separately, carrying from
+ * the bottom half into the top and discarding whatever
+ * carries out of the top, i.e. reduce modulo 2^32. */
+ unsigned long bottom, top;
+ bottom = (hash & 0xFFFFUL) * 43;
+ top = ((hash >> 16) & 0xFFFFUL) * 43;
+ top += (bottom >> 16);
+ bottom &= 0xFFFFUL;
+ top &= 0xFFFFUL;
+ hash = (top << 16) | bottom;
+ }
+
+ /*
+ * Add the mapping value for this byte to `hash'.
+ */
+ {
+ /* The byte is read as signed, so values 0x80-0xFF are
+ * negative here and take the `c <= 'Z'' branch below. */
+ int c = (signed char)*p;
+
+ /*
+ * Translation rule determined by getting hhc.exe to hash
+ * a lot of strings and analysing the results. I was able
+ * to confirm this mapping rule for all byte values except
+ * for NUL, CR, LF, ^Z and backslash: the first four of
+ * those I couldn't find any way to get hhc to insert into
+ * a URL, and the last one is automatically translated
+ * into '/', presumably for reasons of Windows vs URI path
+ * syntax normalisation.
+ */
+ int val = (c == '/' ? 0x2c : c <= 'Z' ? c-0x30 : c-0x50);
+
+ /* Add `val' with explicit wrap-around at 2^32: the first
+ * branch handles overflow past 0xFFFFFFFF, the second
+ * handles a negative `val' taking the hash below zero. */
+ if (val > 0 && hash > (0xFFFFFFFFUL - val)) {
+ hash -= (0xFFFFFFFFUL - val) + 1;
+ } else if (val < 0 && hash < (unsigned long)-val) {
+ hash += (0xFFFFFFFFUL + val) + 1;
+ } else
+ hash += val;
+ }
+ }
+
+ /*
+ * Special case: an output hash of 0 is turned into 1, which I
+ * conjecture is so that in some context or other 0 can be
+ * reserved to mean something like 'null' or 'no hash value
+ * available'.
+ */
+ if (hash == 0)
+ hash = 1;
+
+ return hash;
+}
+
+const char *chm_build(struct chm *chm, int *outlen)
+{
+ rdstringc dir = {0, 0, NULL};
+ rdstringc sysfile = {0, 0, NULL};
+ struct LZXEncodedFile *ef;
+ int rec;
+
+ chm_add_file_internal(chm, "/", "", 0, &chm->content0, 0);
+
+ RDADD_32BIT_LSB_FIRST(&sysfile, 3); /* #SYSTEM file version */
+
+ rec = sys_start(&sysfile, 9); /* identify CHM-producing tool */
+ rdaddsc(&sysfile, "Halibut, ");
+ rdaddsc(&sysfile, version);
+ rdaddc(&sysfile, '\0');
+ sys_end(&sysfile, rec);
+
+ rec = sys_start(&sysfile, 12); /* number of 'information types' */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0);
+ sys_end(&sysfile, rec);
+ rec = sys_start(&sysfile, 15); /* checksum of 'information types' */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0);
+ sys_end(&sysfile, rec);
+ /* actual section of 'information types', whatever those might be */
+ chm_add_file_internal(chm, "/#ITBITS", "", 0, &chm->content0, 0);
+
+ if (chm->title) {
+ rec = sys_start(&sysfile, 3); /* document title */
+ rdaddsc(&sysfile, chm->title);
+ rdaddc(&sysfile, '\0');
+ sys_end(&sysfile, rec);
+ }
+
+ if (chm->default_topic) {
+ rec = sys_start(&sysfile, 2);
+ rdaddsc(&sysfile, chm->default_topic);
+ rdaddc(&sysfile, '\0');
+ sys_end(&sysfile, rec);
+ }
+
+ if (chm->contents_filename) {
+ rec = sys_start(&sysfile, 0);
+ rdaddsc(&sysfile, chm->contents_filename);
+ rdaddc(&sysfile, '\0');
+ sys_end(&sysfile, rec);
+ }
+
+ if (chm->index_filename) {
+ rec = sys_start(&sysfile, 1);
+ rdaddsc(&sysfile, chm->index_filename);
+ rdaddc(&sysfile, '\0');
+ sys_end(&sysfile, rec);
+ }
+
+ if (chm->default_window) {
+ rec = sys_start(&sysfile, 5);
+ rdaddsc(&sysfile, chm->default_window);
+ rdaddc(&sysfile, '\0');
+ sys_end(&sysfile, rec);
+ }
+
+ rec = sys_start(&sysfile, 4);
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0x809); /* language again (FIXME) */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* DBCS: off */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 1); /* full-text search: on */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no KLinks (whatever they are) */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no ALinks (whatever they are) */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0x11223344); /* timestamp LSW (FIXME) */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0x55667788); /* timestamp MSW (FIXME) */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */
+ sys_end(&sysfile, rec);
+
+ {
+ rdstringc winfile = {0, 0, NULL};
+ int i, j, s;
+ struct chm_window *win;
+
+ RDADD_32BIT_LSB_FIRST(&winfile, count234(chm->windows));
+ RDADD_32BIT_LSB_FIRST(&winfile, 196); /* size of each entry */
+ for (i = 0;
+ (win = (struct chm_window *)index234(chm->windows, i)) != NULL;
+ i++) {
+ RDADD_32BIT_LSB_FIRST(&winfile, 196); /* size of entry */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* not Unicode */
+ s = chm_intern_string(chm, win->name);
+ RDADD_32BIT_LSB_FIRST(&winfile, s);
+ /* Bitmap of which fields are used: 2 means nav pane
+ * style, 0x200 means whether nav pane is initially
+ * closed, 0x400 means tab position */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0x502);
+ /* Nav pane styles:
+ * 0x40000 = user can control window size/pos
+ * 0x20000 = advanced full-text search UI
+ * 0x00400 = include a search tab
+ * 0x00100 = keep contents/index in sync with current topic
+ * 0x00020 = three-pane window */
+ RDADD_32BIT_LSB_FIRST(&winfile, win->navpaneflags);
+ s = chm_intern_string(chm, win->title);
+ RDADD_32BIT_LSB_FIRST(&winfile, s);
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window styles */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window ex styles */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window rect.left */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window rect.top */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window rect.right */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window rect.bottom */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window show state */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* only used at runtime */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* nav pane width */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* topic rect.left */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* topic rect.top */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* topic rect.right */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* topic rect.bottom */
+ s = chm_intern_string(chm, win->contentsfile);
+ RDADD_32BIT_LSB_FIRST(&winfile, s);
+ s = chm_intern_string(chm, win->indexfile);
+ RDADD_32BIT_LSB_FIRST(&winfile, s);
+ s = chm_intern_string(chm, win->rootfile);
+ RDADD_32BIT_LSB_FIRST(&winfile, s);
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Home button target */
+ RDADD_32BIT_LSB_FIRST(&winfile, win->toolbarflags);
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* nav pane initially open */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* default nav pane = TOC */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* nav pane tabs at top */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* WM_NOTIFY id */
+ for (j = 0; j < 20; j++)
+ rdaddc(&winfile, 0); /* tab order block */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* history to keep */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 1 button target */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 2 button target */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 1 button text */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no Jump 2 button text */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window min rect.left */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window min rect.top */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window min rect.right */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* window min rect.bottom */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no information types */
+ RDADD_32BIT_LSB_FIRST(&winfile, 0); /* no custom tabs */
+ }
+ assert(winfile.pos == 8 + 196 * count234(chm->windows));
+ chm_add_file_internal(chm, "/#WINDOWS", winfile.text, winfile.pos,
+ &chm->content1, 1);
+ sfree(winfile.text);
+ }
+
+ {
+ struct chm_section *sect;
+ rdstringc tocidx = {0, 0, NULL};
+ rdstringc topics = {0, 0, NULL};
+ rdstringc urltbl = {0, 0, NULL};
+ rdstringc urlstr = {0, 0, NULL};
+ int i, index, s, n_tocidx_3;
+ struct chm_directory_entry *contentsfile = NULL, *indexfile = NULL;
+ tree234 *urltbl_pre;
+ struct chm_urltbl_entry *urltbl_entry;
+
+ urltbl_pre = newtree234(chm_urltbl_entry_cmp);
+
+ for (i = 0; i < 0x1000; i++)
+ rdaddc(&tocidx, 0);
+
+ /* Write a header of one zero byte at the start of #URLSTR.
+ * chmspec says this doesn't always appear, and is unclear on
+ * what this is for, but I suspect it serves the same purpose
+ * as the zero byte at the start of #STRINGS, namely that it
+ * arranges that an absent string in the following records can
+ * be represented by an offset of zero which will
+ * automatically point to this byte and hence indicate the
+ * empty string. */
+ rdaddc(&urlstr, 0);
+
+ if (chm->contents_filename) {
+ char *withslash = add_leading_slash(chm->contents_filename);
+ contentsfile = chm_find_file(chm, withslash);
+ sfree(withslash);
+ assert(contentsfile);
+ }
+ if (chm->index_filename) {
+ char *withslash = add_leading_slash(chm->index_filename);
+ indexfile = chm_find_file(chm, withslash);
+ sfree(withslash);
+ assert(indexfile);
+ }
+
+ index = 0;
+
+ /* #TOCIDX header field pointing at start of type-1 records */
+ PUT_32BIT_LSB_FIRST(tocidx.text + 0, tocidx.pos);
+
+ /*
+ * First pass over the section structure, generating in
+ * parallel one of the multiple structure types in #TOCIDX and
+ * the sole record in all the other files.
+ */
+ for (sect = chm->allsecthead; sect; sect = sect->next) {
+ /* Size of the first kind of #TOCIDX record varies between
+ * leaf and internal nodes */
+ int tocidx_size_1 = (sect->firstchild ? 0x1c : 0x14);
+
+ /*
+ * Flags:
+ * - 8 means there's a local filename, which in _our_ CHM
+ * files there always is. If you unset this flag, you
+ * get a node in the contents treeview which doesn't
+ * open any page when clicked, and exists solely to
+ * contain children; in that situation the topic index
+ * field at position 0x08 in this record also stops
+ * being an index into #TOPICS and instead becomes an
+ * index into #STRINGS giving the node's title.
+ * - 4 apparently means the node should have the 'book'
+ * rather than 'page' icon in the TOC tree view in the
+ * help viewer
+ * - 1 means the node has a subtree in the tree view,
+ * which I take to mean (contrary to chmspec) that
+ * _this_ is the flag that means this node is a
+ * non-leaf node and hence has the two extra fields for
+ * first-child and whatever the other one means
+ */
+ unsigned tocidx_1_flags = (sect->firstchild ? 0x5 : 0) | 8;
+
+ int urlstr_size;
+
+ /* Pad to ensure the record isn't split between
+ * 0x1000-byte pages of the file */
+ while ((tocidx.pos ^ (tocidx.pos + tocidx_size_1 - 1)) >> 12)
+ RDADD_32BIT_LSB_FIRST(&tocidx, 0);
+
+ sect->topic_index = index++;
+
+ /* Write the type-1 record in #TOCIDX */
+ sect->tocidx_offset_1 = tocidx.pos;
+ RDADD_16BIT_LSB_FIRST(&tocidx, 0); /* unknown */
+ /* chmspec thinks this 16-bit field is 'unknown', but in
+ * my observations it appears to be the index of an entry
+ * in the #TOCIDX type-3 region. But I still don't know
+ * what those are really for. */
+ RDADD_16BIT_LSB_FIRST(&tocidx, sect->topic_index);
+ RDADD_32BIT_LSB_FIRST(&tocidx, tocidx_1_flags);
+ RDADD_32BIT_LSB_FIRST(&tocidx, sect->topic_index);
+ RDADD_32BIT_LSB_FIRST(&tocidx, sect->parent ?
+ sect->parent->tocidx_offset_1 : 0);
+ RDADD_32BIT_LSB_FIRST(&tocidx, 0); /* 'next' ptr; fill in later */
+ if (sect->firstchild) {
+ RDADD_32BIT_LSB_FIRST(&tocidx, 0); /* child; fill in later */
+ RDADD_32BIT_LSB_FIRST(&tocidx, 0); /* unknown */
+ }
+ assert(tocidx.pos == sect->tocidx_offset_1 + tocidx_size_1);
+
+ /* Figure out our offset in #URLSTR, by ensuring we're not
+ * going to overrun a page boundary (as usual). For this
+ * we need our record length, which is two 32-bit fields
+ * plus a NUL-terminated copy of the target file name / URL. */
+ urlstr_size = 8 + strlen(sect->url) + 1;
+ assert(urlstr_size < 0x1000); /* must _fit_ in a page! */
+ while ((urlstr.pos ^ (urlstr.pos + urlstr_size - 1)) >> 12)
+ rdaddc(&urlstr, 0);
+
+ /*
+ * Save everything we know so far about the #URLTBL record
+ * we'll need to write.
+ */
+ urltbl_entry = snew(struct chm_urltbl_entry);
+ urltbl_entry->hash = chm_url_hash(sect->url);
+ urltbl_entry->topic_index = sect->topic_index;
+ urltbl_entry->urlstr_pos = urlstr.pos;
+ add234(urltbl_pre, urltbl_entry);
+
+ /* Write the #TOPICS entry */
+ RDADD_32BIT_LSB_FIRST(&topics, sect->tocidx_offset_1);
+ s = chm_intern_string(chm, sect->title);
+ RDADD_32BIT_LSB_FIRST(&topics, s);
+ urltbl_entry->topics_offset_to_update = topics.pos;
+ RDADD_32BIT_LSB_FIRST(&topics, 0); /* fill in later */
+ RDADD_16BIT_LSB_FIRST(&topics, 6); /* flag as 'in contents' */
+ RDADD_16BIT_LSB_FIRST(&topics, 0); /* unknown */
+
+ /*
+ * Write the #URLSTR entry.
+ */
+ RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* URL string (null) */
+ RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* FrameName location (null) */
+ rdaddsc(&urlstr, sect->url); /* 'Local' */
+ rdaddc(&urlstr, '\0');
+ }
+
+ /*
+ * Add entries in #URLTBL, #URLSTR and #TOPICS for the
+ * contents and index files. They don't form part of the tree
+ * in #TOCIDX, though.
+ */
+ if (chm->contents_filename) {
+ urltbl_entry = snew(struct chm_urltbl_entry);
+ urltbl_entry->hash = chm_url_hash(chm->contents_filename);
+ urltbl_entry->topic_index = index;
+ urltbl_entry->urlstr_pos = urlstr.pos;
+ add234(urltbl_pre, urltbl_entry);
+
+ /* #TOPICS entry */
+ RDADD_32BIT_LSB_FIRST(&topics, 0); /* no #TOCIDX entry */
+ RDADD_32BIT_LSB_FIRST(&topics, 0xFFFFFFFFU); /* no title either */
+ urltbl_entry->topics_offset_to_update = topics.pos;
+ RDADD_32BIT_LSB_FIRST(&topics, 0); /* fill in later */
+ RDADD_16BIT_LSB_FIRST(&topics, 2); /* flag as 'not in contents' */
+ RDADD_16BIT_LSB_FIRST(&topics, 0); /* unknown */
+
+ /* #URLSTR entry */
+ RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* URL string (null) */
+ RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* FrameName location (null) */
+ rdaddsc(&urlstr, chm->contents_filename); /* 'Local' */
+ rdaddc(&urlstr, '\0');
+
+ /* And add the entry in #SYSTEM that cites the hash of the
+ * #URLTBL entry. */
+ rec = sys_start(&sysfile, 11);
+ RDADD_32BIT_LSB_FIRST(&sysfile, urltbl_entry->hash);
+ sys_end(&sysfile, rec);
+
+ index++;
+ }
+ if (chm->index_filename) {
+ urltbl_entry = snew(struct chm_urltbl_entry);
+ urltbl_entry->hash = chm_url_hash(chm->index_filename);
+ urltbl_entry->topic_index = index;
+ urltbl_entry->urlstr_pos = urlstr.pos;
+ add234(urltbl_pre, urltbl_entry);
+
+ /* #TOPICS entry */
+ RDADD_32BIT_LSB_FIRST(&topics, 0); /* no #TOCIDX entry */
+ RDADD_32BIT_LSB_FIRST(&topics, 0xFFFFFFFFU); /* no title either */
+ urltbl_entry->topics_offset_to_update = topics.pos;
+ RDADD_32BIT_LSB_FIRST(&topics, 0); /* fill in later */
+ RDADD_16BIT_LSB_FIRST(&topics, 2); /* flag as 'not in contents' */
+ RDADD_16BIT_LSB_FIRST(&topics, 0); /* unknown */
+
+ /* #URLSTR entry */
+ RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* URL string (null) */
+ RDADD_32BIT_LSB_FIRST(&urlstr, 0); /* FrameName location (null) */
+ rdaddsc(&urlstr, chm->index_filename); /* 'Local' */
+ rdaddc(&urlstr, '\0');
+
+ /* And add the entry in #SYSTEM that cites the hash of the
+ * #URLTBL entry. */
+ rec = sys_start(&sysfile, 7);
+ RDADD_32BIT_LSB_FIRST(&sysfile, urltbl_entry->hash);
+ sys_end(&sysfile, rec);
+
+ index++;
+ }
+
+ /*
+ * Now we've got all our #URLTBL entries, so we can write out
+ * #URLTBL itself.
+ */
+ while ((urltbl_entry = delpos234(urltbl_pre, 0)) != NULL) {
+ /* Pad #URLTBL to the beginning of this section's entry.
+ * Entries are all 12 bytes long, but again there's some
+ * padding to ensure that they don't cross a page
+ * boundary. */
+ while ((urltbl.pos ^ (urltbl.pos + 12 - 1)) >> 12)
+ RDADD_32BIT_LSB_FIRST(&urltbl, 0);
+
+ /* Fill in the link from #TOPICS to this entry's offset */
+ PUT_32BIT_LSB_FIRST(topics.text +
+ urltbl_entry->topics_offset_to_update,
+ urltbl.pos);
+
+ /* Write the entry itself. */
+ RDADD_32BIT_LSB_FIRST(&urltbl, urltbl_entry->hash);
+ RDADD_32BIT_LSB_FIRST(&urltbl, urltbl_entry->topic_index);
+ RDADD_32BIT_LSB_FIRST(&urltbl, urltbl_entry->urlstr_pos);
+
+ sfree(urltbl_entry);
+ }
+ freetree234(urltbl_pre);
+
+ /*
+ * Small follow-up pass filling in forward-pointing offset
+ * fields in the #TOCIDX type-1 records which the previous
+ * pass didn't know yet.
+ */
+ for (sect = chm->allsecthead; sect; sect = sect->next) {
+ if (sect->nextsibling)
+ PUT_32BIT_LSB_FIRST(tocidx.text + sect->tocidx_offset_1 + 0x10,
+ sect->nextsibling->tocidx_offset_1);
+ if (sect->firstchild)
+ PUT_32BIT_LSB_FIRST(tocidx.text + sect->tocidx_offset_1 + 0x14,
+ sect->firstchild->tocidx_offset_1);
+ }
+
+ /* #TOCIDX header field pointing at start of type-2 records */
+ PUT_32BIT_LSB_FIRST(tocidx.text + 0xC, tocidx.pos);
+
+ /*
+ * Write the #TOCIDX type-2 records, which are just 4 bytes
+ * long and just contain another copy of each topic's index,
+ * but we need to have them there so that the type-3 records
+ * can refer to them by offset.
+ */
+ for (sect = chm->allsecthead; sect; sect = sect->next) {
+ sect->tocidx_offset_2 = tocidx.pos;
+ RDADD_32BIT_LSB_FIRST(&tocidx, sect->topic_index);
+ }
+
+ /* Align the current #TOCIDX offset to 16 bytes */
+ while (tocidx.pos & 0xF)
+ rdaddc(&tocidx, 0);
+
+ /* #TOCIDX header field pointing at start of type-3 records */
+ PUT_32BIT_LSB_FIRST(tocidx.text + 0x4, tocidx.pos);
+
+ /*
+ * Write the #TOCIDX type-3 records.
+ *
+ * In help files I've examined, there are fewer of these than
+ * you might expect; apparently not all sections rate one for
+ * some reason. For the moment I'm just writing out one for
+ * every section.
+ */
+ n_tocidx_3 = 0;
+ for (sect = chm->allsecthead; sect; sect = sect->next) {
+ RDADD_32BIT_LSB_FIRST(&tocidx, sect->tocidx_offset_1);
+ RDADD_32BIT_LSB_FIRST(&tocidx, sect->topic_index + 666); /* ?! */
+ RDADD_32BIT_LSB_FIRST(&tocidx, sect->tocidx_offset_2);
+ RDADD_32BIT_LSB_FIRST(&tocidx, sect->topic_index);
+ n_tocidx_3++;
+ }
+
+ /* #TOCIDX header field giving number of type-3 records */
+ PUT_32BIT_LSB_FIRST(tocidx.text + 0x8, n_tocidx_3);
+
+ chm_add_file_internal(chm, "/#TOCIDX", tocidx.text, tocidx.pos,
+ &chm->content1, 1);
+ chm_add_file_internal(chm, "/#TOPICS", topics.text, topics.pos,
+ &chm->content1, 1);
+ chm_add_file_internal(chm, "/#URLTBL", urltbl.text, urltbl.pos,
+ &chm->content1, 1);
+ chm_add_file_internal(chm, "/#URLSTR", urlstr.text, urlstr.pos,
+ &chm->content1, 1);
+
+ /*
+ * Write #IDXHDR (and its mirror in #SYSTEM), which we
+ * couldn't do until we knew how many topic nodes there were.
+ */
+ {
+ int idxhdr_start;
+
+ rec = sys_start(&sysfile, 13);
+ idxhdr_start = sysfile.pos;
+
+ rdaddsc(&sysfile, "T#SM"); /* #IDXHDR magic */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0x12345678); /* checksum? FIXME */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 1); /* unknown */
+ RDADD_32BIT_LSB_FIRST(&sysfile, index); /* number of topic nodes */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no image list */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* top-level node is
+ * not a folder */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no bg colour */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no fg colour */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no font spec */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no window style */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no ex win style */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* unknown */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no frame name */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0xFFFFFFFFU); /* no window name */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no information types */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 1); /* unknown */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* no merge files */
+ RDADD_32BIT_LSB_FIRST(&sysfile, 0); /* unknown */
+ while (sysfile.pos - idxhdr_start < 4096)
+ rdaddc(&sysfile, 0);
+
+ chm_add_file_internal(chm, "/#IDXHDR", sysfile.text + idxhdr_start,
+ sysfile.pos - idxhdr_start,
+ &chm->content1, 1);
+ sys_end(&sysfile, rec);
+ }
+
+ sfree(tocidx.text);
+ sfree(topics.text);
+ sfree(urltbl.text);
+ sfree(urlstr.text);
+ }
+
+ /* Missing from #SYSTEM: */
+ /* 10 (4-byte timestamp) */
+ /* 6 (logical file name) */
+
+ chm_add_file_internal(chm, "/#SYSTEM", sysfile.text, sysfile.pos,
+ &chm->content0, 0);
+ sfree(sysfile.text);
+
+ chm_add_file_internal(chm, "/#STRINGS", chm->stringsfile.text,
+ chm->stringsfile.pos, &chm->content1, 1);
+
+ /*
+ * ::DataSpace/NameList, giving the names of the two content sections.
+ */
+ {
+ rdstringc dsnl = {0, 0, NULL};
+ const char *p;
+ int stringstart;
+
+ RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* total file size; fill in later */
+ RDADD_16BIT_LSB_FIRST(&dsnl, 2); /* number of names */
+
+ RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* string length; fill in later */
+ stringstart = dsnl.pos;
+ for (p = "Uncompressed"; *p; p++)
+ RDADD_16BIT_LSB_FIRST(&dsnl, *p);
+ PUT_16BIT_LSB_FIRST(dsnl.text + stringstart - 2,
+ (dsnl.pos - stringstart) / 2);
+ RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* NUL terminator */
+
+ RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* string length; fill in later */
+ stringstart = dsnl.pos;
+ for (p = "MSCompressed"; *p; p++)
+ RDADD_16BIT_LSB_FIRST(&dsnl, *p);
+ PUT_16BIT_LSB_FIRST(dsnl.text + stringstart - 2,
+ (dsnl.pos - stringstart) / 2);
+ RDADD_16BIT_LSB_FIRST(&dsnl, 0); /* NUL terminator */
+
+ PUT_16BIT_LSB_FIRST(dsnl.text, dsnl.pos / 2);
+
+ chm_add_file_internal(chm, "::DataSpace/NameList", dsnl.text, dsnl.pos,
+ &chm->content0, 0);
+
+ sfree(dsnl.text);
+ }
+
+ /*
+ * Actually compress the compressed-data section, load the
+ * compressed version of it into the containing uncompressed
+ * section, and write the auxiliary files describing it.
+ */
+ {
+ rdstringc rs = {0, 0, NULL};
+ const char *p;
+ int orig_decomp_size = chm->content1.pos;
+ size_t i;
+
+ while (chm->content1.pos & 0x7FFF)
+ rdaddc(&chm->content1, 0); /* pad to a realign-interval boundary */
+ ef = lzx(chm->content1.text, chm->content1.pos, 0x8000, 0x10000);
+ chm_add_file_internal(
+ chm, "::DataSpace/Storage/MSCompressed/Content",
+ (char *)ef->data, ef->data_len, &chm->content0, 0);
+
+ for (p = "{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}"; *p; p++)
+ RDADD_16BIT_LSB_FIRST(&rs, *p);
+ rs.pos = 0x26; /* this file is always written truncated :-) */
+ chm_add_file_internal(
+ chm, "::DataSpace/Storage/MSCompressed/Transform/List",
+ rs.text, rs.pos, &chm->content0, 0);
+ rs.pos = 0;
+
+ RDADD_32BIT_LSB_FIRST(&rs, orig_decomp_size);
+ RDADD_32BIT_LSB_FIRST(&rs, 0); /* high word of 64-bit size */
+ chm_add_file_internal(
+ chm, "::DataSpace/Storage/MSCompressed/SpanInfo",
+ rs.text, rs.pos, &chm->content0, 0);
+ rs.pos = 0;
+
+ RDADD_32BIT_LSB_FIRST(&rs, 6); /* file size */
+ rdaddsc(&rs, "LZXC"); /* compression type identifier */
+ RDADD_32BIT_LSB_FIRST(&rs, 2); /* version */
+ RDADD_32BIT_LSB_FIRST(&rs, 2); /* reset interval in units of 2^15 */
+ RDADD_32BIT_LSB_FIRST(&rs, 2); /* window size in units of 2^15 */
+ RDADD_32BIT_LSB_FIRST(&rs, 1); /* reset interval multiplier */
+ RDADD_32BIT_LSB_FIRST(&rs, 0); /* unknown */
+ chm_add_file_internal(
+ chm, "::DataSpace/Storage/MSCompressed/ControlData",
+ rs.text, rs.pos, &chm->content0, 0);
+ rs.pos = 0;
+
+ RDADD_32BIT_LSB_FIRST(&rs, 2); /* unknown (version number?) */
+ RDADD_32BIT_LSB_FIRST(&rs, ef->n_resets); /* reset table length */
+ RDADD_32BIT_LSB_FIRST(&rs, 8); /* reset table entry size */
+ RDADD_32BIT_LSB_FIRST(&rs, 0x28); /* reset table offset */
+ RDADD_32BIT_LSB_FIRST(&rs, orig_decomp_size); /* uncompressed len */
+ RDADD_32BIT_LSB_FIRST(&rs, 0); /* MSW */
+ RDADD_32BIT_LSB_FIRST(&rs, ef->data_len); /* compressed len */
+ RDADD_32BIT_LSB_FIRST(&rs, 0); /* MSW */
+ RDADD_32BIT_LSB_FIRST(&rs, 0x8000); /* realign interval */
+ RDADD_32BIT_LSB_FIRST(&rs, 0); /* MSW */
+ for (i = 0; i < ef->n_resets; i++) {
+ RDADD_32BIT_LSB_FIRST(&rs, ef->reset_byte_offsets[i]);
+ RDADD_32BIT_LSB_FIRST(&rs, 0); /* MSW */
+ }
+ chm_add_file_internal(
+ chm, "::DataSpace/Storage/MSCompressed/Transform/"
+ "{7FC28940-9D31-11D0-9B27-00A0C91E9C7C}/InstanceData/ResetTable",
+ rs.text, rs.pos, &chm->content0, 0);
+ rs.pos = 0;
+ }
+
+ sfree(ef->data);
+ sfree(ef->reset_byte_offsets);
+ sfree(ef);
+
+ directory(&dir, chm->files);
+ itsf(&chm->outfile, &dir, &chm->content0);
+ sfree(dir.text);
+
+ assert(outlen);
+ *outlen = chm->outfile.pos;
+ return chm->outfile.text;
+}