summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Tatham <anakin@pobox.com>2001-12-03 17:34:43 +0000
committerSimon Tatham <anakin@pobox.com>2001-12-03 17:34:43 +0000
commita299f41baa1242dea65fcddfc69b85286dbc2490 (patch)
tree6b7e48bd58c4777d5fbc6a410fa1a854ec9434a4
parent15f9f47056700ed8fea2659e15cb1b28415f7cb6 (diff)
downloadhalibut-a299f41baa1242dea65fcddfc69b85286dbc2490.zip
halibut-a299f41baa1242dea65fcddfc69b85286dbc2490.tar.gz
halibut-a299f41baa1242dea65fcddfc69b85286dbc2490.tar.bz2
halibut-a299f41baa1242dea65fcddfc69b85286dbc2490.tar.xz
Add indexing support. Wow, that was unexpectedly easy.
[originally from svn r1440]
-rw-r--r--winhelp.c204
-rw-r--r--winhelp.h5
2 files changed, 195 insertions, 14 deletions
diff --git a/winhelp.c b/winhelp.c
index c0731c8..a5520da 100644
--- a/winhelp.c
+++ b/winhelp.c
@@ -10,8 +10,11 @@
/*
* Still to do:
*
- * - Go back and look at the KW* sections, which should allow us
- * to add indexing support.
+ * - Code cleanups: prototype whlp_{start,end}_hyperlink in the
+ * header file. Also decide whether context_hash is going to be
+ * externally visible or not; if it is, rename it to
+ * whlp_something and put it in the header file, and if not,
+ * make it static.
* - nonbreaking spaces and hyphens will be needed.
* - tabs, and tab stop settings in the paragraphinfo.
*
@@ -119,6 +122,12 @@ struct file {
int fileoffset; /* offset in the real .HLP file */
};
+struct indexrec {
+ char *term; /* index term, will need freeing */
+ context *topic; /* topic it links to */
+ int count, offset; /* used when building |KWDATA */
+};
+
struct topiclink {
int topicoffset, topicpos; /* for referencing from elsewhere */
int recordtype;
@@ -135,6 +144,7 @@ struct WHLP_TOPIC_tag {
struct topiclink *link; /* this provides TOPICOFFSET */
context *browse_next, *browse_prev;
char *title; /* needs freeing */
+ int index; /* arbitrary number */
};
struct WHLP_tag {
@@ -143,8 +153,7 @@ struct WHLP_tag {
tree234 *contexts; /* also stores `context' */
tree234 *titles; /* _also_ stores `context' */
tree234 *text; /* stores `struct topiclink' */
- struct file *contextfile; /* the |CONTEXT internal file */
- struct file *titlefile; /* the |TTLBTREE internal file */
+ tree234 *index; /* stores `struct indexrec' */
struct file *systemfile; /* the |SYSTEM internal file */
context *ptopic; /* primary topic */
struct topiclink *prevtopic; /* to link type-2 records together */
@@ -157,6 +166,7 @@ struct WHLP_tag {
int lasttopicstart; /* while building |TOPIC section */
int para_flags;
int para_attrs[7];
+ int ncontexts;
};
/* Functions to return the index and leaf data for B-tree contents. */
@@ -262,6 +272,41 @@ static int ttlleaf(const void *av, unsigned char *outbuf)
return 4+slen;
}
+/* The |KWBTREE internal file maps index strings to TOPICOFFSETs. */
+
+static int idxcmp(void *av, void *bv)
+{
+ const struct indexrec *a = (const struct indexrec *)av;
+ const struct indexrec *b = (const struct indexrec *)bv;
+ int cmp;
+ if ( (cmp = strcmp(a->term, b->term)) != 0)
+ return cmp;
+ /* Now sort on the index field of the topics. */
+ if (a->topic->index < b->topic->index)
+ return -1;
+ if (a->topic->index > b->topic->index)
+ return +1;
+ return 0;
+}
+
+static int idxindex(const void *av, unsigned char *outbuf)
+{
+ const struct indexrec *a = (const struct indexrec *)av;
+ int len = 1+strlen(a->term);
+ memcpy(outbuf, a->term, len);
+ return len;
+}
+
+static int idxleaf(const void *av, unsigned char *outbuf)
+{
+ const struct indexrec *a = (const struct indexrec *)av;
+ int len = 1+strlen(a->term);
+ memcpy(outbuf, a->term, len);
+ PUT_16BIT_LSB_FIRST(outbuf+len, a->count);
+ PUT_32BIT_LSB_FIRST(outbuf+len+2, a->offset);
+ return len+6;
+}
+
/* ----------------------------------------------------------------------
* Manage help contexts and topics.
*/
@@ -322,6 +367,15 @@ WHLP_TOPIC whlp_register_topic(WHLP h, char *context_name, char **clash)
context *ctx = mknew(context);
context *otherctx;
+ /*
+ * Index contexts in order of creation, just so there's some
+ * sort of non-arbitrary ordering in the index B-tree. Call me
+ * fussy, but I don't like indexing on pointer values because I
+ * prefer the code to be deterministic when run under different
+ * C libraries.
+ */
+ ctx->index = h->ncontexts++;
+
if (context_name) {
/*
* We have a context name, which means we can put this
@@ -865,6 +919,65 @@ static void whlp_topic_layout(WHLP h)
}
/* ----------------------------------------------------------------------
+ * Manage the index sections (|KWDATA, |KWMAP, |KWBTREE).
+ */
+
+void whlp_index_term(WHLP h, char *index, WHLP_TOPIC topic)
+{
+ struct indexrec *idx = mknew(struct indexrec);
+
+ idx->term = dupstr(index);
+ idx->topic = topic;
+ /*
+ * If this reference is already in the tree, just silently drop
+ * the duplicate.
+ */
+ if (add234(h->index, idx) != idx) {
+ sfree(idx->term);
+ sfree(idx);
+ }
+}
+
+static void whlp_build_kwdata(WHLP h)
+{
+ struct file *f;
+ int i;
+ struct indexrec *first, *next;
+
+ f = whlp_new_file(h, "|KWDATA");
+
+ /*
+ * Go through the index B-tree, condensing all sequences of
+ * records with the same term into a single one with a valid
+ * (count,offset) pair, and building up the KWDATA section.
+ */
+ i = 0;
+ while ( (first = index234(h->index, i)) != NULL) {
+ first->count = 1;
+ first->offset = whlp_file_offset(f);
+ whlp_file_add_long(f, first->topic->link->topicoffset);
+ i++;
+ while ( (next = index234(h->index, i)) != NULL &&
+ !strcmp(first->term, next->term)) {
+ /*
+ * The next index record has the same term. Fold it
+ * into this one and remove from the tree.
+ */
+ whlp_file_add_long(f, next->topic->link->topicoffset);
+ first->count++;
+ delpos234(h->index, i);
+ sfree(next->term);
+ sfree(next);
+ }
+ }
+
+ /*
+ * Now we should have `index' in a form that's ready to
+ * construct |KWBTREE. So we can return.
+ */
+}
+
+/* ----------------------------------------------------------------------
* Standard chunks of data for the |SYSTEM and |FONT sections.
*/
@@ -1013,11 +1126,13 @@ static void whlp_standard_fontsection(struct file *f)
static void whlp_make_btree(struct file *f, int flags, int pagesize,
char *dataformat, tree234 *tree,
+ struct file *map,
bt_index_fn indexfn, bt_leaf_fn leaffn)
{
void **page_elements = NULL;
int npages = 0, pagessize = 0;
int npages_this_level, nentries, nlevels;
+ int total_leaf_entries;
char btdata[MAX_PAGE_SIZE];
int btlen;
int page_start, fixups_offset, unused_bytes;
@@ -1049,12 +1164,21 @@ static void whlp_make_btree(struct file *f, int flags, int pagesize,
whlp_file_add_short(f, 0); /* number of levels; fix later */
whlp_file_add_long(f, count234(tree));/* total B-tree entries */
+ /*
+ * If we have a map section, leave space at the start for its
+ * element count.
+ */
+ if (map) {
+ whlp_file_add_short(map, 0);
+ }
+
/*
* Now create the leaf pages.
*/
index = 0;
npages_this_level = 0;
+ total_leaf_entries = 0;
element = index234(tree, index);
while (element) {
@@ -1109,6 +1233,25 @@ static void whlp_make_btree(struct file *f, int flags, int pagesize,
else
whlp_file_add_short(f, npages);
whlp_file_seek(f, 0, 2);
+
+ /*
+ * If we have a map section, add a map entry.
+ */
+ if (map) {
+ whlp_file_add_long(map, total_leaf_entries);
+ whlp_file_add_short(map, npages_this_level-1);
+ }
+ total_leaf_entries += nentries;
+ }
+
+ /*
+ * If we have a map section, write the total number of map
+ * entries into it.
+ */
+ if (map) {
+ whlp_file_seek(map, 0, 0);
+ whlp_file_add_short(map, npages_this_level);
+ whlp_file_seek(map, 0, 2);
}
/*
@@ -1291,16 +1434,13 @@ WHLP whlp_new(void)
ret->contexts = newtree234(ctxcmp);
ret->titles = newtree234(ttlcmp);
ret->text = newtree234(NULL);
+ ret->index = newtree234(idxcmp);
/*
* Some standard files.
*/
f = whlp_new_file(ret, "|CTXOMAP");
whlp_file_add_short(f, 0); /* dummy section */
- f = whlp_new_file(ret, "|CONTEXT");
- ret->contextfile = f;
- f = whlp_new_file(ret, "|TTLBTREE");
- ret->titlefile = f;
f = whlp_new_file(ret, "|FONT");
whlp_standard_fontsection(f);
f = whlp_new_file(ret, "|SYSTEM");
@@ -1311,6 +1451,7 @@ WHLP whlp_new(void)
* Other variables.
*/
ret->prevtopic = NULL;
+ ret->ncontexts = 0;
return ret;
}
@@ -1319,8 +1460,9 @@ void whlp_close(WHLP h, char *filename)
{
FILE *fp;
int filecount, offset, index, filelen;
- struct file *file, *md;
+ struct file *file, *map, *md;
context *ctx;
+ int has_index;
/*
* Lay out the topic section.
@@ -1333,6 +1475,13 @@ void whlp_close(WHLP h, char *filename)
whlp_do_primary_topic(h);
/*
+ * Set up the index.
+ */
+ has_index = (count234(h->index) != 0);
+ if (has_index)
+ whlp_build_kwdata(h);
+
+ /*
* Set up the `titles' B-tree for the |TTLBTREE section.
*/
for (index = 0; (ctx = index234(h->contexts, index)) != NULL; index++)
@@ -1341,10 +1490,20 @@ void whlp_close(WHLP h, char *filename)
/*
* Construct the various B-trees.
*/
- whlp_make_btree(h->contextfile, 0x0002, 0x0800, "L4",
- h->contexts, ctxindex, ctxleaf);
- whlp_make_btree(h->titlefile, 0x0002, 0x0800, "Lz",
- h->titles, ttlindex, ttlleaf);
+ file = whlp_new_file(h, "|CONTEXT");
+ whlp_make_btree(file, 0x0002, 0x0800, "L4",
+ h->contexts, NULL, ctxindex, ctxleaf);
+
+ file = whlp_new_file(h, "|TTLBTREE");
+ whlp_make_btree(file, 0x0002, 0x0800, "Lz",
+ h->titles, NULL, ttlindex, ttlleaf);
+
+ if (has_index) {
+ file = whlp_new_file(h, "|KWBTREE");
+ map = whlp_new_file(h, "|KWMAP");
+ whlp_make_btree(file, 0x0002, 0x0800, "F24",
+ h->index, map, idxindex, idxleaf);
+ }
/*
* Open the output file.
@@ -1368,7 +1527,8 @@ void whlp_close(WHLP h, char *filename)
/* Now `offset' holds what will be the offset of the master directory. */
md = whlp_new_file(h, NULL); /* master directory file */
- whlp_make_btree(md, 0x0402, 0x0400, "z4", h->files, fileindex, fileleaf);
+ whlp_make_btree(md, 0x0402, 0x0400, "z4",
+ h->files, NULL, fileindex, fileleaf);
filelen = offset + 9 + md->len;
@@ -1722,6 +1882,22 @@ int main(void)
whlp_browse_link(h, t1, t2);
whlp_browse_link(h, t2, t3);
+ /*
+ * Index terms.
+ */
+ whlp_index_term(h, "foobarbaz", t1);
+ whlp_index_term(h, "foobarbaz", t2);
+ whlp_index_term(h, "foobarbaz", t3);
+ whlp_index_term(h, "foobar", t1);
+ whlp_index_term(h, "foobar", t2);
+ whlp_index_term(h, "foobaz", t1);
+ whlp_index_term(h, "foobaz", t3);
+ whlp_index_term(h, "barbaz", t2);
+ whlp_index_term(h, "barbaz", t3);
+ whlp_index_term(h, "foo", t1);
+ whlp_index_term(h, "bar", t2);
+ whlp_index_term(h, "baz", t3);
+
whlp_close(h, "test.hlp");
return 0;
}
diff --git a/winhelp.h b/winhelp.h
index afadcfd..60eccaa 100644
--- a/winhelp.h
+++ b/winhelp.h
@@ -70,6 +70,11 @@ void whlp_browse_link(WHLP h, WHLP_TOPIC before, WHLP_TOPIC after);
void whlp_prepare(WHLP h);
/*
+ * Create a link from an index term to a topic.
+ */
+void whlp_index_term(WHLP h, char *index, WHLP_TOPIC topic);
+
+/*
* Call this if you need the id of a topic and you don't already
* know it (for example, if whlp_prepare has allocated it
* anonymously for you). You might need this, for example, in