Further development: index work, phase I

[originally from svn r237]
author: Simon Tatham <anakin@pobox.com> 1999-10-16 15:23:27 +0000
committer: Simon Tatham <anakin@pobox.com> 1999-10-16 15:23:27 +0000
commit: 00f6e0ee13c753d98e8665ad1ff2e992f43ef6e4 (patch)
tree: c96a462edd1b2ebb631f90420ceb51e266114576
parent: 9972b0f0d1ce6e08ce6f9505980c9c889ae994bc (diff)
download: halibut-00f6e0ee13c753d98e8665ad1ff2e992f43ef6e4.zip
halibut-00f6e0ee13c753d98e8665ad1ff2e992f43ef6e4.tar.gz
halibut-00f6e0ee13c753d98e8665ad1ff2e992f43ef6e4.tar.bz2
halibut-00f6e0ee13c753d98e8665ad1ff2e992f43ef6e4.tar.xz
7 files changed, 262 insertions, 20 deletions
diff --git a/buttress.h b/buttress.h
index ef19d4c..786c6e2 100644
--- a/buttress.h
+++ b/buttress.h
@@ -214,6 +214,9 @@ struct keywordlist_Tag {
     int nkeywords;
     int size;
     keyword **keys;
+    word **looseends;		       /* non-keyword list element numbers */
+    int nlooseends;
+    int looseendssize;
 };
 struct keyword_Tag {
     wchar_t *key;		       /* the keyword itself */
@@ -229,6 +232,8 @@ void subst_keywords(paragraph *, keywordlist *);
 /*
  * index.c
  */
+/* index_merge takes responsibility for freeing arg 3 but not arg 2 */
+void index_merge(int is_explicit, wchar_t *, word *);
 
 /*
  * contents.c
diff --git a/contents.c b/contents.c
index b387906..790c58f 100644
--- a/contents.c
+++ b/contents.c
@@ -29,6 +29,7 @@ numberstate *number_init(void) {
 }
 
 void number_free(numberstate *state) {
+    sfree(state->sectionlevels);
     sfree(state);
 }
 
diff --git a/index.c b/index.c
index 90aca82..7c73565 100644
--- a/index.c
+++ b/index.c
@@ -6,3 +6,185 @@
 #include <stdlib.h>
 #include "buttress.h"
 
+#if 0
+typedef struct tag_node23 node23;
+typedef struct tag_tree23 tree23;
+typedef struct tag_elem23 elem23;
+
+int cmp23(elem23 *, elem23 *);
+
+struct tag_elem23 {
+    char *string;
+};
+
+struct tag_tree23 {
+    node23 *root;
+};
+
+struct tag_node23 {
+    node23 *parent;
+    node23 *kids[3];
+    elem23 *elems[2];
+}
+
+int cmp23(elem23 *a, elem23 *b) {
+    return strcmp(a->string, b->string);
+}
+
+void add23(tree23 *t, elem23 *e) {
+    node23 *n, *left, *right;
+    int c;
+
+    if (t->root == NULL) {
+	t->root = mknew(node23);
+	t->root->elems[1] = NULL;
+	t->root->kids[0] = t->root->kids[1] = t->root->kids[2] = NULL;
+	t->root->parent = NULL;
+	t->root->elems[0] = e;
+	return;
+    }
+
+    np = &t->root;
+    while (*np) {
+	n = *np;
+	c = cmp32(e, n->elems[0]);
+	if (c == 0) {
+	    printf("present already\n");
+	    return;
+	} else if (c < 0) {
+	    np = &n->kids[0];
+	} else {
+	    if (n->elems[1] == NULL || (c = cmp32(e, n->elems[1])) < 0)
+		np = &n->kids[1];
+	    else if (c > 0)
+		np = &n->kids[2];
+	    else {
+		printf("present already\n");
+		return;
+	    }
+	}
+    }
+
+    /*
+     * We need to insert the new element in n at position np.
+     */
+    left = NULL;
+    right = NULL;
+    while (n) {
+	if (n->elems[1] == NULL) {
+	    /*
+	     * Insert in a 2-node; simple.
+	     */
+	    if (np == &n->kids[0]) {
+		n->kids[2] = n->kids[1];
+		n->elems[1] = n->elems[0];
+		n->kids[1] = right;
+		n->elems[0] = e;
+		n->kids[0] = left;
+	    } else { /* np == &n->kids[1] */
+		n->kids[2] = right;
+		n->elems[1] = e;
+		n->kids[1] = left;
+	    }
+	    break;
+	} else {
+	    node23 *m = mknew(node23);
+	    /*
+	     * Insert in a 3-node; split into two 2-nodes and move
+	     * focus up a level.
+	     */
+	    if (np == &n->kids[0]) {
+		m->kids[0] = left;
+		m->elems[0] = e;
+		m->kids[1] = right;
+		e = n->elems[0];
+		n->kids[0] = n->kids[1];
+		n->elems[1] = n->elems[0];
+		n->kids[1] = n->kids[2];
+	    } else if (np == &n->kids[1]) {
+		m->kids[0] = n->kids[0];
+		m->elems[0] = n->elems[0];
+		m->kids[1] = left;
+		/* e = e; */
+		n->kids[0] = right;
+		n->elems[0] = n->elems[1];
+		n->kids[1] = n->kids[2];
+	    } else { /* np == &n->kids[2] */
+		m->kids[0] = n->kids[0];
+		m->elems[0] = n->elems[0];
+		m->kids[1] = n->kids[1];
+		e = n->elems[1];
+		n->kids[0] = left;
+		n->elems[0] = e;
+		n->kids[1] = right;
+	    }
+	    m->kids[2] = n->kids[2] = NULL;
+	    m->elems[1] = n->elems[1] = NULL;
+	    left = m;
+	    right = n;
+	}
+	n = n->parent;
+    }
+
+    /*
+     * If we've come out of here by `break', n will still be
+     * non-NULL and we've finished. If we've come here because n is
+     * NULL, we need to create a new root for the tree because the
+     * old one has just split into two.
+     */
+    if (!n) {
+	t->root = mknew(node23);
+	t->root->kids[0] = left;
+	t->root->elems[0] = e;
+	t->root->kids[1] = right;
+	t->root->elems[1] = NULL;
+	t->root->kids[2] = NULL;
+    }
+}
+
+void find23(tree23 *t, elem23 *e) {
+}
+#endif
+
+static void dbg_prtwordlist(int level, word *w);
+
+void index_merge(int is_explicit, wchar_t *tags, word *text) {
+    wchar_t *wp;
+    printf("\\IM: %splicit: ", is_explicit ? "ex" : "im");
+    if (tags) {
+	wp = tags;
+	while (*wp) {
+	    putchar('\"');
+	    for (; *wp; wp++)
+		putchar(*wp);
+	    putchar('\"');
+	    if (*++wp)
+		printf(", ");
+	}
+    } else
+	printf("(no keyword)");
+    printf(" {\n");
+    dbg_prtwordlist(1, text);
+    printf("}\n");
+    free_word_list(text);
+}
+
+static void dbg_prtwordlist(int level, word *w) {
+    for (; w; w = w->next) {
+	wchar_t *wp;
+	printf("%*sword %d ", level*4, "", w->type);
+	if (w->text) {
+	    printf("\"");
+	    for (wp = w->text; *wp; wp++)
+		    putchar(*wp);
+	    printf("\"");
+	} else
+	    printf("(no text)");
+	if (w->alt) {
+	    printf(" alt = {\n");
+	    dbg_prtwordlist(level+1, w->alt);
+	    printf("%*s}", level*4, "");
+	}
+	printf("\n");
+    }
+}
diff --git a/input.c b/input.c
index bb193a3..3749ffa 100644
--- a/input.c
+++ b/input.c
@@ -410,7 +410,10 @@ token get_codepar_token(input *in) {
  * Adds a new word to a linked list
  */
 static word *addword(word newword, word ***hptrptr) {
-    word *mnewword = mknew(word);
+    word *mnewword;
+    if (!hptrptr)
+	return NULL;
+    mnewword = mknew(word);
     *mnewword = newword;	       /* structure copy */
     mnewword->next = NULL;
     **hptrptr = mnewword;
@@ -442,7 +445,8 @@ static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) {
 static void read_file(paragraph ***ret, input *in) {
     token t;
     paragraph par;
-    word wd, **whptr;
+    word wd, **whptr, **idximplicit;
+    wchar_t utext[2], *wdtext;
     int style;
     int already;
     int iswhite, seenwhite;
@@ -456,9 +460,11 @@ static void read_file(paragraph ***ret, input *in) {
 	    stack_hyper = 8,	       /* \W */
 	} type;
 	word **whptr;		       /* to restore from \u alternatives */
+	word **idximplicit;	       /* to restore from \u alternatives */
     } *sitem;
     stack parsestk;
-    word *indexword, *uword;
+    word *indexword, *uword, *iword;
+    word *idxwordlist;
     rdstring indexstr;
     int index_downcase, index_visible, indexing;
     const rdstring nullrs = { 0, 0, NULL };
@@ -658,6 +664,8 @@ static void read_file(paragraph ***ret, input *in) {
 		    rdadd(&indexstr, ' ');
 		if (!indexing || index_visible)
 		    addword(wd, &whptr);
+		if (indexing)
+		    addword(wd, &idximplicit);
 		iswhite = TRUE;
 		break;
 	      case tok_word:
@@ -670,6 +678,10 @@ static void read_file(paragraph ***ret, input *in) {
 		    wd.fpos = t.pos;
 		    addword(wd, &whptr);
 		}
+		if (indexing) {
+		    wd.text = ustrdup(t.text);
+		    addword(wd, &idximplicit);
+		}
 		break;
 	      case tok_lbrace:
 		error(err_unexbrace, &t.pos);
@@ -683,16 +695,20 @@ static void read_file(paragraph ***ret, input *in) {
 		if (!sitem)
 		    error(err_unexbrace, &t.pos);
 		else {
-		    if (sitem->type & stack_ualt)
+		    if (sitem->type & stack_ualt) {
 			whptr = sitem->whptr;
+			idximplicit = sitem->idximplicit;
+		    }
 		    if (sitem->type & stack_style)
 			style = word_Normal;
 		    if (sitem->type & stack_idx) {
 			indexword->text = ustrdup(indexstr.text);
-			sfree(indexstr.text);
 			if (index_downcase)
 			    ustrlow(indexword->text);
 			indexing = FALSE;
+			rdadd(&indexstr, L'\0');
+			index_merge(FALSE, indexstr.text, idxwordlist);
+			sfree(indexstr.text);
 		    }
 		    if (sitem->type & stack_hyper) {
 			wd.text = NULL;
@@ -701,6 +717,8 @@ static void read_file(paragraph ***ret, input *in) {
 			wd.fpos = t.pos;
 			if (!indexing || index_visible)
 			    addword(wd, &whptr);
+			if (indexing)
+			    addword(wd, &idximplicit);
 		    }
 		}
 		sfree(sitem);
@@ -765,7 +783,7 @@ static void read_file(paragraph ***ret, input *in) {
 			    time_t thetime = time(NULL);
 			    struct tm *broken = localtime(&thetime);
 			    already = TRUE;
-			    wd.text = ustrftime(NULL, broken);
+			    wdtext = ustrftime(NULL, broken);
 			    wd.type = style;
 			} else
 			    error(err_explbr, &t.pos);
@@ -781,10 +799,10 @@ static void read_file(paragraph ***ret, input *in) {
 			if (wd.type == word_Normal) {
 			    time_t thetime = time(NULL);
 			    struct tm *broken = localtime(&thetime);
-			    wd.text = ustrftime(rs.text, broken);
+			    wdtext = ustrftime(rs.text, broken);
 			    wd.type = style;
 			} else {
-			    wd.text = ustrdup(rs.text);
+			    wdtext = ustrdup(rs.text);
 			}
 			sfree(rs.text);
 			if (t.type != tok_rbrace) {
@@ -792,10 +810,15 @@ static void read_file(paragraph ***ret, input *in) {
 			}
 		    }
 		    wd.alt = NULL;
-		    if (!indexing || index_visible)
+		    if (!indexing || index_visible) {
+			wd.text = ustrdup(wdtext);
 			addword(wd, &whptr);
-		    else
-			sfree(wd.text);
+		    }
+		    if (indexing) {
+			wd.text = ustrdup(wdtext);
+			addword(wd, &idximplicit);
+		    }
+		    sfree(wdtext);
 		    if (wd.type == word_HyperLink) {
 			/*
 			 * Hyperlinks are different: they then
@@ -897,22 +920,28 @@ static void read_file(paragraph ***ret, input *in) {
 			index_visible = (type != c_I);
 			index_downcase = (type == c_ii);
 			indexing = TRUE;
+			idxwordlist = NULL;
+			idximplicit = &idxwordlist;
 			/* Stack item to close the indexing on exit */
 			stk_push(parsestk, sitem);
 		    }
 		    break;
 		  case c_u:
 		    uchr = t.aux;
+		    utext[0] = uchr; utext[1] = 0;
 		    if (!indexing || index_visible) {
-			wchar_t text[2];
-			text[1] = 0;
-			text[0] = uchr;
-			wd.text = ustrdup(text);
+			wd.text = ustrdup(utext);
 			wd.type = style;
 			wd.alt = NULL;
 			wd.fpos = t.pos;
 			uword = addword(wd, &whptr);
-		    }
+		    } else
+			uword = NULL;
+		    if (indexing) {
+			wd.text = ustrdup(utext);
+			iword = addword(wd, &idximplicit);
+		    } else
+			iword = NULL;
 		    dtor(t), t = get_token(in);
 		    if (t.type == tok_lbrace) {
 			/*
@@ -924,8 +953,10 @@ static void read_file(paragraph ***ret, input *in) {
 			sitem = mknew(struct stack_item);
 			sitem->type = stack_ualt;
 			sitem->whptr = whptr;
+			sitem->idximplicit = idximplicit;
 			stk_push(parsestk, sitem);
-			whptr = &uword->alt;
+			whptr = uword ? &uword->alt : NULL;
+			idximplicit = iword ? &iword->alt : NULL;
 		    } else {
 			if (indexing)
 			    rdadd(&indexstr, uchr);
diff --git a/keywords.c b/keywords.c
index dad787d..c2d9ed6 100644
--- a/keywords.c
+++ b/keywords.c
@@ -93,6 +93,8 @@ keywordlist *get_keywords(paragraph *source) {
     kl->nkeywords = 0;
     kl->size = 0;
     kl->keys = NULL;
+    kl->nlooseends = kl->looseendssize = 0;
+    kl->looseends = NULL;
     for (; source; source = source->next) {
 	/*
 	 * Number the chapter / section / list-item / whatever.
@@ -113,6 +115,12 @@ keywordlist *get_keywords(paragraph *source) {
 		    p += ustrlen(p) + 1;
 		}
 	    }
+	} else {
+	    if (kl->nlooseends >= kl->looseendssize) {
+		kl->looseendssize += 32;
+		kl->looseends = resize(kl->looseends, kl->looseendssize);
+	    }
+	    kl->looseends[kl->nlooseends++] = source->kwtext;
 	}
     }
 
@@ -125,8 +133,15 @@ keywordlist *get_keywords(paragraph *source) {
 
 void free_keywords(keywordlist *kl) {
     int i;
-    for (i = 0; i < kl->nkeywords; i++)
+    while (kl->nlooseends)
+	free_word_list(kl->looseends[--kl->nlooseends]);
+    sfree(kl->looseends);
+    for (i = 0; i < kl->nkeywords; i++) {
+	if (kl->keys[i])
+	    free_word_list(kl->keys[i]->text);
 	sfree(kl->keys[i]);
+    }
+    sfree(kl->keys);
     sfree(kl);
 }
 
diff --git a/main.c b/main.c
index edcd430..0320526 100644
--- a/main.c
+++ b/main.c
@@ -165,7 +165,7 @@ int main(int argc, char **argv) {
 
     {
 	input in;
-	paragraph *sourceform;
+	paragraph *sourceform, *p;
 	keywordlist *keywords;
 
 	in.filenames = infiles;
@@ -185,10 +185,18 @@ int main(int argc, char **argv) {
 	gen_citations(sourceform, keywords);
 	subst_keywords(sourceform, keywords);
 
+	for (p = sourceform; p; p = p->next) {
+	    if (p->type == para_IM) {
+		index_merge(TRUE, p->keyword, p->words);
+		p->words = NULL;       /* this has now been freed */
+	    }
+	}
+
 	dbg_prtkws(keywords);
 	dbg_prtsource(sourceform);
 
 	free_para_list(sourceform);
+	free_keywords(keywords);
     }
 
     return 0;
diff --git a/misc.c b/misc.c
index 96b0e53..c9ea115 100644
--- a/misc.c
+++ b/misc.c
@@ -13,7 +13,7 @@ struct stackTag {
 stack stk_new(void) {
     stack s;
 
-    s = mknew(stack);
+    s = mknew(struct stackTag);
     s->sp = 0;
     s->size = 0;
     s->data = NULL;
author	Simon Tatham <anakin@pobox.com>	1999-10-16 15:23:27 +0000
committer	Simon Tatham <anakin@pobox.com>	1999-10-16 15:23:27 +0000
commit	00f6e0ee13c753d98e8665ad1ff2e992f43ef6e4 (patch)
tree	c96a462edd1b2ebb631f90420ceb51e266114576
parent	9972b0f0d1ce6e08ce6f9505980c9c889ae994bc (diff)
download	halibut-00f6e0ee13c753d98e8665ad1ff2e992f43ef6e4.zip halibut-00f6e0ee13c753d98e8665ad1ff2e992f43ef6e4.tar.gz halibut-00f6e0ee13c753d98e8665ad1ff2e992f43ef6e4.tar.bz2 halibut-00f6e0ee13c753d98e8665ad1ff2e992f43ef6e4.tar.xz