summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Tatham <anakin@pobox.com>1999-10-16 15:23:27 +0000
committerSimon Tatham <anakin@pobox.com>1999-10-16 15:23:27 +0000
commit00f6e0ee13c753d98e8665ad1ff2e992f43ef6e4 (patch)
treec96a462edd1b2ebb631f90420ceb51e266114576
parent9972b0f0d1ce6e08ce6f9505980c9c889ae994bc (diff)
downloadhalibut-00f6e0ee13c753d98e8665ad1ff2e992f43ef6e4.zip
halibut-00f6e0ee13c753d98e8665ad1ff2e992f43ef6e4.tar.gz
halibut-00f6e0ee13c753d98e8665ad1ff2e992f43ef6e4.tar.bz2
halibut-00f6e0ee13c753d98e8665ad1ff2e992f43ef6e4.tar.xz
Further development: index work, phase I
[originally from svn r237]
-rw-r--r--buttress.h5
-rw-r--r--contents.c1
-rw-r--r--index.c182
-rw-r--r--input.c65
-rw-r--r--keywords.c17
-rw-r--r--main.c10
-rw-r--r--misc.c2
7 files changed, 262 insertions, 20 deletions
diff --git a/buttress.h b/buttress.h
index ef19d4c..786c6e2 100644
--- a/buttress.h
+++ b/buttress.h
@@ -214,6 +214,9 @@ struct keywordlist_Tag {
int nkeywords;
int size;
keyword **keys;
+ word **looseends; /* non-keyword list element numbers */
+ int nlooseends;
+ int looseendssize;
};
struct keyword_Tag {
wchar_t *key; /* the keyword itself */
@@ -229,6 +232,8 @@ void subst_keywords(paragraph *, keywordlist *);
/*
* index.c
*/
+/* index_merge takes responsibility for freeing arg 3 but not arg 2 */
+void index_merge(int is_explicit, wchar_t *, word *);
/*
* contents.c
diff --git a/contents.c b/contents.c
index b387906..790c58f 100644
--- a/contents.c
+++ b/contents.c
@@ -29,6 +29,7 @@ numberstate *number_init(void) {
}
void number_free(numberstate *state) {
+ sfree(state->sectionlevels);
sfree(state);
}
diff --git a/index.c b/index.c
index 90aca82..7c73565 100644
--- a/index.c
+++ b/index.c
@@ -6,3 +6,185 @@
#include <stdlib.h>
#include "buttress.h"
+#if 0
+typedef struct tag_node23 node23;
+typedef struct tag_tree23 tree23;
+typedef struct tag_elem23 elem23;
+
+int cmp23(elem23 *, elem23 *);
+
+struct tag_elem23 {
+ char *string;
+};
+
+struct tag_tree23 {
+ node23 *root;
+};
+
+struct tag_node23 {
+ node23 *parent;
+ node23 *kids[3];
+ elem23 *elems[2];
+}
+
+int cmp23(elem23 *a, elem23 *b) {
+ return strcmp(a->string, b->string);
+}
+
+void add23(tree23 *t, elem23 *e) {
+ node23 *n, *left, *right;
+ int c;
+
+ if (t->root == NULL) {
+ t->root = mknew(node23);
+ t->root->elems[1] = NULL;
+ t->root->kids[0] = t->root->kids[1] = t->root->kids[2] = NULL;
+ t->root->parent = NULL;
+ t->root->elems[0] = e;
+ return;
+ }
+
+ np = &t->root;
+ while (*np) {
+ n = *np;
+ c = cmp32(e, n->elems[0]);
+ if (c == 0) {
+ printf("present already\n");
+ return;
+ } else if (c < 0) {
+ np = &n->kids[0];
+ } else {
+ if (n->elems[1] == NULL || (c = cmp32(e, n->elems[1])) < 0)
+ np = &n->kids[1];
+ else if (c > 0)
+ np = &n->kids[2];
+ else {
+ printf("present already\n");
+ return;
+ }
+ }
+ }
+
+ /*
+ * We need to insert the new element in n at position np.
+ */
+ left = NULL;
+ right = NULL;
+ while (n) {
+ if (n->elems[1] == NULL) {
+ /*
+ * Insert in a 2-node; simple.
+ */
+ if (np == &n->kids[0]) {
+ n->kids[2] = n->kids[1];
+ n->elems[1] = n->elems[0];
+ n->kids[1] = right;
+ n->elems[0] = e;
+ n->kids[0] = left;
+ } else { /* np == &n->kids[1] */
+ n->kids[2] = right;
+ n->elems[1] = e;
+ n->kids[1] = left;
+ }
+ break;
+ } else {
+ node23 *m = mknew(node23);
+ /*
+ * Insert in a 3-node; split into two 2-nodes and move
+ * focus up a level.
+ */
+ if (np == &n->kids[0]) {
+ m->kids[0] = left;
+ m->elems[0] = e;
+ m->kids[1] = right;
+ e = n->elems[0];
+ n->kids[0] = n->kids[1];
+ n->elems[1] = n->elems[0];
+ n->kids[1] = n->kids[2];
+ } else if (np == &n->kids[1]) {
+ m->kids[0] = n->kids[0];
+ m->elems[0] = n->elems[0];
+ m->kids[1] = left;
+ /* e = e; */
+ n->kids[0] = right;
+ n->elems[0] = n->elems[1];
+ n->kids[1] = n->kids[2];
+ } else { /* np == &n->kids[2] */
+ m->kids[0] = n->kids[0];
+ m->elems[0] = n->elems[0];
+ m->kids[1] = n->kids[1];
+ e = n->elems[1];
+ n->kids[0] = left;
+ n->elems[0] = e;
+ n->kids[1] = right;
+ }
+ m->kids[2] = n->kids[2] = NULL;
+ m->elems[1] = n->elems[1] = NULL;
+ left = m;
+ right = n;
+ }
+ n = n->parent;
+ }
+
+ /*
+ * If we've come out of here by `break', n will still be
+ * non-NULL and we've finished. If we've come here because n is
+ * NULL, we need to create a new root for the tree because the
+ * old one has just split into two.
+ */
+ if (!n) {
+ t->root = mknew(node23);
+ t->root->kids[0] = left;
+ t->root->elems[0] = e;
+ t->root->kids[1] = right;
+ t->root->elems[1] = NULL;
+ t->root->kids[2] = NULL;
+ }
+}
+
+void find23(tree23 *t, elem23 *e) {
+}
+#endif
+
+static void dbg_prtwordlist(int level, word *w);
+
+void index_merge(int is_explicit, wchar_t *tags, word *text) {
+ wchar_t *wp;
+ printf("\\IM: %splicit: ", is_explicit ? "ex" : "im");
+ if (tags) {
+ wp = tags;
+ while (*wp) {
+ putchar('\"');
+ for (; *wp; wp++)
+ putchar(*wp);
+ putchar('\"');
+ if (*++wp)
+ printf(", ");
+ }
+ } else
+ printf("(no keyword)");
+ printf(" {\n");
+ dbg_prtwordlist(1, text);
+ printf("}\n");
+ free_word_list(text);
+}
+
+static void dbg_prtwordlist(int level, word *w) {
+ for (; w; w = w->next) {
+ wchar_t *wp;
+ printf("%*sword %d ", level*4, "", w->type);
+ if (w->text) {
+ printf("\"");
+ for (wp = w->text; *wp; wp++)
+ putchar(*wp);
+ printf("\"");
+ } else
+ printf("(no text)");
+ if (w->alt) {
+ printf(" alt = {\n");
+ dbg_prtwordlist(level+1, w->alt);
+ printf("%*s}", level*4, "");
+ }
+ printf("\n");
+ }
+}
diff --git a/input.c b/input.c
index bb193a3..3749ffa 100644
--- a/input.c
+++ b/input.c
@@ -410,7 +410,10 @@ token get_codepar_token(input *in) {
* Adds a new word to a linked list
*/
static word *addword(word newword, word ***hptrptr) {
- word *mnewword = mknew(word);
+ word *mnewword;
+ if (!hptrptr)
+ return NULL;
+ mnewword = mknew(word);
*mnewword = newword; /* structure copy */
mnewword->next = NULL;
**hptrptr = mnewword;
@@ -442,7 +445,8 @@ static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) {
static void read_file(paragraph ***ret, input *in) {
token t;
paragraph par;
- word wd, **whptr;
+ word wd, **whptr, **idximplicit;
+ wchar_t utext[2], *wdtext;
int style;
int already;
int iswhite, seenwhite;
@@ -456,9 +460,11 @@ static void read_file(paragraph ***ret, input *in) {
stack_hyper = 8, /* \W */
} type;
word **whptr; /* to restore from \u alternatives */
+ word **idximplicit; /* to restore from \u alternatives */
} *sitem;
stack parsestk;
- word *indexword, *uword;
+ word *indexword, *uword, *iword;
+ word *idxwordlist;
rdstring indexstr;
int index_downcase, index_visible, indexing;
const rdstring nullrs = { 0, 0, NULL };
@@ -658,6 +664,8 @@ static void read_file(paragraph ***ret, input *in) {
rdadd(&indexstr, ' ');
if (!indexing || index_visible)
addword(wd, &whptr);
+ if (indexing)
+ addword(wd, &idximplicit);
iswhite = TRUE;
break;
case tok_word:
@@ -670,6 +678,10 @@ static void read_file(paragraph ***ret, input *in) {
wd.fpos = t.pos;
addword(wd, &whptr);
}
+ if (indexing) {
+ wd.text = ustrdup(t.text);
+ addword(wd, &idximplicit);
+ }
break;
case tok_lbrace:
error(err_unexbrace, &t.pos);
@@ -683,16 +695,20 @@ static void read_file(paragraph ***ret, input *in) {
if (!sitem)
error(err_unexbrace, &t.pos);
else {
- if (sitem->type & stack_ualt)
+ if (sitem->type & stack_ualt) {
whptr = sitem->whptr;
+ idximplicit = sitem->idximplicit;
+ }
if (sitem->type & stack_style)
style = word_Normal;
if (sitem->type & stack_idx) {
indexword->text = ustrdup(indexstr.text);
- sfree(indexstr.text);
if (index_downcase)
ustrlow(indexword->text);
indexing = FALSE;
+ rdadd(&indexstr, L'\0');
+ index_merge(FALSE, indexstr.text, idxwordlist);
+ sfree(indexstr.text);
}
if (sitem->type & stack_hyper) {
wd.text = NULL;
@@ -701,6 +717,8 @@ static void read_file(paragraph ***ret, input *in) {
wd.fpos = t.pos;
if (!indexing || index_visible)
addword(wd, &whptr);
+ if (indexing)
+ addword(wd, &idximplicit);
}
}
sfree(sitem);
@@ -765,7 +783,7 @@ static void read_file(paragraph ***ret, input *in) {
time_t thetime = time(NULL);
struct tm *broken = localtime(&thetime);
already = TRUE;
- wd.text = ustrftime(NULL, broken);
+ wdtext = ustrftime(NULL, broken);
wd.type = style;
} else
error(err_explbr, &t.pos);
@@ -781,10 +799,10 @@ static void read_file(paragraph ***ret, input *in) {
if (wd.type == word_Normal) {
time_t thetime = time(NULL);
struct tm *broken = localtime(&thetime);
- wd.text = ustrftime(rs.text, broken);
+ wdtext = ustrftime(rs.text, broken);
wd.type = style;
} else {
- wd.text = ustrdup(rs.text);
+ wdtext = ustrdup(rs.text);
}
sfree(rs.text);
if (t.type != tok_rbrace) {
@@ -792,10 +810,15 @@ static void read_file(paragraph ***ret, input *in) {
}
}
wd.alt = NULL;
- if (!indexing || index_visible)
+ if (!indexing || index_visible) {
+ wd.text = ustrdup(wdtext);
addword(wd, &whptr);
- else
- sfree(wd.text);
+ }
+ if (indexing) {
+ wd.text = ustrdup(wdtext);
+ addword(wd, &idximplicit);
+ }
+ sfree(wdtext);
if (wd.type == word_HyperLink) {
/*
* Hyperlinks are different: they then
@@ -897,22 +920,28 @@ static void read_file(paragraph ***ret, input *in) {
index_visible = (type != c_I);
index_downcase = (type == c_ii);
indexing = TRUE;
+ idxwordlist = NULL;
+ idximplicit = &idxwordlist;
/* Stack item to close the indexing on exit */
stk_push(parsestk, sitem);
}
break;
case c_u:
uchr = t.aux;
+ utext[0] = uchr; utext[1] = 0;
if (!indexing || index_visible) {
- wchar_t text[2];
- text[1] = 0;
- text[0] = uchr;
- wd.text = ustrdup(text);
+ wd.text = ustrdup(utext);
wd.type = style;
wd.alt = NULL;
wd.fpos = t.pos;
uword = addword(wd, &whptr);
- }
+ } else
+ uword = NULL;
+ if (indexing) {
+ wd.text = ustrdup(utext);
+ iword = addword(wd, &idximplicit);
+ } else
+ iword = NULL;
dtor(t), t = get_token(in);
if (t.type == tok_lbrace) {
/*
@@ -924,8 +953,10 @@ static void read_file(paragraph ***ret, input *in) {
sitem = mknew(struct stack_item);
sitem->type = stack_ualt;
sitem->whptr = whptr;
+ sitem->idximplicit = idximplicit;
stk_push(parsestk, sitem);
- whptr = &uword->alt;
+ whptr = uword ? &uword->alt : NULL;
+ idximplicit = iword ? &iword->alt : NULL;
} else {
if (indexing)
rdadd(&indexstr, uchr);
diff --git a/keywords.c b/keywords.c
index dad787d..c2d9ed6 100644
--- a/keywords.c
+++ b/keywords.c
@@ -93,6 +93,8 @@ keywordlist *get_keywords(paragraph *source) {
kl->nkeywords = 0;
kl->size = 0;
kl->keys = NULL;
+ kl->nlooseends = kl->looseendssize = 0;
+ kl->looseends = NULL;
for (; source; source = source->next) {
/*
* Number the chapter / section / list-item / whatever.
@@ -113,6 +115,12 @@ keywordlist *get_keywords(paragraph *source) {
p += ustrlen(p) + 1;
}
}
+ } else {
+ if (kl->nlooseends >= kl->looseendssize) {
+ kl->looseendssize += 32;
+ kl->looseends = resize(kl->looseends, kl->looseendssize);
+ }
+ kl->looseends[kl->nlooseends++] = source->kwtext;
}
}
@@ -125,8 +133,15 @@ keywordlist *get_keywords(paragraph *source) {
void free_keywords(keywordlist *kl) {
int i;
- for (i = 0; i < kl->nkeywords; i++)
+ while (kl->nlooseends)
+ free_word_list(kl->looseends[--kl->nlooseends]);
+ sfree(kl->looseends);
+ for (i = 0; i < kl->nkeywords; i++) {
+ if (kl->keys[i])
+ free_word_list(kl->keys[i]->text);
sfree(kl->keys[i]);
+ }
+ sfree(kl->keys);
sfree(kl);
}
diff --git a/main.c b/main.c
index edcd430..0320526 100644
--- a/main.c
+++ b/main.c
@@ -165,7 +165,7 @@ int main(int argc, char **argv) {
{
input in;
- paragraph *sourceform;
+ paragraph *sourceform, *p;
keywordlist *keywords;
in.filenames = infiles;
@@ -185,10 +185,18 @@ int main(int argc, char **argv) {
gen_citations(sourceform, keywords);
subst_keywords(sourceform, keywords);
+ for (p = sourceform; p; p = p->next) {
+ if (p->type == para_IM) {
+ index_merge(TRUE, p->keyword, p->words);
+ p->words = NULL; /* this has now been freed */
+ }
+ }
+
dbg_prtkws(keywords);
dbg_prtsource(sourceform);
free_para_list(sourceform);
+ free_keywords(keywords);
}
return 0;
diff --git a/misc.c b/misc.c
index 96b0e53..c9ea115 100644
--- a/misc.c
+++ b/misc.c
@@ -13,7 +13,7 @@ struct stackTag {
stack stk_new(void) {
stack s;
- s = mknew(stack);
+ s = mknew(struct stackTag);
s->sp = 0;
s->size = 0;
s->data = NULL;