diff options
Diffstat (limited to 'index.c')
| -rw-r--r-- | index.c | 292 |
1 files changed, 163 insertions, 129 deletions
@@ -6,167 +6,200 @@ #include <stdlib.h> #include "buttress.h" -#if 0 -typedef struct tag_node23 node23; -typedef struct tag_tree23 tree23; -typedef struct tag_elem23 elem23; +typedef struct indextag_Tag indextag; +typedef struct indexentry_Tag indexentry; -int cmp23(elem23 *, elem23 *); +struct index_Tag { + tree23 *tags; /* holds type `indextag' */ + tree23 *entries; /* holds type `indexentry' */ +}; -struct tag_elem23 { - char *string; +struct indextag_Tag { + wchar_t *name; + word *implicit_text; + word **explicit_texts; + int nexplicit, explicit_size; + int nrefs; + indexentry **refs; /* array of entries referenced by tag */ }; -struct tag_tree23 { - node23 *root; +struct indexentry_Tag { + word *text; + void *backend_data; /* private to back end */ }; -struct tag_node23 { - node23 *parent; - node23 *kids[3]; - elem23 *elems[2]; +index *make_index(void) { + index *ret = mknew(index); + ret->tags = newtree23(); + ret->entries = newtree23(); + return ret; } -int cmp23(elem23 *a, elem23 *b) { - return strcmp(a->string, b->string); +static indextag *make_indextag(void) { + indextag *ret = mknew(indextag); + ret->name = NULL; + ret->implicit_text = NULL; + ret->explicit_texts = NULL; + ret->nexplicit = ret->explicit_size = ret->nrefs = 0; + ret->refs = NULL; + return ret; } -void add23(tree23 *t, elem23 *e) { - node23 *n, *left, *right; - int c; - - if (t->root == NULL) { - t->root = mknew(node23); - t->root->elems[1] = NULL; - t->root->kids[0] = t->root->kids[1] = t->root->kids[2] = NULL; - t->root->parent = NULL; - t->root->elems[0] = e; - return; - } +static int compare_tags(void *av, void *bv) { + indextag *a = (indextag *)av, *b = (indextag *)bv; + return ustricmp(a->name, b->name); +} - np = &t->root; - while (*np) { - n = *np; - c = cmp32(e, n->elems[0]); - if (c == 0) { - printf("present already\n"); - return; - } else if (c < 0) { - np = &n->kids[0]; - } else { - if (n->elems[1] == NULL || (c = cmp32(e, n->elems[1])) < 0) - np = &n->kids[1]; - else if (c > 0) - np = &n->kids[2]; - else { - printf("present already\n"); - return; - } - } - } +static int compare_entries(void *av, void *bv) { + indexentry *a = (indexentry *)av, *b = (indexentry *)bv; + return compare_wordlists(a->text, b->text); +} + +/* + * Add a \IM. `tags' points to a zero-terminated chain of + * zero-terminated strings ("first\0second\0thirdandlast\0\0"). + * `text' points to a word list. + * + * Guarantee on calling sequence: all implicit merges are given + * before the explicit ones. + */ +void index_merge(index *idx, int is_explicit, wchar_t *tags, word *text) { + indextag *t, *existing; /* - * We need to insert the new element in n at position np. + * FIXME: want to warn on overlapping source sets. */ - left = NULL; - right = NULL; - while (n) { - if (n->elems[1] == NULL) { + for (; *tags; tags += 1+ustrlen(tags)) { + t = make_indextag(); + t->name = tags; + existing = add23(idx->tags, t, compare_tags); + if (existing == t) { /* - * Insert in a 2-node; simple. + * Every tag has an implicit \IM. So if this tag + * doesn't exist and we're explicit, then we should + * warn (and drop it, since it won't be referenced). */ - if (np == &n->kids[0]) { - n->kids[2] = n->kids[1]; - n->elems[1] = n->elems[0]; - n->kids[1] = right; - n->elems[0] = e; - n->kids[0] = left; - } else { /* np == &n->kids[1] */ - n->kids[2] = right; - n->elems[1] = e; - n->kids[1] = left; + if (is_explicit) { + error(err_nosuchidxtag, tags); + continue; } - break; - } else { - node23 *m = mknew(node23); /* - * Insert in a 3-node; split into two 2-nodes and move - * focus up a level. + * Otherwise, this is a new tag with an implicit \IM. */ - if (np == &n->kids[0]) { - m->kids[0] = left; - m->elems[0] = e; - m->kids[1] = right; - e = n->elems[0]; - n->kids[0] = n->kids[1]; - n->elems[1] = n->elems[0]; - n->kids[1] = n->kids[2]; - } else if (np == &n->kids[1]) { - m->kids[0] = n->kids[0]; - m->elems[0] = n->elems[0]; - m->kids[1] = left; - /* e = e; */ - n->kids[0] = right; - n->elems[0] = n->elems[1]; - n->kids[1] = n->kids[2]; - } else { /* np == &n->kids[2] */ - m->kids[0] = n->kids[0]; - m->elems[0] = n->elems[0]; - m->kids[1] = n->kids[1]; - e = n->elems[1]; - n->kids[0] = left; - n->elems[0] = e; - n->kids[1] = right; + t->name = ustrdup(tags); + t->implicit_text = text; + } else { + sfree(t); + t = existing; + if (!is_explicit) { + /* + * An implicit \IM for a tag that's had an implicit + * \IM before. FIXME: we should check the text + * against the existing text and warn on + * differences. And check the tag for case match + * against the existing tag, likewise. + */ + } else { + /* + * An explicit \IM added to a valid tag. In + * particular, this removes the implicit \IM if + * present. + */ + if (t->implicit_text) { + free_word_list(t->implicit_text); + t->implicit_text = NULL; + } + if (t->nexplicit >= t->explicit_size) { + t->explicit_size = t->nexplicit + 8; + t->explicit_texts = resize(t->explicit_texts, + t->explicit_size); + } + t->explicit_texts[t->nexplicit++] = text; } - m->kids[2] = n->kids[2] = NULL; - m->elems[1] = n->elems[1] = NULL; - left = m; - right = n; } - n = n->parent; } +} - /* - * If we've come out of here by `break', n will still be - * non-NULL and we've finished. If we've come here because n is - * NULL, we need to create a new root for the tree because the - * old one has just split into two. - */ - if (!n) { - t->root = mknew(node23); - t->root->kids[0] = left; - t->root->elems[0] = e; - t->root->kids[1] = right; - t->root->elems[1] = NULL; - t->root->kids[2] = NULL; +/* + * Build the final-form index. We now have every tag, with every + * \IM, set up in a 2-3 tree indexed by tag. We now want to collate + * the RHSes of the \IMs, and sort by final form, and decorate the + * entries in the original 2-3 tree with pointers to the RHS + * entries. + */ +void build_index(index *i) { + indextag *t, **ta; + enum23 e; + int j; + + for (t = (indextag *)first23(i->tags, &e); t; + t = (indextag *)next23(i->tags, &e)) { + if (t->implicit_text) { + t->nrefs = 1; + ta = &t->implicit_text; + } else { + t->nrefs = t->nexplicit; + ta = t->explicit_texts; + } + t->refs = mknewa(indexentry *, t->nrefs); + for (j = 0; j < t->nrefs; j++) { + indexentry *ent = mknew(indexentry); + ent->text = *ta++; + t->refs[j] = add23(i->entries, ent, compare_entries); + if (t->refs[j] != ent) /* duplicate */ + sfree(ent); + } + } +} + +void cleanup_index(index *i) { + indextag *t; + indexentry *ent; + enum23 e; + int j; + + for (t = (indextag *)first23(i->tags, &e); t; + t = (indextag *)next23(i->tags, &e)) { + sfree(t->name); + free_word_list(t->implicit_text); + sfree(t->explicit_texts); + sfree(t->refs); + sfree(t); } + freetree23(i->tags); + for (ent = (indexentry *)first23(i->entries, &e); ent; + ent = (indexentry *)next23(i->entries, &e)) { + sfree(ent); + } + freetree23(i->entries); + sfree(i); } -void find23(tree23 *t, elem23 *e) { +void index_debug(index *i) { + indextag *t; + enum23 e; + int j; + + for (t = (indextag *)first23(i->tags, &e); t; + t = (indextag *)next23(i->tags, &e)) { + if (t->implicit_text) + dbg_prtmerge(0, t->name, t->implicit_text); + for (j = 0; j < t->nexplicit; j++) + dbg_prtmerge(1, t->name, t->explicit_texts[j]); + } } -#endif +#define DEBUG +#ifdef DEBUG static void dbg_prtwordlist(int level, word *w); -void index_merge(int is_explicit, wchar_t *tags, word *text) { - wchar_t *wp; - printf("\\IM: %splicit: ", is_explicit ? "ex" : "im"); - if (tags) { - wp = tags; - while (*wp) { - putchar('\"'); - for (; *wp; wp++) - putchar(*wp); - putchar('\"'); - if (*++wp) - printf(", "); - } - } else - printf("(no keyword)"); - printf(" {\n"); +static void dbg_prtmerge(int is_explicit, wchar_t *tag, word *text) { + printf("\\IM: %splicit: \"", is_explicit ? "ex" : "im"); + for (; *tag; tag++) + putchar(*tag); + printf("\" {\n"); dbg_prtwordlist(1, text); printf("}\n"); - free_word_list(text); } static void dbg_prtwordlist(int level, word *w) { @@ -188,3 +221,4 @@ static void dbg_prtwordlist(int level, word *w) { printf("\n"); } } +#endif |