diff options
| author | Simon Tatham <anakin@pobox.com> | 1999-10-18 18:03:37 +0000 |
|---|---|---|
| committer | Simon Tatham <anakin@pobox.com> | 1999-10-18 18:03:37 +0000 |
| commit | e44f985bd4f796d4c4b11eb3555436dbaa2d163b (patch) | |
| tree | 8e037d5b32c5349760277e79ac53993b34035885 | |
| parent | 00f6e0ee13c753d98e8665ad1ff2e992f43ef6e4 (diff) | |
| download | halibut-e44f985bd4f796d4c4b11eb3555436dbaa2d163b.zip halibut-e44f985bd4f796d4c4b11eb3555436dbaa2d163b.tar.gz halibut-e44f985bd4f796d4c4b11eb3555436dbaa2d163b.tar.bz2 halibut-e44f985bd4f796d4c4b11eb3555436dbaa2d163b.tar.xz | |
Further development; mid-end index handling pretty much there!
[originally from svn r238]
Diffstat (limited to '')
| -rw-r--r-- | Makefile | 2 | ||||
| -rw-r--r-- | buttress.h | 32 | ||||
| -rw-r--r-- | error.c | 7 | ||||
| -rw-r--r-- | index.c | 292 | ||||
| -rw-r--r-- | input.c | 8 | ||||
| -rw-r--r-- | keywords.c | 2 | ||||
| -rw-r--r-- | main.c | 18 | ||||
| -rw-r--r-- | malloc.c | 28 | ||||
| -rw-r--r-- | misc.c | 44 | ||||
| -rw-r--r-- | tree23.c | 271 | ||||
| -rw-r--r-- | ustring.c | 25 |
11 files changed, 572 insertions, 157 deletions
@@ -43,7 +43,7 @@ endif SRC := ../ -MODULES := main malloc ustring error help licence version misc +MODULES := main malloc ustring error help licence version misc tree23 MODULES += input keywords contents index style biblio OBJECTS := $(addsuffix .o,$(MODULES)) @@ -27,6 +27,7 @@ typedef struct keywordlist_Tag keywordlist; typedef struct keyword_Tag keyword; typedef struct userstyle_Tag userstyle; typedef struct numberstate_Tag numberstate; +typedef struct index_Tag index; /* * Data structure to hold a file name and index, a line and a @@ -138,7 +139,8 @@ enum { err_nestedstyles, /* unable to nest text styles */ err_nestedindex, /* unable to nest `\i' thingys */ err_nosuchkw, /* unresolved cross-reference */ - err_multiBR /* multiple \BRs on same keyword */ + err_multiBR, /* multiple \BRs on same keyword */ + err_nosuchidxtag /* \IM on unknown index tag (warning) */ }; /* @@ -172,7 +174,9 @@ wchar_t *ustrdup(wchar_t *s); char *ustrtoa(wchar_t *s, char *outbuf, int size); int ustrlen(wchar_t *s); wchar_t *ustrcpy(wchar_t *dest, wchar_t *source); +wchar_t utolower(wchar_t); int ustrcmp(wchar_t *lhs, wchar_t *rhs); +int ustricmp(wchar_t *lhs, wchar_t *rhs); wchar_t *ustrlow(wchar_t *s); wchar_t *ustrftime(wchar_t *fmt, struct tm *timespec); @@ -201,11 +205,12 @@ stack stk_new(void); void stk_free(stack); void stk_push(stack, void *); void *stk_pop(stack); +int compare_wordlists(word *a, word *b); /* * input.c */ -paragraph *read_input(input *in); +paragraph *read_input(input *in, index *idx); /* * keywords.c @@ -232,8 +237,12 @@ void subst_keywords(paragraph *, keywordlist *); /* * index.c */ -/* index_merge takes responsibility for freeing arg 3 but not arg 2 */ -void index_merge(int is_explicit, wchar_t *, word *); +index *make_index(void); +void cleanup_index(index *); +/* index_merge takes responsibility for freeing arg 3 iff implicit; never + * takes responsibility for arg 2 */ +void index_merge(index *, int is_explicit, wchar_t *, word *); +void build_index(index *); /* * contents.c @@ -253,4 +262,19 @@ void gen_citations(paragraph *, keywordlist *); struct userstyle_Tag { }; +/* + * tree23.c + */ +typedef struct tree23_Tag tree23; +typedef struct enum23_Tag { + void *node; + int posn; +} enum23; +tree23 *newtree23(void); +void freetree23(tree23 *); +void *add23(tree23 *, void *, int (*cmp)(void *, void *)); +void *find23(tree23 *, void *, int (*cmp)(void *, void *)); +void *first23(tree23 *, enum23 *); +void *next23(tree23 *, enum23 *); + #endif @@ -135,6 +135,13 @@ static void do_error(int code, va_list ap) { sprintf(error, "multiple `\\BR' entries given for `%.200s'", sp); flags = FILEPOS; break; + case err_nosuchidxtag: + wsp = va_arg(ap, wchar_t *); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sprintf(error, "`\\IM' on unknown index tag `%.200s'", sp); + flags = 0; + /* FIXME: need to get a filepos to here somehow */ + break; } if (flags & PREFIX) @@ -6,167 +6,200 @@ #include <stdlib.h> #include "buttress.h" -#if 0 -typedef struct tag_node23 node23; -typedef struct tag_tree23 tree23; -typedef struct tag_elem23 elem23; +typedef struct indextag_Tag indextag; +typedef struct indexentry_Tag indexentry; -int cmp23(elem23 *, elem23 *); +struct index_Tag { + tree23 *tags; /* holds type `indextag' */ + tree23 *entries; /* holds type `indexentry' */ +}; -struct tag_elem23 { - char *string; +struct indextag_Tag { + wchar_t *name; + word *implicit_text; + word **explicit_texts; + int nexplicit, explicit_size; + int nrefs; + indexentry **refs; /* array of entries referenced by tag */ }; -struct tag_tree23 { - node23 *root; +struct indexentry_Tag { + word *text; + void *backend_data; /* private to back end */ }; -struct tag_node23 { - node23 *parent; - node23 *kids[3]; - elem23 *elems[2]; +index *make_index(void) { + index *ret = mknew(index); + ret->tags = newtree23(); + ret->entries = newtree23(); + return ret; } -int cmp23(elem23 *a, elem23 *b) { - return strcmp(a->string, b->string); +static indextag *make_indextag(void) { + indextag *ret = mknew(indextag); + ret->name = NULL; + ret->implicit_text = NULL; + ret->explicit_texts = NULL; + ret->nexplicit = ret->explicit_size = ret->nrefs = 0; + ret->refs = NULL; + return ret; } -void add23(tree23 *t, elem23 *e) { - node23 *n, *left, *right; - int c; - - if (t->root == NULL) { - t->root = mknew(node23); - t->root->elems[1] = NULL; - t->root->kids[0] = t->root->kids[1] = t->root->kids[2] = NULL; - t->root->parent = NULL; - t->root->elems[0] = e; - return; - } +static int compare_tags(void *av, void *bv) { + indextag *a = (indextag *)av, *b = (indextag *)bv; + return ustricmp(a->name, b->name); +} - np = &t->root; - while (*np) { - n = *np; - c = cmp32(e, n->elems[0]); - if (c == 0) { - printf("present already\n"); - return; - } else if (c < 0) { - np = &n->kids[0]; - } else { - if (n->elems[1] == NULL || (c = cmp32(e, n->elems[1])) < 0) - np = &n->kids[1]; - else if (c > 0) - np = &n->kids[2]; - else { - printf("present already\n"); - return; - } - } - } +static int compare_entries(void *av, void *bv) { + indexentry *a = (indexentry *)av, *b = (indexentry *)bv; + return compare_wordlists(a->text, b->text); +} + +/* + * Add a \IM. `tags' points to a zero-terminated chain of + * zero-terminated strings ("first\0second\0thirdandlast\0\0"). + * `text' points to a word list. + * + * Guarantee on calling sequence: all implicit merges are given + * before the explicit ones. + */ +void index_merge(index *idx, int is_explicit, wchar_t *tags, word *text) { + indextag *t, *existing; /* - * We need to insert the new element in n at position np. + * FIXME: want to warn on overlapping source sets. */ - left = NULL; - right = NULL; - while (n) { - if (n->elems[1] == NULL) { + for (; *tags; tags += 1+ustrlen(tags)) { + t = make_indextag(); + t->name = tags; + existing = add23(idx->tags, t, compare_tags); + if (existing == t) { /* - * Insert in a 2-node; simple. + * Every tag has an implicit \IM. So if this tag + * doesn't exist and we're explicit, then we should + * warn (and drop it, since it won't be referenced). */ - if (np == &n->kids[0]) { - n->kids[2] = n->kids[1]; - n->elems[1] = n->elems[0]; - n->kids[1] = right; - n->elems[0] = e; - n->kids[0] = left; - } else { /* np == &n->kids[1] */ - n->kids[2] = right; - n->elems[1] = e; - n->kids[1] = left; + if (is_explicit) { + error(err_nosuchidxtag, tags); + continue; } - break; - } else { - node23 *m = mknew(node23); /* - * Insert in a 3-node; split into two 2-nodes and move - * focus up a level. + * Otherwise, this is a new tag with an implicit \IM. */ - if (np == &n->kids[0]) { - m->kids[0] = left; - m->elems[0] = e; - m->kids[1] = right; - e = n->elems[0]; - n->kids[0] = n->kids[1]; - n->elems[1] = n->elems[0]; - n->kids[1] = n->kids[2]; - } else if (np == &n->kids[1]) { - m->kids[0] = n->kids[0]; - m->elems[0] = n->elems[0]; - m->kids[1] = left; - /* e = e; */ - n->kids[0] = right; - n->elems[0] = n->elems[1]; - n->kids[1] = n->kids[2]; - } else { /* np == &n->kids[2] */ - m->kids[0] = n->kids[0]; - m->elems[0] = n->elems[0]; - m->kids[1] = n->kids[1]; - e = n->elems[1]; - n->kids[0] = left; - n->elems[0] = e; - n->kids[1] = right; + t->name = ustrdup(tags); + t->implicit_text = text; + } else { + sfree(t); + t = existing; + if (!is_explicit) { + /* + * An implicit \IM for a tag that's had an implicit + * \IM before. FIXME: we should check the text + * against the existing text and warn on + * differences. And check the tag for case match + * against the existing tag, likewise. + */ + } else { + /* + * An explicit \IM added to a valid tag. In + * particular, this removes the implicit \IM if + * present. + */ + if (t->implicit_text) { + free_word_list(t->implicit_text); + t->implicit_text = NULL; + } + if (t->nexplicit >= t->explicit_size) { + t->explicit_size = t->nexplicit + 8; + t->explicit_texts = resize(t->explicit_texts, + t->explicit_size); + } + t->explicit_texts[t->nexplicit++] = text; } - m->kids[2] = n->kids[2] = NULL; - m->elems[1] = n->elems[1] = NULL; - left = m; - right = n; } - n = n->parent; } +} - /* - * If we've come out of here by `break', n will still be - * non-NULL and we've finished. If we've come here because n is - * NULL, we need to create a new root for the tree because the - * old one has just split into two. - */ - if (!n) { - t->root = mknew(node23); - t->root->kids[0] = left; - t->root->elems[0] = e; - t->root->kids[1] = right; - t->root->elems[1] = NULL; - t->root->kids[2] = NULL; +/* + * Build the final-form index. We now have every tag, with every + * \IM, set up in a 2-3 tree indexed by tag. We now want to collate + * the RHSes of the \IMs, and sort by final form, and decorate the + * entries in the original 2-3 tree with pointers to the RHS + * entries. + */ +void build_index(index *i) { + indextag *t, **ta; + enum23 e; + int j; + + for (t = (indextag *)first23(i->tags, &e); t; + t = (indextag *)next23(i->tags, &e)) { + if (t->implicit_text) { + t->nrefs = 1; + ta = &t->implicit_text; + } else { + t->nrefs = t->nexplicit; + ta = t->explicit_texts; + } + t->refs = mknewa(indexentry *, t->nrefs); + for (j = 0; j < t->nrefs; j++) { + indexentry *ent = mknew(indexentry); + ent->text = *ta++; + t->refs[j] = add23(i->entries, ent, compare_entries); + if (t->refs[j] != ent) /* duplicate */ + sfree(ent); + } + } +} + +void cleanup_index(index *i) { + indextag *t; + indexentry *ent; + enum23 e; + int j; + + for (t = (indextag *)first23(i->tags, &e); t; + t = (indextag *)next23(i->tags, &e)) { + sfree(t->name); + free_word_list(t->implicit_text); + sfree(t->explicit_texts); + sfree(t->refs); + sfree(t); } + freetree23(i->tags); + for (ent = (indexentry *)first23(i->entries, &e); ent; + ent = (indexentry *)next23(i->entries, &e)) { + sfree(ent); + } + freetree23(i->entries); + sfree(i); } -void find23(tree23 *t, elem23 *e) { +void index_debug(index *i) { + indextag *t; + enum23 e; + int j; + + for (t = (indextag *)first23(i->tags, &e); t; + t = (indextag *)next23(i->tags, &e)) { + if (t->implicit_text) + dbg_prtmerge(0, t->name, t->implicit_text); + for (j = 0; j < t->nexplicit; j++) + dbg_prtmerge(1, t->name, t->explicit_texts[j]); + } } -#endif +#define DEBUG +#ifdef DEBUG static void dbg_prtwordlist(int level, word *w); -void index_merge(int is_explicit, wchar_t *tags, word *text) { - wchar_t *wp; - printf("\\IM: %splicit: ", is_explicit ? "ex" : "im"); - if (tags) { - wp = tags; - while (*wp) { - putchar('\"'); - for (; *wp; wp++) - putchar(*wp); - putchar('\"'); - if (*++wp) - printf(", "); - } - } else - printf("(no keyword)"); - printf(" {\n"); +static void dbg_prtmerge(int is_explicit, wchar_t *tag, word *text) { + printf("\\IM: %splicit: \"", is_explicit ? "ex" : "im"); + for (; *tag; tag++) + putchar(*tag); + printf("\" {\n"); dbg_prtwordlist(1, text); printf("}\n"); - free_word_list(text); } static void dbg_prtwordlist(int level, word *w) { @@ -188,3 +221,4 @@ static void dbg_prtwordlist(int level, word *w) { printf("\n"); } } +#endif @@ -442,7 +442,7 @@ static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) { /* * Reads a single file (ie until get() returns EOF) */ -static void read_file(paragraph ***ret, input *in) { +static void read_file(paragraph ***ret, input *in, index *idx) { token t; paragraph par; word wd, **whptr, **idximplicit; @@ -707,7 +707,7 @@ static void read_file(paragraph ***ret, input *in) { ustrlow(indexword->text); indexing = FALSE; rdadd(&indexstr, L'\0'); - index_merge(FALSE, indexstr.text, idxwordlist); + index_merge(idx, FALSE, indexstr.text, idxwordlist); sfree(indexstr.text); } if (sitem->type & stack_hyper) { @@ -986,7 +986,7 @@ static void read_file(paragraph ***ret, input *in) { dtor(t); } -paragraph *read_input(input *in) { +paragraph *read_input(input *in, index *idx) { paragraph *head = NULL; paragraph **hptr = &head; @@ -994,7 +994,7 @@ paragraph *read_input(input *in) { in->currfp = fopen(in->filenames[in->currindex], "r"); if (in->currfp) { setpos(in, in->filenames[in->currindex]); - read_file(&hptr, in); + read_file(&hptr, in, idx); } in->currindex++; } @@ -117,7 +117,7 @@ keywordlist *get_keywords(paragraph *source) { } } else { if (kl->nlooseends >= kl->looseendssize) { - kl->looseendssize += 32; + kl->looseendssize = kl->nlooseends + 32; kl->looseends = resize(kl->looseends, kl->looseendssize); } kl->looseends[kl->nlooseends++] = source->kwtext; @@ -166,6 +166,7 @@ int main(int argc, char **argv) { { input in; paragraph *sourceform, *p; + index *idx; keywordlist *keywords; in.filenames = infiles; @@ -175,7 +176,9 @@ int main(int argc, char **argv) { in.npushback = 0; in.reportcols = reportcols; - sourceform = read_input(&in); + idx = make_index(); + + sourceform = read_input(&in, idx); if (!sourceform) exit(EXIT_FAILURE); @@ -185,18 +188,19 @@ int main(int argc, char **argv) { gen_citations(sourceform, keywords); subst_keywords(sourceform, keywords); - for (p = sourceform; p; p = p->next) { - if (p->type == para_IM) { - index_merge(TRUE, p->keyword, p->words); - p->words = NULL; /* this has now been freed */ - } - } + for (p = sourceform; p; p = p->next) + if (p->type == para_IM) + index_merge(idx, TRUE, p->keyword, p->words); + + build_index(idx); + index_debug(idx); dbg_prtkws(keywords); dbg_prtsource(sourceform); free_para_list(sourceform); free_keywords(keywords); + cleanup_index(idx); } return 0; @@ -9,6 +9,8 @@ #ifdef LOGALLOC #define LOGPARAMS char *file, int line, static FILE *logallocfp = NULL; +static int logline = 2; /* off by 1: `null pointer is' */ +static void loginc(void) { } static void logallocinit(void) { if (!logallocfp) { logallocfp = fopen("malloc.log", "w"); @@ -27,9 +29,11 @@ static void logprintf(char *fmt, ...) { va_end(ap); } #define LOGPRINT(x) ( logallocinit(), logprintf x ) +#define LOGINC do { loginc(); logline++; } while (0) #else #define LOGPARAMS #define LOGPRINT(x) +#define LOGINC ((void)0) #endif /* @@ -37,11 +41,14 @@ static void logprintf(char *fmt, ...) { * can do nothing except die when it's out of memory anyway */ void *(smalloc)(LOGPARAMS int size) { - void *p = malloc(size); + void *p; + LOGINC; + LOGPRINT(("%s %d malloc(%ld)", + file, line, (long)size)); + p = malloc(size); if (!p) fatal(err_nomemory); - LOGPRINT(("%s %d malloc(%ld) returns %p\n", - file, line, (long)size, p)); + LOGPRINT((" returns %p\n", p)); return p; } @@ -50,9 +57,10 @@ void *(smalloc)(LOGPARAMS int size) { */ void (sfree)(LOGPARAMS void *p) { if (p) { - free(p); + LOGINC; LOGPRINT(("%s %d free(%p)\n", file, line, p)); + free(p); } } @@ -62,13 +70,17 @@ void (sfree)(LOGPARAMS void *p) { void *(srealloc)(LOGPARAMS void *p, int size) { void *q; if (p) { + LOGINC; + LOGPRINT(("%s %d realloc(%p,%ld)", + file, line, p, (long)size)); q = realloc(p, size); - LOGPRINT(("%s %d realloc(%p,%ld) returns %p\n", - file, line, p, (long)size, q)); + LOGPRINT((" returns %p\n", q)); } else { + LOGINC; + LOGPRINT(("%s %d malloc(%ld)", + file, line, (long)size)); q = malloc(size); - LOGPRINT(("%s %d malloc(%ld) returns %p\n", - file, line, (long)size, q)); + LOGPRINT((" returns %p\n", q)); } if (!q) fatal(err_nomemory); @@ -40,3 +40,47 @@ void *stk_pop(stack s) { else return NULL; } + +int compare_wordlists(word *a, word *b) { + int t; + while (a && b) { + if (a->type != b->type) + return (a->type < b->type ? -1 : +1); /* FIXME? */ + t = a->type; + if ((t != word_Normal && t != word_Code && + t != word_WeakCode && t != word_Emph) || + a->alt || b->alt) { + int c; + if (a->text && b->text) { + c = ustricmp(a->text, b->text); + if (c) + return c; + } + c = compare_wordlists(a->alt, b->alt); + if (c) + return c; + a = a->next; + b = b->next; + } else { + wchar_t *ap = a->text, *bp = b->text; + while (*ap && *bp) { + wchar_t ac = utolower(*ap), bc = utolower(*bp); + if (ac != bc) + return (ac < bc ? -1 : +1); + if (!*++ap && a->next && a->next->type == t && !a->next->alt) + a = a->next, ap = a->text; + if (!*++bp && b->next && b->next->type == t && !b->next->alt) + b = b->next, bp = b->text; + } + if (*ap || *bp) + return (*ap ? +1 : -1); + a = a->next; + b = b->next; + } + } + + if (a || b) + return (a ? +1 : -1); + else + return 0; +} diff --git a/tree23.c b/tree23.c new file mode 100644 index 0000000..4a0d60c --- /dev/null +++ b/tree23.c @@ -0,0 +1,271 @@ +/* + * tree23.c: reasonably generic 2-3 tree routines. Currently only + * supports insert and find operations. + */ + +#include <stdio.h> /* we only need this for NULL :-) */ + +/* + * This module should be easily de-Buttressable. All it relies on + * from the rest of Buttress is the macro `mknew': + * + * #define mknew(typ) ( (typ *) malloc_like_function (sizeof (typ)) ) + * + * and the free-like function `sfree'. + */ +#include "buttress.h" + +typedef struct node23_Tag node23; +/* typedef struct tree23_Tag tree23; */ + +struct tree23_Tag { + node23 *root; +}; + +struct node23_Tag { + node23 *parent; + node23 *kids[3]; + void *elems[2]; +}; + +/* + * Create a 2-3 tree. + */ +tree23 *newtree23(void) { + tree23 *ret = mknew(tree23); + ret->root = NULL; + return ret; +} + +/* + * Free a 2-3 tree (not including freeing the elements). + */ +static void freenode23(node23 *n) { + if (!n) + return; + freenode23(n->kids[0]); + freenode23(n->kids[1]); + freenode23(n->kids[2]); + sfree(n); +} +void freetree23(tree23 *t) { + freenode23(t->root); + sfree(t); +} + +/* + * Add an element e to a 2-3 tree t. Returns e on success, or if an + * existing element compares equal, returns that. + */ +void *add23(tree23 *t, void *e, int (*cmp)(void *, void *)) { + node23 *n, **np, *left, *right; + void *orig_e = e; + int c; + + if (t->root == NULL) { + t->root = mknew(node23); + t->root->elems[1] = NULL; + t->root->kids[0] = t->root->kids[1] = t->root->kids[2] = NULL; + t->root->parent = NULL; + t->root->elems[0] = e; + return orig_e; + } + + np = &t->root; + while (*np) { + n = *np; + c = cmp(e, n->elems[0]); + if (c == 0) { + /* node already exists; return existing one */ + return n->elems[0]; + } else if (c < 0) { + np = &n->kids[0]; + } else { + if (n->elems[1] == NULL || (c = cmp(e, n->elems[1])) < 0) + np = &n->kids[1]; + else if (c > 0) + np = &n->kids[2]; + else { + /* node already exists; return existing one */ + return n->elems[1]; + } + } + } + + /* + * We need to insert the new element in n at position np. + */ + left = NULL; + right = NULL; + while (n) { + if (n->elems[1] == NULL) { + /* + * Insert in a 2-node; simple. + */ + if (np == &n->kids[0]) { + n->kids[2] = n->kids[1]; + n->elems[1] = n->elems[0]; + n->kids[1] = right; + n->elems[0] = e; + n->kids[0] = left; + } else { /* np == &n->kids[1] */ + n->kids[2] = right; + n->elems[1] = e; + n->kids[1] = left; + } + if (n->kids[0]) n->kids[0]->parent = n; + if (n->kids[1]) n->kids[1]->parent = n; + if (n->kids[2]) n->kids[2]->parent = n; + break; + } else { + node23 *m = mknew(node23); + m->parent = n->parent; + /* + * Insert in a 3-node; split into two 2-nodes and move + * focus up a level. + */ + if (np == &n->kids[0]) { + m->kids[0] = left; + m->elems[0] = e; + m->kids[1] = right; + e = n->elems[0]; + n->kids[0] = n->kids[1]; + n->elems[0] = n->elems[1]; + n->kids[1] = n->kids[2]; + } else if (np == &n->kids[1]) { + m->kids[0] = n->kids[0]; + m->elems[0] = n->elems[0]; + m->kids[1] = left; + /* e = e; */ + n->kids[0] = right; + n->elems[0] = n->elems[1]; + n->kids[1] = n->kids[2]; + } else { /* np == &n->kids[2] */ + m->kids[0] = n->kids[0]; + m->elems[0] = n->elems[0]; + m->kids[1] = n->kids[1]; + n->kids[0] = left; + n->elems[0] = e; + n->kids[1] = right; + e = n->elems[1]; + } + m->kids[2] = n->kids[2] = NULL; + m->elems[1] = n->elems[1] = NULL; + if (m->kids[0]) m->kids[0]->parent = m; + if (m->kids[1]) m->kids[1]->parent = m; + if (n->kids[0]) n->kids[0]->parent = n; + if (n->kids[1]) n->kids[1]->parent = n; + left = m; + right = n; + } + if (n->parent) + np = (n->parent->kids[0] == n ? &n->parent->kids[0] : + n->parent->kids[1] == n ? &n->parent->kids[1] : + &n->parent->kids[2]); + n = n->parent; + } + + /* + * If we've come out of here by `break', n will still be + * non-NULL and we've finished. If we've come here because n is + * NULL, we need to create a new root for the tree because the + * old one has just split into two. + */ + if (!n) { + t->root = mknew(node23); + t->root->kids[0] = left; + t->root->elems[0] = e; + t->root->kids[1] = right; + t->root->elems[1] = NULL; + t->root->kids[2] = NULL; + t->root->parent = NULL; + if (t->root->kids[0]) t->root->kids[0]->parent = t->root; + if (t->root->kids[1]) t->root->kids[1]->parent = t->root; + } + + return orig_e; +} + +/* + * Find an element e in a 2-3 tree t. Returns NULL if not found. e + * is always passed as the first argument to cmp, so cmp can be an + * asymmetric function if desired. + */ +void *find23(tree23 *t, void *e, int (*cmp)(void *, void *)) { + node23 *n; + int c; + + if (t->root == NULL) + return NULL; + + n = t->root; + while (n) { + c = cmp(e, n->elems[0]); + if (c == 0) { + return n->elems[0]; + } else if (c < 0) { + n = n->kids[0]; + } else { + if (n->elems[1] == NULL || (c = cmp(e, n->elems[1])) < 0) + n = n->kids[1]; + else if (c > 0) + n = n->kids[2]; + else { + return n->elems[1]; + } + } + } + + /* + * We've found our way to the bottom of the tree and we know + * where we would insert this node if we wanted to. But it + * isn't there. + */ + return NULL; +} + +/* + * Iterate over the elements of a tree23, in order. + */ +void *first23(tree23 *t, enum23 *e) { + node23 *n = t->root; + if (!n) + return NULL; + while (n->kids[0]) + n = n->kids[0]; + e->node = n; + e->posn = 0; + return n->elems[0]; +} + +void *next23(tree23 *t, enum23 *e) { + node23 *n = (node23 *)e->node; + int pos = e->posn; + + if (n->kids[pos+1]) { + n = n->kids[pos+1]; + while (n->kids[0]) + n = n->kids[0]; + e->node = n; + e->posn = 0; + return n->elems[0]; + } + + if (pos == 0 && n->elems[1]) { + e->posn = 1; + return n->elems[1]; + } + + do { + node23 *nn = n->parent; + if (nn == NULL) + return NULL; /* end of tree */ + pos = (nn->kids[0] == n ? 0 : + nn->kids[1] == n ? 1 : 2); + n = nn; + } while (pos == 2 || n->kids[pos+1] == NULL); + + e->node = n; + e->posn = pos; + return n->elems[pos]; +} @@ -57,12 +57,31 @@ int ustrcmp(wchar_t *lhs, wchar_t *rhs) { return 0; } +wchar_t utolower(wchar_t c) { + if (c == L'\0') + return c; /* this property needed by ustricmp */ + /* FIXME: this doesn't even come close */ + if (c >= 'A' && c <= 'Z') + c += 'a'-'A'; + return c; +} + +int ustricmp(wchar_t *lhs, wchar_t *rhs) { + wchar_t lc, rc; + while ((lc = utolower(*lhs)) == (rc = utolower(*rhs)) && lc && rc) + lhs++, rhs++; + if (!lc && !rc) + return 0; + if (lc < rc) + return -1; + else + return 1; +} + wchar_t *ustrlow(wchar_t *s) { wchar_t *p = s; while (*p) { - /* FIXME: this doesn't even come close */ - if (*p >= 'A' && *p <= 'Z') - *p += 'a'-'A'; + *p = utolower(*p); p++; } return s; |