summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Tatham <anakin@pobox.com>1999-10-18 18:03:37 +0000
committerSimon Tatham <anakin@pobox.com>1999-10-18 18:03:37 +0000
commite44f985bd4f796d4c4b11eb3555436dbaa2d163b (patch)
tree8e037d5b32c5349760277e79ac53993b34035885
parent00f6e0ee13c753d98e8665ad1ff2e992f43ef6e4 (diff)
downloadhalibut-e44f985bd4f796d4c4b11eb3555436dbaa2d163b.zip
halibut-e44f985bd4f796d4c4b11eb3555436dbaa2d163b.tar.gz
halibut-e44f985bd4f796d4c4b11eb3555436dbaa2d163b.tar.bz2
halibut-e44f985bd4f796d4c4b11eb3555436dbaa2d163b.tar.xz
Further development; mid-end index handling pretty much there!
[originally from svn r238]
-rw-r--r--Makefile2
-rw-r--r--buttress.h32
-rw-r--r--error.c7
-rw-r--r--index.c292
-rw-r--r--input.c8
-rw-r--r--keywords.c2
-rw-r--r--main.c18
-rw-r--r--malloc.c28
-rw-r--r--misc.c44
-rw-r--r--tree23.c271
-rw-r--r--ustring.c25
11 files changed, 572 insertions, 157 deletions
diff --git a/Makefile b/Makefile
index ab67cbc..d89b105 100644
--- a/Makefile
+++ b/Makefile
@@ -43,7 +43,7 @@ endif
SRC := ../
-MODULES := main malloc ustring error help licence version misc
+MODULES := main malloc ustring error help licence version misc tree23
MODULES += input keywords contents index style biblio
OBJECTS := $(addsuffix .o,$(MODULES))
diff --git a/buttress.h b/buttress.h
index 786c6e2..9bdfcfc 100644
--- a/buttress.h
+++ b/buttress.h
@@ -27,6 +27,7 @@ typedef struct keywordlist_Tag keywordlist;
typedef struct keyword_Tag keyword;
typedef struct userstyle_Tag userstyle;
typedef struct numberstate_Tag numberstate;
+typedef struct index_Tag index;
/*
* Data structure to hold a file name and index, a line and a
@@ -138,7 +139,8 @@ enum {
err_nestedstyles, /* unable to nest text styles */
err_nestedindex, /* unable to nest `\i' thingys */
err_nosuchkw, /* unresolved cross-reference */
- err_multiBR /* multiple \BRs on same keyword */
+ err_multiBR, /* multiple \BRs on same keyword */
+ err_nosuchidxtag /* \IM on unknown index tag (warning) */
};
/*
@@ -172,7 +174,9 @@ wchar_t *ustrdup(wchar_t *s);
char *ustrtoa(wchar_t *s, char *outbuf, int size);
int ustrlen(wchar_t *s);
wchar_t *ustrcpy(wchar_t *dest, wchar_t *source);
+wchar_t utolower(wchar_t);
int ustrcmp(wchar_t *lhs, wchar_t *rhs);
+int ustricmp(wchar_t *lhs, wchar_t *rhs);
wchar_t *ustrlow(wchar_t *s);
wchar_t *ustrftime(wchar_t *fmt, struct tm *timespec);
@@ -201,11 +205,12 @@ stack stk_new(void);
void stk_free(stack);
void stk_push(stack, void *);
void *stk_pop(stack);
+int compare_wordlists(word *a, word *b);
/*
* input.c
*/
-paragraph *read_input(input *in);
+paragraph *read_input(input *in, index *idx);
/*
* keywords.c
@@ -232,8 +237,12 @@ void subst_keywords(paragraph *, keywordlist *);
/*
* index.c
*/
-/* index_merge takes responsibility for freeing arg 3 but not arg 2 */
-void index_merge(int is_explicit, wchar_t *, word *);
+index *make_index(void);
+void cleanup_index(index *);
+/* index_merge takes responsibility for freeing arg 3 iff implicit; never
+ * takes responsibility for arg 2 */
+void index_merge(index *, int is_explicit, wchar_t *, word *);
+void build_index(index *);
/*
* contents.c
@@ -253,4 +262,19 @@ void gen_citations(paragraph *, keywordlist *);
struct userstyle_Tag {
};
+/*
+ * tree23.c
+ */
+typedef struct tree23_Tag tree23;
+typedef struct enum23_Tag {
+ void *node;
+ int posn;
+} enum23;
+tree23 *newtree23(void);
+void freetree23(tree23 *);
+void *add23(tree23 *, void *, int (*cmp)(void *, void *));
+void *find23(tree23 *, void *, int (*cmp)(void *, void *));
+void *first23(tree23 *, enum23 *);
+void *next23(tree23 *, enum23 *);
+
#endif
diff --git a/error.c b/error.c
index cc574cc..704ae03 100644
--- a/error.c
+++ b/error.c
@@ -135,6 +135,13 @@ static void do_error(int code, va_list ap) {
sprintf(error, "multiple `\\BR' entries given for `%.200s'", sp);
flags = FILEPOS;
break;
+ case err_nosuchidxtag:
+ wsp = va_arg(ap, wchar_t *);
+ sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+ sprintf(error, "`\\IM' on unknown index tag `%.200s'", sp);
+ flags = 0;
+ /* FIXME: need to get a filepos to here somehow */
+ break;
}
if (flags & PREFIX)
diff --git a/index.c b/index.c
index 7c73565..3f629a0 100644
--- a/index.c
+++ b/index.c
@@ -6,167 +6,200 @@
#include <stdlib.h>
#include "buttress.h"
-#if 0
-typedef struct tag_node23 node23;
-typedef struct tag_tree23 tree23;
-typedef struct tag_elem23 elem23;
+typedef struct indextag_Tag indextag;
+typedef struct indexentry_Tag indexentry;
-int cmp23(elem23 *, elem23 *);
+struct index_Tag {
+ tree23 *tags; /* holds type `indextag' */
+ tree23 *entries; /* holds type `indexentry' */
+};
-struct tag_elem23 {
- char *string;
+struct indextag_Tag {
+ wchar_t *name;
+ word *implicit_text;
+ word **explicit_texts;
+ int nexplicit, explicit_size;
+ int nrefs;
+ indexentry **refs; /* array of entries referenced by tag */
};
-struct tag_tree23 {
- node23 *root;
+struct indexentry_Tag {
+ word *text;
+ void *backend_data; /* private to back end */
};
-struct tag_node23 {
- node23 *parent;
- node23 *kids[3];
- elem23 *elems[2];
+index *make_index(void) {
+ index *ret = mknew(index);
+ ret->tags = newtree23();
+ ret->entries = newtree23();
+ return ret;
}
-int cmp23(elem23 *a, elem23 *b) {
- return strcmp(a->string, b->string);
+static indextag *make_indextag(void) {
+ indextag *ret = mknew(indextag);
+ ret->name = NULL;
+ ret->implicit_text = NULL;
+ ret->explicit_texts = NULL;
+ ret->nexplicit = ret->explicit_size = ret->nrefs = 0;
+ ret->refs = NULL;
+ return ret;
}
-void add23(tree23 *t, elem23 *e) {
- node23 *n, *left, *right;
- int c;
-
- if (t->root == NULL) {
- t->root = mknew(node23);
- t->root->elems[1] = NULL;
- t->root->kids[0] = t->root->kids[1] = t->root->kids[2] = NULL;
- t->root->parent = NULL;
- t->root->elems[0] = e;
- return;
- }
+static int compare_tags(void *av, void *bv) {
+ indextag *a = (indextag *)av, *b = (indextag *)bv;
+ return ustricmp(a->name, b->name);
+}
- np = &t->root;
- while (*np) {
- n = *np;
- c = cmp32(e, n->elems[0]);
- if (c == 0) {
- printf("present already\n");
- return;
- } else if (c < 0) {
- np = &n->kids[0];
- } else {
- if (n->elems[1] == NULL || (c = cmp32(e, n->elems[1])) < 0)
- np = &n->kids[1];
- else if (c > 0)
- np = &n->kids[2];
- else {
- printf("present already\n");
- return;
- }
- }
- }
+static int compare_entries(void *av, void *bv) {
+ indexentry *a = (indexentry *)av, *b = (indexentry *)bv;
+ return compare_wordlists(a->text, b->text);
+}
+
+/*
+ * Add a \IM. `tags' points to a zero-terminated chain of
+ * zero-terminated strings ("first\0second\0thirdandlast\0\0").
+ * `text' points to a word list.
+ *
+ * Guarantee on calling sequence: all implicit merges are given
+ * before the explicit ones.
+ */
+void index_merge(index *idx, int is_explicit, wchar_t *tags, word *text) {
+ indextag *t, *existing;
/*
- * We need to insert the new element in n at position np.
+ * FIXME: want to warn on overlapping source sets.
*/
- left = NULL;
- right = NULL;
- while (n) {
- if (n->elems[1] == NULL) {
+ for (; *tags; tags += 1+ustrlen(tags)) {
+ t = make_indextag();
+ t->name = tags;
+ existing = add23(idx->tags, t, compare_tags);
+ if (existing == t) {
/*
- * Insert in a 2-node; simple.
+ * Every tag has an implicit \IM. So if this tag
+ * doesn't exist and we're explicit, then we should
+ * warn (and drop it, since it won't be referenced).
*/
- if (np == &n->kids[0]) {
- n->kids[2] = n->kids[1];
- n->elems[1] = n->elems[0];
- n->kids[1] = right;
- n->elems[0] = e;
- n->kids[0] = left;
- } else { /* np == &n->kids[1] */
- n->kids[2] = right;
- n->elems[1] = e;
- n->kids[1] = left;
+ if (is_explicit) {
+ error(err_nosuchidxtag, tags);
+ continue;
}
- break;
- } else {
- node23 *m = mknew(node23);
/*
- * Insert in a 3-node; split into two 2-nodes and move
- * focus up a level.
+ * Otherwise, this is a new tag with an implicit \IM.
*/
- if (np == &n->kids[0]) {
- m->kids[0] = left;
- m->elems[0] = e;
- m->kids[1] = right;
- e = n->elems[0];
- n->kids[0] = n->kids[1];
- n->elems[1] = n->elems[0];
- n->kids[1] = n->kids[2];
- } else if (np == &n->kids[1]) {
- m->kids[0] = n->kids[0];
- m->elems[0] = n->elems[0];
- m->kids[1] = left;
- /* e = e; */
- n->kids[0] = right;
- n->elems[0] = n->elems[1];
- n->kids[1] = n->kids[2];
- } else { /* np == &n->kids[2] */
- m->kids[0] = n->kids[0];
- m->elems[0] = n->elems[0];
- m->kids[1] = n->kids[1];
- e = n->elems[1];
- n->kids[0] = left;
- n->elems[0] = e;
- n->kids[1] = right;
+ t->name = ustrdup(tags);
+ t->implicit_text = text;
+ } else {
+ sfree(t);
+ t = existing;
+ if (!is_explicit) {
+ /*
+ * An implicit \IM for a tag that's had an implicit
+ * \IM before. FIXME: we should check the text
+ * against the existing text and warn on
+ * differences. And check the tag for case match
+ * against the existing tag, likewise.
+ */
+ } else {
+ /*
+ * An explicit \IM added to a valid tag. In
+ * particular, this removes the implicit \IM if
+ * present.
+ */
+ if (t->implicit_text) {
+ free_word_list(t->implicit_text);
+ t->implicit_text = NULL;
+ }
+ if (t->nexplicit >= t->explicit_size) {
+ t->explicit_size = t->nexplicit + 8;
+ t->explicit_texts = resize(t->explicit_texts,
+ t->explicit_size);
+ }
+ t->explicit_texts[t->nexplicit++] = text;
}
- m->kids[2] = n->kids[2] = NULL;
- m->elems[1] = n->elems[1] = NULL;
- left = m;
- right = n;
}
- n = n->parent;
}
+}
- /*
- * If we've come out of here by `break', n will still be
- * non-NULL and we've finished. If we've come here because n is
- * NULL, we need to create a new root for the tree because the
- * old one has just split into two.
- */
- if (!n) {
- t->root = mknew(node23);
- t->root->kids[0] = left;
- t->root->elems[0] = e;
- t->root->kids[1] = right;
- t->root->elems[1] = NULL;
- t->root->kids[2] = NULL;
+/*
+ * Build the final-form index. We now have every tag, with every
+ * \IM, set up in a 2-3 tree indexed by tag. We now want to collate
+ * the RHSes of the \IMs, and sort by final form, and decorate the
+ * entries in the original 2-3 tree with pointers to the RHS
+ * entries.
+ */
+void build_index(index *i) {
+ indextag *t, **ta;
+ enum23 e;
+ int j;
+
+ for (t = (indextag *)first23(i->tags, &e); t;
+ t = (indextag *)next23(i->tags, &e)) {
+ if (t->implicit_text) {
+ t->nrefs = 1;
+ ta = &t->implicit_text;
+ } else {
+ t->nrefs = t->nexplicit;
+ ta = t->explicit_texts;
+ }
+ t->refs = mknewa(indexentry *, t->nrefs);
+ for (j = 0; j < t->nrefs; j++) {
+ indexentry *ent = mknew(indexentry);
+ ent->text = *ta++;
+ t->refs[j] = add23(i->entries, ent, compare_entries);
+ if (t->refs[j] != ent) /* duplicate */
+ sfree(ent);
+ }
+ }
+}
+
+void cleanup_index(index *i) {
+ indextag *t;
+ indexentry *ent;
+ enum23 e;
+ int j;
+
+ for (t = (indextag *)first23(i->tags, &e); t;
+ t = (indextag *)next23(i->tags, &e)) {
+ sfree(t->name);
+ free_word_list(t->implicit_text);
+ sfree(t->explicit_texts);
+ sfree(t->refs);
+ sfree(t);
}
+ freetree23(i->tags);
+ for (ent = (indexentry *)first23(i->entries, &e); ent;
+ ent = (indexentry *)next23(i->entries, &e)) {
+ sfree(ent);
+ }
+ freetree23(i->entries);
+ sfree(i);
}
-void find23(tree23 *t, elem23 *e) {
+void index_debug(index *i) {
+ indextag *t;
+ enum23 e;
+ int j;
+
+ for (t = (indextag *)first23(i->tags, &e); t;
+ t = (indextag *)next23(i->tags, &e)) {
+ if (t->implicit_text)
+ dbg_prtmerge(0, t->name, t->implicit_text);
+ for (j = 0; j < t->nexplicit; j++)
+ dbg_prtmerge(1, t->name, t->explicit_texts[j]);
+ }
}
-#endif
+#define DEBUG
+#ifdef DEBUG
static void dbg_prtwordlist(int level, word *w);
-void index_merge(int is_explicit, wchar_t *tags, word *text) {
- wchar_t *wp;
- printf("\\IM: %splicit: ", is_explicit ? "ex" : "im");
- if (tags) {
- wp = tags;
- while (*wp) {
- putchar('\"');
- for (; *wp; wp++)
- putchar(*wp);
- putchar('\"');
- if (*++wp)
- printf(", ");
- }
- } else
- printf("(no keyword)");
- printf(" {\n");
+static void dbg_prtmerge(int is_explicit, wchar_t *tag, word *text) {
+ printf("\\IM: %splicit: \"", is_explicit ? "ex" : "im");
+ for (; *tag; tag++)
+ putchar(*tag);
+ printf("\" {\n");
dbg_prtwordlist(1, text);
printf("}\n");
- free_word_list(text);
}
static void dbg_prtwordlist(int level, word *w) {
@@ -188,3 +221,4 @@ static void dbg_prtwordlist(int level, word *w) {
printf("\n");
}
}
+#endif
diff --git a/input.c b/input.c
index 3749ffa..c5d0b5c 100644
--- a/input.c
+++ b/input.c
@@ -442,7 +442,7 @@ static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) {
/*
* Reads a single file (ie until get() returns EOF)
*/
-static void read_file(paragraph ***ret, input *in) {
+static void read_file(paragraph ***ret, input *in, index *idx) {
token t;
paragraph par;
word wd, **whptr, **idximplicit;
@@ -707,7 +707,7 @@ static void read_file(paragraph ***ret, input *in) {
ustrlow(indexword->text);
indexing = FALSE;
rdadd(&indexstr, L'\0');
- index_merge(FALSE, indexstr.text, idxwordlist);
+ index_merge(idx, FALSE, indexstr.text, idxwordlist);
sfree(indexstr.text);
}
if (sitem->type & stack_hyper) {
@@ -986,7 +986,7 @@ static void read_file(paragraph ***ret, input *in) {
dtor(t);
}
-paragraph *read_input(input *in) {
+paragraph *read_input(input *in, index *idx) {
paragraph *head = NULL;
paragraph **hptr = &head;
@@ -994,7 +994,7 @@ paragraph *read_input(input *in) {
in->currfp = fopen(in->filenames[in->currindex], "r");
if (in->currfp) {
setpos(in, in->filenames[in->currindex]);
- read_file(&hptr, in);
+ read_file(&hptr, in, idx);
}
in->currindex++;
}
diff --git a/keywords.c b/keywords.c
index c2d9ed6..c446761 100644
--- a/keywords.c
+++ b/keywords.c
@@ -117,7 +117,7 @@ keywordlist *get_keywords(paragraph *source) {
}
} else {
if (kl->nlooseends >= kl->looseendssize) {
- kl->looseendssize += 32;
+ kl->looseendssize = kl->nlooseends + 32;
kl->looseends = resize(kl->looseends, kl->looseendssize);
}
kl->looseends[kl->nlooseends++] = source->kwtext;
diff --git a/main.c b/main.c
index 0320526..1fef5e4 100644
--- a/main.c
+++ b/main.c
@@ -166,6 +166,7 @@ int main(int argc, char **argv) {
{
input in;
paragraph *sourceform, *p;
+ index *idx;
keywordlist *keywords;
in.filenames = infiles;
@@ -175,7 +176,9 @@ int main(int argc, char **argv) {
in.npushback = 0;
in.reportcols = reportcols;
- sourceform = read_input(&in);
+ idx = make_index();
+
+ sourceform = read_input(&in, idx);
if (!sourceform)
exit(EXIT_FAILURE);
@@ -185,18 +188,19 @@ int main(int argc, char **argv) {
gen_citations(sourceform, keywords);
subst_keywords(sourceform, keywords);
- for (p = sourceform; p; p = p->next) {
- if (p->type == para_IM) {
- index_merge(TRUE, p->keyword, p->words);
- p->words = NULL; /* this has now been freed */
- }
- }
+ for (p = sourceform; p; p = p->next)
+ if (p->type == para_IM)
+ index_merge(idx, TRUE, p->keyword, p->words);
+
+ build_index(idx);
+ index_debug(idx);
dbg_prtkws(keywords);
dbg_prtsource(sourceform);
free_para_list(sourceform);
free_keywords(keywords);
+ cleanup_index(idx);
}
return 0;
diff --git a/malloc.c b/malloc.c
index 1a2be7a..0f42bde 100644
--- a/malloc.c
+++ b/malloc.c
@@ -9,6 +9,8 @@
#ifdef LOGALLOC
#define LOGPARAMS char *file, int line,
static FILE *logallocfp = NULL;
+static int logline = 2; /* off by 1: `null pointer is' */
+static void loginc(void) { }
static void logallocinit(void) {
if (!logallocfp) {
logallocfp = fopen("malloc.log", "w");
@@ -27,9 +29,11 @@ static void logprintf(char *fmt, ...) {
va_end(ap);
}
#define LOGPRINT(x) ( logallocinit(), logprintf x )
+#define LOGINC do { loginc(); logline++; } while (0)
#else
#define LOGPARAMS
#define LOGPRINT(x)
+#define LOGINC ((void)0)
#endif
/*
@@ -37,11 +41,14 @@ static void logprintf(char *fmt, ...) {
* can do nothing except die when it's out of memory anyway
*/
void *(smalloc)(LOGPARAMS int size) {
- void *p = malloc(size);
+ void *p;
+ LOGINC;
+ LOGPRINT(("%s %d malloc(%ld)",
+ file, line, (long)size));
+ p = malloc(size);
if (!p)
fatal(err_nomemory);
- LOGPRINT(("%s %d malloc(%ld) returns %p\n",
- file, line, (long)size, p));
+ LOGPRINT((" returns %p\n", p));
return p;
}
@@ -50,9 +57,10 @@ void *(smalloc)(LOGPARAMS int size) {
*/
void (sfree)(LOGPARAMS void *p) {
if (p) {
- free(p);
+ LOGINC;
LOGPRINT(("%s %d free(%p)\n",
file, line, p));
+ free(p);
}
}
@@ -62,13 +70,17 @@ void (sfree)(LOGPARAMS void *p) {
void *(srealloc)(LOGPARAMS void *p, int size) {
void *q;
if (p) {
+ LOGINC;
+ LOGPRINT(("%s %d realloc(%p,%ld)",
+ file, line, p, (long)size));
q = realloc(p, size);
- LOGPRINT(("%s %d realloc(%p,%ld) returns %p\n",
- file, line, p, (long)size, q));
+ LOGPRINT((" returns %p\n", q));
} else {
+ LOGINC;
+ LOGPRINT(("%s %d malloc(%ld)",
+ file, line, (long)size));
q = malloc(size);
- LOGPRINT(("%s %d malloc(%ld) returns %p\n",
- file, line, (long)size, q));
+ LOGPRINT((" returns %p\n", q));
}
if (!q)
fatal(err_nomemory);
diff --git a/misc.c b/misc.c
index c9ea115..3ead01c 100644
--- a/misc.c
+++ b/misc.c
@@ -40,3 +40,47 @@ void *stk_pop(stack s) {
else
return NULL;
}
+
+int compare_wordlists(word *a, word *b) {
+ int t;
+ while (a && b) {
+ if (a->type != b->type)
+ return (a->type < b->type ? -1 : +1); /* FIXME? */
+ t = a->type;
+ if ((t != word_Normal && t != word_Code &&
+ t != word_WeakCode && t != word_Emph) ||
+ a->alt || b->alt) {
+ int c;
+ if (a->text && b->text) {
+ c = ustricmp(a->text, b->text);
+ if (c)
+ return c;
+ }
+ c = compare_wordlists(a->alt, b->alt);
+ if (c)
+ return c;
+ a = a->next;
+ b = b->next;
+ } else {
+ wchar_t *ap = a->text, *bp = b->text;
+ while (*ap && *bp) {
+ wchar_t ac = utolower(*ap), bc = utolower(*bp);
+ if (ac != bc)
+ return (ac < bc ? -1 : +1);
+ if (!*++ap && a->next && a->next->type == t && !a->next->alt)
+ a = a->next, ap = a->text;
+ if (!*++bp && b->next && b->next->type == t && !b->next->alt)
+ b = b->next, bp = b->text;
+ }
+ if (*ap || *bp)
+ return (*ap ? +1 : -1);
+ a = a->next;
+ b = b->next;
+ }
+ }
+
+ if (a || b)
+ return (a ? +1 : -1);
+ else
+ return 0;
+}
diff --git a/tree23.c b/tree23.c
new file mode 100644
index 0000000..4a0d60c
--- /dev/null
+++ b/tree23.c
@@ -0,0 +1,271 @@
+/*
+ * tree23.c: reasonably generic 2-3 tree routines. Currently only
+ * supports insert and find operations.
+ */
+
+#include <stdio.h> /* we only need this for NULL :-) */
+
+/*
+ * This module should be easily de-Buttressable. All it relies on
+ * from the rest of Buttress is the macro `mknew':
+ *
+ * #define mknew(typ) ( (typ *) malloc_like_function (sizeof (typ)) )
+ *
+ * and the free-like function `sfree'.
+ */
+#include "buttress.h"
+
+typedef struct node23_Tag node23;
+/* typedef struct tree23_Tag tree23; */
+
+struct tree23_Tag {
+ node23 *root;
+};
+
+struct node23_Tag {
+ node23 *parent;
+ node23 *kids[3];
+ void *elems[2];
+};
+
+/*
+ * Create a 2-3 tree.
+ */
+tree23 *newtree23(void) {
+ tree23 *ret = mknew(tree23);
+ ret->root = NULL;
+ return ret;
+}
+
+/*
+ * Free a 2-3 tree (not including freeing the elements).
+ */
+static void freenode23(node23 *n) {
+ if (!n)
+ return;
+ freenode23(n->kids[0]);
+ freenode23(n->kids[1]);
+ freenode23(n->kids[2]);
+ sfree(n);
+}
+void freetree23(tree23 *t) {
+ freenode23(t->root);
+ sfree(t);
+}
+
+/*
+ * Add an element e to a 2-3 tree t. Returns e on success, or if an
+ * existing element compares equal, returns that.
+ */
+void *add23(tree23 *t, void *e, int (*cmp)(void *, void *)) {
+ node23 *n, **np, *left, *right;
+ void *orig_e = e;
+ int c;
+
+ if (t->root == NULL) {
+ t->root = mknew(node23);
+ t->root->elems[1] = NULL;
+ t->root->kids[0] = t->root->kids[1] = t->root->kids[2] = NULL;
+ t->root->parent = NULL;
+ t->root->elems[0] = e;
+ return orig_e;
+ }
+
+ np = &t->root;
+ while (*np) {
+ n = *np;
+ c = cmp(e, n->elems[0]);
+ if (c == 0) {
+ /* node already exists; return existing one */
+ return n->elems[0];
+ } else if (c < 0) {
+ np = &n->kids[0];
+ } else {
+ if (n->elems[1] == NULL || (c = cmp(e, n->elems[1])) < 0)
+ np = &n->kids[1];
+ else if (c > 0)
+ np = &n->kids[2];
+ else {
+ /* node already exists; return existing one */
+ return n->elems[1];
+ }
+ }
+ }
+
+ /*
+ * We need to insert the new element in n at position np.
+ */
+ left = NULL;
+ right = NULL;
+ while (n) {
+ if (n->elems[1] == NULL) {
+ /*
+ * Insert in a 2-node; simple.
+ */
+ if (np == &n->kids[0]) {
+ n->kids[2] = n->kids[1];
+ n->elems[1] = n->elems[0];
+ n->kids[1] = right;
+ n->elems[0] = e;
+ n->kids[0] = left;
+ } else { /* np == &n->kids[1] */
+ n->kids[2] = right;
+ n->elems[1] = e;
+ n->kids[1] = left;
+ }
+ if (n->kids[0]) n->kids[0]->parent = n;
+ if (n->kids[1]) n->kids[1]->parent = n;
+ if (n->kids[2]) n->kids[2]->parent = n;
+ break;
+ } else {
+ node23 *m = mknew(node23);
+ m->parent = n->parent;
+ /*
+ * Insert in a 3-node; split into two 2-nodes and move
+ * focus up a level.
+ */
+ if (np == &n->kids[0]) {
+ m->kids[0] = left;
+ m->elems[0] = e;
+ m->kids[1] = right;
+ e = n->elems[0];
+ n->kids[0] = n->kids[1];
+ n->elems[0] = n->elems[1];
+ n->kids[1] = n->kids[2];
+ } else if (np == &n->kids[1]) {
+ m->kids[0] = n->kids[0];
+ m->elems[0] = n->elems[0];
+ m->kids[1] = left;
+ /* e = e; */
+ n->kids[0] = right;
+ n->elems[0] = n->elems[1];
+ n->kids[1] = n->kids[2];
+ } else { /* np == &n->kids[2] */
+ m->kids[0] = n->kids[0];
+ m->elems[0] = n->elems[0];
+ m->kids[1] = n->kids[1];
+ n->kids[0] = left;
+ n->elems[0] = e;
+ n->kids[1] = right;
+ e = n->elems[1];
+ }
+ m->kids[2] = n->kids[2] = NULL;
+ m->elems[1] = n->elems[1] = NULL;
+ if (m->kids[0]) m->kids[0]->parent = m;
+ if (m->kids[1]) m->kids[1]->parent = m;
+ if (n->kids[0]) n->kids[0]->parent = n;
+ if (n->kids[1]) n->kids[1]->parent = n;
+ left = m;
+ right = n;
+ }
+ if (n->parent)
+ np = (n->parent->kids[0] == n ? &n->parent->kids[0] :
+ n->parent->kids[1] == n ? &n->parent->kids[1] :
+ &n->parent->kids[2]);
+ n = n->parent;
+ }
+
+ /*
+ * If we've come out of here by `break', n will still be
+ * non-NULL and we've finished. If we've come here because n is
+ * NULL, we need to create a new root for the tree because the
+ * old one has just split into two.
+ */
+ if (!n) {
+ t->root = mknew(node23);
+ t->root->kids[0] = left;
+ t->root->elems[0] = e;
+ t->root->kids[1] = right;
+ t->root->elems[1] = NULL;
+ t->root->kids[2] = NULL;
+ t->root->parent = NULL;
+ if (t->root->kids[0]) t->root->kids[0]->parent = t->root;
+ if (t->root->kids[1]) t->root->kids[1]->parent = t->root;
+ }
+
+ return orig_e;
+}
+
+/*
+ * Find an element e in a 2-3 tree t. Returns NULL if not found. e
+ * is always passed as the first argument to cmp, so cmp can be an
+ * asymmetric function if desired.
+ */
+void *find23(tree23 *t, void *e, int (*cmp)(void *, void *)) {
+ node23 *n;
+ int c;
+
+ if (t->root == NULL)
+ return NULL;
+
+ n = t->root;
+ while (n) {
+ c = cmp(e, n->elems[0]);
+ if (c == 0) {
+ return n->elems[0];
+ } else if (c < 0) {
+ n = n->kids[0];
+ } else {
+ if (n->elems[1] == NULL || (c = cmp(e, n->elems[1])) < 0)
+ n = n->kids[1];
+ else if (c > 0)
+ n = n->kids[2];
+ else {
+ return n->elems[1];
+ }
+ }
+ }
+
+ /*
+ * We've found our way to the bottom of the tree and we know
+ * where we would insert this node if we wanted to. But it
+ * isn't there.
+ */
+ return NULL;
+}
+
+/*
+ * Iterate over the elements of a tree23, in order.
+ */
+void *first23(tree23 *t, enum23 *e) {
+ node23 *n = t->root;
+ if (!n)
+ return NULL;
+ while (n->kids[0])
+ n = n->kids[0];
+ e->node = n;
+ e->posn = 0;
+ return n->elems[0];
+}
+
+void *next23(tree23 *t, enum23 *e) {
+ node23 *n = (node23 *)e->node;
+ int pos = e->posn;
+
+ if (n->kids[pos+1]) {
+ n = n->kids[pos+1];
+ while (n->kids[0])
+ n = n->kids[0];
+ e->node = n;
+ e->posn = 0;
+ return n->elems[0];
+ }
+
+ if (pos == 0 && n->elems[1]) {
+ e->posn = 1;
+ return n->elems[1];
+ }
+
+ do {
+ node23 *nn = n->parent;
+ if (nn == NULL)
+ return NULL; /* end of tree */
+ pos = (nn->kids[0] == n ? 0 :
+ nn->kids[1] == n ? 1 : 2);
+ n = nn;
+ } while (pos == 2 || n->kids[pos+1] == NULL);
+
+ e->node = n;
+ e->posn = pos;
+ return n->elems[pos];
+}
diff --git a/ustring.c b/ustring.c
index 42c5683..17ef3bd 100644
--- a/ustring.c
+++ b/ustring.c
@@ -57,12 +57,31 @@ int ustrcmp(wchar_t *lhs, wchar_t *rhs) {
return 0;
}
+wchar_t utolower(wchar_t c) {
+ if (c == L'\0')
+ return c; /* this property needed by ustricmp */
+ /* FIXME: this doesn't even come close */
+ if (c >= 'A' && c <= 'Z')
+ c += 'a'-'A';
+ return c;
+}
+
+int ustricmp(wchar_t *lhs, wchar_t *rhs) {
+ wchar_t lc, rc;
+ while ((lc = utolower(*lhs)) == (rc = utolower(*rhs)) && lc && rc)
+ lhs++, rhs++;
+ if (!lc && !rc)
+ return 0;
+ if (lc < rc)
+ return -1;
+ else
+ return 1;
+}
+
wchar_t *ustrlow(wchar_t *s) {
wchar_t *p = s;
while (*p) {
- /* FIXME: this doesn't even come close */
- if (*p >= 'A' && *p <= 'Z')
- *p += 'a'-'A';
+ *p = utolower(*p);
p++;
}
return s;