Further development; mid-end index handling pretty much there!

[originally from svn r238]
author: Simon Tatham <anakin@pobox.com> 1999-10-18 18:03:37 +0000
committer: Simon Tatham <anakin@pobox.com> 1999-10-18 18:03:37 +0000
commit: e44f985bd4f796d4c4b11eb3555436dbaa2d163b (patch)
tree: 8e037d5b32c5349760277e79ac53993b34035885
parent: 00f6e0ee13c753d98e8665ad1ff2e992f43ef6e4 (diff)
download: halibut-e44f985bd4f796d4c4b11eb3555436dbaa2d163b.zip
halibut-e44f985bd4f796d4c4b11eb3555436dbaa2d163b.tar.gz
halibut-e44f985bd4f796d4c4b11eb3555436dbaa2d163b.tar.bz2
halibut-e44f985bd4f796d4c4b11eb3555436dbaa2d163b.tar.xz
11 files changed, 572 insertions, 157 deletions
diff --git a/Makefile b/Makefile
index ab67cbc..d89b105 100644
--- a/Makefile
+++ b/Makefile
@@ -43,7 +43,7 @@ endif
 
 SRC := ../
 
-MODULES := main malloc ustring error help licence version misc
+MODULES := main malloc ustring error help licence version misc tree23
 MODULES += input keywords contents index style biblio
 
 OBJECTS := $(addsuffix .o,$(MODULES))
diff --git a/buttress.h b/buttress.h
index 786c6e2..9bdfcfc 100644
--- a/buttress.h
+++ b/buttress.h
@@ -27,6 +27,7 @@ typedef struct keywordlist_Tag keywordlist;
 typedef struct keyword_Tag keyword;
 typedef struct userstyle_Tag userstyle;
 typedef struct numberstate_Tag numberstate;
+typedef struct index_Tag index;
 
 /*
  * Data structure to hold a file name and index, a line and a
@@ -138,7 +139,8 @@ enum {
     err_nestedstyles,		       /* unable to nest text styles */
     err_nestedindex,		       /* unable to nest `\i' thingys */
     err_nosuchkw,		       /* unresolved cross-reference */
-    err_multiBR			       /* multiple \BRs on same keyword */
+    err_multiBR,		       /* multiple \BRs on same keyword */
+    err_nosuchidxtag		       /* \IM on unknown index tag (warning) */
 };
 
 /*
@@ -172,7 +174,9 @@ wchar_t *ustrdup(wchar_t *s);
 char *ustrtoa(wchar_t *s, char *outbuf, int size);
 int ustrlen(wchar_t *s);
 wchar_t *ustrcpy(wchar_t *dest, wchar_t *source);
+wchar_t utolower(wchar_t);
 int ustrcmp(wchar_t *lhs, wchar_t *rhs);
+int ustricmp(wchar_t *lhs, wchar_t *rhs);
 wchar_t *ustrlow(wchar_t *s);
 wchar_t *ustrftime(wchar_t *fmt, struct tm *timespec);
 
@@ -201,11 +205,12 @@ stack stk_new(void);
 void stk_free(stack);
 void stk_push(stack, void *);
 void *stk_pop(stack);
+int compare_wordlists(word *a, word *b);
 
 /*
  * input.c
  */
-paragraph *read_input(input *in);
+paragraph *read_input(input *in, index *idx);
 
 /*
  * keywords.c
@@ -232,8 +237,12 @@ void subst_keywords(paragraph *, keywordlist *);
 /*
  * index.c
  */
-/* index_merge takes responsibility for freeing arg 3 but not arg 2 */
-void index_merge(int is_explicit, wchar_t *, word *);
+index *make_index(void);
+void cleanup_index(index *);
+/* index_merge takes responsibility for freeing arg 3 iff implicit; never
+ * takes responsibility for arg 2 */
+void index_merge(index *, int is_explicit, wchar_t *, word *);
+void build_index(index *);
 
 /*
  * contents.c
@@ -253,4 +262,19 @@ void gen_citations(paragraph *, keywordlist *);
 struct userstyle_Tag {
 };
 
+/*
+ * tree23.c
+ */
+typedef struct tree23_Tag tree23;
+typedef struct enum23_Tag {
+    void *node;
+    int posn;
+} enum23;
+tree23 *newtree23(void);
+void freetree23(tree23 *);
+void *add23(tree23 *, void *, int (*cmp)(void *, void *));
+void *find23(tree23 *, void *, int (*cmp)(void *, void *));
+void *first23(tree23 *, enum23 *);
+void *next23(tree23 *, enum23 *);
+
 #endif
diff --git a/error.c b/error.c
index cc574cc..704ae03 100644
--- a/error.c
+++ b/error.c
@@ -135,6 +135,13 @@ static void do_error(int code, va_list ap) {
 	sprintf(error, "multiple `\\BR' entries given for `%.200s'", sp);
 	flags = FILEPOS;
 	break;
+      case err_nosuchidxtag:
+	wsp = va_arg(ap, wchar_t *);
+	sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+	sprintf(error, "`\\IM' on unknown index tag `%.200s'", sp);
+	flags = 0;
+	/* FIXME: need to get a filepos to here somehow */
+	break;
     }
 
     if (flags & PREFIX)
diff --git a/index.c b/index.c
index 7c73565..3f629a0 100644
--- a/index.c
+++ b/index.c
@@ -6,167 +6,200 @@
 #include <stdlib.h>
 #include "buttress.h"
 
-#if 0
-typedef struct tag_node23 node23;
-typedef struct tag_tree23 tree23;
-typedef struct tag_elem23 elem23;
+typedef struct indextag_Tag indextag;
+typedef struct indexentry_Tag indexentry;
 
-int cmp23(elem23 *, elem23 *);
+struct index_Tag {
+    tree23 *tags;		       /* holds type `indextag' */
+    tree23 *entries;		       /* holds type `indexentry' */
+};
 
-struct tag_elem23 {
-    char *string;
+struct indextag_Tag {
+    wchar_t *name;
+    word *implicit_text;
+    word **explicit_texts;
+    int nexplicit, explicit_size;
+    int nrefs;
+    indexentry **refs;		       /* array of entries referenced by tag */
 };
 
-struct tag_tree23 {
-    node23 *root;
+struct indexentry_Tag {
+    word *text;
+    void *backend_data;		       /* private to back end */
 };
 
-struct tag_node23 {
-    node23 *parent;
-    node23 *kids[3];
-    elem23 *elems[2];
+index *make_index(void) {
+    index *ret = mknew(index);
+    ret->tags = newtree23();
+    ret->entries = newtree23();
+    return ret;
 }
 
-int cmp23(elem23 *a, elem23 *b) {
-    return strcmp(a->string, b->string);
+static indextag *make_indextag(void) {
+    indextag *ret = mknew(indextag);
+    ret->name = NULL;
+    ret->implicit_text = NULL;
+    ret->explicit_texts = NULL;
+    ret->nexplicit = ret->explicit_size = ret->nrefs = 0;
+    ret->refs = NULL;
+    return ret;
 }
 
-void add23(tree23 *t, elem23 *e) {
-    node23 *n, *left, *right;
-    int c;
-
-    if (t->root == NULL) {
-	t->root = mknew(node23);
-	t->root->elems[1] = NULL;
-	t->root->kids[0] = t->root->kids[1] = t->root->kids[2] = NULL;
-	t->root->parent = NULL;
-	t->root->elems[0] = e;
-	return;
-    }
+static int compare_tags(void *av, void *bv) {
+    indextag *a = (indextag *)av, *b = (indextag *)bv;
+    return ustricmp(a->name, b->name);
+}
 
-    np = &t->root;
-    while (*np) {
-	n = *np;
-	c = cmp32(e, n->elems[0]);
-	if (c == 0) {
-	    printf("present already\n");
-	    return;
-	} else if (c < 0) {
-	    np = &n->kids[0];
-	} else {
-	    if (n->elems[1] == NULL || (c = cmp32(e, n->elems[1])) < 0)
-		np = &n->kids[1];
-	    else if (c > 0)
-		np = &n->kids[2];
-	    else {
-		printf("present already\n");
-		return;
-	    }
-	}
-    }
+static int compare_entries(void *av, void *bv) {
+    indexentry *a = (indexentry *)av, *b = (indexentry *)bv;
+    return compare_wordlists(a->text, b->text);    
+}
+
+/*
+ * Add a \IM. `tags' points to a zero-terminated chain of
+ * zero-terminated strings ("first\0second\0thirdandlast\0\0").
+ * `text' points to a word list.
+ *
+ * Guarantee on calling sequence: all implicit merges are given
+ * before the explicit ones.
+ */
+void index_merge(index *idx, int is_explicit, wchar_t *tags, word *text) {
+    indextag *t, *existing;
 
     /*
-     * We need to insert the new element in n at position np.
+     * FIXME: want to warn on overlapping source sets.
      */
-    left = NULL;
-    right = NULL;
-    while (n) {
-	if (n->elems[1] == NULL) {
+    for (; *tags; tags += 1+ustrlen(tags)) {
+	t = make_indextag();
+	t->name = tags;
+	existing = add23(idx->tags, t, compare_tags);
+	if (existing == t) {
 	    /*
-	     * Insert in a 2-node; simple.
+	     * Every tag has an implicit \IM. So if this tag
+	     * doesn't exist and we're explicit, then we should
+	     * warn (and drop it, since it won't be referenced).
 	     */
-	    if (np == &n->kids[0]) {
-		n->kids[2] = n->kids[1];
-		n->elems[1] = n->elems[0];
-		n->kids[1] = right;
-		n->elems[0] = e;
-		n->kids[0] = left;
-	    } else { /* np == &n->kids[1] */
-		n->kids[2] = right;
-		n->elems[1] = e;
-		n->kids[1] = left;
+	    if (is_explicit) {
+		error(err_nosuchidxtag, tags);
+		continue;
 	    }
-	    break;
-	} else {
-	    node23 *m = mknew(node23);
 	    /*
-	     * Insert in a 3-node; split into two 2-nodes and move
-	     * focus up a level.
+	     * Otherwise, this is a new tag with an implicit \IM.
 	     */
-	    if (np == &n->kids[0]) {
-		m->kids[0] = left;
-		m->elems[0] = e;
-		m->kids[1] = right;
-		e = n->elems[0];
-		n->kids[0] = n->kids[1];
-		n->elems[1] = n->elems[0];
-		n->kids[1] = n->kids[2];
-	    } else if (np == &n->kids[1]) {
-		m->kids[0] = n->kids[0];
-		m->elems[0] = n->elems[0];
-		m->kids[1] = left;
-		/* e = e; */
-		n->kids[0] = right;
-		n->elems[0] = n->elems[1];
-		n->kids[1] = n->kids[2];
-	    } else { /* np == &n->kids[2] */
-		m->kids[0] = n->kids[0];
-		m->elems[0] = n->elems[0];
-		m->kids[1] = n->kids[1];
-		e = n->elems[1];
-		n->kids[0] = left;
-		n->elems[0] = e;
-		n->kids[1] = right;
+	    t->name = ustrdup(tags);
+	    t->implicit_text = text;
+	} else {
+	    sfree(t);
+	    t = existing;
+	    if (!is_explicit) {
+ 		/*
+		 * An implicit \IM for a tag that's had an implicit
+		 * \IM before. FIXME: we should check the text
+		 * against the existing text and warn on
+		 * differences. And check the tag for case match
+		 * against the existing tag, likewise.
+		 */
+	    } else {
+		/*
+		 * An explicit \IM added to a valid tag. In
+		 * particular, this removes the implicit \IM if
+		 * present.
+		 */
+		if (t->implicit_text) {
+		    free_word_list(t->implicit_text);
+		    t->implicit_text = NULL;
+		}
+		if (t->nexplicit >= t->explicit_size) {
+		    t->explicit_size = t->nexplicit + 8;
+		    t->explicit_texts = resize(t->explicit_texts,
+					       t->explicit_size);
+		}
+		t->explicit_texts[t->nexplicit++] = text;
 	    }
-	    m->kids[2] = n->kids[2] = NULL;
-	    m->elems[1] = n->elems[1] = NULL;
-	    left = m;
-	    right = n;
 	}
-	n = n->parent;
     }
+}
 
-    /*
-     * If we've come out of here by `break', n will still be
-     * non-NULL and we've finished. If we've come here because n is
-     * NULL, we need to create a new root for the tree because the
-     * old one has just split into two.
-     */
-    if (!n) {
-	t->root = mknew(node23);
-	t->root->kids[0] = left;
-	t->root->elems[0] = e;
-	t->root->kids[1] = right;
-	t->root->elems[1] = NULL;
-	t->root->kids[2] = NULL;
+/*
+ * Build the final-form index. We now have every tag, with every
+ * \IM, set up in a 2-3 tree indexed by tag. We now want to collate
+ * the RHSes of the \IMs, and sort by final form, and decorate the
+ * entries in the original 2-3 tree with pointers to the RHS
+ * entries.
+ */
+void build_index(index *i) {
+    indextag *t, **ta;
+    enum23 e;
+    int j;
+
+    for (t = (indextag *)first23(i->tags, &e); t;
+	 t = (indextag *)next23(i->tags, &e)) {
+	if (t->implicit_text) {
+	    t->nrefs = 1;
+	    ta = &t->implicit_text;
+	} else {
+	    t->nrefs = t->nexplicit;
+	    ta = t->explicit_texts;
+	}
+	t->refs = mknewa(indexentry *, t->nrefs);
+	for (j = 0; j < t->nrefs; j++) {
+	    indexentry *ent = mknew(indexentry);
+	    ent->text = *ta++;
+	    t->refs[j] = add23(i->entries, ent, compare_entries);
+	    if (t->refs[j] != ent)     /* duplicate */
+		sfree(ent);
+	}
+    }
+}
+
+void cleanup_index(index *i) {
+    indextag *t;
+    indexentry *ent;
+    enum23 e;
+    int j;
+
+    for (t = (indextag *)first23(i->tags, &e); t;
+	 t = (indextag *)next23(i->tags, &e)) {
+	sfree(t->name);
+	free_word_list(t->implicit_text);
+	sfree(t->explicit_texts);
+	sfree(t->refs);
+	sfree(t);
     }
+    freetree23(i->tags);
+    for (ent = (indexentry *)first23(i->entries, &e); ent;
+	 ent = (indexentry *)next23(i->entries, &e)) {
+	sfree(ent);
+    }
+    freetree23(i->entries);
+    sfree(i);
 }
 
-void find23(tree23 *t, elem23 *e) {
+void index_debug(index *i) {
+    indextag *t;
+    enum23 e;
+    int j;
+
+    for (t = (indextag *)first23(i->tags, &e); t;
+	 t = (indextag *)next23(i->tags, &e)) {
+	if (t->implicit_text)
+	    dbg_prtmerge(0, t->name, t->implicit_text);
+	for (j = 0; j < t->nexplicit; j++)
+	    dbg_prtmerge(1, t->name, t->explicit_texts[j]);
+    }
 }
-#endif
+#define DEBUG
 
+#ifdef DEBUG
 static void dbg_prtwordlist(int level, word *w);
 
-void index_merge(int is_explicit, wchar_t *tags, word *text) {
-    wchar_t *wp;
-    printf("\\IM: %splicit: ", is_explicit ? "ex" : "im");
-    if (tags) {
-	wp = tags;
-	while (*wp) {
-	    putchar('\"');
-	    for (; *wp; wp++)
-		putchar(*wp);
-	    putchar('\"');
-	    if (*++wp)
-		printf(", ");
-	}
-    } else
-	printf("(no keyword)");
-    printf(" {\n");
+static void dbg_prtmerge(int is_explicit, wchar_t *tag, word *text) {
+    printf("\\IM: %splicit: \"", is_explicit ? "ex" : "im");
+    for (; *tag; tag++)
+	putchar(*tag);
+    printf("\" {\n");
     dbg_prtwordlist(1, text);
     printf("}\n");
-    free_word_list(text);
 }
 
 static void dbg_prtwordlist(int level, word *w) {
@@ -188,3 +221,4 @@ static void dbg_prtwordlist(int level, word *w) {
 	printf("\n");
     }
 }
+#endif
diff --git a/input.c b/input.c
index 3749ffa..c5d0b5c 100644
--- a/input.c
+++ b/input.c
@@ -442,7 +442,7 @@ static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) {
 /*
  * Reads a single file (ie until get() returns EOF)
  */
-static void read_file(paragraph ***ret, input *in) {
+static void read_file(paragraph ***ret, input *in, index *idx) {
     token t;
     paragraph par;
     word wd, **whptr, **idximplicit;
@@ -707,7 +707,7 @@ static void read_file(paragraph ***ret, input *in) {
 			    ustrlow(indexword->text);
 			indexing = FALSE;
 			rdadd(&indexstr, L'\0');
-			index_merge(FALSE, indexstr.text, idxwordlist);
+			index_merge(idx, FALSE, indexstr.text, idxwordlist);
 			sfree(indexstr.text);
 		    }
 		    if (sitem->type & stack_hyper) {
@@ -986,7 +986,7 @@ static void read_file(paragraph ***ret, input *in) {
     dtor(t);
 }
 
-paragraph *read_input(input *in) {
+paragraph *read_input(input *in, index *idx) {
     paragraph *head = NULL;
     paragraph **hptr = &head;
 
@@ -994,7 +994,7 @@ paragraph *read_input(input *in) {
 	in->currfp = fopen(in->filenames[in->currindex], "r");
 	if (in->currfp) {
 	    setpos(in, in->filenames[in->currindex]);
-	    read_file(&hptr, in);
+	    read_file(&hptr, in, idx);
 	}
 	in->currindex++;
     }
diff --git a/keywords.c b/keywords.c
index c2d9ed6..c446761 100644
--- a/keywords.c
+++ b/keywords.c
@@ -117,7 +117,7 @@ keywordlist *get_keywords(paragraph *source) {
 	    }
 	} else {
 	    if (kl->nlooseends >= kl->looseendssize) {
-		kl->looseendssize += 32;
+		kl->looseendssize = kl->nlooseends + 32;
 		kl->looseends = resize(kl->looseends, kl->looseendssize);
 	    }
 	    kl->looseends[kl->nlooseends++] = source->kwtext;
diff --git a/main.c b/main.c
index 0320526..1fef5e4 100644
--- a/main.c
+++ b/main.c
@@ -166,6 +166,7 @@ int main(int argc, char **argv) {
     {
 	input in;
 	paragraph *sourceform, *p;
+	index *idx;
 	keywordlist *keywords;
 
 	in.filenames = infiles;
@@ -175,7 +176,9 @@ int main(int argc, char **argv) {
 	in.npushback = 0;
 	in.reportcols = reportcols;
 
-	sourceform = read_input(&in);
+	idx = make_index();
+
+	sourceform = read_input(&in, idx);
 	if (!sourceform)
 	    exit(EXIT_FAILURE);
 
@@ -185,18 +188,19 @@ int main(int argc, char **argv) {
 	gen_citations(sourceform, keywords);
 	subst_keywords(sourceform, keywords);
 
-	for (p = sourceform; p; p = p->next) {
-	    if (p->type == para_IM) {
-		index_merge(TRUE, p->keyword, p->words);
-		p->words = NULL;       /* this has now been freed */
-	    }
-	}
+	for (p = sourceform; p; p = p->next)
+	    if (p->type == para_IM)
+		index_merge(idx, TRUE, p->keyword, p->words);
+
+	build_index(idx);
+	index_debug(idx);
 
 	dbg_prtkws(keywords);
 	dbg_prtsource(sourceform);
 
 	free_para_list(sourceform);
 	free_keywords(keywords);
+	cleanup_index(idx);
     }
 
     return 0;
diff --git a/malloc.c b/malloc.c
index 1a2be7a..0f42bde 100644
--- a/malloc.c
+++ b/malloc.c
@@ -9,6 +9,8 @@
 #ifdef LOGALLOC
 #define LOGPARAMS char *file, int line,
 static FILE *logallocfp = NULL;
+static int logline = 2;		       /* off by 1: `null pointer is' */
+static void loginc(void) { }
 static void logallocinit(void) {
     if (!logallocfp) {
 	logallocfp = fopen("malloc.log", "w");
@@ -27,9 +29,11 @@ static void logprintf(char *fmt, ...) {
     va_end(ap);
 }
 #define LOGPRINT(x) ( logallocinit(), logprintf x )
+#define LOGINC do { loginc(); logline++; } while (0)
 #else
 #define LOGPARAMS
 #define LOGPRINT(x)
+#define LOGINC ((void)0)
 #endif
 
 /*
@@ -37,11 +41,14 @@ static void logprintf(char *fmt, ...) {
  * can do nothing except die when it's out of memory anyway
  */
 void *(smalloc)(LOGPARAMS int size) {
-    void *p = malloc(size);
+    void *p;
+    LOGINC;
+    LOGPRINT(("%s %d malloc(%ld)",
+	      file, line, (long)size));
+    p = malloc(size);
     if (!p)
 	fatal(err_nomemory);
-    LOGPRINT(("%s %d malloc(%ld) returns %p\n",
-	      file, line, (long)size, p));
+    LOGPRINT((" returns %p\n", p));
     return p;
 }
 
@@ -50,9 +57,10 @@ void *(smalloc)(LOGPARAMS int size) {
  */
 void (sfree)(LOGPARAMS void *p) {
     if (p) {
-	free(p);
+	LOGINC;
 	LOGPRINT(("%s %d free(%p)\n",
 		  file, line, p));
+	free(p);
     }
 }
 
@@ -62,13 +70,17 @@ void (sfree)(LOGPARAMS void *p) {
 void *(srealloc)(LOGPARAMS void *p, int size) {
     void *q;
     if (p) {
+	LOGINC;
+	LOGPRINT(("%s %d realloc(%p,%ld)",
+		  file, line, p, (long)size));
 	q = realloc(p, size);
-	LOGPRINT(("%s %d realloc(%p,%ld) returns %p\n",
-		  file, line, p, (long)size, q));
+	LOGPRINT((" returns %p\n", q));
     } else {
+	LOGINC;
+	LOGPRINT(("%s %d malloc(%ld)",
+		  file, line, (long)size));
 	q = malloc(size);
-	LOGPRINT(("%s %d malloc(%ld) returns %p\n",
-		  file, line, (long)size, q));
+	LOGPRINT((" returns %p\n", q));
     }
     if (!q)
 	fatal(err_nomemory);
diff --git a/misc.c b/misc.c
index c9ea115..3ead01c 100644
--- a/misc.c
+++ b/misc.c
@@ -40,3 +40,47 @@ void *stk_pop(stack s) {
     else
 	return NULL;
 }
+
+int compare_wordlists(word *a, word *b) {
+    int t;
+    while (a && b) {
+	if (a->type != b->type)
+	    return (a->type < b->type ? -1 : +1);   /* FIXME? */
+	t = a->type;
+	if ((t != word_Normal && t != word_Code &&
+	     t != word_WeakCode && t != word_Emph) ||
+	    a->alt || b->alt) {
+	    int c;
+	    if (a->text && b->text) {
+		c = ustricmp(a->text, b->text);
+		if (c)
+		    return c;
+	    }
+	    c = compare_wordlists(a->alt, b->alt);
+	    if (c)
+		return c;
+	    a = a->next;
+	    b = b->next;
+	} else {
+	    wchar_t *ap = a->text, *bp = b->text;
+	    while (*ap && *bp) {
+		wchar_t ac = utolower(*ap), bc = utolower(*bp);
+		if (ac != bc)
+		    return (ac < bc ? -1 : +1);
+		if (!*++ap && a->next && a->next->type == t && !a->next->alt)
+		    a = a->next, ap = a->text;
+		if (!*++bp && b->next && b->next->type == t && !b->next->alt)
+		    b = b->next, bp = b->text;
+	    }
+	    if (*ap || *bp)
+		return (*ap ? +1 : -1);
+	    a = a->next;
+	    b = b->next;
+	}
+    }
+
+    if (a || b)
+	return (a ? +1 : -1);
+    else
+	return 0;
+}
diff --git a/tree23.c b/tree23.c
new file mode 100644
index 0000000..4a0d60c
--- /dev/null
+++ b/tree23.c
@@ -0,0 +1,271 @@
+/*
+ * tree23.c: reasonably generic 2-3 tree routines. Currently only
+ * supports insert and find operations.
+ */
+
+#include <stdio.h>		       /* we only need this for NULL :-) */
+
+/*
+ * This module should be easily de-Buttressable. All it relies on
+ * from the rest of Buttress is the macro `mknew':
+ *
+ *   #define mknew(typ) ( (typ *) malloc_like_function (sizeof (typ)) )
+ *
+ * and the free-like function `sfree'.
+ */
+#include "buttress.h"
+
+typedef struct node23_Tag node23;
+/* typedef struct tree23_Tag tree23; */
+
+struct tree23_Tag {
+    node23 *root;
+};
+
+struct node23_Tag {
+    node23 *parent;
+    node23 *kids[3];
+    void *elems[2];
+};
+
+/*
+ * Create a 2-3 tree.
+ */
+tree23 *newtree23(void) {
+    tree23 *ret = mknew(tree23);
+    ret->root = NULL;
+    return ret;
+}
+
+/*
+ * Free a 2-3 tree (not including freeing the elements).
+ */
+static void freenode23(node23 *n) {
+    if (!n)
+	return;
+    freenode23(n->kids[0]);
+    freenode23(n->kids[1]);
+    freenode23(n->kids[2]);
+    sfree(n);
+}
+void freetree23(tree23 *t) {
+    freenode23(t->root);
+    sfree(t);
+}
+
+/*
+ * Add an element e to a 2-3 tree t. Returns e on success, or if an
+ * existing element compares equal, returns that.
+ */
+void *add23(tree23 *t, void *e, int (*cmp)(void *, void *)) {
+    node23 *n, **np, *left, *right;
+    void *orig_e = e;
+    int c;
+
+    if (t->root == NULL) {
+	t->root = mknew(node23);
+	t->root->elems[1] = NULL;
+	t->root->kids[0] = t->root->kids[1] = t->root->kids[2] = NULL;
+	t->root->parent = NULL;
+	t->root->elems[0] = e;
+	return orig_e;
+    }
+
+    np = &t->root;
+    while (*np) {
+	n = *np;
+	c = cmp(e, n->elems[0]);
+	if (c == 0) {
+	    /* node already exists; return existing one */
+	    return n->elems[0];
+	} else if (c < 0) {
+	    np = &n->kids[0];
+	} else {
+	    if (n->elems[1] == NULL || (c = cmp(e, n->elems[1])) < 0)
+		np = &n->kids[1];
+	    else if (c > 0)
+		np = &n->kids[2];
+	    else {
+		/* node already exists; return existing one */
+		return n->elems[1];
+	    }
+	}
+    }
+
+    /*
+     * We need to insert the new element in n at position np.
+     */
+    left = NULL;
+    right = NULL;
+    while (n) {
+	if (n->elems[1] == NULL) {
+	    /*
+	     * Insert in a 2-node; simple.
+	     */
+	    if (np == &n->kids[0]) {
+		n->kids[2] = n->kids[1];
+		n->elems[1] = n->elems[0];
+		n->kids[1] = right;
+		n->elems[0] = e;
+		n->kids[0] = left;
+	    } else { /* np == &n->kids[1] */
+		n->kids[2] = right;
+		n->elems[1] = e;
+		n->kids[1] = left;
+	    }
+	    if (n->kids[0]) n->kids[0]->parent = n;
+	    if (n->kids[1]) n->kids[1]->parent = n;
+	    if (n->kids[2]) n->kids[2]->parent = n;
+	    break;
+	} else {
+	    node23 *m = mknew(node23);
+	    m->parent = n->parent;
+	    /*
+	     * Insert in a 3-node; split into two 2-nodes and move
+	     * focus up a level.
+	     */
+	    if (np == &n->kids[0]) {
+		m->kids[0] = left;
+		m->elems[0] = e;
+		m->kids[1] = right;
+		e = n->elems[0];
+		n->kids[0] = n->kids[1];
+		n->elems[0] = n->elems[1];
+		n->kids[1] = n->kids[2];
+	    } else if (np == &n->kids[1]) {
+		m->kids[0] = n->kids[0];
+		m->elems[0] = n->elems[0];
+		m->kids[1] = left;
+		/* e = e; */
+		n->kids[0] = right;
+		n->elems[0] = n->elems[1];
+		n->kids[1] = n->kids[2];
+	    } else { /* np == &n->kids[2] */
+		m->kids[0] = n->kids[0];
+		m->elems[0] = n->elems[0];
+		m->kids[1] = n->kids[1];
+		n->kids[0] = left;
+		n->elems[0] = e;
+		n->kids[1] = right;
+		e = n->elems[1];
+	    }
+	    m->kids[2] = n->kids[2] = NULL;
+	    m->elems[1] = n->elems[1] = NULL;
+	    if (m->kids[0]) m->kids[0]->parent = m;
+	    if (m->kids[1]) m->kids[1]->parent = m;
+	    if (n->kids[0]) n->kids[0]->parent = n;
+	    if (n->kids[1]) n->kids[1]->parent = n;
+	    left = m;
+	    right = n;
+	}
+	if (n->parent)
+	    np = (n->parent->kids[0] == n ? &n->parent->kids[0] :
+		  n->parent->kids[1] == n ? &n->parent->kids[1] :
+		  &n->parent->kids[2]);
+	n = n->parent;
+    }
+
+    /*
+     * If we've come out of here by `break', n will still be
+     * non-NULL and we've finished. If we've come here because n is
+     * NULL, we need to create a new root for the tree because the
+     * old one has just split into two.
+     */
+    if (!n) {
+	t->root = mknew(node23);
+	t->root->kids[0] = left;
+	t->root->elems[0] = e;
+	t->root->kids[1] = right;
+	t->root->elems[1] = NULL;
+	t->root->kids[2] = NULL;
+	t->root->parent = NULL;
+	if (t->root->kids[0]) t->root->kids[0]->parent = t->root;
+	if (t->root->kids[1]) t->root->kids[1]->parent = t->root;
+    }
+
+    return orig_e;
+}
+
+/*
+ * Find an element e in a 2-3 tree t. Returns NULL if not found. e
+ * is always passed as the first argument to cmp, so cmp can be an
+ * asymmetric function if desired.
+ */
+void *find23(tree23 *t, void *e, int (*cmp)(void *, void *)) {
+    node23 *n;
+    int c;
+
+    if (t->root == NULL)
+	return NULL;
+
+    n = t->root;
+    while (n) {
+	c = cmp(e, n->elems[0]);
+	if (c == 0) {
+	    return n->elems[0];
+	} else if (c < 0) {
+	    n = n->kids[0];
+	} else {
+	    if (n->elems[1] == NULL || (c = cmp(e, n->elems[1])) < 0)
+		n = n->kids[1];
+	    else if (c > 0)
+		n = n->kids[2];
+	    else {
+		return n->elems[1];
+	    }
+	}
+    }
+
+    /*
+     * We've found our way to the bottom of the tree and we know
+     * where we would insert this node if we wanted to. But it
+     * isn't there.
+     */
+    return NULL;
+}
+
+/*
+ * Iterate over the elements of a tree23, in order.
+ */
+void *first23(tree23 *t, enum23 *e) {
+    node23 *n = t->root;
+    if (!n)
+	return NULL;
+    while (n->kids[0])
+	n = n->kids[0];
+    e->node = n;
+    e->posn = 0;
+    return n->elems[0];
+}
+
+void *next23(tree23 *t, enum23 *e) {
+    node23 *n = (node23 *)e->node;
+    int pos = e->posn;
+
+    if (n->kids[pos+1]) {
+	n = n->kids[pos+1];
+	while (n->kids[0])
+	    n = n->kids[0];
+	e->node = n;
+	e->posn = 0;
+	return n->elems[0];
+    }
+
+    if (pos == 0 && n->elems[1]) {
+	e->posn = 1;
+	return n->elems[1];
+    }
+
+    do {
+	node23 *nn = n->parent;
+	if (nn == NULL)
+	    return NULL;	       /* end of tree */
+	pos = (nn->kids[0] == n ? 0 :
+	       nn->kids[1] == n ? 1 : 2);
+	n = nn;
+    } while (pos == 2 || n->kids[pos+1] == NULL);
+
+    e->node = n;
+    e->posn = pos;
+    return n->elems[pos];
+}
diff --git a/ustring.c b/ustring.c
index 42c5683..17ef3bd 100644
--- a/ustring.c
+++ b/ustring.c
@@ -57,12 +57,31 @@ int ustrcmp(wchar_t *lhs, wchar_t *rhs) {
     return 0;
 }
 
+wchar_t utolower(wchar_t c) {
+    if (c == L'\0')
+	return c;		       /* this property needed by ustricmp */
+    /* FIXME: this doesn't even come close */
+    if (c >= 'A' && c <= 'Z')
+	c += 'a'-'A';
+    return c;
+}
+
+int ustricmp(wchar_t *lhs, wchar_t *rhs) {
+    wchar_t lc, rc;
+    while ((lc = utolower(*lhs)) == (rc = utolower(*rhs)) && lc && rc)
+	lhs++, rhs++;
+    if (!lc && !rc)
+	return 0;
+    if (lc < rc)
+	return -1;
+    else
+	return 1;
+}
+
 wchar_t *ustrlow(wchar_t *s) {
     wchar_t *p = s;
     while (*p) {
-	/* FIXME: this doesn't even come close */
-	if (*p >= 'A' && *p <= 'Z')
-	    *p += 'a'-'A';
+	*p = utolower(*p);
 	p++;
     }
     return s;
author	Simon Tatham <anakin@pobox.com>	1999-10-18 18:03:37 +0000
committer	Simon Tatham <anakin@pobox.com>	1999-10-18 18:03:37 +0000
commit	e44f985bd4f796d4c4b11eb3555436dbaa2d163b (patch)
tree	8e037d5b32c5349760277e79ac53993b34035885
parent	00f6e0ee13c753d98e8665ad1ff2e992f43ef6e4 (diff)
download	halibut-e44f985bd4f796d4c4b11eb3555436dbaa2d163b.zip halibut-e44f985bd4f796d4c4b11eb3555436dbaa2d163b.tar.gz halibut-e44f985bd4f796d4c4b11eb3555436dbaa2d163b.tar.bz2 halibut-e44f985bd4f796d4c4b11eb3555436dbaa2d163b.tar.xz