diff options
| author | Simon Tatham <anakin@pobox.com> | 1999-10-16 15:23:27 +0000 |
|---|---|---|
| committer | Simon Tatham <anakin@pobox.com> | 1999-10-16 15:23:27 +0000 |
| commit | 00f6e0ee13c753d98e8665ad1ff2e992f43ef6e4 (patch) | |
| tree | c96a462edd1b2ebb631f90420ceb51e266114576 | |
| parent | 9972b0f0d1ce6e08ce6f9505980c9c889ae994bc (diff) | |
| download | halibut-00f6e0ee13c753d98e8665ad1ff2e992f43ef6e4.zip halibut-00f6e0ee13c753d98e8665ad1ff2e992f43ef6e4.tar.gz halibut-00f6e0ee13c753d98e8665ad1ff2e992f43ef6e4.tar.bz2 halibut-00f6e0ee13c753d98e8665ad1ff2e992f43ef6e4.tar.xz | |
Further development: index work, phase I
[originally from svn r237]
| -rw-r--r-- | buttress.h | 5 | ||||
| -rw-r--r-- | contents.c | 1 | ||||
| -rw-r--r-- | index.c | 182 | ||||
| -rw-r--r-- | input.c | 65 | ||||
| -rw-r--r-- | keywords.c | 17 | ||||
| -rw-r--r-- | main.c | 10 | ||||
| -rw-r--r-- | misc.c | 2 |
7 files changed, 262 insertions, 20 deletions
@@ -214,6 +214,9 @@ struct keywordlist_Tag { int nkeywords; int size; keyword **keys; + word **looseends; /* non-keyword list element numbers */ + int nlooseends; + int looseendssize; }; struct keyword_Tag { wchar_t *key; /* the keyword itself */ @@ -229,6 +232,8 @@ void subst_keywords(paragraph *, keywordlist *); /* * index.c */ +/* index_merge takes responsibility for freeing arg 3 but not arg 2 */ +void index_merge(int is_explicit, wchar_t *, word *); /* * contents.c @@ -29,6 +29,7 @@ numberstate *number_init(void) { } void number_free(numberstate *state) { + sfree(state->sectionlevels); sfree(state); } @@ -6,3 +6,185 @@ #include <stdlib.h> #include "buttress.h" +#if 0 +typedef struct tag_node23 node23; +typedef struct tag_tree23 tree23; +typedef struct tag_elem23 elem23; + +int cmp23(elem23 *, elem23 *); + +struct tag_elem23 { + char *string; +}; + +struct tag_tree23 { + node23 *root; +}; + +struct tag_node23 { + node23 *parent; + node23 *kids[3]; + elem23 *elems[2]; +} + +int cmp23(elem23 *a, elem23 *b) { + return strcmp(a->string, b->string); +} + +void add23(tree23 *t, elem23 *e) { + node23 *n, *left, *right; + int c; + + if (t->root == NULL) { + t->root = mknew(node23); + t->root->elems[1] = NULL; + t->root->kids[0] = t->root->kids[1] = t->root->kids[2] = NULL; + t->root->parent = NULL; + t->root->elems[0] = e; + return; + } + + np = &t->root; + while (*np) { + n = *np; + c = cmp32(e, n->elems[0]); + if (c == 0) { + printf("present already\n"); + return; + } else if (c < 0) { + np = &n->kids[0]; + } else { + if (n->elems[1] == NULL || (c = cmp32(e, n->elems[1])) < 0) + np = &n->kids[1]; + else if (c > 0) + np = &n->kids[2]; + else { + printf("present already\n"); + return; + } + } + } + + /* + * We need to insert the new element in n at position np. + */ + left = NULL; + right = NULL; + while (n) { + if (n->elems[1] == NULL) { + /* + * Insert in a 2-node; simple. + */ + if (np == &n->kids[0]) { + n->kids[2] = n->kids[1]; + n->elems[1] = n->elems[0]; + n->kids[1] = right; + n->elems[0] = e; + n->kids[0] = left; + } else { /* np == &n->kids[1] */ + n->kids[2] = right; + n->elems[1] = e; + n->kids[1] = left; + } + break; + } else { + node23 *m = mknew(node23); + /* + * Insert in a 3-node; split into two 2-nodes and move + * focus up a level. + */ + if (np == &n->kids[0]) { + m->kids[0] = left; + m->elems[0] = e; + m->kids[1] = right; + e = n->elems[0]; + n->kids[0] = n->kids[1]; + n->elems[1] = n->elems[0]; + n->kids[1] = n->kids[2]; + } else if (np == &n->kids[1]) { + m->kids[0] = n->kids[0]; + m->elems[0] = n->elems[0]; + m->kids[1] = left; + /* e = e; */ + n->kids[0] = right; + n->elems[0] = n->elems[1]; + n->kids[1] = n->kids[2]; + } else { /* np == &n->kids[2] */ + m->kids[0] = n->kids[0]; + m->elems[0] = n->elems[0]; + m->kids[1] = n->kids[1]; + e = n->elems[1]; + n->kids[0] = left; + n->elems[0] = e; + n->kids[1] = right; + } + m->kids[2] = n->kids[2] = NULL; + m->elems[1] = n->elems[1] = NULL; + left = m; + right = n; + } + n = n->parent; + } + + /* + * If we've come out of here by `break', n will still be + * non-NULL and we've finished. If we've come here because n is + * NULL, we need to create a new root for the tree because the + * old one has just split into two. + */ + if (!n) { + t->root = mknew(node23); + t->root->kids[0] = left; + t->root->elems[0] = e; + t->root->kids[1] = right; + t->root->elems[1] = NULL; + t->root->kids[2] = NULL; + } +} + +void find23(tree23 *t, elem23 *e) { +} +#endif + +static void dbg_prtwordlist(int level, word *w); + +void index_merge(int is_explicit, wchar_t *tags, word *text) { + wchar_t *wp; + printf("\\IM: %splicit: ", is_explicit ? "ex" : "im"); + if (tags) { + wp = tags; + while (*wp) { + putchar('\"'); + for (; *wp; wp++) + putchar(*wp); + putchar('\"'); + if (*++wp) + printf(", "); + } + } else + printf("(no keyword)"); + printf(" {\n"); + dbg_prtwordlist(1, text); + printf("}\n"); + free_word_list(text); +} + +static void dbg_prtwordlist(int level, word *w) { + for (; w; w = w->next) { + wchar_t *wp; + printf("%*sword %d ", level*4, "", w->type); + if (w->text) { + printf("\""); + for (wp = w->text; *wp; wp++) + putchar(*wp); + printf("\""); + } else + printf("(no text)"); + if (w->alt) { + printf(" alt = {\n"); + dbg_prtwordlist(level+1, w->alt); + printf("%*s}", level*4, ""); + } + printf("\n"); + } +} @@ -410,7 +410,10 @@ token get_codepar_token(input *in) { * Adds a new word to a linked list */ static word *addword(word newword, word ***hptrptr) { - word *mnewword = mknew(word); + word *mnewword; + if (!hptrptr) + return NULL; + mnewword = mknew(word); *mnewword = newword; /* structure copy */ mnewword->next = NULL; **hptrptr = mnewword; @@ -442,7 +445,8 @@ static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) { static void read_file(paragraph ***ret, input *in) { token t; paragraph par; - word wd, **whptr; + word wd, **whptr, **idximplicit; + wchar_t utext[2], *wdtext; int style; int already; int iswhite, seenwhite; @@ -456,9 +460,11 @@ static void read_file(paragraph ***ret, input *in) { stack_hyper = 8, /* \W */ } type; word **whptr; /* to restore from \u alternatives */ + word **idximplicit; /* to restore from \u alternatives */ } *sitem; stack parsestk; - word *indexword, *uword; + word *indexword, *uword, *iword; + word *idxwordlist; rdstring indexstr; int index_downcase, index_visible, indexing; const rdstring nullrs = { 0, 0, NULL }; @@ -658,6 +664,8 @@ static void read_file(paragraph ***ret, input *in) { rdadd(&indexstr, ' '); if (!indexing || index_visible) addword(wd, &whptr); + if (indexing) + addword(wd, &idximplicit); iswhite = TRUE; break; case tok_word: @@ -670,6 +678,10 @@ static void read_file(paragraph ***ret, input *in) { wd.fpos = t.pos; addword(wd, &whptr); } + if (indexing) { + wd.text = ustrdup(t.text); + addword(wd, &idximplicit); + } break; case tok_lbrace: error(err_unexbrace, &t.pos); @@ -683,16 +695,20 @@ static void read_file(paragraph ***ret, input *in) { if (!sitem) error(err_unexbrace, &t.pos); else { - if (sitem->type & stack_ualt) + if (sitem->type & stack_ualt) { whptr = sitem->whptr; + idximplicit = sitem->idximplicit; + } if (sitem->type & stack_style) style = word_Normal; if (sitem->type & stack_idx) { indexword->text = ustrdup(indexstr.text); - sfree(indexstr.text); if (index_downcase) ustrlow(indexword->text); indexing = FALSE; + rdadd(&indexstr, L'\0'); + index_merge(FALSE, indexstr.text, idxwordlist); + sfree(indexstr.text); } if (sitem->type & stack_hyper) { wd.text = NULL; @@ -701,6 +717,8 @@ static void read_file(paragraph ***ret, input *in) { wd.fpos = t.pos; if (!indexing || index_visible) addword(wd, &whptr); + if (indexing) + addword(wd, &idximplicit); } } sfree(sitem); @@ -765,7 +783,7 @@ static void read_file(paragraph ***ret, input *in) { time_t thetime = time(NULL); struct tm *broken = localtime(&thetime); already = TRUE; - wd.text = ustrftime(NULL, broken); + wdtext = ustrftime(NULL, broken); wd.type = style; } else error(err_explbr, &t.pos); @@ -781,10 +799,10 @@ static void read_file(paragraph ***ret, input *in) { if (wd.type == word_Normal) { time_t thetime = time(NULL); struct tm *broken = localtime(&thetime); - wd.text = ustrftime(rs.text, broken); + wdtext = ustrftime(rs.text, broken); wd.type = style; } else { - wd.text = ustrdup(rs.text); + wdtext = ustrdup(rs.text); } sfree(rs.text); if (t.type != tok_rbrace) { @@ -792,10 +810,15 @@ static void read_file(paragraph ***ret, input *in) { } } wd.alt = NULL; - if (!indexing || index_visible) + if (!indexing || index_visible) { + wd.text = ustrdup(wdtext); addword(wd, &whptr); - else - sfree(wd.text); + } + if (indexing) { + wd.text = ustrdup(wdtext); + addword(wd, &idximplicit); + } + sfree(wdtext); if (wd.type == word_HyperLink) { /* * Hyperlinks are different: they then @@ -897,22 +920,28 @@ static void read_file(paragraph ***ret, input *in) { index_visible = (type != c_I); index_downcase = (type == c_ii); indexing = TRUE; + idxwordlist = NULL; + idximplicit = &idxwordlist; /* Stack item to close the indexing on exit */ stk_push(parsestk, sitem); } break; case c_u: uchr = t.aux; + utext[0] = uchr; utext[1] = 0; if (!indexing || index_visible) { - wchar_t text[2]; - text[1] = 0; - text[0] = uchr; - wd.text = ustrdup(text); + wd.text = ustrdup(utext); wd.type = style; wd.alt = NULL; wd.fpos = t.pos; uword = addword(wd, &whptr); - } + } else + uword = NULL; + if (indexing) { + wd.text = ustrdup(utext); + iword = addword(wd, &idximplicit); + } else + iword = NULL; dtor(t), t = get_token(in); if (t.type == tok_lbrace) { /* @@ -924,8 +953,10 @@ static void read_file(paragraph ***ret, input *in) { sitem = mknew(struct stack_item); sitem->type = stack_ualt; sitem->whptr = whptr; + sitem->idximplicit = idximplicit; stk_push(parsestk, sitem); - whptr = &uword->alt; + whptr = uword ? &uword->alt : NULL; + idximplicit = iword ? &iword->alt : NULL; } else { if (indexing) rdadd(&indexstr, uchr); @@ -93,6 +93,8 @@ keywordlist *get_keywords(paragraph *source) { kl->nkeywords = 0; kl->size = 0; kl->keys = NULL; + kl->nlooseends = kl->looseendssize = 0; + kl->looseends = NULL; for (; source; source = source->next) { /* * Number the chapter / section / list-item / whatever. @@ -113,6 +115,12 @@ keywordlist *get_keywords(paragraph *source) { p += ustrlen(p) + 1; } } + } else { + if (kl->nlooseends >= kl->looseendssize) { + kl->looseendssize += 32; + kl->looseends = resize(kl->looseends, kl->looseendssize); + } + kl->looseends[kl->nlooseends++] = source->kwtext; } } @@ -125,8 +133,15 @@ keywordlist *get_keywords(paragraph *source) { void free_keywords(keywordlist *kl) { int i; - for (i = 0; i < kl->nkeywords; i++) + while (kl->nlooseends) + free_word_list(kl->looseends[--kl->nlooseends]); + sfree(kl->looseends); + for (i = 0; i < kl->nkeywords; i++) { + if (kl->keys[i]) + free_word_list(kl->keys[i]->text); sfree(kl->keys[i]); + } + sfree(kl->keys); sfree(kl); } @@ -165,7 +165,7 @@ int main(int argc, char **argv) { { input in; - paragraph *sourceform; + paragraph *sourceform, *p; keywordlist *keywords; in.filenames = infiles; @@ -185,10 +185,18 @@ int main(int argc, char **argv) { gen_citations(sourceform, keywords); subst_keywords(sourceform, keywords); + for (p = sourceform; p; p = p->next) { + if (p->type == para_IM) { + index_merge(TRUE, p->keyword, p->words); + p->words = NULL; /* this has now been freed */ + } + } + dbg_prtkws(keywords); dbg_prtsource(sourceform); free_para_list(sourceform); + free_keywords(keywords); } return 0; @@ -13,7 +13,7 @@ struct stackTag { stack stk_new(void) { stack s; - s = mknew(stack); + s = mknew(struct stackTag); s->sp = 0; s->size = 0; s->data = NULL; |