12 files changed, 954 insertions, 143 deletions
diff --git a/Makefile b/Makefile
index 739c91e..518ea56 100644
--- a/Makefile
+++ b/Makefile
@@ -20,6 +20,10 @@ else
 
 CFLAGS += -Wall -W
 
+ifdef LOGALLOC
+CFLAGS += -DLOGALLOC
+endif
+
 ifdef RELEASE
 ifndef VERSION
 VERSION := $(RELEASE)
@@ -40,7 +44,7 @@ endif
 SRC := ../
 
 MODULES := main malloc ustring error help licence version misc
-MODULES += input
+MODULES += input keywords contents index style
 
 OBJECTS := $(addsuffix .o,$(MODULES))
 DEPS := $(addsuffix .d,$(MODULES))
diff --git a/buttress.h b/buttress.h
index 9dfa492..6ec6cd1 100644
--- a/buttress.h
+++ b/buttress.h
@@ -3,6 +3,7 @@
 
 #include <stdio.h>
 #include <wchar.h>
+#include <time.h>
 
 #ifdef __GNUC__
 #define NORETURN __attribute__((__noreturn__))
@@ -15,6 +16,8 @@
 #define FALSE 0
 #endif
 
+#define lenof(x) ( sizeof((x)) / sizeof(*(x)) )
+
 /*
  * Structure tags
  */
@@ -22,6 +25,10 @@ typedef struct input_Tag input;
 typedef struct filepos_Tag filepos;
 typedef struct paragraph_Tag paragraph;
 typedef struct word_Tag word;
+typedef struct keywordlist_Tag keywordlist;
+typedef struct keyword_Tag keyword;
+typedef struct userstyle_Tag userstyle;
+typedef struct numberstate_Tag numberstate;
 
 /*
  * Data structure to hold a file name and index, a line and a
@@ -35,7 +42,7 @@ struct filepos_Tag {
 /*
  * Data structure to hold all the file names etc for input
  */
-#define INPUT_PUSHBACK_MAX 16
+#define INPUT_PUSHBACK_MAX 1
 struct input_Tag {
     char **filenames;		       /* complete list of input files */
     int nfiles;			       /* how many in the list */
@@ -43,7 +50,9 @@ struct input_Tag {
     int currindex;		       /* which one is that in the list */
     int pushback[INPUT_PUSHBACK_MAX];  /* pushed-back input characters */
     int npushback;
-    filepos pos;
+    int reportcols;		       /* report column numbers in errors */
+    filepos pos[1+INPUT_PUSHBACK_MAX];
+    int posptr;
 };
 
 /*
@@ -55,6 +64,8 @@ struct paragraph_Tag {
     int type;
     wchar_t *keyword;		       /* for most special paragraphs */
     word *words;		       /* list of words in paragraph */
+    int aux;			       /* number, in a numbered paragraph */
+    word *kwtext;		       /* chapter/section indication */
 };
 enum {
     para_IM,			       /* index merge */
@@ -73,16 +84,18 @@ enum {
     para_Preamble,
     para_NoCite,
     para_Title,
-    para_VersionID
+    para_VersionID,
+    para_NotParaType		       /* placeholder value */
 };
 
 /*
  * Data structure to hold an individual word
  */
 struct word_Tag {
-    word *next;
+    word *next, *alt;
     int type;
     wchar_t *text;
+    filepos fpos;
 };
 enum {
     word_Normal,
@@ -91,8 +104,11 @@ enum {
     word_WeakCode,		       /* monospaced, normal in text */
     word_UpperXref,		       /* \K */
     word_LowerXref,		       /* \k */
+    word_XrefEnd,		       /* (invisible; no text) */
     word_IndexRef,		       /* (always an invisible one) */
-    word_WhiteSpace		       /* text is NULL or ignorable */
+    word_WhiteSpace,		       /* text is NULL or ignorable */
+    word_HyperLink,		       /* (invisible) */
+    word_HyperEnd		       /* (also invisible; no text) */
 };
 
 /*
@@ -118,15 +134,29 @@ enum {
     err_explbr,			       /* expected `{' after command */
     err_kwexprbr,		       /* expected `}' after cross-ref */
     err_missingrbrace,		       /* unclosed braces at end of para */
-    err_nestedstyles		       /* unable to nest text styles */
+    err_nestedstyles,		       /* unable to nest text styles */
+    err_nestedindex,		       /* unable to nest `\i' thingys */
+    err_nosuchkw		       /* unresolved cross-reference */
 };
 
 /*
  * malloc.c
  */
+#ifdef LOGALLOC
+void *smalloc(char *file, int line, int size);
+void *srealloc(char *file, int line, void *p, int size);
+void sfree(char *file, int line, void *p);
+#define smalloc(x) smalloc(__FILE__, __LINE__, x)
+#define srealloc(x, y) srealloc(__FILE__, __LINE__, x, y)
+#define sfree(x) sfree(__FILE__, __LINE__, x)
+#else
 void *smalloc(int size);
 void *srealloc(void *p, int size);
 void sfree(void *p);
+#endif
+void free_word_list(word *w);
+void free_para_list(paragraph *p);
+word *dup_word_list(word *w);
 
 /*
  * ustring.c
@@ -135,6 +165,9 @@ wchar_t *ustrdup(wchar_t *s);
 char *ustrtoa(wchar_t *s, char *outbuf, int size);
 int ustrlen(wchar_t *s);
 wchar_t *ustrcpy(wchar_t *dest, wchar_t *source);
+int ustrcmp(wchar_t *lhs, wchar_t *rhs);
+wchar_t *ustrlow(wchar_t *s);
+wchar_t *ustrftime(wchar_t *fmt, struct tm *timespec);
 
 /*
  * help.c
@@ -167,4 +200,38 @@ void *stk_pop(stack);
  */
 paragraph *read_input(input *in);
 
+/*
+ * keywords.c
+ */
+struct keywordlist_Tag {
+    int nkeywords;		       /* entries of keys[] in use */
+    int size;			       /* entries of keys[] allocated */
+    keyword **keys;		       /* array of keyword pointers */
+};
+struct keyword_Tag {
+    wchar_t *key;		       /* the keyword itself */
+    word *text;			       /* "Chapter 2", "Appendix Q"... */
+    				       /* (NB: filepos are not set) */
+};
+keywordlist *get_keywords(paragraph *);
+void free_keywords(keywordlist *);
+void subst_keywords(paragraph *, keywordlist *);
+
+/*
+ * index.c
+ */
+
+/*
+ * contents.c
+ */
+numberstate *number_init(void);
+word *number_mktext(numberstate *, int, int, int);
+void number_free(numberstate *);
+
+/*
+ * style.c
+ */
+struct userstyle_Tag {		       /* placeholder: no members yet */
+};
+
 #endif
diff --git a/contents.c b/contents.c
new file mode 100644
index 0000000..c203852
--- /dev/null
+++ b/contents.c
@@ -0,0 +1,150 @@
+/*
+ * contents.c: build a table of contents
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include <limits.h>
+#include "buttress.h"
+
+struct numberstate_Tag {
+    int chapternum;		       /* chapters seen so far (0 = none) */
+    int appendixnum;		       /* 0-based appendix index (-1 = none) */
+    int ischapter;		       /* nonzero in chapter, 0 in appendix */
+    int *sectionlevels;		       /* counter for each section depth */
+    int maxsectlevel;		       /* slots allocated in sectionlevels */
+    int listitem;		       /* last list item number (-1 = none) */
+};
+
+numberstate *number_init(void) {	/* fresh state; pair with number_free() */
+    numberstate *ret = smalloc(sizeof(numberstate));
+    int i;
+    ret->chapternum = 0;
+    ret->appendixnum = ret->listitem = -1;   /* no appendix/list item yet */
+    ret->ischapter = 1;
+    ret->maxsectlevel = 32;
+    ret->sectionlevels = smalloc(ret->maxsectlevel * sizeof(*ret->sectionlevels));
+    for (i = 0; i < ret->maxsectlevel; i++) ret->sectionlevels[i] = 0;
+    return ret;
+}
+
+void number_free(numberstate *state) {	/* also frees the level array */
+    if (state) sfree(state->sectionlevels), sfree(state);
+}
+
+static void dotext(word ***wret, wchar_t *text) {
+    word *w = smalloc(sizeof(*w));
+    /* Normal word holding a private copy of TEXT; fpos is left unset. */
+    w->text = ustrdup(text);
+    w->type = word_Normal;
+    w->next = w->alt = NULL;
+    **wret = w;			       /* link at the current tail... */
+    *wret = &w->next;		       /* ...and make its next the tail */
+}
+
+static void dospace(word ***wret) {
+    word *w = smalloc(sizeof(*w));
+    /* Whitespace word: carries no text of its own; fpos is left unset. */
+    w->text = NULL;
+    w->type = word_WhiteSpace;
+    w->next = w->alt = NULL;
+    **wret = w;			       /* link at the current tail... */
+    *wret = &w->next;		       /* ...and make its next the tail */
+}
+
+static void donumber(word ***wret, int num) {  /* append NUM in decimal */
+    wchar_t text[20];
+    wchar_t *p = text + lenof(text);   /* element count, not sizeof bytes */
+    *--p = L'\0';
+    do {			       /* do-while so that 0 yields "0" */
+	assert(num >= 0 && p > text);  /* negative would index off the table */
+	*--p = L"0123456789"[num % 10];
+	num /= 10;
+    } while (num != 0);
+    dotext(wret, p);
+}
+
+static void doanumber(word ***wret, int num) { /* 0->A .. 25->Z, 26->AA.. */
+    wchar_t text[20];
+    wchar_t *p;
+    int nletters, aton;		       /* aton: top value at nletters */
+    nletters = 1;
+    aton = 25;
+    while (num > aton) {	       /* skip past all shorter labels */
+	nletters++;
+	num -= aton+1;
+	if (aton < INT_MAX/26)	       /* guard the multiply against overflow */
+	    aton = (aton+1) * 26 - 1;
+	else
+	    aton = INT_MAX;
+    }
+    p = text + lenof(text);	       /* element count, not sizeof bytes */
+    *--p = L'\0';
+    while (nletters--) {	       /* emit base-26 digits, last first */
+	assert(p > text);
+	*--p = L"ABCDEFGHIJKLMNOPQRSTUVWXYZ"[num % 26];
+	num /= 26;
+    }
+    dotext(wret, p);
+}
+
+word *number_mktext(numberstate *state, int para, int aux, int prev) {
+    word *ret = NULL, **pret = &ret;   /* result list and its tail slot */
+    int i, level;
+    /* para/prev: para_* types; aux: level for para_Subsect; else NULL */
+    switch (para) {
+      case para_Chapter:
+	state->chapternum++;
+	for (i = 0; i < state->maxsectlevel; i++)
+	    state->sectionlevels[i] = 0;
+	dotext(&pret, L"Chapter");
+	dospace(&pret);
+	donumber(&pret, state->chapternum);
+	state->ischapter = 1;
+	break;
+      case para_Heading:
+      case para_Subsect:
+	level = (para == para_Heading ? 0 : aux);
+	if (state->maxsectlevel <= level) {
+	    state->sectionlevels = srealloc(state->sectionlevels,
+		(level + 32) * sizeof(*state->sectionlevels));
+	    for (i = state->maxsectlevel; i <= level; i++)
+		state->sectionlevels[i] = 0;   /* grown slots start at zero */
+	    state->maxsectlevel = level + 32;
+	}
+	state->sectionlevels[level]++;
+	for (i = level+1; i < state->maxsectlevel; i++)
+	    state->sectionlevels[i] = 0;
+	dotext(&pret, L"Section");
+	dospace(&pret);
+	if (state->ischapter)
+	    donumber(&pret, state->chapternum);
+	else
+	    doanumber(&pret, state->appendixnum);
+	for (i = 0; i <= level; i++) {
+	    dotext(&pret, L".");
+	    if (state->sectionlevels[i] == 0)
+		state->sectionlevels[i] = 1;
+	    donumber(&pret, state->sectionlevels[i]);
+	}
+	break;
+      case para_Appendix:
+	state->appendixnum++;
+	for (i = 0; i < state->maxsectlevel; i++)
+	    state->sectionlevels[i] = 0;
+	dotext(&pret, L"Appendix");
+	dospace(&pret);
+	doanumber(&pret, state->appendixnum);
+	state->ischapter = 0;
+	break;
+      case para_NumberedList:
+	if (prev != para_NumberedList)
+	    state->listitem = 0;
+	state->listitem++;
+	donumber(&pret, state->listitem);
+	break;
+    }
+
+    return ret;
+}
diff --git a/error.c b/error.c
index 218a109..f6621f2 100644
--- a/error.c
+++ b/error.c
@@ -111,12 +111,28 @@ static void do_error(int code, va_list ap) {
 	sprintf(error, "unable to nest text styles");
 	flags = FILEPOS;
 	break;
+      case err_nestedindex:
+	fpos = *va_arg(ap, filepos *);
+	sprintf(error, "unable to nest index markings");
+	flags = FILEPOS;
+	break;
+      case err_nosuchkw:
+	fpos = *va_arg(ap, filepos *);
+	wsp = va_arg(ap, wchar_t *);
+	sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+	sprintf(error, "unable to resolve cross-reference to `%.200s'", sp);
+	flags = FILEPOS;
+	break;
     }
 
     if (flags & PREFIX)
 	fputs("buttress: ", stderr);
-    if (flags & FILEPOS)
-	fprintf(stderr, "%s:%d: ", fpos.filename, fpos.line);
+    if (flags & FILEPOS) {
+	fprintf(stderr, "%s:%d:", fpos.filename, fpos.line);
+	if (fpos.col > 0)
+	    fprintf(stderr, "%d:", fpos.col);
+	fputc(' ', stderr);
+    }
     fputs(error, stderr);
     fputc('\n', stderr);
 }
diff --git a/index.c b/index.c
new file mode 100644
index 0000000..90aca82
--- /dev/null
+++ b/index.c
@@ -0,0 +1,8 @@
+/*
+ * index.c: create and collate index data structures
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "buttress.h"
+
diff --git a/input.c b/input.c
index e10884f..c6231a7 100644
--- a/input.c
+++ b/input.c
@@ -4,40 +4,70 @@
 
 #include <stdio.h>
 #include <assert.h>
+#include <time.h>
 #include "buttress.h"
 
 #define TAB_STOP 8		       /* for column number tracking */
 
+static void setpos(input *in, char *fname) {
+    filepos *first = &in->pos[0];
+    first->filename = fname;
+    first->line = 1;
+    first->col = in->reportcols ? 1 : -1;  /* -1 when columns are off */
+    in->pos[1] = *first, in->posptr = 1;   /* both slots equal; use slot 1 */
+}
+
+static filepos getpos(input *in) {     /* copy of the current position slot */
+    return *(in->pos + in->posptr);
+}
+
 static void unget(input *in, int c) {
     assert(in->npushback < INPUT_PUSHBACK_MAX);
     in->pushback[in->npushback++] = c;
+    in->posptr += lenof(in->pos)-1;
+    in->posptr %= lenof(in->pos);
 }
 
 /*
  * Can return EOF
  */
 static int get(input *in) {
-    if (in->npushback)
+    if (in->npushback) {
+	in->posptr++;
+	in->posptr %= lenof(in->pos);
 	return in->pushback[--in->npushback];
+    }
     else if (in->currfp) {
 	int c = getc(in->currfp);
+	filepos fp;
+
 	if (c == EOF) {
 	    fclose(in->currfp);
 	    in->currfp = NULL;
 	}
 	/* Track line numbers, for error reporting */
-	switch (c) {
-	  case '\t':
-	    in->pos.col = 1 + (in->pos.col + TAB_STOP-1) % TAB_STOP;
-	    break;
-	  case '\n':
-	    in->pos.col = 1;
-	    in->pos.line++;
-	    break;
-	  default:
-	    in->pos.col++;
-	    break;
+	fp = in->pos[in->posptr];
+	in->posptr++;
+	in->posptr %= lenof(in->pos);
+	if (in->reportcols) {
+	    switch (c) {
+	      case '\t':
+		fp.col = 1 + (fp.col + TAB_STOP-1) % TAB_STOP;
+		break;
+	      case '\n':
+		fp.col = 1;
+		fp.line++;
+		break;
+	      default:
+		fp.col++;
+		break;
+	    }
+	} else {
+	    fp.col = -1;
+	    if (c == '\n')
+		fp.line++;
 	}
+	in->pos[in->posptr] = fp;
 	/* FIXME: do input charmap translation. We should be returning
 	 * Unicode here. */
 	return c;
@@ -197,10 +227,15 @@ static void match_kw(token *tok) {
     if (tok->text[0] == 'S') {
 	/* We expect numeric characters thereafter. */
 	wchar_t *p = tok->text+1;
-	int n = 0;
-	while (*p && isdec(*p)) {
-	    n = 10 * n + fromdec(*p);
-	    p++;
+	int n;
+	if (!*p)
+	    n = 1;
+	else {
+	    n = 0;
+	    while (*p && isdec(*p)) {
+		n = 10 * n + fromdec(*p);
+		p++;
+	    }
 	}
 	if (!*p) {
 	    tok->cmd = c_S;
@@ -252,7 +287,7 @@ token get_token(input *in) {
     rdstring rs = { 0, 0, NULL };
 
     ret.text = NULL;		       /* default */
-    ret.pos = in->pos;
+    ret.pos = getpos(in);
     c = get(in);
     if (iswhite(c)) {		       /* tok_white or tok_eop */
 	nls = 0;
@@ -260,6 +295,10 @@ token get_token(input *in) {
 	    if (isnl(c))
 		nls++;
 	} while ((c = get(in)) != EOF && iswhite(c));
+	if (c == EOF) {
+	    ret.type = tok_eof;
+	    return ret;
+	}
 	unget(in, c);
 	ret.type = (nls > 1 ? tok_eop : tok_white);
 	return ret;
@@ -350,12 +389,12 @@ token get_codepar_token(input *in) {
     token ret;
     rdstring rs = { 0, 0, NULL };
 
-    ret.pos = in->pos;
+    ret.pos = getpos(in);
     ret.type = tok_word;
     c = get(in);		       /* expect (and discard) one space */
     if (c == ' ') {
+	ret.pos = getpos(in);
 	c = get(in);
-	ret.pos = in->pos;
     }
     while (!isnl(c) && c != EOF) {
 	rdadd(&rs, c);
@@ -370,26 +409,34 @@ token get_codepar_token(input *in) {
 /*
  * Adds a new word to a linked list
  */
-static void addword(word newword, word ***hptrptr) {
+static word *addword(word newword, word ***hptrptr) {
     word *mnewword = smalloc(sizeof(word));
     *mnewword = newword;	       /* structure copy */
     mnewword->next = NULL;
     **hptrptr = mnewword;
     *hptrptr = &mnewword->next;
+    return mnewword;
 }
 
 /*
  * Adds a new paragraph to a linked list
  */
-static void addpara(paragraph newpara, paragraph ***hptrptr) {
+static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) {
     paragraph *mnewpara = smalloc(sizeof(paragraph));
     *mnewpara = newpara;	       /* structure copy */
     mnewpara->next = NULL;
     **hptrptr = mnewpara;
     *hptrptr = &mnewpara->next;
+    return mnewpara;
 }
 
 /*
+ * Destructor before token is reassigned; should catch most memory
+ * leaks
+ */
+#define dtor(t) ( sfree(t.text) )
+
+/*
  * Reads a single file (ie until get() returns EOF)
  */
 static void read_file(paragraph ***ret, input *in) {
@@ -397,16 +444,26 @@ static void read_file(paragraph ***ret, input *in) {
     paragraph par;
     word wd, **whptr;
     int style;
+    int already;
+    int type;
     struct stack_item {
 	enum {
-	    stack_ualt,		       /* \u alternative */
-	    stack_style,	       /* \e, \c, \cw */
-	    stack_idx,		       /* \I, \i, \ii */
-	    stack_nop		       /* do nothing (for error recovery) */
+	    stack_nop = 0,	       /* do nothing (for error recovery) */
+	    stack_ualt = 1,	       /* \u alternative */
+	    stack_style = 2,	       /* \e, \c, \cw */
+	    stack_idx = 4,	       /* \I, \i, \ii */
+	    stack_hyper = 8,	       /* \W */
 	} type;
 	word **whptr;		       /* to restore from \u alternatives */
     } *sitem;
     stack parsestk;
+    word *indexword, *uword;
+    rdstring indexstr;
+    int index_downcase, index_visible, indexing;
+    const rdstring nullrs = { 0, 0, NULL };
+    wchar_t uchr;
+
+    t.text = NULL;
 
     /*
      * Loop on each paragraph.
@@ -419,7 +476,7 @@ static void read_file(paragraph ***ret, input *in) {
 	/*
 	 * Get a token.
 	 */
-	t = get_token(in);
+	dtor(t), t = get_token(in);
 	if (t.type == tok_eof)
 	    return;
 
@@ -429,16 +486,18 @@ static void read_file(paragraph ***ret, input *in) {
 	if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in)) {
 	    par.type = para_Code;
 	    while (1) {
-		t = get_codepar_token(in);
+		dtor(t), t = get_codepar_token(in);
 		wd.type = word_WeakCode;
 		wd.text = ustrdup(t.text);
+		wd.alt = NULL;
+		wd.fpos = t.pos;
 		addword(wd, &whptr);
-		t = get_token(in);
+		dtor(t), t = get_token(in);
 		if (t.type == tok_white) {
 		    /*
 		     * The newline after a code-paragraph line
 		     */
-		    t = get_token(in);
+		    dtor(t), t = get_token(in);
 		}
 		if (t.type == tok_eop || t.type == tok_eof)
 		    break;
@@ -446,11 +505,12 @@ static void read_file(paragraph ***ret, input *in) {
 		    error(err_brokencodepara, &t.pos);
 		    addpara(par, ret);
 		    while (t.type != tok_eop)   /* error recovery: */
-			t = get_token(in);   /* eat rest of paragraph */
+			dtor(t), t = get_token(in);   /* eat rest of paragraph */
 		    continue;
 		}
 	    }
 	    addpara(par, ret);
+	    continue;
 	}
 
 	/*
@@ -470,7 +530,7 @@ static void read_file(paragraph ***ret, input *in) {
 		break;
 	      case c__comment:
 		do {
-		    t = get_token(in);
+		    dtor(t), t = get_token(in);
 		} while (t.type != tok_eop && t.type != tok_eof);
 		continue;	       /* next paragraph */
 		/*
@@ -488,8 +548,8 @@ static void read_file(paragraph ***ret, input *in) {
 	      case c_C: needkw = 2; par.type = para_Chapter; break;
 	      case c_H: needkw = 2; par.type = para_Heading; break;
 	      case c_IM: needkw = 2; par.type = para_IM; break;
-		/* FIXME: multiple levels of Subsect */
-	      case c_S: needkw = 2; par.type = para_Subsect; break;
+	      case c_S: needkw = 2; par.type = para_Subsect;
+		par.aux = t.aux; break;
 	      case c_U: needkw = 0; par.type = para_UnnumberedChapter; break;
 		/* For \b and \n the keyword is optional */
 	      case c_b: needkw = 4; par.type = para_Bullet; break;
@@ -508,14 +568,14 @@ static void read_file(paragraph ***ret, input *in) {
 		filepos fp;
 
 		/* Get keywords. */
-		t = get_token(in);
+		dtor(t), t = get_token(in);
 		fp = t.pos;
 		while (t.type == tok_lbrace) {
 		    /* This is a keyword. */
 		    nkeys++;
 		    /* FIXME: there will be bugs if anyone specifies an
 		     * empty keyword (\foo{}), so trap this case. */
-		    while (t = get_token(in),
+		    while (dtor(t), t = get_token(in),
 			   t.type == tok_word || t.type == tok_white) {
 			if (t.type == tok_white)
 			    rdadd(&rs, ' ');
@@ -524,11 +584,10 @@ static void read_file(paragraph ***ret, input *in) {
 		    }
 		    if (t.type != tok_rbrace) {
 			error(err_kwunclosed, &t.pos);
-			/* FIXME: memory leak */
 			continue;
 		    }
 		    rdadd(&rs, 0);     /* add string terminator */
-		    t = get_token(in); /* eat right brace */
+		    dtor(t), t = get_token(in); /* eat right brace */
 		}
 
 		rdadd(&rs, 0);     /* add string terminator */
@@ -547,8 +606,9 @@ static void read_file(paragraph ***ret, input *in) {
 		if (needkw == 8) {
 		    if (t.type != tok_eop) {
 			error(err_bodyillegal, &t.pos);
-			while (t.type != tok_eop)   /* error recovery: */
-			    t = get_token(in);   /* eat rest of paragraph */
+			/* Error recovery: eat the rest of the paragraph */
+			while (t.type != tok_eop)
+			    dtor(t), t = get_token(in);
 		    }
 		    addpara(par, ret);
 		    continue;	       /* next paragraph */
@@ -569,43 +629,71 @@ static void read_file(paragraph ***ret, input *in) {
 	 *  \I
 	 *  \u
 	 *  \W
+	 *  \date
 	 *  \\ \{ \}
 	 */
 	parsestk = stk_new();
 	style = word_Normal;
+	indexing = FALSE;
 	while (t.type != tok_eop && t.type != tok_eof) {
+	    already = FALSE;
 	    if (t.type == tok_cmd && t.cmd == c__escaped)
 		t.type = tok_word;     /* nice and simple */
 	    switch (t.type) {
 	      case tok_white:
+		if (whptr == &par.words)
+		    break;	       /* strip whitespace at start of para */
 		wd.text = NULL;
 		wd.type = word_WhiteSpace;
-		addword(wd, &whptr);
+		wd.alt = NULL;
+		wd.fpos = t.pos;
+		if (indexing)
+		    rdadd(&indexstr, ' ');
+		if (!indexing || index_visible)
+		    addword(wd, &whptr);
 		break;
 	      case tok_word:
-		wd.text = ustrdup(t.text);
-		wd.type = style;
-		addword(wd, &whptr);
+		if (indexing)
+		    rdadds(&indexstr, t.text);
+		if (!indexing || index_visible) {
+		    wd.text = ustrdup(t.text);
+		    wd.type = style;
+		    wd.alt = NULL;
+		    wd.fpos = t.pos;
+		    addword(wd, &whptr);
+		}
 		break;
 	      case tok_lbrace:
 		error(err_unexbrace, &t.pos);
-		/* FIXME: errorrec. Push nop. */
+		/* Error recovery: push nop */
+		sitem = smalloc(sizeof(*sitem));
+		sitem->type = stack_nop;
+		stk_push(parsestk, sitem);
 		break;
 	      case tok_rbrace:
 		sitem = stk_pop(parsestk);
 		if (!sitem)
 		    error(err_unexbrace, &t.pos);
-		else switch (sitem->type) {
-		  case stack_ualt:
-		    whptr = sitem->whptr;
-		    break;
-		  case stack_style:
-		    style = word_Normal;
-		    break;
-		  case stack_idx:
-		    /* FIXME: do this bit! */
-		  case stack_nop:
-		    break;
+		else {
+		    if (sitem->type & stack_ualt)
+			whptr = sitem->whptr;
+		    if (sitem->type & stack_style)
+			style = word_Normal;
+		    if (sitem->type & stack_idx) {
+			indexword->text = ustrdup(indexstr.text);
+			sfree(indexstr.text);
+			if (index_downcase)
+			    ustrlow(indexword->text);
+			indexing = FALSE;
+		    }
+		    if (sitem->type & stack_hyper) {
+			wd.text = NULL;
+			wd.type = word_HyperEnd;
+			wd.alt = NULL;
+			wd.fpos = t.pos;
+			if (!indexing || index_visible)
+			    addword(wd, &whptr);
+		    }
 		}
 		sfree(sitem);
 		break;
@@ -613,52 +701,108 @@ static void read_file(paragraph ***ret, input *in) {
 		switch (t.cmd) {
 		  case c_K:
 		  case c_k:
+		  case c_W:
+		  case c_date:
 		    /*
-		     * Keyword. We expect a left brace, some text,
-		     * and then a right brace. No nesting; no
-		     * arguments.
+		     * Keyword, hyperlink, or \date. We expect a
+		     * left brace, some text, and then a right
+		     * brace. No nesting; no arguments.
 		     */
+		    wd.fpos = t.pos;
 		    if (t.cmd == c_K)
 			wd.type = word_UpperXref;
-		    else
+		    else if (t.cmd == c_k)
 			wd.type = word_LowerXref;
-		    t = get_token(in);
+		    else if (t.cmd == c_W)
+			wd.type = word_HyperLink;
+		    else
+			wd.type = word_Normal;
+		    dtor(t), t = get_token(in);
 		    if (t.type != tok_lbrace) {
-			error(err_explbr, &t.pos);
-		    }
-		    {
+			if (wd.type == word_Normal) {
+			    time_t thetime = time(NULL);
+			    struct tm *broken = localtime(&thetime);
+			    already = TRUE;
+			    wd.text = ustrftime(NULL, broken);
+			    wd.type = style;
+			} else
+			    error(err_explbr, &t.pos);
+		    } else {
 			rdstring rs = { 0, 0, NULL };
-			while (t = get_token(in),
+			while (dtor(t), t = get_token(in),
 			       t.type == tok_word || t.type == tok_white) {
 			    if (t.type == tok_white)
 				rdadd(&rs, ' ');
 			    else
 				rdadds(&rs, t.text);
 			}
-			wd.text = ustrdup(rs.text);
+			if (wd.type == word_Normal) {
+			    time_t thetime = time(NULL);
+			    struct tm *broken = localtime(&thetime);
+			    wd.text = ustrftime(rs.text, broken);
+			    wd.type = style;
+			} else {
+			    wd.text = ustrdup(rs.text);
+			}
+			sfree(rs.text);
+			if (t.type != tok_rbrace) {
+			    error(err_kwexprbr, &t.pos);
+			}
 		    }
-		    if (t.type != tok_rbrace) {
-			error(err_kwexprbr, &t.pos);
+		    wd.alt = NULL;
+		    if (!indexing || index_visible)
+			addword(wd, &whptr);
+		    else
+			sfree(wd.text);
+		    if (wd.type == word_HyperLink) {
+			/*
+			 * Hyperlinks are different: they then expect
+			 * another left brace, to begin delimiting the
+			 * text marked by the link. Allocate the stack
+			 * item first so \c, \e, \cw below can flag it.
+			 */
+			sitem = smalloc(sizeof(*sitem));
+			sitem->type = stack_hyper;
+			dtor(t), t = get_token(in);
+			/* Special cases: \W{}\c, \W{}\e, \W{}\cw */
+			if (t.type == tok_cmd &&
+			    (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
+			    if (style != word_Normal)
+				error(err_nestedstyles, &t.pos);
+			    else {
+				style = (t.cmd == c_c ? word_Code :
+					 t.cmd == c_cw ? word_WeakCode :
+					 word_Emph);
+				sitem->type |= stack_style;
+			    }
+			    dtor(t), t = get_token(in);
+			}
+			if (t.type != tok_lbrace) {
+			    sfree(sitem);      /* never pushed: drop it */
+			    error(err_explbr, &t.pos);
+			} else {
+			    stk_push(parsestk, sitem);
+			}
+		    }
-		    addword(wd, &whptr);
 		    break;
 		  case c_c:
 		  case c_cw:
 		  case c_e:
+		    type = t.cmd;
 		    if (style != word_Normal) {
 			error(err_nestedstyles, &t.pos);
 			/* Error recovery: eat lbrace, push nop. */
-			t = get_token(in);
+			dtor(t), t = get_token(in);
 			sitem = smalloc(sizeof(*sitem));
 			sitem->type = stack_nop;
 			stk_push(parsestk, sitem);
 		    }
-		    t = get_token(in);
+		    dtor(t), t = get_token(in);
 		    if (t.type != tok_lbrace) {
 			error(err_explbr, &t.pos);
 		    } else {
-			style = (t.cmd == c_c ? word_Code :
-				 t.cmd == c_cw ? word_WeakCode :
+			style = (type == c_c ? word_Code :
+				 type == c_cw ? word_WeakCode :
 				 word_Emph);
 			sitem = smalloc(sizeof(*sitem));
 			sitem->type = stack_style;
@@ -668,36 +812,91 @@ static void read_file(paragraph ***ret, input *in) {
 		  case c_i:
 		  case c_ii:
 		  case c_I:
-		    if (style != word_Normal) {
-			error(err_nestedstyles, &t.pos);
+		    type = t.cmd;
+		    if (indexing) {
+			error(err_nestedindex, &t.pos);
 			/* Error recovery: eat lbrace, push nop. */
-			t = get_token(in);
+			dtor(t), t = get_token(in);
 			sitem = smalloc(sizeof(*sitem));
 			sitem->type = stack_nop;
 			stk_push(parsestk, sitem);
 		    }
-		    t = get_token(in);
+		    sitem = smalloc(sizeof(*sitem));
+		    sitem->type = stack_idx;
+		    dtor(t), t = get_token(in);
+		    /*
+		     * Special cases: \i\c, \i\e, \i\cw
+		     */
+		    wd.fpos = t.pos;
+		    if (t.type == tok_cmd &&
+			(t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
+			if (style != word_Normal)
+			    error(err_nestedstyles, &t.pos);
+			else {
+			    style = (t.cmd == c_c ? word_Code :
+				     t.cmd == c_cw ? word_WeakCode :
+				     word_Emph);
+			    sitem->type |= stack_style;
+			}
+			dtor(t), t = get_token(in);
+		    }
 		    if (t.type != tok_lbrace) {
+			sfree(sitem);
 			error(err_explbr, &t.pos);
 		    } else {
+			/* Add an index-reference word with no text as yet */
+			wd.type = word_IndexRef;
+			wd.text = NULL;
+			wd.alt = NULL;
+			indexword = addword(wd, &whptr);
+			/* Set up a rdstring to read the index text */
+			indexstr = nullrs;
+			/* Flags so that we do the Right Things with text */
+			index_visible = (type != c_I);
+			index_downcase = (type == c_ii);
+			indexing = TRUE;
+			/* Stack item to close the indexing on exit */
+			stk_push(parsestk, sitem);
+		    }
+		    break;
+		  case c_u:
+		    uchr = t.aux;
+		    if (!indexing || index_visible) {
+			wchar_t text[2];
+			text[1] = 0;
+			text[0] = uchr;
+			wd.text = ustrdup(text);
+			wd.type = style;
+			wd.alt = NULL;
+			wd.fpos = t.pos;
+			uword = addword(wd, &whptr);
+		    }
+		    dtor(t), t = get_token(in);
+		    if (t.type == tok_lbrace) {
 			/*
-			 * FIXME: do something useful
-			 * Add an index-ref word and keep a pointer to it
-			 * Set a flag so that other addwords also update it
+			 * \u with a left brace. Until the brace
+			 * closes, all further words go on a
+			 * sidetrack from the main thread of the
+			 * paragraph.
 			 */
 			sitem = smalloc(sizeof(*sitem));
-			sitem->type = stack_idx;
+			sitem->type = stack_ualt;
+			sitem->whptr = whptr;
 			stk_push(parsestk, sitem);
+			whptr = &uword->alt;
+		    } else {
+			if (indexing)
+			    rdadd(&indexstr, uchr);
+			already = TRUE;
 		    }
 		    break;
-		  case c_u:
-		  case c_W:
 		  default:
 		    error(err_badmidcmd, t.text, &t.pos);
 		    break;
 		}
 	    }
-	    t = get_token(in);
+	    if (!already)
+		dtor(t), t = get_token(in);
 	}
 	/* Check the stack is empty */
 	if (NULL != (sitem = stk_pop(parsestk))) {
@@ -710,6 +909,7 @@ static void read_file(paragraph ***ret, input *in) {
 	stk_free(parsestk);
 	addpara(par, ret);
     }
+    dtor(t);
 }
 
 paragraph *read_input(input *in) {
@@ -719,9 +919,7 @@ paragraph *read_input(input *in) {
     while (in->currindex < in->nfiles) {
 	in->currfp = fopen(in->filenames[in->currindex], "r");
 	if (in->currfp) {
-	    in->pos.filename = in->filenames[in->currindex];
-	    in->pos.line = 1;
-	    in->pos.col = 1;
+	    setpos(in, in->filenames[in->currindex]);
 	    read_file(&hptr, in);
 	}
 	in->currindex++;
diff --git a/inputs/test.but b/inputs/test.but
index fbc1441..d5ce52d 100644
--- a/inputs/test.but
+++ b/inputs/test.but
@@ -6,7 +6,7 @@ feature that Buttress's input format supports. Creation date
 
 \copyright Copyright 1999 Simon Tatham. All rights reserved.
 
-\versionid $Id: test.but,v 1.3 1999/07/31 18:44:53 simon Exp $
+\versionid $Id: test.but,v 1.4 1999/08/09 10:02:07 simon Exp $
 
 \C{chap} First chapter title
 
@@ -15,8 +15,8 @@ has line breaks in   between words, multiple
   spaces (ignored), and \e{emphasised text} as well as \c{code
 fragments}.
 
-\cw{This} is weak code. And \k{sect} contains some other stuff.
-\K{head} does too.
+\cw{This} is weak code. And \k{head} contains some other stuff.
+\K{subhead} does too.
 
 \H{head} First section title
 
@@ -53,9 +53,9 @@ characters, to be precise. And their code equivalents, \c{\\},
 
 \S{subhead} First subheading
 
-So here's a \I{subheading}{subsection}subsection. Just incidentally,
-`this' is in quotes. \ii{Those} quotes had better work in all
-formats.
+So here's a \I{subheading}\I{subsection}subsection. Just
+incidentally, `this' is in quotes. \ii{Those} quotes had better work
+in all formats.
 
 We'll try for some Unicode here: \i{Schr\u00F6{oe}dinger}.
 
@@ -63,7 +63,7 @@ An index tag containing non-alternatived Unicode: \i{\u00BFChe?}
 
 An invisible index tag: \I{she seems to have an invisible tag}yeah.
 
-\S1{subsub} Smaller heading still
+\S2{sub-sub} Smaller heading still
 
 A tiny section. Awww. How cute.
 
diff --git a/keywords.c b/keywords.c
new file mode 100644
index 0000000..ff213c1
--- /dev/null
+++ b/keywords.c
@@ -0,0 +1,168 @@
+/*
+ * keywords.c: keep track of all cross-reference keywords
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <assert.h>
+#include "buttress.h"
+
+#define heapparent(x) (((x)+1)/2-1)
+#define heaplchild(x) (2*(x)+1)
+#define heaprchild(x) (2*(x)+2)
+
+#define key(x) ( kl->keys[(x)] )
+
+#define greater(x,y) ( ustrcmp(key(x)->key, key(y)->key) > 0 )
+#define swap(x,y) do { keyword *t=key(x); key(x)=key(y); key(y)=t; } while(0)
+
+static void heap_add(keywordlist *kl, keyword *key) {  /* insert key into the max-heap kl->keys */
+    int p;
+    if (kl->nkeywords >= kl->size) {   /* array full: grow it by 128 slots */
+	kl->size = kl->nkeywords + 128;
+	kl->keys = srealloc(kl->keys, sizeof(*kl->keys) * kl->size);
+    }
+    p = kl->nkeywords++;	       /* new entry starts in the last slot... */
+    kl->keys[p] = key;
+    while (heapparent(p) >= 0 && greater(p, heapparent(p))) {  /* ...and sifts up */
+	swap(p, heapparent(p));	       /* larger than its parent: trade places */
+	p = heapparent(p);
+    }
+}
+
+static void heap_sort(keywordlist *kl) {   /* turn the max-heap kl->keys into ascending order */
+    int i, j;
+
+    kl->size = kl->nkeywords;	       /* trim to fit; skip when empty, as malloc(0) may return NULL */
+    if (kl->size > 0) kl->keys = srealloc(kl->keys, sizeof(*kl->keys) * kl->size);
+
+    i = kl->nkeywords;		       /* keys[0..i-1] is the remaining heap */
+    while (i > 1) {
+	i--;
+	swap(0, i);		       /* put greatest at end */
+	j = 0;
+	while (1) {		       /* sift the new root down within keys[0..i-1] */
+	    int left = heaplchild(j), right = heaprchild(j);
+	    if (left >= i || !greater(left, j))
+		left = -1;	       /* no left child, or it is not larger */
+	    if (right >= i || !greater(right, j))
+		right = -1;	       /* no right child, or it is not larger */
+	    if (left >= 0 && right >= 0) {
+		if (greater(left, right))
+		    right = -1;	       /* keep only the larger candidate */
+		else
+		    left = -1;
+	    }
+	    if (left >= 0) { swap(j, left); j = left; }
+	    else if (right >= 0) { swap(j, right); j = right; }
+	    else break;		       /* heap property restored */
+	}
+    }
+    /* FIXME: check for duplicate keys; do what about them? */
+}
+
+static keyword *kw_lookup(keywordlist *kl, wchar_t *str) {  /* binary search; NULL if absent */
+    int i, j, k, cmp;
+
+    i = -1;			       /* everything at or below i compares less than str */
+    j = kl->nkeywords;		       /* everything at or above j compares greater */
+    while (j-i > 1) {		       /* candidates remain strictly between i and j */
+	k = (i+j)/2;
+	cmp = ustrcmp(str, kl->keys[k]->key);
+	if (cmp < 0)
+	    j = k;
+	else if (cmp > 0)
+	    i = k;
+	else
+	    return kl->keys[k];
+    }
+    return NULL;
+}
+
+/*
+ * This function reads through source form and collects the
+ * keywords. They get collected in a heap, sorted by Unicode
+ * collation, last at the top (so that we can Heapsort them when we
+ * finish).
+ */
+keywordlist *get_keywords(paragraph *source) {
+    keywordlist *kl = smalloc(sizeof(*kl));
+    numberstate *n = number_init();
+    int prevpara = para_NotParaType;
+
+    kl->nkeywords = 0;
+    kl->size = 0;
+    kl->keys = NULL;
+    for (; source; source = source->next) {
+	/*
+	 * Number the chapter / section / list-item / whatever.
+	 */
+	source->kwtext = number_mktext(n, source->type, source->aux,
+				       prevpara);
+	prevpara = source->type;
+
+	if (source->keyword && *source->keyword) {
+	    if (source->kwtext) {
+		wchar_t *p = source->keyword;
+		while (*p) {
+		    keyword *kw = smalloc(sizeof(*kw));
+		    kw->key = p;
+		    kw->text = source->kwtext;
+		    heap_add(kl, kw);
+		    p += ustrlen(p) + 1;
+		}
+	    }
+	}
+    }
+
+    number_free(n);
+
+    heap_sort(kl);
+
+    return kl;
+}
+
+void free_keywords(keywordlist *kl) {  /* release a list built by get_keywords */
+    int i;
+    for (i = 0; i < kl->nkeywords; i++) sfree(kl->keys[i]);  /* records only: key/text are not freed here */
+    sfree(kl->keys);		       /* the pointer array itself (previously leaked) */
+    sfree(kl);
+}
+
+void subst_keywords(paragraph *source, keywordlist *kl) {
+    for (; source; source = source->next) {
+	word *ptr;
+	for (ptr = source->words; ptr; ptr = ptr->next) {
+	    if (ptr->type == word_UpperXref ||
+		ptr->type == word_LowerXref) {
+		keyword *kw;
+		word **endptr, *close, *subst;
+
+		kw = kw_lookup(kl, ptr->text);
+		if (!kw) {
+		    error(err_nosuchkw, &ptr->fpos, ptr->text);
+		    subst = NULL;
+		} else
+		    subst = dup_word_list(kw->text);
+
+		if (subst && ptr->type == word_LowerXref)
+		    ustrlow(subst->text);
+
+		close = smalloc(sizeof(word));
+		close->text = NULL;
+		close->alt = NULL;
+		close->type = word_XrefEnd;
+		close->fpos = ptr->fpos;
+
+		close->next = ptr->next;
+		ptr->next = subst;
+
+		for (endptr = &ptr->next; *endptr; endptr = &(*endptr)->next)
+		    (*endptr)->fpos = ptr->fpos;
+
+		*endptr = close;
+		ptr = close;
+	    }
+	}
+    }
+}
diff --git a/main.c b/main.c
index a0d284a..6850212 100644
--- a/main.c
+++ b/main.c
@@ -6,12 +6,17 @@
 #include <stdlib.h>
 #include "buttress.h"
 
+static void dbg_prtsource(paragraph *sourceform);
+static void dbg_prtwordlist(int level, word *w);
+static void dbg_prtkws(keywordlist *kws);
+
 int main(int argc, char **argv) {
     char **infiles;
     char *outfile;
     int nfiles;
     int nogo;
     int errs;
+    int reportcols;
 
     /*
      * Set up initial (default) parameters.
@@ -20,6 +25,7 @@ int main(int argc, char **argv) {
     outfile = NULL;
     nfiles = 0;
     nogo = errs = FALSE;
+    reportcols = 0;
 
     if (argc == 1) {
 	usage();
@@ -67,6 +73,8 @@ int main(int argc, char **argv) {
 				errs = TRUE, error(err_optnoarg, opt);
 			    else
 				outfile = val;
+			} else if (!strcmp(opt, "-precise")) {
+			    reportcols = 1;
 			} else {
 			    errs = TRUE, error(err_nosuchopt, opt);
 			}
@@ -76,6 +84,7 @@ int main(int argc, char **argv) {
 		  case 'h':
 		  case 'V':
 		  case 'L':
+		  case 'P':
 		    /*
 		     * Option requiring no parameter.
 		     */
@@ -92,6 +101,9 @@ int main(int argc, char **argv) {
 			licence();
 			nogo = TRUE;
 			break;
+		      case 'P':
+			reportcols = 1;
+			break;
 		    }
 		    break;
 		  case 'o':
@@ -154,12 +166,14 @@ int main(int argc, char **argv) {
     {
 	input in;
 	paragraph *sourceform;
+	keywordlist *keywords;
 
 	in.filenames = infiles;
 	in.nfiles = nfiles;
 	in.currfp = NULL;
 	in.currindex = 0;
 	in.npushback = 0;
+	in.reportcols = reportcols;
 
 	sourceform = read_input(&in);
 	if (!sourceform)
@@ -167,43 +181,86 @@ int main(int argc, char **argv) {
 
 	sfree(infiles);
 
-	/*
-	 * FIXME: having read it, do something with it!
-	 */
-	{
-	    paragraph *p;
-	    word *w;
-	    for (p = sourceform; p; p = p->next) {
-		wchar_t *wp;
-		printf("para %d ", p->type);
-		if (p->keyword) {
-		    wp = p->keyword;
-		    while (*wp) {
-			putchar('\"');
-			for (; *wp; wp++)
-			    putchar(*wp);
-			putchar('\"');
-			if (*++wp)
-			    printf(", ");
-		    }
-		} else
-		    printf("(no keyword)");
-		printf(" {\n");
-		for (w = p->words; w; w = w->next) {
-		    printf("    word %d ", w->type);
-		    if (w->text) {
-			printf("\"");
-			for (wp = w->text; *wp; wp++)
-			    putchar(*wp);
-			printf("\"");
-		    } else
-			printf("(no text)");
-		    printf("\n");
-		}
-		printf("}\n");
+	keywords = get_keywords(sourceform);
+	dbg_prtkws(keywords);
+
+	subst_keywords(sourceform, keywords);
+	dbg_prtsource(sourceform);
+
+	free_para_list(sourceform);
+    }
+
+    return 0;
+}
+
+static void dbg_prtsource(paragraph *sourceform) {
+    /*
+     * Output source form in debugging format.
+     */
+
+    paragraph *p;
+    for (p = sourceform; p; p = p->next) {
+	wchar_t *wp;
+	printf("para %d ", p->type);
+	if (p->keyword) {
+	    wp = p->keyword;
+	    while (*wp) {
+		putchar('\"');
+		for (; *wp; wp++)
+		    putchar(*wp);
+		putchar('\"');
+		if (*++wp)
+		    printf(", ");
 	    }
+	} else
+	    printf("(no keyword)");
+	printf(" {\n");
+	dbg_prtwordlist(1, p->words);
+	printf("}\n");
+    }
+}
+
+static void dbg_prtkws(keywordlist *kws) {
+    /*
+     * Output keywords in debugging format.
+     */
+
+    int i;
+
+    for (i = 0; i < kws->nkeywords; i++) {
+	wchar_t *wp;
+	printf("keyword ");
+	wp = kws->keys[i]->key;
+	while (*wp) {
+	    putchar('\"');
+	    for (; *wp; wp++)
+		putchar(*wp);
+	    putchar('\"');
+	    if (*++wp)
+		printf(", ");
 	}
+	printf(" {\n");
+	dbg_prtwordlist(1, kws->keys[i]->text);
+	printf("}\n");
     }
+}
 
-    return 0;
+static void dbg_prtwordlist(int level, word *w) {
+    for (; w; w = w->next) {
+	wchar_t *wp;
+	printf("%*sword %d ", level*4, "", w->type);
+	if (w->text) {
+	    printf("\"");
+	    for (wp = w->text; *wp; wp++)
+		    putchar(*wp);
+	    printf("\"");
+	} else
+	    printf("(no text)");
+	if (w->alt) {
+	    printf(" alt = {\n");
+	    dbg_prtwordlist(level+1, w->alt);
+	    printf("%*s}", level*4, "");
+	}
+	printf("\n");
+    }
 }
diff --git a/malloc.c b/malloc.c
index 7d33085..aa833db 100644
--- a/malloc.c
+++ b/malloc.c
@@ -3,42 +3,101 @@
  */
 
 #include <stdlib.h>
+#include <stdarg.h>
 #include "buttress.h"
 
+#ifdef LOGALLOC
+#define LOGPARAMS char *file, int line,
+static FILE *logallocfp = NULL;
+static void logallocinit(void) {       /* open malloc.log on first use; exit(10) if that fails */
+    if (!logallocfp) {
+	logallocfp = fopen("malloc.log", "w");
+	if (!logallocfp) {
+	    fprintf(stderr, "panic: unable to open malloc.log\n");
+	    exit(10);
+	}
+	setvbuf (logallocfp, NULL, _IOLBF, BUFSIZ);   /* line-buffered log output */
+	fprintf(logallocfp, "null pointer is %p\n", (void *)NULL);  /* %p requires a void * argument */
+    }
+}
+static void logprintf(char *fmt, ...) {
+    va_list ap;
+    va_start(ap, fmt);
+    vfprintf(logallocfp, fmt, ap);
+    va_end(ap);
+}
+#define LOGPRINT(x) ( logallocinit(), logprintf x )
+#else
+#define LOGPARAMS
+#define LOGPRINT(x)
+#endif
+
 /*
  * smalloc should guarantee to return a useful pointer - buttress
  * can do nothing except die when it's out of memory anyway
  */
-void *smalloc(int size) {
+void *(smalloc)(LOGPARAMS int size) {
     void *p = malloc(size);
     if (!p)
 	fatal(err_nomemory);
+    LOGPRINT(("%s %d malloc(%ld) returns %p\n",
+	      file, line, (long)size, p));
     return p;
 }
 
 /*
  * sfree should guaranteeably deal gracefully with freeing NULL
  */
-void sfree(void *p) {
-    if (p)
+void (sfree)(LOGPARAMS void *p) {
+    if (p) {
 	free(p);
+	LOGPRINT(("%s %d free(%p)\n",
+		  file, line, p));
+    }
 }
 
 /*
  * srealloc should guaranteeably be able to realloc NULL
  */
-void *srealloc(void *p, int size) {
+void *(srealloc)(LOGPARAMS void *p, int size) {
     void *q;
-    if (p)
+    if (p) {
 	q = realloc(p, size);
-    else
+	LOGPRINT(("%s %d realloc(%p,%ld) returns %p\n",
+		  file, line, p, (long)size, q));
+    } else {
 	q = malloc(size);
+	LOGPRINT(("%s %d malloc(%ld) returns %p\n",
+		  file, line, (long)size, q));
+    }
     if (!q)
 	fatal(err_nomemory);
     return q;
 }
 
 /*
+ * Duplicate a linked list of words
+ */
+word *dup_word_list(word *w) {
+    word *head = NULL, **eptr = &head; /* NULL so that an empty list copies to NULL */
+
+    while (w) {
+	word *newwd = smalloc(sizeof(word));
+	*newwd = *w;		       /* structure copy */
+	newwd->text = ustrdup(w->text);    /* the copy gets its own text */
+	if (w->alt)
+	    newwd->alt = dup_word_list(w->alt);   /* and its own alt list */
+	*eptr = newwd;		       /* append to the tail of the copy */
+	newwd->next = NULL;
+	eptr = &newwd->next;
+
+	w = w->next;
+    }
+
+    return head;
+}
+
+/*
  * Free a linked list of words
  */
 void free_word_list(word *w) {
@@ -46,6 +105,9 @@ void free_word_list(word *w) {
     while (w) {
 	t = w;
 	w = w->next;
+	sfree(t->text);
+	if (t->alt)
+	    free_word_list(t->alt);
 	sfree(t);
     }
 }
@@ -58,6 +120,8 @@ void free_para_list(paragraph *p) {
     while (p) {
 	t = p;
 	p = p->next;
+	sfree(t->keyword);
+	free_word_list(t->words);
 	sfree(t);
     }
 }
diff --git a/style.c b/style.c
new file mode 100644
index 0000000..bc16b2f
--- /dev/null
+++ b/style.c
@@ -0,0 +1,8 @@
+/*
+ * style.c: load and keep track of user style preferences
+ */
+
+#include <stdio.h>
+#include <stdlib.h>
+#include "buttress.h"
+
diff --git a/ustring.c b/ustring.c
index 00e26b6..86f013a 100644
--- a/ustring.c
+++ b/ustring.c
@@ -3,6 +3,7 @@
  */
 
 #include <wchar.h>
+#include <time.h>
 #include "buttress.h"
 
 wchar_t *ustrdup(wchar_t *s) {
@@ -45,3 +46,73 @@ wchar_t *ustrcpy(wchar_t *dest, wchar_t *source) {
     } while (*source++);
     return ret;
 }
+
+int ustrcmp(wchar_t *lhs, wchar_t *rhs) {
+    while (*lhs && *rhs && *lhs==*rhs)
+	lhs++, rhs++;
+    if (*lhs < *rhs)
+	return -1;
+    else if (*lhs > *rhs)
+	return 1;
+    return 0;
+}
+
+wchar_t *ustrlow(wchar_t *s) {
+    wchar_t *p = s;
+    while (*p) {
+	/* FIXME: this doesn't even come close */
+	if (*p >= 'A' && *p <= 'Z')
+	    *p += 'a'-'A';
+	p++;
+    }
+    return s;
+}
+
+#define USTRFTIME_DELTA 128
+wchar_t *ustrftime(wchar_t *wfmt, struct tm *timespec) {
+    void *blk = NULL;
+    wchar_t *wblk, *wp;
+    char *fmt, *text, *p;
+    size_t size = 0;
+    size_t len;
+
+    /*
+     * strftime has the entertaining property that it returns 0
+     * _either_ on out-of-space _or_ on successful generation of
+     * the empty string. Hence we must ensure our format can never
+     * generate the empty string. Somebody throw a custard pie at
+     * whoever was responsible for that. Please?
+     */
+    if (wfmt) {
+	len = ustrlen(wfmt);
+	fmt = smalloc(2+len);
+	ustrtoa(wfmt, fmt+1, len+1);
+	fmt[0] = ' ';
+    } else
+	fmt = " %c";
+
+    while (1) {
+	size += USTRFTIME_DELTA;
+	blk = srealloc(blk, size);
+	len = strftime((char *)blk, size-1, fmt, timespec);
+	if (len > 0)
+	    break;
+    }
+
+    /* Note: +1 for the terminating 0, -1 for the initial space in fmt */
+    wblk = srealloc(blk, len * sizeof(wchar_t));
+    text = smalloc(len);
+    strftime(text, len, fmt+1, timespec);
+    /*
+     * We operate in the C locale, so this all ought to be kosher
+     * ASCII. If we ever move outside ASCII machines, we may need
+     * to make this more portable...
+     */
+    for (wp = wblk, p = text; *p; p++, wp++)
+	*wp = *p;
+    *wp = 0;
+    if (wfmt)
+	sfree(fmt);
+    sfree(text);
+    return wblk;
+}