1 files changed, 284 insertions, 86 deletions
diff --git a/input.c b/input.c
index e10884f..c6231a7 100644
--- a/input.c
+++ b/input.c
@@ -4,40 +4,70 @@
 
 #include <stdio.h>
 #include <assert.h>
+#include <time.h>
 #include "buttress.h"
 
 #define TAB_STOP 8		       /* for column number tracking */
 
+static void setpos(input *in, char *fname) {
+    in->pos[0].filename = fname;
+    in->pos[0].line = 1;
+    in->pos[0].col = (in->reportcols ? 1 : -1);
+    in->pos[1] = in->pos[0];
+    in->posptr = 1;
+}
+
+static filepos getpos(input *in) {
+    return in->pos[in->posptr];
+}
+
 static void unget(input *in, int c) {
     assert(in->npushback < INPUT_PUSHBACK_MAX);
     in->pushback[in->npushback++] = c;
+    in->posptr += lenof(in->pos)-1;
+    in->posptr %= lenof(in->pos);
 }
 
 /*
  * Can return EOF
  */
 static int get(input *in) {
-    if (in->npushback)
+    if (in->npushback) {
+	in->posptr++;
+	in->posptr %= lenof(in->pos);
 	return in->pushback[--in->npushback];
+    }
     else if (in->currfp) {
 	int c = getc(in->currfp);
+	filepos fp;
+
 	if (c == EOF) {
 	    fclose(in->currfp);
 	    in->currfp = NULL;
 	}
 	/* Track line numbers, for error reporting */
-	switch (c) {
-	  case '\t':
-	    in->pos.col = 1 + (in->pos.col + TAB_STOP-1) % TAB_STOP;
-	    break;
-	  case '\n':
-	    in->pos.col = 1;
-	    in->pos.line++;
-	    break;
-	  default:
-	    in->pos.col++;
-	    break;
+	fp = in->pos[in->posptr];
+	in->posptr++;
+	in->posptr %= lenof(in->pos);
+	if (in->reportcols) {
+	    switch (c) {
+	      case '\t':
+		fp.col = 1 + (fp.col + TAB_STOP-1) % TAB_STOP;
+		break;
+	      case '\n':
+		fp.col = 1;
+		fp.line++;
+		break;
+	      default:
+		fp.col++;
+		break;
+	    }
+	} else {
+	    fp.col = -1;
+	    if (c == '\n')
+		fp.line++;
 	}
+	in->pos[in->posptr] = fp;
 	/* FIXME: do input charmap translation. We should be returning
 	 * Unicode here. */
 	return c;
@@ -197,10 +227,15 @@ static void match_kw(token *tok) {
     if (tok->text[0] == 'S') {
 	/* We expect numeric characters thereafter. */
 	wchar_t *p = tok->text+1;
-	int n = 0;
-	while (*p && isdec(*p)) {
-	    n = 10 * n + fromdec(*p);
-	    p++;
+	int n;
+	if (!*p)
+	    n = 1;
+	else {
+	    n = 0;
+	    while (*p && isdec(*p)) {
+		n = 10 * n + fromdec(*p);
+		p++;
+	    }
 	}
 	if (!*p) {
 	    tok->cmd = c_S;
@@ -252,7 +287,7 @@ token get_token(input *in) {
     rdstring rs = { 0, 0, NULL };
 
     ret.text = NULL;		       /* default */
-    ret.pos = in->pos;
+    ret.pos = getpos(in);
     c = get(in);
     if (iswhite(c)) {		       /* tok_white or tok_eop */
 	nls = 0;
@@ -260,6 +295,10 @@ token get_token(input *in) {
 	    if (isnl(c))
 		nls++;
 	} while ((c = get(in)) != EOF && iswhite(c));
+	if (c == EOF) {
+	    ret.type = tok_eof;
+	    return ret;
+	}
 	unget(in, c);
 	ret.type = (nls > 1 ? tok_eop : tok_white);
 	return ret;
@@ -350,12 +389,12 @@ token get_codepar_token(input *in) {
     token ret;
     rdstring rs = { 0, 0, NULL };
 
-    ret.pos = in->pos;
+    ret.pos = getpos(in);
     ret.type = tok_word;
     c = get(in);		       /* expect (and discard) one space */
     if (c == ' ') {
+	ret.pos = getpos(in);
 	c = get(in);
-	ret.pos = in->pos;
     }
     while (!isnl(c) && c != EOF) {
 	rdadd(&rs, c);
@@ -370,26 +409,34 @@ token get_codepar_token(input *in) {
 /*
  * Adds a new word to a linked list
  */
-static void addword(word newword, word ***hptrptr) {
+static word *addword(word newword, word ***hptrptr) {
     word *mnewword = smalloc(sizeof(word));
     *mnewword = newword;	       /* structure copy */
     mnewword->next = NULL;
     **hptrptr = mnewword;
     *hptrptr = &mnewword->next;
+    return mnewword;
 }
 
 /*
  * Adds a new paragraph to a linked list
  */
-static void addpara(paragraph newpara, paragraph ***hptrptr) {
+static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) {
     paragraph *mnewpara = smalloc(sizeof(paragraph));
     *mnewpara = newpara;	       /* structure copy */
     mnewpara->next = NULL;
     **hptrptr = mnewpara;
     *hptrptr = &mnewpara->next;
+    return mnewpara;
 }
 
 /*
+ * Destructor before token is reassigned; should catch most memory
+ * leaks
+ */
+#define dtor(t) ( sfree(t.text) )
+
+/*
  * Reads a single file (ie until get() returns EOF)
  */
 static void read_file(paragraph ***ret, input *in) {
@@ -397,16 +444,26 @@ static void read_file(paragraph ***ret, input *in) {
     paragraph par;
     word wd, **whptr;
     int style;
+    int already;
+    int type;
     struct stack_item {
 	enum {
-	    stack_ualt,		       /* \u alternative */
-	    stack_style,	       /* \e, \c, \cw */
-	    stack_idx,		       /* \I, \i, \ii */
-	    stack_nop		       /* do nothing (for error recovery) */
+	    stack_nop = 0,	       /* do nothing (for error recovery) */
+	    stack_ualt = 1,	       /* \u alternative */
+	    stack_style = 2,	       /* \e, \c, \cw */
+	    stack_idx = 4,	       /* \I, \i, \ii */
+	    stack_hyper = 8,	       /* \W */
 	} type;
 	word **whptr;		       /* to restore from \u alternatives */
     } *sitem;
     stack parsestk;
+    word *indexword, *uword;
+    rdstring indexstr;
+    int index_downcase, index_visible, indexing;
+    const rdstring nullrs = { 0, 0, NULL };
+    wchar_t uchr;
+
+    t.text = NULL;
 
     /*
      * Loop on each paragraph.
@@ -419,7 +476,7 @@ static void read_file(paragraph ***ret, input *in) {
 	/*
 	 * Get a token.
 	 */
-	t = get_token(in);
+	dtor(t), t = get_token(in);
 	if (t.type == tok_eof)
 	    return;
 
@@ -429,16 +486,18 @@ static void read_file(paragraph ***ret, input *in) {
 	if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in)) {
 	    par.type = para_Code;
 	    while (1) {
-		t = get_codepar_token(in);
+		dtor(t), t = get_codepar_token(in);
 		wd.type = word_WeakCode;
 		wd.text = ustrdup(t.text);
+		wd.alt = NULL;
+		wd.fpos = t.pos;
 		addword(wd, &whptr);
-		t = get_token(in);
+		dtor(t), t = get_token(in);
 		if (t.type == tok_white) {
 		    /*
 		     * The newline after a code-paragraph line
 		     */
-		    t = get_token(in);
+		    dtor(t), t = get_token(in);
 		}
 		if (t.type == tok_eop || t.type == tok_eof)
 		    break;
@@ -446,11 +505,12 @@ static void read_file(paragraph ***ret, input *in) {
 		    error(err_brokencodepara, &t.pos);
 		    addpara(par, ret);
 		    while (t.type != tok_eop)   /* error recovery: */
-			t = get_token(in);   /* eat rest of paragraph */
+			dtor(t), t = get_token(in);   /* eat rest of paragraph */
 		    continue;
 		}
 	    }
 	    addpara(par, ret);
+	    continue;
 	}
 
 	/*
@@ -470,7 +530,7 @@ static void read_file(paragraph ***ret, input *in) {
 		break;
 	      case c__comment:
 		do {
-		    t = get_token(in);
+		    dtor(t), t = get_token(in);
 		} while (t.type != tok_eop && t.type != tok_eof);
 		continue;	       /* next paragraph */
 		/*
@@ -488,8 +548,8 @@ static void read_file(paragraph ***ret, input *in) {
 	      case c_C: needkw = 2; par.type = para_Chapter; break;
 	      case c_H: needkw = 2; par.type = para_Heading; break;
 	      case c_IM: needkw = 2; par.type = para_IM; break;
-		/* FIXME: multiple levels of Subsect */
-	      case c_S: needkw = 2; par.type = para_Subsect; break;
+	      case c_S: needkw = 2; par.type = para_Subsect;
+		par.aux = t.aux; break;
 	      case c_U: needkw = 0; par.type = para_UnnumberedChapter; break;
 		/* For \b and \n the keyword is optional */
 	      case c_b: needkw = 4; par.type = para_Bullet; break;
@@ -508,14 +568,14 @@ static void read_file(paragraph ***ret, input *in) {
 		filepos fp;
 
 		/* Get keywords. */
-		t = get_token(in);
+		dtor(t), t = get_token(in);
 		fp = t.pos;
 		while (t.type == tok_lbrace) {
 		    /* This is a keyword. */
 		    nkeys++;
 		    /* FIXME: there will be bugs if anyone specifies an
 		     * empty keyword (\foo{}), so trap this case. */
-		    while (t = get_token(in),
+		    while (dtor(t), t = get_token(in),
 			   t.type == tok_word || t.type == tok_white) {
 			if (t.type == tok_white)
 			    rdadd(&rs, ' ');
@@ -524,11 +584,10 @@ static void read_file(paragraph ***ret, input *in) {
 		    }
 		    if (t.type != tok_rbrace) {
 			error(err_kwunclosed, &t.pos);
-			/* FIXME: memory leak */
 			continue;
 		    }
 		    rdadd(&rs, 0);     /* add string terminator */
-		    t = get_token(in); /* eat right brace */
+		    dtor(t), t = get_token(in); /* eat right brace */
 		}
 
 		rdadd(&rs, 0);     /* add string terminator */
@@ -547,8 +606,9 @@ static void read_file(paragraph ***ret, input *in) {
 		if (needkw == 8) {
 		    if (t.type != tok_eop) {
 			error(err_bodyillegal, &t.pos);
-			while (t.type != tok_eop)   /* error recovery: */
-			    t = get_token(in);   /* eat rest of paragraph */
+			/* Error recovery: eat the rest of the paragraph */
+			while (t.type != tok_eop)
+			    dtor(t), t = get_token(in);
 		    }
 		    addpara(par, ret);
 		    continue;	       /* next paragraph */
@@ -569,43 +629,71 @@ static void read_file(paragraph ***ret, input *in) {
 	 *  \I
 	 *  \u
 	 *  \W
+	 *  \date
 	 *  \\ \{ \}
 	 */
 	parsestk = stk_new();
 	style = word_Normal;
+	indexing = FALSE;
 	while (t.type != tok_eop && t.type != tok_eof) {
+	    already = FALSE;
 	    if (t.type == tok_cmd && t.cmd == c__escaped)
 		t.type = tok_word;     /* nice and simple */
 	    switch (t.type) {
 	      case tok_white:
+		if (whptr == &par.words)
+		    break;	       /* strip whitespace at start of para */
 		wd.text = NULL;
 		wd.type = word_WhiteSpace;
-		addword(wd, &whptr);
+		wd.alt = NULL;
+		wd.fpos = t.pos;
+		if (indexing)
+		    rdadd(&indexstr, ' ');
+		if (!indexing || index_visible)
+		    addword(wd, &whptr);
 		break;
 	      case tok_word:
-		wd.text = ustrdup(t.text);
-		wd.type = style;
-		addword(wd, &whptr);
+		if (indexing)
+		    rdadds(&indexstr, t.text);
+		if (!indexing || index_visible) {
+		    wd.text = ustrdup(t.text);
+		    wd.type = style;
+		    wd.alt = NULL;
+		    wd.fpos = t.pos;
+		    addword(wd, &whptr);
+		}
 		break;
 	      case tok_lbrace:
 		error(err_unexbrace, &t.pos);
-		/* FIXME: errorrec. Push nop. */
+		/* Error recovery: push nop */
+		sitem = smalloc(sizeof(*sitem));
+		sitem->type = stack_nop;
+		stk_push(parsestk, sitem);
 		break;
 	      case tok_rbrace:
 		sitem = stk_pop(parsestk);
 		if (!sitem)
 		    error(err_unexbrace, &t.pos);
-		else switch (sitem->type) {
-		  case stack_ualt:
-		    whptr = sitem->whptr;
-		    break;
-		  case stack_style:
-		    style = word_Normal;
-		    break;
-		  case stack_idx:
-		    /* FIXME: do this bit! */
-		  case stack_nop:
-		    break;
+		else {
+		    if (sitem->type & stack_ualt)
+			whptr = sitem->whptr;
+		    if (sitem->type & stack_style)
+			style = word_Normal;
+		    if (sitem->type & stack_idx) {
+			indexword->text = ustrdup(indexstr.text);
+			sfree(indexstr.text);
+			if (index_downcase)
+			    ustrlow(indexword->text);
+			indexing = FALSE;
+		    }
+		    if (sitem->type & stack_hyper) {
+			wd.text = NULL;
+			wd.type = word_HyperEnd;
+			wd.alt = NULL;
+			wd.fpos = t.pos;
+			if (!indexing || index_visible)
+			    addword(wd, &whptr);
+		    }
 		}
 		sfree(sitem);
 		break;
@@ -613,52 +701,108 @@ static void read_file(paragraph ***ret, input *in) {
 		switch (t.cmd) {
 		  case c_K:
 		  case c_k:
+		  case c_W:
+		  case c_date:
 		    /*
-		     * Keyword. We expect a left brace, some text,
-		     * and then a right brace. No nesting; no
-		     * arguments.
+		     * Keyword, hyperlink, or \date. We expect a
+		     * left brace, some text, and then a right
+		     * brace. No nesting; no arguments.
 		     */
+		    wd.fpos = t.pos;
 		    if (t.cmd == c_K)
 			wd.type = word_UpperXref;
-		    else
+		    else if (t.cmd == c_k)
 			wd.type = word_LowerXref;
-		    t = get_token(in);
+		    else if (t.cmd == c_W)
+			wd.type = word_HyperLink;
+		    else
+			wd.type = word_Normal;
+		    dtor(t), t = get_token(in);
 		    if (t.type != tok_lbrace) {
-			error(err_explbr, &t.pos);
-		    }
-		    {
+			if (wd.type == word_Normal) {
+			    time_t thetime = time(NULL);
+			    struct tm *broken = localtime(&thetime);
+			    already = TRUE;
+			    wd.text = ustrftime(NULL, broken);
+			    wd.type = style;
+			} else
+			    error(err_explbr, &t.pos);
+		    } else {
 			rdstring rs = { 0, 0, NULL };
-			while (t = get_token(in),
+			while (dtor(t), t = get_token(in),
 			       t.type == tok_word || t.type == tok_white) {
 			    if (t.type == tok_white)
 				rdadd(&rs, ' ');
 			    else
 				rdadds(&rs, t.text);
 			}
-			wd.text = ustrdup(rs.text);
+			if (wd.type == word_Normal) {
+			    time_t thetime = time(NULL);
+			    struct tm *broken = localtime(&thetime);
+			    wd.text = ustrftime(rs.text, broken);
+			    wd.type = style;
+			} else {
+			    wd.text = ustrdup(rs.text);
+			}
+			sfree(rs.text);
+			if (t.type != tok_rbrace) {
+			    error(err_kwexprbr, &t.pos);
+			}
 		    }
-		    if (t.type != tok_rbrace) {
-			error(err_kwexprbr, &t.pos);
+		    wd.alt = NULL;
+		    if (!indexing || index_visible)
+			addword(wd, &whptr);
+		    else
+			sfree(wd.text);
+		    if (wd.type == word_HyperLink) {
+			/*
+			 * Hyperlinks are different: they then
+			 * expect another left brace, to begin
+			 * delimiting the text marked by the link.
+			 */
+			dtor(t), t = get_token(in);
+			/*
+			 * Special cases: \W{}\c, \W{}\e, \W{}\cw
+			 */
+			if (t.type == tok_cmd &&
+			    (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
+			    if (style != word_Normal)
+				error(err_nestedstyles, &t.pos);
+			    else {
+				style = (t.cmd == c_c ? word_Code :
+					 t.cmd == c_cw ? word_WeakCode :
+					 word_Emph);
+				sitem->type |= stack_style;
+			    }
+			    dtor(t), t = get_token(in);
+			}
+			if (t.type != tok_lbrace) {
+			    error(err_explbr, &t.pos);
+			} else {
+			    sitem = smalloc(sizeof(*sitem));
+			    sitem->type = stack_hyper;
+			    stk_push(parsestk, sitem);
+			}
 		    }
-		    addword(wd, &whptr);
 		    break;
 		  case c_c:
 		  case c_cw:
 		  case c_e:
+		    type = t.cmd;
 		    if (style != word_Normal) {
 			error(err_nestedstyles, &t.pos);
 			/* Error recovery: eat lbrace, push nop. */
-			t = get_token(in);
+			dtor(t), t = get_token(in);
 			sitem = smalloc(sizeof(*sitem));
 			sitem->type = stack_nop;
 			stk_push(parsestk, sitem);
 		    }
-		    t = get_token(in);
+		    dtor(t), t = get_token(in);
 		    if (t.type != tok_lbrace) {
 			error(err_explbr, &t.pos);
 		    } else {
-			style = (t.cmd == c_c ? word_Code :
-				 t.cmd == c_cw ? word_WeakCode :
+			style = (type == c_c ? word_Code :
+				 type == c_cw ? word_WeakCode :
 				 word_Emph);
 			sitem = smalloc(sizeof(*sitem));
 			sitem->type = stack_style;
@@ -668,36 +812,91 @@ static void read_file(paragraph ***ret, input *in) {
 		  case c_i:
 		  case c_ii:
 		  case c_I:
-		    if (style != word_Normal) {
-			error(err_nestedstyles, &t.pos);
+		    type = t.cmd;
+		    if (indexing) {
+			error(err_nestedindex, &t.pos);
 			/* Error recovery: eat lbrace, push nop. */
-			t = get_token(in);
+			dtor(t), t = get_token(in);
 			sitem = smalloc(sizeof(*sitem));
 			sitem->type = stack_nop;
 			stk_push(parsestk, sitem);
 		    }
-		    t = get_token(in);
+		    sitem = smalloc(sizeof(*sitem));
+		    sitem->type = stack_idx;
+		    dtor(t), t = get_token(in);
+		    /*
+		     * Special cases: \i\c, \i\e, \i\cw
+		     */
+		    wd.fpos = t.pos;
+		    if (t.type == tok_cmd &&
+			(t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) {
+			if (style != word_Normal)
+			    error(err_nestedstyles, &t.pos);
+			else {
+			    style = (t.cmd == c_c ? word_Code :
+				     t.cmd == c_cw ? word_WeakCode :
+				     word_Emph);
+			    sitem->type |= stack_style;
+			}
+			dtor(t), t = get_token(in);
+		    }
 		    if (t.type != tok_lbrace) {
+			sfree(sitem);
 			error(err_explbr, &t.pos);
 		    } else {
+			/* Add an index-reference word with no text as yet */
+			wd.type = word_IndexRef;
+			wd.text = NULL;
+			wd.alt = NULL;
+			indexword = addword(wd, &whptr);
+			/* Set up a rdstring to read the index text */
+			indexstr = nullrs;
+			/* Flags so that we do the Right Things with text */
+			index_visible = (type != c_I);
+			index_downcase = (type == c_ii);
+			indexing = TRUE;
+			/* Stack item to close the indexing on exit */
+			stk_push(parsestk, sitem);
+		    }
+		    break;
+		  case c_u:
+		    uchr = t.aux;
+		    if (!indexing || index_visible) {
+			wchar_t text[2];
+			text[1] = 0;
+			text[0] = uchr;
+			wd.text = ustrdup(text);
+			wd.type = style;
+			wd.alt = NULL;
+			wd.fpos = t.pos;
+			uword = addword(wd, &whptr);
+		    }
+		    dtor(t), t = get_token(in);
+		    if (t.type == tok_lbrace) {
 			/*
-			 * FIXME: do something useful
-			 * Add an index-ref word and keep a pointer to it
-			 * Set a flag so that other addwords also update it
+			 * \u with a left brace. Until the brace
+			 * closes, all further words go on a
+			 * sidetrack from the main thread of the
+			 * paragraph.
 			 */
 			sitem = smalloc(sizeof(*sitem));
-			sitem->type = stack_idx;
+			sitem->type = stack_ualt;
+			sitem->whptr = whptr;
 			stk_push(parsestk, sitem);
+			whptr = &uword->alt;
+		    } else {
+			if (indexing)
+			    rdadd(&indexstr, uchr);
+			already = TRUE;
 		    }
 		    break;
-		  case c_u:
-		  case c_W:
 		  default:
 		    error(err_badmidcmd, t.text, &t.pos);
 		    break;
 		}
 	    }
-	    t = get_token(in);
+	    if (!already)
+		dtor(t), t = get_token(in);
 	}
 	/* Check the stack is empty */
 	if (NULL != (sitem = stk_pop(parsestk))) {
@@ -710,6 +909,7 @@ static void read_file(paragraph ***ret, input *in) {
 	stk_free(parsestk);
 	addpara(par, ret);
     }
+    dtor(t);
 }
 
 paragraph *read_input(input *in) {
@@ -719,9 +919,7 @@ paragraph *read_input(input *in) {
     while (in->currindex < in->nfiles) {
 	in->currfp = fopen(in->filenames[in->currindex], "r");
 	if (in->currfp) {
-	    in->pos.filename = in->filenames[in->currindex];
-	    in->pos.line = 1;
-	    in->pos.col = 1;
+	    setpos(in, in->filenames[in->currindex]);
 	    read_file(&hptr, in);
 	}
 	in->currindex++;