5 files changed, 158 insertions, 6 deletions
diff --git a/biblio.c b/biblio.c
new file mode 100644
index 0000000..70e9411
--- /dev/null
+++ b/biblio.c
@@ -0,0 +1,104 @@
+/*
+ * biblio.c: process the bibliography
+ */
+
+#include <assert.h>
+#include "buttress.h"
+
+/* Render num as L"[num]" and return ustrdup()'s copy; expects num > 0 (0 yields "[]"). */
+static wchar_t *gentext(int num) {
+    wchar_t text[22];
+    wchar_t *p = text + sizeof(text) / sizeof(*text); /* one past the end: elements, not bytes */
+    *--p = L'\0';
+    *--p = L']';
+    for (; num != 0; num /= 10) {      /* digits are emitted right to left */
+	assert(p > text);
+	*--p = L"0123456789"[num % 10];
+    }
+    assert(p > text);
+    *--p = L'[';
+    return ustrdup(p);
+}
+
+/* Look up `key' in `kl'; report err_nosuchkw at `fpos' when it is absent. */
+static void cite_biblio(keywordlist *kl, wchar_t *key, filepos fpos) {
+    keyword *found = kw_lookup(kl, key);
+
+    if (found == NULL) {
+	/* Unknown keyword: complain and do nothing else. */
+	error(err_nosuchkw, &fpos, key);
+	return;
+    }
+
+    /*
+     * The keyword exists. If its paragraph is a bibliography entry
+     * still of type para_Biblio, mark that paragraph as cited.
+     */
+    if (found->para->type == para_Biblio)
+	found->para->type = para_BiblioCited;
+}
+
+/*
+ * Make a pass through the source form, generating citation formats
+ * for bibliography entries and also marking which bibliography
+ * entries are actually cited (or \nocite-ed).
+ */
+
+void gen_citations(paragraph *source, keywordlist *kl) {
+    paragraph *para;
+    int bibnum = 0;   /* last number handed to gentext() */
+
+    for (para = source; para; para = para->next) {
+	word *ptr;
+
+	/* \BR and \nocite paragraphs get special processing here. */
+	if (para->type == para_BR) {
+	    /* \BR supplies explicit citation text; allow it only once. */
+	    keyword *kw = kw_lookup(kl, para->keyword);
+	    if (!kw) {
+		error(err_nosuchkw, &para->fpos, para->keyword);
+	    } else if (kw->text) {
+		error(err_multiBR, &para->fpos, para->keyword);
+	    } else {
+		kw->text = dup_word_list(para->words);
+	    }
+	} else if (para->type == para_NoCite) {
+	    /* Walk the NUL-separated keys; an empty string ends the list. */
+	    wchar_t *wp = para->keyword;
+	    while (*wp) {
+		cite_biblio(kl, wp, para->fpos);
+		wp += 1+ustrlen(wp);
+	    }
+	}
+
+	/* Scan for keyword references. */
+	for (ptr = para->words; ptr; ptr = ptr->next) {
+	    if (ptr->type == word_UpperXref ||
+		ptr->type == word_LowerXref)
+		cite_biblio(kl, ptr->text, ptr->fpos);
+	}
+    }
+
+    /*
+     * We're now almost done; all that remains is to scan through
+     * the cited bibliography entries and invent default citation
+     * texts for the ones that don't already have explicitly
+     * provided \BR text.
+     */
+    for (para = source; para; para = para->next) {
+	if (para->type == para_BiblioCited) {
+	    keyword *kw = kw_lookup(kl, para->keyword);
+	    assert(kw != NULL);
+	    if (!kw->text) {
+		word *wd = smalloc(sizeof(word));
+		wd->text = gentext(++bibnum);
+		wd->type = word_Normal;
+		wd->alt = NULL;
+		wd->next = NULL;
+		/* Give fpos a defined value too: reuse the entry's position. */
+		wd->fpos = para->fpos;
+		kw->text = wd;
+	    }
+	}
+    }
+}
diff --git a/buttress.h b/buttress.h
index 673af56..ef19d4c 100644
--- a/buttress.h
+++ b/buttress.h
@@ -132,6 +132,7 @@ enum {
     err_badmidcmd,		       /* invalid command in mid-para */
     err_unexbrace,		       /* unexpected brace */
     err_explbr,			       /* expected `{' after command */
+    err_commenteof,		       /* EOF inside braced comment */
     err_kwexprbr,		       /* expected `}' after cross-ref */
     err_missingrbrace,		       /* unclosed braces at end of para */
     err_nestedstyles,		       /* unable to nest text styles */
diff --git a/error.c b/error.c
index 678ddaa..cc574cc 100644
--- a/error.c
+++ b/error.c
@@ -96,6 +96,11 @@ static void do_error(int code, va_list ap) {
 	sprintf(error, "expected `{' after command");
 	flags = FILEPOS;
 	break;
+      case err_commenteof:
+	fpos = *va_arg(ap, filepos *);
+	sprintf(error, "end of file unexpected inside `\\#{...}' comment");
+	flags = FILEPOS;
+	break;
       case err_kwexprbr:
 	fpos = *va_arg(ap, filepos *);
 	sprintf(error, "expected `}' after cross-reference");
diff --git a/input.c b/input.c
index c53e135..bb193a3 100644
--- a/input.c
+++ b/input.c
@@ -445,6 +445,7 @@ static void read_file(paragraph ***ret, input *in) {
     word wd, **whptr;
     int style;
     int already;
+    int iswhite, seenwhite;
     int type;
     struct stack_item {
 	enum {
@@ -531,6 +532,8 @@ static void read_file(paragraph ***ret, input *in) {
 		needkw = -1;
 		break;
 	      case c__comment:
+		if (isbrace(in))
+		    break;	       /* `\#{': isn't a comment para */
 		do {
 		    dtor(t), t = get_token(in);
 		} while (t.type != tok_eop && t.type != tok_eof);
@@ -637,7 +640,9 @@ static void read_file(paragraph ***ret, input *in) {
 	parsestk = stk_new();
 	style = word_Normal;
 	indexing = FALSE;
+	seenwhite = TRUE;
 	while (t.type != tok_eop && t.type != tok_eof) {
+	    iswhite = FALSE;
 	    already = FALSE;
 	    if (t.type == tok_cmd && t.cmd == c__escaped)
 		t.type = tok_word;     /* nice and simple */
@@ -653,6 +658,7 @@ static void read_file(paragraph ***ret, input *in) {
 		    rdadd(&indexstr, ' ');
 		if (!indexing || index_visible)
 		    addword(wd, &whptr);
+		iswhite = TRUE;
 		break;
 	      case tok_word:
 		if (indexing)
@@ -701,6 +707,40 @@ static void read_file(paragraph ***ret, input *in) {
 		break;
 	      case tok_cmd:
 		switch (t.cmd) {
+		  case c__comment:
+		    /*
+		     * In-paragraph comment: \#{ balanced braces }
+		     *
+		     * Anything goes here; even tok_eop. We should
+		     * eat whitespace after the close brace _if_
+		     * there was whitespace before the \#.
+		     */
+		    dtor(t), t = get_token(in);
+		    if (t.type != tok_lbrace) {
+			error(err_explbr, &t.pos);
+		    } else {
+			int braces = 1;
+			while (braces > 0) {
+			    dtor(t), t = get_token(in);
+			    if (t.type == tok_lbrace)
+				braces++;
+			    else if (t.type == tok_rbrace)
+				braces--;
+			    else if (t.type == tok_eof) {
+				error(err_commenteof, &t.pos);
+				break;
+			    }
+			}
+		    }
+		    if (seenwhite) {
+			already = TRUE;
+			dtor(t), t = get_token(in);
+			if (t.type == tok_white) {
+			    iswhite = TRUE;
+			    already = FALSE;
+			}
+		    }
+		    break;
 		  case c_K:
 		  case c_k:
 		  case c_W:
@@ -899,6 +939,7 @@ static void read_file(paragraph ***ret, input *in) {
 	    }
 	    if (!already)
 		dtor(t), t = get_token(in);
+	    seenwhite = iswhite;
 	}
 	/* Check the stack is empty */
 	if (NULL != (sitem = stk_pop(parsestk))) {
diff --git a/inputs/test.but b/inputs/test.but
index 5c15dd2..0be237d 100644
--- a/inputs/test.but
+++ b/inputs/test.but
@@ -1,17 +1,18 @@
 \title Buttress: A Test Document
 
 \preamble This manual is a small joke effort, designed to use every
-feature that Buttress's input format supports. Creation date
-\date{%Y.%m.%d} (default format is \date).
+feature \#{ comment } that Buttress's input format supports. Creation
+date \date{%Y.%m.%d} (default format is \date).
 
-\copyright Copyright 1999 Simon Tatham. All rights reserved.
+\copyright Copyright 1999 Simon \#{second comment}Tatham. All rights
+reserved.
 
-\versionid $Id: test.but,v 1.5 1999/08/15 18:35:20 simon Exp $
+\versionid $Id: test.but,v 1.6 1999/09/12 15:38:06 simon Exp $
 
 \C{chap} First chapter title
 
-This is a paragraph of text. It
-has line breaks in   between words, multiple   
+This is a para\#{another{} comment}graph of text. It
+has line\#{yet another one} breaks in   between words, multiple
   spaces (ignored), and \e{emphasised text} as well as \c{code
 fragments}.