summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Tatham <anakin@pobox.com>1999-07-31 18:44:53 +0000
committerSimon Tatham <anakin@pobox.com>1999-07-31 18:44:53 +0000
commit0d14833a9c76c51cc7417d8fd60bec9d92714b8e (patch)
treec0716d398e83bc746baad088d5dfc215d5fea483
parent4c8c2b256ed01563a98b4cd820dc8ffef30d7fc1 (diff)
downloadhalibut-0d14833a9c76c51cc7417d8fd60bec9d92714b8e.zip
halibut-0d14833a9c76c51cc7417d8fd60bec9d92714b8e.tar.gz
halibut-0d14833a9c76c51cc7417d8fd60bec9d92714b8e.tar.bz2
halibut-0d14833a9c76c51cc7417d8fd60bec9d92714b8e.tar.xz
Further development work. Parser nearly finished
[originally from svn r187]
-rw-r--r--Makefile11
-rw-r--r--buttress.h79
-rw-r--r--error.c77
-rw-r--r--input.c682
-rw-r--r--inputs/test.but8
-rw-r--r--main.c30
-rw-r--r--malloc.c26
-rw-r--r--misc.c42
-rw-r--r--misc/buttress.sl4
-rw-r--r--ustring.c47
10 files changed, 965 insertions, 41 deletions
diff --git a/Makefile b/Makefile
index 23eb91e..739c91e 100644
--- a/Makefile
+++ b/Makefile
@@ -11,10 +11,15 @@ endif
all:
@test -d $(BUILDDIR) || mkdir $(BUILDDIR)
@make -C $(BUILDDIR) -f ../Makefile REALBUILD=yes
+clean:
+ @test -d $(BUILDDIR) || mkdir $(BUILDDIR)
+ @make -C $(BUILDDIR) -f ../Makefile clean REALBUILD=yes
else
# The `real' makefile part.
+CFLAGS += -Wall -W
+
ifdef RELEASE
ifndef VERSION
VERSION := $(RELEASE)
@@ -34,7 +39,8 @@ endif
SRC := ../
-MODULES := main malloc error help licence version
+MODULES := main malloc ustring error help licence version misc
+MODULES += input
OBJECTS := $(addsuffix .o,$(MODULES))
DEPS := $(addsuffix .d,$(MODULES))
@@ -48,6 +54,9 @@ buttress: $(OBJECTS)
version.o: FORCE
$(CC) $(VDEF) -MD -c $(SRC)version.c
+clean::
+ rm -f *.o buttress core
+
FORCE: # phony target to force version.o to be rebuilt every time
-include $(DEPS)
diff --git a/buttress.h b/buttress.h
index afb38ae..9dfa492 100644
--- a/buttress.h
+++ b/buttress.h
@@ -1,6 +1,9 @@
#ifndef BUTTRESS_BUTTRESS_H
#define BUTTRESS_BUTTRESS_H
+#include <stdio.h>
+#include <wchar.h>
+
#ifdef __GNUC__
#define NORETURN __attribute__((__noreturn__))
#endif
@@ -16,10 +19,20 @@
* Structure tags
*/
typedef struct input_Tag input;
+typedef struct filepos_Tag filepos;
typedef struct paragraph_Tag paragraph;
typedef struct word_Tag word;
/*
+ * Data structure to hold a file name and index, a line and a
+ * column number, for reporting errors
+ */
+struct filepos_Tag {
+ char *filename;
+ int line, col;
+};
+
+/*
* Data structure to hold all the file names etc for input
*/
#define INPUT_PUSHBACK_MAX 16
@@ -28,8 +41,9 @@ struct input_Tag {
int nfiles; /* how many in the list */
FILE *currfp; /* the currently open one */
int currindex; /* which one is that in the list */
- char pushback[INPUT_PUSHBACK_MAX]; /* pushed-back input characters */
+ int pushback[INPUT_PUSHBACK_MAX]; /* pushed-back input characters */
int npushback;
+ filepos pos;
};
/*
@@ -39,19 +53,27 @@ struct input_Tag {
struct paragraph_Tag {
paragraph *next;
int type;
- char *keyword; /* for IR, IA, and heading paras */
+ wchar_t *keyword; /* for most special paragraphs */
word *words; /* list of words in paragraph */
-} paragraph;
+};
enum {
- para_IA, /* index alias */
- para_IR, /* index rewrite */
+ para_IM, /* index merge */
+ para_BR, /* bibliography rewrite */
para_Chapter,
para_Appendix,
+ para_UnnumberedChapter,
para_Heading,
para_Subsect,
para_Normal,
+ para_Biblio,
para_Bullet,
- para_Code
+ para_NumberedList,
+ para_Code,
+ para_Copyright,
+ para_Preamble,
+ para_NoCite,
+ para_Title,
+ para_VersionID
};
/*
@@ -60,14 +82,18 @@ enum {
struct word_Tag {
word *next;
int type;
- char *text;
-}
+ wchar_t *text;
+};
enum {
word_Normal,
word_Emph,
- word_Code,
- word_IndexRef /* always invisible */
-}
+ word_Code, /* monospaced; `quoted' in text */
+ word_WeakCode, /* monospaced, normal in text */
+ word_UpperXref, /* \K */
+ word_LowerXref, /* \k */
+ word_IndexRef, /* (always an invisible one) */
+ word_WhiteSpace /* text is NULL or ignorable */
+};
/*
* error.c
@@ -79,6 +105,20 @@ enum {
err_optnoarg, /* option `-%s' requires an argument */
err_nosuchopt, /* unrecognised option `-%s' */
err_noinput, /* no input files */
+ err_cantopen, /* unable to open input file `%s' */
+ err_nodata, /* no data in input files */
+ err_brokencodepara, /* line in codepara didn't begin `\c' */
+ err_kwunclosed, /* expected `}' after keyword */
+ err_kwillegal, /* paragraph type expects no keyword */
+ err_kwexpected, /* paragraph type expects a keyword */
+ err_kwtoomany, /* paragraph type expects only 1 */
+ err_bodyillegal, /* paragraph type expects only kws! */
+ err_badmidcmd, /* invalid command in mid-para */
+ err_unexbrace, /* unexpected brace */
+ err_explbr, /* expected `{' after command */
+ err_kwexprbr, /* expected `}' after cross-ref */
+ err_missingrbrace, /* unclosed braces at end of para */
+ err_nestedstyles /* unable to nest text styles */
};
/*
@@ -89,6 +129,14 @@ void *srealloc(void *p, int size);
void sfree(void *p);
/*
+ * ustring.c
+ */
+wchar_t *ustrdup(wchar_t *s);
+char *ustrtoa(wchar_t *s, char *outbuf, int size);
+int ustrlen(wchar_t *s);
+wchar_t *ustrcpy(wchar_t *dest, wchar_t *source);
+
+/*
* help.c
*/
void help(void);
@@ -106,6 +154,15 @@ void licence(void);
const char *const version;
/*
+ * misc.c
+ */
+typedef struct stackTag *stack;
+stack stk_new(void);
+void stk_free(stack);
+void stk_push(stack, void *);
+void *stk_pop(stack);
+
+/*
* input.c
*/
paragraph *read_input(input *in);
diff --git a/error.c b/error.c
index 2c6e6fb..218a109 100644
--- a/error.c
+++ b/error.c
@@ -11,10 +11,14 @@
* Error flags
*/
#define PREFIX 0x0001 /* give `buttress:' prefix */
+#define FILEPOS 0x0002 /* give file position prefix */
static void do_error(int code, va_list ap) {
char error[1024];
+ char auxbuf[256];
char *sp;
+ wchar_t *wsp;
+ filepos fpos;
int flags;
switch(code) {
@@ -36,10 +40,83 @@ static void do_error(int code, va_list ap) {
sprintf(error, "no input files");
flags = PREFIX;
break;
+ case err_cantopen:
+ sp = va_arg(ap, char *);
+ sprintf(error, "unable to open input file `%.200s'", sp);
+ flags = PREFIX;
+ break;
+ case err_nodata: /* no arguments */
+ sprintf(error, "no data in input files");
+ flags = PREFIX;
+ break;
+ case err_brokencodepara:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "every line of a code paragraph should begin `\\c'");
+ flags = FILEPOS;
+ break;
+ case err_kwunclosed:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "expected `}' after paragraph keyword");
+ flags = FILEPOS;
+ break;
+ case err_kwexpected:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "expected a paragraph keyword");
+ flags = FILEPOS;
+ break;
+ case err_kwillegal:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "expected no paragraph keyword");
+ flags = FILEPOS;
+ break;
+ case err_kwtoomany:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "expected only one paragraph keyword");
+ flags = FILEPOS;
+ break;
+ case err_bodyillegal:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "expected no text after paragraph keyword");
+ flags = FILEPOS;
+ break;
+ case err_badmidcmd:
+ wsp = va_arg(ap, wchar_t *);
+ sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "command `%.200s' unexpected in mid-paragraph", sp);
+ flags = FILEPOS;
+ break;
+ case err_unexbrace:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "brace character unexpected in mid-paragraph");
+ flags = FILEPOS;
+ break;
+ case err_explbr:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "expected `{' after command");
+ flags = FILEPOS;
+ break;
+ case err_kwexprbr:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "expected `}' after cross-reference");
+ flags = FILEPOS;
+ break;
+ case err_missingrbrace:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "unclosed braces at end of paragraph");
+ flags = FILEPOS;
+ break;
+ case err_nestedstyles:
+ fpos = *va_arg(ap, filepos *);
+ sprintf(error, "unable to nest text styles");
+ flags = FILEPOS;
+ break;
}
if (flags & PREFIX)
fputs("buttress: ", stderr);
+ if (flags & FILEPOS)
+ fprintf(stderr, "%s:%d: ", fpos.filename, fpos.line);
fputs(error, stderr);
fputc('\n', stderr);
}
diff --git a/input.c b/input.c
index e70c6a7..e10884f 100644
--- a/input.c
+++ b/input.c
@@ -6,7 +6,9 @@
#include <assert.h>
#include "buttress.h"
-static void unget(input *in, char c) {
+#define TAB_STOP 8 /* for column number tracking */
+
+static void unget(input *in, int c) {
assert(in->npushback < INPUT_PUSHBACK_MAX);
in->pushback[in->npushback++] = c;
}
@@ -16,59 +18,697 @@ static void unget(input *in, char c) {
*/
static int get(input *in) {
if (in->npushback)
- return (unsigned char)in->pushback[--in->npushback];
+ return in->pushback[--in->npushback];
else if (in->currfp) {
int c = getc(in->currfp);
if (c == EOF) {
fclose(in->currfp);
in->currfp = NULL;
}
+ /* Track line numbers, for error reporting */
+ switch (c) {
+ case '\t':
+ in->pos.col = 1 + (in->pos.col + TAB_STOP-1) % TAB_STOP;
+ break;
+ case '\n':
+ in->pos.col = 1;
+ in->pos.line++;
+ break;
+ default:
+ in->pos.col++;
+ break;
+ }
+ /* FIXME: do input charmap translation. We should be returning
+ * Unicode here. */
return c;
} else
return EOF;
}
/*
+ * Small routines to amalgamate a string from an input source.
+ */
+typedef struct tagRdstring rdstring;
+struct tagRdstring {
+ int pos, size;
+ wchar_t *text;
+};
+static void rdadd(rdstring *rs, wchar_t c) {
+ if (rs->pos >= rs->size-1) {
+ rs->size = rs->pos + 128;
+ rs->text = srealloc(rs->text, rs->size * sizeof(wchar_t));
+ }
+ rs->text[rs->pos++] = c;
+ rs->text[rs->pos] = 0;
+}
+static void rdadds(rdstring *rs, wchar_t *p) {
+ int len = ustrlen(p);
+ if (rs->pos >= rs->size - len) {
+ rs->size = rs->pos + len + 128;
+ rs->text = srealloc(rs->text, rs->size * sizeof(wchar_t));
+ }
+ ustrcpy(rs->text + rs->pos, p);
+ rs->pos += len;
+}
+static wchar_t *rdtrim(rdstring *rs) {
+ rs->text = srealloc(rs->text, (rs->pos + 1) * sizeof(wchar_t));
+ return rs->text;
+}
+
+/*
* Lexical analysis of source files.
*/
-typedef struct tagToken token;
-struct tagToken {
+typedef struct token_Tag token;
+struct token_Tag {
int type;
- char *text;
+ int cmd, aux;
+ wchar_t *text;
+ filepos pos;
};
enum {
- tok_eof,
+ tok_eof, /* end of file */
tok_eop, /* end of paragraph */
- tok_word, /* an ordinary word */
+ tok_white, /* whitespace */
+ tok_word, /* a word or word fragment */
tok_cmd, /* \command */
- tok_bracetext /* {text} */
+ tok_lbrace, /* { */
+ tok_rbrace /* } */
};
+/* Buttress command keywords. */
+enum {
+ c__invalid, /* invalid command */
+ c__comment, /* comment command (\#) */
+ c__escaped, /* escaped character */
+ c_A, /* appendix heading */
+ c_B, /* bibliography entry */
+ c_BR, /* bibliography rewrite */
+ c_C, /* chapter heading */
+ c_H, /* heading */
+ c_I, /* invisible index mark */
+ c_IM, /* index merge/rewrite */
+ c_K, /* capitalised cross-reference */
+ c_S, /* aux field is 0, 1, 2, ... */
+ c_U, /* unnumbered-chapter heading */
+ c_W, /* Web hyperlink */
+ c_b, /* bulletted list */
+ c_c, /* code */
+ c_copyright, /* copyright statement */
+ c_cw, /* weak code */
+ c_date, /* document processing date */
+ c_e, /* emphasis */
+ c_i, /* visible index mark */
+ c_ii, /* uncapitalised visible index mark */
+ c_k, /* uncapitalised cross-reference */
+ c_n, /* numbered list */
+ c_nocite, /* bibliography trickery */
+ c_preamble, /* document preamble text */
+ c_title, /* document title */
+ c_u, /* aux field is char code */
+ c_versionid /* document RCS id */
+};
+
+/* Perhaps whitespace should be defined in a more Unicode-friendly way? */
+#define iswhite(c) ( (c)==32 || (c)==9 || (c)==13 || (c)==10 )
+#define isnl(c) ( (c)==10 )
+#define isdec(c) ( ((c)>='0'&&(c)<='9') )
+#define fromdec(c) ( (c)-'0' )
+#define ishex(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='F') || ((c)>='a'&&(c)<='f'))
+#define fromhex(c) ( (c)<='9' ? (c)-'0' : ((c)&0xDF) - ('A'-10) )
+#define iscmd(c) ( ((c)>='0'&&(c)<='9') || ((c)>='A'&&(c)<='Z') || ((c)>='a'&&(c)<='z'))
+
+/*
+ * Keyword comparison function. Like strcmp, but between a wchar_t *
+ * and a char *.
+ */
+static int kwcmp(wchar_t const *p, char const *q) {
+ int i;
+ do {
+ i = *p - *q;
+ } while (*p++ && *q++ && !i);
+ return i;
+}
+
+/*
+ * Match a keyword.
+ */
+static void match_kw(token *tok) {
+ /*
+ * FIXME. The ids are explicit in here so as to allow long-name
+ * equivalents to the various very short keywords.
+ */
+ static const struct { char const *name; int id; } keywords[] = {
+ {"#", c__comment}, /* comment command (\#) */
+ {"A", c_A}, /* appendix heading */
+ {"B", c_B}, /* bibliography entry */
+ {"BR", c_BR}, /* bibliography rewrite */
+ {"C", c_C}, /* chapter heading */
+ {"H", c_H}, /* heading */
+ {"I", c_I}, /* invisible index mark */
+ {"IM", c_IM}, /* index merge/rewrite */
+ {"K", c_K}, /* capitalised cross-reference */
+ {"U", c_U}, /* unnumbered-chapter heading */
+ {"W", c_W}, /* Web hyperlink */
+ {"\\", c__escaped}, /* escaped backslash (\\) */
+ {"b", c_b}, /* bulletted list */
+ {"c", c_c}, /* code */
+ {"copyright", c_copyright}, /* copyright statement */
+ {"cw", c_cw}, /* weak code */
+ {"date", c_date}, /* document processing date */
+ {"e", c_e}, /* emphasis */
+ {"i", c_i}, /* visible index mark */
+ {"ii", c_ii}, /* uncapitalised visible index mark */
+ {"k", c_k}, /* uncapitalised cross-reference */
+ {"n", c_n}, /* numbered list */
+ {"nocite", c_nocite}, /* bibliography trickery */
+ {"preamble", c_preamble}, /* document preamble text */
+ {"title", c_title}, /* document title */
+ {"versionid", c_versionid}, /* document RCS id */
+ {"{", c__escaped}, /* escaped lbrace (\{) */
+ {"}", c__escaped}, /* escaped rbrace (\}) */
+ };
+ int i, j, k, c;
+
+ /*
+ * Special cases: \S{0,1,2,...} and \uABCD. If the syntax
+ * doesn't match correctly, we just fall through to the
+ * binary-search phase.
+ */
+ if (tok->text[0] == 'S') {
+ /* We expect numeric characters thereafter. */
+ wchar_t *p = tok->text+1;
+ int n = 0;
+ while (*p && isdec(*p)) {
+ n = 10 * n + fromdec(*p);
+ p++;
+ }
+ if (!*p) {
+ tok->cmd = c_S;
+ tok->aux = n;
+ return;
+ }
+ } else if (tok->text[0] == 'u') {
+ /* We expect hex characters thereafter. */
+ wchar_t *p = tok->text+1;
+ int n = 0;
+ while (*p && ishex(*p)) {
+ n = 16 * n + fromhex(*p);
+ p++;
+ }
+ if (!*p) {
+ tok->cmd = c_u;
+ tok->aux = n;
+ return;
+ }
+ }
+
+ i = -1;
+ j = sizeof(keywords)/sizeof(*keywords);
+ while (j-i > 1) {
+ k = (i+j)/2;
+ c = kwcmp(tok->text, keywords[k].name);
+ if (c < 0)
+ j = k;
+ else if (c > 0)
+ i = k;
+ else /* c == 0 */ {
+ tok->cmd = keywords[k].id;
+ return;
+ }
+ }
+
+ tok->cmd = c__invalid;
+}
+
+
+/*
+ * Read a token from the input file, in the normal way (`normal' in
+ * the sense that code paragraphs work a different way).
+ */
+token get_token(input *in) {
+ int c;
+ int nls;
+ token ret;
+ rdstring rs = { 0, 0, NULL };
+
+ ret.text = NULL; /* default */
+ ret.pos = in->pos;
+ c = get(in);
+ if (iswhite(c)) { /* tok_white or tok_eop */
+ nls = 0;
+ do {
+ if (isnl(c))
+ nls++;
+ } while ((c = get(in)) != EOF && iswhite(c));
+ unget(in, c);
+ ret.type = (nls > 1 ? tok_eop : tok_white);
+ return ret;
+ } else if (c == EOF) { /* tok_eof */
+ ret.type = tok_eof;
+ return ret;
+ } else if (c == '\\') { /* tok_cmd */
+ c = get(in);
+ if (c == '\\' || c == '#' || c == '{' || c == '}') {
+ /* single-char command */
+ rdadd(&rs, c);
+ } else if (c == 'u') {
+ int len = 0;
+ do {
+ rdadd(&rs, c);
+ len++;
+ c = get(in);
+ } while (ishex(c) && len < 5);
+ unget(in, c);
+ } else if (iscmd(c)) {
+ do {
+ rdadd(&rs, c);
+ c = get(in);
+ } while (iscmd(c));
+ unget(in, c);
+ }
+ /*
+ * Now match the command against the list of available
+ * ones.
+ */
+ ret.type = tok_cmd;
+ ret.text = ustrdup(rs.text);
+ match_kw(&ret);
+ sfree(rs.text);
+ return ret;
+ } else if (c == '{') { /* tok_lbrace */
+ ret.type = tok_lbrace;
+ return ret;
+ } else if (c == '}') { /* tok_rbrace */
+ ret.type = tok_rbrace;
+ return ret;
+ } else { /* tok_word */
+ /*
+ * Read a word: the longest possible contiguous sequence of
+ * things other than whitespace, backslash, braces and
+ * hyphen. A hyphen terminates the word but is returned as
+ * part of it; everything else is pushed back for the next
+ * token.
+ */
+ while (1) {
+ if (iswhite(c) || c=='{' || c=='}' || c=='\\' || c==EOF) {
+ /* Put back the character that caused termination */
+ unget(in, c);
+ break;
+ } else {
+ rdadd(&rs, c);
+ if (c == '-')
+ break; /* hyphen terminates word */
+ }
+ c = get(in);
+ }
+ ret.type = tok_word;
+ ret.text = ustrdup(rs.text);
+ sfree(rs.text);
+ return ret;
+ }
+}
+
+/*
+ * Determine whether the next input character is an open brace (for
+ * telling code paragraphs from paragraphs which merely start with
+ * code).
+ */
+int isbrace(input *in) {
+ int c;
+
+ c = get(in);
+ unget(in, c);
+ return (c == '{');
+}
+
+/*
+ * Read the rest of a line that starts `\c'. Including nothing at
+ * all (tok_word with empty text).
+ */
+token get_codepar_token(input *in) {
+ int c;
+ token ret;
+ rdstring rs = { 0, 0, NULL };
+
+ ret.pos = in->pos;
+ ret.type = tok_word;
+ c = get(in); /* expect (and discard) one space */
+ if (c == ' ') {
+ c = get(in);
+ ret.pos = in->pos;
+ }
+ while (!isnl(c) && c != EOF) {
+ rdadd(&rs, c);
+ c = get(in);
+ }
+ unget(in, c);
+ ret.text = ustrdup(rs.text);
+ sfree(rs.text);
+ return ret;
+}
+
+/*
+ * Adds a new word to a linked list
+ */
+static void addword(word newword, word ***hptrptr) {
+ word *mnewword = smalloc(sizeof(word));
+ *mnewword = newword; /* structure copy */
+ mnewword->next = NULL;
+ **hptrptr = mnewword;
+ *hptrptr = &mnewword->next;
+}
+
/*
* Adds a new paragraph to a linked list
*/
-static paragraph addpara(paragraph ***hptrptr) {
- paragraph *newpara = smalloc(sizeof(paragraph));
- newpara->next = NULL;
- **hptrptr = newpara;
- *hptrptr = &newpara->next;
- return newpara;
+static void addpara(paragraph newpara, paragraph ***hptrptr) {
+ paragraph *mnewpara = smalloc(sizeof(paragraph));
+ *mnewpara = newpara; /* structure copy */
+ mnewpara->next = NULL;
+ **hptrptr = mnewpara;
+ *hptrptr = &mnewpara->next;
}
/*
* Reads a single file (ie until get() returns EOF)
*/
static void read_file(paragraph ***ret, input *in) {
- int c;
+ token t;
+ paragraph par;
+ word wd, **whptr;
+ int style;
+ struct stack_item {
+ enum {
+ stack_ualt, /* \u alternative */
+ stack_style, /* \e, \c, \cw */
+ stack_idx, /* \I, \i, \ii */
+ stack_nop /* do nothing (for error recovery) */
+ } type;
+ word **whptr; /* to restore from \u alternatives */
+ } *sitem;
+ stack parsestk;
+ /*
+ * Loop on each paragraph.
+ */
while (1) {
+ par.words = NULL;
+ par.keyword = NULL;
+ whptr = &par.words;
+
/*
* Get a token.
*/
- token t = get_token(in);
+ t = get_token(in);
if (t.type == tok_eof)
return;
- printf("token: %d\n", t.type);
+
+ /*
+ * Parse code paragraphs separately.
+ */
+ if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in)) {
+ par.type = para_Code;
+ while (1) {
+ t = get_codepar_token(in);
+ wd.type = word_WeakCode;
+ wd.text = ustrdup(t.text);
+ addword(wd, &whptr);
+ t = get_token(in);
+ if (t.type == tok_white) {
+ /*
+ * The newline after a code-paragraph line
+ */
+ t = get_token(in);
+ }
+ if (t.type == tok_eop || t.type == tok_eof)
+ break;
+ else if (t.type != tok_cmd || t.cmd != c_c) {
+ error(err_brokencodepara, &t.pos);
+ addpara(par, ret);
+ while (t.type != tok_eop) /* error recovery: */
+ t = get_token(in); /* eat rest of paragraph */
+ continue;
+ }
+ }
+ addpara(par, ret);
+ }
+
+ /*
+ * This token begins a paragraph. See if it's one of the
+ * special commands that define a paragraph type.
+ *
+ * (note that \# is special in a way, and \nocite takes no
+ * text)
+ */
+ par.type = para_Normal;
+ if (t.type == tok_cmd) {
+ int needkw;
+
+ switch (t.cmd) {
+ default:
+ needkw = -1;
+ break;
+ case c__comment:
+ do {
+ t = get_token(in);
+ } while (t.type != tok_eop && t.type != tok_eof);
+ continue; /* next paragraph */
+ /*
+ * `needkw' values:
+ *
+ * 0 -- no keywords at all
+ * 1 -- exactly one keyword
+ * 2 -- at least one keyword
+ * 4 -- any number of keywords including zero
+ * 8 -- at least one keyword and then nothing else
+ */
+ case c_A: needkw = 2; par.type = para_Appendix; break;
+ case c_B: needkw = 2; par.type = para_Biblio; break;
+ case c_BR: needkw = 1; par.type = para_BR; break;
+ case c_C: needkw = 2; par.type = para_Chapter; break;
+ case c_H: needkw = 2; par.type = para_Heading; break;
+ case c_IM: needkw = 2; par.type = para_IM; break;
+ /* FIXME: multiple levels of Subsect */
+ case c_S: needkw = 2; par.type = para_Subsect; break;
+ case c_U: needkw = 0; par.type = para_UnnumberedChapter; break;
+ /* For \b and \n the keyword is optional */
+ case c_b: needkw = 4; par.type = para_Bullet; break;
+ case c_n: needkw = 4; par.type = para_NumberedList; break;
+ case c_copyright: needkw = 0; par.type = para_Copyright; break;
+ /* For \nocite the keyword is _everything_ */
+ case c_nocite: needkw = 8; par.type = para_NoCite; break;
+ case c_preamble: needkw = 0; par.type = para_Preamble; break;
+ case c_title: needkw = 0; par.type = para_Title; break;
+ case c_versionid: needkw = 0; par.type = para_VersionID; break;
+ }
+
+ if (needkw >= 0) {
+ rdstring rs = { 0, 0, NULL };
+ int nkeys = 0;
+ filepos fp;
+
+ /* Get keywords. */
+ t = get_token(in);
+ fp = t.pos;
+ while (t.type == tok_lbrace) {
+ /* This is a keyword. */
+ nkeys++;
+ /* FIXME: there will be bugs if anyone specifies an
+ * empty keyword (\foo{}), so trap this case. */
+ while (t = get_token(in),
+ t.type == tok_word || t.type == tok_white) {
+ if (t.type == tok_white)
+ rdadd(&rs, ' ');
+ else
+ rdadds(&rs, t.text);
+ }
+ if (t.type != tok_rbrace) {
+ error(err_kwunclosed, &t.pos);
+ /* FIXME: memory leak */
+ continue;
+ }
+ rdadd(&rs, 0); /* add string terminator */
+ t = get_token(in); /* eat right brace */
+ }
+
+ rdadd(&rs, 0); /* add string terminator */
+
+ /* See whether we have the right number of keywords. */
+ if (needkw == 0 && nkeys > 0)
+ error(err_kwillegal, &fp);
+ if ((needkw & 11) && nkeys == 0)
+ error(err_kwexpected, &fp);
+ if ((needkw & 5) && nkeys > 1)
+ error(err_kwtoomany, &fp);
+
+ par.keyword = rdtrim(&rs);
+
+ /* Move to EOP in case of needkw==8 (no body) */
+ if (needkw == 8) {
+ if (t.type != tok_eop) {
+ error(err_bodyillegal, &t.pos);
+ while (t.type != tok_eop) /* error recovery: */
+ t = get_token(in); /* eat rest of paragraph */
+ }
+ addpara(par, ret);
+ continue; /* next paragraph */
+ }
+ }
+ }
+
+ /*
+ * Now read the actual paragraph, word by word, adding to
+ * the paragraph list.
+ *
+ * Mid-paragraph commands:
+ *
+ * \K \k
+ * \c \cw
+ * \e
+ * \i \ii
+ * \I
+ * \u
+ * \W
+ * \\ \{ \}
+ */
+ parsestk = stk_new();
+ style = word_Normal;
+ while (t.type != tok_eop && t.type != tok_eof) {
+ if (t.type == tok_cmd && t.cmd == c__escaped)
+ t.type = tok_word; /* nice and simple */
+ switch (t.type) {
+ case tok_white:
+ wd.text = NULL;
+ wd.type = word_WhiteSpace;
+ addword(wd, &whptr);
+ break;
+ case tok_word:
+ wd.text = ustrdup(t.text);
+ wd.type = style;
+ addword(wd, &whptr);
+ break;
+ case tok_lbrace:
+ error(err_unexbrace, &t.pos);
+ /* FIXME: errorrec. Push nop. */
+ break;
+ case tok_rbrace:
+ sitem = stk_pop(parsestk);
+ if (!sitem)
+ error(err_unexbrace, &t.pos);
+ else switch (sitem->type) {
+ case stack_ualt:
+ whptr = sitem->whptr;
+ break;
+ case stack_style:
+ style = word_Normal;
+ break;
+ case stack_idx:
+ /* FIXME: do this bit! */
+ case stack_nop:
+ break;
+ }
+ sfree(sitem);
+ break;
+ case tok_cmd:
+ switch (t.cmd) {
+ case c_K:
+ case c_k:
+ /*
+ * Keyword. We expect a left brace, some text,
+ * and then a right brace. No nesting; no
+ * arguments.
+ */
+ if (t.cmd == c_K)
+ wd.type = word_UpperXref;
+ else
+ wd.type = word_LowerXref;
+ t = get_token(in);
+ if (t.type != tok_lbrace) {
+ error(err_explbr, &t.pos);
+ }
+ {
+ rdstring rs = { 0, 0, NULL };
+ while (t = get_token(in),
+ t.type == tok_word || t.type == tok_white) {
+ if (t.type == tok_white)
+ rdadd(&rs, ' ');
+ else
+ rdadds(&rs, t.text);
+ }
+ wd.text = ustrdup(rs.text);
+ }
+ if (t.type != tok_rbrace) {
+ error(err_kwexprbr, &t.pos);
+ }
+ addword(wd, &whptr);
+ break;
+ case c_c:
+ case c_cw:
+ case c_e:
+ if (style != word_Normal) {
+ error(err_nestedstyles, &t.pos);
+ /* Error recovery: eat lbrace, push nop. */
+ t = get_token(in);
+ sitem = smalloc(sizeof(*sitem));
+ sitem->type = stack_nop;
+ stk_push(parsestk, sitem);
+ }
+ t = get_token(in);
+ if (t.type != tok_lbrace) {
+ error(err_explbr, &t.pos);
+ } else {
+ style = (t.cmd == c_c ? word_Code :
+ t.cmd == c_cw ? word_WeakCode :
+ word_Emph);
+ sitem = smalloc(sizeof(*sitem));
+ sitem->type = stack_style;
+ stk_push(parsestk, sitem);
+ }
+ break;
+ case c_i:
+ case c_ii:
+ case c_I:
+ if (style != word_Normal) {
+ error(err_nestedstyles, &t.pos);
+ /* Error recovery: eat lbrace, push nop. */
+ t = get_token(in);
+ sitem = smalloc(sizeof(*sitem));
+ sitem->type = stack_nop;
+ stk_push(parsestk, sitem);
+ }
+ t = get_token(in);
+ if (t.type != tok_lbrace) {
+ error(err_explbr, &t.pos);
+ } else {
+ /*
+ * FIXME: do something useful
+ * Add an index-ref word and keep a pointer to it
+ * Set a flag so that other addwords also update it
+ */
+ sitem = smalloc(sizeof(*sitem));
+ sitem->type = stack_idx;
+ stk_push(parsestk, sitem);
+ }
+ break;
+ case c_u:
+ case c_W:
+ default:
+ error(err_badmidcmd, t.text, &t.pos);
+ break;
+ }
+ }
+ t = get_token(in);
+ }
+ /* Check the stack is empty */
+ if (NULL != (sitem = stk_pop(parsestk))) {
+ do {
+ sfree(sitem);
+ sitem = stk_pop(parsestk);
+ } while (sitem);
+ error(err_missingrbrace, &t.pos);
+ }
+ stk_free(parsestk);
+ addpara(par, ret);
}
}
@@ -78,8 +718,14 @@ paragraph *read_input(input *in) {
while (in->currindex < in->nfiles) {
in->currfp = fopen(in->filenames[in->currindex], "r");
- if (in->currfp)
+ if (in->currfp) {
+ in->pos.filename = in->filenames[in->currindex];
+ in->pos.line = 1;
+ in->pos.col = 1;
read_file(&hptr, in);
+ }
in->currindex++;
}
+
+ return head;
}
diff --git a/inputs/test.but b/inputs/test.but
index ca159e9..fbc1441 100644
--- a/inputs/test.but
+++ b/inputs/test.but
@@ -6,7 +6,7 @@ feature that Buttress's input format supports. Creation date
\copyright Copyright 1999 Simon Tatham. All rights reserved.
-\versionid $Id: test.but,v 1.2 1999/02/07 13:17:47 simon Exp $
+\versionid $Id: test.but,v 1.3 1999/07/31 18:44:53 simon Exp $
\C{chap} First chapter title
@@ -63,7 +63,7 @@ An index tag containing non-alternatived Unicode: \i{\u00BFChe?}
An invisible index tag: \I{she seems to have an invisible tag}yeah.
-\S0{subsub} Smaller heading still
+\S1{subsub} Smaller heading still
A tiny section. Awww. How cute.
@@ -74,7 +74,7 @@ Here's an \i{appendix}, for no terribly good reason at all. See
It also contains a \W{http://www.gallery.uk.eu.org/}{hyperlink}.
-\U{bib} Bibliography
+\U Bibliography
\B{book} Some text describing a book.
@@ -83,7 +83,7 @@ the document even though there is no \cw{\\k} citing it.
\BR{book} [SillyCitation]
-\nocite{nocite}
+\nocite{bookwithoutcitation}{foo}
\B{uncited} If this text appears, there's an actual error.
diff --git a/main.c b/main.c
index ad77b46..a0d284a 100644
--- a/main.c
+++ b/main.c
@@ -112,7 +112,7 @@ int main(int argc, char **argv) {
*/
switch (c) {
case 'o':
- ofile = p;
+ outfile = p;
break;
}
p = NULL; /* prevent continued processing */
@@ -153,7 +153,7 @@ int main(int argc, char **argv) {
{
input in;
- paragraph sourceform;
+ paragraph *sourceform;
in.filenames = infiles;
in.nfiles = nfiles;
@@ -174,9 +174,31 @@ int main(int argc, char **argv) {
paragraph *p;
word *w;
for (p = sourceform; p; p = p->next) {
- printf("para %d \"%s\" {\n", p->type, p->keyword);
+ wchar_t *wp;
+ printf("para %d ", p->type);
+ if (p->keyword) {
+ wp = p->keyword;
+ while (*wp) {
+ putchar('\"');
+ for (; *wp; wp++)
+ putchar(*wp);
+ putchar('\"');
+ if (*++wp)
+ printf(", ");
+ }
+ } else
+ printf("(no keyword)");
+ printf(" {\n");
for (w = p->words; w; w = w->next) {
- printf(" word %d \"%s\"\n", w->type, w->text);
+ printf(" word %d ", w->type);
+ if (w->text) {
+ printf("\"");
+ for (wp = w->text; *wp; wp++)
+ putchar(*wp);
+ printf("\"");
+ } else
+ printf("(no text)");
+ printf("\n");
}
printf("}\n");
}
diff --git a/malloc.c b/malloc.c
index cf0f2e0..7d33085 100644
--- a/malloc.c
+++ b/malloc.c
@@ -35,5 +35,29 @@ void *srealloc(void *p, int size) {
q = malloc(size);
if (!q)
fatal(err_nomemory);
- return p;
+ return q;
+}
+
+/*
+ * Free a linked list of words
+ */
+void free_word_list(word *w) {
+ word *t;
+ while (w) {
+ t = w;
+ w = w->next;
+ sfree(t);
+ }
+}
+
+/*
+ * Free a linked list of paragraphs
+ */
+void free_para_list(paragraph *p) {
+ paragraph *t;
+ while (p) {
+ t = p;
+ p = p->next;
+ sfree(t);
+ }
}
diff --git a/misc.c b/misc.c
new file mode 100644
index 0000000..ec38016
--- /dev/null
+++ b/misc.c
@@ -0,0 +1,42 @@
+/*
+ * misc.c: miscellaneous useful items
+ */
+
+#include "buttress.h"
+
+struct stackTag {
+ void **data;
+ int sp;
+ int size;
+};
+
+stack stk_new(void) {
+ stack s;
+
+ s = smalloc(sizeof(*s));
+ s->sp = 0;
+ s->size = 0;
+ s->data = NULL;
+
+ return s;
+}
+
+void stk_free(stack s) {
+ sfree(s->data);
+ sfree(s);
+}
+
+void stk_push(stack s, void *item) {
+ if (s->size <= s->sp) {
+ s->size = s->sp + 32;
+ s->data = srealloc(s->data, s->size * sizeof(*s->data));
+ }
+ s->data[s->sp++] = item;
+}
+
+void *stk_pop(stack s) {
+ if (s->sp > 0)
+ return s->data[--s->sp];
+ else
+ return NULL;
+}
diff --git a/misc/buttress.sl b/misc/buttress.sl
index 13efc03..0d7e207 100644
--- a/misc/buttress.sl
+++ b/misc/buttress.sl
@@ -1,4 +1,4 @@
-% The functions here are common to both TeX and LaTeX modes.
+% Buttress mode for Jed.
$1 = "Buttress";
create_syntax_table ($1);
@@ -24,7 +24,7 @@ define_highlight_rule (".", "normal", $1);
build_highlight_table ($1);
#endif
-% This hook identifies lines containing TeX comments as paragraph separator
+% This hook identifies lines containing comments as paragraph separator
define buttress_is_comment() {
bol ();
while (ffind ("\\\\#")) go_right (3);
diff --git a/ustring.c b/ustring.c
new file mode 100644
index 0000000..00e26b6
--- /dev/null
+++ b/ustring.c
@@ -0,0 +1,47 @@
+/*
+ * ustring.c: Unicode string routines
+ */
+
+#include <wchar.h>
+#include "buttress.h"
+
+wchar_t *ustrdup(wchar_t *s) {
+ wchar_t *r;
+ if (s) {
+ r = smalloc((1+ustrlen(s)) * sizeof(wchar_t));
+ ustrcpy(r, s);
+ } else {
+ r = smalloc(1);
+ *r = 0;
+ }
+ return r;
+}
+
+char *ustrtoa(wchar_t *s, char *outbuf, int size) {
+ char *p;
+ if (!s) {
+ *outbuf = '\0';
+ return outbuf;
+ }
+ for (p = outbuf; *s && p < outbuf+size; p++,s++)
+ *p = *s;
+ if (p < outbuf+size)
+ *p = '\0';
+ else
+ outbuf[size-1] = '\0';
+ return outbuf;
+}
+
+int ustrlen(wchar_t *s) {
+ int len = 0;
+ while (*s++) len++;
+ return len;
+}
+
+wchar_t *ustrcpy(wchar_t *dest, wchar_t *source) {
+ wchar_t *ret = dest;
+ do {
+ *dest++ = *source;
+ } while (*source++);
+ return ret;
+}