diff options
| -rw-r--r-- | Makefile | 6 | ||||
| -rw-r--r-- | buttress.h | 79 | ||||
| -rw-r--r-- | contents.c | 150 | ||||
| -rw-r--r-- | error.c | 20 | ||||
| -rw-r--r-- | index.c | 8 | ||||
| -rw-r--r-- | input.c | 370 | ||||
| -rw-r--r-- | inputs/test.but | 14 | ||||
| -rw-r--r-- | keywords.c | 168 | ||||
| -rw-r--r-- | main.c | 127 | ||||
| -rw-r--r-- | malloc.c | 76 | ||||
| -rw-r--r-- | style.c | 8 | ||||
| -rw-r--r-- | ustring.c | 71 |
12 files changed, 954 insertions, 143 deletions
@@ -20,6 +20,10 @@ else CFLAGS += -Wall -W +ifdef LOGALLOC +CFLAGS += -DLOGALLOC +endif + ifdef RELEASE ifndef VERSION VERSION := $(RELEASE) @@ -40,7 +44,7 @@ endif SRC := ../ MODULES := main malloc ustring error help licence version misc -MODULES += input +MODULES += input keywords contents index style OBJECTS := $(addsuffix .o,$(MODULES)) DEPS := $(addsuffix .d,$(MODULES)) @@ -3,6 +3,7 @@ #include <stdio.h> #include <wchar.h> +#include <time.h> #ifdef __GNUC__ #define NORETURN __attribute__((__noreturn__)) @@ -15,6 +16,8 @@ #define FALSE 0 #endif +#define lenof(x) ( sizeof((x)) / sizeof(*(x)) ) + /* * Structure tags */ @@ -22,6 +25,10 @@ typedef struct input_Tag input; typedef struct filepos_Tag filepos; typedef struct paragraph_Tag paragraph; typedef struct word_Tag word; +typedef struct keywordlist_Tag keywordlist; +typedef struct keyword_Tag keyword; +typedef struct userstyle_Tag userstyle; +typedef struct numberstate_Tag numberstate; /* * Data structure to hold a file name and index, a line and a @@ -35,7 +42,7 @@ struct filepos_Tag { /* * Data structure to hold all the file names etc for input */ -#define INPUT_PUSHBACK_MAX 16 +#define INPUT_PUSHBACK_MAX 1 struct input_Tag { char **filenames; /* complete list of input files */ int nfiles; /* how many in the list */ @@ -43,7 +50,9 @@ struct input_Tag { int currindex; /* which one is that in the list */ int pushback[INPUT_PUSHBACK_MAX]; /* pushed-back input characters */ int npushback; - filepos pos; + int reportcols; /* report column numbers in errors */ + filepos pos[1+INPUT_PUSHBACK_MAX]; + int posptr; }; /* @@ -55,6 +64,8 @@ struct paragraph_Tag { int type; wchar_t *keyword; /* for most special paragraphs */ word *words; /* list of words in paragraph */ + int aux; /* number, in a numbered paragraph */ + word *kwtext; /* chapter/section indication */ }; enum { para_IM, /* index merge */ @@ -73,16 +84,18 @@ enum { para_Preamble, para_NoCite, para_Title, - para_VersionID + para_VersionID, + para_NotParaType /* placeholder value */ }; /* * Data structure to hold an individual word */ struct word_Tag { - word *next; + word *next, *alt; int type; wchar_t *text; + filepos fpos; }; enum { word_Normal, @@ -91,8 +104,11 @@ enum { word_WeakCode, /* monospaced, normal in text */ word_UpperXref, /* \K */ word_LowerXref, /* \k */ + word_XrefEnd, /* (invisible; no text) */ word_IndexRef, /* (always an invisible one) */ - word_WhiteSpace /* text is NULL or ignorable */ + word_WhiteSpace, /* text is NULL or ignorable */ + word_HyperLink, /* (invisible) */ + word_HyperEnd /* (also invisible; no text) */ }; /* @@ -118,15 +134,29 @@ enum { err_explbr, /* expected `{' after command */ err_kwexprbr, /* expected `}' after cross-ref */ err_missingrbrace, /* unclosed braces at end of para */ - err_nestedstyles /* unable to nest text styles */ + err_nestedstyles, /* unable to nest text styles */ + err_nestedindex, /* unable to nest `\i' thingys */ + err_nosuchkw /* unresolved cross-reference */ }; /* * malloc.c */ +#ifdef LOGALLOC +void *smalloc(char *file, int line, int size); +void *srealloc(char *file, int line, void *p, int size); +void sfree(char *file, int line, void *p); +#define smalloc(x) smalloc(__FILE__, __LINE__, x) +#define srealloc(x, y) srealloc(__FILE__, __LINE__, x, y) +#define sfree(x) sfree(__FILE__, __LINE__, x) +#else void *smalloc(int size); void *srealloc(void *p, int size); void sfree(void *p); +#endif +void free_word_list(word *w); +void free_para_list(paragraph *p); +word *dup_word_list(word *w); /* * ustring.c @@ -135,6 +165,9 @@ wchar_t *ustrdup(wchar_t *s); char *ustrtoa(wchar_t *s, char *outbuf, int size); int ustrlen(wchar_t *s); wchar_t *ustrcpy(wchar_t *dest, wchar_t *source); +int ustrcmp(wchar_t *lhs, wchar_t *rhs); +wchar_t *ustrlow(wchar_t *s); +wchar_t *ustrftime(wchar_t *fmt, struct tm *timespec); /* * help.c @@ -167,4 +200,38 @@ void *stk_pop(stack); */ paragraph *read_input(input *in); +/* + * keywords.c + */ +struct keywordlist_Tag { + int nkeywords; + int size; + keyword **keys; +}; +struct keyword_Tag { + wchar_t *key; /* the keyword itself */ + word *text; /* "Chapter 2", "Appendix Q"... */ + /* (NB: filepos are not set) */ +}; +keywordlist *get_keywords(paragraph *); +void free_keywords(keywordlist *); +void subst_keywords(paragraph *, keywordlist *); + +/* + * index.c + */ + +/* + * contents.c + */ +numberstate *number_init(void); +word *number_mktext(numberstate *, int, int, int); +void number_free(numberstate *); + +/* + * style.c + */ +struct userstyle_Tag { +}; + #endif diff --git a/contents.c b/contents.c new file mode 100644 index 0000000..c203852 --- /dev/null +++ b/contents.c @@ -0,0 +1,150 @@ +/* + * contents.c: build a table of contents + */ + +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include <limits.h> +#include "buttress.h" + +struct numberstate_Tag { + int chapternum; + int appendixnum; + int ischapter; + int *sectionlevels; + int maxsectlevel; + int listitem; +}; + +numberstate *number_init(void) { + numberstate *ret = smalloc(sizeof(numberstate)); + ret->chapternum = 0; + ret->appendixnum = -1; + ret->ischapter = 1; + ret->maxsectlevel = 32; + ret->sectionlevels = smalloc(ret->maxsectlevel * + sizeof(*ret->sectionlevels)); + ret->listitem = -1; + return ret; +} + +void number_free(numberstate *state) { + sfree(state); +} + +static void dotext(word ***wret, wchar_t *text) { + word *mnewword = smalloc(sizeof(word)); + mnewword->text = ustrdup(text); + mnewword->type = word_Normal; + mnewword->alt = NULL; + mnewword->next = NULL; + **wret = mnewword; + *wret = &mnewword->next; +} + +static void dospace(word ***wret) { + word *mnewword = smalloc(sizeof(word)); + mnewword->text = NULL; + mnewword->type = word_WhiteSpace; + mnewword->alt = NULL; + mnewword->next = NULL; + **wret = mnewword; + *wret = &mnewword->next; +} + +static void donumber(word ***wret, int num) { + wchar_t text[20]; + wchar_t *p = text + sizeof(text); + *--p = L'\0'; + while (num != 0) { + assert(p > text); + *--p = L"0123456789"[num % 10]; + num /= 10; + } + dotext(wret, p); +} + +static void doanumber(word ***wret, int num) { + wchar_t text[20]; + wchar_t *p; + int nletters, aton; + nletters = 1; + aton = 25; + while (num > aton) { + nletters++; + num -= aton+1; + if (aton < INT_MAX/26) + aton = (aton+1) * 26 - 1; + else + aton = INT_MAX; + } + p = text + sizeof(text); + *--p = L'\0'; + while (nletters--) { + assert(p > text); + *--p = L"ABCDEFGHIJKLMNOPQRSTUVWXYZ"[num % 26]; + num /= 26; + } + dotext(wret, p); +} + +word *number_mktext(numberstate *state, int para, int aux, int prev) { + word *ret = NULL; + word **pret = &ret; + int i, level; + + switch (para) { + case para_Chapter: + state->chapternum++; + for (i = 0; i < state->maxsectlevel; i++) + state->sectionlevels[i] = 0; + dotext(&pret, L"Chapter"); + dospace(&pret); + donumber(&pret, state->chapternum); + state->ischapter = 1; + break; + case para_Heading: + case para_Subsect: + level = (para == para_Heading ? 0 : aux); + if (state->maxsectlevel <= level) { + state->maxsectlevel = level + 32; + state->sectionlevels = srealloc(state->sectionlevels, + state->maxsectlevel * + sizeof(*state->sectionlevels)); + } + state->sectionlevels[level]++; + for (i = level+1; i < state->maxsectlevel; i++) + state->sectionlevels[i] = 0; + dotext(&pret, L"Section"); + dospace(&pret); + if (state->ischapter) + donumber(&pret, state->chapternum); + else + doanumber(&pret, state->appendixnum); + for (i = 0; i <= level; i++) { + dotext(&pret, L"."); + if (state->sectionlevels[i] == 0) + state->sectionlevels[i] = 1; + donumber(&pret, state->sectionlevels[i]); + } + break; + case para_Appendix: + state->appendixnum++; + for (i = 0; i < state->maxsectlevel; i++) + state->sectionlevels[i] = 0; + dotext(&pret, L"Appendix"); + dospace(&pret); + doanumber(&pret, state->appendixnum); + state->ischapter = 0; + break; + case para_NumberedList: + if (prev != para_NumberedList) + state->listitem = 0; + state->listitem++; + donumber(&pret, state->listitem); + break; + } + + return ret; +} @@ -111,12 +111,28 @@ static void do_error(int code, va_list ap) { sprintf(error, "unable to nest text styles"); flags = FILEPOS; break; + case err_nestedindex: + fpos = *va_arg(ap, filepos *); + sprintf(error, "unable to nest index markings"); + flags = FILEPOS; + break; + case err_nosuchkw: + fpos = *va_arg(ap, filepos *); + wsp = va_arg(ap, wchar_t *); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sprintf(error, "unable to resolve cross-reference to `%.200s'", sp); + flags = FILEPOS; + break; } if (flags & PREFIX) fputs("buttress: ", stderr); - if (flags & FILEPOS) - fprintf(stderr, "%s:%d: ", fpos.filename, fpos.line); + if (flags & FILEPOS) { + fprintf(stderr, "%s:%d:", fpos.filename, fpos.line); + if (fpos.col > 0) + fprintf(stderr, "%d:", fpos.col); + fputc(' ', stderr); + } fputs(error, stderr); fputc('\n', stderr); } @@ -0,0 +1,8 @@ +/* + * index.c: create and collate index data structures + */ + +#include <stdio.h> +#include <stdlib.h> +#include "buttress.h" + @@ -4,40 +4,70 @@ #include <stdio.h> #include <assert.h> +#include <time.h> #include "buttress.h" #define TAB_STOP 8 /* for column number tracking */ +static void setpos(input *in, char *fname) { + in->pos[0].filename = fname; + in->pos[0].line = 1; + in->pos[0].col = (in->reportcols ? 1 : -1); + in->pos[1] = in->pos[0]; + in->posptr = 1; +} + +static filepos getpos(input *in) { + return in->pos[in->posptr]; +} + static void unget(input *in, int c) { assert(in->npushback < INPUT_PUSHBACK_MAX); in->pushback[in->npushback++] = c; + in->posptr += lenof(in->pos)-1; + in->posptr %= lenof(in->pos); } /* * Can return EOF */ static int get(input *in) { - if (in->npushback) + if (in->npushback) { + in->posptr++; + in->posptr %= lenof(in->pos); return in->pushback[--in->npushback]; + } else if (in->currfp) { int c = getc(in->currfp); + filepos fp; + if (c == EOF) { fclose(in->currfp); in->currfp = NULL; } /* Track line numbers, for error reporting */ - switch (c) { - case '\t': - in->pos.col = 1 + (in->pos.col + TAB_STOP-1) % TAB_STOP; - break; - case '\n': - in->pos.col = 1; - in->pos.line++; - break; - default: - in->pos.col++; - break; + fp = in->pos[in->posptr]; + in->posptr++; + in->posptr %= lenof(in->pos); + if (in->reportcols) { + switch (c) { + case '\t': + fp.col = 1 + (fp.col + TAB_STOP-1) % TAB_STOP; + break; + case '\n': + fp.col = 1; + fp.line++; + break; + default: + fp.col++; + break; + } + } else { + fp.col = -1; + if (c == '\n') + fp.line++; } + in->pos[in->posptr] = fp; /* FIXME: do input charmap translation. We should be returning * Unicode here. */ return c; @@ -197,10 +227,15 @@ static void match_kw(token *tok) { if (tok->text[0] == 'S') { /* We expect numeric characters thereafter. */ wchar_t *p = tok->text+1; - int n = 0; - while (*p && isdec(*p)) { - n = 10 * n + fromdec(*p); - p++; + int n; + if (!*p) + n = 1; + else { + n = 0; + while (*p && isdec(*p)) { + n = 10 * n + fromdec(*p); + p++; + } } if (!*p) { tok->cmd = c_S; @@ -252,7 +287,7 @@ token get_token(input *in) { rdstring rs = { 0, 0, NULL }; ret.text = NULL; /* default */ - ret.pos = in->pos; + ret.pos = getpos(in); c = get(in); if (iswhite(c)) { /* tok_white or tok_eop */ nls = 0; @@ -260,6 +295,10 @@ token get_token(input *in) { if (isnl(c)) nls++; } while ((c = get(in)) != EOF && iswhite(c)); + if (c == EOF) { + ret.type = tok_eof; + return ret; + } unget(in, c); ret.type = (nls > 1 ? tok_eop : tok_white); return ret; @@ -350,12 +389,12 @@ token get_codepar_token(input *in) { token ret; rdstring rs = { 0, 0, NULL }; - ret.pos = in->pos; + ret.pos = getpos(in); ret.type = tok_word; c = get(in); /* expect (and discard) one space */ if (c == ' ') { + ret.pos = getpos(in); c = get(in); - ret.pos = in->pos; } while (!isnl(c) && c != EOF) { rdadd(&rs, c); @@ -370,26 +409,34 @@ token get_codepar_token(input *in) { /* * Adds a new word to a linked list */ -static void addword(word newword, word ***hptrptr) { +static word *addword(word newword, word ***hptrptr) { word *mnewword = smalloc(sizeof(word)); *mnewword = newword; /* structure copy */ mnewword->next = NULL; **hptrptr = mnewword; *hptrptr = &mnewword->next; + return mnewword; } /* * Adds a new paragraph to a linked list */ -static void addpara(paragraph newpara, paragraph ***hptrptr) { +static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) { paragraph *mnewpara = smalloc(sizeof(paragraph)); *mnewpara = newpara; /* structure copy */ mnewpara->next = NULL; **hptrptr = mnewpara; *hptrptr = &mnewpara->next; + return mnewpara; } /* + * Destructor before token is reassigned; should catch most memory + * leaks + */ +#define dtor(t) ( sfree(t.text) ) + +/* * Reads a single file (ie until get() returns EOF) */ static void read_file(paragraph ***ret, input *in) { @@ -397,16 +444,26 @@ static void read_file(paragraph ***ret, input *in) { paragraph par; word wd, **whptr; int style; + int already; + int type; struct stack_item { enum { - stack_ualt, /* \u alternative */ - stack_style, /* \e, \c, \cw */ - stack_idx, /* \I, \i, \ii */ - stack_nop /* do nothing (for error recovery) */ + stack_nop = 0, /* do nothing (for error recovery) */ + stack_ualt = 1, /* \u alternative */ + stack_style = 2, /* \e, \c, \cw */ + stack_idx = 4, /* \I, \i, \ii */ + stack_hyper = 8, /* \W */ } type; word **whptr; /* to restore from \u alternatives */ } *sitem; stack parsestk; + word *indexword, *uword; + rdstring indexstr; + int index_downcase, index_visible, indexing; + const rdstring nullrs = { 0, 0, NULL }; + wchar_t uchr; + + t.text = NULL; /* * Loop on each paragraph. @@ -419,7 +476,7 @@ static void read_file(paragraph ***ret, input *in) { /* * Get a token. */ - t = get_token(in); + dtor(t), t = get_token(in); if (t.type == tok_eof) return; @@ -429,16 +486,18 @@ static void read_file(paragraph ***ret, input *in) { if (t.type == tok_cmd && t.cmd == c_c && !isbrace(in)) { par.type = para_Code; while (1) { - t = get_codepar_token(in); + dtor(t), t = get_codepar_token(in); wd.type = word_WeakCode; wd.text = ustrdup(t.text); + wd.alt = NULL; + wd.fpos = t.pos; addword(wd, &whptr); - t = get_token(in); + dtor(t), t = get_token(in); if (t.type == tok_white) { /* * The newline after a code-paragraph line */ - t = get_token(in); + dtor(t), t = get_token(in); } if (t.type == tok_eop || t.type == tok_eof) break; @@ -446,11 +505,12 @@ static void read_file(paragraph ***ret, input *in) { error(err_brokencodepara, &t.pos); addpara(par, ret); while (t.type != tok_eop) /* error recovery: */ - t = get_token(in); /* eat rest of paragraph */ + dtor(t), t = get_token(in); /* eat rest of paragraph */ continue; } } addpara(par, ret); + continue; } /* @@ -470,7 +530,7 @@ static void read_file(paragraph ***ret, input *in) { break; case c__comment: do { - t = get_token(in); + dtor(t), t = get_token(in); } while (t.type != tok_eop && t.type != tok_eof); continue; /* next paragraph */ /* @@ -488,8 +548,8 @@ static void read_file(paragraph ***ret, input *in) { case c_C: needkw = 2; par.type = para_Chapter; break; case c_H: needkw = 2; par.type = para_Heading; break; case c_IM: needkw = 2; par.type = para_IM; break; - /* FIXME: multiple levels of Subsect */ - case c_S: needkw = 2; par.type = para_Subsect; break; + case c_S: needkw = 2; par.type = para_Subsect; + par.aux = t.aux; break; case c_U: needkw = 0; par.type = para_UnnumberedChapter; break; /* For \b and \n the keyword is optional */ case c_b: needkw = 4; par.type = para_Bullet; break; @@ -508,14 +568,14 @@ static void read_file(paragraph ***ret, input *in) { filepos fp; /* Get keywords. */ - t = get_token(in); + dtor(t), t = get_token(in); fp = t.pos; while (t.type == tok_lbrace) { /* This is a keyword. */ nkeys++; /* FIXME: there will be bugs if anyone specifies an * empty keyword (\foo{}), so trap this case. */ - while (t = get_token(in), + while (dtor(t), t = get_token(in), t.type == tok_word || t.type == tok_white) { if (t.type == tok_white) rdadd(&rs, ' '); @@ -524,11 +584,10 @@ static void read_file(paragraph ***ret, input *in) { } if (t.type != tok_rbrace) { error(err_kwunclosed, &t.pos); - /* FIXME: memory leak */ continue; } rdadd(&rs, 0); /* add string terminator */ - t = get_token(in); /* eat right brace */ + dtor(t), t = get_token(in); /* eat right brace */ } rdadd(&rs, 0); /* add string terminator */ @@ -547,8 +606,9 @@ static void read_file(paragraph ***ret, input *in) { if (needkw == 8) { if (t.type != tok_eop) { error(err_bodyillegal, &t.pos); - while (t.type != tok_eop) /* error recovery: */ - t = get_token(in); /* eat rest of paragraph */ + /* Error recovery: eat the rest of the paragraph */ + while (t.type != tok_eop) + dtor(t), t = get_token(in); } addpara(par, ret); continue; /* next paragraph */ @@ -569,43 +629,71 @@ static void read_file(paragraph ***ret, input *in) { * \I * \u * \W + * \date * \\ \{ \} */ parsestk = stk_new(); style = word_Normal; + indexing = FALSE; while (t.type != tok_eop && t.type != tok_eof) { + already = FALSE; if (t.type == tok_cmd && t.cmd == c__escaped) t.type = tok_word; /* nice and simple */ switch (t.type) { case tok_white: + if (whptr == &par.words) + break; /* strip whitespace at start of para */ wd.text = NULL; wd.type = word_WhiteSpace; - addword(wd, &whptr); + wd.alt = NULL; + wd.fpos = t.pos; + if (indexing) + rdadd(&indexstr, ' '); + if (!indexing || index_visible) + addword(wd, &whptr); break; case tok_word: - wd.text = ustrdup(t.text); - wd.type = style; - addword(wd, &whptr); + if (indexing) + rdadds(&indexstr, t.text); + if (!indexing || index_visible) { + wd.text = ustrdup(t.text); + wd.type = style; + wd.alt = NULL; + wd.fpos = t.pos; + addword(wd, &whptr); + } break; case tok_lbrace: error(err_unexbrace, &t.pos); - /* FIXME: errorrec. Push nop. */ + /* Error recovery: push nop */ + sitem = smalloc(sizeof(*sitem)); + sitem->type = stack_nop; + stk_push(parsestk, sitem); break; case tok_rbrace: sitem = stk_pop(parsestk); if (!sitem) error(err_unexbrace, &t.pos); - else switch (sitem->type) { - case stack_ualt: - whptr = sitem->whptr; - break; - case stack_style: - style = word_Normal; - break; - case stack_idx: - /* FIXME: do this bit! */ - case stack_nop: - break; + else { + if (sitem->type & stack_ualt) + whptr = sitem->whptr; + if (sitem->type & stack_style) + style = word_Normal; + if (sitem->type & stack_idx) { + indexword->text = ustrdup(indexstr.text); + sfree(indexstr.text); + if (index_downcase) + ustrlow(indexword->text); + indexing = FALSE; + } + if (sitem->type & stack_hyper) { + wd.text = NULL; + wd.type = word_HyperEnd; + wd.alt = NULL; + wd.fpos = t.pos; + if (!indexing || index_visible) + addword(wd, &whptr); + } } sfree(sitem); break; @@ -613,52 +701,108 @@ static void read_file(paragraph ***ret, input *in) { switch (t.cmd) { case c_K: case c_k: + case c_W: + case c_date: /* - * Keyword. We expect a left brace, some text, - * and then a right brace. No nesting; no - * arguments. + * Keyword, hyperlink, or \date. We expect a + * left brace, some text, and then a right + * brace. No nesting; no arguments. */ + wd.fpos = t.pos; if (t.cmd == c_K) wd.type = word_UpperXref; - else + else if (t.cmd == c_k) wd.type = word_LowerXref; - t = get_token(in); + else if (t.cmd == c_W) + wd.type = word_HyperLink; + else + wd.type = word_Normal; + dtor(t), t = get_token(in); if (t.type != tok_lbrace) { - error(err_explbr, &t.pos); - } - { + if (wd.type == word_Normal) { + time_t thetime = time(NULL); + struct tm *broken = localtime(&thetime); + already = TRUE; + wd.text = ustrftime(NULL, broken); + wd.type = style; + } else + error(err_explbr, &t.pos); + } else { rdstring rs = { 0, 0, NULL }; - while (t = get_token(in), + while (dtor(t), t = get_token(in), t.type == tok_word || t.type == tok_white) { if (t.type == tok_white) rdadd(&rs, ' '); else rdadds(&rs, t.text); } - wd.text = ustrdup(rs.text); + if (wd.type == word_Normal) { + time_t thetime = time(NULL); + struct tm *broken = localtime(&thetime); + wd.text = ustrftime(rs.text, broken); + wd.type = style; + } else { + wd.text = ustrdup(rs.text); + } + sfree(rs.text); + if (t.type != tok_rbrace) { + error(err_kwexprbr, &t.pos); + } } - if (t.type != tok_rbrace) { - error(err_kwexprbr, &t.pos); + wd.alt = NULL; + if (!indexing || index_visible) + addword(wd, &whptr); + else + sfree(wd.text); + if (wd.type == word_HyperLink) { + /* + * Hyperlinks are different: they then + * expect another left brace, to begin + * delimiting the text marked by the link. + */ + dtor(t), t = get_token(in); + /* + * Special cases: \W{}\c, \W{}\e, \W{}\cw + */ + if (t.type == tok_cmd && + (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) { + if (style != word_Normal) + error(err_nestedstyles, &t.pos); + else { + style = (t.cmd == c_c ? word_Code : + t.cmd == c_cw ? word_WeakCode : + word_Emph); + sitem->type |= stack_style; + } + dtor(t), t = get_token(in); + } + if (t.type != tok_lbrace) { + error(err_explbr, &t.pos); + } else { + sitem = smalloc(sizeof(*sitem)); + sitem->type = stack_hyper; + stk_push(parsestk, sitem); + } } - addword(wd, &whptr); break; case c_c: case c_cw: case c_e: + type = t.cmd; if (style != word_Normal) { error(err_nestedstyles, &t.pos); /* Error recovery: eat lbrace, push nop. */ - t = get_token(in); + dtor(t), t = get_token(in); sitem = smalloc(sizeof(*sitem)); sitem->type = stack_nop; stk_push(parsestk, sitem); } - t = get_token(in); + dtor(t), t = get_token(in); if (t.type != tok_lbrace) { error(err_explbr, &t.pos); } else { - style = (t.cmd == c_c ? word_Code : - t.cmd == c_cw ? word_WeakCode : + style = (type == c_c ? word_Code : + type == c_cw ? word_WeakCode : word_Emph); sitem = smalloc(sizeof(*sitem)); sitem->type = stack_style; @@ -668,36 +812,91 @@ static void read_file(paragraph ***ret, input *in) { case c_i: case c_ii: case c_I: - if (style != word_Normal) { - error(err_nestedstyles, &t.pos); + type = t.cmd; + if (indexing) { + error(err_nestedindex, &t.pos); /* Error recovery: eat lbrace, push nop. */ - t = get_token(in); + dtor(t), t = get_token(in); sitem = smalloc(sizeof(*sitem)); sitem->type = stack_nop; stk_push(parsestk, sitem); } - t = get_token(in); + sitem = smalloc(sizeof(*sitem)); + sitem->type = stack_idx; + dtor(t), t = get_token(in); + /* + * Special cases: \i\c, \i\e, \i\cw + */ + wd.fpos = t.pos; + if (t.type == tok_cmd && + (t.cmd == c_e || t.cmd == c_c || t.cmd == c_cw)) { + if (style != word_Normal) + error(err_nestedstyles, &t.pos); + else { + style = (t.cmd == c_c ? word_Code : + t.cmd == c_cw ? word_WeakCode : + word_Emph); + sitem->type |= stack_style; + } + dtor(t), t = get_token(in); + } if (t.type != tok_lbrace) { + sfree(sitem); error(err_explbr, &t.pos); } else { + /* Add an index-reference word with no text as yet */ + wd.type = word_IndexRef; + wd.text = NULL; + wd.alt = NULL; + indexword = addword(wd, &whptr); + /* Set up a rdstring to read the index text */ + indexstr = nullrs; + /* Flags so that we do the Right Things with text */ + index_visible = (type != c_I); + index_downcase = (type == c_ii); + indexing = TRUE; + /* Stack item to close the indexing on exit */ + stk_push(parsestk, sitem); + } + break; + case c_u: + uchr = t.aux; + if (!indexing || index_visible) { + wchar_t text[2]; + text[1] = 0; + text[0] = uchr; + wd.text = ustrdup(text); + wd.type = style; + wd.alt = NULL; + wd.fpos = t.pos; + uword = addword(wd, &whptr); + } + dtor(t), t = get_token(in); + if (t.type == tok_lbrace) { /* - * FIXME: do something useful - * Add an index-ref word and keep a pointer to it - * Set a flag so that other addwords also update it + * \u with a left brace. Until the brace + * closes, all further words go on a + * sidetrack from the main thread of the + * paragraph. */ sitem = smalloc(sizeof(*sitem)); - sitem->type = stack_idx; + sitem->type = stack_ualt; + sitem->whptr = whptr; stk_push(parsestk, sitem); + whptr = &uword->alt; + } else { + if (indexing) + rdadd(&indexstr, uchr); + already = TRUE; } break; - case c_u: - case c_W: default: error(err_badmidcmd, t.text, &t.pos); break; } } - t = get_token(in); + if (!already) + dtor(t), t = get_token(in); } /* Check the stack is empty */ if (NULL != (sitem = stk_pop(parsestk))) { @@ -710,6 +909,7 @@ static void read_file(paragraph ***ret, input *in) { stk_free(parsestk); addpara(par, ret); } + dtor(t); } paragraph *read_input(input *in) { @@ -719,9 +919,7 @@ paragraph *read_input(input *in) { while (in->currindex < in->nfiles) { in->currfp = fopen(in->filenames[in->currindex], "r"); if (in->currfp) { - in->pos.filename = in->filenames[in->currindex]; - in->pos.line = 1; - in->pos.col = 1; + setpos(in, in->filenames[in->currindex]); read_file(&hptr, in); } in->currindex++; diff --git a/inputs/test.but b/inputs/test.but index fbc1441..d5ce52d 100644 --- a/inputs/test.but +++ b/inputs/test.but @@ -6,7 +6,7 @@ feature that Buttress's input format supports. Creation date \copyright Copyright 1999 Simon Tatham. All rights reserved. -\versionid $Id: test.but,v 1.3 1999/07/31 18:44:53 simon Exp $ +\versionid $Id: test.but,v 1.4 1999/08/09 10:02:07 simon Exp $ \C{chap} First chapter title @@ -15,8 +15,8 @@ has line breaks in between words, multiple spaces (ignored), and \e{emphasised text} as well as \c{code fragments}. -\cw{This} is weak code. And \k{sect} contains some other stuff. -\K{head} does too. +\cw{This} is weak code. And \k{head} contains some other stuff. +\K{subhead} does too. \H{head} First section title @@ -53,9 +53,9 @@ characters, to be precise. And their code equivalents, \c{\\}, \S{subhead} First subheading -So here's a \I{subheading}{subsection}subsection. Just incidentally, -`this' is in quotes. \ii{Those} quotes had better work in all -formats. +So here's a \I{subheading}\I{subsection}subsection. Just +incidentally, `this' is in quotes. \ii{Those} quotes had better work +in all formats. We'll try for some Unicode here: \i{Schr\u00F6{oe}dinger}. @@ -63,7 +63,7 @@ An index tag containing non-alternatived Unicode: \i{\u00BFChe?} An invisible index tag: \I{she seems to have an invisible tag}yeah. -\S1{subsub} Smaller heading still +\S2{sub-sub} Smaller heading still A tiny section. Awww. How cute. diff --git a/keywords.c b/keywords.c new file mode 100644 index 0000000..ff213c1 --- /dev/null +++ b/keywords.c @@ -0,0 +1,168 @@ +/* + * keywords.c: keep track of all cross-reference keywords + */ + +#include <stdio.h> +#include <stdlib.h> +#include <assert.h> +#include "buttress.h" + +#define heapparent(x) (((x)+1)/2-1) +#define heaplchild(x) (2*(x)+1) +#define heaprchild(x) (2*(x)+2) + +#define key(x) ( kl->keys[(x)] ) + +#define greater(x,y) ( ustrcmp(key(x)->key, key(y)->key) > 0 ) +#define swap(x,y) do { keyword *t=key(x); key(x)=key(y); key(y)=t; } while(0) + +static void heap_add(keywordlist *kl, keyword *key) { + int p; + if (kl->nkeywords >= kl->size) { + kl->size = kl->nkeywords + 128; + kl->keys = srealloc(kl->keys, sizeof(*kl->keys) * kl->size); + } + p = kl->nkeywords++; + kl->keys[p] = key; + while (heapparent(p) >= 0 && greater(p, heapparent(p))) { + swap(p, heapparent(p)); + p = heapparent(p); + } +} + +static void heap_sort(keywordlist *kl) { + int i, j; + + kl->size = kl->nkeywords; + kl->keys = srealloc(kl->keys, sizeof(*kl->keys) * kl->size); + + i = kl->nkeywords; + while (i > 1) { + i--; + swap(0, i); /* put greatest at end */ + j = 0; + while (1) { + int left = heaplchild(j), right = heaprchild(j); + if (left >= i || !greater(left, j)) + left = -1; + if (right >= i || !greater(right, j)) + right = -1; + if (left >= 0 && right >= 0) { + if (greater(left, right)) + right = -1; + else + left = -1; + } + if (left >= 0) { swap(j, left); j = left; } + else if (right >= 0) { swap(j, right); j = right; } + else break; + } + } + /* FIXME: check for duplicate keys; do what about them? */ +} + +static keyword *kw_lookup(keywordlist *kl, wchar_t *str) { + int i, j, k, cmp; + + i = -1; + j = kl->nkeywords; + while (j-i > 1) { + k = (i+j)/2; + cmp = ustrcmp(str, kl->keys[k]->key); + if (cmp < 0) + j = k; + else if (cmp > 0) + i = k; + else + return kl->keys[k]; + } + return NULL; +} + +/* + * This function reads through source form and collects the + * keywords. They get collected in a heap, sorted by Unicode + * collation, last at the top (so that we can Heapsort them when we + * finish). + */ +keywordlist *get_keywords(paragraph *source) { + keywordlist *kl = smalloc(sizeof(*kl)); + numberstate *n = number_init(); + int prevpara = para_NotParaType; + + kl->nkeywords = 0; + kl->size = 0; + kl->keys = NULL; + for (; source; source = source->next) { + /* + * Number the chapter / section / list-item / whatever. + */ + source->kwtext = number_mktext(n, source->type, source->aux, + prevpara); + prevpara = source->type; + + if (source->keyword && *source->keyword) { + if (source->kwtext) { + wchar_t *p = source->keyword; + while (*p) { + keyword *kw = smalloc(sizeof(*kw)); + kw->key = p; + kw->text = source->kwtext; + heap_add(kl, kw); + p += ustrlen(p) + 1; + } + } + } + } + + number_free(n); + + heap_sort(kl); + + return kl; +} + +void free_keywords(keywordlist *kl) { + int i; + for (i = 0; i < kl->nkeywords; i++) + sfree(kl->keys[i]); + sfree(kl); +} + +void subst_keywords(paragraph *source, keywordlist *kl) { + for (; source; source = source->next) { + word *ptr; + for (ptr = source->words; ptr; ptr = ptr->next) { + if (ptr->type == word_UpperXref || + ptr->type == word_LowerXref) { + keyword *kw; + word **endptr, *close, *subst; + + kw = kw_lookup(kl, ptr->text); + if (!kw) { + error(err_nosuchkw, &ptr->fpos, ptr->text); + subst = NULL; + } else + subst = dup_word_list(kw->text); + + if (subst && ptr->type == word_LowerXref) + ustrlow(subst->text); + + close = smalloc(sizeof(word)); + close->text = NULL; + close->alt = NULL; + close->type = word_XrefEnd; + close->fpos = ptr->fpos; + + close->next = ptr->next; + ptr->next = subst; + + for (endptr = &ptr->next; *endptr; endptr = &(*endptr)->next) + (*endptr)->fpos = ptr->fpos; + + *endptr = close; + ptr = close; + } + } + } +} @@ -6,12 +6,17 @@ #include <stdlib.h> #include "buttress.h" +static void dbg_prtsource(paragraph *sourceform); +static void dbg_prtwordlist(int level, word *w); +static void dbg_prtkws(keywordlist *kws); + int main(int argc, char **argv) { char **infiles; char *outfile; int nfiles; int nogo; int errs; + int reportcols; /* * Set up initial (default) parameters. @@ -20,6 +25,7 @@ int main(int argc, char **argv) { outfile = NULL; nfiles = 0; nogo = errs = FALSE; + reportcols = 0; if (argc == 1) { usage(); @@ -67,6 +73,8 @@ int main(int argc, char **argv) { errs = TRUE, error(err_optnoarg, opt); else outfile = val; + } else if (!strcmp(opt, "-precise")) { + reportcols = 1; } else { errs = TRUE, error(err_nosuchopt, opt); } @@ -76,6 +84,7 @@ int main(int argc, char **argv) { case 'h': case 'V': case 'L': + case 'P': /* * Option requiring no parameter. */ @@ -92,6 +101,9 @@ int main(int argc, char **argv) { licence(); nogo = TRUE; break; + case 'P': + reportcols = 1; + break; } break; case 'o': @@ -154,12 +166,14 @@ int main(int argc, char **argv) { { input in; paragraph *sourceform; + keywordlist *keywords; in.filenames = infiles; in.nfiles = nfiles; in.currfp = NULL; in.currindex = 0; in.npushback = 0; + in.reportcols = reportcols; sourceform = read_input(&in); if (!sourceform) @@ -167,43 +181,86 @@ int main(int argc, char **argv) { sfree(infiles); - /* - * FIXME: having read it, do something with it! - */ - { - paragraph *p; - word *w; - for (p = sourceform; p; p = p->next) { - wchar_t *wp; - printf("para %d ", p->type); - if (p->keyword) { - wp = p->keyword; - while (*wp) { - putchar('\"'); - for (; *wp; wp++) - putchar(*wp); - putchar('\"'); - if (*++wp) - printf(", "); - } - } else - printf("(no keyword)"); - printf(" {\n"); - for (w = p->words; w; w = w->next) { - printf(" word %d ", w->type); - if (w->text) { - printf("\""); - for (wp = w->text; *wp; wp++) - putchar(*wp); - printf("\""); - } else - printf("(no text)"); - printf("\n"); - } - printf("}\n"); + keywords = get_keywords(sourceform); + dbg_prtkws(keywords); + + subst_keywords(sourceform, keywords); + dbg_prtsource(sourceform); + + free_para_list(sourceform); + } + + return 0; +} + +static void dbg_prtsource(paragraph *sourceform) { + /* + * Output source form in debugging format. + */ + + paragraph *p; + for (p = sourceform; p; p = p->next) { + wchar_t *wp; + printf("para %d ", p->type); + if (p->keyword) { + wp = p->keyword; + while (*wp) { + putchar('\"'); + for (; *wp; wp++) + putchar(*wp); + putchar('\"'); + if (*++wp) + printf(", "); } + } else + printf("(no keyword)"); + printf(" {\n"); + dbg_prtwordlist(1, p->words); + printf("}\n"); + } +} + +static void dbg_prtkws(keywordlist *kws) { + /* + * Output keywords in debugging format. + */ + + int i; + + for (i = 0; i < kws->nkeywords; i++) { + wchar_t *wp; + printf("keyword "); + wp = kws->keys[i]->key; + while (*wp) { + putchar('\"'); + for (; *wp; wp++) + putchar(*wp); + putchar('\"'); + if (*++wp) + printf(", "); } + printf(" {\n"); + dbg_prtwordlist(1, kws->keys[i]->text); + printf("}\n"); } +} - return 0; +static void dbg_prtwordlist(int level, word *w) { + for (; w; w = w->next) { + wchar_t *wp; + printf("%*sword %d ", level*4, "", w->type); + if (w->text) { + printf("\""); + for (wp = w->text; *wp; wp++) + putchar(*wp); + printf("\""); + } else + printf("(no text)"); + if (w->alt) { + printf(" alt = {\n"); + dbg_prtwordlist(level+1, w->alt); + printf("%*s}", level*4, ""); + } + printf("\n"); + } } @@ -3,42 +3,101 @@ */ #include <stdlib.h> +#include <stdarg.h> #include "buttress.h" +#ifdef LOGALLOC +#define LOGPARAMS char *file, int line, +static FILE *logallocfp = NULL; +static void logallocinit(void) { + if (!logallocfp) { + logallocfp = fopen("malloc.log", "w"); + if (!logallocfp) { + fprintf(stderr, "panic: unable to open malloc.log\n"); + exit(10); + } + setvbuf (logallocfp, NULL, _IOLBF, BUFSIZ); + fprintf(logallocfp, "null pointer is %p\n", NULL); + } +} +static void logprintf(char *fmt, ...) { + va_list ap; + va_start(ap, fmt); + vfprintf(logallocfp, fmt, ap); + va_end(ap); +} +#define LOGPRINT(x) ( logallocinit(), logprintf x ) +#else +#define LOGPARAMS +#define LOGPRINT(x) +#endif + /* * smalloc should guarantee to return a useful pointer - buttress * can do nothing except die when it's out of memory anyway */ -void *smalloc(int size) { +void *(smalloc)(LOGPARAMS int size) { void *p = malloc(size); if (!p) fatal(err_nomemory); + LOGPRINT(("%s %d malloc(%ld) returns %p\n", + file, line, (long)size, p)); return p; } /* * sfree should guaranteeably deal gracefully with freeing NULL */ -void sfree(void *p) { - if (p) +void (sfree)(LOGPARAMS void *p) { + if (p) { free(p); + LOGPRINT(("%s %d free(%p)\n", + file, line, p)); + } } /* * srealloc should guaranteeably be able to realloc NULL */ -void *srealloc(void *p, int size) { +void *(srealloc)(LOGPARAMS void *p, int size) { void *q; - if (p) + if (p) { q = realloc(p, size); - else + LOGPRINT(("%s %d realloc(%p,%ld) returns %p\n", + file, line, p, (long)size, q)); + } else { q = malloc(size); + LOGPRINT(("%s %d malloc(%ld) returns %p\n", + file, line, (long)size, q)); + } if (!q) fatal(err_nomemory); return q; } /* + * Duplicate a linked list of words + */ +word *dup_word_list(word *w) { + word *head, **eptr = &head; + + while (w) { + word *newwd = smalloc(sizeof(word)); + *newwd = *w; /* structure copy */ + newwd->text = ustrdup(w->text); + if (w->alt) + newwd->alt = dup_word_list(w->alt); + *eptr = newwd; + newwd->next = NULL; + eptr = &newwd->next; + + w = w->next; + } + + return head; +} + +/* * Free a linked list of words */ void free_word_list(word *w) { @@ -46,6 +105,9 @@ void free_word_list(word *w) { while (w) { t = w; w = w->next; + sfree(t->text); + if (t->alt) + free_word_list(t->alt); sfree(t); } } @@ -58,6 +120,8 @@ void free_para_list(paragraph *p) { while (p) { t = p; p = p->next; + sfree(t->keyword); + free_word_list(t->words); sfree(t); } } @@ -0,0 +1,8 @@ +/* + * style.c: load and keep track of user style preferences + */ + +#include <stdio.h> +#include <stdlib.h> +#include "buttress.h" + @@ -3,6 +3,7 @@ */ #include <wchar.h> +#include <time.h> #include "buttress.h" wchar_t *ustrdup(wchar_t *s) { @@ -45,3 +46,73 @@ wchar_t *ustrcpy(wchar_t *dest, wchar_t *source) { } while (*source++); return ret; } + +int ustrcmp(wchar_t *lhs, wchar_t *rhs) { + while (*lhs && *rhs && *lhs==*rhs) + lhs++, rhs++; + if (*lhs < *rhs) + return -1; + else if (*lhs > *rhs) + return 1; + return 0; +} + +wchar_t *ustrlow(wchar_t *s) { + wchar_t *p = s; + while (*p) { + /* FIXME: this doesn't even come close */ + if (*p >= 'A' && *p <= 'Z') + *p += 'a'-'A'; + p++; + } + return s; +} + +#define USTRFTIME_DELTA 128 +wchar_t *ustrftime(wchar_t *wfmt, struct tm *timespec) { + void *blk = NULL; + wchar_t *wblk, *wp; + char *fmt, *text, *p; + size_t size = 0; + size_t len; + + /* + * strftime has the entertaining property that it returns 0 + * _either_ on out-of-space _or_ on successful generation of + * the empty string. Hence we must ensure our format can never + * generate the empty string. Somebody throw a custard pie at + * whoever was responsible for that. Please? + */ + if (wfmt) { + len = ustrlen(wfmt); + fmt = smalloc(2+len); + ustrtoa(wfmt, fmt+1, len+1); + fmt[0] = ' '; + } else + fmt = " %c"; + + while (1) { + size += USTRFTIME_DELTA; + blk = srealloc(blk, size); + len = strftime((char *)blk, size-1, fmt, timespec); + if (len > 0) + break; + } + + /* Note: +1 for the terminating 0, -1 for the initial space in fmt */ + wblk = srealloc(blk, len * sizeof(wchar_t)); + text = smalloc(len); + strftime(text, len, fmt+1, timespec); + /* + * We operate in the C locale, so this all ought to be kosher + * ASCII. If we ever move outside ASCII machines, we may need + * to make this more portable... + */ + for (wp = wblk, p = text; *p; p++, wp++) + *wp = *p; + *wp = 0; + if (wfmt) + sfree(fmt); + sfree(text); + return wblk; +} |