diff options
| -rw-r--r-- | bk_info.c | 33 | ||||
| -rw-r--r-- | bk_man.c | 27 | ||||
| -rw-r--r-- | bk_paper.c | 8 | ||||
| -rw-r--r-- | bk_pdf.c | 29 | ||||
| -rw-r--r-- | bk_ps.c | 29 | ||||
| -rw-r--r-- | bk_text.c | 27 | ||||
| -rw-r--r-- | bk_whlp.c | 31 | ||||
| -rw-r--r-- | bk_xhtml.c | 53 | ||||
| -rw-r--r-- | error.c | 14 | ||||
| -rw-r--r-- | halibut.h | 26 | ||||
| -rw-r--r-- | input.c | 89 | ||||
| -rw-r--r-- | main.c | 27 | ||||
| -rw-r--r-- | misc.c | 67 | ||||
| -rw-r--r-- | ustring.c | 154 |
14 files changed, 332 insertions, 282 deletions
@@ -80,7 +80,7 @@ static infoconfig info_configure(paragraph *source) { if (source->type == para_Config) { if (!ustricmp(source->keyword, L"info-filename")) { sfree(ret.filename); - ret.filename = utoa_dup(uadv(source->keyword)); + ret.filename = dupstr(adv(source->origkeyword)); } else if (!ustricmp(source->keyword, L"info-max-file-size")) { ret.maxfilesize = utoi(uadv(source->keyword)); } @@ -92,30 +92,7 @@ static infoconfig info_configure(paragraph *source) { paragraph *info_config_filename(char *filename) { - paragraph *p; - wchar_t *ufilename, *up; - int len; - - p = mknew(paragraph); - memset(p, 0, sizeof(*p)); - p->type = para_Config; - p->next = NULL; - p->fpos.filename = "<command line>"; - p->fpos.line = p->fpos.col = -1; - - ufilename = ufroma_dup(filename); - len = ustrlen(ufilename) + 2 + lenof(L"info-filename"); - p->keyword = mknewa(wchar_t, len); - up = p->keyword; - ustrcpy(up, L"info-filename"); - up = uadv(up); - ustrcpy(up, ufilename); - up = uadv(up); - *up = L'\0'; - assert(up - p->keyword < len); - sfree(ufilename); - - return p; + return cmdline_cfg_simple("info-filename", filename, NULL); } void info_backend(paragraph *sourceform, keywordlist *keywords, @@ -235,11 +212,11 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, } rdaddsc(&intro_text, "INFO-DIR-SECTION "); - s = utoa_dup(section); + s = utoa_dup(section, CS_FIXME); rdaddsc(&intro_text, s); sfree(s); rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* "); - s = utoa_dup(shortname); + s = utoa_dup(shortname, CS_FIXME); rdaddsc(&intro_text, s); sfree(s); rdaddsc(&intro_text, ": ("); @@ -257,7 +234,7 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, } } rdaddsc(&intro_text, ". "); - s = utoa_dup(longname); + s = utoa_dup(longname, CS_FIXME); rdaddsc(&intro_text, s); sfree(s); rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n"); @@ -48,7 +48,7 @@ static manconfig man_configure(paragraph *source) { ret.mindepth = utoi(uadv(source->keyword)); } else if (!ustricmp(source->keyword, L"man-filename")) { sfree(ret.filename); - ret.filename = utoa_dup(uadv(source->keyword)); + ret.filename = dupstr(adv(source->origkeyword)); } } } @@ -64,30 +64,7 @@ static void man_conf_cleanup(manconfig cf) paragraph *man_config_filename(char *filename) { - paragraph *p; - wchar_t *ufilename, *up; - int len; - - p = mknew(paragraph); - memset(p, 0, sizeof(*p)); - p->type = para_Config; - p->next = NULL; - p->fpos.filename = "<command line>"; - p->fpos.line = p->fpos.col = -1; - - ufilename = ufroma_dup(filename); - len = ustrlen(ufilename) + 2 + lenof(L"man-filename"); - p->keyword = mknewa(wchar_t, len); - up = p->keyword; - ustrcpy(up, L"man-filename"); - up = uadv(up); - ustrcpy(up, ufilename); - up = uadv(up); - *up = L'\0'; - assert(up - p->keyword < len); - sfree(ufilename); - - return p; + return cmdline_cfg_simple("man-filename", filename, NULL); } #define QUOTE_INITCTRL 1 /* quote initial . and ' on a line */ @@ -510,7 +510,7 @@ void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords, for (page = pages; page; page = page->next) { sprintf(buf, "%d", ++pagenum); - page->number = ufroma_dup(buf); + page->number = ufroma_dup(buf, CS_ASCII); } if (has_index) { @@ -524,7 +524,7 @@ void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords, /* And don't forget the as-yet-uncreated index. */ sprintf(buf, "%d", ++pagenum); - first_index_page->number = ufroma_dup(buf); + first_index_page->number = ufroma_dup(buf, CS_ASCII); } } @@ -683,7 +683,7 @@ void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords, for (page = ipages->next; page; page = page->next) { char buf[40]; sprintf(buf, "%d", ++pagenum); - page->number = ufroma_dup(buf); + page->number = ufroma_dup(buf, CS_ASCII); } /* @@ -1682,7 +1682,7 @@ static int render_text(page_data *page, para_data *pdata, line_data *ldata, if (text->type == word_HyperLink) { dest.type = URL; - dest.url = utoa_dup(text->text); + dest.url = utoa_dup(text->text, CS_ASCII); dest.page = NULL; } else if (text->type == word_PageXref) { dest.type = PAGE; @@ -10,30 +10,7 @@ paragraph *pdf_config_filename(char *filename) { - paragraph *p; - wchar_t *ufilename, *up; - int len; - - p = mknew(paragraph); - memset(p, 0, sizeof(*p)); - p->type = para_Config; - p->next = NULL; - p->fpos.filename = "<command line>"; - p->fpos.line = p->fpos.col = -1; - - ufilename = ufroma_dup(filename); - len = ustrlen(ufilename) + 2 + lenof(L"pdf-filename"); - p->keyword = mknewa(wchar_t, len); - up = p->keyword; - ustrcpy(up, L"pdf-filename"); - up = uadv(up); - ustrcpy(up, ufilename); - up = uadv(up); - *up = L'\0'; - assert(up - p->keyword < len); - sfree(ufilename); - - return p; + return cmdline_cfg_simple("pdf-filename", filename, NULL); } typedef struct object_Tag object; @@ -88,7 +65,7 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, if (p->type == para_Config && p->parent) { if (!ustricmp(p->keyword, L"pdf-filename")) { sfree(filename); - filename = utoa_dup(uadv(p->keyword)); + filename = dupstr(adv(p->origkeyword)); } } } @@ -742,7 +719,7 @@ static int pdf_versionid(FILE *fp, word *words) switch (type) { case word_Normal: - text = utoa_dup(words->text); + text = utoa_dup(words->text, CS_ASCII); break; case word_WhiteSpace: text = dupstr(" "); @@ -10,30 +10,7 @@ static void ps_versionid(FILE *fp, word *words); paragraph *ps_config_filename(char *filename) { - paragraph *p; - wchar_t *ufilename, *up; - int len; - - p = mknew(paragraph); - memset(p, 0, sizeof(*p)); - p->type = para_Config; - p->next = NULL; - p->fpos.filename = "<command line>"; - p->fpos.line = p->fpos.col = -1; - - ufilename = ufroma_dup(filename); - len = ustrlen(ufilename) + 2 + lenof(L"ps-filename"); - p->keyword = mknewa(wchar_t, len); - up = p->keyword; - ustrcpy(up, L"ps-filename"); - up = uadv(up); - ustrcpy(up, ufilename); - up = uadv(up); - *up = L'\0'; - assert(up - p->keyword < len); - sfree(ufilename); - - return p; + return cmdline_cfg_simple("ps-filename", filename, NULL); } void ps_backend(paragraph *sourceform, keywordlist *keywords, @@ -55,7 +32,7 @@ void ps_backend(paragraph *sourceform, keywordlist *keywords, if (p->type == para_Config && p->parent) { if (!ustricmp(p->keyword, L"ps-filename")) { sfree(filename); - filename = utoa_dup(uadv(p->keyword)); + filename = dupstr(adv(p->origkeyword)); } } } @@ -247,7 +224,7 @@ static void ps_versionid(FILE *fp, word *words) switch (type) { case word_Normal: - text = utoa_dup(words->text); + text = utoa_dup(words->text, CS_ASCII); break; case word_WhiteSpace: text = dupstr(" "); @@ -85,7 +85,7 @@ static textconfig text_configure(paragraph *source) { ret.indent = utoi(uadv(source->keyword)); } else if (!ustricmp(source->keyword, L"text-filename")) { sfree(ret.filename); - ret.filename = utoa_dup(uadv(source->keyword)); + ret.filename = dupstr(adv(source->origkeyword)); } else if (!ustricmp(source->keyword, L"text-indent-code")) { ret.indent_code = utoi(uadv(source->keyword)); } else if (!ustricmp(source->keyword, L"text-width")) { @@ -182,30 +182,7 @@ static textconfig text_configure(paragraph *source) { paragraph *text_config_filename(char *filename) { - paragraph *p; - wchar_t *ufilename, *up; - int len; - - p = mknew(paragraph); - memset(p, 0, sizeof(*p)); - p->type = para_Config; - p->next = NULL; - p->fpos.filename = "<command line>"; - p->fpos.line = p->fpos.col = -1; - - ufilename = ufroma_dup(filename); - len = ustrlen(ufilename) + 2 + lenof(L"text-filename"); - p->keyword = mknewa(wchar_t, len); - up = p->keyword; - ustrcpy(up, L"text-filename"); - up = uadv(up); - ustrcpy(up, ufilename); - up = uadv(up); - *up = L'\0'; - assert(up - p->keyword < len); - sfree(ufilename); - - return p; + return cmdline_cfg_simple("text-filename", filename, NULL); } void text_backend(paragraph *sourceform, keywordlist *keywords, @@ -45,30 +45,7 @@ static void whlp_contents_write(struct bk_whlp_state *state, paragraph *whlp_config_filename(char *filename) { - paragraph *p; - wchar_t *ufilename, *up; - int len; - - p = mknew(paragraph); - memset(p, 0, sizeof(*p)); - p->type = para_Config; - p->next = NULL; - p->fpos.filename = "<command line>"; - p->fpos.line = p->fpos.col = -1; - - ufilename = ufroma_dup(filename); - len = ustrlen(ufilename) + 2 + lenof(L"winhelp-filename"); - p->keyword = mknewa(wchar_t, len); - up = p->keyword; - ustrcpy(up, L"winhelp-filename"); - up = uadv(up); - ustrcpy(up, ufilename); - up = uadv(up); - *up = L'\0'; - assert(up - p->keyword < len); - sfree(ufilename); - - return p; + return cmdline_cfg_simple("winhelp-filename", filename, NULL); } void whlp_backend(paragraph *sourceform, keywordlist *keywords, @@ -129,7 +106,7 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords, p->parent->private_data = topicname; } else if (!ustricmp(p->keyword, L"winhelp-filename")) { sfree(filename); - filename = utoa_dup(uadv(p->keyword)); + filename = dupstr(adv(p->origkeyword)); } } } @@ -152,7 +129,7 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords, filename = newf; len = strlen(newf); } - cntname = mknewa(char, len); + cntname = mknewa(char, len+1); sprintf(cntname, "%.*s.cnt", len-4, filename); } @@ -671,7 +648,7 @@ static void whlp_rdaddwc(rdstringc *rs, word *text) { assert(text->type != word_CodeQuote && text->type != word_WkCodeQuote); if (removeattr(text->type) == word_Normal) { - if (whlp_convert(text->text, 0, &c, FALSE)) + if (whlp_convert(text->text, 0, &c, FALSE) || !text->alt) rdaddsc(rs, c); else whlp_rdaddwc(rs, text->alt); @@ -192,19 +192,19 @@ static xhtmlconfig xhtml_configure(paragraph *source) { if (!ustricmp(source->keyword, L"xhtml-contents-filename")) { sfree(ret.contents_filename); - ret.contents_filename = utoa_dup(uadv(source->keyword)); + ret.contents_filename = dupstr(adv(source->origkeyword)); } else if (!ustricmp(source->keyword, L"xhtml-single-filename")) { sfree(ret.single_filename); - ret.single_filename = utoa_dup(uadv(source->keyword)); + ret.single_filename = dupstr(adv(source->origkeyword)); } else if (!ustricmp(source->keyword, L"xhtml-index-filename")) { sfree(ret.index_filename); - ret.index_filename = utoa_dup(uadv(source->keyword)); + ret.index_filename = dupstr(adv(source->origkeyword)); } else if (!ustricmp(source->keyword, L"xhtml-template-filename")) { sfree(ret.template_filename); - ret.template_filename = utoa_dup(uadv(source->keyword)); + ret.template_filename = dupstr(adv(source->origkeyword)); } else if (!ustricmp(source->keyword, L"xhtml-template-fragment")) { sfree(ret.template_fragment); - ret.template_fragment = utoa_dup(uadv(source->keyword)); + ret.template_fragment = utoa_dup(uadv(source->keyword), CS_ASCII); } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-0")) { ret.contents_depth[0] = utoi(uadv(source->keyword)); } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-1")) { @@ -304,45 +304,12 @@ paragraph *xhtml_config_filename(char *filename) * \cfg{xhtml-leaf-level}{0}; the rationale being that the user * wants their output _in that file_. */ + paragraph *p, *q; - paragraph *p[2]; - int i, len; - wchar_t *ufilename, *up; - - for (i = 0; i < 2; i++) { - p[i] = mknew(paragraph); - memset(p[i], 0, sizeof(*p[i])); - p[i]->type = para_Config; - p[i]->next = NULL; - p[i]->fpos.filename = "<command line>"; - p[i]->fpos.line = p[i]->fpos.col = -1; - } - - ufilename = ufroma_dup(filename); - len = ustrlen(ufilename) + 2 + lenof(L"xhtml-single-filename"); - p[0]->keyword = mknewa(wchar_t, len); - up = p[0]->keyword; - ustrcpy(up, L"xhtml-single-filename"); - up = uadv(up); - ustrcpy(up, ufilename); - up = uadv(up); - *up = L'\0'; - assert(up - p[0]->keyword < len); - sfree(ufilename); - - len = lenof(L"xhtml-leaf-level") + lenof(L"0") + 1; - p[1]->keyword = mknewa(wchar_t, len); - up = p[1]->keyword; - ustrcpy(up, L"xhtml-leaf-level"); - up = uadv(up); - ustrcpy(up, L"0"); - up = uadv(up); - *up = L'\0'; - assert(up - p[1]->keyword < len); - - p[0]->next = p[1]; - - return p[0]; + p = cmdline_cfg_simple("xhtml-single-filename", filename, NULL); + q = cmdline_cfg_simple("xhtml-leaf-level", "0", NULL); + p->next = q; + return p; } static xhtmlsection *xhtml_new_section(xhtmlsection *last) @@ -82,7 +82,7 @@ static void do_error(int code, va_list ap) { break; case err_badparatype: wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); fpos = *va_arg(ap, filepos *); sprintf(error, "command `%.200s' unrecognised at start of" " paragraph", sp); @@ -90,7 +90,7 @@ static void do_error(int code, va_list ap) { break; case err_badmidcmd: wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); fpos = *va_arg(ap, filepos *); sprintf(error, "command `%.200s' unexpected in mid-paragraph", sp); flags = FILEPOS; @@ -138,20 +138,20 @@ static void do_error(int code, va_list ap) { case err_nosuchkw: fpos = *va_arg(ap, filepos *); wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); sprintf(error, "unable to resolve cross-reference to `%.200s'", sp); flags = FILEPOS; break; case err_multiBR: fpos = *va_arg(ap, filepos *); wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); sprintf(error, "multiple `\\BR' entries given for `%.200s'", sp); flags = FILEPOS; break; case err_nosuchidxtag: wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); sprintf(error, "`\\IM' on unknown index tag `%.200s'", sp); flags = 0; /* FIXME: need to get a filepos to here somehow */ @@ -164,7 +164,7 @@ static void do_error(int code, va_list ap) { case err_macroexists: fpos = *va_arg(ap, filepos *); wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); sprintf(error, "macro `%.200s' already defined", sp); flags = FILEPOS; break; @@ -185,7 +185,7 @@ static void do_error(int code, va_list ap) { fpos = *va_arg(ap, filepos *); fpos2 = *va_arg(ap, filepos *); wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); sprintf(error, "paragraph keyword `%.200s' already defined at ", sp); sprintf(error + strlen(error), "%s:%d", fpos2.filename, fpos2.line); flags = FILEPOS; @@ -27,6 +27,12 @@ #include "tree234.h" /* + * FIXME: Charset temporary workarounds + */ +#define CS_FIXME CS_ISO8859_1 +#define CS_LOCAL CS_ISO8859_1 + +/* * Structure tags */ typedef struct input_Tag input; @@ -72,6 +78,7 @@ struct input_Tag { charset_state csstate; wchar_t wc[16]; /* wide chars from input conversion */ int nwc, wcpos; /* size of, and position in, wc[] */ + char *pushback_chars; /* used to save input-encoding data */ }; /* @@ -82,6 +89,7 @@ struct paragraph_Tag { paragraph *next; int type; wchar_t *keyword; /* for most special paragraphs */ + char *origkeyword; /* same again in original charset */ word *words; /* list of words in paragraph */ int aux; /* number, in a numbered paragraph * or subsection level @@ -266,11 +274,14 @@ char *dupstr(char *s); /* * ustring.c */ -wchar_t *ustrdup(wchar_t *s); -char *ustrtoa(wchar_t *s, char *outbuf, int size); -wchar_t *ustrfroma(char *s, wchar_t *outbuf, int size); -char *utoa_dup(wchar_t *s); -wchar_t *ufroma_dup(char *s); +wchar_t *ustrdup(wchar_t const *s); +char *ustrtoa(wchar_t const *s, char *outbuf, int size, int charset); +char *ustrtoa_careful(wchar_t const *s, char *outbuf, int size, int charset); +wchar_t *ustrfroma(char const *s, wchar_t *outbuf, int size, int charset); +char *utoa_dup(wchar_t const *s, int charset); +char *utoa_dup_len(wchar_t const *s, int charset, int *len); +char *utoa_careful_dup(wchar_t const *s, int charset); +wchar_t *ufroma_dup(char const *s, int charset); int ustrlen(wchar_t const *s); wchar_t *uadv(wchar_t *s); wchar_t *ustrcpy(wchar_t *dest, wchar_t const *source); @@ -304,6 +315,8 @@ const char *const version; /* * misc.c */ +char *adv(char *s); + typedef struct stackTag *stack; stack stk_new(void); void stk_free(stack); @@ -343,6 +356,9 @@ struct tagWrappedLine { }; wrappedline *wrap_para(word *, int, int, int (*)(void *, word *), void *, int); void wrap_free(wrappedline *); +void cmdline_cfg_add(paragraph *cfg, char *string); +paragraph *cmdline_cfg_new(void); +paragraph *cmdline_cfg_simple(char *string, ...); /* * input.c @@ -86,7 +86,7 @@ static void input_configure(input *in, paragraph *cfg) { assert(cfg->type == para_Config); if (!ustricmp(cfg->keyword, L"input-charset")) { - char *csname = utoa_dup(uadv(cfg->keyword)); + char *csname = utoa_dup(uadv(cfg->keyword), CS_ASCII); in->charset = charset_from_localenc(csname); sfree(csname); } @@ -95,7 +95,7 @@ static void input_configure(input *in, paragraph *cfg) { /* * Can return EOF */ -static int get(input *in, filepos *pos) { +static int get(input *in, filepos *pos, rdstringc *rsc) { int pushbackpt = in->stack ? in->stack->npushback : 0; if (in->npushback > pushbackpt) { --in->npushback; @@ -123,6 +123,10 @@ static int get(input *in, filepos *pos) { in->currfp = NULL; return EOF; } + + if (rsc) + rdaddc(rsc, c); + /* Track line numbers, for error reporting */ if (pos) *pos = in->pos; @@ -182,6 +186,7 @@ struct token_Tag { int type; int cmd, aux; wchar_t *text; + char *origtext; filepos pos; }; enum { @@ -373,31 +378,48 @@ static void match_kw(token *tok) { token get_token(input *in) { int c; int nls; + int prevpos; token ret; rdstring rs = { 0, 0, NULL }; + rdstringc rsc = { 0, 0, NULL }; filepos cpos; ret.text = NULL; /* default */ - c = get(in, &cpos); + ret.origtext = NULL; /* default */ + if (in->pushback_chars) { + rdaddsc(&rsc, in->pushback_chars); + sfree(in->pushback_chars); + in->pushback_chars = NULL; + } + c = get(in, &cpos, &rsc); ret.pos = cpos; if (iswhite(c)) { /* tok_white or tok_eop */ nls = 0; + prevpos = 0; do { if (isnl(c)) nls++; - } while ((c = get(in, &cpos)) != EOF && iswhite(c)); + prevpos = rsc.pos; + } while ((c = get(in, &cpos, &rsc)) != EOF && iswhite(c)); if (c == EOF) { ret.type = tok_eof; + sfree(rsc.text); return ret; } + if (rsc.text) { + in->pushback_chars = dupstr(rsc.text + prevpos); + sfree(rsc.text); + } unget(in, c, &cpos); ret.type = (nls > 1 ? tok_eop : tok_white); return ret; } else if (c == EOF) { /* tok_eof */ ret.type = tok_eof; + sfree(rsc.text); return ret; } else if (c == '\\') { /* tok_cmd */ - c = get(in, &cpos); + rsc.pos = prevpos = 0; + c = get(in, &cpos, &rsc); if (c == '-' || c == '\\' || c == '_' || c == '#' || c == '{' || c == '}' || c == '.') { /* single-char command */ @@ -407,13 +429,15 @@ token get_token(input *in) { do { rdadd(&rs, c); len++; - c = get(in, &cpos); + prevpos = rsc.pos; + c = get(in, &cpos, &rsc); } while (ishex(c) && len < 5); unget(in, c, &cpos); } else if (iscmd(c)) { do { rdadd(&rs, c); - c = get(in, &cpos); + prevpos = rsc.pos; + c = get(in, &cpos, &rsc); } while (iscmd(c)); unget(in, c, &cpos); } @@ -423,14 +447,24 @@ token get_token(input *in) { */ ret.type = tok_cmd; ret.text = ustrdup(rs.text); + if (rsc.text) { + in->pushback_chars = dupstr(rsc.text + prevpos); + rsc.text[prevpos] = '\0'; + ret.origtext = dupstr(rsc.text); + } else { + ret.origtext = dupstr(""); + } match_kw(&ret); sfree(rs.text); + sfree(rsc.text); return ret; } else if (c == '{') { /* tok_lbrace */ ret.type = tok_lbrace; + sfree(rsc.text); return ret; } else if (c == '}') { /* tok_rbrace */ ret.type = tok_rbrace; + sfree(rsc.text); return ret; } else { /* tok_word */ /* @@ -442,6 +476,7 @@ token get_token(input *in) { * a hyphen. */ ret.aux = FALSE; /* assumed for now */ + prevpos = 0; while (1) { if (iswhite(c) || c=='{' || c=='}' || c=='\\' || c==EOF) { /* Put back the character that caused termination */ @@ -450,15 +485,25 @@ token get_token(input *in) { } else { rdadd(&rs, c); if (c == '-') { + prevpos = rsc.pos; ret.aux = TRUE; break; /* hyphen terminates word */ } } - c = get(in, &cpos); + prevpos = rsc.pos; + c = get(in, &cpos, &rsc); } ret.type = tok_word; ret.text = ustrdup(rs.text); + if (rsc.text) { + in->pushback_chars = dupstr(rsc.text + prevpos); + rsc.text[prevpos] = '\0'; + ret.origtext = dupstr(rsc.text); + } else { + ret.origtext = dupstr(""); + } sfree(rs.text); + sfree(rsc.text); return ret; } } @@ -472,7 +517,7 @@ int isbrace(input *in) { int c; filepos cpos; - c = get(in, &cpos); + c = get(in, &cpos, NULL); unget(in, c, &cpos); return (c == '{'); } @@ -488,15 +533,16 @@ token get_codepar_token(input *in) { filepos cpos; ret.type = tok_word; - c = get(in, &cpos); /* expect (and discard) one space */ + ret.origtext = NULL; + c = get(in, &cpos, NULL); /* expect (and discard) one space */ ret.pos = cpos; if (c == ' ') { - c = get(in, &cpos); + c = get(in, &cpos, NULL); ret.pos = cpos; } while (!isnl(c) && c != EOF) { int c2 = c; - c = get(in, &cpos); + c = get(in, &cpos, NULL); /* Discard \r just before \n. */ if (c2 != 13 || !isnl(c)) rdadd(&rs, c2); @@ -538,7 +584,7 @@ static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) { * Destructor before token is reassigned; should catch most memory * leaks */ -#define dtor(t) ( sfree(t.text) ) +#define dtor(t) ( sfree(t.text), sfree(t.origtext) ) /* * Reads a single file (ie until get() returns EOF) @@ -581,6 +627,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { wchar_t uchr; t.text = NULL; + t.origtext = NULL; macros = newtree234(macrocmp); already = FALSE; @@ -593,6 +640,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { int start_cmd = c__invalid; par.words = NULL; par.keyword = NULL; + par.origkeyword = NULL; whptr = &par.words; /* @@ -840,6 +888,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { if (needkw > 0) { rdstring rs = { 0, 0, NULL }; + rdstringc rsc = { 0, 0, NULL }; int nkeys = 0; filepos fp; @@ -857,20 +906,25 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { (t.type == tok_cmd && t.cmd == c__nbsp) || (t.type == tok_cmd && t.cmd == c__escaped)) { if (t.type == tok_white || - (t.type == tok_cmd && t.cmd == c__nbsp)) + (t.type == tok_cmd && t.cmd == c__nbsp)) { rdadd(&rs, ' '); - else + rdaddc(&rsc, ' '); + } else { rdadds(&rs, t.text); + rdaddsc(&rsc, t.origtext); + } } if (t.type != tok_rbrace) { error(err_kwunclosed, &t.pos); continue; } rdadd(&rs, 0); /* add string terminator */ + rdaddc(&rsc, 0); /* add string terminator */ dtor(t), t = get_token(in); /* eat right brace */ } - rdadd(&rs, 0); /* add string terminator */ + rdadd(&rs, 0); /* add string terminator */ + rdaddc(&rsc, 0); /* add string terminator */ /* See whether we have the right number of keywords. */ if ((needkw & 48) && nkeys > 0) @@ -901,6 +955,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { } par.keyword = rdtrim(&rs); + par.origkeyword = rdtrimc(&rsc); /* Move to EOP in case of needkw==8 or 16 (no body) */ if (needkw & 24) { @@ -1464,6 +1519,8 @@ paragraph *read_input(input *in, indexdata *idx) { setpos(in, in->filenames[in->currindex]); in->charset = in->defcharset; in->csstate = charset_init_state; + in->wcpos = in->nwc = 0; + in->pushback_chars = NULL; read_file(&hptr, in, idx); } in->currindex++; @@ -181,36 +181,25 @@ int main(int argc, char **argv) { * into a config paragraph. */ { - wchar_t *keywords; - char *q; - wchar_t *u; + char *s = dupstr(p), *q, *r; paragraph *para; - keywords = mknewa(wchar_t, 2+strlen(p)); - - u = keywords; - q = p; + para = cmdline_cfg_new(); + q = r = s; while (*q) { if (*q == ':') { - *u++ = L'\0'; + *r = '\0'; + cmdline_cfg_add(para, s); + r = s; } else { if (*q == '\\' && q[1]) q++; - /* FIXME: lacks charset flexibility */ - *u++ = *q; + *r++ = *q; } q++; } - *u = L'\0'; - - para = mknew(paragraph); - memset(para, 0, sizeof(*para)); - para->type = para_Config; - para->keyword = keywords; - para->next = NULL; - para->fpos.filename = "<command line>"; - para->fpos.line = para->fpos.col = -1; + cmdline_cfg_add(para, s); if (cfg_tail) cfg_tail->next = para; @@ -2,8 +2,13 @@ * misc.c: miscellaneous useful items */ +#include <stdarg.h> #include "halibut.h" +char *adv(char *s) { + return s + 1 + strlen(s); +} + struct stackTag { void **data; int sp; @@ -479,3 +484,65 @@ void wrap_free(wrappedline *w) { w = t; } } + +void cmdline_cfg_add(paragraph *cfg, char *string) +{ + wchar_t *ustring; + int upos, ulen, pos, len; + + ulen = 0; + while (cfg->keyword[ulen]) + ulen += 1 + ustrlen(cfg->keyword+ulen); + len = 0; + while (cfg->origkeyword[len]) + len += 1 + strlen(cfg->origkeyword+len); + + ustring = ufroma_dup(string, CS_FIXME); + + upos = ulen; + ulen += 2 + ustrlen(ustring); + cfg->keyword = resize(cfg->keyword, ulen); + ustrcpy(cfg->keyword+upos, ustring); + cfg->keyword[ulen-1] = L'\0'; + + pos = len; + len += 2 + strlen(string); + cfg->origkeyword = resize(cfg->origkeyword, len); + strcpy(cfg->origkeyword+pos, string); + cfg->origkeyword[len-1] = '\0'; + + sfree(ustring); +} + +paragraph *cmdline_cfg_new(void) +{ + paragraph *p; + + p = mknew(paragraph); + memset(p, 0, sizeof(*p)); + p->type = para_Config; + p->next = NULL; + p->fpos.filename = "<command line>"; + p->fpos.line = p->fpos.col = -1; + p->keyword = ustrdup(L"\0"); + p->origkeyword = dupstr("\0"); + + return p; +} + +paragraph *cmdline_cfg_simple(char *string, ...) +{ + va_list ap; + char *s; + paragraph *p; + + p = cmdline_cfg_new(); + cmdline_cfg_add(p, string); + + va_start(ap, string); + while ((s = va_arg(ap, char *)) != NULL) + cmdline_cfg_add(p, s); + va_end(ap); + + return p; +} @@ -6,7 +6,7 @@ #include <time.h> #include "halibut.h" -wchar_t *ustrdup(wchar_t *s) { +wchar_t *ustrdup(wchar_t const *s) { wchar_t *r; if (s) { r = mknewa(wchar_t, 1+ustrlen(s)); @@ -18,59 +18,145 @@ wchar_t *ustrdup(wchar_t *s) { return r; } -char *ustrtoa(wchar_t *s, char *outbuf, int size) { - char *p; +static char *ustrtoa_internal(wchar_t const *s, char *outbuf, int size, + int charset, int careful) { + int len, ret, err; + charset_state state = CHARSET_INIT_STATE; + if (!s) { *outbuf = '\0'; return outbuf; } - for (p = outbuf; *s && p < outbuf+size; p++,s++) - *p = *s; - if (p < outbuf+size) - *p = '\0'; - else - outbuf[size-1] = '\0'; + + len = ustrlen(s); + size--; /* leave room for terminating NUL */ + *outbuf = '\0'; + while (len > 0) { + err = 0; + ret = charset_from_unicode(&s, &len, outbuf, size, charset, &state, + (careful ? &err : NULL)); + if (err) + return NULL; + if (!ret) + return outbuf; + size -= ret; + outbuf += ret; + *outbuf = '\0'; + } + /* + * Clean up + */ + ret = charset_from_unicode(NULL, 0, outbuf, size, charset, &state, NULL); + size -= ret; + outbuf += ret; + *outbuf = '\0'; return outbuf; } -wchar_t *ustrfroma(char *s, wchar_t *outbuf, int size) { - wchar_t *p; +char *ustrtoa(wchar_t const *s, char *outbuf, int size, int charset) { + return ustrtoa_internal(s, outbuf, size, charset, FALSE); +} + +char *ustrtoa_careful(wchar_t const *s, char *outbuf, int size, int charset) { + return ustrtoa_internal(s, outbuf, size, charset, TRUE); +} + +wchar_t *ustrfroma(char const *s, wchar_t *outbuf, int size, int charset) { + int len, ret; + charset_state state = CHARSET_INIT_STATE; + if (!s) { *outbuf = L'\0'; return outbuf; } - for (p = outbuf; *s && p < outbuf+size; p++,s++) - *p = *s; - if (p < outbuf+size) - *p = '\0'; - else - outbuf[size-1] = '\0'; + + len = strlen(s); + size--; /* allow for terminating NUL */ + *outbuf = L'\0'; + while (len > 0) { + ret = charset_to_unicode(&s, &len, outbuf, size, + charset, &state, NULL, 0); + if (!ret) + return outbuf; + outbuf += ret; + size -= ret; + *outbuf = L'\0'; + } return outbuf; } -char *utoa_dup(wchar_t *s) { - int len; - char *buf = NULL; +char *utoa_internal_dup(wchar_t const *s, int charset, int *lenp, int careful) +{ + char *outbuf; + int outpos, outlen, len, ret, err; + charset_state state = CHARSET_INIT_STATE; - len = ustrlen(s) + 1; - do { - buf = resize(buf, len); - ustrtoa(s, buf, len); - len = (3 * len) / 2 + 1; /* this guarantees a strict increase */ - } while ((int)strlen(buf) >= len-1); + if (!s) { + return dupstr(""); + } - buf = resize(buf, strlen(buf)+1); - return buf; + len = ustrlen(s); + + outlen = len + 10; + outbuf = mknewa(char, outlen); + + outpos = 0; + outbuf[outpos] = '\0'; + + while (len > 0) { + err = 0; + ret = charset_from_unicode(&s, &len, + outbuf + outpos, outlen - outpos - 1, + charset, &state, (careful ? &err : NULL)); + if (err) { + sfree(outbuf); + return NULL; + } + if (!ret) { + outlen = outlen * 3 / 2; + outbuf = resize(outbuf, outlen); + } + outpos += ret; + outbuf[outpos] = '\0'; + } + /* + * Clean up + */ + outlen = outpos + 32; + outbuf = resize(outbuf, outlen); + ret = charset_from_unicode(NULL, 0, + outbuf + outpos, outlen - outpos + 1, + charset, &state, NULL); + outpos += ret; + outbuf[outpos] = '\0'; + if (lenp) + *lenp = outpos; + return outbuf; } -wchar_t *ufroma_dup(char *s) { +char *utoa_dup(wchar_t const *s, int charset) +{ + return utoa_internal_dup(s, charset, NULL, FALSE); +} + +char *utoa_dup_len(wchar_t const *s, int charset, int *len) +{ + return utoa_internal_dup(s, charset, len, FALSE); +} + +char *utoa_careful_dup(wchar_t const *s, int charset) +{ + return utoa_internal_dup(s, charset, NULL, TRUE); +} + +wchar_t *ufroma_dup(char const *s, int charset) { int len; wchar_t *buf = NULL; len = strlen(s) + 1; do { buf = resize(buf, len); - ustrfroma(s, buf, len); + ustrfroma(s, buf, len, charset); len = (3 * len) / 2 + 1; /* this guarantees a strict increase */ } while (ustrlen(buf) >= len-1); @@ -183,6 +269,12 @@ wchar_t *ustrftime(wchar_t *wfmt, struct tm *timespec) { size_t len; /* + * FIXME: really we ought to copy non-% parts of the format + * ourselves, and only resort to strftime for % parts. Also we + * should use wcsftime if it's present. + */ + + /* * strftime has the entertaining property that it returns 0 * _either_ on out-of-space _or_ on successful generation of * the empty string. Hence we must ensure our format can never @@ -192,7 +284,7 @@ wchar_t *ustrftime(wchar_t *wfmt, struct tm *timespec) { if (wfmt) { len = ustrlen(wfmt); fmt = mknewa(char, 2+len); - ustrtoa(wfmt, fmt+1, len+1); + ustrtoa(wfmt, fmt+1, len+1, CS_ASCII); /* CS_FIXME? */ fmt[0] = ' '; } else fmt = " %c"; |