diff options
| author | Simon Tatham <anakin@pobox.com> | 2004-04-20 17:50:41 +0000 |
|---|---|---|
| committer | Simon Tatham <anakin@pobox.com> | 2004-04-20 17:50:41 +0000 |
| commit | 2b6def26f41457eba8f2056432cd1af68a5b58b0 (patch) | |
| tree | 6bc7c479673f48b7e488ea383c6076d4f28cf0fc | |
| parent | 8a9d3f97956db97b0813a6d24c486371ff14bd80 (diff) | |
| download | halibut-2b6def26f41457eba8f2056432cd1af68a5b58b0.zip halibut-2b6def26f41457eba8f2056432cd1af68a5b58b0.tar.gz halibut-2b6def26f41457eba8f2056432cd1af68a5b58b0.tar.bz2 halibut-2b6def26f41457eba8f2056432cd1af68a5b58b0.tar.xz | |
Infrastructure changes for character set support. ustrtoa,
ustrfroma, utoa_dup and ufroma_dup now take a charset parameter, and
also have a variety of subtly distinct forms. Also, when a \cfg
directive is seen in the input file, the precise octet strings for
each parameter are kept in their original form as well as being
translated into Unicode, so that when they represent filenames they
can be used verbatim.
[originally from svn r4097]
| -rw-r--r-- | bk_info.c | 33 | ||||
| -rw-r--r-- | bk_man.c | 27 | ||||
| -rw-r--r-- | bk_paper.c | 8 | ||||
| -rw-r--r-- | bk_pdf.c | 29 | ||||
| -rw-r--r-- | bk_ps.c | 29 | ||||
| -rw-r--r-- | bk_text.c | 27 | ||||
| -rw-r--r-- | bk_whlp.c | 31 | ||||
| -rw-r--r-- | bk_xhtml.c | 53 | ||||
| -rw-r--r-- | error.c | 14 | ||||
| -rw-r--r-- | halibut.h | 26 | ||||
| -rw-r--r-- | input.c | 89 | ||||
| -rw-r--r-- | main.c | 27 | ||||
| -rw-r--r-- | misc.c | 67 | ||||
| -rw-r--r-- | ustring.c | 154 |
14 files changed, 332 insertions, 282 deletions
@@ -80,7 +80,7 @@ static infoconfig info_configure(paragraph *source) { if (source->type == para_Config) { if (!ustricmp(source->keyword, L"info-filename")) { sfree(ret.filename); - ret.filename = utoa_dup(uadv(source->keyword)); + ret.filename = dupstr(adv(source->origkeyword)); } else if (!ustricmp(source->keyword, L"info-max-file-size")) { ret.maxfilesize = utoi(uadv(source->keyword)); } @@ -92,30 +92,7 @@ static infoconfig info_configure(paragraph *source) { paragraph *info_config_filename(char *filename) { - paragraph *p; - wchar_t *ufilename, *up; - int len; - - p = mknew(paragraph); - memset(p, 0, sizeof(*p)); - p->type = para_Config; - p->next = NULL; - p->fpos.filename = "<command line>"; - p->fpos.line = p->fpos.col = -1; - - ufilename = ufroma_dup(filename); - len = ustrlen(ufilename) + 2 + lenof(L"info-filename"); - p->keyword = mknewa(wchar_t, len); - up = p->keyword; - ustrcpy(up, L"info-filename"); - up = uadv(up); - ustrcpy(up, ufilename); - up = uadv(up); - *up = L'\0'; - assert(up - p->keyword < len); - sfree(ufilename); - - return p; + return cmdline_cfg_simple("info-filename", filename, NULL); } void info_backend(paragraph *sourceform, keywordlist *keywords, @@ -235,11 +212,11 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, } rdaddsc(&intro_text, "INFO-DIR-SECTION "); - s = utoa_dup(section); + s = utoa_dup(section, CS_FIXME); rdaddsc(&intro_text, s); sfree(s); rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* "); - s = utoa_dup(shortname); + s = utoa_dup(shortname, CS_FIXME); rdaddsc(&intro_text, s); sfree(s); rdaddsc(&intro_text, ": ("); @@ -257,7 +234,7 @@ void info_backend(paragraph *sourceform, keywordlist *keywords, } } rdaddsc(&intro_text, ". "); - s = utoa_dup(longname); + s = utoa_dup(longname, CS_FIXME); rdaddsc(&intro_text, s); sfree(s); rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n"); @@ -48,7 +48,7 @@ static manconfig man_configure(paragraph *source) { ret.mindepth = utoi(uadv(source->keyword)); } else if (!ustricmp(source->keyword, L"man-filename")) { sfree(ret.filename); - ret.filename = utoa_dup(uadv(source->keyword)); + ret.filename = dupstr(adv(source->origkeyword)); } } } @@ -64,30 +64,7 @@ static void man_conf_cleanup(manconfig cf) paragraph *man_config_filename(char *filename) { - paragraph *p; - wchar_t *ufilename, *up; - int len; - - p = mknew(paragraph); - memset(p, 0, sizeof(*p)); - p->type = para_Config; - p->next = NULL; - p->fpos.filename = "<command line>"; - p->fpos.line = p->fpos.col = -1; - - ufilename = ufroma_dup(filename); - len = ustrlen(ufilename) + 2 + lenof(L"man-filename"); - p->keyword = mknewa(wchar_t, len); - up = p->keyword; - ustrcpy(up, L"man-filename"); - up = uadv(up); - ustrcpy(up, ufilename); - up = uadv(up); - *up = L'\0'; - assert(up - p->keyword < len); - sfree(ufilename); - - return p; + return cmdline_cfg_simple("man-filename", filename, NULL); } #define QUOTE_INITCTRL 1 /* quote initial . and ' on a line */ @@ -510,7 +510,7 @@ void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords, for (page = pages; page; page = page->next) { sprintf(buf, "%d", ++pagenum); - page->number = ufroma_dup(buf); + page->number = ufroma_dup(buf, CS_ASCII); } if (has_index) { @@ -524,7 +524,7 @@ void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords, /* And don't forget the as-yet-uncreated index. */ sprintf(buf, "%d", ++pagenum); - first_index_page->number = ufroma_dup(buf); + first_index_page->number = ufroma_dup(buf, CS_ASCII); } } @@ -683,7 +683,7 @@ void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords, for (page = ipages->next; page; page = page->next) { char buf[40]; sprintf(buf, "%d", ++pagenum); - page->number = ufroma_dup(buf); + page->number = ufroma_dup(buf, CS_ASCII); } /* @@ -1682,7 +1682,7 @@ static int render_text(page_data *page, para_data *pdata, line_data *ldata, if (text->type == word_HyperLink) { dest.type = URL; - dest.url = utoa_dup(text->text); + dest.url = utoa_dup(text->text, CS_ASCII); dest.page = NULL; } else if (text->type == word_PageXref) { dest.type = PAGE; @@ -10,30 +10,7 @@ paragraph *pdf_config_filename(char *filename) { - paragraph *p; - wchar_t *ufilename, *up; - int len; - - p = mknew(paragraph); - memset(p, 0, sizeof(*p)); - p->type = para_Config; - p->next = NULL; - p->fpos.filename = "<command line>"; - p->fpos.line = p->fpos.col = -1; - - ufilename = ufroma_dup(filename); - len = ustrlen(ufilename) + 2 + lenof(L"pdf-filename"); - p->keyword = mknewa(wchar_t, len); - up = p->keyword; - ustrcpy(up, L"pdf-filename"); - up = uadv(up); - ustrcpy(up, ufilename); - up = uadv(up); - *up = L'\0'; - assert(up - p->keyword < len); - sfree(ufilename); - - return p; + return cmdline_cfg_simple("pdf-filename", filename, NULL); } typedef struct object_Tag object; @@ -88,7 +65,7 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords, if (p->type == para_Config && p->parent) { if (!ustricmp(p->keyword, L"pdf-filename")) { sfree(filename); - filename = utoa_dup(uadv(p->keyword)); + filename = dupstr(adv(p->origkeyword)); } } } @@ -742,7 +719,7 @@ static int pdf_versionid(FILE *fp, word *words) switch (type) { case word_Normal: - text = utoa_dup(words->text); + text = utoa_dup(words->text, CS_ASCII); break; case word_WhiteSpace: text = dupstr(" "); @@ -10,30 +10,7 @@ static void ps_versionid(FILE *fp, word *words); paragraph *ps_config_filename(char *filename) { - paragraph *p; - wchar_t *ufilename, *up; - int len; - - p = mknew(paragraph); - memset(p, 0, sizeof(*p)); - p->type = para_Config; - p->next = NULL; - p->fpos.filename = "<command line>"; - p->fpos.line = p->fpos.col = -1; - - ufilename = ufroma_dup(filename); - len = ustrlen(ufilename) + 2 + lenof(L"ps-filename"); - p->keyword = mknewa(wchar_t, len); - up = p->keyword; - ustrcpy(up, L"ps-filename"); - up = uadv(up); - ustrcpy(up, ufilename); - up = uadv(up); - *up = L'\0'; - assert(up - p->keyword < len); - sfree(ufilename); - - return p; + return cmdline_cfg_simple("ps-filename", filename, NULL); } void ps_backend(paragraph *sourceform, keywordlist *keywords, @@ -55,7 +32,7 @@ void ps_backend(paragraph *sourceform, keywordlist *keywords, if (p->type == para_Config && p->parent) { if (!ustricmp(p->keyword, L"ps-filename")) { sfree(filename); - filename = utoa_dup(uadv(p->keyword)); + filename = dupstr(adv(p->origkeyword)); } } } @@ -247,7 +224,7 @@ static void ps_versionid(FILE *fp, word *words) switch (type) { case word_Normal: - text = utoa_dup(words->text); + text = utoa_dup(words->text, CS_ASCII); break; case word_WhiteSpace: text = dupstr(" "); @@ -85,7 +85,7 @@ static textconfig text_configure(paragraph *source) { ret.indent = utoi(uadv(source->keyword)); } else if (!ustricmp(source->keyword, L"text-filename")) { sfree(ret.filename); - ret.filename = utoa_dup(uadv(source->keyword)); + ret.filename = dupstr(adv(source->origkeyword)); } else if (!ustricmp(source->keyword, L"text-indent-code")) { ret.indent_code = utoi(uadv(source->keyword)); } else if (!ustricmp(source->keyword, L"text-width")) { @@ -182,30 +182,7 @@ static textconfig text_configure(paragraph *source) { paragraph *text_config_filename(char *filename) { - paragraph *p; - wchar_t *ufilename, *up; - int len; - - p = mknew(paragraph); - memset(p, 0, sizeof(*p)); - p->type = para_Config; - p->next = NULL; - p->fpos.filename = "<command line>"; - p->fpos.line = p->fpos.col = -1; - - ufilename = ufroma_dup(filename); - len = ustrlen(ufilename) + 2 + lenof(L"text-filename"); - p->keyword = mknewa(wchar_t, len); - up = p->keyword; - ustrcpy(up, L"text-filename"); - up = uadv(up); - ustrcpy(up, ufilename); - up = uadv(up); - *up = L'\0'; - assert(up - p->keyword < len); - sfree(ufilename); - - return p; + return cmdline_cfg_simple("text-filename", filename, NULL); } void text_backend(paragraph *sourceform, keywordlist *keywords, @@ -45,30 +45,7 @@ static void whlp_contents_write(struct bk_whlp_state *state, paragraph *whlp_config_filename(char *filename) { - paragraph *p; - wchar_t *ufilename, *up; - int len; - - p = mknew(paragraph); - memset(p, 0, sizeof(*p)); - p->type = para_Config; - p->next = NULL; - p->fpos.filename = "<command line>"; - p->fpos.line = p->fpos.col = -1; - - ufilename = ufroma_dup(filename); - len = ustrlen(ufilename) + 2 + lenof(L"winhelp-filename"); - p->keyword = mknewa(wchar_t, len); - up = p->keyword; - ustrcpy(up, L"winhelp-filename"); - up = uadv(up); - ustrcpy(up, ufilename); - up = uadv(up); - *up = L'\0'; - assert(up - p->keyword < len); - sfree(ufilename); - - return p; + return cmdline_cfg_simple("winhelp-filename", filename, NULL); } void whlp_backend(paragraph *sourceform, keywordlist *keywords, @@ -129,7 +106,7 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords, p->parent->private_data = topicname; } else if (!ustricmp(p->keyword, L"winhelp-filename")) { sfree(filename); - filename = utoa_dup(uadv(p->keyword)); + filename = dupstr(adv(p->origkeyword)); } } } @@ -152,7 +129,7 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords, filename = newf; len = strlen(newf); } - cntname = mknewa(char, len); + cntname = mknewa(char, len+1); sprintf(cntname, "%.*s.cnt", len-4, filename); } @@ -671,7 +648,7 @@ static void whlp_rdaddwc(rdstringc *rs, word *text) { assert(text->type != word_CodeQuote && text->type != word_WkCodeQuote); if (removeattr(text->type) == word_Normal) { - if (whlp_convert(text->text, 0, &c, FALSE)) + if (whlp_convert(text->text, 0, &c, FALSE) || !text->alt) rdaddsc(rs, c); else whlp_rdaddwc(rs, text->alt); @@ -192,19 +192,19 @@ static xhtmlconfig xhtml_configure(paragraph *source) { if (!ustricmp(source->keyword, L"xhtml-contents-filename")) { sfree(ret.contents_filename); - ret.contents_filename = utoa_dup(uadv(source->keyword)); + ret.contents_filename = dupstr(adv(source->origkeyword)); } else if (!ustricmp(source->keyword, L"xhtml-single-filename")) { sfree(ret.single_filename); - ret.single_filename = utoa_dup(uadv(source->keyword)); + ret.single_filename = dupstr(adv(source->origkeyword)); } else if (!ustricmp(source->keyword, L"xhtml-index-filename")) { sfree(ret.index_filename); - ret.index_filename = utoa_dup(uadv(source->keyword)); + ret.index_filename = dupstr(adv(source->origkeyword)); } else if (!ustricmp(source->keyword, L"xhtml-template-filename")) { sfree(ret.template_filename); - ret.template_filename = utoa_dup(uadv(source->keyword)); + ret.template_filename = dupstr(adv(source->origkeyword)); } else if (!ustricmp(source->keyword, L"xhtml-template-fragment")) { sfree(ret.template_fragment); - ret.template_fragment = utoa_dup(uadv(source->keyword)); + ret.template_fragment = utoa_dup(uadv(source->keyword), CS_ASCII); } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-0")) { ret.contents_depth[0] = utoi(uadv(source->keyword)); } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-1")) { @@ -304,45 +304,12 @@ paragraph *xhtml_config_filename(char *filename) * \cfg{xhtml-leaf-level}{0}; the rationale being that the user * wants their output _in that file_. */ + paragraph *p, *q; - paragraph *p[2]; - int i, len; - wchar_t *ufilename, *up; - - for (i = 0; i < 2; i++) { - p[i] = mknew(paragraph); - memset(p[i], 0, sizeof(*p[i])); - p[i]->type = para_Config; - p[i]->next = NULL; - p[i]->fpos.filename = "<command line>"; - p[i]->fpos.line = p[i]->fpos.col = -1; - } - - ufilename = ufroma_dup(filename); - len = ustrlen(ufilename) + 2 + lenof(L"xhtml-single-filename"); - p[0]->keyword = mknewa(wchar_t, len); - up = p[0]->keyword; - ustrcpy(up, L"xhtml-single-filename"); - up = uadv(up); - ustrcpy(up, ufilename); - up = uadv(up); - *up = L'\0'; - assert(up - p[0]->keyword < len); - sfree(ufilename); - - len = lenof(L"xhtml-leaf-level") + lenof(L"0") + 1; - p[1]->keyword = mknewa(wchar_t, len); - up = p[1]->keyword; - ustrcpy(up, L"xhtml-leaf-level"); - up = uadv(up); - ustrcpy(up, L"0"); - up = uadv(up); - *up = L'\0'; - assert(up - p[1]->keyword < len); - - p[0]->next = p[1]; - - return p[0]; + p = cmdline_cfg_simple("xhtml-single-filename", filename, NULL); + q = cmdline_cfg_simple("xhtml-leaf-level", "0", NULL); + p->next = q; + return p; } static xhtmlsection *xhtml_new_section(xhtmlsection *last) @@ -82,7 +82,7 @@ static void do_error(int code, va_list ap) { break; case err_badparatype: wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); fpos = *va_arg(ap, filepos *); sprintf(error, "command `%.200s' unrecognised at start of" " paragraph", sp); @@ -90,7 +90,7 @@ static void do_error(int code, va_list ap) { break; case err_badmidcmd: wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); fpos = *va_arg(ap, filepos *); sprintf(error, "command `%.200s' unexpected in mid-paragraph", sp); flags = FILEPOS; @@ -138,20 +138,20 @@ static void do_error(int code, va_list ap) { case err_nosuchkw: fpos = *va_arg(ap, filepos *); wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); sprintf(error, "unable to resolve cross-reference to `%.200s'", sp); flags = FILEPOS; break; case err_multiBR: fpos = *va_arg(ap, filepos *); wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); sprintf(error, "multiple `\\BR' entries given for `%.200s'", sp); flags = FILEPOS; break; case err_nosuchidxtag: wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); sprintf(error, "`\\IM' on unknown index tag `%.200s'", sp); flags = 0; /* FIXME: need to get a filepos to here somehow */ @@ -164,7 +164,7 @@ static void do_error(int code, va_list ap) { case err_macroexists: fpos = *va_arg(ap, filepos *); wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); sprintf(error, "macro `%.200s' already defined", sp); flags = FILEPOS; break; @@ -185,7 +185,7 @@ static void do_error(int code, va_list ap) { fpos = *va_arg(ap, filepos *); fpos2 = *va_arg(ap, filepos *); wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf)); + sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); sprintf(error, "paragraph keyword `%.200s' already defined at ", sp); sprintf(error + strlen(error), "%s:%d", fpos2.filename, fpos2.line); flags = FILEPOS; @@ -27,6 +27,12 @@ #include "tree234.h" /* + * FIXME: Charset temporary workarounds + */ +#define CS_FIXME CS_ISO8859_1 +#define CS_LOCAL CS_ISO8859_1 + +/* * Structure tags */ typedef struct input_Tag input; @@ -72,6 +78,7 @@ struct input_Tag { charset_state csstate; wchar_t wc[16]; /* wide chars from input conversion */ int nwc, wcpos; /* size of, and position in, wc[] */ + char *pushback_chars; /* used to save input-encoding data */ }; /* @@ -82,6 +89,7 @@ struct paragraph_Tag { paragraph *next; int type; wchar_t *keyword; /* for most special paragraphs */ + char *origkeyword; /* same again in original charset */ word *words; /* list of words in paragraph */ int aux; /* number, in a numbered paragraph * or subsection level @@ -266,11 +274,14 @@ char *dupstr(char *s); /* * ustring.c */ -wchar_t *ustrdup(wchar_t *s); -char *ustrtoa(wchar_t *s, char *outbuf, int size); -wchar_t *ustrfroma(char *s, wchar_t *outbuf, int size); -char *utoa_dup(wchar_t *s); -wchar_t *ufroma_dup(char *s); +wchar_t *ustrdup(wchar_t const *s); +char *ustrtoa(wchar_t const *s, char *outbuf, int size, int charset); +char *ustrtoa_careful(wchar_t const *s, char *outbuf, int size, int charset); +wchar_t *ustrfroma(char const *s, wchar_t *outbuf, int size, int charset); +char *utoa_dup(wchar_t const *s, int charset); +char *utoa_dup_len(wchar_t const *s, int charset, int *len); +char *utoa_careful_dup(wchar_t const *s, int charset); +wchar_t *ufroma_dup(char const *s, int charset); int ustrlen(wchar_t const *s); wchar_t *uadv(wchar_t *s); wchar_t *ustrcpy(wchar_t *dest, wchar_t const *source); @@ -304,6 +315,8 @@ const char *const version; /* * misc.c */ +char *adv(char *s); + typedef struct stackTag *stack; stack stk_new(void); void stk_free(stack); @@ -343,6 +356,9 @@ struct tagWrappedLine { }; wrappedline *wrap_para(word *, int, int, int (*)(void *, word *), void *, int); void wrap_free(wrappedline *); +void cmdline_cfg_add(paragraph *cfg, char *string); +paragraph *cmdline_cfg_new(void); +paragraph *cmdline_cfg_simple(char *string, ...); /* * input.c @@ -86,7 +86,7 @@ static void input_configure(input *in, paragraph *cfg) { assert(cfg->type == para_Config); if (!ustricmp(cfg->keyword, L"input-charset")) { - char *csname = utoa_dup(uadv(cfg->keyword)); + char *csname = utoa_dup(uadv(cfg->keyword), CS_ASCII); in->charset = charset_from_localenc(csname); sfree(csname); } @@ -95,7 +95,7 @@ static void input_configure(input *in, paragraph *cfg) { /* * Can return EOF */ -static int get(input *in, filepos *pos) { +static int get(input *in, filepos *pos, rdstringc *rsc) { int pushbackpt = in->stack ? in->stack->npushback : 0; if (in->npushback > pushbackpt) { --in->npushback; @@ -123,6 +123,10 @@ static int get(input *in, filepos *pos) { in->currfp = NULL; return EOF; } + + if (rsc) + rdaddc(rsc, c); + /* Track line numbers, for error reporting */ if (pos) *pos = in->pos; @@ -182,6 +186,7 @@ struct token_Tag { int type; int cmd, aux; wchar_t *text; + char *origtext; filepos pos; }; enum { @@ -373,31 +378,48 @@ static void match_kw(token *tok) { token get_token(input *in) { int c; int nls; + int prevpos; token ret; rdstring rs = { 0, 0, NULL }; + rdstringc rsc = { 0, 0, NULL }; filepos cpos; ret.text = NULL; /* default */ - c = get(in, &cpos); + ret.origtext = NULL; /* default */ + if (in->pushback_chars) { + rdaddsc(&rsc, in->pushback_chars); + sfree(in->pushback_chars); + in->pushback_chars = NULL; + } + c = get(in, &cpos, &rsc); ret.pos = cpos; if (iswhite(c)) { /* tok_white or tok_eop */ nls = 0; + prevpos = 0; do { if (isnl(c)) nls++; - } while ((c = get(in, &cpos)) != EOF && iswhite(c)); + prevpos = rsc.pos; + } while ((c = get(in, &cpos, &rsc)) != EOF && iswhite(c)); if (c == EOF) { ret.type = tok_eof; + sfree(rsc.text); return ret; } + if (rsc.text) { + in->pushback_chars = dupstr(rsc.text + prevpos); + sfree(rsc.text); + } unget(in, c, &cpos); ret.type = (nls > 1 ? tok_eop : tok_white); return ret; } else if (c == EOF) { /* tok_eof */ ret.type = tok_eof; + sfree(rsc.text); return ret; } else if (c == '\\') { /* tok_cmd */ - c = get(in, &cpos); + rsc.pos = prevpos = 0; + c = get(in, &cpos, &rsc); if (c == '-' || c == '\\' || c == '_' || c == '#' || c == '{' || c == '}' || c == '.') { /* single-char command */ @@ -407,13 +429,15 @@ token get_token(input *in) { do { rdadd(&rs, c); len++; - c = get(in, &cpos); + prevpos = rsc.pos; + c = get(in, &cpos, &rsc); } while (ishex(c) && len < 5); unget(in, c, &cpos); } else if (iscmd(c)) { do { rdadd(&rs, c); - c = get(in, &cpos); + prevpos = rsc.pos; + c = get(in, &cpos, &rsc); } while (iscmd(c)); unget(in, c, &cpos); } @@ -423,14 +447,24 @@ token get_token(input *in) { */ ret.type = tok_cmd; ret.text = ustrdup(rs.text); + if (rsc.text) { + in->pushback_chars = dupstr(rsc.text + prevpos); + rsc.text[prevpos] = '\0'; + ret.origtext = dupstr(rsc.text); + } else { + ret.origtext = dupstr(""); + } match_kw(&ret); sfree(rs.text); + sfree(rsc.text); return ret; } else if (c == '{') { /* tok_lbrace */ ret.type = tok_lbrace; + sfree(rsc.text); return ret; } else if (c == '}') { /* tok_rbrace */ ret.type = tok_rbrace; + sfree(rsc.text); return ret; } else { /* tok_word */ /* @@ -442,6 +476,7 @@ token get_token(input *in) { * a hyphen. */ ret.aux = FALSE; /* assumed for now */ + prevpos = 0; while (1) { if (iswhite(c) || c=='{' || c=='}' || c=='\\' || c==EOF) { /* Put back the character that caused termination */ @@ -450,15 +485,25 @@ token get_token(input *in) { } else { rdadd(&rs, c); if (c == '-') { + prevpos = rsc.pos; ret.aux = TRUE; break; /* hyphen terminates word */ } } - c = get(in, &cpos); + prevpos = rsc.pos; + c = get(in, &cpos, &rsc); } ret.type = tok_word; ret.text = ustrdup(rs.text); + if (rsc.text) { + in->pushback_chars = dupstr(rsc.text + prevpos); + rsc.text[prevpos] = '\0'; + ret.origtext = dupstr(rsc.text); + } else { + ret.origtext = dupstr(""); + } sfree(rs.text); + sfree(rsc.text); return ret; } } @@ -472,7 +517,7 @@ int isbrace(input *in) { int c; filepos cpos; - c = get(in, &cpos); + c = get(in, &cpos, NULL); unget(in, c, &cpos); return (c == '{'); } @@ -488,15 +533,16 @@ token get_codepar_token(input *in) { filepos cpos; ret.type = tok_word; - c = get(in, &cpos); /* expect (and discard) one space */ + ret.origtext = NULL; + c = get(in, &cpos, NULL); /* expect (and discard) one space */ ret.pos = cpos; if (c == ' ') { - c = get(in, &cpos); + c = get(in, &cpos, NULL); ret.pos = cpos; } while (!isnl(c) && c != EOF) { int c2 = c; - c = get(in, &cpos); + c = get(in, &cpos, NULL); /* Discard \r just before \n. */ if (c2 != 13 || !isnl(c)) rdadd(&rs, c2); @@ -538,7 +584,7 @@ static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) { * Destructor before token is reassigned; should catch most memory * leaks */ -#define dtor(t) ( sfree(t.text) ) +#define dtor(t) ( sfree(t.text), sfree(t.origtext) ) /* * Reads a single file (ie until get() returns EOF) @@ -581,6 +627,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { wchar_t uchr; t.text = NULL; + t.origtext = NULL; macros = newtree234(macrocmp); already = FALSE; @@ -593,6 +640,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { int start_cmd = c__invalid; par.words = NULL; par.keyword = NULL; + par.origkeyword = NULL; whptr = &par.words; /* @@ -840,6 +888,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { if (needkw > 0) { rdstring rs = { 0, 0, NULL }; + rdstringc rsc = { 0, 0, NULL }; int nkeys = 0; filepos fp; @@ -857,20 +906,25 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { (t.type == tok_cmd && t.cmd == c__nbsp) || (t.type == tok_cmd && t.cmd == c__escaped)) { if (t.type == tok_white || - (t.type == tok_cmd && t.cmd == c__nbsp)) + (t.type == tok_cmd && t.cmd == c__nbsp)) { rdadd(&rs, ' '); - else + rdaddc(&rsc, ' '); + } else { rdadds(&rs, t.text); + rdaddsc(&rsc, t.origtext); + } } if (t.type != tok_rbrace) { error(err_kwunclosed, &t.pos); continue; } rdadd(&rs, 0); /* add string terminator */ + rdaddc(&rsc, 0); /* add string terminator */ dtor(t), t = get_token(in); /* eat right brace */ } - rdadd(&rs, 0); /* add string terminator */ + rdadd(&rs, 0); /* add string terminator */ + rdaddc(&rsc, 0); /* add string terminator */ /* See whether we have the right number of keywords. */ if ((needkw & 48) && nkeys > 0) @@ -901,6 +955,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) { } par.keyword = rdtrim(&rs); + par.origkeyword = rdtrimc(&rsc); /* Move to EOP in case of needkw==8 or 16 (no body) */ if (needkw & 24) { @@ -1464,6 +1519,8 @@ paragraph *read_input(input *in, indexdata *idx) { setpos(in, in->filenames[in->currindex]); in->charset = in->defcharset; in->csstate = charset_init_state; + in->wcpos = in->nwc = 0; + in->pushback_chars = NULL; read_file(&hptr, in, idx); } in->currindex++; @@ -181,36 +181,25 @@ int main(int argc, char **argv) { * into a config paragraph. */ { - wchar_t *keywords; - char *q; - wchar_t *u; + char *s = dupstr(p), *q, *r; paragraph *para; - keywords = mknewa(wchar_t, 2+strlen(p)); - - u = keywords; - q = p; + para = cmdline_cfg_new(); + q = r = s; while (*q) { if (*q == ':') { - *u++ = L'\0'; + *r = '\0'; + cmdline_cfg_add(para, s); + r = s; } else { if (*q == '\\' && q[1]) q++; - /* FIXME: lacks charset flexibility */ - *u++ = *q; + *r++ = *q; } q++; } - *u = L'\0'; - - para = mknew(paragraph); - memset(para, 0, sizeof(*para)); - para->type = para_Config; - para->keyword = keywords; - para->next = NULL; - para->fpos.filename = "<command line>"; - para->fpos.line = para->fpos.col = -1; + cmdline_cfg_add(para, s); if (cfg_tail) cfg_tail->next = para; @@ -2,8 +2,13 @@ * misc.c: miscellaneous useful items */ +#include <stdarg.h> #include "halibut.h" +char *adv(char *s) { + return s + 1 + strlen(s); +} + struct stackTag { void **data; int sp; @@ -479,3 +484,65 @@ void wrap_free(wrappedline *w) { w = t; } } + +void cmdline_cfg_add(paragraph *cfg, char *string) +{ + wchar_t *ustring; + int upos, ulen, pos, len; + + ulen = 0; + while (cfg->keyword[ulen]) + ulen += 1 + ustrlen(cfg->keyword+ulen); + len = 0; + while (cfg->origkeyword[len]) + len += 1 + strlen(cfg->origkeyword+len); + + ustring = ufroma_dup(string, CS_FIXME); + + upos = ulen; + ulen += 2 + ustrlen(ustring); + cfg->keyword = resize(cfg->keyword, ulen); + ustrcpy(cfg->keyword+upos, ustring); + cfg->keyword[ulen-1] = L'\0'; + + pos = len; + len += 2 + strlen(string); + cfg->origkeyword = resize(cfg->origkeyword, len); + strcpy(cfg->origkeyword+pos, string); + cfg->origkeyword[len-1] = '\0'; + + sfree(ustring); +} + +paragraph *cmdline_cfg_new(void) +{ + paragraph *p; + + p = mknew(paragraph); + memset(p, 0, sizeof(*p)); + p->type = para_Config; + p->next = NULL; + p->fpos.filename = "<command line>"; + p->fpos.line = p->fpos.col = -1; + p->keyword = ustrdup(L"\0"); + p->origkeyword = dupstr("\0"); + + return p; +} + +paragraph *cmdline_cfg_simple(char *string, ...) +{ + va_list ap; + char *s; + paragraph *p; + + p = cmdline_cfg_new(); + cmdline_cfg_add(p, string); + + va_start(ap, string); + while ((s = va_arg(ap, char *)) != NULL) + cmdline_cfg_add(p, s); + va_end(ap); + + return p; +} @@ -6,7 +6,7 @@ #include <time.h> #include "halibut.h" -wchar_t *ustrdup(wchar_t *s) { +wchar_t *ustrdup(wchar_t const *s) { wchar_t *r; if (s) { r = mknewa(wchar_t, 1+ustrlen(s)); @@ -18,59 +18,145 @@ wchar_t *ustrdup(wchar_t *s) { return r; } -char *ustrtoa(wchar_t *s, char *outbuf, int size) { - char *p; +static char *ustrtoa_internal(wchar_t const *s, char *outbuf, int size, + int charset, int careful) { + int len, ret, err; + charset_state state = CHARSET_INIT_STATE; + if (!s) { *outbuf = '\0'; return outbuf; } - for (p = outbuf; *s && p < outbuf+size; p++,s++) - *p = *s; - if (p < outbuf+size) - *p = '\0'; - else - outbuf[size-1] = '\0'; + + len = ustrlen(s); + size--; /* leave room for terminating NUL */ + *outbuf = '\0'; + while (len > 0) { + err = 0; + ret = charset_from_unicode(&s, &len, outbuf, size, charset, &state, + (careful ? &err : NULL)); + if (err) + return NULL; + if (!ret) + return outbuf; + size -= ret; + outbuf += ret; + *outbuf = '\0'; + } + /* + * Clean up + */ + ret = charset_from_unicode(NULL, 0, outbuf, size, charset, &state, NULL); + size -= ret; + outbuf += ret; + *outbuf = '\0'; return outbuf; } -wchar_t *ustrfroma(char *s, wchar_t *outbuf, int size) { - wchar_t *p; +char *ustrtoa(wchar_t const *s, char *outbuf, int size, int charset) { + return ustrtoa_internal(s, outbuf, size, charset, FALSE); +} + +char *ustrtoa_careful(wchar_t const *s, char *outbuf, int size, int charset) { + return ustrtoa_internal(s, outbuf, size, charset, TRUE); +} + +wchar_t *ustrfroma(char const *s, wchar_t *outbuf, int size, int charset) { + int len, ret; + charset_state state = CHARSET_INIT_STATE; + if (!s) { *outbuf = L'\0'; return outbuf; } - for (p = outbuf; *s && p < outbuf+size; p++,s++) - *p = *s; - if (p < outbuf+size) - *p = '\0'; - else - outbuf[size-1] = '\0'; + + len = strlen(s); + size--; /* allow for terminating NUL */ + *outbuf = L'\0'; + while (len > 0) { + ret = charset_to_unicode(&s, &len, outbuf, size, + charset, &state, NULL, 0); + if (!ret) + return outbuf; + outbuf += ret; + size -= ret; + *outbuf = L'\0'; + } return outbuf; } -char *utoa_dup(wchar_t *s) { - int len; - char *buf = NULL; +char *utoa_internal_dup(wchar_t const *s, int charset, int *lenp, int careful) +{ + char *outbuf; + int outpos, outlen, len, ret, err; + charset_state state = CHARSET_INIT_STATE; - len = ustrlen(s) + 1; - do { - buf = resize(buf, len); - ustrtoa(s, buf, len); - len = (3 * len) / 2 + 1; /* this guarantees a strict increase */ - } while ((int)strlen(buf) >= len-1); + if (!s) { + return dupstr(""); + } - buf = resize(buf, strlen(buf)+1); - return buf; + len = ustrlen(s); + + outlen = len + 10; + outbuf = mknewa(char, outlen); + + outpos = 0; + outbuf[outpos] = '\0'; + + while (len > 0) { + err = 0; + ret = charset_from_unicode(&s, &len, + outbuf + outpos, outlen - outpos - 1, + charset, &state, (careful ? &err : NULL)); + if (err) { + sfree(outbuf); + return NULL; + } + if (!ret) { + outlen = outlen * 3 / 2; + outbuf = resize(outbuf, outlen); + } + outpos += ret; + outbuf[outpos] = '\0'; + } + /* + * Clean up + */ + outlen = outpos + 32; + outbuf = resize(outbuf, outlen); + ret = charset_from_unicode(NULL, 0, + outbuf + outpos, outlen - outpos + 1, + charset, &state, NULL); + outpos += ret; + outbuf[outpos] = '\0'; + if (lenp) + *lenp = outpos; + return outbuf; } -wchar_t *ufroma_dup(char *s) { +char *utoa_dup(wchar_t const *s, int charset) +{ + return utoa_internal_dup(s, charset, NULL, FALSE); +} + +char *utoa_dup_len(wchar_t const *s, int charset, int *len) +{ + return utoa_internal_dup(s, charset, len, FALSE); +} + +char *utoa_careful_dup(wchar_t const *s, int charset) +{ + return utoa_internal_dup(s, charset, NULL, TRUE); +} + +wchar_t *ufroma_dup(char const *s, int charset) { int len; wchar_t *buf = NULL; len = strlen(s) + 1; do { buf = resize(buf, len); - ustrfroma(s, buf, len); + ustrfroma(s, buf, len, charset); len = (3 * len) / 2 + 1; /* this guarantees a strict increase */ } while (ustrlen(buf) >= len-1); @@ -183,6 +269,12 @@ wchar_t *ustrftime(wchar_t *wfmt, struct tm *timespec) { size_t len; /* + * FIXME: really we ought to copy non-% parts of the format + * ourselves, and only resort to strftime for % parts. Also we + * should use wcsftime if it's present. + */ + + /* * strftime has the entertaining property that it returns 0 * _either_ on out-of-space _or_ on successful generation of * the empty string. Hence we must ensure our format can never @@ -192,7 +284,7 @@ wchar_t *ustrftime(wchar_t *wfmt, struct tm *timespec) { if (wfmt) { len = ustrlen(wfmt); fmt = mknewa(char, 2+len); - ustrtoa(wfmt, fmt+1, len+1); + ustrtoa(wfmt, fmt+1, len+1, CS_ASCII); /* CS_FIXME? */ fmt[0] = ' '; } else fmt = " %c"; |