diff options
| author | Simon Tatham <anakin@pobox.com> | 2004-04-22 17:27:05 +0000 |
|---|---|---|
| committer | Simon Tatham <anakin@pobox.com> | 2004-04-22 17:27:05 +0000 |
| commit | 55cf0a663723f6334b94de297776756b487c2cdf (patch) | |
| tree | 0ce89f99734e930c9324211811c6ecef915bebe3 | |
| parent | 2dfa498f92369018c3bbc1527df8cce5778fc6ae (diff) | |
| download | halibut-55cf0a663723f6334b94de297776756b487c2cdf.zip halibut-55cf0a663723f6334b94de297776756b487c2cdf.tar.gz halibut-55cf0a663723f6334b94de297776756b487c2cdf.tar.bz2 halibut-55cf0a663723f6334b94de297776756b487c2cdf.tar.xz | |
Support the locale-supplied character set where appropriate. It's
used for converting command-line -C directives into Unicode; it's
used for outputting Unicode strings to stderr in error messages; and
it's used as the default character set for input files (although I'd
be inclined to recommend everyone use \cfg{input-charset} in all
their source files to ensure their portability).
[originally from svn r4114]
Diffstat (limited to '')
| -rw-r--r-- | error.c | 26 | ||||
| -rw-r--r-- | halibut.h | 8 | ||||
| -rw-r--r-- | index.c | 2 | ||||
| -rw-r--r-- | main.c | 5 | ||||
| -rw-r--r-- | misc.c | 2 | ||||
| -rw-r--r-- | ustring.c | 64 |
6 files changed, 88 insertions, 19 deletions
@@ -15,7 +15,6 @@ static void do_error(int code, va_list ap) { char error[1024]; - char auxbuf[256]; char c; char *sp, *sp2; wchar_t *wsp; @@ -82,18 +81,20 @@ static void do_error(int code, va_list ap) { break; case err_badparatype: wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); + sp = utoa_locale_dup(wsp); fpos = *va_arg(ap, filepos *); sprintf(error, "command `%.200s' unrecognised at start of" " paragraph", sp); flags = FILEPOS; + sfree(sp); break; case err_badmidcmd: wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); + sp = utoa_locale_dup(wsp); fpos = *va_arg(ap, filepos *); sprintf(error, "command `%.200s' unexpected in mid-paragraph", sp); flags = FILEPOS; + sfree(sp); break; case err_unexbrace: fpos = *va_arg(ap, filepos *); @@ -138,23 +139,26 @@ static void do_error(int code, va_list ap) { case err_nosuchkw: fpos = *va_arg(ap, filepos *); wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); + sp = utoa_locale_dup(wsp); sprintf(error, "unable to resolve cross-reference to `%.200s'", sp); flags = FILEPOS; + sfree(sp); break; case err_multiBR: fpos = *va_arg(ap, filepos *); wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); + sp = utoa_locale_dup(wsp); sprintf(error, "multiple `\\BR' entries given for `%.200s'", sp); flags = FILEPOS; + sfree(sp); break; case err_nosuchidxtag: + fpos = *va_arg(ap, filepos *); wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); + sp = utoa_locale_dup(wsp); sprintf(error, "`\\IM' on unknown index tag `%.200s'", sp); - flags = 0; - /* FIXME: need to get a filepos to here somehow */ + sfree(sp); + flags = FILEPOS; break; case err_cantopenw: sp = va_arg(ap, char *); @@ -164,9 +168,10 @@ static void do_error(int code, va_list ap) { case err_macroexists: fpos = *va_arg(ap, filepos *); wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); + sp = utoa_locale_dup(wsp); sprintf(error, "macro `%.200s' already defined", sp); flags = FILEPOS; + sfree(sp); break; case err_sectjump: fpos = *va_arg(ap, filepos *); @@ -185,10 +190,11 @@ static void do_error(int code, va_list ap) { fpos = *va_arg(ap, filepos *); fpos2 = *va_arg(ap, filepos *); wsp = va_arg(ap, wchar_t *); - sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL); + sp = utoa_locale_dup(wsp); sprintf(error, "paragraph keyword `%.200s' already defined at ", sp); sprintf(error + strlen(error), "%s:%d", fpos2.filename, fpos2.line); flags = FILEPOS; + sfree(sp); break; case err_misplacedlcont: fpos = *va_arg(ap, filepos *); @@ -27,12 +27,6 @@ #include "tree234.h" /* - * FIXME: Charset temporary workarounds - */ -#define CS_FIXME CS_ISO8859_1 -#define CS_LOCAL CS_ISO8859_1 - -/* * Structure tags */ typedef struct input_Tag input; @@ -282,6 +276,8 @@ char *utoa_dup(wchar_t const *s, int charset); char *utoa_dup_len(wchar_t const *s, int charset, int *len); char *utoa_careful_dup(wchar_t const *s, int charset); wchar_t *ufroma_dup(char const *s, int charset); +char *utoa_locale_dup(wchar_t const *s); +wchar_t *ufroma_locale_dup(char const *s); int ustrlen(wchar_t const *s); wchar_t *uadv(wchar_t *s); wchar_t *ustrcpy(wchar_t *dest, wchar_t const *source); @@ -99,7 +99,7 @@ void index_merge(indexdata *idx, int is_explicit, wchar_t *tags, word *text, * warn (and drop it, since it won't be referenced). */ if (is_explicit) { - error(err_nosuchidxtag, tags); + error(err_nosuchidxtag, fpos, tags); continue; } @@ -3,6 +3,7 @@ */ #include <assert.h> +#include <locale.h> #include <stdio.h> #include <stdlib.h> #include "halibut.h" @@ -48,6 +49,8 @@ int main(int argc, char **argv) { paragraph *cfg, *cfg_tail; void *pre_backend_data[16]; + setlocale(LC_ALL, ""); + /* * Set up initial (default) parameters. */ @@ -260,7 +263,7 @@ int main(int argc, char **argv) { in.pushback = NULL; in.reportcols = reportcols; in.stack = NULL; - in.defcharset = CS_ASCII; + in.defcharset = locale_charset(); idx = make_index(); @@ -497,7 +497,7 @@ void cmdline_cfg_add(paragraph *cfg, char *string) while (cfg->origkeyword[len]) len += 1 + strlen(cfg->origkeyword+len); - ustring = ufroma_dup(string, CS_FIXME); + ustring = ufroma_locale_dup(string); upos = ulen; ulen += 2 + ustrlen(ustring); @@ -3,6 +3,8 @@ */ #include <wchar.h> +#include <stdlib.h> +#include <assert.h> #include <time.h> #include "halibut.h" @@ -164,6 +166,68 @@ wchar_t *ufroma_dup(char const *s, int charset) { return buf; } +char *utoa_locale_dup(wchar_t const *s) +{ + /* + * This variant uses the C library locale. + */ + char *ret; + int len; + size_t siz; + + len = ustrlen(s); + + ret = mknewa(char, 1 + MB_CUR_MAX * len); + + siz = wcstombs(ret, s, len); + + if (siz) { + assert(siz <= MB_CUR_MAX * len); + ret[siz] = '\0'; + ret = resize(ret, siz+1); + return ret; + } + + /* + * If that failed, try a different strategy (which we will also + * attempt in the total absence of wcstombs). Retrieve the + * locale's charset from nl_langinfo or equivalent, and use + * normal utoa_dup. + */ + return utoa_dup(s, charset_from_locale()); +} + +wchar_t *ufroma_locale_dup(char const *s) +{ + /* + * This variant uses the C library locale. + */ + wchar_t *ret; + int len; + size_t siz; + + len = strlen(s); + + ret = mknewa(wchar_t, 1 + 2*len); /* be conservative */ + + siz = mbstowcs(ret, s, len); + + if (siz) { + assert(siz <= (size_t)(2 * len)); + ret[siz] = L'\0'; + ret = resize(ret, siz+1); + return ret; + } + + /* + * If that failed, try a different strategy (which we will also + * attempt in the total absence of wcstombs). Retrieve the + * locale's charset from nl_langinfo or equivalent, and use + * normal ufroma_dup. + */ + return ufroma_dup(s, charset_from_locale()); +} + int ustrlen(wchar_t const *s) { int len = 0; while (*s++) len++; |