summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Simon Tatham <anakin@pobox.com>, 2004-04-22 17:27:05 +0000
committer: Simon Tatham <anakin@pobox.com>, 2004-04-22 17:27:05 +0000
commit: 55cf0a663723f6334b94de297776756b487c2cdf (patch)
tree: 0ce89f99734e930c9324211811c6ecef915bebe3
parent: 2dfa498f92369018c3bbc1527df8cce5778fc6ae (diff)
download: halibut-55cf0a663723f6334b94de297776756b487c2cdf.zip
halibut-55cf0a663723f6334b94de297776756b487c2cdf.tar.gz
halibut-55cf0a663723f6334b94de297776756b487c2cdf.tar.bz2
halibut-55cf0a663723f6334b94de297776756b487c2cdf.tar.xz
Support the locale-supplied character set where appropriate. It's
used for converting command-line -C directives into Unicode; it's used for outputting Unicode strings to stderr in error messages; and it's used as the default character set for input files (although I'd be inclined to recommend everyone use \cfg{input-charset} in all their source files to ensure their portability). [originally from svn r4114]
Diffstat (limited to '')
-rw-r--r-- error.c 26
-rw-r--r-- halibut.h 8
-rw-r--r-- index.c 2
-rw-r--r-- main.c 5
-rw-r--r-- misc.c 2
-rw-r--r-- ustring.c 64
6 files changed, 88 insertions, 19 deletions
diff --git a/error.c b/error.c
index abd8d61..42eea37 100644
--- a/error.c
+++ b/error.c
@@ -15,7 +15,6 @@
static void do_error(int code, va_list ap) {
char error[1024];
- char auxbuf[256];
char c;
char *sp, *sp2;
wchar_t *wsp;
@@ -82,18 +81,20 @@ static void do_error(int code, va_list ap) {
break;
case err_badparatype:
wsp = va_arg(ap, wchar_t *);
- sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL);
+ sp = utoa_locale_dup(wsp);
fpos = *va_arg(ap, filepos *);
sprintf(error, "command `%.200s' unrecognised at start of"
" paragraph", sp);
flags = FILEPOS;
+ sfree(sp);
break;
case err_badmidcmd:
wsp = va_arg(ap, wchar_t *);
- sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL);
+ sp = utoa_locale_dup(wsp);
fpos = *va_arg(ap, filepos *);
sprintf(error, "command `%.200s' unexpected in mid-paragraph", sp);
flags = FILEPOS;
+ sfree(sp);
break;
case err_unexbrace:
fpos = *va_arg(ap, filepos *);
@@ -138,23 +139,26 @@ static void do_error(int code, va_list ap) {
case err_nosuchkw:
fpos = *va_arg(ap, filepos *);
wsp = va_arg(ap, wchar_t *);
- sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL);
+ sp = utoa_locale_dup(wsp);
sprintf(error, "unable to resolve cross-reference to `%.200s'", sp);
flags = FILEPOS;
+ sfree(sp);
break;
case err_multiBR:
fpos = *va_arg(ap, filepos *);
wsp = va_arg(ap, wchar_t *);
- sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL);
+ sp = utoa_locale_dup(wsp);
sprintf(error, "multiple `\\BR' entries given for `%.200s'", sp);
flags = FILEPOS;
+ sfree(sp);
break;
case err_nosuchidxtag:
+ fpos = *va_arg(ap, filepos *);
wsp = va_arg(ap, wchar_t *);
- sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL);
+ sp = utoa_locale_dup(wsp);
sprintf(error, "`\\IM' on unknown index tag `%.200s'", sp);
- flags = 0;
- /* FIXME: need to get a filepos to here somehow */
+ sfree(sp);
+ flags = FILEPOS;
break;
case err_cantopenw:
sp = va_arg(ap, char *);
@@ -164,9 +168,10 @@ static void do_error(int code, va_list ap) {
case err_macroexists:
fpos = *va_arg(ap, filepos *);
wsp = va_arg(ap, wchar_t *);
- sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL);
+ sp = utoa_locale_dup(wsp);
sprintf(error, "macro `%.200s' already defined", sp);
flags = FILEPOS;
+ sfree(sp);
break;
case err_sectjump:
fpos = *va_arg(ap, filepos *);
@@ -185,10 +190,11 @@ static void do_error(int code, va_list ap) {
fpos = *va_arg(ap, filepos *);
fpos2 = *va_arg(ap, filepos *);
wsp = va_arg(ap, wchar_t *);
- sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL);
+ sp = utoa_locale_dup(wsp);
sprintf(error, "paragraph keyword `%.200s' already defined at ", sp);
sprintf(error + strlen(error), "%s:%d", fpos2.filename, fpos2.line);
flags = FILEPOS;
+ sfree(sp);
break;
case err_misplacedlcont:
fpos = *va_arg(ap, filepos *);
diff --git a/halibut.h b/halibut.h
index 33780ea..69953b6 100644
--- a/halibut.h
+++ b/halibut.h
@@ -27,12 +27,6 @@
#include "tree234.h"
/*
- * FIXME: Charset temporary workarounds
- */
-#define CS_FIXME CS_ISO8859_1
-#define CS_LOCAL CS_ISO8859_1
-
-/*
* Structure tags
*/
typedef struct input_Tag input;
@@ -282,6 +276,8 @@ char *utoa_dup(wchar_t const *s, int charset);
char *utoa_dup_len(wchar_t const *s, int charset, int *len);
char *utoa_careful_dup(wchar_t const *s, int charset);
wchar_t *ufroma_dup(char const *s, int charset);
+char *utoa_locale_dup(wchar_t const *s);
+wchar_t *ufroma_locale_dup(char const *s);
int ustrlen(wchar_t const *s);
wchar_t *uadv(wchar_t *s);
wchar_t *ustrcpy(wchar_t *dest, wchar_t const *source);
diff --git a/index.c b/index.c
index 9850750..9a2d9df 100644
--- a/index.c
+++ b/index.c
@@ -99,7 +99,7 @@ void index_merge(indexdata *idx, int is_explicit, wchar_t *tags, word *text,
* warn (and drop it, since it won't be referenced).
*/
if (is_explicit) {
- error(err_nosuchidxtag, tags);
+ error(err_nosuchidxtag, fpos, tags);
continue;
}
diff --git a/main.c b/main.c
index f486a3d..61b37f7 100644
--- a/main.c
+++ b/main.c
@@ -3,6 +3,7 @@
*/
#include <assert.h>
+#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include "halibut.h"
@@ -48,6 +49,8 @@ int main(int argc, char **argv) {
paragraph *cfg, *cfg_tail;
void *pre_backend_data[16];
+ setlocale(LC_ALL, "");
+
/*
* Set up initial (default) parameters.
*/
@@ -260,7 +263,7 @@ int main(int argc, char **argv) {
in.pushback = NULL;
in.reportcols = reportcols;
in.stack = NULL;
- in.defcharset = CS_ASCII;
+ in.defcharset = locale_charset();
idx = make_index();
diff --git a/misc.c b/misc.c
index 304cb1f..647d642 100644
--- a/misc.c
+++ b/misc.c
@@ -497,7 +497,7 @@ void cmdline_cfg_add(paragraph *cfg, char *string)
while (cfg->origkeyword[len])
len += 1 + strlen(cfg->origkeyword+len);
- ustring = ufroma_dup(string, CS_FIXME);
+ ustring = ufroma_locale_dup(string);
upos = ulen;
ulen += 2 + ustrlen(ustring);
diff --git a/ustring.c b/ustring.c
index 11a022c..1980a95 100644
--- a/ustring.c
+++ b/ustring.c
@@ -3,6 +3,8 @@
*/
#include <wchar.h>
+#include <stdlib.h>
+#include <assert.h>
#include <time.h>
#include "halibut.h"
@@ -164,6 +166,68 @@ wchar_t *ufroma_dup(char const *s, int charset) {
return buf;
}
+/*
+ * Convert the wide string `s' into a newly allocated multibyte
+ * string using the C library's current locale. The caller owns the
+ * result and releases it with sfree().
+ *
+ * If the C library cannot convert the string, fall back to
+ * utoa_dup() with the charset reported by charset_from_locale().
+ */
+char *utoa_locale_dup(wchar_t const *s)
+{
+    char *ret;
+    int len;
+    size_t maxbytes, siz;
+
+    len = ustrlen(s);
+
+    /* Worst case: every wide character expands to MB_CUR_MAX bytes. */
+    maxbytes = MB_CUR_MAX * len;
+    ret = mknewa(char, 1 + maxbytes);
+
+    /*
+     * The third argument to wcstombs is the capacity of the output
+     * buffer in bytes, not the number of wide characters, so pass
+     * the full size we allocated (less the terminator).
+     */
+    siz = wcstombs(ret, s, maxbytes);
+
+    /*
+     * wcstombs signals an unconvertible character with (size_t)-1.
+     * Zero is a perfectly good result: it means `s' was empty.
+     */
+    if (siz != (size_t)-1) {
+        assert(siz <= maxbytes);
+        ret[siz] = '\0';
+        ret = resize(ret, siz+1);
+        return ret;
+    }
+
+    /*
+     * The C library could not do it. Discard the scratch buffer
+     * (otherwise it would leak) and try a different strategy:
+     * ask charset_from_locale() for the locale's charset and use
+     * normal utoa_dup.
+     */
+    sfree(ret);
+    return utoa_dup(s, charset_from_locale());
+}
+
+/*
+ * Convert the multibyte string `s', interpreted in the C library's
+ * current locale, into a newly allocated wide string. The result
+ * is allocated with mknewa/resize and owned by the caller.
+ *
+ * If the C library cannot convert the string, fall back to
+ * ufroma_dup() with the charset reported by charset_from_locale().
+ */
+wchar_t *ufroma_locale_dup(char const *s)
+{
+    wchar_t *ret;
+    int len;
+    size_t siz;
+
+    len = strlen(s);
+
+    ret = mknewa(wchar_t, 1 + 2*len); /* be conservative */
+
+    /*
+     * The third argument to mbstowcs is the maximum number of wide
+     * characters to store. Each wide character consumes at least
+     * one input byte, so `len' is always enough.
+     */
+    siz = mbstowcs(ret, s, len);
+
+    /*
+     * mbstowcs signals an invalid multibyte sequence with
+     * (size_t)-1. Zero is a perfectly good result: it means `s'
+     * was empty.
+     */
+    if (siz != (size_t)-1) {
+        assert(siz <= (size_t)len);
+        ret[siz] = L'\0';
+        ret = resize(ret, siz+1);
+        return ret;
+    }
+
+    /*
+     * The C library could not do it. Discard the scratch buffer
+     * (otherwise it would leak) and try a different strategy
+     * (which we will also attempt in the total absence of
+     * mbstowcs): ask charset_from_locale() for the locale's
+     * charset and use normal ufroma_dup.
+     */
+    sfree(ret);
+    return ufroma_dup(s, charset_from_locale());
+}
+
int ustrlen(wchar_t const *s) {
int len = 0;
while (*s++) len++;