summaryrefslogtreecommitdiff
path: root/ustring.c
diff options
context:
space:
mode:
authorSimon Tatham <anakin@pobox.com>2004-04-22 17:27:05 +0000
committerSimon Tatham <anakin@pobox.com>2004-04-22 17:27:05 +0000
commit55cf0a663723f6334b94de297776756b487c2cdf (patch)
tree0ce89f99734e930c9324211811c6ecef915bebe3 /ustring.c
parent2dfa498f92369018c3bbc1527df8cce5778fc6ae (diff)
downloadhalibut-55cf0a663723f6334b94de297776756b487c2cdf.zip
halibut-55cf0a663723f6334b94de297776756b487c2cdf.tar.gz
halibut-55cf0a663723f6334b94de297776756b487c2cdf.tar.bz2
halibut-55cf0a663723f6334b94de297776756b487c2cdf.tar.xz
Support the locale-supplied character set where appropriate. It's
used for converting command-line -C directives into Unicode; it's used for outputting Unicode strings to stderr in error messages; and it's used as the default character set for input files (although I'd be inclined to recommend everyone use \cfg{input-charset} in all their source files to ensure their portability). [originally from svn r4114]
Diffstat (limited to 'ustring.c')
-rw-r--r--ustring.c64
1 files changed, 64 insertions, 0 deletions
diff --git a/ustring.c b/ustring.c
index 11a022c..1980a95 100644
--- a/ustring.c
+++ b/ustring.c
@@ -3,6 +3,8 @@
*/
#include <wchar.h>
+#include <stdlib.h>
+#include <assert.h>
#include <time.h>
#include "halibut.h"
@@ -164,6 +166,68 @@ wchar_t *ufroma_dup(char const *s, int charset) {
return buf;
}
+/*
+ * Convert the wide string `s' into a freshly allocated multibyte
+ * string, using the C library locale.
+ *
+ * The conversion is first attempted with wcstombs(). If the C
+ * library reports an unconvertible character, we fall back to
+ * utoa_dup() with the charset reported by charset_from_locale().
+ *
+ * Returns a NUL-terminated buffer which the caller owns.
+ */
+char *utoa_locale_dup(wchar_t const *s)
+{
+    char *ret;
+    int len;
+    size_t maxbytes, siz;
+
+    len = ustrlen(s);
+
+    /* Worst case: every wide character expands to MB_CUR_MAX bytes. */
+    maxbytes = MB_CUR_MAX * (size_t)len;
+
+    ret = mknewa(char, 1 + maxbytes);
+
+    /*
+     * The third argument of wcstombs is a limit in _bytes_ on the
+     * output, not a count of input characters, so it must be the
+     * buffer capacity; passing `len' here would silently truncate
+     * any string containing multibyte characters.
+     */
+    siz = wcstombs(ret, s, maxbytes);
+
+    /*
+     * wcstombs signals failure by returning (size_t)-1. A return of
+     * zero is a legitimate result (the empty string), not an error.
+     */
+    if (siz != (size_t)-1) {
+        assert(siz <= maxbytes);
+        ret[siz] = '\0';   /* not written by wcstombs if siz == maxbytes */
+        ret = resize(ret, siz+1);
+        return ret;
+    }
+
+    /*
+     * If that failed, try a different strategy (which we will also
+     * attempt in the total absence of wcstombs). Retrieve the
+     * locale's charset from nl_langinfo or equivalent, and use
+     * normal utoa_dup.
+     *
+     * Release the abandoned buffer first so it is not leaked.
+     * NOTE(review): sfree is assumed to be the halibut.h
+     * deallocator matching mknewa/resize -- confirm.
+     */
+    sfree(ret);
+    return utoa_dup(s, charset_from_locale());
+}
+
+/*
+ * Convert the multibyte string `s', interpreted in the C library
+ * locale, into a freshly allocated wide string.
+ *
+ * The conversion is first attempted with mbstowcs(). If the C
+ * library reports an invalid multibyte sequence, we fall back to
+ * ufroma_dup() with the charset reported by charset_from_locale().
+ *
+ * Returns a L'\0'-terminated buffer which the caller owns.
+ */
+wchar_t *ufroma_locale_dup(char const *s)
+{
+    wchar_t *ret;
+    int len;
+    size_t siz;
+
+    len = strlen(s);
+
+    ret = mknewa(wchar_t, 1 + 2*len);  /* be conservative */
+
+    /*
+     * The third argument of mbstowcs limits the number of wide
+     * characters written; `len' input bytes can never yield more
+     * than `len' wide characters, so this cannot truncate.
+     */
+    siz = mbstowcs(ret, s, len);
+
+    /*
+     * mbstowcs signals failure by returning (size_t)-1. A return of
+     * zero is a legitimate result (the empty string), not an error.
+     */
+    if (siz != (size_t)-1) {
+        assert(siz <= (size_t)(2 * len));
+        ret[siz] = L'\0';  /* not written by mbstowcs if siz == len */
+        ret = resize(ret, siz+1);
+        return ret;
+    }
+
+    /*
+     * If that failed, try a different strategy (which we will also
+     * attempt in the total absence of mbstowcs). Retrieve the
+     * locale's charset from nl_langinfo or equivalent, and use
+     * normal ufroma_dup.
+     *
+     * Release the abandoned buffer first so it is not leaked.
+     * NOTE(review): sfree is assumed to be the halibut.h
+     * deallocator matching mknewa/resize -- confirm.
+     */
+    sfree(ret);
+    return ufroma_dup(s, charset_from_locale());
+}
+
int ustrlen(wchar_t const *s) {
int len = 0;
while (*s++) len++;