diff options
| author | Simon Tatham <anakin@pobox.com> | 2004-04-20 17:50:41 +0000 |
|---|---|---|
| committer | Simon Tatham <anakin@pobox.com> | 2004-04-20 17:50:41 +0000 |
| commit | 2b6def26f41457eba8f2056432cd1af68a5b58b0 (patch) | |
| tree | 6bc7c479673f48b7e488ea383c6076d4f28cf0fc /ustring.c | |
| parent | 8a9d3f97956db97b0813a6d24c486371ff14bd80 (diff) | |
| download | halibut-2b6def26f41457eba8f2056432cd1af68a5b58b0.zip halibut-2b6def26f41457eba8f2056432cd1af68a5b58b0.tar.gz halibut-2b6def26f41457eba8f2056432cd1af68a5b58b0.tar.bz2 halibut-2b6def26f41457eba8f2056432cd1af68a5b58b0.tar.xz | |
Infrastructure changes for character set support. ustrtoa,
ustrfroma, utoa_dup and ufroma_dup now take a charset parameter, and
also have a variety of subtly distinct forms. Also, when a \cfg
directive is seen in the input file, the precise octet strings for
each parameter are kept in their original form as well as being
translated into Unicode, so that when they represent filenames they
can be used verbatim.
[originally from svn r4097]
Diffstat (limited to 'ustring.c')
| -rw-r--r-- | ustring.c | 154 |
1 file changed, 123 insertions, 31 deletions
```diff
@@ -6,7 +6,7 @@
 #include <time.h>
 #include "halibut.h"
 
-wchar_t *ustrdup(wchar_t *s) {
+wchar_t *ustrdup(wchar_t const *s) {
     wchar_t *r;
     if (s) {
         r = mknewa(wchar_t, 1+ustrlen(s));
@@ -18,59 +18,145 @@ wchar_t *ustrdup(wchar_t *s) {
     return r;
 }
 
-char *ustrtoa(wchar_t *s, char *outbuf, int size) {
-    char *p;
+static char *ustrtoa_internal(wchar_t const *s, char *outbuf, int size,
+                              int charset, int careful) {
+    int len, ret, err;
+    charset_state state = CHARSET_INIT_STATE;
+
     if (!s) {
         *outbuf = '\0';
         return outbuf;
     }
-    for (p = outbuf; *s && p < outbuf+size; p++,s++)
-        *p = *s;
-    if (p < outbuf+size)
-        *p = '\0';
-    else
-        outbuf[size-1] = '\0';
+
+    len = ustrlen(s);
+    size--; /* leave room for terminating NUL */
+    *outbuf = '\0';
+    while (len > 0) {
+        err = 0;
+        ret = charset_from_unicode(&s, &len, outbuf, size, charset, &state,
+                                   (careful ? &err : NULL));
+        if (err)
+            return NULL;
+        if (!ret)
+            return outbuf;
+        size -= ret;
+        outbuf += ret;
+        *outbuf = '\0';
+    }
+    /*
+     * Clean up
+     */
+    ret = charset_from_unicode(NULL, 0, outbuf, size, charset, &state, NULL);
+    size -= ret;
+    outbuf += ret;
+    *outbuf = '\0';
     return outbuf;
 }
 
-wchar_t *ustrfroma(char *s, wchar_t *outbuf, int size) {
-    wchar_t *p;
+char *ustrtoa(wchar_t const *s, char *outbuf, int size, int charset) {
+    return ustrtoa_internal(s, outbuf, size, charset, FALSE);
+}
+
+char *ustrtoa_careful(wchar_t const *s, char *outbuf, int size, int charset) {
+    return ustrtoa_internal(s, outbuf, size, charset, TRUE);
+}
+
+wchar_t *ustrfroma(char const *s, wchar_t *outbuf, int size, int charset) {
+    int len, ret;
+    charset_state state = CHARSET_INIT_STATE;
+
     if (!s) {
         *outbuf = L'\0';
         return outbuf;
     }
-    for (p = outbuf; *s && p < outbuf+size; p++,s++)
-        *p = *s;
-    if (p < outbuf+size)
-        *p = '\0';
-    else
-        outbuf[size-1] = '\0';
+
+    len = strlen(s);
+    size--; /* allow for terminating NUL */
+    *outbuf = L'\0';
+    while (len > 0) {
+        ret = charset_to_unicode(&s, &len, outbuf, size,
+                                 charset, &state, NULL, 0);
+        if (!ret)
+            return outbuf;
+        outbuf += ret;
+        size -= ret;
+        *outbuf = L'\0';
+    }
     return outbuf;
 }
 
-char *utoa_dup(wchar_t *s) {
-    int len;
-    char *buf = NULL;
+char *utoa_internal_dup(wchar_t const *s, int charset, int *lenp, int careful)
+{
+    char *outbuf;
+    int outpos, outlen, len, ret, err;
+    charset_state state = CHARSET_INIT_STATE;
 
-    len = ustrlen(s) + 1;
-    do {
-        buf = resize(buf, len);
-        ustrtoa(s, buf, len);
-        len = (3 * len) / 2 + 1; /* this guarantees a strict increase */
-    } while ((int)strlen(buf) >= len-1);
+    if (!s) {
+        return dupstr("");
+    }
 
-    buf = resize(buf, strlen(buf)+1);
-    return buf;
+    len = ustrlen(s);
+
+    outlen = len + 10;
+    outbuf = mknewa(char, outlen);
+
+    outpos = 0;
+    outbuf[outpos] = '\0';
+
+    while (len > 0) {
+        err = 0;
+        ret = charset_from_unicode(&s, &len,
+                                   outbuf + outpos, outlen - outpos - 1,
+                                   charset, &state, (careful ? &err : NULL));
+        if (err) {
+            sfree(outbuf);
+            return NULL;
+        }
+        if (!ret) {
+            outlen = outlen * 3 / 2;
+            outbuf = resize(outbuf, outlen);
+        }
+        outpos += ret;
+        outbuf[outpos] = '\0';
+    }
+    /*
+     * Clean up
+     */
+    outlen = outpos + 32;
+    outbuf = resize(outbuf, outlen);
+    ret = charset_from_unicode(NULL, 0,
+                               outbuf + outpos, outlen - outpos + 1,
+                               charset, &state, NULL);
+    outpos += ret;
+    outbuf[outpos] = '\0';
+    if (lenp)
+        *lenp = outpos;
+    return outbuf;
 }
 
-wchar_t *ufroma_dup(char *s) {
+char *utoa_dup(wchar_t const *s, int charset)
+{
+    return utoa_internal_dup(s, charset, NULL, FALSE);
+}
+
+char *utoa_dup_len(wchar_t const *s, int charset, int *len)
+{
+    return utoa_internal_dup(s, charset, len, FALSE);
+}
+
+char *utoa_careful_dup(wchar_t const *s, int charset)
+{
+    return utoa_internal_dup(s, charset, NULL, TRUE);
+}
+
+wchar_t *ufroma_dup(char const *s, int charset) {
     int len;
     wchar_t *buf = NULL;
 
     len = strlen(s) + 1;
     do {
         buf = resize(buf, len);
-        ustrfroma(s, buf, len);
+        ustrfroma(s, buf, len, charset);
         len = (3 * len) / 2 + 1; /* this guarantees a strict increase */
     } while (ustrlen(buf) >= len-1);
 
@@ -183,6 +269,12 @@ wchar_t *ustrftime(wchar_t *wfmt, struct tm *timespec) {
     size_t len;
 
     /*
+     * FIXME: really we ought to copy non-% parts of the format
+     * ourselves, and only resort to strftime for % parts. Also we
+     * should use wcsftime if it's present.
+     */
+
+    /*
      * strftime has the entertaining property that it returns 0
      * _either_ on out-of-space _or_ on successful generation of
      * the empty string. Hence we must ensure our format can never
@@ -192,7 +284,7 @@ wchar_t *ustrftime(wchar_t *wfmt, struct tm *timespec) {
     if (wfmt) {
         len = ustrlen(wfmt);
         fmt = mknewa(char, 2+len);
-        ustrtoa(wfmt, fmt+1, len+1);
+        ustrtoa(wfmt, fmt+1, len+1, CS_ASCII); /* CS_FIXME? */
         fmt[0] = ' ';
     } else
         fmt = " %c";
```