14 files changed, 332 insertions, 282 deletions
diff --git a/bk_info.c b/bk_info.c
index f7f83fb..fc1b292 100644
--- a/bk_info.c
+++ b/bk_info.c
@@ -80,7 +80,7 @@ static infoconfig info_configure(paragraph *source) {
 	if (source->type == para_Config) {
 	    if (!ustricmp(source->keyword, L"info-filename")) {
 		sfree(ret.filename);
-		ret.filename = utoa_dup(uadv(source->keyword));
+		ret.filename = dupstr(adv(source->origkeyword));
 	    } else if (!ustricmp(source->keyword, L"info-max-file-size")) {
 		ret.maxfilesize = utoi(uadv(source->keyword));
 	    }
@@ -92,30 +92,7 @@ static infoconfig info_configure(paragraph *source) {
 
 paragraph *info_config_filename(char *filename)
 {
-    paragraph *p;
-    wchar_t *ufilename, *up;
-    int len;
-
-    p = mknew(paragraph);
-    memset(p, 0, sizeof(*p));
-    p->type = para_Config;
-    p->next = NULL;
-    p->fpos.filename = "<command line>";
-    p->fpos.line = p->fpos.col = -1;
-
-    ufilename = ufroma_dup(filename);
-    len = ustrlen(ufilename) + 2 + lenof(L"info-filename");
-    p->keyword = mknewa(wchar_t, len);
-    up = p->keyword;
-    ustrcpy(up, L"info-filename");
-    up = uadv(up);
-    ustrcpy(up, ufilename);
-    up = uadv(up);
-    *up = L'\0';
-    assert(up - p->keyword < len);
-    sfree(ufilename);
-
-    return p;
+    return cmdline_cfg_simple("info-filename", filename, NULL);
 }
 
 void info_backend(paragraph *sourceform, keywordlist *keywords,
@@ -235,11 +212,11 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
 	    }
 
 	    rdaddsc(&intro_text, "INFO-DIR-SECTION ");
-	    s = utoa_dup(section);
+	    s = utoa_dup(section, CS_FIXME);
 	    rdaddsc(&intro_text, s);
 	    sfree(s);
 	    rdaddsc(&intro_text, "\nSTART-INFO-DIR-ENTRY\n* ");
-	    s = utoa_dup(shortname);
+	    s = utoa_dup(shortname, CS_FIXME);
 	    rdaddsc(&intro_text, s);
 	    sfree(s);
 	    rdaddsc(&intro_text, ": (");
@@ -257,7 +234,7 @@ void info_backend(paragraph *sourceform, keywordlist *keywords,
 		}
 	    }
 	    rdaddsc(&intro_text, ".   ");
-	    s = utoa_dup(longname);
+	    s = utoa_dup(longname, CS_FIXME);
 	    rdaddsc(&intro_text, s);
 	    sfree(s);
 	    rdaddsc(&intro_text, "\nEND-INFO-DIR-ENTRY\n\n");
diff --git a/bk_man.c b/bk_man.c
index d03f6b7..0854e58 100644
--- a/bk_man.c
+++ b/bk_man.c
@@ -48,7 +48,7 @@ static manconfig man_configure(paragraph *source) {
 		ret.mindepth = utoi(uadv(source->keyword));
 	    } else if (!ustricmp(source->keyword, L"man-filename")) {
 		sfree(ret.filename);
-		ret.filename = utoa_dup(uadv(source->keyword));
+		ret.filename = dupstr(adv(source->origkeyword));
 	    }
 	}
     }
@@ -64,30 +64,7 @@ static void man_conf_cleanup(manconfig cf)
 
 paragraph *man_config_filename(char *filename)
 {
-    paragraph *p;
-    wchar_t *ufilename, *up;
-    int len;
-
-    p = mknew(paragraph);
-    memset(p, 0, sizeof(*p));
-    p->type = para_Config;
-    p->next = NULL;
-    p->fpos.filename = "<command line>";
-    p->fpos.line = p->fpos.col = -1;
-
-    ufilename = ufroma_dup(filename);
-    len = ustrlen(ufilename) + 2 + lenof(L"man-filename");
-    p->keyword = mknewa(wchar_t, len);
-    up = p->keyword;
-    ustrcpy(up, L"man-filename");
-    up = uadv(up);
-    ustrcpy(up, ufilename);
-    up = uadv(up);
-    *up = L'\0';
-    assert(up - p->keyword < len);
-    sfree(ufilename);
-
-    return p;
+    return cmdline_cfg_simple("man-filename", filename, NULL);
 }
 
 #define QUOTE_INITCTRL 1 /* quote initial . and ' on a line */
diff --git a/bk_paper.c b/bk_paper.c
index e1721ae..81050cb 100644
--- a/bk_paper.c
+++ b/bk_paper.c
@@ -510,7 +510,7 @@ void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords,
 
 	for (page = pages; page; page = page->next) {
 	    sprintf(buf, "%d", ++pagenum);
-	    page->number = ufroma_dup(buf);
+	    page->number = ufroma_dup(buf, CS_ASCII);
 	}
 
 	if (has_index) {
@@ -524,7 +524,7 @@ void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords,
 
 	    /* And don't forget the as-yet-uncreated index. */
 	    sprintf(buf, "%d", ++pagenum);
-	    first_index_page->number = ufroma_dup(buf);
+	    first_index_page->number = ufroma_dup(buf, CS_ASCII);
 	}
     }
 
@@ -683,7 +683,7 @@ void *paper_pre_backend(paragraph *sourceform, keywordlist *keywords,
 	for (page = ipages->next; page; page = page->next) {
 	    char buf[40];
 	    sprintf(buf, "%d", ++pagenum);
-	    page->number = ufroma_dup(buf);
+	    page->number = ufroma_dup(buf, CS_ASCII);
 	}
 
 	/*
@@ -1682,7 +1682,7 @@ static int render_text(page_data *page, para_data *pdata, line_data *ldata,
 
 	    if (text->type == word_HyperLink) {
 		dest.type = URL;
-		dest.url = utoa_dup(text->text);
+		dest.url = utoa_dup(text->text, CS_ASCII);
 		dest.page = NULL;
 	    } else if (text->type == word_PageXref) {
 		dest.type = PAGE;
diff --git a/bk_pdf.c b/bk_pdf.c
index f6babe5..e9904e7 100644
--- a/bk_pdf.c
+++ b/bk_pdf.c
@@ -10,30 +10,7 @@
 
 paragraph *pdf_config_filename(char *filename)
 {
-    paragraph *p;
-    wchar_t *ufilename, *up;
-    int len;
-
-    p = mknew(paragraph);
-    memset(p, 0, sizeof(*p));
-    p->type = para_Config;
-    p->next = NULL;
-    p->fpos.filename = "<command line>";
-    p->fpos.line = p->fpos.col = -1;
-
-    ufilename = ufroma_dup(filename);
-    len = ustrlen(ufilename) + 2 + lenof(L"pdf-filename");
-    p->keyword = mknewa(wchar_t, len);
-    up = p->keyword;
-    ustrcpy(up, L"pdf-filename");
-    up = uadv(up);
-    ustrcpy(up, ufilename);
-    up = uadv(up);
-    *up = L'\0';
-    assert(up - p->keyword < len);
-    sfree(ufilename);
-
-    return p;
+    return cmdline_cfg_simple("pdf-filename", filename, NULL);
 }
 
 typedef struct object_Tag object;
@@ -88,7 +65,7 @@ void pdf_backend(paragraph *sourceform, keywordlist *keywords,
 	if (p->type == para_Config && p->parent) {
 	    if (!ustricmp(p->keyword, L"pdf-filename")) {
 		sfree(filename);
-		filename = utoa_dup(uadv(p->keyword));
+		filename = dupstr(adv(p->origkeyword));
 	    }
 	}
     }
@@ -742,7 +719,7 @@ static int pdf_versionid(FILE *fp, word *words)
 
 	switch (type) {
 	  case word_Normal:
-	    text = utoa_dup(words->text);
+	    text = utoa_dup(words->text, CS_ASCII);
 	    break;
 	  case word_WhiteSpace:
 	    text = dupstr(" ");
diff --git a/bk_ps.c b/bk_ps.c
index 7a59ac0..358571e 100644
--- a/bk_ps.c
+++ b/bk_ps.c
@@ -10,30 +10,7 @@ static void ps_versionid(FILE *fp, word *words);
 
 paragraph *ps_config_filename(char *filename)
 {
-    paragraph *p;
-    wchar_t *ufilename, *up;
-    int len;
-
-    p = mknew(paragraph);
-    memset(p, 0, sizeof(*p));
-    p->type = para_Config;
-    p->next = NULL;
-    p->fpos.filename = "<command line>";
-    p->fpos.line = p->fpos.col = -1;
-
-    ufilename = ufroma_dup(filename);
-    len = ustrlen(ufilename) + 2 + lenof(L"ps-filename");
-    p->keyword = mknewa(wchar_t, len);
-    up = p->keyword;
-    ustrcpy(up, L"ps-filename");
-    up = uadv(up);
-    ustrcpy(up, ufilename);
-    up = uadv(up);
-    *up = L'\0';
-    assert(up - p->keyword < len);
-    sfree(ufilename);
-
-    return p;
+    return cmdline_cfg_simple("ps-filename", filename, NULL);
 }
 
 void ps_backend(paragraph *sourceform, keywordlist *keywords,
@@ -55,7 +32,7 @@ void ps_backend(paragraph *sourceform, keywordlist *keywords,
 	if (p->type == para_Config && p->parent) {
 	    if (!ustricmp(p->keyword, L"ps-filename")) {
 		sfree(filename);
-		filename = utoa_dup(uadv(p->keyword));
+		filename = dupstr(adv(p->origkeyword));
 	    }
 	}
     }
@@ -247,7 +224,7 @@ static void ps_versionid(FILE *fp, word *words)
 
 	switch (type) {
 	  case word_Normal:
-	    text = utoa_dup(words->text);
+	    text = utoa_dup(words->text, CS_ASCII);
 	    break;
 	  case word_WhiteSpace:
 	    text = dupstr(" ");
diff --git a/bk_text.c b/bk_text.c
index 499f232..ce4492d 100644
--- a/bk_text.c
+++ b/bk_text.c
@@ -85,7 +85,7 @@ static textconfig text_configure(paragraph *source) {
 		ret.indent = utoi(uadv(source->keyword));
 	    } else if (!ustricmp(source->keyword, L"text-filename")) {
 		sfree(ret.filename);
-		ret.filename = utoa_dup(uadv(source->keyword));
+		ret.filename = dupstr(adv(source->origkeyword));
 	    } else if (!ustricmp(source->keyword, L"text-indent-code")) {
 		ret.indent_code = utoi(uadv(source->keyword));
 	    } else if (!ustricmp(source->keyword, L"text-width")) {
@@ -182,30 +182,7 @@ static textconfig text_configure(paragraph *source) {
 
 paragraph *text_config_filename(char *filename)
 {
-    paragraph *p;
-    wchar_t *ufilename, *up;
-    int len;
-
-    p = mknew(paragraph);
-    memset(p, 0, sizeof(*p));
-    p->type = para_Config;
-    p->next = NULL;
-    p->fpos.filename = "<command line>";
-    p->fpos.line = p->fpos.col = -1;
-
-    ufilename = ufroma_dup(filename);
-    len = ustrlen(ufilename) + 2 + lenof(L"text-filename");
-    p->keyword = mknewa(wchar_t, len);
-    up = p->keyword;
-    ustrcpy(up, L"text-filename");
-    up = uadv(up);
-    ustrcpy(up, ufilename);
-    up = uadv(up);
-    *up = L'\0';
-    assert(up - p->keyword < len);
-    sfree(ufilename);
-
-    return p;
+    return cmdline_cfg_simple("text-filename", filename, NULL);
 }
 
 void text_backend(paragraph *sourceform, keywordlist *keywords,
diff --git a/bk_whlp.c b/bk_whlp.c
index 7738c71..c8a6524 100644
--- a/bk_whlp.c
+++ b/bk_whlp.c
@@ -45,30 +45,7 @@ static void whlp_contents_write(struct bk_whlp_state *state,
     
 paragraph *whlp_config_filename(char *filename)
 {
-    paragraph *p;
-    wchar_t *ufilename, *up;
-    int len;
-
-    p = mknew(paragraph);
-    memset(p, 0, sizeof(*p));
-    p->type = para_Config;
-    p->next = NULL;
-    p->fpos.filename = "<command line>";
-    p->fpos.line = p->fpos.col = -1;
-
-    ufilename = ufroma_dup(filename);
-    len = ustrlen(ufilename) + 2 + lenof(L"winhelp-filename");
-    p->keyword = mknewa(wchar_t, len);
-    up = p->keyword;
-    ustrcpy(up, L"winhelp-filename");
-    up = uadv(up);
-    ustrcpy(up, ufilename);
-    up = uadv(up);
-    *up = L'\0';
-    assert(up - p->keyword < len);
-    sfree(ufilename);
-
-    return p;
+    return cmdline_cfg_simple("winhelp-filename", filename, NULL);
 }
 
 void whlp_backend(paragraph *sourceform, keywordlist *keywords,
@@ -129,7 +106,7 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords,
 		p->parent->private_data = topicname;
 	    } else if (!ustricmp(p->keyword, L"winhelp-filename")) {
 		sfree(filename);
-		filename = utoa_dup(uadv(p->keyword));
+		filename = dupstr(adv(p->origkeyword));
 	    }
 	}
     }
@@ -152,7 +129,7 @@ void whlp_backend(paragraph *sourceform, keywordlist *keywords,
 	    filename = newf;
 	    len = strlen(newf);
 	}
-	cntname = mknewa(char, len);
+	cntname = mknewa(char, len+1);
 	sprintf(cntname, "%.*s.cnt", len-4, filename);
     }
 
@@ -671,7 +648,7 @@ static void whlp_rdaddwc(rdstringc *rs, word *text) {
 	assert(text->type != word_CodeQuote &&
 	       text->type != word_WkCodeQuote);
 	if (removeattr(text->type) == word_Normal) {
-	    if (whlp_convert(text->text, 0, &c, FALSE))
+	    if (whlp_convert(text->text, 0, &c, FALSE) || !text->alt)
 		rdaddsc(rs, c);
 	    else
 		whlp_rdaddwc(rs, text->alt);
diff --git a/bk_xhtml.c b/bk_xhtml.c
index 4b9b98c..4d5069a 100644
--- a/bk_xhtml.c
+++ b/bk_xhtml.c
@@ -192,19 +192,19 @@ static xhtmlconfig xhtml_configure(paragraph *source)
     {
       if (!ustricmp(source->keyword, L"xhtml-contents-filename")) {
 	sfree(ret.contents_filename);
-	ret.contents_filename = utoa_dup(uadv(source->keyword));
+	ret.contents_filename = dupstr(adv(source->origkeyword));
       } else if (!ustricmp(source->keyword, L"xhtml-single-filename")) {
 	sfree(ret.single_filename);
-	ret.single_filename = utoa_dup(uadv(source->keyword));
+	ret.single_filename = dupstr(adv(source->origkeyword));
       } else if (!ustricmp(source->keyword, L"xhtml-index-filename")) {
 	sfree(ret.index_filename);
-	ret.index_filename = utoa_dup(uadv(source->keyword));
+	ret.index_filename = dupstr(adv(source->origkeyword));
       } else if (!ustricmp(source->keyword, L"xhtml-template-filename")) {
 	sfree(ret.template_filename);
-	ret.template_filename = utoa_dup(uadv(source->keyword));
+	ret.template_filename = dupstr(adv(source->origkeyword));
       } else if (!ustricmp(source->keyword, L"xhtml-template-fragment")) {
 	sfree(ret.template_fragment);
-	ret.template_fragment = utoa_dup(uadv(source->keyword));
+	ret.template_fragment = utoa_dup(uadv(source->keyword), CS_ASCII);
       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-0")) {
         ret.contents_depth[0] = utoi(uadv(source->keyword));
       } else if (!ustricmp(source->keyword, L"xhtml-contents-depth-1")) {
@@ -304,45 +304,12 @@ paragraph *xhtml_config_filename(char *filename)
      * \cfg{xhtml-leaf-level}{0}; the rationale being that the user
      * wants their output _in that file_.
      */
+    paragraph *p, *q;
 
-    paragraph *p[2];
-    int i, len;
-    wchar_t *ufilename, *up;
-
-    for (i = 0; i < 2; i++) {
-	p[i] = mknew(paragraph);
-	memset(p[i], 0, sizeof(*p[i]));
-	p[i]->type = para_Config;
-	p[i]->next = NULL;
-	p[i]->fpos.filename = "<command line>";
-	p[i]->fpos.line = p[i]->fpos.col = -1;
-    }
-
-    ufilename = ufroma_dup(filename);
-    len = ustrlen(ufilename) + 2 + lenof(L"xhtml-single-filename");
-    p[0]->keyword = mknewa(wchar_t, len);
-    up = p[0]->keyword;
-    ustrcpy(up, L"xhtml-single-filename");
-    up = uadv(up);
-    ustrcpy(up, ufilename);
-    up = uadv(up);
-    *up = L'\0';
-    assert(up - p[0]->keyword < len);
-    sfree(ufilename);
-
-    len = lenof(L"xhtml-leaf-level") + lenof(L"0") + 1;
-    p[1]->keyword = mknewa(wchar_t, len);
-    up = p[1]->keyword;
-    ustrcpy(up, L"xhtml-leaf-level");
-    up = uadv(up);
-    ustrcpy(up, L"0");
-    up = uadv(up);
-    *up = L'\0';
-    assert(up - p[1]->keyword < len);
-
-    p[0]->next = p[1];
-
-    return p[0];
+    p = cmdline_cfg_simple("xhtml-single-filename", filename, NULL);
+    q = cmdline_cfg_simple("xhtml-leaf-level", "0", NULL);
+    p->next = q;
+    return p;
 }
 
 static xhtmlsection *xhtml_new_section(xhtmlsection *last)
diff --git a/error.c b/error.c
index 5f5cbda..abd8d61 100644
--- a/error.c
+++ b/error.c
@@ -82,7 +82,7 @@ static void do_error(int code, va_list ap) {
 	break;
       case err_badparatype:
 	wsp = va_arg(ap, wchar_t *);
-	sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+	sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL);
 	fpos = *va_arg(ap, filepos *);
 	sprintf(error, "command `%.200s' unrecognised at start of"
 		" paragraph", sp);
@@ -90,7 +90,7 @@ static void do_error(int code, va_list ap) {
 	break;
       case err_badmidcmd:
 	wsp = va_arg(ap, wchar_t *);
-	sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+	sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL);
 	fpos = *va_arg(ap, filepos *);
 	sprintf(error, "command `%.200s' unexpected in mid-paragraph", sp);
 	flags = FILEPOS;
@@ -138,20 +138,20 @@ static void do_error(int code, va_list ap) {
       case err_nosuchkw:
 	fpos = *va_arg(ap, filepos *);
 	wsp = va_arg(ap, wchar_t *);
-	sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+	sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL);
 	sprintf(error, "unable to resolve cross-reference to `%.200s'", sp);
 	flags = FILEPOS;
 	break;
       case err_multiBR:
 	fpos = *va_arg(ap, filepos *);
 	wsp = va_arg(ap, wchar_t *);
-	sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+	sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL);
 	sprintf(error, "multiple `\\BR' entries given for `%.200s'", sp);
 	flags = FILEPOS;
 	break;
       case err_nosuchidxtag:
 	wsp = va_arg(ap, wchar_t *);
-	sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+	sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL);
 	sprintf(error, "`\\IM' on unknown index tag `%.200s'", sp);
 	flags = 0;
 	/* FIXME: need to get a filepos to here somehow */
@@ -164,7 +164,7 @@ static void do_error(int code, va_list ap) {
       case err_macroexists:
 	fpos = *va_arg(ap, filepos *);
 	wsp = va_arg(ap, wchar_t *);
-	sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+	sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL);
 	sprintf(error, "macro `%.200s' already defined", sp);
 	flags = FILEPOS;
 	break;
@@ -185,7 +185,7 @@ static void do_error(int code, va_list ap) {
 	fpos = *va_arg(ap, filepos *);
 	fpos2 = *va_arg(ap, filepos *);
 	wsp = va_arg(ap, wchar_t *);
-	sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf));
+	sp = ustrtoa(wsp, auxbuf, sizeof(auxbuf), CS_LOCAL);
 	sprintf(error, "paragraph keyword `%.200s' already defined at ", sp);
 	sprintf(error + strlen(error), "%s:%d", fpos2.filename, fpos2.line);
 	flags = FILEPOS;
diff --git a/halibut.h b/halibut.h
index 9aa2c59..94493d4 100644
--- a/halibut.h
+++ b/halibut.h
@@ -27,6 +27,12 @@
 #include "tree234.h"
 
 /*
+ * FIXME: Charset temporary workarounds
+ */
+#define CS_FIXME CS_ISO8859_1
+#define CS_LOCAL CS_ISO8859_1
+
+/*
  * Structure tags
  */
 typedef struct input_Tag input;
@@ -72,6 +78,7 @@ struct input_Tag {
     charset_state csstate;
     wchar_t wc[16];		       /* wide chars from input conversion */
     int nwc, wcpos;		       /* size of, and position in, wc[] */
+    char *pushback_chars;	       /* used to save input-encoding data */
 };
 
 /*
@@ -82,6 +89,7 @@ struct paragraph_Tag {
     paragraph *next;
     int type;
     wchar_t *keyword;		       /* for most special paragraphs */
+    char *origkeyword;		       /* same again in original charset */
     word *words;		       /* list of words in paragraph */
     int aux;			       /* number, in a numbered paragraph
                                         * or subsection level
@@ -266,11 +274,14 @@ char *dupstr(char *s);
 /*
  * ustring.c
  */
-wchar_t *ustrdup(wchar_t *s);
-char *ustrtoa(wchar_t *s, char *outbuf, int size);
-wchar_t *ustrfroma(char *s, wchar_t *outbuf, int size);
-char *utoa_dup(wchar_t *s);
-wchar_t *ufroma_dup(char *s);
+wchar_t *ustrdup(wchar_t const *s);
+char *ustrtoa(wchar_t const *s, char *outbuf, int size, int charset);
+char *ustrtoa_careful(wchar_t const *s, char *outbuf, int size, int charset);
+wchar_t *ustrfroma(char const *s, wchar_t *outbuf, int size, int charset);
+char *utoa_dup(wchar_t const *s, int charset);
+char *utoa_dup_len(wchar_t const *s, int charset, int *len);
+char *utoa_careful_dup(wchar_t const *s, int charset);
+wchar_t *ufroma_dup(char const *s, int charset);
 int ustrlen(wchar_t const *s);
 wchar_t *uadv(wchar_t *s);
 wchar_t *ustrcpy(wchar_t *dest, wchar_t const *source);
@@ -304,6 +315,8 @@ const char *const version;
 /*
  * misc.c
  */
+char *adv(char *s);
+
 typedef struct stackTag *stack;
 stack stk_new(void);
 void stk_free(stack);
@@ -343,6 +356,9 @@ struct tagWrappedLine {
 };
 wrappedline *wrap_para(word *, int, int, int (*)(void *, word *), void *, int);
 void wrap_free(wrappedline *);
+void cmdline_cfg_add(paragraph *cfg, char *string);
+paragraph *cmdline_cfg_new(void);
+paragraph *cmdline_cfg_simple(char *string, ...);
 
 /*
  * input.c
diff --git a/input.c b/input.c
index d607e86..1187e94 100644
--- a/input.c
+++ b/input.c
@@ -86,7 +86,7 @@ static void input_configure(input *in, paragraph *cfg) {
     assert(cfg->type == para_Config);
 
     if (!ustricmp(cfg->keyword, L"input-charset")) {
-	char *csname = utoa_dup(uadv(cfg->keyword));
+	char *csname = utoa_dup(uadv(cfg->keyword), CS_ASCII);
 	in->charset = charset_from_localenc(csname);
 	sfree(csname);
     }
@@ -95,7 +95,7 @@ static void input_configure(input *in, paragraph *cfg) {
 /*
  * Can return EOF
  */
-static int get(input *in, filepos *pos) {
+static int get(input *in, filepos *pos, rdstringc *rsc) {
     int pushbackpt = in->stack ? in->stack->npushback : 0;
     if (in->npushback > pushbackpt) {
 	--in->npushback;
@@ -123,6 +123,10 @@ static int get(input *in, filepos *pos) {
 		in->currfp = NULL;
 		return EOF;
 	    }
+
+	    if (rsc)
+		rdaddc(rsc, c);
+
 	    /* Track line numbers, for error reporting */
 	    if (pos)
 		*pos = in->pos;
@@ -182,6 +186,7 @@ struct token_Tag {
     int type;
     int cmd, aux;
     wchar_t *text;
+    char *origtext;
     filepos pos;
 };
 enum {
@@ -373,31 +378,48 @@ static void match_kw(token *tok) {
 token get_token(input *in) {
     int c;
     int nls;
+    int prevpos;
     token ret;
     rdstring rs = { 0, 0, NULL };
+    rdstringc rsc = { 0, 0, NULL };
     filepos cpos;
 
     ret.text = NULL;		       /* default */
-    c = get(in, &cpos);
+    ret.origtext = NULL;	       /* default */
+    if (in->pushback_chars) {
+	rdaddsc(&rsc, in->pushback_chars);
+	sfree(in->pushback_chars);
+	in->pushback_chars = NULL;
+    }
+    c = get(in, &cpos, &rsc);
     ret.pos = cpos;
     if (iswhite(c)) {		       /* tok_white or tok_eop */
 	nls = 0;
+	prevpos = 0;
 	do {
 	    if (isnl(c))
 		nls++;
-	} while ((c = get(in, &cpos)) != EOF && iswhite(c));
+	    prevpos = rsc.pos;
+	} while ((c = get(in, &cpos, &rsc)) != EOF && iswhite(c));
 	if (c == EOF) {
 	    ret.type = tok_eof;
+	    sfree(rsc.text);
 	    return ret;
 	}
+	if (rsc.text) {
+	    in->pushback_chars = dupstr(rsc.text + prevpos);
+	    sfree(rsc.text);
+	}
 	unget(in, c, &cpos);
 	ret.type = (nls > 1 ? tok_eop : tok_white);
 	return ret;
     } else if (c == EOF) {	       /* tok_eof */
 	ret.type = tok_eof;
+	sfree(rsc.text);
 	return ret;
     } else if (c == '\\') {	       /* tok_cmd */
-	c = get(in, &cpos);
+	rsc.pos = prevpos = 0;
+	c = get(in, &cpos, &rsc);
 	if (c == '-' || c == '\\' || c == '_' ||
 	    c == '#' || c == '{' || c == '}' || c == '.') {
 	    /* single-char command */
@@ -407,13 +429,15 @@ token get_token(input *in) {
 	    do {
 		rdadd(&rs, c);
 		len++;
-		c = get(in, &cpos);
+		prevpos = rsc.pos;
+		c = get(in, &cpos, &rsc);
 	    } while (ishex(c) && len < 5);
 	    unget(in, c, &cpos);
 	} else if (iscmd(c)) {
 	    do {
 		rdadd(&rs, c);
-		c = get(in, &cpos);
+		prevpos = rsc.pos;
+		c = get(in, &cpos, &rsc);
 	    } while (iscmd(c));
 	    unget(in, c, &cpos);
 	}
@@ -423,14 +447,24 @@ token get_token(input *in) {
 	 */
 	ret.type = tok_cmd;
 	ret.text = ustrdup(rs.text);
+	if (rsc.text) {
+	    in->pushback_chars = dupstr(rsc.text + prevpos);
+	    rsc.text[prevpos] = '\0';
+	    ret.origtext = dupstr(rsc.text);
+	} else {
+	    ret.origtext = dupstr("");
+	}
 	match_kw(&ret);
 	sfree(rs.text);
+	sfree(rsc.text);
 	return ret;
     } else if (c == '{') {	       /* tok_lbrace */
 	ret.type = tok_lbrace;
+	sfree(rsc.text);
 	return ret;
     } else if (c == '}') {	       /* tok_rbrace */
 	ret.type = tok_rbrace;
+	sfree(rsc.text);
 	return ret;
     } else {			       /* tok_word */
 	/*
@@ -442,6 +476,7 @@ token get_token(input *in) {
 	 * a hyphen.
 	 */
 	ret.aux = FALSE;	       /* assumed for now */
+	prevpos = 0;
 	while (1) {
 	    if (iswhite(c) || c=='{' || c=='}' || c=='\\' || c==EOF) {
 		/* Put back the character that caused termination */
@@ -450,15 +485,25 @@ token get_token(input *in) {
 	    } else {
 		rdadd(&rs, c);
 		if (c == '-') {
+		    prevpos = rsc.pos;
 		    ret.aux = TRUE;
 		    break;	       /* hyphen terminates word */
 		}
 	    }
-	    c = get(in, &cpos);
+	    prevpos = rsc.pos;
+	    c = get(in, &cpos, &rsc);
 	}
 	ret.type = tok_word;
 	ret.text = ustrdup(rs.text);
+	if (rsc.text) {
+	    in->pushback_chars = dupstr(rsc.text + prevpos);
+	    rsc.text[prevpos] = '\0';
+	    ret.origtext = dupstr(rsc.text);
+	} else {
+	    ret.origtext = dupstr("");
+	}
 	sfree(rs.text);
+	sfree(rsc.text);
 	return ret;
     }
 }
@@ -472,7 +517,7 @@ int isbrace(input *in) {
     int c;
     filepos cpos;
 
-    c = get(in, &cpos);
+    c = get(in, &cpos, NULL);
     unget(in, c, &cpos);
     return (c == '{');
 }
@@ -488,15 +533,16 @@ token get_codepar_token(input *in) {
     filepos cpos;
 
     ret.type = tok_word;
-    c = get(in, &cpos);		       /* expect (and discard) one space */
+    ret.origtext = NULL;
+    c = get(in, &cpos, NULL);	       /* expect (and discard) one space */
     ret.pos = cpos;
     if (c == ' ') {
-	c = get(in, &cpos);
+	c = get(in, &cpos, NULL);
 	ret.pos = cpos;
     }
     while (!isnl(c) && c != EOF) {
 	int c2 = c;
-	c = get(in, &cpos);
+	c = get(in, &cpos, NULL);
 	/* Discard \r just before \n. */
 	if (c2 != 13 || !isnl(c))
 	    rdadd(&rs, c2);
@@ -538,7 +584,7 @@ static paragraph *addpara(paragraph newpara, paragraph ***hptrptr) {
  * Destructor before token is reassigned; should catch most memory
  * leaks
  */
-#define dtor(t) ( sfree(t.text) )
+#define dtor(t) ( sfree(t.text), sfree(t.origtext) )
 
 /*
  * Reads a single file (ie until get() returns EOF)
@@ -581,6 +627,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
     wchar_t uchr;
 
     t.text = NULL;
+    t.origtext = NULL;
     macros = newtree234(macrocmp);
     already = FALSE;
 
@@ -593,6 +640,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
 	int start_cmd = c__invalid;
 	par.words = NULL;
 	par.keyword = NULL;
+	par.origkeyword = NULL;
 	whptr = &par.words;
 
 	/*
@@ -840,6 +888,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
 
 	    if (needkw > 0) {
 		rdstring rs = { 0, 0, NULL };
+		rdstringc rsc = { 0, 0, NULL };
 		int nkeys = 0;
 		filepos fp;
 
@@ -857,20 +906,25 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
 			   (t.type == tok_cmd && t.cmd == c__nbsp) ||
 			   (t.type == tok_cmd && t.cmd == c__escaped)) {
 			if (t.type == tok_white ||
-			    (t.type == tok_cmd && t.cmd == c__nbsp))
+			    (t.type == tok_cmd && t.cmd == c__nbsp)) {
 			    rdadd(&rs, ' ');
-			else
+			    rdaddc(&rsc, ' ');
+			} else {
 			    rdadds(&rs, t.text);
+			    rdaddsc(&rsc, t.origtext);
+			}
 		    }
 		    if (t.type != tok_rbrace) {
 			error(err_kwunclosed, &t.pos);
 			continue;
 		    }
 		    rdadd(&rs, 0);     /* add string terminator */
+		    rdaddc(&rsc, 0);   /* add string terminator */
 		    dtor(t), t = get_token(in); /* eat right brace */
 		}
 
-		rdadd(&rs, 0);     /* add string terminator */
+		rdadd(&rs, 0);	       /* add string terminator */
+		rdaddc(&rsc, 0);       /* add string terminator */
 
 		/* See whether we have the right number of keywords. */
 		if ((needkw & 48) && nkeys > 0)
@@ -901,6 +955,7 @@ static void read_file(paragraph ***ret, input *in, indexdata *idx) {
 		}
 
 		par.keyword = rdtrim(&rs);
+		par.origkeyword = rdtrimc(&rsc);
 
 		/* Move to EOP in case of needkw==8 or 16 (no body) */
 		if (needkw & 24) {
@@ -1464,6 +1519,8 @@ paragraph *read_input(input *in, indexdata *idx) {
 	    setpos(in, in->filenames[in->currindex]);
 	    in->charset = in->defcharset;
 	    in->csstate = charset_init_state;
+	    in->wcpos = in->nwc = 0;
+	    in->pushback_chars = NULL;
 	    read_file(&hptr, in, idx);
 	}
 	in->currindex++;
diff --git a/main.c b/main.c
index 182cbec..d18166e 100644
--- a/main.c
+++ b/main.c
@@ -181,36 +181,31 @@ int main(int argc, char **argv) {
 			 * into a config paragraph.
 			 */
 			{
-			    wchar_t *keywords;
-			    char *q;
-			    wchar_t *u;
+			    char *s = dupstr(p), *q, *r;
 			    paragraph *para;
 
-			    keywords = mknewa(wchar_t, 2+strlen(p));
-
-			    u = keywords;
-			    q = p;
+			    para = cmdline_cfg_new();
 
+			    q = r = s;	       /* q reads, r writes */
 			    while (*q) {
 				if (*q == ':') {
-				    *u++ = L'\0';
+				    *r = '\0';
+				    cmdline_cfg_add(para, s);
+				    r = s;     /* begin the next component */
 				} else {
 				    if (*q == '\\' && q[1])
 					q++;
-				    /* FIXME: lacks charset flexibility */
-				    *u++ = *q;
+				    *r++ = *q;
 				}
 				q++;
 			    }
-			    *u = L'\0';
-
-			    para = mknew(paragraph);
-			    memset(para, 0, sizeof(*para));
-			    para->type = para_Config;
-			    para->keyword = keywords;
-			    para->next = NULL;
-			    para->fpos.filename = "<command line>";
-			    para->fpos.line = para->fpos.col = -1;
+			    /*
+			     * s is compacted in place, so whatever lies
+			     * beyond r is stale; terminate before adding.
+			     */
+			    *r = '\0';
+			    cmdline_cfg_add(para, s);
+			    sfree(s);	       /* para holds its own copies */
 
 			    if (cfg_tail)
 				cfg_tail->next = para;
diff --git a/misc.c b/misc.c
index 6f4ddd4..304cb1f 100644
--- a/misc.c
+++ b/misc.c
@@ -2,8 +2,13 @@
  * misc.c: miscellaneous useful items
  */
 
+#include <stdarg.h>
 #include "halibut.h"
 
+char *adv(char *s) {		       /* narrow-string partner of uadv() */
+    return s + 1 + strlen(s);	       /* step past s and its NUL */
+}
+
 struct stackTag {
     void **data;
     int sp;
@@ -479,3 +484,65 @@ void wrap_free(wrappedline *w) {
 	w = t;
     }
 }
+
+void cmdline_cfg_add(paragraph *cfg, char *string)
+{
+    wchar_t *ustring;
+    int upos, ulen, pos, len;
+    /* Append string to both keyword lists; first find where each one ends. */
+    ulen = 0;
+    while (cfg->keyword[ulen])
+	ulen += 1 + ustrlen(cfg->keyword+ulen);
+    len = 0;
+    while (cfg->origkeyword[len])
+	len += 1 + strlen(cfg->origkeyword+len);
+    /* Wide copy for cfg->keyword (charset is still the CS_FIXME stopgap). */
+    ustring = ufroma_dup(string, CS_FIXME);
+    /* Grow by the string, its NUL, and a second NUL that ends the list. */
+    upos = ulen;
+    ulen += 2 + ustrlen(ustring);
+    cfg->keyword = resize(cfg->keyword, ulen);
+    ustrcpy(cfg->keyword+upos, ustring);
+    cfg->keyword[ulen-1] = L'\0';
+    /* The same again for origkeyword, storing the caller's bytes as given. */
+    pos = len;
+    len += 2 + strlen(string);
+    cfg->origkeyword = resize(cfg->origkeyword, len);
+    strcpy(cfg->origkeyword+pos, string);
+    cfg->origkeyword[len-1] = '\0';
+
+    sfree(ustring);
+}
+
+paragraph *cmdline_cfg_new(void)
+{
+    paragraph *p;
+    /* Make a zeroed para_Config paragraph located at "<command line>". */
+    p = mknew(paragraph);
+    memset(p, 0, sizeof(*p));
+    p->type = para_Config;
+    p->next = NULL;
+    p->fpos.filename = "<command line>";
+    p->fpos.line = p->fpos.col = -1;
+    p->keyword = ustrdup(L"\0");       /* starts as an empty list */
+    p->origkeyword = dupstr("\0");     /* likewise for the narrow list */
+
+    return p;
+}
+
+paragraph *cmdline_cfg_simple(char *string, ...)
+{
+    va_list ap;
+    char *s;
+    paragraph *p;
+    /* New config paragraph holding string, then each further argument. */
+    p = cmdline_cfg_new();
+    cmdline_cfg_add(p, string);
+    /* The argument list must end with a NULL char *; that stops the loop. */
+    va_start(ap, string);
+    while ((s = va_arg(ap, char *)) != NULL)
+	cmdline_cfg_add(p, s);
+    va_end(ap);
+
+    return p;
+}
diff --git a/ustring.c b/ustring.c
index 51c279b..169a377 100644
--- a/ustring.c
+++ b/ustring.c
@@ -6,7 +6,7 @@
 #include <time.h>
 #include "halibut.h"
 
-wchar_t *ustrdup(wchar_t *s) {
+wchar_t *ustrdup(wchar_t const *s) {
     wchar_t *r;
     if (s) {
 	r = mknewa(wchar_t, 1+ustrlen(s));
@@ -18,59 +18,145 @@ wchar_t *ustrdup(wchar_t *s) {
     return r;
 }
 
-char *ustrtoa(wchar_t *s, char *outbuf, int size) {
-    char *p;
+static char *ustrtoa_internal(wchar_t const *s, char *outbuf, int size,
+			      int charset, int careful) {
+    int len, ret, err;
+    charset_state state = CHARSET_INIT_STATE;
+    char *p = outbuf;		       /* write cursor; outbuf stays put */
     if (!s) {
 	*outbuf = '\0';
 	return outbuf;
     }
-    for (p = outbuf; *s && p < outbuf+size; p++,s++)
-	*p = *s;
-    if (p < outbuf+size)
-	*p = '\0';
-    else
-	outbuf[size-1] = '\0';
+    /* Convert s into outbuf in the given charset, a chunk at a time. */
+    len = ustrlen(s);
+    size--;			       /* leave room for terminating NUL */
+    *p = '\0';
+    while (len > 0) {
+	err = 0;
+	ret = charset_from_unicode(&s, &len, p, size, charset, &state,
+				   (careful ? &err : NULL));
+	if (err)
+	    return NULL;	       /* careful mode: error was flagged */
+	if (!ret)
+	    return outbuf;	       /* nothing written: stop here */
+	size -= ret;
+	p += ret;
+	*p = '\0';
+    }
+    /*
+     * Clean up; the value returned is always the start of the buffer.
+     */
+    ret = charset_from_unicode(NULL, 0, p, size, charset, &state, NULL);
+    size -= ret;
+    p += ret;
+    *p = '\0';
     return outbuf;
 }
 
-wchar_t *ustrfroma(char *s, wchar_t *outbuf, int size) {
-    wchar_t *p;
+char *ustrtoa(wchar_t const *s, char *outbuf, int size, int charset) {
+    return ustrtoa_internal(s, outbuf, size, charset, FALSE);
+}
+/* As ustrtoa(), but careful=TRUE: NULL comes back if an error is flagged. */
+char *ustrtoa_careful(wchar_t const *s, char *outbuf, int size, int charset) {
+    return ustrtoa_internal(s, outbuf, size, charset, TRUE);
+}
+
+wchar_t *ustrfroma(char const *s, wchar_t *outbuf, int size, int charset) {
+    int len, ret;
+    charset_state state = CHARSET_INIT_STATE;
+    wchar_t *p = outbuf;	       /* write cursor; outbuf stays put */
     if (!s) {
 	*outbuf = L'\0';
 	return outbuf;
     }
-    for (p = outbuf; *s && p < outbuf+size; p++,s++)
-	*p = *s;
-    if (p < outbuf+size)
-	*p = '\0';
-    else
-	outbuf[size-1] = '\0';
+    /* Convert s from charset into outbuf; return the start of outbuf. */
+    len = strlen(s);
+    size--;			       /* allow for terminating NUL */
+    *p = L'\0';
+    while (len > 0) {
+	ret = charset_to_unicode(&s, &len, p, size,
+				 charset, &state, NULL, 0);
+	if (!ret)
+	    return outbuf;	       /* nothing written: stop here */
+	p += ret;
+	size -= ret;
+	*p = L'\0';
+    }
     return outbuf;
 }
 
-char *utoa_dup(wchar_t *s) {
-    int len;
-    char *buf = NULL;
+char *utoa_internal_dup(wchar_t const *s, int charset, int *lenp, int careful)
+{
+    char *outbuf;
+    int outpos, outlen, len, ret, err;
+    charset_state state = CHARSET_INIT_STATE;
 
-    len = ustrlen(s) + 1;
-    do {
-	buf = resize(buf, len);
-	ustrtoa(s, buf, len);
-	len = (3 * len) / 2 + 1;       /* this guarantees a strict increase */
-    } while ((int)strlen(buf) >= len-1);
+    if (!s) {			       /* NULL input yields a fresh "" */
+	if (lenp)
+	    *lenp = 0;
+	return dupstr("");
+    }
 
-    buf = resize(buf, strlen(buf)+1);
-    return buf;
+    len = ustrlen(s);
+    outlen = len + 10;		       /* first guess; grown on demand */
+    outbuf = mknewa(char, outlen);
+    outpos = 0;
+    outbuf[outpos] = '\0';
+
+    while (len > 0) {
+	err = 0;
+	ret = charset_from_unicode(&s, &len,
+				   outbuf + outpos, outlen - outpos - 1,
+				   charset, &state, (careful ? &err : NULL));
+	if (err) {		       /* careful mode: give up, return NULL */
+	    sfree(outbuf);
+	    return NULL;
+	}
+	if (!ret) {		       /* nothing written: enlarge and retry */
+	    outlen = outlen * 3 / 2;
+	    outbuf = resize(outbuf, outlen);
+	}
+	outpos += ret;
+	outbuf[outpos] = '\0';
+    }
+    /*
+     * Clean up: 32 spare bytes, of which the last is kept for the NUL.
+     */
+    outlen = outpos + 32;
+    outbuf = resize(outbuf, outlen);
+    ret = charset_from_unicode(NULL, 0,
+			       outbuf + outpos, outlen - outpos - 1,
+			       charset, &state, NULL);
+    outpos += ret;
+    outbuf[outpos] = '\0';
+    if (lenp)
+	*lenp = outpos;		       /* length excludes the final NUL */
+    return outbuf;
 }
 
-wchar_t *ufroma_dup(char *s) {
+char *utoa_dup(wchar_t const *s, int charset)
+{
+    return utoa_internal_dup(s, charset, NULL, FALSE);
+}
+/* As utoa_dup(), but also stores the output length through len. */
+char *utoa_dup_len(wchar_t const *s, int charset, int *len)
+{
+    return utoa_internal_dup(s, charset, len, FALSE);
+}
+/* As utoa_dup(), but careful=TRUE: NULL comes back if an error is flagged. */
+char *utoa_careful_dup(wchar_t const *s, int charset)
+{
+    return utoa_internal_dup(s, charset, NULL, TRUE);
+}
+
+wchar_t *ufroma_dup(char const *s, int charset) {
     int len;
     wchar_t *buf = NULL;
 
     len = strlen(s) + 1;
     do {
 	buf = resize(buf, len);
-	ustrfroma(s, buf, len);
+	ustrfroma(s, buf, len, charset);
 	len = (3 * len) / 2 + 1;       /* this guarantees a strict increase */
     } while (ustrlen(buf) >= len-1);
 
@@ -183,6 +269,12 @@ wchar_t *ustrftime(wchar_t *wfmt, struct tm *timespec) {
     size_t len;
 
     /*
+     * FIXME: really we ought to copy non-% parts of the format
+     * ourselves, and only resort to strftime for % parts. Also we
+     * should use wcsftime if it's present.
+     */
+
+    /*
      * strftime has the entertaining property that it returns 0
      * _either_ on out-of-space _or_ on successful generation of
      * the empty string. Hence we must ensure our format can never
@@ -192,7 +284,7 @@ wchar_t *ustrftime(wchar_t *wfmt, struct tm *timespec) {
     if (wfmt) {
 	len = ustrlen(wfmt);
 	fmt = mknewa(char, 2+len);
-	ustrtoa(wfmt, fmt+1, len+1);
+	ustrtoa(wfmt, fmt+1, len+1, CS_ASCII);   /* CS_FIXME? */
 	fmt[0] = ' ';
     } else
 	fmt = " %c";