summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSimon Tatham <anakin@pobox.com>2004-03-30 20:12:37 +0000
committerSimon Tatham <anakin@pobox.com>2004-03-30 20:12:37 +0000
commit0c0dc7a2b51354e35c9f5185aee1c000a52b5e72 (patch)
tree24b7d07d4da1602e9dafce79998e196ad850aee0
parent4b18ed86b5c3b5a4d9af1a94f1c413cb9b4f85e1 (diff)
downloadhalibut-0c0dc7a2b51354e35c9f5185aee1c000a52b5e72.zip
halibut-0c0dc7a2b51354e35c9f5185aee1c000a52b5e72.tar.gz
halibut-0c0dc7a2b51354e35c9f5185aee1c000a52b5e72.tar.bz2
halibut-0c0dc7a2b51354e35c9f5185aee1c000a52b5e72.tar.xz
Fix index sorting so that it collates in a sensible order.
[originally from svn r4004]
-rw-r--r--halibut.h1
-rw-r--r--misc.c70
-rw-r--r--ustring.c5
3 files changed, 74 insertions, 2 deletions
diff --git a/halibut.h b/halibut.h
index 71087c4..30a1280 100644
--- a/halibut.h
+++ b/halibut.h
@@ -252,6 +252,7 @@ int ustrlen(wchar_t *s);
wchar_t *uadv(wchar_t *s);
wchar_t *ustrcpy(wchar_t *dest, wchar_t *source);
wchar_t utolower(wchar_t);
+int uisalpha(wchar_t);
int ustrcmp(wchar_t *lhs, wchar_t *rhs);
int ustricmp(wchar_t *lhs, wchar_t *rhs);
int utoi(wchar_t *);
diff --git a/misc.c b/misc.c
index 0de6c2a..a54cc6c 100644
--- a/misc.c
+++ b/misc.c
@@ -98,7 +98,7 @@ char *rdtrimc(rdstringc *rs) {
return rs->text;
}
-int compare_wordlists(word *a, word *b) {
+static int compare_wordlists_literally(word *a, word *b) {
int t;
while (a && b) {
if (a->type != b->type)
@@ -113,7 +113,7 @@ int compare_wordlists(word *a, word *b) {
if (c)
return c;
}
- c = compare_wordlists(a->alt, b->alt);
+ c = compare_wordlists_literally(a->alt, b->alt);
if (c)
return c;
a = a->next;
@@ -142,6 +142,72 @@ int compare_wordlists(word *a, word *b) {
return 0;
}
+int compare_wordlists(word *a, word *b) {
+ /*
+ * Compare two word lists: first by their alphabetic characters
+ * only (as judged by uisalpha, folded with utolower), returning -1
+ * or +1; on a tie, defer to compare_wordlists_literally(a, b).
+ */
+ struct {
+ word *w; /* current word in the list; NULL once the list is exhausted */
+ int i; /* index of the next unexamined character in w->text */
+ wchar_t c; /* latest lowercased letter found, or 0 at end of list */
+ } pos[2]; /* pos[0] walks list a, pos[1] walks list b */
+
+ pos[0].w = a;
+ pos[1].w = b;
+ pos[0].i = pos[1].i = 0;
+
+ while (1) {
+ /*
+ * Find the next alphabetic character in each list, into pos[k].c.
+ */
+ int k;
+
+ for (k = 0; k < 2; k++) {
+ /*
+ * Skip forward through text and ->next links until we reach
+ * an alphabetic character or run out of words.
+ */
+ while (1) {
+ if (!pos[k].w) {
+ /* End of word list: signal it with c == 0. */
+ pos[k].c = 0;
+ break;
+ } else if (!pos[k].w->text || !pos[k].w->text[pos[k].i]) {
+ /* Word has no text, or we hit its terminator; move on. */
+ pos[k].w = pos[k].w->next;
+ pos[k].i = 0;
+ } else if (!uisalpha(pos[k].w->text[pos[k].i])) {
+ /* This character isn't alphabetic; skip over it. */
+ pos[k].i++;
+ } else {
+ /* We have an alphabetic! Lowercase it and stop scanning. */
+ pos[k].c = utolower(pos[k].w->text[pos[k].i]);
+ break;
+ }
+ }
+ }
+
+ if (pos[0].c < pos[1].c)
+ return -1;
+ else if (pos[0].c > pos[1].c)
+ return +1;
+
+ if (!pos[0].c)
+ break; /* both sides gave 0 (end of list): alphabetically equal */
+
+ pos[0].i++; /* step past the matched letter in list a ... */
+ pos[1].i++; /* ... and in list b, then look for the next pair */
+ }
+
+ /*
+ * If we reach here, the lists were alphabetically equal, so
+ * fall back to the literal comparison to order them.
+ */
+ return compare_wordlists_literally(a, b);
+}
+
void mark_attr_ends(paragraph *sourceform) {
paragraph *p;
word *w, *wp;
diff --git a/ustring.c b/ustring.c
index 1573a19..0741dcf 100644
--- a/ustring.c
+++ b/ustring.c
@@ -73,6 +73,11 @@ wchar_t utolower(wchar_t c) {
return c;
}
+int uisalpha(wchar_t c) {
+ /* FIXME: ASCII-only; anything outside A-Z and a-z is reported as non-alphabetic */
+ return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+}
+
int ustricmp(wchar_t *lhs, wchar_t *rhs) {
wchar_t lc, rc;
while ((lc = utolower(*lhs)) == (rc = utolower(*rhs)) && lc && rc)