diff options
| author | Simon Tatham <anakin@pobox.com> | 2004-03-30 20:12:37 +0000 |
|---|---|---|
| committer | Simon Tatham <anakin@pobox.com> | 2004-03-30 20:12:37 +0000 |
| commit | 0c0dc7a2b51354e35c9f5185aee1c000a52b5e72 (patch) | |
| tree | 24b7d07d4da1602e9dafce79998e196ad850aee0 | |
| parent | 4b18ed86b5c3b5a4d9af1a94f1c413cb9b4f85e1 (diff) | |
| download | halibut-0c0dc7a2b51354e35c9f5185aee1c000a52b5e72.zip halibut-0c0dc7a2b51354e35c9f5185aee1c000a52b5e72.tar.gz halibut-0c0dc7a2b51354e35c9f5185aee1c000a52b5e72.tar.bz2 halibut-0c0dc7a2b51354e35c9f5185aee1c000a52b5e72.tar.xz | |
Fix index sorting so that it collates in a sensible order.
[originally from svn r4004]
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | halibut.h | 1 |
| -rw-r--r-- | misc.c | 70 |
| -rw-r--r-- | ustring.c | 5 |
3 files changed, 74 insertions, 2 deletions
```diff
--- a/halibut.h
+++ b/halibut.h
@@ -252,6 +252,7 @@ int ustrlen(wchar_t *s);
 wchar_t *uadv(wchar_t *s);
 wchar_t *ustrcpy(wchar_t *dest, wchar_t *source);
 wchar_t utolower(wchar_t);
+int uisalpha(wchar_t);
 int ustrcmp(wchar_t *lhs, wchar_t *rhs);
 int ustricmp(wchar_t *lhs, wchar_t *rhs);
 int utoi(wchar_t *);
--- a/misc.c
+++ b/misc.c
@@ -98,7 +98,7 @@ char *rdtrimc(rdstringc *rs) {
     return rs->text;
 }
 
-int compare_wordlists(word *a, word *b) {
+static int compare_wordlists_literally(word *a, word *b) {
     int t;
     while (a && b) {
         if (a->type != b->type)
@@ -113,7 +113,7 @@ int compare_wordlists(word *a, word *b) {
             if (c)
                 return c;
         }
-        c = compare_wordlists(a->alt, b->alt);
+        c = compare_wordlists_literally(a->alt, b->alt);
         if (c)
             return c;
         a = a->next;
@@ -142,6 +142,72 @@ int compare_wordlists(word *a, word *b) {
     return 0;
 }
 
+int compare_wordlists(word *a, word *b) {
+    /*
+     * First we compare only the alphabetic content of the word
+     * lists, with case not a factor. If that comes out equal,
+     * _then_ we compare the word lists literally.
+     */
+    struct {
+        word *w;
+        int i;
+        wchar_t c;
+    } pos[2];
+
+    pos[0].w = a;
+    pos[1].w = b;
+    pos[0].i = pos[1].i = 0;
+
+    while (1) {
+        /*
+         * Find the next alphabetic character in each word list.
+         */
+        int k;
+
+        for (k = 0; k < 2; k++) {
+            /*
+             * Advance until we hit either an alphabetic character
+             * or the end of the word list.
+             */
+            while (1) {
+                if (!pos[k].w) {
+                    /* End of word list. */
+                    pos[k].c = 0;
+                    break;
+                } else if (!pos[k].w->text || !pos[k].w->text[pos[k].i]) {
+                    /* No characters remaining in this word; move on. */
+                    pos[k].w = pos[k].w->next;
+                    pos[k].i = 0;
+                } else if (!uisalpha(pos[k].w->text[pos[k].i])) {
+                    /* This character isn't alphabetic; move on. */
+                    pos[k].i++;
+                } else {
+                    /* We have an alphabetic! Lowercase it and continue. */
+                    pos[k].c = utolower(pos[k].w->text[pos[k].i]);
+                    break;
+                }
+            }
+        }
+
+        if (pos[0].c < pos[1].c)
+            return -1;
+        else if (pos[0].c > pos[1].c)
+            return +1;
+
+        if (!pos[0].c)
+            break;                     /* they're equal */
+
+        pos[0].i++;
+        pos[1].i++;
+    }
+
+    /*
+     * If we reach here, the strings were alphabetically equal, so
+     * compare in more detail.
+     */
+    return compare_wordlists_literally(a, b);
+}
+
 void mark_attr_ends(paragraph *sourceform) {
     paragraph *p;
     word *w, *wp;
--- a/ustring.c
+++ b/ustring.c
@@ -73,6 +73,11 @@ wchar_t utolower(wchar_t c) {
     return c;
 }
 
+int uisalpha(wchar_t c) {
+    /* FIXME: this doesn't even come close */
+    return (c >= 'A' && c <= 'Z') || (c >= 'a' && c <= 'z');
+}
+
 int ustricmp(wchar_t *lhs, wchar_t *rhs) {
     wchar_t lc, rc;
     while ((lc = utolower(*lhs)) == (rc = utolower(*rhs)) && lc && rc)
```