summaryrefslogtreecommitdiff
path: root/apps/plugins/wikiviewer/shared/utf8_aux.c
blob: 3b30a7c70b35705488bb3dd26f03c3a39a717197 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
/***************************************************************************
 *             __________               __   ___.
 *   Open      \______   \ ____   ____ |  | _\_ |__   _______  ___
 *   Source     |       _//  _ \_/ ___\|  |/ /| __ \ /  _ \  \/  /
 *   Jukebox    |    |   (  <_> )  \___|    < | \_\ (  <_> > <  <
 *   Firmware   |____|_  /\____/ \___  >__|_ \|___  /\____/__/\_ \
 *                     \/            \/     \/    \/            \/
 * $Id$
 *
 * This program is free software; you can redistribute it and/or
 * modify it under the terms of the GNU General Public License
 * as published by the Free Software Foundation; either version 2
 * of the License, or (at your option) any later version.
 *
 * This software is distributed on an "AS IS" basis, WITHOUT WARRANTY OF ANY
 * KIND, either express or implied.
 *
 ****************************************************************************/

#include <ctype.h>
#include "utf8_aux.h"

/*
 * Compare two length-delimited UTF-8 strings, one code point at a time.
 *
 *   s1, n1     first string and its length in bytes
 *   s2, n2     second string and its length in bytes
 *   casesense  when false, both code points are passed through tolower()
 *              before comparing, but only if both are below 128
 *
 * A space (' ') in either string is treated as an underscore ('_').
 *
 * Returns 0 when the strings compare equal, -1 when s1 sorts before s2
 * (including s1 being a proper prefix of s2) and 1 when s1 sorts after s2
 * (including s2 being a proper prefix of s1).
 *
 * NOTE(review): the return type is plain char. Where char is unsigned the
 * -1 result reaches the caller as a positive value, so callers should
 * only rely on "== 0" unless they convert the result to signed char.
 */
char utf8strcnmp(const unsigned char *s1, const unsigned char *s2,uint16_t n1,uint16_t n2, const bool casesense)
{
    const unsigned char *pos1 = s1;
    const unsigned char *pos2 = s2;

    for (;;)
    {
        unsigned short c1, c2;

        /*
         * Test with >= rather than ==: utf8decode() consumes a whole
         * multi-byte sequence, so when a sequence is cut off by the length
         * limit the offset can step over the limit without ever being equal
         * to it. An equality test would then never fire and the loop would
         * keep comparing bytes that lie outside the strings.
         */
        const bool end1 = (pos1 - s1) >= n1;
        const bool end2 = (pos2 - s2) >= n2;

        if (end1)
        {
            /* Equal only if both strings ended together and have the same
             * byte length; otherwise s1 is the shorter one. */
            return (end2 && n1 == n2) ? 0 : -1;
        }

        if (end2)
        {
            /* s2 ran out while s1 still has data. */
            return 1;
        }

        pos1 = utf8decode(pos1, &c1);
        pos2 = utf8decode(pos2, &c2);

        /* Spaces and underscores are interchangeable. */
        if (c1 == ' ')
            c1 = '_';

        if (c2 == ' ')
            c2 = '_';

        /* Case folding is limited to ASCII and is applied only when both
         * code points are ASCII. */
        if (!casesense && c1 < 128 && c2 < 128)
        {
            c1 = tolower(c1);
            c2 = tolower(c2);
        }

        if (c1 < c2)
            return -1;

        if (c1 > c2)
            return 1;
    }
}

/*
 * Decode one UTF-8 sequence starting at utf8 and store the resulting code
 * point in *ucs. Returns a pointer to the first byte after the consumed
 * sequence.
 *
 * U+FFFD is stored when:
 *   - the first byte is a continuation byte or 0xC0/0xC1 (0x80..0xC1),
 *   - the first byte is 0xF5 or above,
 *   - a continuation byte is missing or malformed,
 *   - the decoded value is above U+FFFF (not supported here).
 *
 * A byte that is not a valid continuation byte, including the NUL
 * terminator, is never consumed as part of a sequence: the returned pointer
 * is left on it so the caller sees it next. A NUL in the first position is
 * decoded as code point 0 and consumed like any other single-byte character.
 */
const unsigned char* utf8decode(const unsigned char *utf8, unsigned short *ucs)
{
    unsigned char c = *utf8++;
    unsigned long code;
    int tail = 0;   /* number of continuation bytes still expected */

    if (c < 0x80)
    {
        /* U-00000000 - U-0000007F, 1 byte */
        code = c;
    }
    else if (c < 0xc2)
    {
        /* Invalid UTF-8 char: stray continuation byte or 0xC0/0xC1 */
        code = 0xfffd;
    }
    else if (c < 0xe0)
    {
        /* U-00000080 - U-000007FF, 2 bytes */
        tail = 1;
        code = c & 0x1f;
    }
    else if (c < 0xf0)
    {
        /* U-00000800 - U-0000FFFF, 3 bytes */
        tail = 2;
        code = c & 0x0f;
    }
    else if (c < 0xf5)
    {
        /* U-00010000 - U-001FFFFF, 4 bytes */
        tail = 3;
        code = c & 0x07;
    }
    else
    {
        /* Invalid size. */
        code = 0xfffd;
    }

    while (tail-- > 0)
    {
        /* Peek first and advance only for a valid continuation byte, so a
         * truncated sequence cannot move the pointer past the terminator. */
        c = *utf8;

        if ((c & 0xc0) != 0x80)
        {
            /* Missing (NUL) or invalid continuation byte. */
            code = 0xfffd;
            break;
        }

        code = (code << 6) | (c & 0x3f);
        utf8++;
    }

    /* currently we don't support chars above U-FFFF */
    *ucs = (code < 0x10000) ? (unsigned short)code : 0xfffd;
    return utf8;
}