[Fedora-directory-commits] ldapserver/ldap/servers/plugins/syntaxes phonetic.c, 1.6, 1.7

Noriko Hosoi nhosoi at fedoraproject.org
Tue Feb 3 19:15:28 UTC 2009


Author: nhosoi

Update of /cvs/dirsec/ldapserver/ldap/servers/plugins/syntaxes
In directory cvs1.fedora.phx.redhat.com:/tmp/cvs-serv27683

Modified Files:
	phonetic.c 
Log Message:
Resolves: #483668
Summary: Syntax plugin (phonetic): "Sounds like" does not support Western 
European characters
Description: Added support for Latin-1 characters (Unicode U+00C0 - U+00FF)



Index: phonetic.c
===================================================================
RCS file: /cvs/dirsec/ldapserver/ldap/servers/plugins/syntaxes/phonetic.c,v
retrieving revision 1.6
retrieving revision 1.7
diff -u -r1.6 -r1.7
--- phonetic.c	12 Jan 2009 19:18:38 -0000	1.6
+++ phonetic.c	3 Feb 2009 19:15:26 -0000	1.7
@@ -230,268 +230,440 @@
         /* N  O  P  Q  R  S  T  U  V  W  X  Y  Z  */
 
 /* Macros to access character coding array */
-#define vowel(x)        ((x) != '\0' && vsvfn[(x) - 'A'] & 1)        /* AEIOU */
+#define vowel(x)     ((*(x) != '\0' && vsvfn[(*(x)) - 'A'] & 1) || /* AEIOU */ \
+   (((*(x)==0xC3) && (*((x)+1))) ?       ((0x80<=*((x)+1) && *((x)+1)<0x87) || \
+     (0x88<=*((x)+1) && *((x)+1)<0x90) || (0x92<=*((x)+1) && *((x)+1)<0x97) || \
+     (0x98<=*((x)+1) && *((x)+1)<0x9D) || (0xA0<=*((x)+1) && *((x)+1)<0xA7) || \
+     (0xA8<=*((x)+1) && *((x)+1)<0xB0) || (0xB2<=*((x)+1) && *((x)+1)<0xB7) || \
+     (0xB8<=*((x)+1) && *((x)+1)<0xBD)) : 0 ) /* Latin-1 characters */ )
+/*
+    case 0xC3:
+*/
 #define same(x)         ((x) != '\0' && vsvfn[(x) - 'A'] & 2)        /* FJLMNR */
 #define varson(x)       ((x) != '\0' && vsvfn[(x) - 'A'] & 4)        /* CGPST */
-#define frontv(x)       ((x) != '\0' && vsvfn[(x) - 'A'] & 8)        /* EIY */
+#define frontv(x)   ((*(x) != '\0' && vsvfn[(*(x)) - 'A'] & 8) ||    /* EIY */ \
+   (((*(x)==0xC3) && (*((x)+1))) ?       ((0x88<=*((x)+1) && *((x)+1)<0x90) || \
+     (0xA8<=*((x)+1) && *((x)+1)<0xB0)) : 0 ) /* Latin-1 E/I */ )
 #define noghf(x)        ((x) != '\0' && vsvfn[(x) - 'A'] & 16)        /* BDH */
 
 char *
 phonetic( char *Word )
 {
-        char            *n, *n_start, *n_end;        /* pointers to string */
-        char            *metaph_end;        /* pointers to metaph */
-        char            ntrans[42];        /* word with uppercase letters */
-        int             KSflag;        /* state flag for X -> KS */
-        char                buf[MAXPHONEMELEN + 2];
-        char                *Metaph;
-
-        /*
-         * Copy Word to internal buffer, dropping non-alphabetic characters
-         * and converting to upper case
-         */
-        n = ntrans + 4; n_end = ntrans + 35;
-        while (!iswordbreak( Word ) && n < n_end) {
-            if (isascii(*Word)) {
-                if (isalpha(*Word)) {
-                    *n++ = TOUPPER(*Word);
-                }
-                ++Word;
-            } else {
-                auto const size_t len = LDAP_UTF8COPY(n, Word);
-                n += len; Word += len;
+    unsigned char   *n, *n_start, *n_end;        /* pointers to string */
+    char            *metaph_end;        /* pointers to metaph */
+    char            ntrans[42];        /* word with uppercase letters */
+    int             KSflag;        /* state flag for X -> KS */
+    char                buf[MAXPHONEMELEN + 2];
+    char                *Metaph;
+
+    /*
+     * Copy Word to internal buffer, dropping non-alphabetic characters
+     * and converting to upper case
+     */
+    n = ntrans + 4; n_end = ntrans + 35;
+    while (!iswordbreak( Word ) && n < n_end) {
+        if (isascii(*Word)) {
+            if (isalpha(*Word)) {
+                *n++ = TOUPPER(*Word);
             }
+            ++Word;
+        } else {
+            auto const size_t len = LDAP_UTF8COPY(n, Word);
+            n += len; Word += len;
         }
-        Metaph = buf;
-        *Metaph = '\0';
-        if (n == ntrans + 4) {
-                return( slapi_ch_strdup( buf ) );                /* Return if null */
+    }
+    Metaph = buf;
+    *Metaph = '\0';
+    if (n == ntrans + 4) {
+            return( slapi_ch_strdup( buf ) );                /* Return if null */
+    }
+    n_end = n;                /* Set n_end to end of string */
+
+    /* ntrans[0] will always be == 0 */
+    ntrans[0] = '\0';
+    ntrans[1] = '\0';
+    ntrans[2] = '\0';
+    ntrans[3] = '\0';
+    *n++ = 0;
+    *n++ = 0;
+    *n++ = 0;
+    *n = 0;                        /* Pad with nulls */
+    n = ntrans + 4;                /* Assign pointer to start */
+
+    /* Check for PN, KN, GN, AE, WR, WH, and X at start */
+    switch (*n) {
+    case 'P':
+    case 'K':
+    case 'G':
+        /* 'PN', 'KN', 'GN' becomes 'N' */
+        if (*(n + 1) == 'N')
+            *n++ = 0;
+        break;
+    case 'A':
+        /* 'AE' becomes 'E' */
+        if (*(n + 1) == 'E')
+            *n++ = 0;
+        break;
+    case 'W':
+        /* 'WR' becomes 'R', and 'WH' to 'H' */
+        if (*(n + 1) == 'R')
+            *n++ = 0;
+        else if (*(n + 1) == 'H') {
+            *n++ = 0;
         }
-        n_end = n;                /* Set n_end to end of string */
+        break;
+    case 'X':
+        /* 'X' becomes 'S' */
+        *n = 'S';
+        break;
+    case 0xC3:
+        switch (*(n+1)) {
+        case 0x80:
+        case 0x81:
+        case 0x82:
+        case 0x83:
+        case 0x84:
+        case 0x85:
+            *n++ = 0;
+            *n = 'A';
+            break;
+        case 0x87:
+            *n++ = 0;
+            *n = 'C';
+            break;
+        case 0x86:
+        case 0x88:
+        case 0x89:
+        case 0x8A:
+        case 0x8B:
+            *n++ = 0;
+            *n = 'E';
+            break;
+        case 0x8C:
+        case 0x8D:
+        case 0x8E:
+        case 0x8F:
+            *n++ = 0;
+            *n = 'I';
+            break;
+        case 0x90:    /* eth: TH */
+            *n++ = 0;
+            *n = '0';
+            break;
+        case 0x91:
+            *n++ = 0;
+            *n = 'N';
+            break;
+        case 0x92:
+        case 0x93:
+        case 0x94:
+        case 0x95:
+        case 0x96:
+        case 0x98:
+            *n++ = 0;
+            *n = 'O';
+            break;
+        case 0x99:
+        case 0x9A:
+        case 0x9B:
+        case 0x9C:
+            *n++ = 0;
+            *n = 'U';
+            break;
+        case 0x9D:
+            *n++ = 0;
+            *n = 'Y';
+            break;
+        case 0x9E:
+            *n++ = 0;
+            *n = '0';    /* thorn: TH */
+            break;
+        case 0x9F:
+            *n++ = 0;
+            *n = 's';
+            break;
+        case 0xA0:
+        case 0xA1:
+        case 0xA2:
+        case 0xA3:
+        case 0xA4:
+        case 0xA5:
+            *n++ = 0;
+            *n = 'a';
+            break;
+        case 0xA6:
+            *n++ = 0;
+            *n = 'e';
+            break;
+        case 0xA7:
+            *n++ = 0;
+            *n = 'c';
+            break;
+        case 0xA8:
+        case 0xA9:
+        case 0xAA:
+        case 0xAB:
+            *n++ = 0;
+            *n = 'e';
+            break;
+        case 0xAC:
+        case 0xAD:
+        case 0xAE:
+        case 0xAF:
+            *n++ = 0;
+            *n = 'i';
+            break;
+        case 0xB0:
+            *n++ = 0;
+            *n = '0';    /* eth: th */
+            break;
+        case 0xB1:
+            *n++ = 0;
+            *n = 'n';
+            break;
+        case 0xB2:
+        case 0xB3:
+        case 0xB4:
+        case 0xB5:
+        case 0xB6:
+        case 0xB8:
+            *n++ = 0;
+            *n = 'o';
+            break;
+        case 0xB9:
+        case 0xBA:
+        case 0xBB:
+        case 0xBC:
+            *n++ = 0;
+            *n = 'u';
+            break;
+        case 0xBD:
+        case 0xBF:
+            *n++ = 0;
+            *n = 'y';
+            break;
+        case 0xBE:
+            *n++ = 0;
+            *n = '0';    /* thorn: th */
+            break;
+        }
+        break;
+    }
 
-        /* ntrans[0] will always be == 0 */
-        ntrans[0] = '\0';
-        ntrans[1] = '\0';
-        ntrans[2] = '\0';
-        ntrans[3] = '\0';
-        *n++ = 0;
-        *n++ = 0;
-        *n++ = 0;
-        *n = 0;                        /* Pad with nulls */
-        n = ntrans + 4;                /* Assign pointer to start */
-
-        /* Check for PN, KN, GN, AE, WR, WH, and X at start */
-        switch (*n) {
-        case 'P':
-        case 'K':
-        case 'G':
-                /* 'PN', 'KN', 'GN' becomes 'N' */
-                if (*(n + 1) == 'N')
-                        *n++ = 0;
-                break;
-        case 'A':
-                /* 'AE' becomes 'E' */
-                if (*(n + 1) == 'E')
-                        *n++ = 0;
-                break;
-        case 'W':
-                /* 'WR' becomes 'R', and 'WH' to 'H' */
-                if (*(n + 1) == 'R')
-                        *n++ = 0;
-                else if (*(n + 1) == 'H') {
-                        *(n + 1) = *n;
-                        *n++ = 0;
+    /*
+     * Now, loop step through string, stopping at end of string or when
+     * the computed 'metaph' is MAXPHONEMELEN characters long
+     */
+
+    KSflag = 0;                /* state flag for KS translation */
+    for (metaph_end = Metaph + MAXPHONEMELEN, n_start = n;
+         n <= n_end && Metaph < metaph_end; n++) {
+        if (KSflag) {
+            KSflag = 0;
+            *Metaph++ = 'S';
+        } else if (!isascii(*n)) {
+            switch (*n) {
+            case 0xC3:
+                if (n+1 <= n_end) {
+                    switch (*(++n)) {
+                    case 0x87:    /* C with cedilla */
+                    case 0x9F:    /* ess-zed */
+                    case 0xA7:    /* c with cedilla */
+                        *Metaph++ = 'S';
+                        break;
+                    case 0x90:    /* eth: TH */
+                    case 0x9E:    /* thorn: TH */
+                    case 0xB0:    /* eth: th */
+                    case 0xBE:    /* thorn: th */
+                        *Metaph++ = '0';
+                        break;
+                    case 0x91:
+                    case 0xB1:
+                        *Metaph++ = 'N';
+                        break;
+                    case 0x9D:
+                    case 0xBD:
+                    case 0xBF:
+                        *Metaph++ = 'Y';
+                        break;
+                    default:      /* skipping the rest */
+                        break;
+                    }
                 }
                 break;
-        case 'X':
-                /* 'X' becomes 'S' */
-                *n = 'S';
-                break;
-        }
+            default:
+                *Metaph++ = *n;
+            }
+        } else {
+            /* Drop duplicates except for CC */
+            if (*(n - 1) == *n && *n != 'C')
+                continue;
+            /* Check for F J L M N R or first letter vowel */
+            if (same(*n) || (n == n_start && vowel(n))) {
+                *Metaph++ = *n;
+            } else {
+                switch (*n) {
+                case 'B':
 
-        /*
-         * Now, loop step through string, stopping at end of string or when
-         * the computed 'metaph' is MAXPHONEMELEN characters long
-         */
-
-        KSflag = 0;                /* state flag for KS translation */
-        for (metaph_end = Metaph + MAXPHONEMELEN, n_start = n;
-             n <= n_end && Metaph < metaph_end; n++) {
-                if (KSflag) {
-                        KSflag = 0;
-                        *Metaph++ = 'S';
-                } else if (!isascii(*n)) {
+                    /*
+                     * B unless in -MB
+                     */
+                    if (n < (n_end - 1) && *(n - 1) != 'M') {
                         *Metaph++ = *n;
-                } else {
-                        /* Drop duplicates except for CC */
-                        if (*(n - 1) == *n && *n != 'C')
-                                continue;
-                        /* Check for F J L M N R or first letter vowel */
-                        if (same(*n) || (n == n_start && vowel(*n))) {
-                                *Metaph++ = *n;
+                    }
+                    break;
+                case 'C':
+
+                    /*
+                     * X if in -CIA-, -CH- else S if in
+                     * -CI-, -CE-, -CY- else dropped if
+                     * in -SCI-, -SCE-, -SCY- else K
+                     */
+                    if (*(n - 1) != 'S' || !frontv((n + 1))) {
+                        if (*(n + 1) == 'I' && *(n + 2) == 'A') {
+                            *Metaph++ = 'X';
+                        } else if (frontv((n + 1))) {
+                            *Metaph++ = 'S';
+                        } else if (*(n + 1) == 'H') {
+                            *Metaph++ = ((n == n_start && !vowel((n + 2)))
+                             || *(n - 1) == 'S')
+                                ? (char) 'K' : (char) 'X';
                         } else {
-                                switch (*n) {
-                                case 'B':
-
-                                        /*
-                                         * B unless in -MB
-                                         */
-                                        if (n < (n_end - 1) && *(n - 1) != 'M') {
-                                                *Metaph++ = *n;
-                                        }
-                                        break;
-                                case 'C':
-
-                                        /*
-                                         * X if in -CIA-, -CH- else S if in
-                                         * -CI-, -CE-, -CY- else dropped if
-                                         * in -SCI-, -SCE-, -SCY- else K
-                                         */
-                                        if (*(n - 1) != 'S' || !frontv(*(n + 1))) {
-                                                if (*(n + 1) == 'I' && *(n + 2) == 'A') {
-                                                        *Metaph++ = 'X';
-                                                } else if (frontv(*(n + 1))) {
-                                                        *Metaph++ = 'S';
-                                                } else if (*(n + 1) == 'H') {
-                                                        *Metaph++ = ((n == n_start && !vowel(*(n + 2)))
-                                                         || *(n - 1) == 'S')
-                                                            ? (char) 'K' : (char) 'X';
-                                                } else {
-                                                        *Metaph++ = 'K';
-                                                }
-                                        }
-                                        break;
-                                case 'D':
-
-                                        /*
-                                         * J if in DGE or DGI or DGY else T
-                                         */
-                                        *Metaph++ = (*(n + 1) == 'G' && frontv(*(n + 2)))
-                                            ? (char) 'J' : (char) 'T';
-                                        break;
-                                case 'G':
-
-                                        /*
-                                         * F if in -GH and not B--GH, D--GH,
-                                         * -H--GH, -H---GH else dropped if
-                                         * -GNED, -GN, -DGE-, -DGI-, -DGY-
-                                         * else J if in -GE-, -GI-, -GY- and
-                                         * not GG else K
-                                         */
-                                        if ((*(n + 1) != 'J' || vowel(*(n + 2))) &&
-                                            (*(n + 1) != 'N' || ((n + 1) < n_end &&
-                                                                 (*(n + 2) != 'E' || *(n + 3) != 'D'))) &&
-                                            (*(n - 1) != 'D' || !frontv(*(n + 1))))
-                                                *Metaph++ = (frontv(*(n + 1)) &&
-                                                             *(n + 2) != 'G') ? (char) 'G' : (char) 'K';
-                                        else if (*(n + 1) == 'H' && !noghf(*(n - 3)) &&
-                                                 *(n - 4) != 'H')
-                                                *Metaph++ = 'F';
-                                        break;
-                                case 'H':
-
-                                        /*
-                                         * H if before a vowel and not after
-                                         * C, G, P, S, T else dropped
-                                         */
-                                        if (!varson(*(n - 1)) && (!vowel(*(n - 1)) ||
-                                                           vowel(*(n + 1))))
-                                                *Metaph++ = 'H';
-                                        break;
-                                case 'K':
-
-                                        /*
-                                         * dropped if after C else K
-                                         */
-                                        if (*(n - 1) != 'C')
-                                                *Metaph++ = 'K';
-                                        break;
-                                case 'P':
-
-                                        /*
-                                         * F if before H, else P
-                                         */
-                                        *Metaph++ = *(n + 1) == 'H' ?
-                                            (char) 'F' : (char) 'P';
-                                        break;
-                                case 'Q':
-
-                                        /*
-                                         * K
-                                         */
-                                        *Metaph++ = 'K';
-                                        break;
-                                case 'S':
-
-                                        /*
-                                         * X in -SH-, -SIO- or -SIA- else S
-                                         */
-                                        *Metaph++ = (*(n + 1) == 'H' ||
-                                                     (*(n + 1) == 'I' && (*(n + 2) == 'O' ||
-                                                          *(n + 2) == 'A')))
-                                            ? (char) 'X' : (char) 'S';
-                                        break;
-                                case 'T':
-
-                                        /*
-                                         * X in -TIA- or -TIO- else 0 (zero)
-                                         * before H else dropped if in -TCH-
-                                         * else T
-                                         */
-                                        if (*(n + 1) == 'I' && (*(n + 2) == 'O' ||
-                                                           *(n + 2) == 'A'))
-                                                *Metaph++ = 'X';
-                                        else if (*(n + 1) == 'H')
-                                                *Metaph++ = '0';
-                                        else if (*(n + 1) != 'C' || *(n + 2) != 'H')
-                                                *Metaph++ = 'T';
-                                        break;
-                                case 'V':
-
-                                        /*
-                                         * F
-                                         */
-                                        *Metaph++ = 'F';
-                                        break;
-                                case 'W':
-
-                                        /*
-                                         * W after a vowel, else dropped
-                                         */
-                                case 'Y':
-
-                                        /*
-                                         * Y unless followed by a vowel
-                                         */
-                                        if (vowel(*(n + 1)))
-                                                *Metaph++ = *n;
-                                        break;
-                                case 'X':
-
-                                        /*
-                                         * KS
-                                         */
-                                        if (n == n_start)
-                                                *Metaph++ = 'S';
-                                        else {
-                                                *Metaph++ = 'K';        /* Insert K, then S */
-                                                KSflag = 1;
-                                        }
-                                        break;
-                                case 'Z':
-
-                                        /*
-                                         * S
-                                         */
-                                        *Metaph++ = 'S';
-                                        break;
-                                }
+                            *Metaph++ = 'K';
                         }
+                    }
+                    break;
+                case 'D':
+
+                    /*
+                     * J if in DGE or DGI or DGY else T
+                     */
+                    *Metaph++ = (*(n + 1) == 'G' && frontv((n + 2)))
+                        ? (char) 'J' : (char) 'T';
+                    break;
+                case 'G':
+
+                    /*
+                     * F if in -GH and not B--GH, D--GH,
+                     * -H--GH, -H---GH else dropped if
+                     * -GNED, -GN, -DGE-, -DGI-, -DGY-
+                     * else J if in -GE-, -GI-, -GY- and
+                     * not GG else K
+                     */
+                    if ((*(n + 1) != 'J' || vowel((n + 2))) &&
+                        (*(n + 1) != 'N' || ((n + 1) < n_end &&
+                                 (*(n + 2) != 'E' || *(n + 3) != 'D'))) &&
+                        (*(n - 1) != 'D' || !frontv((n + 1))))
+                        *Metaph++ = (frontv((n + 1)) &&
+                                 *(n + 2) != 'G') ? (char) 'G' : (char) 'K';
+                    else if (*(n + 1) == 'H' && !noghf(*(n - 3)) &&
+                         *(n - 4) != 'H')
+                        *Metaph++ = 'F';
+                    break;
+                case 'H':
+
+                    /*
+                     * H if before a vowel and not after
+                     * C, G, P, S, T else dropped
+                     */
+                    if (!varson(*(n - 1)) && (!vowel((n - 1)) ||
+                               vowel((n + 1))))
+                        *Metaph++ = 'H';
+                    break;
+                case 'K':
+
+                    /*
+                     * dropped if after C else K
+                     */
+                    if (*(n - 1) != 'C')
+                        *Metaph++ = 'K';
+                    break;
+                case 'P':
+
+                    /*
+                     * F if before H, else P
+                     */
+                    *Metaph++ = *(n + 1) == 'H' ?
+                        (char) 'F' : (char) 'P';
+                    break;
+                case 'Q':
+
+                    /*
+                     * K
+                     */
+                    *Metaph++ = 'K';
+                    break;
+                case 'S':
+
+                    /*
+                     * X in -SH-, -SIO- or -SIA- else S
+                     */
+                    *Metaph++ = (*(n + 1) == 'H' ||
+                             (*(n + 1) == 'I' && (*(n + 2) == 'O' ||
+                              *(n + 2) == 'A')))
+                        ? (char) 'X' : (char) 'S';
+                    break;
+                case 'T':
+
+                    /*
+                     * X in -TIA- or -TIO- else 0 (zero)
+                     * before H else dropped if in -TCH-
+                     * else T
+                     */
+                    if (*(n + 1) == 'I' && (*(n + 2) == 'O' ||
+                               *(n + 2) == 'A'))
+                        *Metaph++ = 'X';
+                    else if (*(n + 1) == 'H')
+                        *Metaph++ = '0';
+                    else if (*(n + 1) != 'C' || *(n + 2) != 'H')
+                        *Metaph++ = 'T';
+                    break;
+                case 'V':
+
+                    /*
+                     * F
+                     */
+                    *Metaph++ = 'F';
+                    break;
+                case 'W':
+
+                    /*
+                     * W after a vowel, else dropped
+                     */
+                case 'Y':
+
+                    /*
+                     * Y unless followed by a vowel
+                     */
+                    if (vowel((n + 1)))
+                        *Metaph++ = *n;
+                    break;
+                case 'X':
+
+                    /*
+                     * KS
+                     */
+                    if (n == n_start)
+                        *Metaph++ = 'S';
+                    else {
+                        *Metaph++ = 'K';    /* Insert K, then S */
+                        KSflag = 1;
+                    }
+                    break;
+                case 'Z':
+
+                    /*
+                     * S
+                     */
+                    *Metaph++ = 'S';
+                    break;
                 }
+            }
         }
+    }
 
-        *Metaph = 0;                /* Null terminate */
-        return( slapi_ch_strdup( buf ) );
+    *Metaph = 0;                /* Null terminate */
+    return( slapi_ch_strdup( buf ) );
 }
 
 #endif /* METAPHONE */




More information about the Fedora-directory-commits mailing list