/* COPYRIGHT NOTICE * * This code was pulled directly from the Text-DoubleMetaphone perl package, * version 0.07 * * The README mentions that the copyright is: * * Copyright 2000, Maurice Aubrey . * All rights reserved. * This code is based heavily on the C++ implementation by * Lawrence Philips and incorporates several bug fixes courtesy * of Kevin Atkinson . * * This module is free software; you may redistribute it and/or * modify it under the same terms as Perl itself. * * * Now, I'm not sure what that actually means as Perl is dual-licensed, * according to this: http://dev.perl.org/licenses/ * * In conclusion, I'm calling it GPLv1. * */ #include #include #include #include #include #include #include "double-metaphone.h" /* * * If META_USE_PERL_MALLOC is defined we use Perl's memory routines. * */ #ifdef META_USE_PERL_MALLOC #include "EXTERN.h" #include "perl.h" #define META_MALLOC(v, n, t) New(1, v, n, t) #define META_REALLOC(v, n, t) Renew(v, n, t) #define META_FREE(x) Safefree((x)) #else #define META_MALLOC(v, n, t) (v = (t *)malloc(((n) * sizeof(t)))) #define META_REALLOC(v, n, t) (v = (t *)realloc((v), ((n) * sizeof(t)))) #define META_FREE(x) free((x)) #endif /* META_USE_PERL_MALLOC */ metastring *NewMetaString(char *init_str) { metastring *s; char empty_string[] = ""; META_MALLOC(s, 1, metastring); assert(s != NULL); if (init_str == NULL) init_str = empty_string; s->length = strlen(init_str); /* preallocate a bit more for potential growth */ s->bufsize = s->length + 7; META_MALLOC(s->str, s->bufsize, char); assert(s->str != NULL); strncpy(s->str, init_str, s->length + 1); s->free_string_on_destroy = 1; return s; } void DestroyMetaString(metastring *s) { if (s == NULL) return; if (s->free_string_on_destroy && (s->str != NULL)) META_FREE(s->str); META_FREE(s); } void IncreaseBuffer(metastring *s, int chars_needed) { META_REALLOC(s->str, (s->bufsize + chars_needed + 10), char); assert(s->str != NULL); s->bufsize = s->bufsize + chars_needed + 10; } void MakeUpper(metastring *s) { char *i; for (i = s->str; *i; i++) { *i = toupper(*i); } } int IsVowel(metastring *s, int pos) { char c; if ((pos < 0) || (pos >= s->length)) return 0; c = *(s->str + pos); if ((c == 'A') || (c == 'E') || (c == 'I') || (c == 'O') || (c == 'U') || (c == 'Y')) return 1; return 0; } int SlavoGermanic(metastring *s) { if ((char *)strstr(s->str, "W")) return 1; else if ((char *)strstr(s->str, "K")) return 1; else if ((char *)strstr(s->str, "CZ")) return 1; else if ((char *)strstr(s->str, "WITZ")) return 1; else return 0; } int GetLength(metastring *s) { return s->length; } char GetAt(metastring *s, int pos) { if ((pos < 0) || (pos >= s->length)) return '\0'; return ((char)*(s->str + pos)); } void SetAt(metastring *s, int pos, char c) { if ((pos < 0) || (pos >= s->length)) return; *(s->str + pos) = c; } /* Caveats: the START value is 0 based */ int StringAt(metastring *s, int start, int length, ...) { char *test; char *pos; va_list ap; if ((start < 0) || (start >= s->length)) return 0; pos = (s->str + start); va_start(ap, length); do { test = va_arg(ap, char *); if (*test && (strncmp(pos, test, length) == 0)) { va_end(ap); return 1; } } while (strcmp(test, "")); va_end(ap); return 0; } void MetaphAdd(metastring *s, char *new_str) { int add_length; if (new_str == NULL) return; add_length = strlen(new_str); if ((s->length + add_length) > (s->bufsize - 1)) { IncreaseBuffer(s, add_length); } strcat(s->str, new_str); s->length += add_length; } void DoubleMetaphone(char *str, char **codes, int max_len) { int length; metastring *original; metastring *primary; metastring *secondary; int current; int last; current = 0; /* we need the real length and last prior to padding */ length = strlen(str); last = length - 1; original = NewMetaString(str); /* Pad original so we can index beyond end */ MetaphAdd(original, " "); primary = NewMetaString(""); secondary = NewMetaString(""); primary->free_string_on_destroy = 0; secondary->free_string_on_destroy = 0; MakeUpper(original); /* skip these when at start of word */ if (StringAt(original, 0, 2, "GN", "KN", "PN", "WR", "PS", "")) current += 1; /* Initial 'X' is pronounced 'Z' e.g. 'Xavier' */ if (GetAt(original, 0) == 'X') { MetaphAdd(primary, "S"); /* 'Z' maps to 'S' */ MetaphAdd(secondary, "S"); current += 1; } /* main loop */ while ((primary->length < max_len) || (secondary->length < max_len)) { if (current >= length) break; switch (GetAt(original, current)) { case 'A': case 'E': case 'I': case 'O': case 'U': case 'Y': if (current == 0) { /* all init vowels now map to 'A' */ MetaphAdd(primary, "A"); MetaphAdd(secondary, "A"); } current += 1; break; case 'B': /* "-mb", e.g", "dumb", already skipped over... */ MetaphAdd(primary, "P"); MetaphAdd(secondary, "P"); if (GetAt(original, current + 1) == 'B') current += 2; else current += 1; break; case '\xc7': /* C with cedilla */ MetaphAdd(primary, "S"); MetaphAdd(secondary, "S"); current += 1; break; case 'C': /* various germanic */ if ((current > 1) && !IsVowel(original, current - 2) && StringAt(original, (current - 1), 3, "ACH", "") && ((GetAt(original, current + 2) != 'I') && ((GetAt(original, current + 2) != 'E') || StringAt(original, (current - 2), 6, "BACHER", "MACHER", "")))) { MetaphAdd(primary, "K"); MetaphAdd(secondary, "K"); current += 2; break; } /* special case 'caesar' */ if ((current == 0) && StringAt(original, current, 6, "CAESAR", "")) { MetaphAdd(primary, "S"); MetaphAdd(secondary, "S"); current += 2; break; } /* italian 'chianti' */ if (StringAt(original, current, 4, "CHIA", "")) { MetaphAdd(primary, "K"); MetaphAdd(secondary, "K"); current += 2; break; } if (StringAt(original, current, 2, "CH", "")) { /* find 'michael' */ if ((current > 0) && StringAt(original, current, 4, "CHAE", "")) { MetaphAdd(primary, "K"); MetaphAdd(secondary, "X"); current += 2; break; } /* greek roots e.g. 'chemistry', 'chorus' */ if ((current == 0) && (StringAt(original, (current + 1), 5, "HARAC", "HARIS", "") || StringAt(original, (current + 1), 3, "HOR", "HYM", "HIA", "HEM", "")) && !StringAt(original, 0, 5, "CHORE", "")) { MetaphAdd(primary, "K"); MetaphAdd(secondary, "K"); current += 2; break; } /* germanic, greek, or otherwise 'ch' for 'kh' sound */ if ((StringAt(original, 0, 4, "VAN ", "VON ", "") || StringAt(original, 0, 3, "SCH", "")) /* 'architect but not 'arch', 'orchestra', 'orchid' */ || StringAt(original, (current - 2), 6, "ORCHES", "ARCHIT", "ORCHID", "") || StringAt(original, (current + 2), 1, "T", "S", "") || ((StringAt(original, (current - 1), 1, "A", "O", "U", "E", "") || (current == 0)) /* e.g., 'wachtler', 'wechsler', but not 'tichner' */ && StringAt(original, (current + 2), 1, "L", "R", "N", "M", "B", "H", "F", "V", "W", " ", ""))) { MetaphAdd(primary, "K"); MetaphAdd(secondary, "K"); } else { if (current > 0) { if (StringAt(original, 0, 2, "MC", "")) { /* e.g., "McHugh" */ MetaphAdd(primary, "K"); MetaphAdd(secondary, "K"); } else { MetaphAdd(primary, "X"); MetaphAdd(secondary, "K"); } } else { MetaphAdd(primary, "X"); MetaphAdd(secondary, "X"); } } current += 2; break; } /* e.g, 'czerny' */ if (StringAt(original, current, 2, "CZ", "") && !StringAt(original, (current - 2), 4, "WICZ", "")) { MetaphAdd(primary, "S"); MetaphAdd(secondary, "X"); current += 2; break; } /* e.g., 'focaccia' */ if (StringAt(original, (current + 1), 3, "CIA", "")) { MetaphAdd(primary, "X"); MetaphAdd(secondary, "X"); current += 3; break; } /* double 'C', but not if e.g. 'McClellan' */ if (StringAt(original, current, 2, "CC", "") && !((current == 1) && (GetAt(original, 0) == 'M'))) { /* 'bellocchio' but not 'bacchus' */ if (StringAt(original, (current + 2), 1, "I", "E", "H", "") && !StringAt(original, (current + 2), 2, "HU", "")) { /* 'accident', 'accede' 'succeed' */ if (((current == 1) && (GetAt(original, current - 1) == 'A')) || StringAt(original, (current - 1), 5, "UCCEE", "UCCES", "")) { MetaphAdd(primary, "KS"); MetaphAdd(secondary, "KS"); /* 'bacci', 'bertucci', other italian */ } else { MetaphAdd(primary, "X"); MetaphAdd(secondary, "X"); } current += 3; break; } else { /* Pierce's rule */ MetaphAdd(primary, "K"); MetaphAdd(secondary, "K"); current += 2; break; } } if (StringAt(original, current, 2, "CK", "CG", "CQ", "")) { MetaphAdd(primary, "K"); MetaphAdd(secondary, "K"); current += 2; break; } if (StringAt(original, current, 2, "CI", "CE", "CY", "")) { /* italian vs. english */ if (StringAt(original, current, 3, "CIO", "CIE", "CIA", "")) { MetaphAdd(primary, "S"); MetaphAdd(secondary, "X"); } else { MetaphAdd(primary, "S"); MetaphAdd(secondary, "S"); } current += 2; break; } /* else */ MetaphAdd(primary, "K"); MetaphAdd(secondary, "K"); /* name sent in 'mac caffrey', 'mac gregor */ if (StringAt(original, (current + 1), 2, " C", " Q", " G", "")) current += 3; else if (StringAt(original, (current + 1), 1, "C", "K", "Q", "") && !StringAt(original, (current + 1), 2, "CE", "CI", "")) current += 2; else current += 1; break; case 'D': if (StringAt(original, current, 2, "DG", "")) { if (StringAt(original, (current + 2), 1, "I", "E", "Y", "")) { /* e.g. 'edge' */ MetaphAdd(primary, "J"); MetaphAdd(secondary, "J"); current += 3; break; } else { /* e.g. 'edgar' */ MetaphAdd(primary, "TK"); MetaphAdd(secondary, "TK"); current += 2; break; } } if (StringAt(original, current, 2, "DT", "DD", "")) { MetaphAdd(primary, "T"); MetaphAdd(secondary, "T"); current += 2; break; } /* else */ MetaphAdd(primary, "T"); MetaphAdd(secondary, "T"); current += 1; break; case 'F': if (GetAt(original, current + 1) == 'F') current += 2; else current += 1; MetaphAdd(primary, "F"); MetaphAdd(secondary, "F"); break; case 'G': if (GetAt(original, current + 1) == 'H') { if ((current > 0) && !IsVowel(original, current - 1)) { MetaphAdd(primary, "K"); MetaphAdd(secondary, "K"); current += 2; break; } if (current < 3) { /* 'ghislane', ghiradelli */ if (current == 0) { if (GetAt(original, current + 2) == 'I') { MetaphAdd(primary, "J"); MetaphAdd(secondary, "J"); } else { MetaphAdd(primary, "K"); MetaphAdd(secondary, "K"); } current += 2; break; } } /* Parker's rule (with some further refinements) - e.g., 'hugh' */ if (((current > 1) && StringAt(original, (current - 2), 1, "B", "H", "D", "")) /* e.g., 'bough' */ || ((current > 2) && StringAt(original, (current - 3), 1, "B", "H", "D", "")) /* e.g., 'broughton' */ || ((current > 3) && StringAt(original, (current - 4), 1, "B", "H", ""))) { current += 2; break; } else { /* e.g., 'laugh', 'McLaughlin', 'cough', 'gough', 'rough', 'tough' */ if ((current > 2) && (GetAt(original, current - 1) == 'U') && StringAt(original, (current - 3), 1, "C", "G", "L", "R", "T", "")) { MetaphAdd(primary, "F"); MetaphAdd(secondary, "F"); } else if ((current > 0) && GetAt(original, current - 1) != 'I') { MetaphAdd(primary, "K"); MetaphAdd(secondary, "K"); } current += 2; break; } } if (GetAt(original, current + 1) == 'N') { if ((current == 1) && IsVowel(original, 0) && !SlavoGermanic(original)) { MetaphAdd(primary, "KN"); MetaphAdd(secondary, "N"); } else /* not e.g. 'cagney' */ if (!StringAt(original, (current + 2), 2, "EY", "") && (GetAt(original, current + 1) != 'Y') && !SlavoGermanic(original)) { MetaphAdd(primary, "N"); MetaphAdd(secondary, "KN"); } else { MetaphAdd(primary, "KN"); MetaphAdd(secondary, "KN"); } current += 2; break; } /* 'tagliaro' */ if (StringAt(original, (current + 1), 2, "LI", "") && !SlavoGermanic(original)) { MetaphAdd(primary, "KL"); MetaphAdd(secondary, "L"); current += 2; break; } /* -ges-,-gep-,-gel-, -gie- at beginning */ if ((current == 0) && ((GetAt(original, current + 1) == 'Y') || StringAt(original, (current + 1), 2, "ES", "EP", "EB", "EL", "EY", "IB", "IL", "IN", "IE", "EI", "ER", ""))) { MetaphAdd(primary, "K"); MetaphAdd(secondary, "J"); current += 2; break; } /* -ger-, -gy- */ if ((StringAt(original, (current + 1), 2, "ER", "") || (GetAt(original, current + 1) == 'Y')) && !StringAt(original, 0, 6, "DANGER", "RANGER", "MANGER", "") && !StringAt(original, (current - 1), 1, "E", "I", "") && !StringAt(original, (current - 1), 3, "RGY", "OGY", "")) { MetaphAdd(primary, "K"); MetaphAdd(secondary, "J"); current += 2; break; } /* italian e.g, 'biaggi' */ if (StringAt(original, (current + 1), 1, "E", "I", "Y", "") || StringAt(original, (current - 1), 4, "AGGI", "OGGI", "")) { /* obvious germanic */ if ((StringAt(original, 0, 4, "VAN ", "VON ", "") || StringAt(original, 0, 3, "SCH", "")) || StringAt(original, (current + 1), 2, "ET", "")) { MetaphAdd(primary, "K"); MetaphAdd(secondary, "K"); } else { /* always soft if french ending */ if (StringAt(original, (current + 1), 4, "IER ", "")) { MetaphAdd(primary, "J"); MetaphAdd(secondary, "J"); } else { MetaphAdd(primary, "J"); MetaphAdd(secondary, "K"); } } current += 2; break; } if (GetAt(original, current + 1) == 'G') current += 2; else current += 1; MetaphAdd(primary, "K"); MetaphAdd(secondary, "K"); break; case 'H': /* only keep if first & before vowel or btw. 2 vowels */ if (((current == 0) || IsVowel(original, current - 1)) && IsVowel(original, current + 1)) { MetaphAdd(primary, "H"); MetaphAdd(secondary, "H"); current += 2; } else /* also takes care of 'HH' */ current += 1; break; case 'J': /* obvious spanish, 'jose', 'san jacinto' */ if (StringAt(original, current, 4, "JOSE", "") || StringAt(original, 0, 4, "SAN ", "")) { if (((current == 0) && (GetAt(original, current + 4) == ' ')) || StringAt(original, 0, 4, "SAN ", "")) { MetaphAdd(primary, "H"); MetaphAdd(secondary, "H"); } else { MetaphAdd(primary, "J"); MetaphAdd(secondary, "H"); } current += 1; break; } if ((current == 0) && !StringAt(original, current, 4, "JOSE", "")) { MetaphAdd(primary, "J"); /* Yankelovich/Jankelowicz */ MetaphAdd(secondary, "A"); } else { /* spanish pron. of e.g. 'bajador' */ if (IsVowel(original, current - 1) && !SlavoGermanic(original) && ((GetAt(original, current + 1) == 'A') || (GetAt(original, current + 1) == 'O'))) { MetaphAdd(primary, "J"); MetaphAdd(secondary, "H"); } else { if (current == last) { MetaphAdd(primary, "J"); MetaphAdd(secondary, ""); } else { if (!StringAt(original, (current + 1), 1, "L", "T", "K", "S", "N", "M", "B", "Z", "") && !StringAt(original, (current - 1), 1, "S", "K", "L", "")) { MetaphAdd(primary, "J"); MetaphAdd(secondary, "J"); } } } } if (GetAt(original, current + 1) == 'J') /* it could happen! */ current += 2; else current += 1; break; case 'K': if (GetAt(original, current + 1) == 'K') current += 2; else current += 1; MetaphAdd(primary, "K"); MetaphAdd(secondary, "K"); break; case 'L': if (GetAt(original, current + 1) == 'L') { /* spanish e.g. 'cabrillo', 'gallegos' */ if (((current == (length - 3)) && StringAt(original, (current - 1), 4, "ILLO", "ILLA", "ALLE", "")) || ((StringAt(original, (last - 1), 2, "AS", "OS", "") || StringAt(original, last, 1, "A", "O", "")) && StringAt(original, (current - 1), 4, "ALLE", ""))) { MetaphAdd(primary, "L"); MetaphAdd(secondary, ""); current += 2; break; } current += 2; } else current += 1; MetaphAdd(primary, "L"); MetaphAdd(secondary, "L"); break; case 'M': if ((StringAt(original, (current - 1), 3, "UMB", "") && (((current + 1) == last) || StringAt(original, (current + 2), 2, "ER", ""))) /* 'dumb','thumb' */ || (GetAt(original, current + 1) == 'M')) current += 2; else current += 1; MetaphAdd(primary, "M"); MetaphAdd(secondary, "M"); break; case 'N': if (GetAt(original, current + 1) == 'N') current += 2; else current += 1; MetaphAdd(primary, "N"); MetaphAdd(secondary, "N"); break; case '\xd1': /* N with tilde */ current += 1; MetaphAdd(primary, "N"); MetaphAdd(secondary, "N"); break; case 'P': if (GetAt(original, current + 1) == 'H') { MetaphAdd(primary, "F"); MetaphAdd(secondary, "F"); current += 2; break; } /* also account for "campbell", "raspberry" */ if (StringAt(original, (current + 1), 1, "P", "B", "")) current += 2; else current += 1; MetaphAdd(primary, "P"); MetaphAdd(secondary, "P"); break; case 'Q': if (GetAt(original, current + 1) == 'Q') current += 2; else current += 1; MetaphAdd(primary, "K"); MetaphAdd(secondary, "K"); break; case 'R': /* french e.g. 'rogier', but exclude 'hochmeier' */ if ((current == last) && !SlavoGermanic(original) && StringAt(original, (current - 2), 2, "IE", "") && !StringAt(original, (current - 4), 2, "ME", "MA", "")) { MetaphAdd(primary, ""); MetaphAdd(secondary, "R"); } else { MetaphAdd(primary, "R"); MetaphAdd(secondary, "R"); } if (GetAt(original, current + 1) == 'R') current += 2; else current += 1; break; case 'S': /* special cases 'island', 'isle', 'carlisle', 'carlysle' */ if (StringAt(original, (current - 1), 3, "ISL", "YSL", "")) { current += 1; break; } /* special case 'sugar-' */ if ((current == 0) && StringAt(original, current, 5, "SUGAR", "")) { MetaphAdd(primary, "X"); MetaphAdd(secondary, "S"); current += 1; break; } if (StringAt(original, current, 2, "SH", "")) { /* germanic */ if (StringAt(original, (current + 1), 4, "HEIM", "HOEK", "HOLM", "HOLZ", "")) { MetaphAdd(primary, "S"); MetaphAdd(secondary, "S"); } else { MetaphAdd(primary, "X"); MetaphAdd(secondary, "X"); } current += 2; break; } /* italian & armenian */ if (StringAt(original, current, 3, "SIO", "SIA", "") || StringAt(original, current, 4, "SIAN", "")) { if (!SlavoGermanic(original)) { MetaphAdd(primary, "S"); MetaphAdd(secondary, "X"); } else { MetaphAdd(primary, "S"); MetaphAdd(secondary, "S"); } current += 3; break; } /* german & anglicisations, e.g. 'smith' match 'schmidt', 'snider' match 'schneider' also, -sz- in slavic language altho in hungarian it is pronounced 's' */ if (((current == 0) && StringAt(original, (current + 1), 1, "M", "N", "L", "W", "")) || StringAt(original, (current + 1), 1, "Z", "")) { MetaphAdd(primary, "S"); MetaphAdd(secondary, "X"); if (StringAt(original, (current + 1), 1, "Z", "")) current += 2; else current += 1; break; } if (StringAt(original, current, 2, "SC", "")) { /* Schlesinger's rule */ if (GetAt(original, current + 2) == 'H') { /* dutch origin, e.g. 'school', 'schooner' */ if (StringAt(original, (current + 3), 2, "OO", "ER", "EN", "UY", "ED", "EM", "")) { /* 'schermerhorn', 'schenker' */ if (StringAt(original, (current + 3), 2, "ER", "EN", "")) { MetaphAdd(primary, "X"); MetaphAdd(secondary, "SK"); } else { MetaphAdd(primary, "SK"); MetaphAdd(secondary, "SK"); } current += 3; break; } else { if ((current == 0) && !IsVowel(original, 3) && (GetAt(original, 3) != 'W')) { MetaphAdd(primary, "X"); MetaphAdd(secondary, "S"); } else { MetaphAdd(primary, "X"); MetaphAdd(secondary, "X"); } current += 3; break; } } if (StringAt(original, (current + 2), 1, "I", "E", "Y", "")) { MetaphAdd(primary, "S"); MetaphAdd(secondary, "S"); current += 3; break; } /* else */ MetaphAdd(primary, "SK"); MetaphAdd(secondary, "SK"); current += 3; break; } /* french e.g. 'resnais', 'artois' */ if ((current == last) && StringAt(original, (current - 2), 2, "AI", "OI", "")) { MetaphAdd(primary, ""); MetaphAdd(secondary, "S"); } else { MetaphAdd(primary, "S"); MetaphAdd(secondary, "S"); } if (StringAt(original, (current + 1), 1, "S", "Z", "")) current += 2; else current += 1; break; case 'T': if (StringAt(original, current, 4, "TION", "")) { MetaphAdd(primary, "X"); MetaphAdd(secondary, "X"); current += 3; break; } if (StringAt(original, current, 3, "TIA", "TCH", "")) { MetaphAdd(primary, "X"); MetaphAdd(secondary, "X"); current += 3; break; } if (StringAt(original, current, 2, "TH", "") || StringAt(original, current, 3, "TTH", "")) { /* special case 'thomas', 'thames' or germanic */ if (StringAt(original, (current + 2), 2, "OM", "AM", "") || StringAt(original, 0, 4, "VAN ", "VON ", "") || StringAt(original, 0, 3, "SCH", "")) { MetaphAdd(primary, "T"); MetaphAdd(secondary, "T"); } else { MetaphAdd(primary, "0"); /* yes, zero */ MetaphAdd(secondary, "T"); } current += 2; break; } if (StringAt(original, (current + 1), 1, "T", "D", "")) current += 2; else current += 1; MetaphAdd(primary, "T"); MetaphAdd(secondary, "T"); break; case 'V': if (GetAt(original, current + 1) == 'V') current += 2; else current += 1; MetaphAdd(primary, "F"); MetaphAdd(secondary, "F"); break; case 'W': /* can also be in middle of word */ if (StringAt(original, current, 2, "WR", "")) { MetaphAdd(primary, "R"); MetaphAdd(secondary, "R"); current += 2; break; } if ((current == 0) && (IsVowel(original, current + 1) || StringAt(original, current, 2, "WH", ""))) { /* Wasserman should match Vasserman */ if (IsVowel(original, current + 1)) { MetaphAdd(primary, "A"); MetaphAdd(secondary, "F"); } else { /* need Uomo to match Womo */ MetaphAdd(primary, "A"); MetaphAdd(secondary, "A"); } } /* Arnow should match Arnoff */ if (((current == last) && IsVowel(original, current - 1)) || StringAt(original, (current - 1), 5, "EWSKI", "EWSKY", "OWSKI", "OWSKY", "") || StringAt(original, 0, 3, "SCH", "")) { MetaphAdd(primary, ""); MetaphAdd(secondary, "F"); current += 1; break; } /* polish e.g. 'filipowicz' */ if (StringAt(original, current, 4, "WICZ", "WITZ", "")) { MetaphAdd(primary, "TS"); MetaphAdd(secondary, "FX"); current += 4; break; } /* else skip it */ current += 1; break; case 'X': /* french e.g. breaux */ if (!((current == last) && (StringAt(original, (current - 3), 3, "IAU", "EAU", "") || StringAt(original, (current - 2), 2, "AU", "OU", "")))) { MetaphAdd(primary, "KS"); MetaphAdd(secondary, "KS"); } if (StringAt(original, (current + 1), 1, "C", "X", "")) current += 2; else current += 1; break; case 'Z': /* chinese pinyin e.g. 'zhao' */ if (GetAt(original, current + 1) == 'H') { MetaphAdd(primary, "J"); MetaphAdd(secondary, "J"); current += 2; break; } else if (StringAt(original, (current + 1), 2, "ZO", "ZI", "ZA", "") || (SlavoGermanic(original) && ((current > 0) && GetAt(original, current - 1) != 'T'))) { MetaphAdd(primary, "S"); MetaphAdd(secondary, "TS"); } else { MetaphAdd(primary, "S"); MetaphAdd(secondary, "S"); } if (GetAt(original, current + 1) == 'Z') current += 2; else current += 1; break; default: current += 1; } /* printf("PRIMARY: %s\n", primary->str); printf("SECONDARY: %s\n", secondary->str); */ } if (primary->length > max_len) SetAt(primary, max_len, '\0'); if (secondary->length > max_len) SetAt(secondary, max_len, '\0'); *codes = primary->str; *++codes = secondary->str; DestroyMetaString(original); DestroyMetaString(primary); DestroyMetaString(secondary); }