module Text.PhoneticCode.Phonix (
phonix, phonixCodes,
phonixRules, phonixRulesPatSubsts,
applyPhonixRules )
where
import Data.List
import Data.Char
import Data.Array.IArray
import qualified Data.Set as Set
import Text.Regex
phonix :: String -> String
phonix = filter (/= '?')
. drop_trailing_sound
. encode
. applyPhonixRules
where
drop_trailing_sound = init . concatMap question_squash . group where
question_squash ('?' : _) = ['?']
question_squash l = l
filter_multiples = map head . group
encode "" = "0"
encode as@(a : _) = (devowel a :)
. drop 1
. filter_multiples
. map unsound $ as
unsound c | c >= 'A' && c <= 'Z' = phonixCodes ! c
unsound _ = '?'
devowel c | isVowely c = 'v'
devowel c = c
isVowely :: Char -> Bool
isVowely c = c `Set.member` (Set.fromList "AEIOUY")
phonixCodes :: Array Char Char
phonixCodes = accumArray updater '?' ('A', 'Z') codes where
updater '?' c = c
updater _ c = error ("updater called twice on " ++ [c])
groups = [('1', "BP"),
('2', "CGJKQ"),
('3', "DT"),
('4', "L"),
('5', "MN"),
('6', "R"),
('7', "FV"),
('8', "SXZ")]
codes = concatMap make_codes groups
make_codes (i, s) = zip s (repeat i)
phonixRules :: [(String, String)]
phonixRules = [
("DG","G"),
("CO","KO"),
("CA","KA"),
("CU","KU"),
("CY","SI"),
("CI","SI"),
("CE","SE"),
("^CLv","KL"),
("CK","K"),
("GC$","K"),
("JC$","K"),
("^CRv","KR"),
("^CHRv","KR"),
("^WR","R"),
("NC","NK"),
("CT","KT"),
("PH","F"),
("AA","AR"),
("SCH","SH"),
("BTL","TL"),
("GHT","T"),
("AUGH","ARF"),
(".LJv","LD"),
("LOUGH","LOW"),
("^Q","KW"),
("^KN","N"),
("GN$","N"),
("GHN","N"),
("GNE$","N"),
("GHNE","NE"),
("GNES$","NS"),
("^GN","N"),
(".GNc","N"),
("^PS","S"),
("^PT","T"),
("^CZ","C"),
("vWZ.","Z"),
(".CZ.","CH"),
("LZ","LSH"),
("RZ","RSH"),
(".Zv","S"),
("ZZ","TS"),
("cZ.","TS"),
("HROUGH","REW"),
("OUGH","OF"),
("vQv","KW"),
("vJv","Y"),
("^YJv","Y"),
("^GH","G"),
("vGH$","E"),
("^CY","S"),
("NX","NKS"),
("^PF","F"),
("DT$","T"),
("TL$","TIL"),
("DL$","DIL"),
("YTH","ITH"),
("^TJv","CH"),
("^TSJv","CH"),
("^TSv","T"),
("TCH","CH"),
("^vWSK","VSIKE"),
("^PNv","N"),
("^MNv","N"),
("vSTL","SL"),
("TNT$","ENT"),
("EAUX$","OH"),
("EXCI","ECS"),
("X","ECS"),
("NED$","ND"),
("JR","DR"),
("EE$","EA"),
("ZS","S"),
("vRc","AH"),
("vHRc","AH"),
("vHR$","AH"),
("RE$","AR"),
("vR$","AH"),
("LLE","LE"),
("cLE$","ILE"),
("cLES$","ILES"),
("E$",""),
("ES$","S"),
("vSS","AS"),
("vMB$","M"),
("MPTS","MPS"),
("MPS","MS"),
("MPT","MT") ]
applyPhonixRules :: String -> String
applyPhonixRules =
flip (foldl' res) phonixRulesREs .
map toUpper .
filter isAlpha
where
res target (pat, subst) = subRegex pat target subst
mre :: String -> Regex
mre s = mkRegexWithOpts s False True
phonixRulesREs :: [(Regex, String)]
phonixRulesREs =
map (\(pat, subst) -> (mre pat, subst)) phonixRulesPatSubsts
phonixRulesPatSubsts :: [(String, String)]
phonixRulesPatSubsts =
map reFormat phonixRules
where
reFormat (src, dst) =
let vowelSubst = mre "v"
consSubst = mre "c"
dotSubst = mre "\\." in
let src' = flip (subRegex dotSubst) "([A-Z])" $
flip (subRegex consSubst) "([BCDFGHJKLMNPQRSTVWXYZ])" $
flip (subRegex vowelSubst) "([AEIOU])" $
src in
let front =
case matchRegex (mre "^\\^?[^^A-Z]") src' of
Just _ -> "\\1"
Nothing -> ""
in
let back =
case matchRegex (mre "[^A-Z$]\\$?$") src' of
Just _ ->
case front of
"" -> "\\1"
_ -> "\\2"
Nothing -> ""
in
(src', front ++ dst ++ back)