module Text.PhoneticCode.Phonix (phonix, phonixCodes, phonixRules)
where
import Data.List
import Data.Char
import Data.Array.IArray
import Data.Maybe
import qualified Data.Set as Set
phonix :: String -> String
phonix = filter (/= '?')
. drop_trailing_sound
. encode
. apply_rules
. map toUpper
. dropWhile (not . isAlpha)
where
drop_trailing_sound = init . concatMap question_squash . group where
question_squash ('?' : _) = ['?']
question_squash l = l
apply_rules w = foldl' (flip $ uncurry gSubst) w phonixRules
filter_multiples = map head . group
encode "" = "0"
encode as@(a : _) = (devowel a :)
. drop 1
. filter_multiples
. map unsound $ as
unsound c | c >= 'A' && c <= 'Z' = phonixCodes ! c
unsound _ = '?'
devowel c | isVowely c = 'v'
devowel c = c
isVowely :: Char -> Bool
isVowely c = c `Set.member` (Set.fromList "AEIOUY")
isVowel :: Char -> Bool
isVowel c = c `Set.member` (Set.fromList "AEIOU")
globMatches :: Char -> Char -> Bool
globMatches 'v' = isVowel
globMatches 'c' = not . isVowel
globMatches '.' = const True
globMatches _ = const False
isGlob :: Char -> Bool
isGlob c = c `Set.member` (Set.fromList "vc.")
matches :: Char -> Char -> Bool
s `matches` t = s == t || s `globMatches` t
subst :: String -> String -> String -> String
subst "" _ _ = error "subst: bad pattern"
subst _ _ "" = ""
subst s d t
| length t < length s = t
| and (zipWith matches s t) = deglob ++ subst s d t'
| otherwise = skip
where
deglob = dn . d1 $ d where
d1 d0
| isGlob . head $ s = head t : d0
| otherwise = d0
dn d0
| isGlob . last $ s = d0 ++ [t !! (length s 1)]
| otherwise = d0
t' = drop (length s) t
skip = head t : subst s d (tail t)
gSubst :: String -> String -> String -> String
gSubst "" _ _ = error "gSubst: bad pattern"
gSubst s d t
| head s == '^' = let (t0, t1) = splitAt (length s 1) t in
subst (tail s) d t0 ++ t1
| last s == '$' = let (t0, t1) = splitAt (length t length s + 1) t in
t0 ++ subst (init s) d t1
| otherwise = subst s d t
phonixCodes :: Array Char Char
phonixCodes = accumArray updater '?' ('A', 'Z') codes where
updater '?' c = c
updater _ c = error ("updater called twice on " ++ [c])
groups = [('1', "BP"),
('2', "CGJKQ"),
('3', "DT"),
('4', "L"),
('5', "MN"),
('6', "R"),
('7', "FV"),
('8', "SXZ")]
codes = concatMap make_codes groups
make_codes (i, s) = zip s (repeat i)
phonixRules :: [(String, String)]
phonixRules = [
("DG","G"),
("CO","KO"),
("CA","KA"),
("CU","KU"),
("CY","SI"),
("CI","SI"),
("CE","SE"),
("^CLv","KL"),
("CK","K"),
("GC$","K"),
("JC$","K"),
("^CRv","KR"),
("^CHRv","KR"),
("^WR","R"),
("NC","NK"),
("CT","KT"),
("PH","F"),
("AA","AR"),
("SCH","SH"),
("BTL","TL"),
("GHT","T"),
("AUGH","ARF"),
(".LJv","LD"),
("LOUGH","LOW"),
("^Q","KW"),
("^KN","N"),
("GN$","N"),
("GHN","N"),
("GNE$","N"),
("GHNE","NE"),
("GNES$","NS"),
("^GN","N"),
(".GNc","N"),
("^PS","S"),
("^PT","T"),
("^CZ","C"),
("vWZ.","Z"),
(".CZ.","CH"),
("LZ","LSH"),
("RZ","RSH"),
(".Zv","S"),
("ZZ","TS"),
("cZ.","TS"),
("HROUGH","REW"),
("OUGH","OF"),
("vQv","KW"),
("vJv","Y"),
("^YJv","Y"),
("^GH","G"),
("vGH$","E"),
("^CY","S"),
("NX","NKS"),
("^PF","F"),
("DT$","T"),
("TL$","TIL"),
("DL$","DIL"),
("YTH","ITH"),
("^TJv","CH"),
("^TSJv","CH"),
("^TSv","T"),
("TCH","CH"),
("^vWSK","VSIKE"),
("^PNv","N"),
("^MNv","N"),
("vSTL","SL"),
("TNT$","ENT"),
("EAUX$","OH"),
("EXCI","ECS"),
("X","ECS"),
("NED$","ND"),
("JR","DR"),
("EE$","EA"),
("ZS","S"),
("vRc","AH"),
("vHRc","AH"),
("vHR$","AH"),
("RE$","AR"),
("vR$","AH"),
("LLE","LE"),
("cLE$","ILE"),
("cLES$","ILES"),
("E$",""),
("ES$","S"),
("vSS","AS"),
("vMB$","M"),
("MPTS","MPS"),
("MPS","MS"),
("MPT","MT") ]