module Text.Deburr (deburr) where
import Data.Char (isUpper)
-- | Convert accented Latin letters and ligatures in a string to their
-- plain-ASCII spellings, leaving every other character untouched.
--
-- Each character is translated by 'deburrLetter', which also receives the
-- character that follows it in the /original/ input (or 'Nothing' for the
-- last character).
--
-- The result is produced incrementally from left to right, so the function
-- can be consumed lazily (for example with 'take' on a very long or
-- infinite input). It uses only Prelude functions and needs no language
-- extensions.
deburr :: String -> String
deburr str = concat (zipWith deburrLetter str followers)
  where
    -- The successor of each character, in step with @str@: the input shifted
    -- left by one, padded with a final 'Nothing' so the last character is
    -- still paired (and therefore still emitted) by 'zipWith'.
    followers :: [Maybe Char]
    followers = map Just (drop 1 str) ++ [Nothing]
-- | Translate a single character to its plain-ASCII spelling.
--
-- The first argument is the character to translate. The second is the
-- character that follows it in the input, if any; it is consulted only to
-- pick the capitalisation of the ligatures U+00C6, U+00DE and U+0152, which
-- become all-caps (@"AE"@, @"TH"@, @"OE"@) when the following character is
-- upper case and title-case (@"Ae"@, @"Th"@, @"Oe"@) otherwise.
--
-- Every handled code point lies in the range U+00C0..U+017F. Characters
-- without an entry are returned unchanged as a one-character string.
deburrLetter :: Char -> Maybe Char -> String
deburrLetter n nxt
  | oneOf "\xc0\xc1\xc2\xc3\xc4\xc5\x0100\x0102\x0104" = "A"
  | oneOf "\xe0\xe1\xe2\xe3\xe4\xe5\x0101\x0103\x0105" = "a"
  | oneOf "\xc7\x0106\x0108\x010a\x010c" = "C"
  | oneOf "\xe7\x0107\x0109\x010b\x010d" = "c"
  | oneOf "\xd0\x010e\x0110" = "D"
  | oneOf "\xf0\x010f\x0111" = "d"
  | oneOf "\xc8\xc9\xca\xcb\x0112\x0114\x0116\x0118\x011a" = "E"
  | oneOf "\xe8\xe9\xea\xeb\x0113\x0115\x0117\x0119\x011b" = "e"
  | oneOf "\x011c\x011e\x0120\x0122" = "G"
  | oneOf "\x011d\x011f\x0121\x0123" = "g"
  | oneOf "\x0124\x0126" = "H"
  | oneOf "\x0125\x0127" = "h"
  | oneOf "\xcc\xcd\xce\xcf\x0128\x012a\x012c\x012e\x0130" = "I"
  | oneOf "\xec\xed\xee\xef\x0129\x012b\x012d\x012f\x0131" = "i"
  | n == '\x0134' = "J"
  | n == '\x0135' = "j"
  | n == '\x0136' = "K"
  | oneOf "\x0137\x0138" = "k"
  | oneOf "\x0139\x013b\x013d\x013f\x0141" = "L"
  | oneOf "\x013a\x013c\x013e\x0140\x0142" = "l"
  | oneOf "\xd1\x0143\x0145\x0147\x014a" = "N"
  | oneOf "\xf1\x0144\x0146\x0148\x014b" = "n"
  | oneOf "\xd2\xd3\xd4\xd5\xd6\xd8\x014c\x014e\x0150" = "O"
  | oneOf "\xf2\xf3\xf4\xf5\xf6\xf8\x014d\x014f\x0151" = "o"
  | oneOf "\x0154\x0156\x0158" = "R"
  | oneOf "\x0155\x0157\x0159" = "r"
  | oneOf "\x015a\x015c\x015e\x0160" = "S"
  | oneOf "\x015b\x015d\x015f\x0161\x017f" = "s"
  | oneOf "\x0162\x0164\x0166" = "T"
  | oneOf "\x0163\x0165\x0167" = "t"
  | oneOf "\xd9\xda\xdb\xdc\x0168\x016a\x016c\x016e\x0170\x0172" = "U"
  | oneOf "\xf9\xfa\xfb\xfc\x0169\x016b\x016d\x016f\x0171\x0173" = "u"
  | n == '\x0174' = "W"
  | n == '\x0175' = "w"
  | oneOf "\xdd\x0176\x0178" = "Y"
  | oneOf "\xfd\xff\x0177" = "y"
  | oneOf "\x0179\x017b\x017d" = "Z"
  | oneOf "\x017a\x017c\x017e" = "z"
  | otherwise = ligature
  where
    -- Membership test of the current character in a set of code points.
    -- The explicit signature pins the literal to 'String'.
    oneOf :: String -> Bool
    oneOf candidates = n `elem` candidates

    -- True only when a following character exists and is upper case.
    upperFollows :: Bool
    upperFollows = maybe False isUpper nxt

    -- Characters that expand to more than one letter, plus the fallback
    -- that passes anything unrecognised through unchanged.
    ligature :: String
    ligature = case n of
      '\xc6' -> if upperFollows then "AE" else "Ae"
      '\xe6' -> "ae"
      '\xde' -> if upperFollows then "TH" else "Th"
      '\xfe' -> "th"
      '\xdf' -> "ss"
      '\x0132' -> "IJ"
      '\x0133' -> "ij"
      '\x0152' -> if upperFollows then "OE" else "Oe"
      '\x0153' -> "oe"
      '\x0149' -> "'n"
      _ -> [n]