module CJK.Data.Unihan.DictionaryLikeData (
cangjie,
CheungBauer(..), cheungBauer,
cihai,
Fenn(..), fenn,
fourCornerCode,
frequency,
gradeLevel,
hdzRadBreak,
hkGlyph,
phonetic,
totalStrokes
) where
import qualified CJK.Data.Jyutping as Jyutping
import CJK.Data.Internal
import CJK.Data.Types
import CJK.Utilities
import Control.Applicative
import qualified Data.Text as Text
import qualified Data.Text.Lazy as TextL
import Data.Attoparsec.Text
import Data.Char
import Data.Maybe
import qualified Data.Map as M
import Data.Monoid
import Data.List
import System.IO.Unsafe
data CheungBauer = CB {
cbRadicalStrokeCount :: RadicalStrokeCount KangXiRadical,
cbCangjie :: Maybe CangjieInputCode,
cbReading :: [Jyutping.Phone]
} deriving (Show)
data Fenn = Fenn {
fennSoothill :: Maybe Int,
fennFrequency :: Maybe Int
} deriving (Show)
cangjie :: Char -> Maybe CangjieInputCode
cangjie c = M.lookup c (kCangjie dictionaryLikes)
cheungBauer :: Char -> [CheungBauer]
cheungBauer c = M.findWithDefault [] c (kCheungBauer dictionaryLikes)
cihai :: Char -> [Text.Text]
cihai c = M.findWithDefault [] c (kCihaiT dictionaryLikes)
fenn :: Char -> [Fenn]
fenn c = M.findWithDefault [] c (kFenn dictionaryLikes)
fourCornerCode :: Char -> [Text.Text]
fourCornerCode c = M.findWithDefault [] c (kFourCornerCode dictionaryLikes)
frequency :: Char -> Maybe Int
frequency c = M.lookup c (kFrequency dictionaryLikes)
gradeLevel :: Char -> Maybe Int
gradeLevel c = M.lookup c (kGradeLevel dictionaryLikes)
hdzRadBreak :: Char -> Maybe (Char, HDZEntry)
hdzRadBreak c = M.lookup c (kHDZRadBreak dictionaryLikes)
hkGlyph :: Char -> [Int]
hkGlyph c = M.findWithDefault [] c (kHKGlyph dictionaryLikes)
phonetic :: Char -> [Text.Text]
phonetic c = M.findWithDefault [] c (kPhonetic dictionaryLikes)
totalStrokes :: Char -> Maybe (StrokeCount, StrokeCount)
totalStrokes c = M.lookup c (kTotalStrokes dictionaryLikes)
data DictionaryLikesMap = DMS {
kCangjie :: M.Map Char CangjieInputCode,
kCheungBauer :: M.Map Char [CheungBauer],
kCihaiT :: M.Map Char [Text.Text],
kFenn :: M.Map Char [Fenn],
kFourCornerCode :: M.Map Char [Text.Text],
kFrequency :: M.Map Char Int,
kGradeLevel :: M.Map Char Int,
kHDZRadBreak :: M.Map Char (Char, HDZEntry),
kHKGlyph :: M.Map Char [Int],
kPhonetic :: M.Map Char [Text.Text],
kTotalStrokes :: M.Map Char (StrokeCount, StrokeCount)
} deriving (Show)
emptyDictionaryLikesMap :: DictionaryLikesMap
emptyDictionaryLikesMap = DMS M.empty M.empty M.empty M.empty M.empty M.empty M.empty M.empty M.empty M.empty M.empty
unionDictionaryLikesMap :: DictionaryLikesMap -> DictionaryLikesMap -> DictionaryLikesMap
unionDictionaryLikesMap (DMS a1 a2 a3 a4 a5 a6 a7 a8 a9 a10 a11) (DMS b1 b2 b3 b4 b5 b6 b7 b8 b9 b10 b11)
= DMS (plus a1 b1) (plus a2 b2) (plus a3 b3) (plus a4 b4) (plus a5 b5) (plus a6 b6)
(plus a7 b7) (plus a8 b8) (plus a9 b9) (plus a10 b10) (plus a11 b11)
where plus = M.unionWith (error "unionReadingsMap: impossible")
contents :: TextL.Text
contents = unsafePerformIO (readUTF8DataFile "data/Unihan/Unihan_DictionaryLikeData.txt")
dictionaryLikes :: DictionaryLikesMap
dictionaryLikes = parseLazy fileP contents
fileP :: Parser DictionaryLikesMap
fileP = fmap (foldl' unionDictionaryLikesMap emptyDictionaryLikesMap) (lineP `manyTill` endOfInput)
lineP :: Parser DictionaryLikesMap
lineP = do { c <- charP <* skipSpace; dataP <- dictionaryLikeP c <* skipSpace; dataP <* skipTrueSpace <* lineTerminator }
<|> char '#' *> manyTill anyChar lineTerminator *> pure emptyDictionaryLikesMap
<|> manyTill skipTrueSpace lineTerminator *> pure emptyDictionaryLikesMap
<?> "line"
dictionaryLikeP :: Char -> Parser (Parser DictionaryLikesMap)
dictionaryLikeP c = string "kCangjie" *> pure (liftA (\x -> emptyDictionaryLikesMap { kCangjie = mk x }) cangjieP)
<|> string "kCheungBauer" *> pure (liftA (\x -> emptyDictionaryLikesMap { kCheungBauer = mk x }) (cheungBauerP `sepBy1` skipTrueSpace))
<|> string "kCihaiT" *> pure (liftA (\x -> emptyDictionaryLikesMap { kCihaiT = mk x }) (takeWhile1 (\c -> isDigit c || c == '.') `sepBy1` skipTrueSpace))
<|> string "kFenn" *> pure (liftA (\x -> emptyDictionaryLikesMap { kFenn = mk x }) (fennP `sepBy1` skipTrueSpace))
<|> string "kFourCornerCode" *> pure (liftA (\x -> emptyDictionaryLikesMap { kFourCornerCode = mk x }) (takeWhile1 (\c -> isDigit c || c == '.') `sepBy1` skipTrueSpace))
<|> string "kFrequency" *> pure (liftA (\x -> emptyDictionaryLikesMap { kFrequency = mk x }) decimal)
<|> string "kGradeLevel" *> pure (liftA (\x -> emptyDictionaryLikesMap { kGradeLevel = mk x }) decimal)
<|> string "kHDZRadBreak" *> pure (liftA (\x -> emptyDictionaryLikesMap { kHDZRadBreak = mk x }) hdzRadBreakP)
<|> string "kHKGlyph" *> pure (liftA (\x -> emptyDictionaryLikesMap { kHKGlyph = mk x }) (decimal `sepBy1` skipTrueSpace))
<|> string "kPhonetic" *> pure (liftA (\x -> emptyDictionaryLikesMap { kPhonetic = mk x }) (takeWhile1 (\c -> isDigit c || isAsciiUpper c || c == '*') `sepBy1` skipTrueSpace))
<|> string "kTotalStrokes" *> pure (liftA (\x -> emptyDictionaryLikesMap { kTotalStrokes = mk x }) totalStrokesP)
where mk x = M.singleton c x
cangjieP :: Parser CangjieInputCode
cangjieP = takeWhile1 isAsciiUpper
cheungBauerP :: Parser CheungBauer
cheungBauerP = liftA3 CB rscP (char ';' *> optional cangjieP) (char ';' *> liftA concat (jyutpingPatternP `sepBy1` char ','))
where rscP = liftA2 RSC (fmap KangXi decimal) (char '/' *> decimal)
jyutpingPatternP :: Parser [Jyutping.Phone]
jyutpingPatternP = liftA2 (\sounds tones -> [Jyutping.Phone sound tone | sound <- sounds, tone <- tones]) soundP toneP
where
soundP = liftA2 (\opt nexts -> [here | next <- nexts, here <- [next, opt <> next]]) (char '[' *> takeWhile1 (/= ']') <* char ']') soundP
<|> liftA (\x -> [x]) (takeWhile1 (\c -> isAsciiUpper c || isAsciiLower c))
toneP = jyutpingToneP `sepBy1` char '/'
fennP :: Parser Fenn
fennP = liftA2 Fenn groupP (optional (char 'a') *> frequencyP)
where groupP = char '0' *> pure Nothing
<|> fmap Just decimal
frequencyP = char 'A' *> return (Just 1)
<|> char 'B' *> return (Just 2)
<|> char 'C' *> return (Just 3)
<|> char 'D' *> return (Just 4)
<|> char 'E' *> return (Just 5)
<|> char 'F' *> return (Just 6)
<|> char 'G' *> return (Just 7)
<|> char 'H' *> return (Just 8)
<|> char 'I' *> return (Just 9)
<|> char 'J' *> return (Just 10)
<|> char 'K' *> return (Just 11)
<|> char 'P' *> return Nothing
<|> char '*' *> return Nothing
hdzRadBreakP :: Parser (Char, HDZEntry)
hdzRadBreakP = liftA2 (,) anyChar (char '[' *> string "U+" *> takeWhile1 isHexDigit *> char ']' *> char ':' *> hdzEntryP)
totalStrokesP :: Parser (Int, Int)
totalStrokesP = liftA2 (\simp mb_trad -> (simp, fromMaybe simp mb_trad)) decimal (skipTrueSpace *> optional decimal)