{-# LANGUAGE TupleSections #-}
module Data.Kanji
(
Kanji
, kanji, _kanji
, allKanji
, isKanji, isHiragana, isKatakana
, CharCat(..)
, category
, Level(..)
, level
, percentSpread
, levelDist
, uniques
, densities
, elementaryDen
, middleDen
, highDen
) where
import Control.Arrow hiding (second)
import Data.Foldable (fold)
import Data.Kanji.Levels
import Data.Kanji.Types
import Data.List (group, sort)
import qualified Data.Map.Strict as M
import Data.Semigroup ((<>))
import qualified Data.Set as S
import qualified Data.Text as T
allKanji :: M.Map Level (S.Set Kanji)
allKanji = M.fromList . zip [ Ten .. ] $ map (S.map Kanji) ks
where ks = [ tenth, ninth, eighth, seventh, sixth
, fifth, fourth, third, preSecond, second ]
allKanji' :: M.Map Kanji Level
allKanji' = M.fromList . S.toList . fold $ M.mapWithKey (\k v -> S.map (,k) v) allKanji
level :: Kanji -> Level
level k = maybe Unknown id $ M.lookup k allKanji'
{-# INLINE level #-}
densities :: T.Text -> M.Map CharCat Float
densities t = M.fromList . map (head &&& f) . group . sort . map category $ T.unpack t
where f xs = fromIntegral (length xs) / fromIntegral (T.length t)
elementaryDen :: M.Map Level Float -> Float
elementaryDen m = M.foldl' (+) 0 . M.restrictKeys m $ S.fromList [ Five, Six .. ]
middleDen :: M.Map Level Float -> Float
middleDen m = M.foldl' (+) 0 . M.restrictKeys m $ S.fromList [ Three, Four .. ]
highDen :: M.Map Level Float -> Float
highDen m = M.foldl' (+) 0 . M.restrictKeys m $ S.fromList [ Two, PreTwo .. ]
levelDist :: [Kanji] -> M.Map Level Float
levelDist ks = M.fromList . map percentPair . group . sort $ map level ks
where percentPair qns = (head qns, fromIntegral (length qns) / totalKs)
totalKs = fromIntegral $ length ks
percentSpread :: [Kanji] -> M.Map Kanji Float
percentSpread ks = getPercent <$> kQuants
where getPercent q = fromIntegral q / totalKanji
kQuants = kanjiQuantities ks
totalKanji = fromIntegral $ M.foldl' (+) 0 kQuants
kanjiQuantities :: [Kanji] -> M.Map Kanji Int
kanjiQuantities = M.fromList . map (head &&& length) . group . sort
uniques :: [Kanji] -> M.Map Level (S.Set Kanji)
uniques = S.foldl' h M.empty . S.fromList
where h a k = (\l -> M.insertWith (<>) l (S.singleton k) a) $ level k