{-# LANGUAGE OverloadedStrings #-}
module CJK.Data.Unihan.NumericValues (
    NumericUse(..), numericValue
  ) where

import CJK.Utilities

import Control.Applicative

import qualified Data.Text as Text
import qualified Data.Text.Lazy as TextL
import Data.Attoparsec.Text

import Data.Char
import Data.Maybe
import qualified Data.Map as M
import Data.List

import System.IO.Unsafe


data NumericUse = AccountingUse -- ^ Used in the writing of accounting numerals (to prevent fraud)
                | OtherUse      -- ^ Used in certain unusual, specialized contexts
                | PrimaryUse    -- ^ Used in the writing of numbers in the standard fashion
                deriving (Eq, Show)

-- | The value of the character and the contexts in which it is used
numericValue :: Char -> Maybe (NumericUse, Integer)
numericValue c = M.lookup c numericValues


type NumericValuesMap = M.Map Char (NumericUse, Integer)

emptyNumericValuesMap :: NumericValuesMap
emptyNumericValuesMap = M.empty

unionNumericValuesMap :: NumericValuesMap -> NumericValuesMap -> NumericValuesMap
unionNumericValuesMap = M.unionWith (error "unionNumericValuesMap: impossible") -- There is at most one line for each (character, field name) combination

{-# NOINLINE contents #-}
contents :: TextL.Text
contents = unsafePerformIO (readUTF8DataFile "data/Unihan/Unihan_NumericValues.txt")

numericValues :: NumericValuesMap
numericValues = parseLazy fileP contents


fileP :: Parser NumericValuesMap
fileP = fmap (foldl' unionNumericValuesMap emptyNumericValuesMap) (lineP `manyTill` endOfInput)

lineP :: Parser NumericValuesMap
lineP = do { c <- charP <* skipSpace; dataP <- numericValueP c <* skipSpace; dataP <* skipTrueSpace <* lineTerminator }
    <|> char '#' *> manyTill anyChar lineTerminator *> pure emptyNumericValuesMap
    <|> manyTill skipTrueSpace lineTerminator *> pure emptyNumericValuesMap
    <?> "line"

numericValueP :: Char -> Parser (Parser NumericValuesMap)
numericValueP c = string "kAccountingNumeric" *> pure (liftA (mk AccountingUse) decimal)
              <|> string "kOtherNumeric"      *> pure (liftA (mk OtherUse)      decimal)
              <|> string "kPrimaryNumeric"    *> pure (liftA (mk PrimaryUse)    decimal)
  where mk use x = M.singleton c (use, x)