{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}
{-# LANGUAGE TypeSynonymInstances #-}
module Text.Seonbi.Unihan.KHangul
    ( CharacterSet (..)
    , HanjaReadings
    , HanjaReadingCitation (..)
    , KHangulData
    , Purpose (..)
    , kHangulData
    , kHangulData'
    ) where

import Data.Either

import Data.Aeson
import Data.Attoparsec.Text
import Data.ByteString.Lazy (fromStrict)
import Data.FileEmbed
import Data.Map.Strict
import Data.Set hiding (empty)
import System.FilePath (takeDirectory, (</>))

-- $setup
-- >>> import qualified Text.Show.Unicode
-- >>> :set -interactive-print=Text.Show.Unicode.uprint

-- | Maps all Hanja characters to their possible readings.
type KHangulData = Map Char HanjaReadings

-- | All readings of a Hanja character.
type HanjaReadings = Map Char HanjaReadingCitation

-- | Represents what standard a reading of character belongs to and a purpose
-- of the reading.
data HanjaReadingCitation =
    HanjaReadingCitation CharacterSet (Set Purpose) deriving (HanjaReadingCitation -> HanjaReadingCitation -> Bool
(HanjaReadingCitation -> HanjaReadingCitation -> Bool)
-> (HanjaReadingCitation -> HanjaReadingCitation -> Bool)
-> Eq HanjaReadingCitation
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: HanjaReadingCitation -> HanjaReadingCitation -> Bool
$c/= :: HanjaReadingCitation -> HanjaReadingCitation -> Bool
== :: HanjaReadingCitation -> HanjaReadingCitation -> Bool
$c== :: HanjaReadingCitation -> HanjaReadingCitation -> Bool
Eq, Eq HanjaReadingCitation
Eq HanjaReadingCitation
-> (HanjaReadingCitation -> HanjaReadingCitation -> Ordering)
-> (HanjaReadingCitation -> HanjaReadingCitation -> Bool)
-> (HanjaReadingCitation -> HanjaReadingCitation -> Bool)
-> (HanjaReadingCitation -> HanjaReadingCitation -> Bool)
-> (HanjaReadingCitation -> HanjaReadingCitation -> Bool)
-> (HanjaReadingCitation
    -> HanjaReadingCitation -> HanjaReadingCitation)
-> (HanjaReadingCitation
    -> HanjaReadingCitation -> HanjaReadingCitation)
-> Ord HanjaReadingCitation
HanjaReadingCitation -> HanjaReadingCitation -> Bool
HanjaReadingCitation -> HanjaReadingCitation -> Ordering
HanjaReadingCitation
-> HanjaReadingCitation -> HanjaReadingCitation
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: HanjaReadingCitation
-> HanjaReadingCitation -> HanjaReadingCitation
$cmin :: HanjaReadingCitation
-> HanjaReadingCitation -> HanjaReadingCitation
max :: HanjaReadingCitation
-> HanjaReadingCitation -> HanjaReadingCitation
$cmax :: HanjaReadingCitation
-> HanjaReadingCitation -> HanjaReadingCitation
>= :: HanjaReadingCitation -> HanjaReadingCitation -> Bool
$c>= :: HanjaReadingCitation -> HanjaReadingCitation -> Bool
> :: HanjaReadingCitation -> HanjaReadingCitation -> Bool
$c> :: HanjaReadingCitation -> HanjaReadingCitation -> Bool
<= :: HanjaReadingCitation -> HanjaReadingCitation -> Bool
$c<= :: HanjaReadingCitation -> HanjaReadingCitation -> Bool
< :: HanjaReadingCitation -> HanjaReadingCitation -> Bool
$c< :: HanjaReadingCitation -> HanjaReadingCitation -> Bool
compare :: HanjaReadingCitation -> HanjaReadingCitation -> Ordering
$ccompare :: HanjaReadingCitation -> HanjaReadingCitation -> Ordering
$cp1Ord :: Eq HanjaReadingCitation
Ord, Int -> HanjaReadingCitation -> ShowS
[HanjaReadingCitation] -> ShowS
HanjaReadingCitation -> String
(Int -> HanjaReadingCitation -> ShowS)
-> (HanjaReadingCitation -> String)
-> ([HanjaReadingCitation] -> ShowS)
-> Show HanjaReadingCitation
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [HanjaReadingCitation] -> ShowS
$cshowList :: [HanjaReadingCitation] -> ShowS
show :: HanjaReadingCitation -> String
$cshow :: HanjaReadingCitation -> String
showsPrec :: Int -> HanjaReadingCitation -> ShowS
$cshowsPrec :: Int -> HanjaReadingCitation -> ShowS
Show)

-- | Represents character set standards for Korean writing system.
data CharacterSet
    -- | KS X 1001 (정보 교환용 부호계).
    = KS_X_1001
    -- | KS X 1002 (정보 교환용 부호 확장 세트).
    | KS_X_1002
    -- | Represents that a Hanja character is not included in any Korean
    -- character set standards.
    | NonStandard
    deriving (CharacterSet -> CharacterSet -> Bool
(CharacterSet -> CharacterSet -> Bool)
-> (CharacterSet -> CharacterSet -> Bool) -> Eq CharacterSet
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CharacterSet -> CharacterSet -> Bool
$c/= :: CharacterSet -> CharacterSet -> Bool
== :: CharacterSet -> CharacterSet -> Bool
$c== :: CharacterSet -> CharacterSet -> Bool
Eq, Eq CharacterSet
Eq CharacterSet
-> (CharacterSet -> CharacterSet -> Ordering)
-> (CharacterSet -> CharacterSet -> Bool)
-> (CharacterSet -> CharacterSet -> Bool)
-> (CharacterSet -> CharacterSet -> Bool)
-> (CharacterSet -> CharacterSet -> Bool)
-> (CharacterSet -> CharacterSet -> CharacterSet)
-> (CharacterSet -> CharacterSet -> CharacterSet)
-> Ord CharacterSet
CharacterSet -> CharacterSet -> Bool
CharacterSet -> CharacterSet -> Ordering
CharacterSet -> CharacterSet -> CharacterSet
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: CharacterSet -> CharacterSet -> CharacterSet
$cmin :: CharacterSet -> CharacterSet -> CharacterSet
max :: CharacterSet -> CharacterSet -> CharacterSet
$cmax :: CharacterSet -> CharacterSet -> CharacterSet
>= :: CharacterSet -> CharacterSet -> Bool
$c>= :: CharacterSet -> CharacterSet -> Bool
> :: CharacterSet -> CharacterSet -> Bool
$c> :: CharacterSet -> CharacterSet -> Bool
<= :: CharacterSet -> CharacterSet -> Bool
$c<= :: CharacterSet -> CharacterSet -> Bool
< :: CharacterSet -> CharacterSet -> Bool
$c< :: CharacterSet -> CharacterSet -> Bool
compare :: CharacterSet -> CharacterSet -> Ordering
$ccompare :: CharacterSet -> CharacterSet -> Ordering
$cp1Ord :: Eq CharacterSet
Ord, Int -> CharacterSet -> ShowS
[CharacterSet] -> ShowS
CharacterSet -> String
(Int -> CharacterSet -> ShowS)
-> (CharacterSet -> String)
-> ([CharacterSet] -> ShowS)
-> Show CharacterSet
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CharacterSet] -> ShowS
$cshowList :: [CharacterSet] -> ShowS
show :: CharacterSet -> String
$cshow :: CharacterSet -> String
showsPrec :: Int -> CharacterSet -> ShowS
$cshowsPrec :: Int -> CharacterSet -> ShowS
Show)

-- | Represents purposes of Hanja characters.
data Purpose
    -- | Basic Hanja for educational use (漢文敎育用基礎漢字), a subset of
    -- Hanja defined in 1972 by a South Korean standard for educational use.
    = Education
    -- | Hanja for personal names (人名用漢字).
    | PersonalName
    deriving (Purpose -> Purpose -> Bool
(Purpose -> Purpose -> Bool)
-> (Purpose -> Purpose -> Bool) -> Eq Purpose
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Purpose -> Purpose -> Bool
$c/= :: Purpose -> Purpose -> Bool
== :: Purpose -> Purpose -> Bool
$c== :: Purpose -> Purpose -> Bool
Eq, Eq Purpose
Eq Purpose
-> (Purpose -> Purpose -> Ordering)
-> (Purpose -> Purpose -> Bool)
-> (Purpose -> Purpose -> Bool)
-> (Purpose -> Purpose -> Bool)
-> (Purpose -> Purpose -> Bool)
-> (Purpose -> Purpose -> Purpose)
-> (Purpose -> Purpose -> Purpose)
-> Ord Purpose
Purpose -> Purpose -> Bool
Purpose -> Purpose -> Ordering
Purpose -> Purpose -> Purpose
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Purpose -> Purpose -> Purpose
$cmin :: Purpose -> Purpose -> Purpose
max :: Purpose -> Purpose -> Purpose
$cmax :: Purpose -> Purpose -> Purpose
>= :: Purpose -> Purpose -> Bool
$c>= :: Purpose -> Purpose -> Bool
> :: Purpose -> Purpose -> Bool
$c> :: Purpose -> Purpose -> Bool
<= :: Purpose -> Purpose -> Bool
$c<= :: Purpose -> Purpose -> Bool
< :: Purpose -> Purpose -> Bool
$c< :: Purpose -> Purpose -> Bool
compare :: Purpose -> Purpose -> Ordering
$ccompare :: Purpose -> Purpose -> Ordering
$cp1Ord :: Eq Purpose
Ord, Int -> Purpose -> ShowS
[Purpose] -> ShowS
Purpose -> String
(Int -> Purpose -> ShowS)
-> (Purpose -> String) -> ([Purpose] -> ShowS) -> Show Purpose
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Purpose] -> ShowS
$cshowList :: [Purpose] -> ShowS
show :: Purpose -> String
$cshow :: Purpose -> String
showsPrec :: Int -> Purpose -> ShowS
$cshowsPrec :: Int -> Purpose -> ShowS
Show)

citationParser :: Parser HanjaReadingCitation
citationParser :: Parser HanjaReadingCitation
citationParser = do
    CharacterSet
charset' <- CharacterSet
-> Parser Text CharacterSet -> Parser Text CharacterSet
forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option CharacterSet
NonStandard Parser Text CharacterSet
charset
    [Purpose]
purposes <- Parser Text Purpose -> Parser Text [Purpose]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' Parser Text Purpose
purpose
    HanjaReadingCitation -> Parser HanjaReadingCitation
forall (m :: * -> *) a. Monad m => a -> m a
return (HanjaReadingCitation -> Parser HanjaReadingCitation)
-> HanjaReadingCitation -> Parser HanjaReadingCitation
forall a b. (a -> b) -> a -> b
$ CharacterSet -> Set Purpose -> HanjaReadingCitation
HanjaReadingCitation CharacterSet
charset' (Set Purpose -> HanjaReadingCitation)
-> Set Purpose -> HanjaReadingCitation
forall a b. (a -> b) -> a -> b
$ [Purpose] -> Set Purpose
forall a. Ord a => [a] -> Set a
Data.Set.fromList [Purpose]
purposes
  where
    charset :: Parser CharacterSet
    charset :: Parser Text CharacterSet
charset = do
        Char
d <- Parser Char
digit
        case Char
d of
            Char
'0' -> CharacterSet -> Parser Text CharacterSet
forall (m :: * -> *) a. Monad m => a -> m a
return CharacterSet
KS_X_1001
            Char
'1' -> CharacterSet -> Parser Text CharacterSet
forall (m :: * -> *) a. Monad m => a -> m a
return CharacterSet
KS_X_1002
            Char
c -> String -> Parser Text CharacterSet
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String
"Invalid kHangul character set code: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Char -> String
forall a. Show a => a -> String
show Char
c)
    purpose :: Parser Purpose
    purpose :: Parser Text Purpose
purpose = do
        Char
l <- Parser Char
letter
        case Char
l of
            Char
'E' -> Purpose -> Parser Text Purpose
forall (m :: * -> *) a. Monad m => a -> m a
return Purpose
Education
            Char
'N' -> Purpose -> Parser Text Purpose
forall (m :: * -> *) a. Monad m => a -> m a
return Purpose
PersonalName
            Char
c -> String -> Parser Text Purpose
forall (m :: * -> *) a. MonadFail m => String -> m a
fail (String
"Invalid kHangul purpose code: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ Char -> String
forall a. Show a => a -> String
show Char
c)

instance FromJSON HanjaReadingCitation where
    parseJSON :: Value -> Parser HanjaReadingCitation
parseJSON =
        String
-> (Text -> Parser HanjaReadingCitation)
-> Value
-> Parser HanjaReadingCitation
forall a. String -> (Text -> Parser a) -> Value -> Parser a
withText String
"kHangul value (e.g., 0E, 1N, 0EN)" ((Text -> Parser HanjaReadingCitation)
 -> Value -> Parser HanjaReadingCitation)
-> (Text -> Parser HanjaReadingCitation)
-> Value
-> Parser HanjaReadingCitation
forall a b. (a -> b) -> a -> b
$ \ Text
t ->
            case Parser HanjaReadingCitation
-> Text -> Either String HanjaReadingCitation
forall a. Parser a -> Text -> Either String a
parseOnly (Parser HanjaReadingCitation
citationParser Parser HanjaReadingCitation
-> Parser Text () -> Parser HanjaReadingCitation
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser Text ()
forall t. Chunk t => Parser t ()
endOfInput) Text
t of
                Right HanjaReadingCitation
cite -> HanjaReadingCitation -> Parser HanjaReadingCitation
forall (m :: * -> *) a. Monad m => a -> m a
return HanjaReadingCitation
cite
                Left String
msg -> String -> Parser HanjaReadingCitation
forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
msg

kHangulData' :: Either String KHangulData
kHangulData' :: Either String KHangulData
kHangulData' = ByteString -> Either String KHangulData
forall a. FromJSON a => ByteString -> Either String a
eitherDecode (ByteString -> Either String KHangulData)
-> ByteString -> Either String KHangulData
forall a b. (a -> b) -> a -> b
$
    ByteString -> ByteString
fromStrict $(embedFile $ takeDirectory __FILE__ </> "kHangul.json")

-- | Data that map Hanja characters to their corresponding kHangul entries
-- (i.e., Hanja readings and citations).
--
-- >>> import Data.Map.Strict as M
-- >>> let Just entries = M.lookup '天' kHangulData
-- >>> entries
-- fromList [('천',HanjaReadingCitation KS_X_1001 (fromList [Education]))]
kHangulData :: KHangulData
kHangulData :: KHangulData
kHangulData = KHangulData -> Either String KHangulData -> KHangulData
forall b a. b -> Either a b -> b
fromRight KHangulData
forall k a. Map k a
empty Either String KHangulData
kHangulData'

{- HLINT ignore "Unused LANGUAGE pragma" -}