{-# LANGUAGE Trustworthy #-}

{-|
Description:    HTML named character reference definitions and lookup.

Copyright:      (c) 2020 Sam May
License:        MPL-2.0
Maintainer:     ag.eitilt@gmail.com

Stability:      provisional
Portability:    portable

The __[HTML](https://html.spec.whatwg.org/)__ standard defines a large number
of iconic character names for accessing commonly-used characters outside of the
file encoding, or just the capabilities of the keyboard used.  A simple
implementation would be a @'M.HashMap' 'String' 'String'@ (as a few names map
to multiple Unicode characters), but unfortunately the compatibility
restrictions of the parsing algorithm mean that type would be
less-than-performant; as the parser needs to check for a valid reference on
every character, the full reference pool would need to be searched each time
for @O(n*log m)@ with a large @m@.  (While @m@ can be reduced by filtering the
map at each step, amortized for @O(n*m*log m)@, there's no guarantee the
smaller @m@ is ultimately an improvement.)

This module instead organizes the character references into a search tree
indexed by 'Char', allowing each test step to operate over a much smaller
search space for @O(n*log 62)@, at the expense of slightly greater space
overhead.
-}
module Web.Mangrove.Parse.Common.Character
    ( CharacterReferenceTree ( .. )
    , ReferenceValue ( .. )
    , lookupCharacterReference
    , characterReferences
    ) where


import qualified Data.Aeson as J
import qualified Data.Bifunctor as F.B
import qualified Data.Either as E
import qualified Data.HashMap.Strict as M
import qualified Data.Text as T

import qualified System.IO.Unsafe as IO.Unsafe

import Paths_mangrove

import Control.Applicative ( (<|>) )
import Data.Aeson ( (.:) )
import System.FilePath ( (<.>) )


-- | __HTML:__
--      @[named character references]
--      (https://html.spec.whatwg.org/multipage/named-characters.html#named-character-references)@
--
-- The decomposition of the HTML named character reference table into a
-- search-optimized form.  Each key 'Char' maps to a pair of: the
-- 'ReferenceValue' of the reference whose name ends at that character, if one
-- exists; and the subtree of every reference whose name continues past it.
--
-- The ampersand and semicolon delimiting the character reference are not
-- considered part of the name for storage; the former is silently dropped,
-- while the latter is indicated by the value of 'isSemicolonOptional'.
--
-- For example, a minimal tree defining only the reference names @"&cent"@,
-- @"&cent;"@, and @"&centerdot;"@ would have the structure (written here as
-- nested association lists for readability, rather than as the actual
-- 'M.HashMap' values):
--
-- > [ ( 'c'
-- >   , Nothing
-- >   , [ ( 'e'
-- >       , Nothing
-- >       , [ ( 'n'
-- >           , Nothing
-- >           , [ ( 't'
-- >               , Just $ ReferenceValue True "\xA2"
-- >               , [ ( 'e'
-- >                   , Nothing
-- >                   , [ ( 'r'
-- >                       , Nothing
-- >                       , [ ( 'd'
-- >                           , Nothing
-- >                           , [ ( 'o'
-- >                               , Nothing
-- >                               , [ ( 't'
-- >                                   , Just $ ReferenceValue False "\xB7"
-- >                                   , []
-- >   ) ] ) ] ) ] ) ] ) ] ) ] ) ] ) ] ) ]
newtype CharacterReferenceTree =
    CharacterReferenceTree (M.HashMap Char (Maybe ReferenceValue, CharacterReferenceTree))
  deriving ( Eq, Show, Read )


-- | A collection of data describing how to replace some named character
-- reference with a Unicode character sequence.
data ReferenceValue = ReferenceValue
    { isSemicolonOptional :: Bool
        -- ^ Whether the reference allows a compatibility form without a
        -- terminating semicolon.
    , referenceValue :: String
        -- ^ The 'Char'(s) to insert into the document in place of the
        -- reference; a 'String' rather than a single 'Char' because a few
        -- names expand to more than one code point.
    }
  deriving ( Eq, Show, Read )


-- | User-friendly access into 'characterReferences', if the full name of the
-- potential character reference is already known.  Note that the underlying
-- map isn't structured as a traditional 'M.HashMap', and so lookup is @O(n)@
-- over the length of the name rather than @O(log m)@ over the size of the map.
--
-- This doesn't perform the longest-match calculations described by the HTML
-- standard, just a simple "does this string match a reference name" as if the
-- underlying structure were a flat @'M.HashMap' 'String' ref@.  The leading
-- ampersand and trailing semicolon may be present, but neither is required.
--
-- Returns 'Nothing' for the empty string, for a lone @"&"@, and for any name
-- which doesn't end exactly on a defined reference.
lookupCharacterReference :: String -> Maybe ReferenceValue
lookupCharacterReference = lookupCharacterReference' characterReferences . dropAmpersand
  where
    -- The tree is keyed on the name without its ampersand, so strip at most
    -- one leading @'&'@ before descending.
    dropAmpersand :: String -> String
    dropAmpersand ('&':cs) = cs
    dropAmpersand cs = cs

-- | Iterate through the reference tree according to the remainder of the
-- reference name.
--
-- The walk consumes one character per level.  It succeeds only if the name is
-- exhausted (optionally followed by a single trailing @';'@) on a node which
-- carries a 'ReferenceValue'; a missing child at any level, or an empty name,
-- yields 'Nothing'.  The stored 'isSemicolonOptional' flag is /not/ consulted:
-- a name is found with or without its semicolon regardless of that flag.
lookupCharacterReference' :: CharacterReferenceTree -> String -> Maybe ReferenceValue
lookupCharacterReference' (CharacterReferenceTree refs) name = case name of
    []       -> Nothing
    -- Final character of the name, with or without the closing semicolon.
    [c]      -> valueAt c
    [c, ';'] -> valueAt c
    -- More of the name remains: descend into the subtree under @c@.
    c:cs     -> do
        (_, refs') <- M.lookup c refs
        lookupCharacterReference' refs' cs
  where
    -- The reference (if any) whose name ends at the given character.
    valueAt :: Char -> Maybe ReferenceValue
    valueAt c = M.lookup c refs >>= fst


-- | The full set of named character references defined by the HTML standard,
-- in a search-optimized form.  Unless the potential reference name isn't
-- completely and unambiguously known (e.g., during the resolution algorithm
-- described by the HTML standard), 'lookupCharacterReference' is the better
-- interface to use.
--
-- Uses 'IO.Unsafe.unsafePerformIO' internally, as the underlying file should
-- never change at runtime, and so every evaluation would be pure.
--
-- The data is read from the package data file @entities.json@ (located via
-- 'getDataFileName').  If that file cannot be decoded as JSON of the expected
-- shape, the result is silently an /empty/ tree rather than an error, so every
-- lookup will then return 'Nothing'.
characterReferences :: CharacterReferenceTree
characterReferences = IO.Unsafe.unsafePerformIO $ do
    entities <- getDataFileName $ "entities" <.> "json"
    decoded <- J.decodeFileStrict entities
    -- A failed decode ('Nothing') falls back to the empty tree.
    return $ maybe (CharacterReferenceTree M.empty) repackReferences decoded
-- NOINLINE is the usual companion to 'IO.Unsafe.unsafePerformIO': it is
-- intended to stop the file read from being duplicated at each use site.
{-# NOINLINE characterReferences #-}


-- | Given a naïve map of character reference names to Unicode values, optimize
-- it for searching character-by-character.
--
-- Every entry is normalized and then merged, one at a time, into an initially
-- empty tree via 'repackReferences''.
repackReferences :: M.HashMap String CharacterData -> CharacterReferenceTree
repackReferences =
    foldr (repackReferences' . normalize) (CharacterReferenceTree M.empty) . M.toList
  where
    -- Drop the first character of the name (presumably the leading @'&'@ of
    -- every @entities.json@ key -- verify against the data file), and convert
    -- the numeric code points into the 'String' they represent.
    normalize :: (String, CharacterData) -> (String, String)
    normalize = F.B.bimap (drop 1) (map toEnum . codepoints)

-- | Add a single key-value character reference pair to the growing search tree.
--
-- The pair is first expanded into a one-branch tree by
-- 'singletonReferenceTree', which is then unioned into the accumulated tree;
-- wherever both share a prefix character, the two nodes are combined with
-- 'joinTree'.
repackReferences' :: (String, String) -> CharacterReferenceTree -> CharacterReferenceTree
repackReferences' ref (CharacterReferenceTree refs) =
    CharacterReferenceTree $ M.unionWith joinTree (singletonReferenceTree ref) refs

-- | 'M.unionWith' any character references sharing a prefix.  Note that if
-- multiple references have the same name (modulo semicolons) but different
-- values, the resulting value isn't necessarily predictable, nor guaranteed to
-- be stable between even minor library versions.
--
-- When both nodes carry a value, the left one is kept ('<|>'), but the result
-- is marked 'isSemicolonOptional' if /either/ side was; this is how the
-- separate @"&name"@ and @"&name;"@ entries collapse into a single node.  The
-- two subtrees are merged recursively.
joinTree
    :: (Maybe ReferenceValue, CharacterReferenceTree)
        -- ^ The reference value to be added at some character.
    -> (Maybe ReferenceValue, CharacterReferenceTree)
        -- ^ The reference value already existing in the map.
    -> (Maybe ReferenceValue, CharacterReferenceTree)
joinTree (l, CharacterReferenceTree ls) (r, CharacterReferenceTree rs) =
    ( checkOptional <$> (l <|> r)
    , CharacterReferenceTree $ M.unionWith joinTree ls rs
    )
  where
    -- Whichever value survives, the semicolon is optional if either input
    -- allowed the semicolon-free form.
    checkOptional :: ReferenceValue -> ReferenceValue
    checkOptional ref = ref
        { isSemicolonOptional = allowsBare l || allowsBare r
        }

    -- A node without a value contributes 'False'.
    allowsBare :: Maybe ReferenceValue -> Bool
    allowsBare = maybe False isSemicolonOptional

-- | Generate a search tree containing the single key-value character reference
-- pair, to be merged into a larger accumulation.
--
-- The name is folded from the right, so its /last/ character is seen first.
-- The fold state is:
--
-- ['Left' flag]: No name character has been placed yet; @flag@ is the value
--      'isSemicolonOptional' will take.  It starts as 'True' (no semicolon
--      seen, so this is the semicolon-free form) and becomes 'False' once a
--      @';'@ is encountered.
--
-- ['Right' tree]: The chain built so far for the tail of the name.
--
-- A name which is empty, or consists solely of @';'@, never leaves the 'Left'
-- state and so produces an empty map.  Note that a @';'@ resets the state to
-- @'Left' 'False'@ wherever it occurs, discarding anything built to its right.
singletonReferenceTree
    :: (String, String)
    -> M.HashMap Char (Maybe ReferenceValue, CharacterReferenceTree)
singletonReferenceTree (key, ref) = E.fromRight M.empty $ foldr step (Left True) key
  where
    step
        :: Char
        -> Either Bool (M.HashMap Char (Maybe ReferenceValue, CharacterReferenceTree))
        -> Either Bool (M.HashMap Char (Maybe ReferenceValue, CharacterReferenceTree))
    -- The semicolon is not stored as a node, only recorded in the flag.
    step ';' _ = Left False
    -- Last real character of the name: this node carries the value and has
    -- no children.
    step c (Left semicolon) = Right $
        M.singleton c (Just $ ReferenceValue semicolon ref, CharacterReferenceTree M.empty)
    -- Any earlier character: a valueless node wrapping the chain so far.
    step c (Right ref') = Right $
        M.singleton c (Nothing, CharacterReferenceTree ref')


-- | Internal representation of the character reference definitions in the
-- @entities.json@ file; the names are provided by JSON dictionary keys.
newtype CharacterData = CharacterData
    { codepoints :: [Int]
        -- ^ The Unicode code points represented by a given name.
 -- , characters :: String
 --     Uses surrogate character points rather than high-Unicode characters,
 --     and so isn't as desirable to build from.
    }
  deriving ( Eq, Show, Read )

-- | Reads only the @"codepoints"@ member of each JSON object; any other
-- members are ignored.  A value which isn't an object, or which lacks a
-- @"codepoints"@ list of integers, fails to parse.
instance J.FromJSON CharacterData where
    parseJSON = J.withObject "reference" $ \v -> CharacterData
        <$> v .: T.pack "codepoints"
     -- <*> v .: T.pack "characters"