-- | Low-level parsers for XML references.
--
-- All documentation examples assume the following setup:
--
-- > :set -XOverloadedStrings
-- > import Data.Attoparsec.ByteString
module Data.XML.Parser.Low.Reference where

import           Control.Applicative
import           Control.Arrow              ((>>>))
import           Data.Char
import           Data.Functor
import           Data.Text                  (Text)
import qualified Data.Text                  as Text
import           Data.XML.Parser.Low.Entity
import           Data.XML.Parser.Low.Name
import           Numeric
import           Text.Parser.Char
import           Text.Parser.Combinators

-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import Data.Attoparsec.ByteString

-- | <https://www.w3.org/TR/REC-xml/#dt-entref>
--
-- Entity reference, or character reference.
data Reference = EntityRef Text | CharRef Char
  deriving (Reference -> Reference -> Bool
(Reference -> Reference -> Bool)
-> (Reference -> Reference -> Bool) -> Eq Reference
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Reference -> Reference -> Bool
$c/= :: Reference -> Reference -> Bool
== :: Reference -> Reference -> Bool
$c== :: Reference -> Reference -> Bool
Eq, Eq Reference
Eq Reference
-> (Reference -> Reference -> Ordering)
-> (Reference -> Reference -> Bool)
-> (Reference -> Reference -> Bool)
-> (Reference -> Reference -> Bool)
-> (Reference -> Reference -> Bool)
-> (Reference -> Reference -> Reference)
-> (Reference -> Reference -> Reference)
-> Ord Reference
Reference -> Reference -> Bool
Reference -> Reference -> Ordering
Reference -> Reference -> Reference
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Reference -> Reference -> Reference
$cmin :: Reference -> Reference -> Reference
max :: Reference -> Reference -> Reference
$cmax :: Reference -> Reference -> Reference
>= :: Reference -> Reference -> Bool
$c>= :: Reference -> Reference -> Bool
> :: Reference -> Reference -> Bool
$c> :: Reference -> Reference -> Bool
<= :: Reference -> Reference -> Bool
$c<= :: Reference -> Reference -> Bool
< :: Reference -> Reference -> Bool
$c< :: Reference -> Reference -> Bool
compare :: Reference -> Reference -> Ordering
$ccompare :: Reference -> Reference -> Ordering
$cp1Ord :: Eq Reference
Ord, ReadPrec [Reference]
ReadPrec Reference
Int -> ReadS Reference
ReadS [Reference]
(Int -> ReadS Reference)
-> ReadS [Reference]
-> ReadPrec Reference
-> ReadPrec [Reference]
-> Read Reference
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [Reference]
$creadListPrec :: ReadPrec [Reference]
readPrec :: ReadPrec Reference
$creadPrec :: ReadPrec Reference
readList :: ReadS [Reference]
$creadList :: ReadS [Reference]
readsPrec :: Int -> ReadS Reference
$creadsPrec :: Int -> ReadS Reference
Read, Int -> Reference -> ShowS
[Reference] -> ShowS
Reference -> String
(Int -> Reference -> ShowS)
-> (Reference -> String)
-> ([Reference] -> ShowS)
-> Show Reference
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Reference] -> ShowS
$cshowList :: [Reference] -> ShowS
show :: Reference -> String
$cshow :: Reference -> String
showsPrec :: Int -> Reference -> ShowS
$cshowsPrec :: Int -> Reference -> ShowS
Show)

-- | Resolve reference into raw text.
expandReference :: Alternative m => EntityDecoder -> Reference -> m Text
expandReference :: EntityDecoder -> Reference -> m Text
expandReference EntityDecoder
_ (CharRef Char
c)      = Text -> m Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Text -> m Text) -> Text -> m Text
forall a b. (a -> b) -> a -> b
$ String -> Text
Text.pack [Char
c]
expandReference EntityDecoder
f (EntityRef Text
name) = m Text -> (Text -> m Text) -> Maybe Text -> m Text
forall b a. b -> (a -> b) -> Maybe a -> b
maybe m Text
forall (f :: * -> *) a. Alternative f => f a
empty Text -> m Text
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Maybe Text -> m Text) -> Maybe Text -> m Text
forall a b. (a -> b) -> a -> b
$ EntityDecoder -> Text -> Maybe Text
runEntityDecoder EntityDecoder
f Text
name

-- | Same as @expandReference decodePredefinedEntities@, provided for convenience.
expandReference' :: Reference -> Maybe Text
expandReference' :: Reference -> Maybe Text
expandReference' = EntityDecoder -> Reference -> Maybe Text
forall (m :: * -> *).
Alternative m =>
EntityDecoder -> Reference -> m Text
expandReference EntityDecoder
decodePredefinedEntities

-- | <https://www.w3.org/TR/REC-xml/#NT-Reference>
--
-- >>> parseOnly tokenReference "&#x3C;"
-- Right (CharRef '<')
-- >>> parseOnly tokenReference "&docdate;"
-- Right (EntityRef "docdate")
tokenReference :: CharParsing m => Monad m => m Reference
tokenReference :: m Reference
tokenReference = (Text -> Reference
EntityRef (Text -> Reference) -> m Text -> m Reference
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> m Text
entityRef) m Reference -> m Reference -> m Reference
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Reference
CharRef (Char -> Reference) -> m Char -> m Reference
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> m Char
decCharRef) m Reference -> m Reference -> m Reference
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Reference
CharRef (Char -> Reference) -> m Char -> m Reference
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> m Char
hexCharRef) where
  entityRef :: m Text
entityRef = Char -> m Char
forall (m :: * -> *). CharParsing m => Char -> m Char
char Char
'&' m Char -> m Text -> m Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> m Text
forall (m :: * -> *). (CharParsing m, Monad m) => m Text
tokenName m Text -> m Char -> m Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Char -> m Char
forall (m :: * -> *). CharParsing m => Char -> m Char
char Char
';'
  decCharRef :: m Char
decCharRef = m String -> m Char -> m Char -> m Char
forall (m :: * -> *) bra ket a.
Applicative m =>
m bra -> m ket -> m a -> m a
between (String -> m String
forall (m :: * -> *). CharParsing m => String -> m String
string String
"&#") (Char -> m Char
forall (m :: * -> *). CharParsing m => Char -> m Char
char Char
';') (m Char -> m Char) -> m Char -> m Char
forall a b. (a -> b) -> a -> b
$
    m Char -> m String
forall (f :: * -> *) a. Alternative f => f a -> f [a]
some m Char
forall (m :: * -> *). CharParsing m => m Char
digit m String -> (String -> m Int) -> m Int
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= (ReadS Int
forall a. (Eq a, Num a) => ReadS a
readDec ReadS Int -> ([(Int, String)] -> m Int) -> String -> m Int
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> String -> [(Int, String)] -> m Int
forall (m :: * -> *) a b.
(Monad m, Parsing m) =>
String -> [(a, b)] -> m a
liftParser String
"decimal") m Int -> (Int -> Char) -> m Char
forall (f :: * -> *) a b. Functor f => f a -> (a -> b) -> f b
<&> Int -> Char
chr

  hexCharRef :: m Char
hexCharRef = m String -> m Char -> m Char -> m Char
forall (m :: * -> *) bra ket a.
Applicative m =>
m bra -> m ket -> m a -> m a
between (String -> m String
forall (m :: * -> *). CharParsing m => String -> m String
string String
"&#x") (Char -> m Char
forall (m :: * -> *). CharParsing m => Char -> m Char
char Char
';') (m Char -> m Char) -> m Char -> m Char
forall a b. (a -> b) -> a -> b
$
    m Char -> m String
forall (f :: * -> *) a. Alternative f => f a -> f [a]
some m Char
forall (m :: * -> *). CharParsing m => m Char
hexDigit m String -> (String -> m Int) -> m Int
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= (ReadS Int
forall a. (Eq a, Num a) => ReadS a
readHex ReadS Int -> ([(Int, String)] -> m Int) -> String -> m Int
forall k (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>> String -> [(Int, String)] -> m Int
forall (m :: * -> *) a b.
(Monad m, Parsing m) =>
String -> [(a, b)] -> m a
liftParser String
"hexadecimal") m Int -> (Int -> Char) -> m Char
forall (f :: * -> *) a b. Functor f => f a -> (a -> b) -> f b
<&> Int -> Char
chr
  liftParser :: String -> [(a, b)] -> m a
liftParser String
_ ((a
result, b
_):[(a, b)]
_) = a -> m a
forall (m :: * -> *) a. Monad m => a -> m a
return a
result
  liftParser String
message [(a, b)]
_         = String -> m a
forall (m :: * -> *) a. Parsing m => String -> m a
unexpected (String -> m a) -> String -> m a
forall a b. (a -> b) -> a -> b
$ String
"Failed to parse " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> String
message