{-# LANGUAGE CPP #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternSynonyms #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE UndecidableInstances #-}
module Text.RDF.RDF4H.XmlParser.Xmlbf
(
parse
, parseM
, ParserT
, parserT
, runParserT
, ParserState
, initialParserState
, pElement
, pAnyElement
, pName
, pAttr
, pAttrs
, pChildren
, pText
, pEndOfInput
, encode
, Node
, node
, pattern Element
, element
, element'
, pattern Text
, text
, text'
, dfpos
, dfposM
, dfpre
, dfpreM
, FromXml(fromXml)
, ToXml(toXml)
)
where
import Control.Applicative (Alternative(empty, (<|>)), liftA2)
import Control.DeepSeq (NFData(rnf))
import Control.Monad (MonadPlus(mplus, mzero), join, when, ap)
import qualified Control.Monad.Catch as Ex
import Control.Monad.Error.Class (MonadError(catchError, throwError))
import Control.Monad.Cont (MonadCont(callCC))
import qualified Control.Monad.Fail
import Control.Monad.Fix (MonadFix(mfix))
import Control.Monad.IO.Class (MonadIO(liftIO))
import Control.Monad.Morph (MFunctor(hoist))
import Control.Monad.Reader.Class (MonadReader(local, ask))
import Control.Monad.State.Class (MonadState(state))
import Control.Monad.Trans (MonadTrans(lift))
import Control.Monad.Zip (MonadZip(mzipWith))
import Control.Selective (Selective(select))
import qualified Data.ByteString.Builder as BB
import qualified Data.ByteString.Builder.Prim as BBP
import qualified Data.Char as Char
import Data.Foldable (for_, toList)
import Data.Functor.Identity (Identity(Identity), runIdentity)
import qualified Data.HashMap.Strict as HM
import Data.Kind (Type)
import Data.Semigroup
import Data.Sequence (Seq)
import qualified Data.Sequence as Seq
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import qualified Data.Text.Lazy as TL
import qualified Data.Text.Lazy.Encoding as TL
import Data.Traversable (for)
import Data.Word (Word8)
data Node
= Element' !T.Text !(HM.HashMap T.Text T.Text) ![Node]
| Text' !TL.Text
deriving (Eq)
instance NFData Node where
rnf = \case
Element' n as cs -> rnf n `seq` rnf as `seq` rnf cs `seq` ()
Text' t -> rnf t `seq` ()
{-# INLINABLE rnf #-}
instance Show Node where
showsPrec n = \x -> showParen (n > 10) $ case x of
Text' t -> showString "Text " . showsPrec 0 t
Element' t as cs ->
showString "Element " .
showsPrec 0 t . showChar ' ' .
showsPrec 0 (HM.toList as) . showChar ' ' .
showsPrec 0 cs
pattern Element :: T.Text -> HM.HashMap T.Text T.Text -> [Node] -> Node
pattern Element t as cs <- Element' t as cs
{-# COMPLETE Element #-}
pattern Text :: TL.Text -> Node
pattern Text t <- Text' t
{-# COMPLETE Text #-}
node
:: (T.Text -> HM.HashMap T.Text T.Text -> [Node] -> a)
-> (TL.Text -> a)
-> Node
-> a
{-# INLINE node #-}
node fe ft = \case
Text' t -> ft t
Element' t as cs -> fe t as cs
normalize :: [Node] -> [Node]
{-# INLINE normalize #-}
normalize = \case
Text' "" : ns -> normalize ns
Text' a : Text' b : ns -> normalize (text (a <> b) <> ns)
Text' a : ns -> Text' a : normalize ns
Element' t as cs : ns -> Element' t as (normalize cs) : normalize ns
[] -> []
text
:: TL.Text
-> [Node]
{-# INLINE text #-}
text t = case text' t of
Right x -> [x]
Left _ -> []
text'
:: TL.Text
-> Either String Node
{-# INLINE text' #-}
text' = \case
"" -> Left "Empty text"
t -> Right (Text' t)
element
:: T.Text
-> HM.HashMap T.Text T.Text
-> [Node]
-> [Node]
{-# INLINE element #-}
element t hm ns = case element' t hm ns of
Right x -> [x]
Left _ -> []
element'
:: T.Text
-> HM.HashMap T.Text T.Text
-> [Node]
-> Either String Node
element' t0 hm0 ns0 = do
when (t0 /= T.strip t0)
(Left ("Element name has surrounding whitespace: " ++ show t0))
when (T.null t0)
(Left ("Element name is blank: " ++ show t0))
for_ (HM.keys hm0) $ \k -> do
when (k /= T.strip k)
(Left ("Attribute name has surrounding whitespace: " ++ show k))
when (T.null k)
(Left ("Attribute name is blank: " ++ show k))
Right (Element' t0 hm0 (normalize ns0))
class FromXml a where
fromXml :: ParserT m a
data ParserState
= STop ![Node]
| SReg !T.Text !(HM.HashMap T.Text T.Text) ![Node]
initialParserState :: [Node] -> ParserState
initialParserState = STop . normalize
{-# INLINE initialParserState #-}
newtype ParserT (m :: Type -> Type) (a :: Type)
= ParserT (ParserState -> m (ParserState, Either String a))
parserT
:: (ParserState -> m (ParserState, Either String a))
-> ParserT m a
parserT = ParserT
{-# INLINE parserT #-}
runParserT
:: ParserT m a
-> ParserState
-> m (ParserState, Either String a)
runParserT (ParserT f) = f
{-# INLINE runParserT #-}
parseM
:: Applicative m
=> ParserT m a
-> [Node]
-> m (Either String a)
parseM p = fmap snd . runParserT p . initialParserState
{-# INLINE parseM #-}
parse
:: ParserT Identity a
-> [Node]
-> Either String a
parse p = runIdentity . parseM p
{-# INLINE parse #-}
#if MIN_VERSION_base(4,9,0)
instance (Monad m, Semigroup a) => Semigroup (ParserT m a) where
(<>) = liftA2 (<>)
{-# INLINE (<>) #-}
#endif
instance (Monad m, Monoid a) => Monoid (ParserT m a) where
mempty = pure mempty
{-# INLINE mempty #-}
mappend = liftA2 mappend
{-# INLINE mappend #-}
instance Functor m => Functor (ParserT m) where
fmap f = \pa -> ParserT (\s -> fmap (fmap (fmap f)) (runParserT pa s))
{-# INLINE fmap #-}
instance Monad m => Applicative (ParserT m) where
pure = \a -> ParserT (\s -> pure (s, Right a))
{-# INLINE pure #-}
(<*>) = ap
{-# INLINE (<*>) #-}
instance Monad m => Alternative (ParserT m) where
empty = pFail "empty"
{-# INLINE empty #-}
pa <|> pb = ParserT (\s0 -> do
(s1, ea) <- runParserT pa s0
case ea of
Right a -> pure (s1, Right a)
Left _ -> runParserT pb s0)
{-# INLINABLE (<|>) #-}
instance Monad m => Selective (ParserT m) where
select pe pf = ParserT (\s0 -> do
(s1, eeab) <- runParserT pe s0
case eeab of
Right (Right b) -> pure (s1, Right b)
Right (Left a) -> runParserT (pf <*> pure a) s1
Left msg -> pure (s1, Left msg))
{-# INLINABLE select #-}
instance Monad m => Monad (ParserT m) where
return = pure
{-# INLINE return #-}
pa >>= kpb = ParserT (\s0 -> do
(s1, ea) <- runParserT pa s0
case ea of
Right a -> runParserT (kpb a) s1
Left msg -> pure (s1, Left msg))
{-# INLINABLE (>>=) #-}
fail = pFail
{-# INLINE fail #-}
#if MIN_VERSION_base(4,9,0)
instance Monad m => Control.Monad.Fail.MonadFail (ParserT m) where
fail = pFail
{-# INLINE fail #-}
#endif
pFail :: Applicative m => String -> ParserT m a
pFail = \msg -> ParserT (\s -> pure (s, Left msg))
{-# INLINE pFail #-}
instance Monad m => MonadPlus (ParserT m) where
mzero = empty
{-# INLINE mzero #-}
mplus = (<|>)
{-# INLINE mplus #-}
instance MonadFix m => MonadFix (ParserT m) where
mfix f =
let die = \msg -> error ("mfix (ParserT): " <> msg)
in ParserT (\s0 ->
mfix (\ ~(_s1, ea) -> runParserT (f (either die id ea)) s0))
instance MonadZip m => MonadZip (ParserT m) where
mzipWith f pa pb = ParserT (\s0 -> do
(s1, ea) <- runParserT pa s0
case ea of
Right a0 ->
mzipWith (\a1 (s2, eb) -> (s2, fmap (f a1) eb))
(pure a0) (runParserT pb s1)
Left msg -> pure (s1, Left msg))
{-# INLINABLE mzipWith #-}
instance MonadTrans ParserT where
lift = \ma -> ParserT (\s -> ma >>= \a -> pure (s, Right a))
{-# INLINE lift #-}
instance MFunctor ParserT where
hoist nat = \p -> ParserT (\s -> nat (runParserT p s))
{-# INLINE hoist #-}
instance MonadIO m => MonadIO (ParserT m) where
liftIO = lift . liftIO
{-# INLINE liftIO #-}
instance MonadReader r m => MonadReader r (ParserT m) where
ask = lift ask
{-# INLINE ask #-}
local f = \p -> ParserT (\s -> local f (runParserT p s))
{-# INLINE local #-}
instance MonadState s m => MonadState s (ParserT m) where
state = lift . state
{-# INLINE state #-}
instance MonadError e m => MonadError e (ParserT m) where
throwError = lift . throwError
{-# INLINABLE throwError #-}
catchError ma h = ParserT (\s ->
catchError (runParserT ma s)
(\e -> runParserT (h e) s))
{-# INLINABLE catchError #-}
instance Ex.MonadThrow m => Ex.MonadThrow (ParserT m) where
throwM = lift . Ex.throwM
{-# INLINABLE throwM #-}
instance Ex.MonadCatch m => Ex.MonadCatch (ParserT m) where
catch ma h = ParserT (\s ->
Ex.catch (runParserT ma s)
(\e -> runParserT (h e) s))
{-# INLINABLE catch #-}
instance Ex.MonadMask m => Ex.MonadMask (ParserT m) where
mask f = ParserT (\s ->
Ex.mask (\u ->
runParserT (f (\p -> ParserT (u . runParserT p))) s))
{-# INLINABLE mask #-}
uninterruptibleMask f = ParserT (\s ->
Ex.uninterruptibleMask (\u ->
runParserT (f (\p -> ParserT (u . runParserT p))) s))
{-# INLINABLE uninterruptibleMask #-}
generalBracket acq rel use = ParserT (\s0 -> do
((_sb,eb), (sc,ec)) <- Ex.generalBracket
(runParserT acq s0)
(\(s1, ea) ec -> case ea of
Right a -> case ec of
Ex.ExitCaseSuccess (s2, Right b) ->
runParserT (rel a (Ex.ExitCaseSuccess b)) s2
Ex.ExitCaseSuccess (s2, Left msg) ->
pure (s2, Left msg)
Ex.ExitCaseException e ->
runParserT (rel a (Ex.ExitCaseException e)) s1
Ex.ExitCaseAbort ->
runParserT (rel a Ex.ExitCaseAbort) s1
Left msg ->
pure (s1, Left msg))
(\(s1, ea) -> case ea of
Right a -> runParserT (use a) s1
Left msg ->
pure (s1, Left msg))
pure (sc, flip (,) <$> ec <*> eb))
instance MonadCont m => MonadCont (ParserT m) where
callCC f = ParserT (\s0 ->
callCC (\c -> runParserT (f (\a -> ParserT (\s1 -> c (s1, Right a)))) s0))
pElement
:: Monad m
=> T.Text
-> ParserT m a
-> ParserT m a
pElement t0 p0 = ParserT $ \case
SReg t1 as0 (Element' t as cs : cs0) | t == t0 ->
runParserT p0 (SReg t as cs) >>= \case
(_, Right a) -> pure (SReg t1 as0 cs0, Right a)
(s1, Left msg) -> pure (s1, Left msg)
STop (Element' t as cs : cs0) | t == t0 ->
runParserT p0 (SReg t as cs) >>= \case
(_, Right a) -> pure (STop cs0, Right a)
(s1, Left msg) -> pure (s1, Left msg)
SReg t as (Text' x : cs) | TL.all Char.isSpace x ->
runParserT (pElement t0 p0) (SReg t as cs)
STop (Text' x : cs) | TL.all Char.isSpace x ->
runParserT (pElement t0 p0) (STop cs)
s0 -> pure (s0, Left ("Missing element " <> show t0))
{-# INLINABLE pElement #-}
pAnyElement
:: Monad m
=> ParserT m a
-> ParserT m a
pAnyElement p0 = ParserT $ \case
SReg t0 as0 (Element' t as cs : cs0) ->
runParserT p0 (SReg t as cs) >>= \case
(_, Right a) -> pure (SReg t0 as0 cs0, Right a)
(s1, Left msg) -> pure (s1, Left msg)
STop (Element' t as cs : cs0) ->
runParserT p0 (SReg t as cs) >>= \case
(_, Right a) -> pure (STop cs0, Right a)
(s1, Left msg) -> pure (s1, Left msg)
SReg t as (Text' x : cs) | TL.all Char.isSpace x ->
runParserT (pAnyElement p0) (SReg t as cs)
STop (Text' x : cs) | TL.all Char.isSpace x ->
runParserT (pAnyElement p0) (STop cs)
s0 -> pure (s0, Left "Missing element")
{-# INLINABLE pAnyElement #-}
pName
:: Applicative m
=> ParserT m T.Text
pName = ParserT (\s -> case s of
SReg t _ _ -> pure (s, Right t)
_ -> pure (s, Left "Before selecting an name, you must select an element"))
{-# INLINABLE pName #-}
pAttr
:: Applicative m
=> T.Text
-> ParserT m T.Text
pAttr n = ParserT (\s -> case s of
SReg t as cs -> case HM.lookup n as of
Just x -> pure (SReg t (HM.delete n as) cs, Right x)
Nothing -> pure (s, Left ("Missing attribute " <> show n))
_ -> pure (s, Left "Before selecting an attribute, you must select an element"))
{-# INLINABLE pAttr #-}
pAttrs
:: Applicative m
=> ParserT m (HM.HashMap T.Text T.Text)
pAttrs = ParserT (\s -> case s of
SReg t as cs -> pure (SReg t mempty cs, Right as)
_ -> pure (s, Left "Before selecting an attribute, you must select an element"))
{-# INLINABLE pAttrs #-}
pChildren
:: Applicative m
=> ParserT m [Node]
pChildren = ParserT (\case
STop cs -> pure (STop mempty, Right cs)
SReg t as cs -> pure (SReg t as mempty, Right cs))
{-# INLINABLE pChildren #-}
pText
:: Applicative m
=> ParserT m TL.Text
pText = ParserT (\case
STop (Text x : ns) -> pure (STop ns, Right x)
SReg t as (Text x : cs) -> pure (SReg t as cs, Right x)
s0 -> pure (s0, Left "Missing text node"))
{-# INLINABLE pText #-}
pEndOfInput :: Applicative m => ParserT m ()
pEndOfInput = ParserT (\s -> case isEof s of
True -> pure (s, Right ())
False -> pure (s, Left "Not end of input yet"))
{-# INLINABLE pEndOfInput #-}
isEof :: ParserState -> Bool
isEof = \case
SReg _ as cs -> HM.null as && null cs
STop ns -> null ns
{-# INLINE isEof #-}
class ToXml a where
toXml :: a -> [Node]
encode :: [Node] -> BB.Builder
encode xs = mconcat (map encodeNode xs)
where
encodeNode :: Node -> BB.Builder
encodeNode = \case
Text x -> encodeXmlUtf8Lazy x
Element t as cs ->
"<" <> encodeUtf8 t
<> encodeAttrs (">" <> encode cs <> "</" <> encodeUtf8 t <> ">") as
{-# INLINE encodeNode #-}
encodeAttrs :: BB.Builder -> HM.HashMap T.Text T.Text -> BB.Builder
encodeAttrs = HM.foldlWithKey'
(\o k v -> " " <> encodeUtf8 k <> "=\"" <> encodeXmlUtf8 v <> "\"" <> o)
{-# INLINE encodeAttrs #-}
dfpos :: ((Node -> [Node]) -> Node -> [Node]) -> Node -> [Node]
dfpos f = runIdentity . dfposM (\k -> Identity . f (runIdentity . k))
dfposM :: Monad m => ((Node -> m [Node]) -> Node -> m [Node]) -> Node -> m [Node]
dfposM f = \n0 -> do
c1 <- traverseChildren (dfposM f) (cursorFromNode n0)
c2 <- traverseRightSiblings (dfposM f) c1
fmap (normalize . join)
(traverse (f (dfposM f)) (cursorSiblings c2))
dfpre :: ((Node -> [Node]) -> Node -> [Node]) -> Node -> [Node]
dfpre f = runIdentity . dfpreM (\k -> Identity . f (runIdentity . k))
dfpreM :: Monad m => ((Node -> m [Node]) -> Node -> m [Node]) -> Node -> m [Node]
dfpreM f = \n0 -> do
ns <- f (dfpreM f) n0
fmap (normalize . join) $ for ns $ \n -> do
c1 <- traverseChildren (dfpreM f) (cursorFromNode n)
cursorSiblings <$> traverseRightSiblings (dfpreM f) c1
data Cursor = Cursor
{ _cursorCurrent :: !Node
, _cursorLefts :: !(Seq Node)
, _cursorRights :: !(Seq Node)
, _cursorParents :: !(Seq (Seq Node, T.Text, HM.HashMap T.Text T.Text, Seq Node))
}
traverseChildren :: Monad m => (Node -> m [Node]) -> Cursor -> m Cursor
{-# INLINABLE traverseChildren #-}
traverseChildren f c0 = case _cursorCurrent c0 of
Text _ -> pure c0
Element t as cs -> do
n1s <- fmap (normalize . join) (traverse f cs)
pure (c0 {_cursorCurrent = Element' t as n1s})
traverseRightSiblings :: Monad m => (Node -> m [Node]) -> Cursor -> m Cursor
{-# INLINABLE traverseRightSiblings #-}
traverseRightSiblings f c0 = case cursorRemoveRight c0 of
Nothing -> pure c0
Just (n1, c1) -> do
n2s <- fmap normalize (f n1)
traverseRightSiblings f (cursorInsertManyRight n2s c1)
cursorFromNode :: Node -> Cursor
{-# INLINE cursorFromNode #-}
cursorFromNode n = Cursor n mempty mempty mempty
cursorSiblings :: Cursor -> [Node]
{-# INLINE cursorSiblings #-}
cursorSiblings (Cursor cur ls rs _) =
toList (Seq.reverse ls <> (cur Seq.<| rs))
cursorRemoveRight :: Cursor -> Maybe (Node, Cursor)
{-# INLINABLE cursorRemoveRight #-}
cursorRemoveRight = \case
Cursor n ls rs0 ps | not (Seq.null rs0) ->
case Seq.viewl rs0 of
r Seq.:< rs -> Just (r, Cursor n ls rs ps)
_ -> undefined
_ -> Nothing
cursorInsertManyRight :: [Node] -> Cursor -> Cursor
{-# INLINE cursorInsertManyRight #-}
cursorInsertManyRight ns (Cursor nn ls rs ps) =
Cursor nn ls (Seq.fromList ns <> rs) ps
encodeUtf8 :: T.Text -> BB.Builder
{-# INLINE encodeUtf8 #-}
encodeUtf8 = T.encodeUtf8Builder
encodeXmlUtf8 :: T.Text -> BB.Builder
{-# INLINE encodeXmlUtf8 #-}
encodeXmlUtf8 = T.encodeUtf8BuilderEscaped xmlEscaped
encodeXmlUtf8Lazy :: TL.Text -> BB.Builder
{-# INLINE encodeXmlUtf8Lazy #-}
encodeXmlUtf8Lazy = TL.encodeUtf8BuilderEscaped xmlEscaped
xmlEscaped :: BBP.BoundedPrim Word8
{-# INLINE xmlEscaped #-}
xmlEscaped =
BBP.condB (== 38) (fixed5 (38,(97,(109,(112,59))))) $
BBP.condB (== 60) (fixed4 (38,(108,(116,59)))) $
BBP.condB (== 62) (fixed4 (38,(103,(116,59)))) $
BBP.condB (== 34) (fixed5 (38,(35,(51,(52,59))))) $
BBP.liftFixedToBounded BBP.word8
where
{-# INLINE fixed4 #-}
fixed4 :: (Word8, (Word8, (Word8, Word8))) -> BBP.BoundedPrim Word8
fixed4 x = BBP.liftFixedToBounded
(const x BBP.>$< BBP.word8 BBP.>*< BBP.word8
BBP.>*< BBP.word8 BBP.>*< BBP.word8)
{-# INLINE fixed5 #-}
fixed5 :: (Word8, (Word8, (Word8, (Word8, Word8)))) -> BBP.BoundedPrim Word8
fixed5 x = BBP.liftFixedToBounded
(const x BBP.>$< BBP.word8 BBP.>*< BBP.word8
BBP.>*< BBP.word8 BBP.>*< BBP.word8 BBP.>*< BBP.word8)