{-# LANGUAGE BangPatterns               #-}
{-# LANGUAGE CPP                        #-}
{-# LANGUAGE DeriveDataTypeable         #-}
{-# LANGUAGE DeriveFunctor              #-}
{-# LANGUAGE FlexibleContexts           #-}
{-# LANGUAGE FlexibleInstances          #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE OverloadedStrings          #-}
{-# LANGUAGE PatternGuards              #-}
{-# LANGUAGE RankNTypes                 #-}
{-# LANGUAGE StandaloneDeriving         #-}
{-# LANGUAGE TypeFamilies               #-}
-- | This module provides both a native Haskell solution for parsing XML
-- documents into a stream of events, and a set of parser combinators for
-- dealing with a stream of events.
--
-- As a simple example:
--
-- >>> :set -XOverloadedStrings
-- >>> import Conduit (runConduit, (.|))
-- >>> import Data.Text (Text, unpack)
-- >>> import Data.XML.Types (Event)
-- >>> data Person = Person Int Text Text deriving Show
-- >>> :{
-- let parsePerson :: MonadThrow m => ConduitT Event o m (Maybe Person)
--     parsePerson = tag' "person" parseAttributes $ \(age, goodAtHaskell) -> do
--       name <- content
--       return $ Person (read $ unpack age) name goodAtHaskell
--       where parseAttributes = (,) <$> requireAttr "age" <*> requireAttr "goodAtHaskell" <* ignoreAttrs
--     parsePeople :: MonadThrow m => ConduitT Event o m (Maybe [Person])
--     parsePeople = tagNoAttr "people" $ many parsePerson
--     inputXml = mconcat
--       [ "<?xml version=\"1.0\" encoding=\"utf-8\"?>"
--       , "<people>"
--       , "  <person age=\"25\" goodAtHaskell=\"yes\">Michael</person>"
--       , "  <person age=\"2\" goodAtHaskell=\"might become\">Eliezer</person>"
--       , "</people>"
--       ]
-- :}
--
-- >>> runConduit $ parseLBS def inputXml .| force "people required" parsePeople
-- [Person 25 "Michael" "yes",Person 2 "Eliezer" "might become"]
--
--
-- This module also supports streaming results using 'yield'.
-- This allows parser results to be processed using conduits
-- while a particular parser (e.g. 'many') is still running.
-- Without using streaming results, you have to wait until the parser finished
-- before you can process the result list. Large XML files might be easier
-- to process by using streaming results.
-- See http://stackoverflow.com/q/21367423/2597135 for a related discussion.
--
-- >>> import Data.Conduit.List as CL
-- >>> :{
-- let parsePeople' :: MonadThrow m => ConduitT Event Person m (Maybe ())
--     parsePeople' = tagNoAttr "people" $ manyYield parsePerson
-- :}
--
-- >>> runConduit $ parseLBS def inputXml .| force "people required" parsePeople' .| CL.mapM_ print
-- Person 25 "Michael" "yes"
-- Person 2 "Eliezer" "might become"
--
-- Previous versions of this module contained a number of more sophisticated
-- functions written by Aristid Breitkreuz and Dmitry Olshansky. To keep this
-- package simpler, those functions are being moved to a separate package. This
-- note will be updated with the name of the package(s) when available.
module Text.XML.Stream.Parse
    ( -- * Parsing XML files
      parseBytes
    , parseBytesPos
    , parseText
    , parseTextPos
    , detectUtf
    , parseFile
    , parseLBS
      -- ** Parser settings
    , ParseSettings
    , def
    , DecodeEntities
    , DecodeIllegalCharacters
    , psDecodeEntities
    , psDecodeIllegalCharacters
    , psRetainNamespaces
    , psEntityExpansionSizeLimit
    , psIgnoreInternalEntityDeclarations
      -- *** Entity decoding
    , decodeXmlEntities
    , decodeHtmlEntities
      -- * Event parsing
    , tag
    , tag'
    , tagNoAttr
    , tagIgnoreAttrs
    , content
    , contentMaybe
      -- * Ignoring tags/trees
    , ignoreEmptyTag
    , ignoreTree
    , ignoreContent
    , ignoreTreeContent
    , ignoreAnyTreeContent
      -- * Streaming events
    , takeContent
    , takeTree
    , takeTreeContent
    , takeAnyTreeContent
      -- * Tag name matching
    , NameMatcher(..)
    , matching
    , anyOf
    , anyName
      -- * Attribute parsing
    , AttrParser
    , attr
    , requireAttr
    , optionalAttr
    , requireAttrRaw
    , optionalAttrRaw
    , ignoreAttrs
      -- * Combinators
    , orE
    , choose
    , many
    , many_
    , manyIgnore
    , many'
    , force
      -- * Streaming combinators
    , manyYield
    , manyYield'
    , manyIgnoreYield
      -- * Exceptions
    , XmlException (..)
      -- * Other types
    , PositionRange
    , EventPos
    ) where
import           Conduit
import           Control.Applicative          (Alternative (empty, (<|>)),
                                               Applicative (..), (<$>))
import qualified Control.Applicative          as A
import           Control.Arrow                ((***))
import           Control.Exception            (Exception (..), SomeException)
import           Control.Monad                (ap, liftM, void)
import           Control.Monad.Fix            (fix)
import           Control.Monad.IO.Class       (liftIO)
import           Control.Monad.Trans.Class    (lift)
import           Control.Monad.Trans.Maybe    (MaybeT (..))
import           Control.Monad.Trans.Resource (MonadResource, MonadThrow (..),
                                               throwM)
import           Data.Attoparsec.Internal     (concatReverse)
import           Data.Attoparsec.Text         (Parser, anyChar, char, manyTill,
                                               skipWhile, string, takeWhile,
                                               takeWhile1, (<?>),
                                               notInClass, skipMany, skipMany1,
                                               satisfy, peekChar)
import qualified Data.Attoparsec.Text         as AT
import qualified Data.ByteString              as S
import qualified Data.ByteString.Lazy         as L
import qualified Data.ByteString.Builder      as Builder
import           Data.Char                    (isSpace)
import           Data.Conduit.Attoparsec      (PositionRange, conduitParser)
import qualified Data.Conduit.Text            as CT
import           Data.Default.Class           (Default (..))
import           Data.List                    (foldl', intercalate)
import qualified Data.Map                     as Map
import           Data.Maybe                   (fromMaybe, isNothing, mapMaybe)
import           Data.String                  (IsString (..))
import           Data.Text                    (Text, pack)
import qualified Data.Text                    as T
import           Data.Text.Encoding           (decodeUtf8With)
import           Data.Text.Encoding.Error     (lenientDecode)
import           Data.Typeable                (Typeable)
import           Data.XML.Types               (Content (..), Event (..),
                                               ExternalID (..),
                                               Instruction (..), Name (..))
import           Prelude                      hiding (takeWhile)
import           Text.XML.Stream.Token

-- $setup
-- >>> :set -XOverloadedStrings
-- >>> import Conduit
-- >>> import Control.Monad (void, join)

type EntityTable = [(Text, Text)]

tokenToEvent :: ParseSettings -> EntityTable -> [NSLevel] -> Token -> (EntityTable, [NSLevel], [Event])
tokenToEvent :: ParseSettings
-> EntityTable
-> [NSLevel]
-> Token
-> (EntityTable, [NSLevel], [Event])
tokenToEvent ParseSettings
_ EntityTable
es [NSLevel]
n (TokenXMLDeclaration [TAttribute]
_) = (EntityTable
es, [NSLevel]
n, [])
tokenToEvent ParseSettings
_ EntityTable
es [NSLevel]
n (TokenInstruction Instruction
i) = (EntityTable
es, [NSLevel]
n, [Instruction -> Event
EventInstruction Instruction
i])
tokenToEvent ParseSettings
ps EntityTable
es [NSLevel]
n (TokenBeginElement TName
name [TAttribute]
as Bool
isClosed Int
_) =
    (EntityTable
es, [NSLevel]
n', if Bool
isClosed then [Event
begin, Event
end] else [Event
begin])
  where
    l0 :: NSLevel
l0 = case [NSLevel]
n of
            []  -> Maybe Text -> Map Text Text -> NSLevel
NSLevel forall a. Maybe a
Nothing forall k a. Map k a
Map.empty
            NSLevel
x:[NSLevel]
_ -> NSLevel
x
    ([TAttribute] -> [TAttribute]
as', NSLevel
l') = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' ([TAttribute] -> [TAttribute], NSLevel)
-> TAttribute -> ([TAttribute] -> [TAttribute], NSLevel)
go (forall a. a -> a
id, NSLevel
l0) [TAttribute]
as
    go :: ([TAttribute] -> [TAttribute], NSLevel)
-> TAttribute -> ([TAttribute] -> [TAttribute], NSLevel)
go ([TAttribute] -> [TAttribute]
front, NSLevel
l) (TName Maybe Text
kpref Text
kname, [Content]
val) =
        (([TAttribute] -> [TAttribute]) -> [TAttribute] -> [TAttribute]
addNS [TAttribute] -> [TAttribute]
front, NSLevel
l'')
      where
        isPrefixed :: Bool
isPrefixed = Maybe Text
kpref forall a. Eq a => a -> a -> Bool
== forall a. a -> Maybe a
Just Text
"xmlns"
        isUnprefixed :: Bool
isUnprefixed = forall a. Maybe a -> Bool
isNothing Maybe Text
kpref Bool -> Bool -> Bool
&& Text
kname forall a. Eq a => a -> a -> Bool
== Text
"xmlns"

        addNS :: ([TAttribute] -> [TAttribute]) -> [TAttribute] -> [TAttribute]
addNS
            | Bool -> Bool
not (ParseSettings -> Bool
psRetainNamespaces ParseSettings
ps) Bool -> Bool -> Bool
&& (Bool
isPrefixed Bool -> Bool -> Bool
|| Bool
isUnprefixed) = forall a. a -> a
id
            | Bool
otherwise = (forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((TName
tname, ParseSettings -> EntityTable -> [Content] -> [Content]
resolveEntities' ParseSettings
ps EntityTable
es [Content]
val)forall a. a -> [a] -> [a]
:))
          where
            resolveEntities' :: ParseSettings -> EntityTable -> [Content] -> [Content]
resolveEntities' ParseSettings
ps' EntityTable
es' [Content]
xs =
              forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe Token -> Maybe Content
extractTokenContent
                (ParseSettings -> EntityTable -> [Token] -> [Token]
resolveEntities ParseSettings
ps' EntityTable
es'
                  (forall a b. (a -> b) -> [a] -> [b]
map Content -> Token
TokenContent [Content]
xs))
            extractTokenContent :: Token -> Maybe Content
extractTokenContent (TokenContent Content
c) = forall a. a -> Maybe a
Just Content
c
            extractTokenContent Token
_ = forall a. Maybe a
Nothing

            tname :: TName
tname
                | Bool
isPrefixed = Maybe Text -> Text -> TName
TName forall a. Maybe a
Nothing (Text
"xmlns:" Text -> Text -> Text
`T.append` Text
kname)
                | Bool
otherwise = Maybe Text -> Text -> TName
TName Maybe Text
kpref Text
kname

        l'' :: NSLevel
l''
            | Bool
isPrefixed =
                NSLevel
l { prefixes :: Map Text Text
prefixes = forall k a. Ord k => k -> a -> Map k a -> Map k a
Map.insert Text
kname ([Content] -> Text
contentsToText [Content]
val)
                                     forall a b. (a -> b) -> a -> b
$ NSLevel -> Map Text Text
prefixes NSLevel
l }
            | Bool
isUnprefixed =
                NSLevel
l { defaultNS :: Maybe Text
defaultNS = if Text -> Bool
T.null forall a b. (a -> b) -> a -> b
$ [Content] -> Text
contentsToText [Content]
val
                                            then forall a. Maybe a
Nothing
                                            else forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ [Content] -> Text
contentsToText [Content]
val }
            | Bool
otherwise = NSLevel
l

    n' :: [NSLevel]
n' = if Bool
isClosed then [NSLevel]
n else NSLevel
l' forall a. a -> [a] -> [a]
: [NSLevel]
n
    fixAttName :: TAttribute -> (Name, [Content])
fixAttName (TName
name', [Content]
val) = (Bool -> NSLevel -> TName -> Name
tnameToName Bool
True NSLevel
l' TName
name', [Content]
val)
    elementName :: Name
elementName = Bool -> NSLevel -> TName -> Name
tnameToName Bool
False NSLevel
l' TName
name
    begin :: Event
begin = Name -> [(Name, [Content])] -> Event
EventBeginElement Name
elementName forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map TAttribute -> (Name, [Content])
fixAttName forall a b. (a -> b) -> a -> b
$ [TAttribute] -> [TAttribute]
as' []
    end :: Event
end = Name -> Event
EventEndElement Name
elementName
tokenToEvent ParseSettings
_ EntityTable
es [NSLevel]
n (TokenEndElement TName
name) =
    (EntityTable
es, [NSLevel]
n', [Name -> Event
EventEndElement forall a b. (a -> b) -> a -> b
$ Bool -> NSLevel -> TName -> Name
tnameToName Bool
False NSLevel
l TName
name])
  where
    (NSLevel
l, [NSLevel]
n') =
        case [NSLevel]
n of
            []   -> (Maybe Text -> Map Text Text -> NSLevel
NSLevel forall a. Maybe a
Nothing forall k a. Map k a
Map.empty, [])
            NSLevel
x:[NSLevel]
xs -> (NSLevel
x, [NSLevel]
xs)
tokenToEvent ParseSettings
ps EntityTable
es [NSLevel]
n tok :: Token
tok@(TokenContent c :: Content
c@(ContentEntity Text
e))
  = case forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
e EntityTable
es of
      Just Text
_  -> (EntityTable
es, [NSLevel]
n, forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Token -> [Event]
toEvents [Token]
newtoks)
      Maybe Text
Nothing -> (EntityTable
es, [NSLevel]
n, [Content -> Event
EventContent Content
c])
 where
  toEvents :: Token -> [Event]
toEvents Token
t =
    let (EntityTable
_, [NSLevel]
_, [Event]
events) = ParseSettings
-> EntityTable
-> [NSLevel]
-> Token
-> (EntityTable, [NSLevel], [Event])
tokenToEvent ParseSettings
ps [] [NSLevel]
n Token
t
     in [Event]
events
  newtoks :: [Token]
newtoks = ParseSettings -> EntityTable -> [Token] -> [Token]
resolveEntities ParseSettings
ps EntityTable
es [Token
tok]
tokenToEvent ParseSettings
_ EntityTable
es [NSLevel]
n (TokenContent Content
c) = (EntityTable
es, [NSLevel]
n, [Content -> Event
EventContent Content
c])
tokenToEvent ParseSettings
_ EntityTable
es [NSLevel]
n (TokenComment Text
c) = (EntityTable
es, [NSLevel]
n, [Text -> Event
EventComment Text
c])
tokenToEvent ParseSettings
_ EntityTable
es [NSLevel]
n (TokenDoctype Text
t Maybe ExternalID
eid EntityTable
es') = (EntityTable
es forall a. [a] -> [a] -> [a]
++ EntityTable
es', [NSLevel]
n, [Text -> Maybe ExternalID -> Event
EventBeginDoctype Text
t Maybe ExternalID
eid, Event
EventEndDoctype])
tokenToEvent ParseSettings
_ EntityTable
es [NSLevel]
n (TokenCDATA Text
t) = (EntityTable
es, [NSLevel]
n, [Text -> Event
EventCDATA Text
t])

resolveEntities :: ParseSettings
                -> EntityTable
                -> [Token]
                -> [Token]
resolveEntities :: ParseSettings -> EntityTable -> [Token] -> [Token]
resolveEntities ParseSettings
ps EntityTable
entities = forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Token -> [Token] -> [Token]
go []
 where
  go :: Token -> [Token] -> [Token]
go tok :: Token
tok@(TokenContent (ContentEntity Text
e)) [Token]
toks
    = case EntityTable -> Text -> Maybe [Token]
expandEntity EntityTable
entities Text
e of
        Just [Token]
xs -> forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Token -> [Token] -> [Token]
go [Token]
toks [Token]
xs
        Maybe [Token]
Nothing ->  Token
tok forall a. a -> [a] -> [a]
: [Token]
toks
  go Token
tok [Token]
toks = Token
tok forall a. a -> [a] -> [a]
: [Token]
toks
  expandEntity :: EntityTable -> Text -> Maybe [Token]
expandEntity EntityTable
es Text
e
    | Just Text
t <- forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
e EntityTable
es =
      case forall a. Parser a -> Text -> Either String a
AT.parseOnly (forall (f :: * -> *) a b. Alternative f => f a -> f b -> f [a]
manyTill
                          (ParseSettings -> Parser Token
parseToken ParseSettings
ps :: Parser Token)
                          forall t. Chunk t => Parser t ()
AT.endOfInput) Text
t of
        Left String
_      -> forall a. Maybe a
Nothing
        Right [Token]
xs    -> -- recursively expand
                       let es' :: EntityTable
es' = forall a. (a -> Bool) -> [a] -> [a]
filter (\(Text
x,Text
_) -> Text
x forall a. Eq a => a -> a -> Bool
/= Text
e) EntityTable
es
                        in forall a b. (a, b) -> a
fst forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (EntityTable
-> Token -> Maybe ([Token], Int) -> Maybe ([Token], Int)
goent EntityTable
es') (forall a. a -> Maybe a
Just ([], Int
0)) [Token]
xs
          -- we delete e from the entity map in resolving its contents,
          -- to avoid infinite loops in recursive expansion.
    | Bool
otherwise     = forall a. Maybe a
Nothing
  goent :: EntityTable
-> Token -> Maybe ([Token], Int) -> Maybe ([Token], Int)
goent EntityTable
_ Token
_ Maybe ([Token], Int)
Nothing = forall a. Maybe a
Nothing
  goent EntityTable
es (TokenContent (ContentEntity Text
e)) (Just ([Token]
cs, Int
size))
    = EntityTable -> Text -> Maybe [Token]
expandEntity EntityTable
es Text
e forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (EntityTable
-> Token -> Maybe ([Token], Int) -> Maybe ([Token], Int)
goent EntityTable
es) (forall a. a -> Maybe a
Just ([Token]
cs, Int
size))
  goent EntityTable
_ Token
tok (Just ([Token]
toks, Int
size)) =
    let toksize :: Int
toksize = forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$
                  ByteString -> Int64
L.length (Builder -> ByteString
Builder.toLazyByteString (Token -> Builder
tokenToBuilder Token
tok))
     in case Int
size forall a. Num a => a -> a -> a
+ Int
toksize of
      Int
n | Int
n forall a. Ord a => a -> a -> Bool
> ParseSettings -> Int
psEntityExpansionSizeLimit ParseSettings
ps -> forall a. Maybe a
Nothing
        | Bool
otherwise -> forall a. a -> Maybe a
Just (Token
tokforall a. a -> [a] -> [a]
:[Token]
toks, Int
n)


tnameToName :: Bool -> NSLevel -> TName -> Name
tnameToName :: Bool -> NSLevel -> TName -> Name
tnameToName Bool
_ NSLevel
_ (TName (Just Text
"xml") Text
name) =
    Text -> Maybe Text -> Maybe Text -> Name
Name Text
name (forall a. a -> Maybe a
Just Text
"http://www.w3.org/XML/1998/namespace") (forall a. a -> Maybe a
Just Text
"xml")
tnameToName Bool
isAttr (NSLevel Maybe Text
def' Map Text Text
_) (TName Maybe Text
Nothing Text
name) =
    Text -> Maybe Text -> Maybe Text -> Name
Name Text
name (if Bool
isAttr then forall a. Maybe a
Nothing else Maybe Text
def') forall a. Maybe a
Nothing
tnameToName Bool
_ (NSLevel Maybe Text
_ Map Text Text
m) (TName (Just Text
pref) Text
name) =
    case forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Text
pref Map Text Text
m of
        Just Text
ns -> Text -> Maybe Text -> Maybe Text -> Name
Name Text
name (forall a. a -> Maybe a
Just Text
ns) (forall a. a -> Maybe a
Just Text
pref)
        Maybe Text
Nothing -> Text -> Maybe Text -> Maybe Text -> Name
Name Text
name forall a. Maybe a
Nothing (forall a. a -> Maybe a
Just Text
pref) -- FIXME is this correct?

-- | Automatically determine which UTF variant is being used. This function
-- first checks for BOMs, removing them as necessary, and then check for the
-- equivalent of <?xml for each of UTF-8, UTF-16LE/BE, and UTF-32LE/BE. It
-- defaults to assuming UTF-8.
detectUtf :: MonadThrow m => ConduitT S.ByteString T.Text m ()
detectUtf :: forall (m :: * -> *). MonadThrow m => ConduitT ByteString Text m ()
detectUtf =
    forall {m :: * -> *}.
MonadThrow m =>
(ByteString -> ByteString) -> ConduitT ByteString Text m ()
conduit forall a. a -> a
id
  where
    conduit :: (ByteString -> ByteString) -> ConduitT ByteString Text m ()
conduit ByteString -> ByteString
front = forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
await forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall (m :: * -> *) a. Monad m => a -> m a
return ()) ((ByteString -> ByteString)
-> ByteString -> ConduitT ByteString Text m ()
push ByteString -> ByteString
front)

    push :: (ByteString -> ByteString)
-> ByteString -> ConduitT ByteString Text m ()
push ByteString -> ByteString
front ByteString
bss =
        forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (ByteString -> ByteString) -> ConduitT ByteString Text m ()
conduit
               (forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry forall (m :: * -> *).
MonadThrow m =>
ByteString -> Maybe Codec -> ConduitT ByteString Text m ()
checkXMLDecl)
               (forall {t}.
(t -> ByteString)
-> t -> Either (ByteString -> ByteString) (ByteString, Maybe Codec)
getEncoding ByteString -> ByteString
front ByteString
bss)

    getEncoding :: (t -> ByteString)
-> t -> Either (ByteString -> ByteString) (ByteString, Maybe Codec)
getEncoding t -> ByteString
front t
bs'
        | ByteString -> Int
S.length ByteString
bs forall a. Ord a => a -> a -> Bool
< Int
4 =
            forall a b. a -> Either a b
Left (ByteString
bs ByteString -> ByteString -> ByteString
`S.append`)
        | Bool
otherwise =
            forall a b. b -> Either a b
Right (ByteString
bsOut, Maybe Codec
mcodec)
      where
        bs :: ByteString
bs = t -> ByteString
front t
bs'
        bsOut :: ByteString
bsOut = ByteString -> ByteString -> ByteString
S.append (Int -> ByteString -> ByteString
S.drop Int
toDrop ByteString
x) ByteString
y
        (ByteString
x, ByteString
y) = Int -> ByteString -> (ByteString, ByteString)
S.splitAt Int
4 ByteString
bs
        (Int
toDrop, Maybe Codec
mcodec) =
            case ByteString -> [Word8]
S.unpack ByteString
x of
                [Word8
0x00, Word8
0x00, Word8
0xFE, Word8
0xFF] -> (Int
4, forall a. a -> Maybe a
Just Codec
CT.utf32_be)
                [Word8
0xFF, Word8
0xFE, Word8
0x00, Word8
0x00] -> (Int
4, forall a. a -> Maybe a
Just Codec
CT.utf32_le)
                Word8
0xFE : Word8
0xFF: [Word8]
_           -> (Int
2, forall a. a -> Maybe a
Just Codec
CT.utf16_be)
                Word8
0xFF : Word8
0xFE: [Word8]
_           -> (Int
2, forall a. a -> Maybe a
Just Codec
CT.utf16_le)
                Word8
0xEF : Word8
0xBB: Word8
0xBF : [Word8]
_    -> (Int
3, forall a. a -> Maybe a
Just Codec
CT.utf8)
                [Word8
0x00, Word8
0x00, Word8
0x00, Word8
0x3C] -> (Int
0, forall a. a -> Maybe a
Just Codec
CT.utf32_be)
                [Word8
0x3C, Word8
0x00, Word8
0x00, Word8
0x00] -> (Int
0, forall a. a -> Maybe a
Just Codec
CT.utf32_le)
                [Word8
0x00, Word8
0x3C, Word8
0x00, Word8
0x3F] -> (Int
0, forall a. a -> Maybe a
Just Codec
CT.utf16_be)
                [Word8
0x3C, Word8
0x00, Word8
0x3F, Word8
0x00] -> (Int
0, forall a. a -> Maybe a
Just Codec
CT.utf16_le)
                [Word8]
_                        -> (Int
0, forall a. Maybe a
Nothing) -- Assuming UTF-8

checkXMLDecl :: MonadThrow m
             => S.ByteString
             -> Maybe CT.Codec
             -> ConduitT S.ByteString T.Text m ()
checkXMLDecl :: forall (m :: * -> *).
MonadThrow m =>
ByteString -> Maybe Codec -> ConduitT ByteString Text m ()
checkXMLDecl ByteString
bs (Just Codec
codec) = forall i o (m :: * -> *). i -> ConduitT i o m ()
leftover ByteString
bs forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *).
MonadThrow m =>
Codec -> ConduitT ByteString Text m ()
CT.decode Codec
codec
checkXMLDecl ByteString
bs0 Maybe Codec
Nothing =
    forall {m :: * -> *}.
MonadThrow m =>
[ByteString]
-> (Text -> IResult Text Token)
-> ByteString
-> ConduitT ByteString Text m ()
loop [] (forall a. Parser a -> Text -> Result a
AT.parse (ParseSettings -> Parser Token
parseToken forall a. Default a => a
def)) ByteString
bs0
  where
    loop :: [ByteString]
-> (Text -> IResult Text Token)
-> ByteString
-> ConduitT ByteString Text m ()
loop [ByteString]
chunks0 Text -> IResult Text Token
parser ByteString
nextChunk =
        case Text -> IResult Text Token
parser forall a b. (a -> b) -> a -> b
$ OnDecodeError -> ByteString -> Text
decodeUtf8With OnDecodeError
lenientDecode ByteString
nextChunk of
            AT.Fail{}                             -> ConduitT ByteString Text m ()
fallback
            AT.Partial Text -> IResult Text Token
f                          -> forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
await forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall b a. b -> (a -> b) -> Maybe a -> b
maybe ConduitT ByteString Text m ()
fallback ([ByteString]
-> (Text -> IResult Text Token)
-> ByteString
-> ConduitT ByteString Text m ()
loop [ByteString]
chunks Text -> IResult Text Token
f)
            AT.Done Text
_ (TokenXMLDeclaration [TAttribute]
attrs) -> [TAttribute] -> ConduitT ByteString Text m ()
findEncoding [TAttribute]
attrs
            AT.Done{}                             -> ConduitT ByteString Text m ()
fallback
      where
        chunks :: [ByteString]
chunks = ByteString
nextChunk forall a. a -> [a] -> [a]
: [ByteString]
chunks0
        fallback :: ConduitT ByteString Text m ()
fallback = Codec -> ConduitT ByteString Text m ()
complete Codec
CT.utf8
        complete :: Codec -> ConduitT ByteString Text m ()
complete Codec
codec = forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ forall i o (m :: * -> *). i -> ConduitT i o m ()
leftover [ByteString]
chunks forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *).
MonadThrow m =>
Codec -> ConduitT ByteString Text m ()
CT.decode Codec
codec

        findEncoding :: [TAttribute] -> ConduitT ByteString Text m ()
findEncoding [] = ConduitT ByteString Text m ()
fallback
        findEncoding ((TName Maybe Text
_ Text
"encoding", [ContentText Text
enc]):[TAttribute]
_) =
            case Text -> Text
T.toLower Text
enc of
                Text
"iso-8859-1" -> Codec -> ConduitT ByteString Text m ()
complete Codec
CT.iso8859_1
                Text
"utf-8"      -> Codec -> ConduitT ByteString Text m ()
complete Codec
CT.utf8
                Text
_            -> Codec -> ConduitT ByteString Text m ()
complete Codec
CT.utf8
        findEncoding (TAttribute
_:[TAttribute]
xs) = [TAttribute] -> ConduitT ByteString Text m ()
findEncoding [TAttribute]
xs

type EventPos = (Maybe PositionRange, Event)

-- | Parses a byte stream into 'Event's. This function is implemented fully in
-- Haskell using attoparsec-text for parsing. The produced error messages do
-- not give line/column information, so you may prefer to stick with the parser
-- provided by libxml-enumerator. However, this has the advantage of not
-- relying on any C libraries.
--
-- This relies on 'detectUtf' to determine character encoding, and 'parseText'
-- to do the actual parsing.
parseBytes :: MonadThrow m
           => ParseSettings
           -> ConduitT S.ByteString Event m ()
parseBytes :: forall (m :: * -> *).
MonadThrow m =>
ParseSettings -> ConduitT ByteString Event m ()
parseBytes = forall (m :: * -> *) o1 o2 i r.
Monad m =>
(o1 -> o2) -> ConduitT i o1 m r -> ConduitT i o2 m r
mapOutput forall a b. (a, b) -> b
snd forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (m :: * -> *).
MonadThrow m =>
ParseSettings -> ConduitT ByteString EventPos m ()
parseBytesPos

parseBytesPos :: MonadThrow m
              => ParseSettings
              -> ConduitT S.ByteString EventPos m ()
parseBytesPos :: forall (m :: * -> *).
MonadThrow m =>
ParseSettings -> ConduitT ByteString EventPos m ()
parseBytesPos ParseSettings
ps = forall (m :: * -> *). MonadThrow m => ConduitT ByteString Text m ()
detectUtf forall (m :: * -> *) a b c r.
Monad m =>
ConduitT a b m () -> ConduitT b c m r -> ConduitT a c m r
.| forall (m :: * -> *).
MonadThrow m =>
ParseSettings -> ConduitT Text EventPos m ()
parseTextPos ParseSettings
ps

dropBOM :: Monad m => ConduitT T.Text T.Text m ()
dropBOM :: forall (m :: * -> *). Monad m => ConduitT Text Text m ()
dropBOM =
    forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
await forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall (m :: * -> *) a. Monad m => a -> m a
return ()) Text -> ConduitT Text Text m ()
push
  where
    push :: Text -> ConduitT Text Text m ()
push Text
t =
        case Text -> Maybe (Char, Text)
T.uncons Text
t of
            Maybe (Char, Text)
Nothing -> forall (m :: * -> *). Monad m => ConduitT Text Text m ()
dropBOM
            Just (Char
c, Text
cs) ->
                let output :: Text
output
                        | Char
c forall a. Eq a => a -> a -> Bool
== Char
'\xfeef' = Text
cs
                        | Bool
otherwise = Text
t
                 in forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Text
output forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall {o}. ConduitT o o m ()
idConduit
    idConduit :: ConduitT o o m ()
idConduit = forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
await forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall (m :: * -> *) a. Monad m => a -> m a
return ()) (\o
x -> forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield o
x forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ConduitT o o m ()
idConduit)

-- | Parses a character stream into 'Event's. This function is implemented
-- fully in Haskell using attoparsec-text for parsing. The produced error
-- messages do not give line/column information, so you may prefer to stick
-- with the parser provided by libxml-enumerator. However, this has the
-- advantage of not relying on any C libraries.
--
-- Since 1.2.4
parseText :: MonadThrow m => ParseSettings -> ConduitT T.Text Event m ()
parseText :: forall (m :: * -> *).
MonadThrow m =>
ParseSettings -> ConduitT Text Event m ()
parseText = forall (m :: * -> *) o1 o2 i r.
Monad m =>
(o1 -> o2) -> ConduitT i o1 m r -> ConduitT i o2 m r
mapOutput forall a b. (a, b) -> b
snd forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (m :: * -> *).
MonadThrow m =>
ParseSettings -> ConduitT Text EventPos m ()
parseTextPos


-- | Same as 'parseText', but includes the position of each event.
--
-- Since 1.2.4
parseTextPos :: MonadThrow m
          => ParseSettings
          -> ConduitT T.Text EventPos m ()
parseTextPos :: forall (m :: * -> *).
MonadThrow m =>
ParseSettings -> ConduitT Text EventPos m ()
parseTextPos ParseSettings
de =
    forall (m :: * -> *). Monad m => ConduitT Text Text m ()
dropBOM
        forall (m :: * -> *) a b c r.
Monad m =>
ConduitT a b m () -> ConduitT b c m r -> ConduitT a c m r
.| ConduitT Text (PositionRange, Token) m ()
tokenize
        forall (m :: * -> *) a b c r.
Monad m =>
ConduitT a b m () -> ConduitT b c m r -> ConduitT a c m r
.| forall (m :: * -> *).
Monad m =>
ParseSettings -> ConduitT (PositionRange, Token) EventPos m ()
toEventC ParseSettings
de
        forall (m :: * -> *) a b c r.
Monad m =>
ConduitT a b m () -> ConduitT b c m r -> ConduitT a c m r
.| ConduitT EventPos EventPos m ()
addBeginEnd
  where
    tokenize :: ConduitT Text (PositionRange, Token) m ()
tokenize = forall (m :: * -> *).
MonadThrow m =>
ParseSettings -> ConduitT Text (PositionRange, Token) m ()
conduitToken ParseSettings
de
    addBeginEnd :: ConduitT EventPos EventPos m ()
addBeginEnd = forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield (forall a. Maybe a
Nothing, Event
EventBeginDocument) forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall {a}. ConduitT (Maybe a, Event) (Maybe a, Event) m ()
addEnd
    addEnd :: ConduitT (Maybe a, Event) (Maybe a, Event) m ()
addEnd = forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
await forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall b a. b -> (a -> b) -> Maybe a -> b
maybe
        (forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield (forall a. Maybe a
Nothing, Event
EventEndDocument))
        (\(Maybe a, Event)
e -> forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield (Maybe a, Event)
e forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ConduitT (Maybe a, Event) (Maybe a, Event) m ()
addEnd)

toEventC :: Monad m => ParseSettings -> ConduitT (PositionRange, Token) EventPos m ()
toEventC :: forall (m :: * -> *).
Monad m =>
ParseSettings -> ConduitT (PositionRange, Token) EventPos m ()
toEventC ParseSettings
ps =
    EntityTable
-> [NSLevel] -> ConduitT (PositionRange, Token) EventPos m ()
go [] []
  where
    go :: EntityTable
-> [NSLevel] -> ConduitT (PositionRange, Token) EventPos m ()
go !EntityTable
es ![NSLevel]
levels =
        forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
await forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall (m :: * -> *) a. Monad m => a -> m a
return ()) (PositionRange, Token)
-> ConduitT (PositionRange, Token) EventPos m ()
push
      where
        push :: (PositionRange, Token)
-> ConduitT (PositionRange, Token) EventPos m ()
push (PositionRange
position, Token
token) =
            forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ (forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield forall b c a. (b -> c) -> (a -> b) -> a -> c
. (,) (forall a. a -> Maybe a
Just PositionRange
position)) [Event]
events forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> EntityTable
-> [NSLevel] -> ConduitT (PositionRange, Token) EventPos m ()
go EntityTable
es' [NSLevel]
levels'
          where
            (EntityTable
es', [NSLevel]
levels', [Event]
events) = ParseSettings
-> EntityTable
-> [NSLevel]
-> Token
-> (EntityTable, [NSLevel], [Event])
tokenToEvent ParseSettings
ps EntityTable
es [NSLevel]
levels Token
token


type DecodeEntities = Text -> Content
type DecodeIllegalCharacters = Int -> Maybe Char

data ParseSettings = ParseSettings
    { ParseSettings -> DecodeEntities
psDecodeEntities          :: DecodeEntities
    , ParseSettings -> Bool
psRetainNamespaces        :: Bool
    -- ^ Whether the original xmlns attributes should be retained in the parsed
    -- values. For more information on motivation, see:
    --
    -- <https://github.com/snoyberg/xml/issues/38>
    --
    -- Default: False
    --
    -- Since 1.2.1
    , ParseSettings -> DecodeIllegalCharacters
psDecodeIllegalCharacters :: DecodeIllegalCharacters
    -- ^ How to decode illegal character references (@&#[0-9]+;@ or @&#x[0-9a-fA-F]+;@).
    --
    -- Character references within the legal ranges defined by <https://www.w3.org/TR/REC-xml/#NT-Char the standard> are automatically parsed.
    -- Others are passed to this function.
    --
    -- Default: @const Nothing@
    --
    -- Since 1.7.1
    , ParseSettings -> Int
psEntityExpansionSizeLimit :: Int
    -- ^ Maximum number of characters allowed in expanding an
    -- internal entity.  This is intended to protect against the
    -- billion laughs attack.
    --
    -- Default: @8192@
    --
    -- Since 1.9.1
    , ParseSettings -> Bool
psIgnoreInternalEntityDeclarations :: Bool
    -- ^ Whether to resolve any but the predefined entities.
    --
    -- Default: @False@
    }

instance Default ParseSettings where
    def :: ParseSettings
def = ParseSettings
        { psDecodeEntities :: DecodeEntities
psDecodeEntities = DecodeEntities
decodeXmlEntities
        , psRetainNamespaces :: Bool
psRetainNamespaces = Bool
False
        , psDecodeIllegalCharacters :: DecodeIllegalCharacters
psDecodeIllegalCharacters = forall a b. a -> b -> a
const forall a. Maybe a
Nothing
        , psEntityExpansionSizeLimit :: Int
psEntityExpansionSizeLimit = Int
8192
        , psIgnoreInternalEntityDeclarations :: Bool
psIgnoreInternalEntityDeclarations = Bool
False
        }

conduitToken :: MonadThrow m => ParseSettings -> ConduitT T.Text (PositionRange, Token) m ()
conduitToken :: forall (m :: * -> *).
MonadThrow m =>
ParseSettings -> ConduitT Text (PositionRange, Token) m ()
conduitToken = forall a (m :: * -> *) b.
(AttoparsecInput a, MonadThrow m) =>
Parser a b -> ConduitT a (PositionRange, b) m ()
conduitParser forall b c a. (b -> c) -> (a -> b) -> a -> c
. ParseSettings -> Parser Token
parseToken

parseToken :: ParseSettings -> Parser Token
parseToken :: ParseSettings -> Parser Token
parseToken ParseSettings
settings = do
  Maybe Char
mbc <- Parser (Maybe Char)
peekChar
  case Maybe Char
mbc of
    Just Char
'<' -> Char -> Parser Char
char Char
'<' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Token
parseLt
    Maybe Char
_        -> Content -> Token
TokenContent forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParseSettings -> Bool -> Bool -> Parser Content
parseContent ParseSettings
settings Bool
False Bool
False
  where
    parseLt :: Parser Token
parseLt = do
        Maybe Char
mbc <- Parser (Maybe Char)
peekChar
        case Maybe Char
mbc of
          Just Char
'?' -> Char -> Parser ()
char' Char
'?' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Token
parseInstr
          Just Char
'!' -> Char -> Parser ()
char' Char
'!' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
                        (Parser Token
parseComment forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Token
parseCdata forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Token
parseDoctype)
          Just Char
'/' -> Char -> Parser ()
char' Char
'/' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Token
parseEnd
          Maybe Char
_        -> Parser Token
parseBegin
    parseInstr :: Parser Token
parseInstr = (do
        Text
name <- Parser Text
parseIdent
        if Text
name forall a. Eq a => a -> a -> Bool
== Text
"xml"
            then do
                [TAttribute]
as <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
A.many forall a b. (a -> b) -> a -> b
$ ParseSettings -> Parser TAttribute
parseAttribute ParseSettings
settings
                Parser ()
skipSpace
                Char -> Parser ()
char' Char
'?'
                Char -> Parser ()
char' Char
'>'
                Parser ()
newline forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (m :: * -> *) a. Monad m => a -> m a
return ()
                forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [TAttribute] -> Token
TokenXMLDeclaration [TAttribute]
as
            else do
                Parser ()
skipSpace
                Text
x <- String -> Text
T.pack forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a b. Alternative f => f a -> f b -> f [a]
manyTill Parser Char
anyChar (Text -> Parser Text
string Text
"?>")
                forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Instruction -> Token
TokenInstruction forall a b. (a -> b) -> a -> b
$ Text -> Text -> Instruction
Instruction Text
name Text
x)
          forall i a. Parser i a -> String -> Parser i a
<?> String
"instruction"
    parseComment :: Parser Token
parseComment = (do
        Char -> Parser ()
char' Char
'-'
        Char -> Parser ()
char' Char
'-'
        Text
c <- String -> Text
T.pack forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a b. Alternative f => f a -> f b -> f [a]
manyTill Parser Char
anyChar (Text -> Parser Text
string Text
"-->")
        forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> Token
TokenComment Text
c) forall i a. Parser i a -> String -> Parser i a
<?> String
"comment"
    parseCdata :: Parser Token
parseCdata = (do
        Text
_ <- Text -> Parser Text
string Text
"[CDATA["
        Text
t <- String -> Text
T.pack forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (f :: * -> *) a b. Alternative f => f a -> f b -> f [a]
manyTill Parser Char
anyChar (Text -> Parser Text
string Text
"]]>")
        forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> Token
TokenCDATA Text
t) forall i a. Parser i a -> String -> Parser i a
<?> String
"CDATA"
    parseDoctype :: Parser Token
parseDoctype = (do
        Text
_ <- Text -> Parser Text
string Text
"DOCTYPE"
        Parser ()
skipSpace
        TName
name <- Parser TName
parseName
        let i :: Text
i =
                case TName
name of
                    TName Maybe Text
Nothing Text
x  -> Text
x
                    TName (Just Text
x) Text
y -> [Text] -> Text
T.concat [Text
x, Text
":", Text
y]
        Parser ()
skipSpace
        Maybe ExternalID
eid <- forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a. a -> Maybe a
Just Parser Text ExternalID
parsePublicID forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
               forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a. a -> Maybe a
Just Parser Text ExternalID
parseSystemID forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
               forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
        Parser ()
skipSpace
        Maybe Char
mbc <- Parser (Maybe Char)
peekChar
        EntityTable
ents <- case Maybe Char
mbc of
                  Just Char
'[' ->
                    do Char -> Parser ()
char' Char
'['
                       EntityTable
ents <- forall {c}. (EntityTable -> c) -> Parser Text c
parseDeclarations forall a. a -> a
id
                       Parser ()
skipSpace
                       if ParseSettings -> Bool
psIgnoreInternalEntityDeclarations ParseSettings
settings then
                         forall (m :: * -> *) a. Monad m => a -> m a
return []
                       else
                         forall (m :: * -> *) a. Monad m => a -> m a
return EntityTable
ents
                  Maybe Char
_ -> forall (m :: * -> *) a. Monad m => a -> m a
return []
        Char -> Parser ()
char' Char
'>'
        Parser ()
newline forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (m :: * -> *) a. Monad m => a -> m a
return ()
        forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Text -> Maybe ExternalID -> EntityTable -> Token
TokenDoctype Text
i Maybe ExternalID
eid EntityTable
ents) forall i a. Parser i a -> String -> Parser i a
<?> String
"DOCTYPE"
    parseDeclarations :: (EntityTable -> c) -> Parser Text c
parseDeclarations EntityTable -> c
front =  -- we ignore everything but ENTITY
        (Char -> Parser ()
char' Char
']' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (EntityTable -> c
front [])) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
        (Parser Text (EntityTable -> EntityTable)
parseEntity forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \EntityTable -> EntityTable
f -> (EntityTable -> c) -> Parser Text c
parseDeclarations (EntityTable -> c
front forall b c a. (b -> c) -> (a -> b) -> a -> c
. EntityTable -> EntityTable
f)) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
        (Text -> Parser Text
string Text
"<!--" forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (f :: * -> *) a b. Alternative f => f a -> f b -> f [a]
manyTill Parser Char
anyChar (Text -> Parser Text
string Text
"-->") forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
           (EntityTable -> c) -> Parser Text c
parseDeclarations EntityTable -> c
front) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
         -- this clause handles directives like <!ELEMENT
         -- and processing instructions:
        (do Char -> Parser ()
char' Char
'<'
            forall (f :: * -> *) a. Alternative f => f a -> f ()
skipMany
               (forall (f :: * -> *) a. Functor f => f a -> f ()
void ((Char -> Bool) -> Parser Text
takeWhile1 (String -> Char -> Bool
notInClass String
"]<>'\"")) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (f :: * -> *) a. Functor f => f a -> f ()
void Parser Text
quotedText)
            Char -> Parser ()
char' Char
'>'
            (EntityTable -> c) -> Parser Text c
parseDeclarations EntityTable -> c
front) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
        (forall (f :: * -> *) a. Alternative f => f a -> f ()
skipMany1 ((Char -> Bool) -> Parser Char
satisfy (String -> Char -> Bool
notInClass String
"]<>")) forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
            (EntityTable -> c) -> Parser Text c
parseDeclarations EntityTable -> c
front)
    parseEntity :: Parser Text (EntityTable -> EntityTable)
parseEntity = (do
        Text
_ <- Text -> Parser Text
string Text
"<!ENTITY"
        Parser ()
skipSpace
        Bool
isParameterEntity <- forall (f :: * -> *) a. Alternative f => a -> f a -> f a
AT.option Bool
False (Bool
True forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ (Char -> Parser ()
char' Char
'%' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ()
skipSpace))
        Text
i <- Parser Text
parseIdent
        Text
t <- Parser Text
quotedText
        Parser ()
skipSpace
        Char -> Parser ()
char' Char
'>'
        forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$
          if Bool
isParameterEntity
             then forall a. a -> a
id
             else ((Text
i, Text
t)forall a. a -> [a] -> [a]
:)) forall i a. Parser i a -> String -> Parser i a
<?> String
"entity"
    parsePublicID :: Parser Text ExternalID
parsePublicID = Text -> Text -> ExternalID
PublicID forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Text -> Parser Text
string Text
"PUBLIC" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
quotedText) forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser Text
quotedText
    parseSystemID :: Parser Text ExternalID
parseSystemID = Text -> ExternalID
SystemID forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Text -> Parser Text
string Text
"SYSTEM" forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser Text
quotedText)
    quotedText :: Parser Text
quotedText = (do
        Parser ()
skipSpace
        Char -> Parser Text
between Char
'"' forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> Parser Text
between Char
'\'') forall i a. Parser i a -> String -> Parser i a
<?> String
"quoted text"
    between :: Char -> Parser Text
between Char
c = do
        Char -> Parser ()
char' Char
c
        Text
x <- (Char -> Bool) -> Parser Text
takeWhile (forall a. Eq a => a -> a -> Bool
/=Char
c)
        Char -> Parser ()
char' Char
c
        forall (m :: * -> *) a. Monad m => a -> m a
return Text
x
    parseEnd :: Parser Token
parseEnd = (do
        Parser ()
skipSpace
        TName
n <- Parser TName
parseName
        Parser ()
skipSpace
        Char -> Parser ()
char' Char
'>'
        forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ TName -> Token
TokenEndElement TName
n) forall i a. Parser i a -> String -> Parser i a
<?> String
"close tag"
    parseBegin :: Parser Token
parseBegin = (do
        Parser ()
skipSpace
        TName
n <- Parser TName
parseName
        [TAttribute]
as <- forall (f :: * -> *) a. Alternative f => f a -> f [a]
A.many forall a b. (a -> b) -> a -> b
$ ParseSettings -> Parser TAttribute
parseAttribute ParseSettings
settings
        Parser ()
skipSpace
        Bool
isClose <- (Char -> Parser Char
char Char
'/' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
skipSpace forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return Bool
True) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (m :: * -> *) a. Monad m => a -> m a
return Bool
False
        Char -> Parser ()
char' Char
'>'
        forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ TName -> [TAttribute] -> Bool -> Int -> Token
TokenBeginElement TName
n [TAttribute]
as Bool
isClose Int
0) forall i a. Parser i a -> String -> Parser i a
<?> String
"open tag"

parseAttribute :: ParseSettings -> Parser TAttribute
parseAttribute :: ParseSettings -> Parser TAttribute
parseAttribute ParseSettings
settings = (do
    Parser ()
skipSpace
    TName
key <- Parser TName
parseName
    Parser ()
skipSpace
    Char -> Parser ()
char' Char
'='
    Parser ()
skipSpace
    [Content]
val <- Parser Text [Content]
squoted forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Text [Content]
dquoted
    forall (m :: * -> *) a. Monad m => a -> m a
return (TName
key, [Content]
val)) forall i a. Parser i a -> String -> Parser i a
<?> String
"attribute"
  where
    squoted :: Parser Text [Content]
squoted = Char -> Parser Char
char Char
'\'' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a b. Alternative f => f a -> f b -> f [a]
manyTill (ParseSettings -> Bool -> Bool -> Parser Content
parseContent ParseSettings
settings Bool
False Bool
True) (Char -> Parser Char
char Char
'\'')
    dquoted :: Parser Text [Content]
dquoted = Char -> Parser Char
char  Char
'"' forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> forall (f :: * -> *) a b. Alternative f => f a -> f b -> f [a]
manyTill (ParseSettings -> Bool -> Bool -> Parser Content
parseContent ParseSettings
settings Bool
True Bool
False) (Char -> Parser Char
char  Char
'"')

parseName :: Parser TName
parseName :: Parser TName
parseName =
  (Text -> Maybe Text -> TName
name forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text
parseIdent forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> forall (f :: * -> *) a. Alternative f => f a -> f (Maybe a)
A.optional (Char -> Parser Char
char Char
':' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text
parseIdent)) forall i a. Parser i a -> String -> Parser i a
<?> String
"name"
  where
    name :: Text -> Maybe Text -> TName
name Text
i1 Maybe Text
Nothing   = Maybe Text -> Text -> TName
TName forall a. Maybe a
Nothing Text
i1
    name Text
i1 (Just Text
i2) = Maybe Text -> Text -> TName
TName (forall a. a -> Maybe a
Just Text
i1) Text
i2

parseIdent :: Parser Text
parseIdent :: Parser Text
parseIdent = (Char -> Bool) -> Parser Text
takeWhile1 Char -> Bool
valid forall i a. Parser i a -> String -> Parser i a
<?> String
"identifier"
  where
    valid :: Char -> Bool
valid Char
'&'  = Bool
False
    valid Char
'<'  = Bool
False
    valid Char
'>'  = Bool
False
    valid Char
':'  = Bool
False
    valid Char
'?'  = Bool
False
    valid Char
'='  = Bool
False
    valid Char
'"'  = Bool
False
    valid Char
'\'' = Bool
False
    valid Char
'/'  = Bool
False
    valid Char
';'  = Bool
False
    valid Char
'#'  = Bool
False
    valid Char
'['  = Bool
False
    valid Char
']'  = Bool
False
    valid Char
c    = Bool -> Bool
not forall a b. (a -> b) -> a -> b
$ Char -> Bool
isXMLSpace Char
c

parseContent :: ParseSettings
             -> Bool -- break on double quote
             -> Bool -- break on single quote
             -> Parser Content
parseContent :: ParseSettings -> Bool -> Bool -> Parser Content
parseContent (ParseSettings DecodeEntities
decodeEntities Bool
_ DecodeIllegalCharacters
decodeIllegalCharacters Int
_ Bool
_) Bool
breakDouble Bool
breakSingle = Parser Content
parseReference forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Parser Content
parseTextContent forall i a. Parser i a -> String -> Parser i a
<?> String
"text content") where
  parseReference :: Parser Content
parseReference = do
    Char -> Parser ()
char' Char
'&'
    Content
t <- Parser Content
parseEntityRef forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Content
parseHexCharRef forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser Content
parseDecCharRef
    Char -> Parser ()
char' Char
';'
    forall (m :: * -> *) a. Monad m => a -> m a
return Content
t
  parseEntityRef :: Parser Content
parseEntityRef = do
    TName Maybe Text
ma Text
b <- Parser TName
parseName
    let name :: Text
name = forall b a. b -> (a -> b) -> Maybe a -> b
maybe Text
"" (Text -> Text -> Text
`T.append` Text
":") Maybe Text
ma Text -> Text -> Text
`T.append` Text
b
    forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ case Text
name of
      Text
"lt"   -> DecodeEntities
ContentText Text
"<"
      Text
"gt"   -> DecodeEntities
ContentText Text
">"
      Text
"amp"  -> DecodeEntities
ContentText Text
"&"
      Text
"quot" -> DecodeEntities
ContentText Text
"\""
      Text
"apos" -> DecodeEntities
ContentText Text
"'"
      Text
_      -> DecodeEntities
decodeEntities Text
name
  parseHexCharRef :: Parser Content
parseHexCharRef = do
    forall (f :: * -> *) a. Functor f => f a -> f ()
void forall a b. (a -> b) -> a -> b
$ Text -> Parser Text
string Text
"#x"
    Int
n <- forall a. (Integral a, Bits a) => Parser a
AT.hexadecimal
    case DecodeIllegalCharacters
toValidXmlChar Int
n forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> DecodeIllegalCharacters
decodeIllegalCharacters Int
n of
      Maybe Char
Nothing -> forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"Invalid character from hexadecimal character reference."
      Just Char
c  -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ DecodeEntities
ContentText forall a b. (a -> b) -> a -> b
$ Char -> Text
T.singleton Char
c
  parseDecCharRef :: Parser Content
parseDecCharRef = do
    forall (f :: * -> *) a. Functor f => f a -> f ()
void forall a b. (a -> b) -> a -> b
$ Text -> Parser Text
string Text
"#"
    Int
n <- forall a. Integral a => Parser a
AT.decimal
    case DecodeIllegalCharacters
toValidXmlChar Int
n forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> DecodeIllegalCharacters
decodeIllegalCharacters Int
n of
      Maybe Char
Nothing -> forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"Invalid character from decimal character reference."
      Just Char
c  -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ DecodeEntities
ContentText forall a b. (a -> b) -> a -> b
$ Char -> Text
T.singleton Char
c

  -- Turns @\r\n@ and @\r@ into @\n@. See
  -- <https://www.w3.org/TR/REC-xml/#sec-line-ends>.
  parseTextContent :: Parser Content
parseTextContent = do
    -- Read until the end of this piece of content
    -- OR until a carriage return. In the second case, we use
    -- handleCR to normalize \r and \r\n into \n.
    Text
firstChunk <- (Char -> Bool) -> Parser Text
takeWhile Char -> Bool
valid
    Maybe Char
mbC <- Parser (Maybe Char)
peekChar
    case Maybe Char
mbC of
      Just Char
'\r' ->
        [Text] -> Parser Content
handleCR [Text
firstChunk]
      Maybe Char
_ ->
        forall {m :: * -> *}. MonadFail m => Text -> m Content
exit Text
firstChunk

  -- This is a duplication of the logic above and could be used instead.
  -- Specialising these cases to the case "full text content contains no carriage return"
  -- considerably speeds up execution when no carriage returns are in the original source.
  handleCRPeek :: [Text] -> Parser Content
handleCRPeek [Text]
chunks = do
    Maybe Char
mbC <- Parser (Maybe Char)
peekChar
    case Maybe Char
mbC of
      Just Char
'\r' ->
        [Text] -> Parser Content
handleCR [Text]
chunks
      Maybe Char
_ ->
        forall {m :: * -> *}. MonadFail m => [Text] -> m Content
exit' [Text]
chunks

  handleCR :: [Text] -> Parser Content
handleCR [Text]
chunks = do
    -- We know that the next character is a carriage return. Discard it.
    Char
_ <- Parser Char
anyChar
    -- Read the next chunk.
    Text
chunk <- (Char -> Bool) -> Parser Text
takeWhile Char -> Bool
valid
    case Text -> Maybe (Char, Text)
T.uncons Text
chunk of
      -- If it starts with newline, we're good:
      -- We've already discarded the carriage return.
      -- This is the case that replaces \r\n by \n.
      Just (Char
'\n', Text
_) ->
        [Text] -> Parser Content
handleCRPeek forall a b. (a -> b) -> a -> b
$ Text
chunk forall a. a -> [a] -> [a]
: [Text]
chunks
      -- Otherwise, we'll have to insert a newline.
      -- This is the case that replaces \r by \n.
      Just (Char, Text)
_ ->
        [Text] -> Parser Content
handleCRPeek forall a b. (a -> b) -> a -> b
$ Text
chunk forall a. a -> [a] -> [a]
: Text
"\n" forall a. a -> [a] -> [a]
: [Text]
chunks
      -- If the chunk is empty, we've either hit another carriage
      -- return or the end of this piece of content. Since we've discarded
      -- a carriage return we need to insert a newline.
      Maybe (Char, Text)
Nothing ->
        [Text] -> Parser Content
handleCRPeek forall a b. (a -> b) -> a -> b
$ Text
"\n" forall a. a -> [a] -> [a]
: [Text]
chunks


  -- exit and exit' fail if the emitted text content is empty.
  -- exit' uses Data.Text.concat to efficiently concatenate the collected
  -- chunks.
  exit :: Text -> m Content
exit Text
c
    | Text -> Bool
T.null Text
c = forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
"parseTextContent"
    | Bool
otherwise = forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$ DecodeEntities
ContentText Text
c

  exit' :: [Text] -> m Content
exit' [Text]
cs = forall {m :: * -> *}. MonadFail m => Text -> m Content
exit forall a b. (a -> b) -> a -> b
$ [Text] -> Text
T.concat forall a b. (a -> b) -> a -> b
$ forall a. [a] -> [a]
reverse [Text]
cs

  -- Check whether a character is valid text content (e.g. not a <)
  -- OR a carriage return. The latter is used above in parseTextContent
  -- to normalize line endings.
  valid :: Char -> Bool
valid Char
'"'  = Bool -> Bool
not Bool
breakDouble
  valid Char
'\'' = Bool -> Bool
not Bool
breakSingle
  valid Char
'&'  = Bool
False -- amp
  valid Char
'<'  = Bool
False -- lt
  valid Char
'\r' = Bool
False
  valid Char
_    = Bool
True

-- | Is this codepoint a valid XML character? See
-- <https://www.w3.org/TR/xml/#charsets>. This is proudly XML 1.0 only.
toValidXmlChar :: Int -> Maybe Char
toValidXmlChar :: DecodeIllegalCharacters
toValidXmlChar Int
n
  | forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (Int, Int) -> Bool
checkRange [(Int, Int)]
ranges = forall a. a -> Maybe a
Just (forall a. Enum a => Int -> a
toEnum Int
n)
  | Bool
otherwise = forall a. Maybe a
Nothing
  where
    --Inclusive lower bound, inclusive upper bound.
    ranges :: [(Int, Int)]
    ranges :: [(Int, Int)]
ranges =
      [ (Int
0x9, Int
0xA)
      , (Int
0xD, Int
0xD)
      , (Int
0x20, Int
0xD7FF)
      , (Int
0xE000, Int
0xFFFD)
      , (Int
0x10000, Int
0x10FFFF)
      ]
    checkRange :: (Int, Int) -> Bool
checkRange (Int
lb, Int
ub) = Int
lb forall a. Ord a => a -> a -> Bool
<= Int
n Bool -> Bool -> Bool
&& Int
n forall a. Ord a => a -> a -> Bool
<= Int
ub

skipSpace :: Parser ()
skipSpace :: Parser ()
skipSpace = (Char -> Bool) -> Parser ()
skipWhile Char -> Bool
isXMLSpace

-- | Determines whether a character is an XML white space. The list of
-- white spaces is given by
--
-- >  S ::= (#x20 | #x9 | #xD | #xA)+
--
-- in <http://www.w3.org/TR/2008/REC-xml-20081126/#sec-common-syn>.
isXMLSpace :: Char -> Bool
isXMLSpace :: Char -> Bool
isXMLSpace Char
' '  = Bool
True
isXMLSpace Char
'\t' = Bool
True
isXMLSpace Char
'\r' = Bool
True
isXMLSpace Char
'\n' = Bool
True
isXMLSpace Char
_    = Bool
False

newline :: Parser ()
newline :: Parser ()
newline = forall (f :: * -> *) a. Functor f => f a -> f ()
void forall a b. (a -> b) -> a -> b
$ (Char -> Parser Char
char Char
'\r' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Char -> Parser Char
char Char
'\n') forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Char -> Parser Char
char Char
'\n'

char' :: Char -> Parser ()
char' :: Char -> Parser ()
char' = forall (f :: * -> *) a. Functor f => f a -> f ()
void forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Parser Char
char

data ContentType = Ignore | IsContent Text | IsError String | NotContent

-- | Grabs the next piece of content if available. This function skips over any
-- comments, instructions or entities, and concatenates all content until the next start
-- or end tag.
contentMaybe :: MonadThrow m => ConduitT Event o m (Maybe Text)
contentMaybe :: forall (m :: * -> *) o.
MonadThrow m =>
ConduitT Event o m (Maybe Text)
contentMaybe = do
    Maybe Event
x <- forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
peekC
    case Maybe Event -> ContentType
pc' Maybe Event
x of
        ContentType
Ignore      -> forall (m :: * -> *) a o. Monad m => Int -> ConduitT a o m ()
dropC Int
1 forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) o.
MonadThrow m =>
ConduitT Event o m (Maybe Text)
contentMaybe
        IsContent Text
t -> forall (m :: * -> *) a o. Monad m => Int -> ConduitT a o m ()
dropC Int
1 forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a. a -> Maybe a
Just (forall {m :: * -> *} {o}.
MonadThrow m =>
([Text] -> [Text]) -> ConduitT Event o m Text
takeContents (Text
tforall a. a -> [a] -> [a]
:))
        IsError String
e   -> forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM forall a b. (a -> b) -> a -> b
$ String -> Maybe Event -> XmlException
InvalidEntity String
e Maybe Event
x
        ContentType
NotContent  -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
  where
    pc' :: Maybe Event -> ContentType
pc' Maybe Event
Nothing  = ContentType
NotContent
    pc' (Just Event
x) = Event -> ContentType
pc Event
x
    pc :: Event -> ContentType
pc (EventContent (ContentText Text
t))   = Text -> ContentType
IsContent Text
t
    pc (EventContent (ContentEntity Text
e)) = String -> ContentType
IsError forall a b. (a -> b) -> a -> b
$ String
"Unknown entity: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show Text
e
    pc (EventCDATA Text
t)                   = Text -> ContentType
IsContent Text
t
    pc EventBeginElement{}              = ContentType
NotContent
    pc EventEndElement{}                = ContentType
NotContent
    pc EventBeginDocument{}             = ContentType
Ignore
    pc Event
EventEndDocument                 = ContentType
Ignore
    pc EventBeginDoctype{}              = ContentType
Ignore
    pc Event
EventEndDoctype                  = ContentType
Ignore
    pc EventInstruction{}               = ContentType
Ignore
    pc EventComment{}                   = ContentType
Ignore
    takeContents :: ([Text] -> [Text]) -> ConduitT Event o m Text
takeContents [Text] -> [Text]
front = do
        Maybe Event
x <- forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
peekC
        case Maybe Event -> ContentType
pc' Maybe Event
x of
            ContentType
Ignore      -> forall (m :: * -> *) a o. Monad m => Int -> ConduitT a o m ()
dropC Int
1 forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ([Text] -> [Text]) -> ConduitT Event o m Text
takeContents [Text] -> [Text]
front
            IsContent Text
t -> forall (m :: * -> *) a o. Monad m => Int -> ConduitT a o m ()
dropC Int
1 forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ([Text] -> [Text]) -> ConduitT Event o m Text
takeContents ([Text] -> [Text]
front forall b c a. (b -> c) -> (a -> b) -> a -> c
. (:) Text
t)
            IsError String
e   -> forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM forall a b. (a -> b) -> a -> b
$ String -> Maybe Event -> XmlException
InvalidEntity String
e Maybe Event
x
            ContentType
NotContent  -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [Text] -> Text
T.concat forall a b. (a -> b) -> a -> b
$ [Text] -> [Text]
front []

-- | Grabs the next piece of content. If none if available, returns 'T.empty'.
-- This is simply a wrapper around 'contentMaybe'.
content :: MonadThrow m => ConduitT Event o m Text
content :: forall (m :: * -> *) o. MonadThrow m => ConduitT Event o m Text
content = forall a. a -> Maybe a -> a
fromMaybe Text
T.empty forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) o.
MonadThrow m =>
ConduitT Event o m (Maybe Text)
contentMaybe


isWhitespace :: Event -> Bool
isWhitespace :: Event -> Bool
isWhitespace Event
EventBeginDocument             = Bool
True
isWhitespace Event
EventEndDocument               = Bool
True
isWhitespace EventBeginDoctype{}            = Bool
True
isWhitespace Event
EventEndDoctype                = Bool
True
isWhitespace EventInstruction{}             = Bool
True
isWhitespace (EventContent (ContentText Text
t)) = (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isSpace Text
t
isWhitespace EventComment{}                 = Bool
True
isWhitespace (EventCDATA Text
t)                 = (Char -> Bool) -> Text -> Bool
T.all Char -> Bool
isSpace Text
t
isWhitespace Event
_                              = Bool
False


-- | The most generic way to parse a tag. It takes a 'NameMatcher' to check whether
-- this is a correct tag name, an 'AttrParser' to handle attributes, and
-- then a parser to deal with content.
--
-- 'Events' are consumed if and only if the tag name and its attributes match.
--
-- This function automatically absorbs its balancing closing tag, and will
-- throw an exception if not all of the attributes or child elements are
-- consumed. If you want to allow extra attributes, see 'ignoreAttrs'.
--
-- This function automatically ignores comments, instructions and whitespace.
tag :: MonadThrow m
    => NameMatcher a -- ^ Check if this is a correct tag name
                     --   and return a value that can be used to get an @AttrParser@.
                     --   If this fails, the function will return @Nothing@
    -> (a -> AttrParser b) -- ^ Given the value returned by the name checker, this function will
                           --   be used to get an @AttrParser@ appropriate for the specific tag.
                           --   If the @AttrParser@ fails, the function will also return @Nothing@
    -> (b -> ConduitT Event o m c) -- ^ Handler function to handle the attributes and children
                                   --   of a tag, given the value return from the @AttrParser@
    -> ConduitT Event o m (Maybe c)
tag :: forall (m :: * -> *) a b o c.
MonadThrow m =>
NameMatcher a
-> (a -> AttrParser b)
-> (b -> ConduitT Event o m c)
-> ConduitT Event o m (Maybe c)
tag NameMatcher a
nameMatcher a -> AttrParser b
attrParser b -> ConduitT Event o m c
f = do
  (Maybe Event
x, [Event]
leftovers) <- forall {m :: * -> *} {o}.
Monad m =>
[Event] -> ConduitT Event o m (Maybe Event, [Event])
dropWS []
  Maybe c
res <- case Maybe Event
x of
    Just (EventBeginElement Name
name [(Name, [Content])]
as) -> case forall a. NameMatcher a -> Name -> Maybe a
runNameMatcher NameMatcher a
nameMatcher Name
name of
      Just a
y -> case forall {b}.
AttrParser b -> [(Name, [Content])] -> Either SomeException b
runAttrParser' (a -> AttrParser b
attrParser a
y) [(Name, [Content])]
as of
        Left SomeException
_ -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
        Right b
z -> do
          c
z' <- b -> ConduitT Event o m c
f b
z
          (Maybe Event
a, [Event]
_leftovers') <- forall {m :: * -> *} {o}.
Monad m =>
[Event] -> ConduitT Event o m (Maybe Event, [Event])
dropWS []
          case Maybe Event
a of
            Just (EventEndElement Name
name')
              | Name
name forall a. Eq a => a -> a -> Bool
== Name
name' -> forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. a -> Maybe a
Just c
z')
            Maybe Event
_ -> forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM forall a b. (a -> b) -> a -> b
$ Name -> Maybe Event -> XmlException
InvalidEndElement Name
name Maybe Event
a
      Maybe a
Nothing -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
    Maybe Event
_ -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing

  case Maybe c
res of
    -- Did not parse, put back all of the leading whitespace events and the
    -- final observed event generated by dropWS
    Maybe c
Nothing -> forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ forall i o (m :: * -> *). i -> ConduitT i o m ()
leftover [Event]
leftovers
    -- Parse succeeded, discard all of those whitespace events and the
    -- first parsed event
    Maybe c
_       -> forall (m :: * -> *) a. Monad m => a -> m a
return ()

  forall (m :: * -> *) a. Monad m => a -> m a
return Maybe c
res
  where
    -- Drop Events until we encounter a non-whitespace element. Return all of
    -- the events consumed here (including the first non-whitespace event) so
    -- that the calling function can treat them as leftovers if the parse fails
    dropWS :: [Event] -> ConduitT Event o m (Maybe Event, [Event])
dropWS [Event]
leftovers = do
        Maybe Event
x <- forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
await
        let leftovers' :: [Event]
leftovers' = forall b a. b -> (a -> b) -> Maybe a -> b
maybe forall a. a -> a
id (:) Maybe Event
x [Event]
leftovers

        case Event -> Bool
isWhitespace forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe Event
x of
          Just Bool
True -> [Event] -> ConduitT Event o m (Maybe Event, [Event])
dropWS [Event]
leftovers'
          Maybe Bool
_         -> forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe Event
x, [Event]
leftovers')
    runAttrParser' :: AttrParser b -> [(Name, [Content])] -> Either SomeException b
runAttrParser' AttrParser b
p [(Name, [Content])]
as =
        case forall a.
AttrParser a
-> [(Name, [Content])]
-> Either SomeException ([(Name, [Content])], a)
runAttrParser AttrParser b
p [(Name, [Content])]
as of
            Left SomeException
e           -> forall a b. a -> Either a b
Left SomeException
e
            Right ([], b
x)    -> forall a b. b -> Either a b
Right b
x
            Right ([(Name, [Content])]
attr', b
_) -> forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ forall e. Exception e => e -> SomeException
toException forall a b. (a -> b) -> a -> b
$ [(Name, [Content])] -> XmlException
UnparsedAttributes [(Name, [Content])]
attr'

-- | A simplified version of 'tag' where the 'NameMatcher' result isn't forwarded to the attributes parser.
--
-- Since 1.5.0
tag' :: MonadThrow m
     => NameMatcher a -> AttrParser b -> (b -> ConduitT Event o m c)
     -> ConduitT Event o m (Maybe c)
tag' :: forall (m :: * -> *) a b o c.
MonadThrow m =>
NameMatcher a
-> AttrParser b
-> (b -> ConduitT Event o m c)
-> ConduitT Event o m (Maybe c)
tag' NameMatcher a
a AttrParser b
b = forall (m :: * -> *) a b o c.
MonadThrow m =>
NameMatcher a
-> (a -> AttrParser b)
-> (b -> ConduitT Event o m c)
-> ConduitT Event o m (Maybe c)
tag NameMatcher a
a (forall a b. a -> b -> a
const AttrParser b
b)

-- | A further simplified tag parser, which requires that no attributes exist.
tagNoAttr :: MonadThrow m
          => NameMatcher a -- ^ Check if this is a correct tag name
          -> ConduitT Event o m b -- ^ Handler function to handle the children of the matched tag
          -> ConduitT Event o m (Maybe b)
tagNoAttr :: forall (m :: * -> *) a o b.
MonadThrow m =>
NameMatcher a
-> ConduitT Event o m b -> ConduitT Event o m (Maybe b)
tagNoAttr NameMatcher a
name ConduitT Event o m b
f = forall (m :: * -> *) a b o c.
MonadThrow m =>
NameMatcher a
-> AttrParser b
-> (b -> ConduitT Event o m c)
-> ConduitT Event o m (Maybe c)
tag' NameMatcher a
name (forall (m :: * -> *) a. Monad m => a -> m a
return ()) forall a b. (a -> b) -> a -> b
$ forall a b. a -> b -> a
const ConduitT Event o m b
f


-- | A further simplified tag parser, which ignores all attributes, if any exist
tagIgnoreAttrs :: MonadThrow m
               => NameMatcher a -- ^ Check if this is a correct tag name
               -> ConduitT Event o m b -- ^ Handler function to handle the children of the matched tag
               -> ConduitT Event o m (Maybe b)
tagIgnoreAttrs :: forall (m :: * -> *) a o b.
MonadThrow m =>
NameMatcher a
-> ConduitT Event o m b -> ConduitT Event o m (Maybe b)
tagIgnoreAttrs NameMatcher a
name ConduitT Event o m b
f = forall (m :: * -> *) a b o c.
MonadThrow m =>
NameMatcher a
-> AttrParser b
-> (b -> ConduitT Event o m c)
-> ConduitT Event o m (Maybe c)
tag' NameMatcher a
name AttrParser ()
ignoreAttrs forall a b. (a -> b) -> a -> b
$ forall a b. a -> b -> a
const ConduitT Event o m b
f


-- | Ignore an empty tag and all of its attributes.
--   This does not ignore the tag recursively
--   (i.e. it assumes there are no child elements).
--   This function returns @Just ()@ if the tag matched.
--
-- Since 1.5.0
ignoreEmptyTag :: MonadThrow m
          => NameMatcher a -- ^ Check if this is a correct tag name
          -> ConduitT Event o m (Maybe ())
ignoreEmptyTag :: forall (m :: * -> *) a o.
MonadThrow m =>
NameMatcher a -> ConduitT Event o m (Maybe ())
ignoreEmptyTag NameMatcher a
nameMatcher = forall (m :: * -> *) a o b.
MonadThrow m =>
NameMatcher a
-> ConduitT Event o m b -> ConduitT Event o m (Maybe b)
tagIgnoreAttrs NameMatcher a
nameMatcher (forall (m :: * -> *) a. Monad m => a -> m a
return ())


ignored :: Monad m => ConduitT i o m ()
ignored :: forall (m :: * -> *) i o. Monad m => ConduitT i o m ()
ignored = forall a. (a -> a) -> a
fix forall a b. (a -> b) -> a -> b
$ \ConduitT i o m ()
recurse -> do
  Maybe i
event <- forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
await
  case Maybe i
event of
    Just i
_ -> ConduitT i o m ()
recurse
    Maybe i
_      -> forall (m :: * -> *) a. Monad m => a -> m a
return ()


-- | Same as `takeTree`, without yielding `Event`s.
--
-- >>> runConduit $ parseLBS def "<a>content</a><b></b>" .| (ignoreTree "a" ignoreAttrs >> sinkList)
-- [EventBeginElement (Name {nameLocalName = "b", ...}) [],EventEndElement (Name {nameLocalName = "b", ...}),EventEndDocument]
--
-- >>> runConduit $ parseLBS def "<a>content</a>" .| (ignoreTree "b" ignoreAttrs >> sinkList)
-- [EventBeginElement (Name {nameLocalName = "a", ...}) [],EventContent (ContentText "content"),EventEndElement (Name {nameLocalName = "a", ...}),EventEndDocument]
--
-- >>> runConduit $ parseLBS def "content<a></a>" .| (ignoreTree anyName ignoreAttrs >> sinkList)
-- [EventContent (ContentText "content"),EventBeginElement (Name {nameLocalName = "a", ...}) [],EventEndElement (Name {nameLocalName = "a", ...}),EventEndDocument]
--
-- Since 1.9.0
ignoreTree :: MonadThrow m => NameMatcher a -> AttrParser b -> ConduitT Event o m (Maybe ())
ignoreTree :: forall (m :: * -> *) a b o.
MonadThrow m =>
NameMatcher a -> AttrParser b -> ConduitT Event o m (Maybe ())
ignoreTree NameMatcher a
nameMatcher AttrParser b
attrParser = forall (m :: * -> *) a b r c.
Monad m =>
ConduitT a b m r -> ConduitT b c m () -> ConduitT a c m r
fuseUpstream (forall (m :: * -> *) a b.
MonadThrow m =>
NameMatcher a -> AttrParser b -> ConduitT Event Event m (Maybe ())
takeTree NameMatcher a
nameMatcher AttrParser b
attrParser) forall (m :: * -> *) i o. Monad m => ConduitT i o m ()
ignored

-- | Same as `takeContent`, without yielding `Event`s.
--
-- >>> runConduit $ parseLBS def "<a>content</a>" .| (ignoreContent >> sinkList)
-- [EventBeginElement (Name {nameLocalName = "a", ...}) [],EventContent (ContentText "content"),EventEndElement (Name {nameLocalName = "a", ...}),EventEndDocument]
--
-- >>> runConduit $ parseLBS def "content<a></a>" .| (ignoreContent >> sinkList)
-- [EventBeginElement (Name {nameLocalName = "a", ...}) [],EventEndElement (Name {nameLocalName = "a", ...}),EventEndDocument]
--
-- >>> runConduit $ parseLBS def "content<a></a>" .| (ignoreContent >> sinkList)
-- [EventBeginElement (Name {nameLocalName = "a", ...}) [],EventEndElement (Name {nameLocalName = "a", ...}),EventEndDocument]
--
-- Since 1.9.0
ignoreContent :: MonadThrow m => ConduitT Event o m (Maybe ())
ignoreContent :: forall (m :: * -> *) o.
MonadThrow m =>
ConduitT Event o m (Maybe ())
ignoreContent = forall (m :: * -> *) a b r c.
Monad m =>
ConduitT a b m r -> ConduitT b c m () -> ConduitT a c m r
fuseUpstream forall (m :: * -> *).
MonadThrow m =>
ConduitT Event Event m (Maybe ())
takeContent forall (m :: * -> *) i o. Monad m => ConduitT i o m ()
ignored


-- | Same as `takeTreeContent`, without yielding `Event`s.
--
-- >>> runConduit $ parseLBS def "<a>content</a><b></b>" .| (ignoreTreeContent "a" ignoreAttrs >> sinkList)
-- [EventBeginElement (Name {nameLocalName = "b", ...}) [],EventEndElement (Name {nameLocalName = "b", ...}),EventEndDocument]
--
-- >>> runConduit $ parseLBS def "<a>content</a>" .| (ignoreTreeContent "b" ignoreAttrs >> sinkList)
-- [EventBeginElement (Name {nameLocalName = "a", ...}) [],EventContent (ContentText "content"),EventEndElement (Name {nameLocalName = "a", ...}),EventEndDocument]
--
-- >>> runConduit $ parseLBS def "content<a></a>" .| (ignoreTreeContent anyName ignoreAttrs >> sinkList)
-- [EventBeginElement (Name {nameLocalName = "a", ...}) [],EventEndElement (Name {nameLocalName = "a", ...}),EventEndDocument]
--
-- Since 1.5.0
ignoreTreeContent :: MonadThrow m => NameMatcher a -> AttrParser b -> ConduitT Event o m (Maybe ())
ignoreTreeContent :: forall (m :: * -> *) a b o.
MonadThrow m =>
NameMatcher a -> AttrParser b -> ConduitT Event o m (Maybe ())
ignoreTreeContent NameMatcher a
namePred AttrParser b
attrParser = forall (m :: * -> *) a b r c.
Monad m =>
ConduitT a b m r -> ConduitT b c m () -> ConduitT a c m r
fuseUpstream (forall (m :: * -> *) a b.
MonadThrow m =>
NameMatcher a -> AttrParser b -> ConduitT Event Event m (Maybe ())
takeTreeContent NameMatcher a
namePred AttrParser b
attrParser) forall (m :: * -> *) i o. Monad m => ConduitT i o m ()
ignored


-- | Same as `takeAnyTreeContent`, without yielding `Event`s.
--
-- >>> runConduit $ parseLBS def "<a>content</a><b></b>" .| (ignoreAnyTreeContent >> sinkList)
-- [EventBeginElement (Name {nameLocalName = "b", ...}) [],EventEndElement (Name {nameLocalName = "b", ...}),EventEndDocument]
--
-- >>> runConduit $ parseLBS def "text<b></b>" .| (ignoreAnyTreeContent >> sinkList)
-- [EventBeginElement (Name {nameLocalName = "b", ...}) [],EventEndElement (Name {nameLocalName = "b", ...}),EventEndDocument]
--
-- Since 1.5.0
ignoreAnyTreeContent :: MonadThrow m => ConduitT Event o m (Maybe ())
ignoreAnyTreeContent :: forall (m :: * -> *) o.
MonadThrow m =>
ConduitT Event o m (Maybe ())
ignoreAnyTreeContent = forall (m :: * -> *) a b r c.
Monad m =>
ConduitT a b m r -> ConduitT b c m () -> ConduitT a c m r
fuseUpstream forall (m :: * -> *).
MonadThrow m =>
ConduitT Event Event m (Maybe ())
takeAnyTreeContent forall (m :: * -> *) i o. Monad m => ConduitT i o m ()
ignored


-- | Get the value of the first parser which returns 'Just'. If no parsers
-- succeed (i.e., return @Just@), this function returns 'Nothing'.
--
-- > orE a b = choose [a, b]
--
-- Warning: `orE` doesn't backtrack. See 'choose' for detailed explanation.
orE :: Monad m
    => ConduitT Event o m (Maybe a) -- ^ The first (preferred) parser
    -> ConduitT Event o m (Maybe a) -- ^ The second parser, only executed if the first parser fails
    -> ConduitT Event o m (Maybe a)
orE :: forall (m :: * -> *) o a.
Monad m =>
ConduitT Event o m (Maybe a)
-> ConduitT Event o m (Maybe a) -> ConduitT Event o m (Maybe a)
orE ConduitT Event o m (Maybe a)
a ConduitT Event o m (Maybe a)
b = ConduitT Event o m (Maybe a)
a forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \Maybe a
x -> forall b a. b -> (a -> b) -> Maybe a -> b
maybe ConduitT Event o m (Maybe a)
b (forall a b. a -> b -> a
const forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. Monad m => a -> m a
return Maybe a
x) Maybe a
x

-- | Get the value of the first parser which returns 'Just'. If no parsers
-- succeed (i.e., return 'Just'), this function returns 'Nothing'.
--
-- Warning: 'choose' doesn't backtrack. If a parser consumed some events,
-- subsequent parsers will continue from the following events. This can be a
-- problem if parsers share an accepted prefix of events, so an earlier
-- (failing) parser will discard the events that the later parser could
-- potentially succeed on.
--
-- An other problematic case is using 'choose' to implement order-independent
-- parsing using a set of parsers, with a final trailing ignore-anything-else
-- action.  In this case, certain trees might be skipped.
--
-- >>> :{
-- let parse2Tags name1 name2 = do
--       tag1 <- tagNoAttr name1 (pure ())
--       tag2 <- tagNoAttr name2 (pure tag1)
--       return $ join tag2
-- :}
--
-- >>> :{
-- runConduit $ parseLBS def "<a></a><b></b>" .| choose
--   [ parse2Tags "a" "b"
--   , parse2Tags "a" "c"
--   ]
-- :}
-- Just ()
--
-- >>> :{
-- runConduit $ parseLBS def "<a></a><b></b>" .| choose
--   [ parse2Tags "a" "c"
--   , parse2Tags "a" "b"
--   ]
-- :}
-- Nothing
choose :: Monad m
       => [ConduitT Event o m (Maybe a)] -- ^ List of parsers that will be tried in order.
       -> ConduitT Event o m (Maybe a)   -- ^ Result of the first parser to succeed, or @Nothing@
                                         --   if no parser succeeded
choose :: forall (m :: * -> *) o a.
Monad m =>
[ConduitT Event o m (Maybe a)] -> ConduitT Event o m (Maybe a)
choose []     = forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
choose (ConduitT Event o m (Maybe a)
i:[ConduitT Event o m (Maybe a)]
is) = ConduitT Event o m (Maybe a)
i forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall (m :: * -> *) o a.
Monad m =>
[ConduitT Event o m (Maybe a)] -> ConduitT Event o m (Maybe a)
choose [ConduitT Event o m (Maybe a)]
is) (forall (m :: * -> *) a. Monad m => a -> m a
return forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. a -> Maybe a
Just)

-- | Force an optional parser into a required parser. All of the 'tag'
-- functions, 'attr', 'choose' and 'many' deal with 'Maybe' parsers. Use this when you
-- want to finally force something to happen.
force :: MonadThrow m
      => String -- ^ Error message
      -> m (Maybe a) -- ^ Optional parser to be forced
      -> m a
force :: forall (m :: * -> *) a.
MonadThrow m =>
String -> m (Maybe a) -> m a
force String
msg m (Maybe a)
i = m (Maybe a)
i forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM forall a b. (a -> b) -> a -> b
$ String -> Maybe Event -> XmlException
XmlException String
msg forall a. Maybe a
Nothing) forall (m :: * -> *) a. Monad m => a -> m a
return

-- | A helper function which reads a file from disk using 'enumFile', detects
-- character encoding using 'detectUtf', parses the XML using 'parseBytes', and
-- then hands off control to your supplied parser.
parseFile :: MonadResource m
          => ParseSettings
          -> FilePath
          -> ConduitT i Event m ()
parseFile :: forall (m :: * -> *) i.
MonadResource m =>
ParseSettings -> String -> ConduitT i Event m ()
parseFile ParseSettings
ps String
fp = forall (m :: * -> *) i.
MonadResource m =>
String -> ConduitT i ByteString m ()
sourceFile String
fp forall (m :: * -> *) a b c r.
Monad m =>
ConduitT a b m () -> ConduitT b c m r -> ConduitT a c m r
.| forall (m :: * -> *) (n :: * -> *) i o r.
Monad m =>
(forall a. m a -> n a) -> ConduitT i o m r -> ConduitT i o n r
transPipe forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (forall (m :: * -> *).
MonadThrow m =>
ParseSettings -> ConduitT ByteString Event m ()
parseBytes ParseSettings
ps)

-- | Parse an event stream from a lazy 'L.ByteString'.
parseLBS :: MonadThrow m
         => ParseSettings
         -> L.ByteString
         -> ConduitT i Event m ()
parseLBS :: forall (m :: * -> *) i.
MonadThrow m =>
ParseSettings -> ByteString -> ConduitT i Event m ()
parseLBS ParseSettings
ps ByteString
lbs = forall (m :: * -> *) lazy strict i.
(Monad m, LazySequence lazy strict) =>
lazy -> ConduitT i strict m ()
sourceLazy ByteString
lbs forall (m :: * -> *) a b c r.
Monad m =>
ConduitT a b m () -> ConduitT b c m r -> ConduitT a c m r
.| forall (m :: * -> *).
MonadThrow m =>
ParseSettings -> ConduitT ByteString Event m ()
parseBytes ParseSettings
ps

data XmlException = XmlException
    { XmlException -> String
xmlErrorMessage :: String
    , XmlException -> Maybe Event
xmlBadInput     :: Maybe Event
    }
                  | InvalidEndElement Name (Maybe Event)
                  | InvalidEntity String (Maybe Event)
                  | MissingAttribute String
                  | UnparsedAttributes [(Name, [Content])]
    deriving (Int -> XmlException -> ShowS
[XmlException] -> ShowS
XmlException -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [XmlException] -> ShowS
$cshowList :: [XmlException] -> ShowS
show :: XmlException -> String
$cshow :: XmlException -> String
showsPrec :: Int -> XmlException -> ShowS
$cshowsPrec :: Int -> XmlException -> ShowS
Show, Typeable)

instance Exception XmlException where
#if MIN_VERSION_base(4, 8, 0)
  displayException :: XmlException -> String
displayException (XmlException String
msg (Just Event
event)) = String
"Error while parsing XML event " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show Event
event forall a. [a] -> [a] -> [a]
++ String
": " forall a. [a] -> [a] -> [a]
++ String
msg
  displayException (XmlException String
msg Maybe Event
_) = String
"Error while parsing XML: " forall a. [a] -> [a] -> [a]
++ String
msg
  displayException (InvalidEndElement Name
name (Just Event
event)) = String
"Error while parsing XML event: expected </" forall a. [a] -> [a] -> [a]
++ Text -> String
T.unpack (Name -> Text
nameLocalName Name
name) forall a. [a] -> [a] -> [a]
++ String
">, got " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show Event
event
  displayException (InvalidEndElement Name
name Maybe Event
_) = String
"Error while parsing XML event: expected </" forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show Name
name forall a. [a] -> [a] -> [a]
++ String
">, got nothing"
  displayException (InvalidEntity String
msg (Just Event
event)) = String
"Error while parsing XML entity " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show Event
event forall a. [a] -> [a] -> [a]
++ String
": " forall a. [a] -> [a] -> [a]
++ String
msg
  displayException (InvalidEntity String
msg Maybe Event
_) = String
"Error while parsing XML entity: " forall a. [a] -> [a] -> [a]
++ String
msg
  displayException (MissingAttribute String
msg) = String
"Missing required attribute: " forall a. [a] -> [a] -> [a]
++ String
msg
  displayException (UnparsedAttributes [(Name, [Content])]
attrs) = forall a. Show a => a -> String
show (forall (t :: * -> *) a. Foldable t => t a -> Int
length [(Name, [Content])]
attrs) forall a. [a] -> [a] -> [a]
++ String
" remaining unparsed attributes: \n" forall a. [a] -> [a] -> [a]
++ forall a. [a] -> [[a]] -> [a]
intercalate String
"\n" (forall a. Show a => a -> String
show forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [(Name, [Content])]
attrs)
#endif


-- | A @NameMatcher@ describes which names a tag parser is allowed to match.
--
-- Since 1.5.0
newtype NameMatcher a = NameMatcher { forall a. NameMatcher a -> Name -> Maybe a
runNameMatcher :: Name -> Maybe a }

deriving instance Functor NameMatcher

instance Applicative NameMatcher where
  pure :: forall a. a -> NameMatcher a
pure a
a = forall a. (Name -> Maybe a) -> NameMatcher a
NameMatcher forall a b. (a -> b) -> a -> b
$ forall a b. a -> b -> a
const forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a. Applicative f => a -> f a
pure a
a
  NameMatcher Name -> Maybe (a -> b)
f <*> :: forall a b. NameMatcher (a -> b) -> NameMatcher a -> NameMatcher b
<*> NameMatcher Name -> Maybe a
a = forall a. (Name -> Maybe a) -> NameMatcher a
NameMatcher forall a b. (a -> b) -> a -> b
$ \Name
name -> Name -> Maybe (a -> b)
f Name
name forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Name -> Maybe a
a Name
name

-- | 'NameMatcher's can be combined with @\<|\>@
instance Alternative NameMatcher where
  empty :: forall a. NameMatcher a
empty = forall a. (Name -> Maybe a) -> NameMatcher a
NameMatcher forall a b. (a -> b) -> a -> b
$ forall a b. a -> b -> a
const forall a. Maybe a
Nothing
  NameMatcher Name -> Maybe a
f <|> :: forall a. NameMatcher a -> NameMatcher a -> NameMatcher a
<|> NameMatcher Name -> Maybe a
g = forall a. (Name -> Maybe a) -> NameMatcher a
NameMatcher (\Name
a -> Name -> Maybe a
f Name
a forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Name -> Maybe a
g Name
a)

-- | Match a single 'Name' in a concise way.
-- Note that 'Name' is namespace sensitive: when using the 'IsString' instance,
-- use @"{http:\/\/a\/b}c"@ to match the tag @c@ in the XML namespace @http://a/b@
instance (a ~ Name) => IsString (NameMatcher a) where
  fromString :: String -> NameMatcher a
fromString String
s = (Name -> Bool) -> NameMatcher Name
matching (forall a. Eq a => a -> a -> Bool
== forall a. IsString a => String -> a
fromString String
s)

-- | @matching f@ matches @name@ iff @f name@ is true. Returns the matched 'Name'.
--
-- Since 1.5.0
matching :: (Name -> Bool) -> NameMatcher Name
matching :: (Name -> Bool) -> NameMatcher Name
matching Name -> Bool
f = forall a. (Name -> Maybe a) -> NameMatcher a
NameMatcher forall a b. (a -> b) -> a -> b
$ \Name
name -> if Name -> Bool
f Name
name then forall a. a -> Maybe a
Just Name
name else forall a. Maybe a
Nothing

-- | Matches any 'Name'. Returns the matched 'Name'.
--
-- Since 1.5.0
anyName :: NameMatcher Name
anyName :: NameMatcher Name
anyName = (Name -> Bool) -> NameMatcher Name
matching (forall a b. a -> b -> a
const Bool
True)

-- | Matches any 'Name' from the given list. Returns the matched 'Name'.
--
-- Since 1.5.0
anyOf :: [Name] -> NameMatcher Name
anyOf :: [Name] -> NameMatcher Name
anyOf [Name]
values = (Name -> Bool) -> NameMatcher Name
matching (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Name]
values)


-- | A monad for parsing attributes. By default, it requires you to deal with
-- all attributes present on an element, and will throw an exception if there
-- are unhandled attributes. Use the 'requireAttr', 'attr' et al
-- functions for handling an attribute, and 'ignoreAttrs' if you would like to
-- skip the rest of the attributes on an element.
--
-- 'Alternative' instance behaves like 'First' monoid: it chooses first
-- parser which doesn't fail.
newtype AttrParser a = AttrParser { forall a.
AttrParser a
-> [(Name, [Content])]
-> Either SomeException ([(Name, [Content])], a)
runAttrParser :: [(Name, [Content])] -> Either SomeException ([(Name, [Content])], a) }

instance Monad AttrParser where
    return :: forall a. a -> AttrParser a
return a
a = forall a.
([(Name, [Content])]
 -> Either SomeException ([(Name, [Content])], a))
-> AttrParser a
AttrParser forall a b. (a -> b) -> a -> b
$ \[(Name, [Content])]
as -> forall a b. b -> Either a b
Right ([(Name, [Content])]
as, a
a)
    (AttrParser [(Name, [Content])]
-> Either SomeException ([(Name, [Content])], a)
f) >>= :: forall a b. AttrParser a -> (a -> AttrParser b) -> AttrParser b
>>= a -> AttrParser b
g = forall a.
([(Name, [Content])]
 -> Either SomeException ([(Name, [Content])], a))
-> AttrParser a
AttrParser forall a b. (a -> b) -> a -> b
$ \[(Name, [Content])]
as ->
        forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either forall a b. a -> Either a b
Left (\([(Name, [Content])]
as', a
f') -> forall a.
AttrParser a
-> [(Name, [Content])]
-> Either SomeException ([(Name, [Content])], a)
runAttrParser (a -> AttrParser b
g a
f') [(Name, [Content])]
as') ([(Name, [Content])]
-> Either SomeException ([(Name, [Content])], a)
f [(Name, [Content])]
as)
instance Functor AttrParser where
    fmap :: forall a b. (a -> b) -> AttrParser a -> AttrParser b
fmap = forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM
instance Applicative AttrParser where
    pure :: forall a. a -> AttrParser a
pure = forall (m :: * -> *) a. Monad m => a -> m a
return
    <*> :: forall a b. AttrParser (a -> b) -> AttrParser a -> AttrParser b
(<*>) = forall (m :: * -> *) a b. Monad m => m (a -> b) -> m a -> m b
ap
instance Alternative AttrParser where
    empty :: forall a. AttrParser a
empty = forall a.
([(Name, [Content])]
 -> Either SomeException ([(Name, [Content])], a))
-> AttrParser a
AttrParser forall a b. (a -> b) -> a -> b
$ forall a b. a -> b -> a
const forall a b. (a -> b) -> a -> b
$ forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ forall e. Exception e => e -> SomeException
toException forall a b. (a -> b) -> a -> b
$ String -> Maybe Event -> XmlException
XmlException String
"AttrParser.empty" forall a. Maybe a
Nothing
    AttrParser [(Name, [Content])]
-> Either SomeException ([(Name, [Content])], a)
f <|> :: forall a. AttrParser a -> AttrParser a -> AttrParser a
<|> AttrParser [(Name, [Content])]
-> Either SomeException ([(Name, [Content])], a)
g = forall a.
([(Name, [Content])]
 -> Either SomeException ([(Name, [Content])], a))
-> AttrParser a
AttrParser forall a b. (a -> b) -> a -> b
$ \[(Name, [Content])]
x ->
        forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (forall a b. a -> b -> a
const forall a b. (a -> b) -> a -> b
$ [(Name, [Content])]
-> Either SomeException ([(Name, [Content])], a)
g [(Name, [Content])]
x) forall a b. b -> Either a b
Right ([(Name, [Content])]
-> Either SomeException ([(Name, [Content])], a)
f [(Name, [Content])]
x)
instance MonadThrow AttrParser where
    throwM :: forall e a. Exception e => e -> AttrParser a
throwM = forall a.
([(Name, [Content])]
 -> Either SomeException ([(Name, [Content])], a))
-> AttrParser a
AttrParser forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. a -> b -> a
const forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM

optionalAttrRaw :: ((Name, [Content]) -> Maybe b) -> AttrParser (Maybe b)
optionalAttrRaw :: forall b. ((Name, [Content]) -> Maybe b) -> AttrParser (Maybe b)
optionalAttrRaw (Name, [Content]) -> Maybe b
f =
    forall a.
([(Name, [Content])]
 -> Either SomeException ([(Name, [Content])], a))
-> AttrParser a
AttrParser forall a b. (a -> b) -> a -> b
$ ([(Name, [Content])] -> [(Name, [Content])])
-> [(Name, [Content])]
-> Either SomeException ([(Name, [Content])], Maybe b)
go forall a. a -> a
id
  where
    go :: ([(Name, [Content])] -> [(Name, [Content])])
-> [(Name, [Content])]
-> Either SomeException ([(Name, [Content])], Maybe b)
go [(Name, [Content])] -> [(Name, [Content])]
front [] = forall a b. b -> Either a b
Right ([(Name, [Content])] -> [(Name, [Content])]
front [], forall a. Maybe a
Nothing)
    go [(Name, [Content])] -> [(Name, [Content])]
front ((Name, [Content])
a:[(Name, [Content])]
as) =
        forall b a. b -> (a -> b) -> Maybe a -> b
maybe (([(Name, [Content])] -> [(Name, [Content])])
-> [(Name, [Content])]
-> Either SomeException ([(Name, [Content])], Maybe b)
go ([(Name, [Content])] -> [(Name, [Content])]
front forall b c a. (b -> c) -> (a -> b) -> a -> c
. (:) (Name, [Content])
a) [(Name, [Content])]
as)
              (\b
b -> forall a b. b -> Either a b
Right ([(Name, [Content])] -> [(Name, [Content])]
front [(Name, [Content])]
as, forall a. a -> Maybe a
Just b
b))
              ((Name, [Content]) -> Maybe b
f (Name, [Content])
a)

requireAttrRaw :: String -> ((Name, [Content]) -> Maybe b) -> AttrParser b
requireAttrRaw :: forall b. String -> ((Name, [Content]) -> Maybe b) -> AttrParser b
requireAttrRaw String
msg (Name, [Content]) -> Maybe b
f = forall b. ((Name, [Content]) -> Maybe b) -> AttrParser (Maybe b)
optionalAttrRaw (Name, [Content]) -> Maybe b
f forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>=
    forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall a.
([(Name, [Content])]
 -> Either SomeException ([(Name, [Content])], a))
-> AttrParser a
AttrParser forall a b. (a -> b) -> a -> b
$ forall a b. a -> b -> a
const forall a b. (a -> b) -> a -> b
$ forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ forall e. Exception e => e -> SomeException
toException forall a b. (a -> b) -> a -> b
$ String -> XmlException
MissingAttribute String
msg)
          forall (m :: * -> *) a. Monad m => a -> m a
return

-- | Return the value for an attribute if present.
attr :: Name -> AttrParser (Maybe Text)
attr :: Name -> AttrParser (Maybe Text)
attr Name
n = forall b. ((Name, [Content]) -> Maybe b) -> AttrParser (Maybe b)
optionalAttrRaw
    (\(Name
x, [Content]
y) -> if Name
x forall a. Eq a => a -> a -> Bool
== Name
n then forall a. a -> Maybe a
Just ([Content] -> Text
contentsToText [Content]
y) else forall a. Maybe a
Nothing)

-- | Shortcut composition of 'force' and 'attr'.
requireAttr :: Name -> AttrParser Text
requireAttr :: Name -> AttrParser Text
requireAttr Name
n = forall (m :: * -> *) a.
MonadThrow m =>
String -> m (Maybe a) -> m a
force (String
"Missing attribute: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show Name
n) forall a b. (a -> b) -> a -> b
$ Name -> AttrParser (Maybe Text)
attr Name
n


{-# DEPRECATED optionalAttr "Please use 'attr'." #-}
optionalAttr :: Name -> AttrParser (Maybe Text)
optionalAttr :: Name -> AttrParser (Maybe Text)
optionalAttr = Name -> AttrParser (Maybe Text)
attr

contentsToText :: [Content] -> Text
contentsToText :: [Content] -> Text
contentsToText = [Text] -> Text
T.concat forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map Content -> Text
toText where
  toText :: Content -> Text
toText (ContentText Text
t)   = Text
t
  toText (ContentEntity Text
e) = [Text] -> Text
T.concat [Text
"&", Text
e, Text
";"]

-- | Skip the remaining attributes on an element. Since this will clear the
-- list of attributes, you must call this /after/ any calls to 'requireAttr',
-- 'optionalAttr', etc.
ignoreAttrs :: AttrParser ()
ignoreAttrs :: AttrParser ()
ignoreAttrs = forall a.
([(Name, [Content])]
 -> Either SomeException ([(Name, [Content])], a))
-> AttrParser a
AttrParser forall a b. (a -> b) -> a -> b
$ forall a b. a -> b -> a
const forall a b. (a -> b) -> a -> b
$ forall a b. b -> Either a b
Right ([], ())

-- | Keep parsing elements as long as the parser returns 'Just'.
many :: Monad m
     => ConduitT Event o m (Maybe a)
     -> ConduitT Event o m [a]
many :: forall (m :: * -> *) o a.
Monad m =>
ConduitT Event o m (Maybe a) -> ConduitT Event o m [a]
many ConduitT Event o m (Maybe a)
i = forall (m :: * -> *) o a b.
Monad m =>
ConduitT Event o m (Maybe a)
-> ConduitT Event o m (Maybe b) -> ConduitT Event o m [a]
manyIgnore ConduitT Event o m (Maybe a)
i forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing

-- | Like 'many' but discards the results without building an intermediate list.
--
-- Since 1.5.0
many_ :: MonadThrow m
      => ConduitT Event o m (Maybe a)
      -> ConduitT Event o m ()
many_ :: forall (m :: * -> *) o a.
MonadThrow m =>
ConduitT Event o m (Maybe a) -> ConduitT Event o m ()
many_ ConduitT Event o m (Maybe a)
consumer = forall (m :: * -> *) b.
MonadThrow m =>
ConduitT Event b m (Maybe b)
-> ConduitT Event b m (Maybe ()) -> ConduitT Event b m ()
manyIgnoreYield (forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing) (forall (f :: * -> *) a. Functor f => f a -> f ()
void forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ConduitT Event o m (Maybe a)
consumer)

-- | Keep parsing elements as long as the parser returns 'Just'
--   or the ignore parser returns 'Just'.
manyIgnore :: Monad m
           => ConduitT Event o m (Maybe a)
           -> ConduitT Event o m (Maybe b)
           -> ConduitT Event o m [a]
manyIgnore :: forall (m :: * -> *) o a b.
Monad m =>
ConduitT Event o m (Maybe a)
-> ConduitT Event o m (Maybe b) -> ConduitT Event o m [a]
manyIgnore ConduitT Event o m (Maybe a)
i ConduitT Event o m (Maybe b)
ignored' = ([a] -> [a]) -> ConduitT Event o m [a]
go forall a. a -> a
id where
  go :: ([a] -> [a]) -> ConduitT Event o m [a]
go [a] -> [a]
front = ConduitT Event o m (Maybe a)
i forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall b a. b -> (a -> b) -> Maybe a -> b
maybe (([a] -> [a]) -> ConduitT Event o m [a]
onFail [a] -> [a]
front) (\a
y -> ([a] -> [a]) -> ConduitT Event o m [a]
go forall a b. (a -> b) -> a -> b
$ [a] -> [a]
front forall b c a. (b -> c) -> (a -> b) -> a -> c
. (:) a
y)
  -- onFail is called if the main parser fails
  onFail :: ([a] -> [a]) -> ConduitT Event o m [a]
onFail [a] -> [a]
front = ConduitT Event o m (Maybe b)
ignored' forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ [a] -> [a]
front []) (forall a b. a -> b -> a
const forall a b. (a -> b) -> a -> b
$ ([a] -> [a]) -> ConduitT Event o m [a]
go [a] -> [a]
front)

-- | Like @many@, but any tags and content the consumer doesn't match on
--   are silently ignored.
many' :: MonadThrow m
      => ConduitT Event o m (Maybe a)
      -> ConduitT Event o m [a]
many' :: forall (m :: * -> *) o a.
MonadThrow m =>
ConduitT Event o m (Maybe a) -> ConduitT Event o m [a]
many' ConduitT Event o m (Maybe a)
consumer = forall (m :: * -> *) o a b.
Monad m =>
ConduitT Event o m (Maybe a)
-> ConduitT Event o m (Maybe b) -> ConduitT Event o m [a]
manyIgnore ConduitT Event o m (Maybe a)
consumer forall (m :: * -> *) o.
MonadThrow m =>
ConduitT Event o m (Maybe ())
ignoreAnyTreeContent


-- | Like 'many', but uses 'yield' so the result list can be streamed
--   to downstream conduits without waiting for 'manyYield' to finish
manyYield :: Monad m
          => ConduitT a b m (Maybe b)
          -> ConduitT a b m ()
manyYield :: forall (m :: * -> *) a b.
Monad m =>
ConduitT a b m (Maybe b) -> ConduitT a b m ()
manyYield ConduitT a b m (Maybe b)
consumer = forall a. (a -> a) -> a
fix forall a b. (a -> b) -> a -> b
$ \ConduitT a b m ()
loop ->
  ConduitT a b m (Maybe b)
consumer forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall (m :: * -> *) a. Monad m => a -> m a
return ()) (\b
x -> forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield b
x forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ConduitT a b m ()
loop)

-- | Like 'manyIgnore', but uses 'yield' so the result list can be streamed
--   to downstream conduits without waiting for 'manyIgnoreYield' to finish
manyIgnoreYield :: MonadThrow m
                => ConduitT Event b m (Maybe b) -- ^ Consuming parser that generates the result stream
                -> ConduitT Event b m (Maybe ()) -- ^ Ignore parser that consumes elements to be ignored
                -> ConduitT Event b m ()
manyIgnoreYield :: forall (m :: * -> *) b.
MonadThrow m =>
ConduitT Event b m (Maybe b)
-> ConduitT Event b m (Maybe ()) -> ConduitT Event b m ()
manyIgnoreYield ConduitT Event b m (Maybe b)
consumer ConduitT Event b m (Maybe ())
ignoreParser = forall a. (a -> a) -> a
fix forall a b. (a -> b) -> a -> b
$ \ConduitT Event b m ()
loop ->
  ConduitT Event b m (Maybe b)
consumer forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall b a. b -> (a -> b) -> Maybe a -> b
maybe (ConduitT Event b m () -> ConduitT Event b m ()
onFail ConduitT Event b m ()
loop) (\b
x -> forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield b
x forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ConduitT Event b m ()
loop)
  where onFail :: ConduitT Event b m () -> ConduitT Event b m ()
onFail ConduitT Event b m ()
loop = ConduitT Event b m (Maybe ())
ignoreParser forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall (m :: * -> *) a. Monad m => a -> m a
return ()) (forall a b. a -> b -> a
const ConduitT Event b m ()
loop)

-- | Like 'many'', but uses 'yield' so the result list can be streamed
--   to downstream conduits without waiting for 'manyYield'' to finish
manyYield' :: MonadThrow m
           => ConduitT Event b m (Maybe b)
           -> ConduitT Event b m ()
manyYield' :: forall (m :: * -> *) b.
MonadThrow m =>
ConduitT Event b m (Maybe b) -> ConduitT Event b m ()
manyYield' ConduitT Event b m (Maybe b)
consumer = forall (m :: * -> *) b.
MonadThrow m =>
ConduitT Event b m (Maybe b)
-> ConduitT Event b m (Maybe ()) -> ConduitT Event b m ()
manyIgnoreYield ConduitT Event b m (Maybe b)
consumer forall (m :: * -> *) o.
MonadThrow m =>
ConduitT Event o m (Maybe ())
ignoreAnyTreeContent


-- | Stream a single content 'Event'.
--
-- Returns @Just ()@ if a content 'Event' was consumed, @Nothing@ otherwise.
--
-- >>> runConduit $ parseLBS def "content<a></a>" .| void takeContent .| sinkList
-- [EventBeginDocument,EventContent (ContentText "content")]
--
-- If next event isn't a content, nothing is consumed.
--
-- >>> runConduit $ parseLBS def "<a>content</a>" .| void takeContent .| sinkList
-- [EventBeginDocument]
--
-- Since 1.5.0
takeContent :: MonadThrow m => ConduitT Event Event m (Maybe ())
takeContent :: forall (m :: * -> *).
MonadThrow m =>
ConduitT Event Event m (Maybe ())
takeContent = do
  Maybe Event
event <- forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
await
  case Maybe Event
event of
    Just e :: Event
e@EventContent{} -> forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Event
e forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. a -> Maybe a
Just ())
    Just e :: Event
e@EventCDATA{}   -> forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Event
e forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. a -> Maybe a
Just ())
    Just Event
e                -> if Event -> Bool
isWhitespace Event
e then forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Event
e forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *).
MonadThrow m =>
ConduitT Event Event m (Maybe ())
takeContent else forall i o (m :: * -> *). i -> ConduitT i o m ()
leftover Event
e forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
    Maybe Event
_                     -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing

-- | Stream 'Event's corresponding to a single XML element that matches given 'NameMatcher' and 'AttrParser', from the opening- to the closing-tag.
--
-- >>> runConduit $ parseLBS def "<a>content</a><b></b>" .| void (takeTree "a" ignoreAttrs) .| sinkList
-- [EventBeginDocument,EventBeginElement (Name {nameLocalName = "a", ...}) [],EventContent (ContentText "content"),EventEndElement (Name {nameLocalName = "a", ...})]
--
-- >>> runConduit $ parseLBS def "<a>content</a><b></b>" .| void (takeTree "b" ignoreAttrs) .| sinkList
-- [EventBeginDocument]
--
-- If next 'Event' isn't an element, nothing is consumed.
--
-- >>> runConduit $ parseLBS def "text<a></a>" .| void (takeTree "a" ignoreAttrs) .| sinkList
-- [EventBeginDocument]
--
-- If an opening-tag is consumed but no matching closing-tag is found, an 'XmlException' is thrown.
--
-- >>> runConduit $ parseLBS def "<a><b></b>" .| void (takeTree "a" ignoreAttrs) .| sinkList
-- *** Exception: InvalidEndElement (Name {nameLocalName = "a", nameNamespace = Nothing, namePrefix = Nothing}) Nothing
--
-- This function automatically ignores comments, instructions and whitespace.
--
-- Returns @Just ()@ if an element was consumed, 'Nothing' otherwise.
--
-- Since 1.5.0
takeTree :: MonadThrow m => NameMatcher a -> AttrParser b -> ConduitT Event Event m (Maybe ())
takeTree :: forall (m :: * -> *) a b.
MonadThrow m =>
NameMatcher a -> AttrParser b -> ConduitT Event Event m (Maybe ())
takeTree NameMatcher a
nameMatcher AttrParser b
attrParser = do
  Maybe Event
event <- forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
await
  case Maybe Event
event of
    Just e :: Event
e@(EventBeginElement Name
name [(Name, [Content])]
as) -> case forall a. NameMatcher a -> Name -> Maybe a
runNameMatcher NameMatcher a
nameMatcher Name
name of
      Just a
_ -> case forall a.
AttrParser a
-> [(Name, [Content])]
-> Either SomeException ([(Name, [Content])], a)
runAttrParser AttrParser b
attrParser [(Name, [Content])]
as of
        Right ([(Name, [Content])], b)
_ -> do
          forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Event
e
          forall {m :: * -> *} {a}. Monad m => m (Maybe a) -> m ()
whileJust forall (m :: * -> *).
MonadThrow m =>
ConduitT Event Event m (Maybe ())
takeAnyTreeContent
          Maybe Event
endEvent <- forall (m :: * -> *) i o. Monad m => ConduitT i o m (Maybe i)
await
          case Maybe Event
endEvent of
            Just e' :: Event
e'@(EventEndElement Name
name') | Name
name forall a. Eq a => a -> a -> Bool
== Name
name' -> forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Event
e' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (forall a. a -> Maybe a
Just ())
            Maybe Event
_                                               -> forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM forall a b. (a -> b) -> a -> b
$ Name -> Maybe Event -> XmlException
InvalidEndElement Name
name Maybe Event
endEvent
        Either SomeException ([(Name, [Content])], b)
_ -> forall i o (m :: * -> *). i -> ConduitT i o m ()
leftover Event
e forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
      Maybe a
_ -> forall i o (m :: * -> *). i -> ConduitT i o m ()
leftover Event
e forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing

    Just Event
e -> if Event -> Bool
isWhitespace Event
e then forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield Event
e forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a b.
MonadThrow m =>
NameMatcher a -> AttrParser b -> ConduitT Event Event m (Maybe ())
takeTree NameMatcher a
nameMatcher AttrParser b
attrParser else forall i o (m :: * -> *). i -> ConduitT i o m ()
leftover Event
e forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
    Maybe Event
_ -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
  where
    whileJust :: m (Maybe a) -> m ()
whileJust m (Maybe a)
f = forall a. (a -> a) -> a
fix forall a b. (a -> b) -> a -> b
$ \m ()
loop -> m (Maybe a)
f forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall (m :: * -> *) a. Monad m => a -> m a
return ()) (forall a b. a -> b -> a
const m ()
loop)

-- | Like 'takeTree', but can also stream a content 'Event'.
--
-- >>> runConduit $ parseLBS def "<a>content</a><b></b>" .| void (takeTreeContent "a" ignoreAttrs) .| sinkList
-- [EventBeginDocument,EventBeginElement (Name {nameLocalName = "a", ...}) [],EventContent (ContentText "content"),EventEndElement (Name {nameLocalName = "a", ...})]
--
-- >>> runConduit $ parseLBS def "<a>content</a><b></b>" .| void (takeTreeContent "b" ignoreAttrs) .| sinkList
-- [EventBeginDocument]
--
-- >>> runConduit $ parseLBS def "content<a></a><b></b>" .| void (takeTreeContent "a" ignoreAttrs) .| sinkList
-- [EventBeginDocument,EventContent (ContentText "content")]
--
-- Since 1.5.0
takeTreeContent :: MonadThrow m => NameMatcher a -> AttrParser b -> ConduitT Event Event m (Maybe ())
takeTreeContent :: forall (m :: * -> *) a b.
MonadThrow m =>
NameMatcher a -> AttrParser b -> ConduitT Event Event m (Maybe ())
takeTreeContent NameMatcher a
nameMatcher AttrParser b
attrParser = forall (m :: * -> *) a. MaybeT m a -> m (Maybe a)
runMaybeT forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. m (Maybe a) -> MaybeT m a
MaybeT (forall (m :: * -> *) a b.
MonadThrow m =>
NameMatcher a -> AttrParser b -> ConduitT Event Event m (Maybe ())
takeTree NameMatcher a
nameMatcher AttrParser b
attrParser) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (m :: * -> *) a. m (Maybe a) -> MaybeT m a
MaybeT forall (m :: * -> *).
MonadThrow m =>
ConduitT Event Event m (Maybe ())
takeContent

-- | Like 'takeTreeContent', without checking for tag name or attributes.
--
-- >>> runConduit $ parseLBS def "text<a></a>" .| void takeAnyTreeContent .| sinkList
-- [EventBeginDocument,EventContent (ContentText "text")]
--
-- >>> runConduit $ parseLBS def "</a><b></b>" .| void takeAnyTreeContent .| sinkList
-- [EventBeginDocument]
--
-- >>> runConduit $ parseLBS def "<b><c></c></b></a>text" .| void takeAnyTreeContent .| sinkList
-- [EventBeginDocument,EventBeginElement (Name {nameLocalName = "b", ...}) [],EventBeginElement (Name {nameLocalName = "c", ...}) [],EventEndElement (Name {nameLocalName = "c", ...}),EventEndElement (Name {nameLocalName = "b", ...})]
--
-- Since 1.5.0
takeAnyTreeContent :: MonadThrow m
                => ConduitT Event Event m (Maybe ())
takeAnyTreeContent :: forall (m :: * -> *).
MonadThrow m =>
ConduitT Event Event m (Maybe ())
takeAnyTreeContent = forall (m :: * -> *) a b.
MonadThrow m =>
NameMatcher a -> AttrParser b -> ConduitT Event Event m (Maybe ())
takeTreeContent NameMatcher Name
anyName AttrParser ()
ignoreAttrs


-- | Default implementation of 'DecodeEntities', which leaves the
-- entity as-is. Numeric character references and the five standard
-- entities (lt, gt, amp, quot, pos) are handled internally by the
-- parser.
decodeXmlEntities :: DecodeEntities
decodeXmlEntities :: DecodeEntities
decodeXmlEntities = DecodeEntities
ContentEntity

-- | HTML4-compliant entity decoder. Handles the additional 248
-- entities defined by HTML 4 and XHTML 1.
--
-- Note that HTML 5 introduces a drastically larger number of entities, and
-- this code does not recognize most of them.
decodeHtmlEntities :: DecodeEntities
decodeHtmlEntities :: DecodeEntities
decodeHtmlEntities Text
t =
  forall b a. b -> (a -> b) -> Maybe a -> b
maybe (DecodeEntities
ContentEntity Text
t) DecodeEntities
ContentText forall a b. (a -> b) -> a -> b
$ forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Text
t Map Text Text
htmlEntities

htmlEntities :: Map.Map T.Text T.Text
htmlEntities :: Map Text Text
htmlEntities = forall k a. Ord k => [(k, a)] -> Map k a
Map.fromList
    forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map (String -> Text
pack forall (a :: * -> * -> *) b c b' c'.
Arrow a =>
a b c -> a b' c' -> a (b, b') (c, c')
*** String -> Text
pack) -- Work around the long-compile-time bug
    [ (String
"nbsp", String
"\160")
    , (String
"iexcl", String
"\161")
    , (String
"cent", String
"\162")
    , (String
"pound", String
"\163")
    , (String
"curren", String
"\164")
    , (String
"yen", String
"\165")
    , (String
"brvbar", String
"\166")
    , (String
"sect", String
"\167")
    , (String
"uml", String
"\168")
    , (String
"copy", String
"\169")
    , (String
"ordf", String
"\170")
    , (String
"laquo", String
"\171")
    , (String
"not", String
"\172")
    , (String
"shy", String
"\173")
    , (String
"reg", String
"\174")
    , (String
"macr", String
"\175")
    , (String
"deg", String
"\176")
    , (String
"plusmn", String
"\177")
    , (String
"sup2", String
"\178")
    , (String
"sup3", String
"\179")
    , (String
"acute", String
"\180")
    , (String
"micro", String
"\181")
    , (String
"para", String
"\182")
    , (String
"middot", String
"\183")
    , (String
"cedil", String
"\184")
    , (String
"sup1", String
"\185")
    , (String
"ordm", String
"\186")
    , (String
"raquo", String
"\187")
    , (String
"frac14", String
"\188")
    , (String
"frac12", String
"\189")
    , (String
"frac34", String
"\190")
    , (String
"iquest", String
"\191")
    , (String
"Agrave", String
"\192")
    , (String
"Aacute", String
"\193")
    , (String
"Acirc", String
"\194")
    , (String
"Atilde", String
"\195")
    , (String
"Auml", String
"\196")
    , (String
"Aring", String
"\197")
    , (String
"AElig", String
"\198")
    , (String
"Ccedil", String
"\199")
    , (String
"Egrave", String
"\200")
    , (String
"Eacute", String
"\201")
    , (String
"Ecirc", String
"\202")
    , (String
"Euml", String
"\203")
    , (String
"Igrave", String
"\204")
    , (String
"Iacute", String
"\205")
    , (String
"Icirc", String
"\206")
    , (String
"Iuml", String
"\207")
    , (String
"ETH", String
"\208")
    , (String
"Ntilde", String
"\209")
    , (String
"Ograve", String
"\210")
    , (String
"Oacute", String
"\211")
    , (String
"Ocirc", String
"\212")
    , (String
"Otilde", String
"\213")
    , (String
"Ouml", String
"\214")
    , (String
"times", String
"\215")
    , (String
"Oslash", String
"\216")
    , (String
"Ugrave", String
"\217")
    , (String
"Uacute", String
"\218")
    , (String
"Ucirc", String
"\219")
    , (String
"Uuml", String
"\220")
    , (String
"Yacute", String
"\221")
    , (String
"THORN", String
"\222")
    , (String
"szlig", String
"\223")
    , (String
"agrave", String
"\224")
    , (String
"aacute", String
"\225")
    , (String
"acirc", String
"\226")
    , (String
"atilde", String
"\227")
    , (String
"auml", String
"\228")
    , (String
"aring", String
"\229")
    , (String
"aelig", String
"\230")
    , (String
"ccedil", String
"\231")
    , (String
"egrave", String
"\232")
    , (String
"eacute", String
"\233")
    , (String
"ecirc", String
"\234")
    , (String
"euml", String
"\235")
    , (String
"igrave", String
"\236")
    , (String
"iacute", String
"\237")
    , (String
"icirc", String
"\238")
    , (String
"iuml", String
"\239")
    , (String
"eth", String
"\240")
    , (String
"ntilde", String
"\241")
    , (String
"ograve", String
"\242")
    , (String
"oacute", String
"\243")
    , (String
"ocirc", String
"\244")
    , (String
"otilde", String
"\245")
    , (String
"ouml", String
"\246")
    , (String
"divide", String
"\247")
    , (String
"oslash", String
"\248")
    , (String
"ugrave", String
"\249")
    , (String
"uacute", String
"\250")
    , (String
"ucirc", String
"\251")
    , (String
"uuml", String
"\252")
    , (String
"yacute", String
"\253")
    , (String
"thorn", String
"\254")
    , (String
"yuml", String
"\255")
    , (String
"OElig", String
"\338")
    , (String
"oelig", String
"\339")
    , (String
"Scaron", String
"\352")
    , (String
"scaron", String
"\353")
    , (String
"Yuml", String
"\376")
    , (String
"fnof", String
"\402")
    , (String
"circ", String
"\710")
    , (String
"tilde", String
"\732")
    , (String
"Alpha", String
"\913")
    , (String
"Beta", String
"\914")
    , (String
"Gamma", String
"\915")
    , (String
"Delta", String
"\916")
    , (String
"Epsilon", String
"\917")
    , (String
"Zeta", String
"\918")
    , (String
"Eta", String
"\919")
    , (String
"Theta", String
"\920")
    , (String
"Iota", String
"\921")
    , (String
"Kappa", String
"\922")
    , (String
"Lambda", String
"\923")
    , (String
"Mu", String
"\924")
    , (String
"Nu", String
"\925")
    , (String
"Xi", String
"\926")
    , (String
"Omicron", String
"\927")
    , (String
"Pi", String
"\928")
    , (String
"Rho", String
"\929")
    , (String
"Sigma", String
"\931")
    , (String
"Tau", String
"\932")
    , (String
"Upsilon", String
"\933")
    , (String
"Phi", String
"\934")
    , (String
"Chi", String
"\935")
    , (String
"Psi", String
"\936")
    , (String
"Omega", String
"\937")
    , (String
"alpha", String
"\945")
    , (String
"beta", String
"\946")
    , (String
"gamma", String
"\947")
    , (String
"delta", String
"\948")
    , (String
"epsilon", String
"\949")
    , (String
"zeta", String
"\950")
    , (String
"eta", String
"\951")
    , (String
"theta", String
"\952")
    , (String
"iota", String
"\953")
    , (String
"kappa", String
"\954")
    , (String
"lambda", String
"\955")
    , (String
"mu", String
"\956")
    , (String
"nu", String
"\957")
    , (String
"xi", String
"\958")
    , (String
"omicron", String
"\959")
    , (String
"pi", String
"\960")
    , (String
"rho", String
"\961")
    , (String
"sigmaf", String
"\962")
    , (String
"sigma", String
"\963")
    , (String
"tau", String
"\964")
    , (String
"upsilon", String
"\965")
    , (String
"phi", String
"\966")
    , (String
"chi", String
"\967")
    , (String
"psi", String
"\968")
    , (String
"omega", String
"\969")
    , (String
"thetasym", String
"\977")
    , (String
"upsih", String
"\978")
    , (String
"piv", String
"\982")
    , (String
"ensp", String
"\8194")
    , (String
"emsp", String
"\8195")
    , (String
"thinsp", String
"\8201")
    , (String
"zwnj", String
"\8204")
    , (String
"zwj", String
"\8205")
    , (String
"lrm", String
"\8206")
    , (String
"rlm", String
"\8207")
    , (String
"ndash", String
"\8211")
    , (String
"mdash", String
"\8212")
    , (String
"lsquo", String
"\8216")
    , (String
"rsquo", String
"\8217")
    , (String
"sbquo", String
"\8218")
    , (String
"ldquo", String
"\8220")
    , (String
"rdquo", String
"\8221")
    , (String
"bdquo", String
"\8222")
    , (String
"dagger", String
"\8224")
    , (String
"Dagger", String
"\8225")
    , (String
"bull", String
"\8226")
    , (String
"hellip", String
"\8230")
    , (String
"permil", String
"\8240")
    , (String
"prime", String
"\8242")
    , (String
"Prime", String
"\8243")
    , (String
"lsaquo", String
"\8249")
    , (String
"rsaquo", String
"\8250")
    , (String
"oline", String
"\8254")
    , (String
"frasl", String
"\8260")
    , (String
"euro", String
"\8364")
    , (String
"image", String
"\8465")
    , (String
"weierp", String
"\8472")
    , (String
"real", String
"\8476")
    , (String
"trade", String
"\8482")
    , (String
"alefsym", String
"\8501")
    , (String
"larr", String
"\8592")
    , (String
"uarr", String
"\8593")
    , (String
"rarr", String
"\8594")
    , (String
"darr", String
"\8595")
    , (String
"harr", String
"\8596")
    , (String
"crarr", String
"\8629")
    , (String
"lArr", String
"\8656")
    , (String
"uArr", String
"\8657")
    , (String
"rArr", String
"\8658")
    , (String
"dArr", String
"\8659")
    , (String
"hArr", String
"\8660")
    , (String
"forall", String
"\8704")
    , (String
"part", String
"\8706")
    , (String
"exist", String
"\8707")
    , (String
"empty", String
"\8709")
    , (String
"nabla", String
"\8711")
    , (String
"isin", String
"\8712")
    , (String
"notin", String
"\8713")
    , (String
"ni", String
"\8715")
    , (String
"prod", String
"\8719")
    , (String
"sum", String
"\8721")
    , (String
"minus", String
"\8722")
    , (String
"lowast", String
"\8727")
    , (String
"radic", String
"\8730")
    , (String
"prop", String
"\8733")
    , (String
"infin", String
"\8734")
    , (String
"ang", String
"\8736")
    , (String
"and", String
"\8743")
    , (String
"or", String
"\8744")
    , (String
"cap", String
"\8745")
    , (String
"cup", String
"\8746")
    , (String
"int", String
"\8747")
    , (String
"there4", String
"\8756")
    , (String
"sim", String
"\8764")
    , (String
"cong", String
"\8773")
    , (String
"asymp", String
"\8776")
    , (String
"ne", String
"\8800")
    , (String
"equiv", String
"\8801")
    , (String
"le", String
"\8804")
    , (String
"ge", String
"\8805")
    , (String
"sub", String
"\8834")
    , (String
"sup", String
"\8835")
    , (String
"nsub", String
"\8836")
    , (String
"sube", String
"\8838")
    , (String
"supe", String
"\8839")
    , (String
"oplus", String
"\8853")
    , (String
"otimes", String
"\8855")
    , (String
"perp", String
"\8869")
    , (String
"sdot", String
"\8901")
    , (String
"lceil", String
"\8968")
    , (String
"rceil", String
"\8969")
    , (String
"lfloor", String
"\8970")
    , (String
"rfloor", String
"\8971")
    , (String
"lang", String
"\9001")
    , (String
"rang", String
"\9002")
    , (String
"loz", String
"\9674")
    , (String
"spades", String
"\9824")
    , (String
"clubs", String
"\9827")
    , (String
"hearts", String
"\9829")
    , (String
"diams", String
"\9830")
    ]