module Data.Conduit.Parser.XML
(
tag
, tagName
, tagPredicate
, tagNoAttr
, AttributeMap
, AttrParser()
, attr
, ignoreAttrs
, content
, Reexport.parseBytes
, Reexport.parseBytesPos
, parseText
, Reexport.parseTextPos
, Reexport.detectUtf
, Reexport.parseFile
, Reexport.parseLBS
, Reexport.ParseSettings()
, Reexport.DecodeEntities
, Reexport.psDecodeEntities
, Reexport.psRetainNamespaces
, Reexport.decodeXmlEntities
, Reexport.decodeHtmlEntities
, Reexport.XmlException(..)
) where
import Control.Applicative
import Control.Exception
import Control.Monad
import Control.Monad.Catch
import Data.Char
import Data.Conduit hiding (await)
import Data.Conduit.Parser
import Data.Conduit.Parser.XML.Internal
import Data.Map as Map hiding (map)
import Data.Text as Text (Text, all, unpack)
import Data.XML.Types
import Text.Parser.Combinators
import qualified Text.XML.Stream.Parse as Reexport
tag :: (MonadCatch m)
=> (Name -> Maybe a)
-> (a -> AttrParser b)
-> (b -> ConduitParser Event m c)
-> ConduitParser Event m c
tag checkName attrParser f = do
skipMany ignored
(name, attributes) <- beginElement
a <- maybe (unexpected $ "Invalid element name: " ++ show name) return $ checkName name
b <- either (unexpected . show) return $ runAttrParser' (attrParser a) attributes
result <- f b
skipMany ignored
endName <- endElement
when (endName /= name) . unexpected $ "Invalid closing tag: expected </" ++ unpack (nameLocalName name) ++ ">, got </" ++ unpack (nameLocalName endName) ++ ">"
return result
where ignored = beginDocument <|> endDocument <|> void beginDoctype <|> void endDoctype <|> void instruction <|> void comment <|> spaceContent
spaceContent :: (MonadCatch m) => ConduitParser Event m ()
spaceContent = do
t <- contentText
unless (Text.all isSpace t) . unexpected $ "Unexpected textual content: " ++ unpack t
runAttrParser' parser attributes = case runAttrParser parser attributes of
Left e -> Left e
Right (a, x) -> if Map.null a then Right x else Left . toException $ Reexport.UnparsedAttributes (Map.toList a)
tagPredicate :: (MonadCatch m) => (Name -> Bool) -> AttrParser a -> (a -> ConduitParser Event m b) -> ConduitParser Event m b
tagPredicate p attrParser = tag (guard . p) (const attrParser)
tagName :: (MonadCatch m) => Name -> AttrParser a -> (a -> ConduitParser Event m b) -> ConduitParser Event m b
tagName name = tagPredicate (== name)
tagNoAttr :: MonadCatch m => Name -> ConduitParser Event m a -> ConduitParser Event m a
tagNoAttr name f = tagName name (return ()) $ const f
content :: MonadCatch m => ConduitParser Event m Text
content = do
skipMany ignored
mconcat <$> sepEndBy text ignored
where ignored = beginDocument <|> endDocument <|> void beginDoctype <|> endDoctype <|> void instruction <|> void comment
newtype AttrParser a = AttrParser { runAttrParser :: AttributeMap -> Either SomeException (AttributeMap, a) }
instance Monad AttrParser where
return a = AttrParser $ \attributes -> Right (attributes, a)
(AttrParser p) >>= f = AttrParser $ p >=> (\(attributes', a) -> runAttrParser (f a) attributes')
instance Functor AttrParser where
fmap = liftM
instance Applicative AttrParser where
pure = return
(<*>) = ap
instance Alternative AttrParser where
empty = AttrParser $ const $ Left $ toException $ Reexport.XmlException "AttrParser.empty" Nothing
AttrParser f <|> AttrParser g = AttrParser $ \x -> either (const $ g x) Right (f x)
instance MonadThrow AttrParser where
throwM = AttrParser . const . throwM
attr :: Name -> AttrParser Text
attr name = AttrParser $ \attrs -> maybe raiseError (returnValue attrs) (Map.lookup name attrs)
where raiseError = Left . toException $ Reexport.XmlException ("Missing attribute: " ++ show name) Nothing
returnValue attrs contents = Right (Map.delete name attrs, contentsToText contents)
ignoreAttrs :: AttrParser ()
ignoreAttrs = AttrParser . const $ Right (mempty, ())
contentsToText :: [Content] -> Text
contentsToText =
mconcat . map toText
where
toText (ContentText t) = t
toText (ContentEntity e) = mconcat ["&", e, ";"]
parseText :: (MonadThrow m) => Reexport.ParseSettings -> Conduit Text m Event
parseText = Reexport.parseText'