hextream-0.2.0.0: Streaming-friendly XML parsers

Safe HaskellNone
LanguageHaskell2010

Data.XML.Parser.High

Description

High-level XML parsers, built on top of Data.XML.Parser.Mid:

  • entity references are expanded
  • CDATAs are abstracted away
  • comments are ignored
  • whitespace between tokens is ignored
  • duplicate attributes are ignored

All documentation examples assume the following setup:

:set -XOverloadedStrings
import Data.Attoparsec.ByteString
Synopsis

Documentation

data Prolog Source #

XML document prolog.

Instances
Eq Prolog Source # 
Instance details

Defined in Data.XML.Parser.High

Methods

(==) :: Prolog -> Prolog -> Bool #

(/=) :: Prolog -> Prolog -> Bool #

Ord Prolog Source # 
Instance details

Defined in Data.XML.Parser.High

Read Prolog Source # 
Instance details

Defined in Data.XML.Parser.High

Show Prolog Source # 
Instance details

Defined in Data.XML.Parser.High

data Token Source #

Instances
Eq Token Source # 
Instance details

Defined in Data.XML.Parser.High

Methods

(==) :: Token -> Token -> Bool #

(/=) :: Token -> Token -> Bool #

Ord Token Source # 
Instance details

Defined in Data.XML.Parser.High

Methods

compare :: Token -> Token -> Ordering #

(<) :: Token -> Token -> Bool #

(<=) :: Token -> Token -> Bool #

(>) :: Token -> Token -> Bool #

(>=) :: Token -> Token -> Bool #

max :: Token -> Token -> Token #

min :: Token -> Token -> Token #

Read Token Source # 
Instance details

Defined in Data.XML.Parser.High

Show Token Source # 
Instance details

Defined in Data.XML.Parser.High

Methods

showsPrec :: Int -> Token -> ShowS #

show :: Token -> String #

showList :: [Token] -> ShowS #

data TokenParser m a Source #

A parser that consumes whole Tokens.

Instances
Monad m => Monad (TokenParser m) Source # 
Instance details

Defined in Data.XML.Parser.High

Methods

(>>=) :: TokenParser m a -> (a -> TokenParser m b) -> TokenParser m b #

(>>) :: TokenParser m a -> TokenParser m b -> TokenParser m b #

return :: a -> TokenParser m a #

fail :: String -> TokenParser m a #

Functor m => Functor (TokenParser m) Source # 
Instance details

Defined in Data.XML.Parser.High

Methods

fmap :: (a -> b) -> TokenParser m a -> TokenParser m b #

(<$) :: a -> TokenParser m b -> TokenParser m a #

(Parsing m, Monad m) => MonadFail (TokenParser m) Source # 
Instance details

Defined in Data.XML.Parser.High

Methods

fail :: String -> TokenParser m a #

Applicative m => Applicative (TokenParser m) Source # 
Instance details

Defined in Data.XML.Parser.High

Methods

pure :: a -> TokenParser m a #

(<*>) :: TokenParser m (a -> b) -> TokenParser m a -> TokenParser m b #

liftA2 :: (a -> b -> c) -> TokenParser m a -> TokenParser m b -> TokenParser m c #

(*>) :: TokenParser m a -> TokenParser m b -> TokenParser m b #

(<*) :: TokenParser m a -> TokenParser m b -> TokenParser m a #

Alternative m => Alternative (TokenParser m) Source # 
Instance details

Defined in Data.XML.Parser.High

Methods

empty :: TokenParser m a #

(<|>) :: TokenParser m a -> TokenParser m a -> TokenParser m a #

some :: TokenParser m a -> TokenParser m [a] #

many :: TokenParser m a -> TokenParser m [a] #

data ContentParser m a Source #

How to parse tag content.

Instances
Functor m => Functor (ContentParser m) Source # 
Instance details

Defined in Data.XML.Parser.High

Methods

fmap :: (a -> b) -> ContentParser m a -> ContentParser m b #

(<$) :: a -> ContentParser m b -> ContentParser m a #

noContent :: Applicative m => ContentParser m () Source #

Assert that content is empty.

>>> parseOnly (runTokenParser $ tag' anyName anyAttr noContent) "<tag key='value'>body</tag>"
Left ...
>>> parseOnly (runTokenParser $ tag' anyName anyAttr noContent) "<tag key='value'></tag>"
Right ()
>>> parseOnly (runTokenParser $ tag' anyName anyAttr noContent) "<tag key='value'/>"
Right ()

withContent :: TokenParser m a -> ContentParser m a Source #

Assert that content is not empty, and parse it using given token parser.

>>> parseOnly (runTokenParser $ tag' anyName anyAttr (withContent textContent')) "<tag key='value'>body</tag>"
Right "body"
>>> parseOnly (runTokenParser $ tag' anyName anyAttr (withContent textContent')) "<tag key='value'></tag>"
Left ...
>>> parseOnly (runTokenParser $ tag' anyName anyAttr (withContent textContent')) "<tag key='value'/>"
Left ...
>>> parseOnly (runTokenParser $ tag' anyName anyAttr (withContent $ pure ())) "<tag key='value'/>"
Left ...

anyContent :: CharParsing m => Monad m => ContentParser m () Source #

Accept any content, including empty content.

>>> parseOnly (runTokenParser $ tag' anyName anyAttr anyContent) "<tag key='value'>body</tag>"
Right ()
>>> parseOnly (runTokenParser $ tag' anyName anyAttr anyContent) "<tag key='value'></tag>"
Right ()
>>> parseOnly (runTokenParser $ tag' anyName anyAttr anyContent) "<tag key='value'/>"
Right ()

prolog :: CharParsing m => Monad m => TokenParser m Prolog Source #

https://www.w3.org/TR/REC-xml/#NT-prolog

>>> parseOnly (runTokenParser prolog) "<?xml version='1.0'?><!DOCTYPE greeting>"
Right (Prolog {prologXmlDeclaration = Just (XMLDeclaration "1.0" ...), prologInstructions = [], prologDoctype = Just (Doctype "greeting" ...)})
>>> parseOnly (runTokenParser prolog) "<?xml version='1.0'?>  <!-- comments and whitespace are ignored --><!DOCTYPE greeting>"
Right (Prolog {prologXmlDeclaration = Just (XMLDeclaration "1.0" ...), prologInstructions = [], prologDoctype = Just (Doctype "greeting" ...)})

instruction :: CharParsing m => Monad m => TokenParser m Instruction Source #

Parse a processing instruction.

https://www.w3.org/TR/REC-xml/#dt-pi

>>> parseOnly (runTokenParser instruction) "<?php echo 'Hello World!'; ?>"
Right (Instruction "php" "echo 'Hello World!'; ")
>>> parseOnly (runTokenParser instruction) "<!-- comments and whitespace are ignored -->  <?php echo 'Hello World!'; ?>"
Right (Instruction "php" "echo 'Hello World!'; ")

textContent :: CharParsing m => Monad m => EntityDecoder -> TokenParser m Text Source #

Parse textual content of a tag, including CDATA.

textContent' :: CharParsing m => Monad m => TokenParser m Text Source #

Same as textContent (decodePredefinedEntities <> decodeHtmlEntities), provided for convenience.

>>> parseOnly (runTokenParser $ tag' anyName anyAttr (withContent textContent')) "<tag>body<!-- Ignored comment --><![CDATA[<innertag>innerbody</innertag>]]></tag>"
Right "body<innertag>innerbody</innertag>"

tag Source #

Arguments

:: CharParsing m 
=> Monad m 
=> EntityDecoder

How to expand entity references

-> NameParser a

How to parse tag name

-> (a -> AttrParser b)

How to parse tag attributes

-> (b -> ContentParser m c)

How to parse tag content

-> TokenParser m c 

Generic tag parser.

tag' Source #

Arguments

:: CharParsing m 
=> Monad m 
=> NameParser a

How to parse tag name

-> AttrParser b

How to parse tag attributes

-> ContentParser m c

How to parse tag content

-> TokenParser m c 

Simplified version of tag:

  • no state forwarding between name, attributes and content parsers
  • uses decodePredefinedEntities <> decodeHtmlEntities to expand entity references

anyTag :: CharParsing m => Monad m => TokenParser m () Source #

Parse a tag with any name, any attributes and any content.

>>> parseOnly (runTokenParser anyTag) "<tag key='value'>body</tag>"
Right ()
>>> parseOnly (runTokenParser anyTag) "<tag key='value'/>"
Right ()
>>> parseOnly (runTokenParser anyTag) "<!-- ignored comment --><tag key='value'/>"
Right ()
>>> parseOnly (runTokenParser anyTag) "<tag key='value'>body<!-- ignored comment --></tag>"
Right ()

anyToken' :: CharParsing m => Monad m => TokenParser m Token Source #

Same as anyToken (decodePredefinedEntities <> decodeHtmlEntities), provided for convenience.