{-# LANGUAGE GeneralizedNewtypeDeriving #-}

-- | Functions to extract data from parsed XML.
--
-- = Example
--
-- Suppose you have an xml file of books like this:
--
-- > <?xml version="1.0"?>
-- > <library>
-- >   <book id="1" isbn="23234-1">
-- >     <author>John Doe</author>
-- >     <title>Some book</title>
-- >   </book>
-- >   <book id="2">
-- >     <author>You</author>
-- >     <title>The Great Event</title>
-- >   </book>
-- >   ...
-- > </library>
--
-- And a data type for a book:
--
-- > data Book = Book { bookId        :: Int
-- >                  , isbn          :: Maybe String
-- >                  , author, title :: String
-- >                  }
--
-- You can parse the xml file into a generic tree structure using
-- 'Text.XML.Light.Input.parseXMLDoc' from the `xml` package.
--
-- Using this library one can define extractors to extract data from
-- the generic tree.
--
-- @
-- library = 'element' "library" $ 'children' $ 'only' $ 'many' book
--
-- book = 'element' "book" $ do
--   i <- 'attribAs' "id" 'Text.XML.Light.Extractors.Extra.integer'
--   s <- 'optional' ('attrib' "isbn")
--   'children' $ do
--     a <- 'element' "author" $ 'contents' $ 'text'
--     t <- 'element' "title" $ 'contents' $ 'text'
--     return $ Book { bookId = i, author = a, title = t, isbn = s }
--
-- extractLibrary :: 'XML.Element' -> 'Either' 'ExtractionErr' [Book]
-- extractLibrary = 'extractDocContents' library
-- @
--
-- = Notes
--
--  * The "Control.Applicative" module contains some useful
--    combinators like 'optional', 'many' and '<|>'.
--
--  * The "Text.XML.Light.Extractors.ShowErr" module contains some
--    predefined functions to convert error values to strings.
--
--  * The "Text.XML.Light.Extractors.Extra" module provides some
--    functions to read numeric data.
--
module Text.XML.Light.Extractors
  (
  -- * Errors
    Path
  , Err(..)
  , ExtractionErr(..)

  -- * Element extraction
  , ElementExtractor
  , extractElement
  , attrib
  , attribAs
  , children
  , contents

  -- * Contents extraction
  , ContentsExtractor
  , extractContents
  , extractDocContents
  , element
  , text
  , textAs
  , choice
  , anyContent
  , eoc
  , only
  )
where

import Control.Applicative

import           Text.XML.Light.Types as XML
import qualified Text.XML.Light.Proc  as XML

import           Text.XML.Light.Extractors.Internal (ExtractionErr, Err, Path)
import qualified Text.XML.Light.Extractors.Internal as Internal
import           Text.XML.Light.Extractors.Internal.Result hiding (throwError, throwFatal)
import qualified Text.XML.Light.Extractors.Internal.Result as R

--------------------------------------------------------------------------------

-- | An extractor that is run against a single XML element (see
-- 'extractElement'). It is a thin wrapper around
-- 'Internal.ElementExtractor'; all instances are lifted from the wrapped
-- type via @GeneralizedNewtypeDeriving@.
newtype ElementExtractor a = ElementExtractor (Internal.ElementExtractor a)
  deriving (Applicative, Alternative, Functor, Monad)

-- | An extractor that is run against a list of XML contents (see
-- 'extractContents'). It is a thin wrapper around
-- 'Internal.ContentsExtractor'; all instances are lifted from the wrapped
-- type via @GeneralizedNewtypeDeriving@.
newtype ContentsExtractor a = ContentsExtractor (Internal.ContentsExtractor a)
  deriving (Applicative, Alternative, Functor, Monad)

--------------------------------------------------------------------------------

-- | @extractElement p element@ extracts @element@ with @p@.
--
-- The result of the internal runner is converted with 'toEither', so a
-- failed extraction is reported as a 'Left' 'ExtractionErr'.
extractElement :: ElementExtractor a -> XML.Element -> Either ExtractionErr a
extractElement (ElementExtractor p) el =
  -- NOTE(review): the trailing @[]@ is presumably the initial (empty) 'Path';
  -- confirm against "Text.XML.Light.Extractors.Internal".
  toEither $ Internal.runElementExtractor p el []

-- | @attrib name@ extracts the value of attribute @name@.
attrib :: String -> ElementExtractor String
attrib = ElementExtractor . Internal.attrib

-- | @attribAs name f@ extracts the value of attribute @name@ and runs
-- it through a conversion/validation function.
attribAs :: String -> (String -> Either Err a) -> ElementExtractor a
attribAs name = ElementExtractor . Internal.attribAs name

-- | @children p@ extracts only child elements with @p@.
children :: ContentsExtractor a -> ElementExtractor a
children (ContentsExtractor p) = ElementExtractor (Internal.children p)

-- | @contents p@ extracts contents with @p@.
contents :: ContentsExtractor a -> ElementExtractor a
contents (ContentsExtractor p) = ElementExtractor (Internal.contents p)

--------------------------------------------------------------------------------

-- | @extractContents p contents@ extracts the contents with @p@.
--
-- Only the first component of the internal runner's result is kept; the
-- outcome is then converted with 'toEither', so a failed extraction is
-- reported as a 'Left' 'ExtractionErr'.
extractContents :: ContentsExtractor a -> [XML.Content] -> Either ExtractionErr a
extractContents (ContentsExtractor p) cs =
  -- NOTE(review): @1@ and @[]@ are presumably the starting content position
  -- and the initial (empty) 'Path'; confirm against
  -- "Text.XML.Light.Extractors.Internal".
  toEither (fst <$> Internal.runContentsExtractor p cs 1 [])

-- | Using 'Text.XML.Light.Input.parseXMLDoc' produces a single
-- 'Element'. Such an element can be extracted using this function.
--
-- The element is wrapped with 'Elem' and handed to 'extractContents' as a
-- one-item content list.
extractDocContents :: ContentsExtractor a -> XML.Element -> Either ExtractionErr a
extractDocContents p el = extractContents p [Elem el]

-- | @only p@ fails if there is more content than was extracted by @p@.
--
-- > only p = p <* eoc
only :: ContentsExtractor a -> ContentsExtractor a
only p = p <* eoc

-- | Succeeds only when there is no more content.
eoc :: ContentsExtractor ()
eoc = ContentsExtractor Internal.eoc

-- | @element name p@ extracts a @name@ element with @p@.
element :: String -> ElementExtractor a -> ContentsExtractor a
element name (ElementExtractor p) = ContentsExtractor (Internal.element name p)

-- | Extracts text.
text :: ContentsExtractor String
text = ContentsExtractor Internal.text

-- | Extracts text and runs it through a conversion/validation function.
textAs :: (String -> Either Err a) -> ContentsExtractor a
textAs = ContentsExtractor . Internal.textAs

-- | Extracts with the first matching extractor.
--
-- The alternatives are combined left to right with '<|>'; an empty list
-- yields 'empty'.
choice :: [ContentsExtractor a] -> ContentsExtractor a
choice = foldr (<|>) empty

-- | Extracts one 'Content' item.
anyContent :: ContentsExtractor Content
anyContent = ContentsExtractor Internal.anyContent