module Text.XML.DOM.Parser
(
ParserData(..)
, pdCursor, pdAxis, pdPath
, ParserError(..)
, pePath, peDetails
, ParserErrors(..)
, throwParserError
, renderPath
, DomParser
, runDomParser
, ContentParser
, unitFromDom
, voidFromDom
, textFromContent
, stringFromContent
, charFromContent
, intFromContent
, integerFromContent
, doubleFromContent
, fixedFromContent
, boolFromContent
, FromDom(..)
, FromContent(..)
, inElem, inElems, nonEmptyInElems, maybeInElem, inElemsPred
, inAxis, inDescendants, inTags
, tryCurrentContent, tryCurrentContentText
, currentContent
, tryCurrentName , currentName
, tryCurrentAttr , currentAttr
, elemContent, nonEmptyElemsContent, elemsContent, maybeElemContent
, fromContentR
, CurrentContent(..)
, currentNodes
, checkCurrentLaxName, checkCurrentName
) where
import Control.Applicative
import Control.Exception
import Control.Lens
import Control.Monad
import Control.Monad.Except
import Control.Monad.Reader.Class
import Control.Monad.Trans.Except
import Control.Monad.Trans.Reader (Reader, runReader)
import Data.Fixed
import Data.Functor.Compose
import qualified Data.List as L
import Data.List.NonEmpty (NonEmpty(..))
import qualified Data.List.NonEmpty as NE
import Data.Maybe
import Data.Text (Text)
import qualified Data.Text as T
import Data.Traversable
import Data.Typeable
import Data.Void
import GHC.Generics (Generic)
import Text.Read (readMaybe)
import Text.Shakespeare.Text (st)
import Text.XML
import Text.XML.Cursor
data ParserError
= PENotFound
{ _pePath :: ![Text]
}
| PEWrongFormat
{ _pePath :: ![Text]
, _peDetails :: Text
}
| PEWrongTagName
{ _pePath :: ![Text]
, _peDetails :: !Text
}
| PENotElement
{ _pePath :: ![Text]
}
| PEContentNotFound
{ _pePath :: ![Text]
}
| PEOther
{ _pePath :: ![Text]
, _peDetails :: !Text
} deriving (Eq, Ord, Show, Generic)
makeLenses ''ParserError
makePrisms ''ParserError
instance Exception ParserError
data ParserData = ParserData
{ _pdCursor :: !Cursor
, _pdAxis :: !Axis
, _pdPath :: ![Text]
} deriving (Generic)
makeLenses ''ParserData
newtype ParserErrors = ParserErrors
{ unParserErrors :: [ParserError]
} deriving (Ord, Eq, Show, Generic)
makeWrapped ''ParserErrors
instance Exception ParserErrors
type DomParser = ExceptT [ParserError] (Reader ParserData)
type ContentParser a = Text -> Either Text a
renderPath :: [Text] -> String
renderPath [] = "document"
renderPath path
= T.unpack
$ mconcat
$ L.intersperse ">" path
throwParserError :: ([Text] -> ParserError) -> DomParser a
throwParserError mkerr = do
pd <- ask
let err = mkerr $ pd ^. pdPath
throwE [err]
nodeName :: Node -> Maybe Name
nodeName (NodeElement el) = Just $ elementName el
nodeName _ = Nothing
runDomParser :: Document
-> DomParser a
-> Either [ParserError] a
runDomParser doc par =
let cur = fromDocument doc
Just root = nameLocalName <$> nodeName (node cur)
pd = ParserData
{ _pdCursor = cur
, _pdAxis = pure
, _pdPath = [root]
}
in runReader (runExceptT par) pd
takeFirstElem :: [Text] -> [a] -> DomParser a
takeFirstElem path [] = throwE $ [PENotFound path]
takeFirstElem _ (a:_) = return a
inElem :: Text -> DomParser a -> DomParser a
inElem name p = do
pd <- ask
let newpath = (pd ^. pdPath) ++ [name]
newcur <- takeFirstElem newpath
$ (pd ^. pdCursor) $/ (pd ^. pdAxis) >=> (laxElement name)
let newdata = ParserData
{ _pdCursor = newcur
, _pdAxis = pure
, _pdPath = newpath
}
local (const newdata) p
inElemsPred :: (Traversable f)
=> ([Cursor] -> f Cursor)
-> Text
-> DomParser a
-> DomParser (f a)
inElemsPred cpred name p = do
pd <- ask
let newpath = (pd ^. pdPath) ++ [name]
curs = cpred
$ (pd ^. pdCursor)
$/ (pd ^. pdAxis) >=> (laxElement name)
for curs $ \cur -> do
let newdata = ParserData
{ _pdCursor = cur
, _pdAxis = pure
, _pdPath = newpath
}
local (const newdata) p
inElems :: Text -> DomParser a -> DomParser [a]
inElems = inElemsPred id
nonEmptyInElems :: Text -> DomParser a -> DomParser (NonEmpty a)
nonEmptyInElems name parser
= (getCompose <$> inElemsPred (Compose . NE.nonEmpty) name parser)
>>= maybe (throwParserError PENotFound) return
maybeInElem :: Text -> DomParser a -> DomParser (Maybe a)
maybeInElem = inElemsPred listToMaybe
inAxis :: [Text]
-> Axis
-> DomParser a
-> DomParser a
inAxis pathsuff axis parser = do
local ( over pdAxis (>=> axis)
. over pdPath (++ pathsuff) ) parser
inTags :: [Text]
-> DomParser a
-> DomParser a
inTags names p =
let axis = foldr (&/) pure $ map laxElement names
in inAxis names axis p
inDescendants :: DomParser a -> DomParser a
inDescendants = inAxis ["*"] descendant
tryCurrentName :: DomParser (Maybe Name)
tryCurrentName = do
pd <- ask
return $ nodeName $ node $ pd ^. pdCursor
currentName :: DomParser Name
currentName = tryCurrentName >>= \case
Nothing -> throwParserError PENotElement
Just name -> return name
checkCurrentName :: (Name -> Maybe Text)
-> DomParser ()
checkCurrentName predicate = do
n <- currentName
case predicate n of
Nothing -> return ()
Just msg -> throwParserError $ \p -> PEWrongTagName p msg
checkCurrentLaxName :: Text -> DomParser ()
checkCurrentLaxName name =
let msg = [st|Expected tag name: #{name}|]
predicate n = if (nameLocalName n == name)
then Nothing
else Just msg
in checkCurrentName predicate
tryCurrentContentText :: DomParser (Maybe Text)
tryCurrentContentText = do
pd <- ask
let txt = (pd ^. pdCursor) $/ (pd ^. pdAxis) >=> content
return $ case txt of
[] -> Nothing
x -> Just $ mconcat x
tryCurrentContent :: ContentParser a -> DomParser (Maybe a)
tryCurrentContent cparse = do
tmay <- tryCurrentContentText
for tmay $ \t -> case cparse t of
Left err -> throwParserError
$ \p -> PEWrongFormat p err
Right a -> pure a
currentContent :: ContentParser a -> DomParser a
currentContent cparse
= tryCurrentContent cparse
>>= maybe (throwParserError PEContentNotFound) return
elemContent :: Text -> ContentParser a -> DomParser a
elemContent name cparse
= inElem name
$ currentContent cparse
nonEmptyElemsContent :: Text -> ContentParser a -> DomParser (NonEmpty a)
nonEmptyElemsContent name cparse
= (NE.nonEmpty <$> elemsContent name cparse)
>>= maybe (throwParserError PENotFound) return
elemsContent :: Text -> ContentParser a -> DomParser [a]
elemsContent name cparse
= fmap catMaybes
$ inElems name
$ tryCurrentContent cparse
maybeElemContent :: Text -> ContentParser a -> DomParser (Maybe a)
maybeElemContent name cparse
= fmap join
$ maybeInElem name
$ tryCurrentContent cparse
currentAttr :: Text -> DomParser Text
currentAttr aname = do
pd <- ask
let newpath = (pd ^. pdPath) ++ [mappend "attribute " aname]
takeFirstElem newpath
$ laxAttribute aname (pd ^. pdCursor)
tryCurrentAttr :: Text -> DomParser (Maybe Text)
tryCurrentAttr aname = do
pd <- ask
return $ listToMaybe $ laxAttribute aname $ pd ^. pdCursor
unitFromDom :: DomParser ()
unitFromDom = pure ()
voidFromDom :: DomParser Void
voidFromDom = mzero
textFromContent :: ContentParser Text
textFromContent = Right
stringFromContent :: ContentParser String
stringFromContent = Right . T.unpack
charFromContent :: ContentParser Char
charFromContent t = case T.unpack $ T.strip t of
[a] -> Right a
x -> let msg = [st|Tag sould contain exactly one char, but it contains: #{x}|]
in Left msg
intFromContent :: ContentParser Int
intFromContent = fromContentR
integerFromContent :: ContentParser Integer
integerFromContent = fromContentR
doubleFromContent :: ContentParser Double
doubleFromContent = fromContentR
fixedFromContent :: (HasResolution a, Typeable a) => ContentParser (Fixed a)
fixedFromContent = fromContentR
boolFromContent :: ContentParser Bool
boolFromContent t =
let
lowt = T.toLower $ T.strip t
tvals = ["y", "yes", "t", "true", "1"]
fvals = ["n", "no", "f", "false", "0"]
in if | lowt `elem` tvals -> return True
| lowt `elem` fvals -> return False
| otherwise ->
let msg = [st|Could not read "#{t}" as Bool|]
in Left msg
class FromDom a where
fromDom :: DomParser a
instance FromDom () where
fromDom = unitFromDom
class FromContent a where
fromContent :: ContentParser a
instance FromContent Text where
fromContent t = Right t
instance FromContent String where
fromContent t = pure $ T.unpack t
instance FromContent Char where
fromContent = charFromContent
fromContentR :: forall a. (Read a, Typeable a) => Text -> Either Text a
fromContentR t = case readMaybe $ T.unpack $ T.strip t of
Nothing ->
let name = typeRep (Proxy :: Proxy a)
in Left [st|Unreadable #{show name}: #{t}|]
Just x -> Right x
instance FromContent Int where
fromContent = intFromContent
instance FromContent Integer where
fromContent = integerFromContent
instance FromContent Double where
fromContent = doubleFromContent
instance (HasResolution a, Typeable a) => FromContent (Fixed a) where
fromContent = fixedFromContent
instance FromContent Bool where
fromContent = boolFromContent
newtype CurrentContent a = CurrentContent
{ unCurrentContent :: a
} deriving (Ord, Eq, Show, Generic)
instance (FromContent a) => FromDom (CurrentContent a) where
fromDom = CurrentContent <$> currentContent fromContent
currentNodes :: DomParser [Node]
currentNodes = do
pd <- ask
let curs = (pd ^. pdCursor) $/ (pd ^. pdAxis)
return $ fmap node curs