-- | This module provides for simple DOM traversal. It is inspired by XPath. There are two central concepts here:
--
-- * A 'Cursor' represents a node in the DOM. It also contains information on the node's /location/. While the 'Node' datatype will only know of its children, a @Cursor@ knows about its parent and siblings as well. (The underlying mechanism allowing this is called a zipper, see <http://www.haskell.org/haskellwiki/Zipper> and <http://www.haskell.org/haskellwiki/Tying_the_Knot>.)
--
-- * An 'Axis', in its simplest form, takes a @Cursor@ and returns a list of @Cursor@s. It is used for selections, such as finding children, ancestors, etc. Axes can be chained together to express complex rules, such as all children named /foo/.
--
-- The terminology used in this module is taken directly from the XPath
-- specification: <http://www.w3.org/TR/xpath/>. For those familiar with XPath,
-- the one major difference is that attributes are not considered nodes in this
-- module.
module Text.XML.Cursor
    (
    -- * Data types
      Cursor
    , Axis
    -- * Production
    , fromDocument
    , fromNode
    , cut
    -- * Axes
    , parent
    , CG.precedingSibling
    , CG.followingSibling
    , child
    , node
    , CG.preceding
    , CG.following
    , CG.ancestor
    , descendant
    , orSelf
      -- ** Filters
    , check
    , checkNode
    , checkElement
    , checkName
    , anyElement
    , element
    , laxElement
    , content
    , attribute
    , laxAttribute
    , hasAttribute
    , attributeIs
    -- * Operators
    , (CG.&|)
    , (CG.&/)
    , (CG.&//)
    , (CG.&.//)
    , (CG.$|)
    , (CG.$/)
    , (CG.$//)
    , (CG.$.//)
    , (CG.>=>)
    -- * Type classes
    , Boolean(..)
    -- * Error handling
    , force
    , forceM
    ) where

import           Control.Exception            (Exception)
import           Control.Monad
import           Control.Monad.Trans.Resource (MonadThrow, throwM)
import           Data.Function                (on)
import qualified Data.Map                     as Map
import           Data.Maybe                   (maybeToList)
import qualified Data.Text                    as T
import           Text.XML
import           Text.XML.Cursor.Generic      (child, descendant, node, orSelf,
                                               parent)
import qualified Text.XML.Cursor.Generic      as CG

-- TODO: Consider [Cursor] -> [Cursor]?
-- | The type of an Axis that returns a list of Cursors.
-- They are roughly modeled after <http://www.w3.org/TR/xpath/#axes>.
--
-- Axes can be composed with '>=>', where e.g. @f >=> g@ means that on all results of
-- the @f@ axis, the @g@ axis will be applied, and all results joined together.
-- Because Axis is just a type synonym for @Cursor -> [Cursor]@, it is possible to use
-- other standard functions like '>>=' or 'concatMap' similarly.
--
-- The operators '&|', '&/', '&//' and '&.//' can be used to combine axes so that the second
-- axis works on the context nodes, children, descendants, respectively the context node as
-- well as its descendants of the results of the first axis.
--
-- The operators '$|', '$/', '$//' and '$.//' can be used to apply an axis (right-hand side)
-- to a cursor so that it is applied on the cursor itself, its children, its descendants,
-- respectively itself and its descendants.
--
-- Note that many of these operators also work on /generalised Axes/ that can return
-- lists of something other than Cursors, for example Content elements.
type Axis = Cursor -> [Cursor]

-- XPath axes as in http://www.w3.org/TR/xpath/#axes

-- TODO: Decide whether to use an existing package for this
-- | Something that can be used in a predicate check as a boolean.
class Boolean a where
    bool :: a -> Bool

instance Boolean Bool where
    bool :: Bool -> Bool
bool = Bool -> Bool
forall a. a -> a
id
instance Boolean [a] where
    bool :: [a] -> Bool
bool = Bool -> Bool
not (Bool -> Bool) -> ([a] -> Bool) -> [a] -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null
instance Boolean (Maybe a) where
    bool :: Maybe a -> Bool
bool (Just a
_) = Bool
True
    bool Maybe a
_        = Bool
False
instance Boolean (Either a b) where
    bool :: Either a b -> Bool
bool (Left a
_)  = Bool
False
    bool (Right b
_) = Bool
True

-- | A cursor: contains an XML 'Node' and pointers to its children, ancestors and siblings.
type Cursor = CG.Cursor Node

-- | Cut a cursor off from its parent. The idea is to allow restricting the scope of queries on it.
cut :: Cursor -> Cursor
cut :: Cursor -> Cursor
cut = Node -> Cursor
fromNode (Node -> Cursor) -> (Cursor -> Node) -> Cursor -> Cursor
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Cursor -> Node
forall node. Cursor node -> node
CG.node

-- | Convert a 'Document' to a 'Cursor'. It will point to the document root.
fromDocument :: Document -> Cursor
fromDocument :: Document -> Cursor
fromDocument = Node -> Cursor
fromNode (Node -> Cursor) -> (Document -> Node) -> Document -> Cursor
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Element -> Node
NodeElement (Element -> Node) -> (Document -> Element) -> Document -> Node
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Document -> Element
documentRoot

-- | Convert a 'Node' to a 'Cursor' (without parents).
fromNode :: Node -> Cursor
fromNode :: Node -> Cursor
fromNode =
    (Node -> [Node]) -> Node -> Cursor
forall node. (node -> [node]) -> node -> Cursor node
CG.toCursor Node -> [Node]
cs
  where
    cs :: Node -> [Node]
cs (NodeElement (Element Name
_ Map Name Text
_ [Node]
x)) = [Node]
x
    cs Node
_                             = []

-- | Filter cursors that don't pass a check.
check :: Boolean b => (Cursor -> b) -> Axis
check :: (Cursor -> b) -> Axis
check Cursor -> b
f Cursor
c = [Cursor
c | b -> Bool
forall a. Boolean a => a -> Bool
bool (b -> Bool) -> b -> Bool
forall a b. (a -> b) -> a -> b
$ Cursor -> b
f Cursor
c]

-- | Filter nodes that don't pass a check.
checkNode :: Boolean b => (Node -> b) -> Axis
checkNode :: (Node -> b) -> Axis
checkNode Node -> b
f = (Cursor -> b) -> Axis
forall b. Boolean b => (Cursor -> b) -> Axis
check (Node -> b
f (Node -> b) -> (Cursor -> Node) -> Cursor -> b
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Cursor -> Node
forall node. Cursor node -> node
node)

-- | Filter elements that don't pass a check, and remove all non-elements.
checkElement :: Boolean b => (Element -> b) -> Axis
checkElement :: (Element -> b) -> Axis
checkElement Element -> b
f Cursor
c = case Cursor -> Node
forall node. Cursor node -> node
node Cursor
c of
                     NodeElement Element
e -> [Cursor
c | b -> Bool
forall a. Boolean a => a -> Bool
bool (b -> Bool) -> b -> Bool
forall a b. (a -> b) -> a -> b
$ Element -> b
f Element
e]
                     Node
_ -> []

-- | Filter elements that don't pass a name check, and remove all non-elements.
checkName :: Boolean b => (Name -> b) -> Axis
checkName :: (Name -> b) -> Axis
checkName Name -> b
f = (Element -> b) -> Axis
forall b. Boolean b => (Element -> b) -> Axis
checkElement (Name -> b
f (Name -> b) -> (Element -> Name) -> Element -> b
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Element -> Name
elementName)

-- | Remove all non-elements. Compare roughly to XPath:
-- /A node test * is true for any node of the principal node type. For example, child::* will select all element children of the context node [...]/.
anyElement :: Axis
anyElement :: Axis
anyElement = (Element -> Bool) -> Axis
forall b. Boolean b => (Element -> b) -> Axis
checkElement (Bool -> Element -> Bool
forall a b. a -> b -> a
const Bool
True)

-- | Select only those elements with a matching tag name. XPath:
-- /A node test that is a QName is true if and only if the type of the node (see [5 Data Model]) is the principal node type and has an expanded-name equal to the expanded-name specified by the QName./
element :: Name -> Axis
element :: Name -> Axis
element Name
n = (Name -> Bool) -> Axis
forall b. Boolean b => (Name -> b) -> Axis
checkName (Name -> Name -> Bool
forall a. Eq a => a -> a -> Bool
== Name
n)

-- | Select only those elements with a loosely matching tag name. Namespace and case are ignored. XPath:
-- /A node test that is a QName is true if and only if the type of the node (see [5 Data Model]) is the principal node type and has an expanded-name equal to the expanded-name specified by the QName./
laxElement :: T.Text -> Axis
laxElement :: Text -> Axis
laxElement Text
n = (Name -> Bool) -> Axis
forall b. Boolean b => (Name -> b) -> Axis
checkName ((Text -> Text -> Bool) -> (Text -> Text) -> Text -> Text -> Bool
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
on Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
(==) Text -> Text
T.toCaseFold Text
n (Text -> Bool) -> (Name -> Text) -> Name -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Name -> Text
nameLocalName)

-- | Select only text nodes, and directly give the 'Content' values. XPath:
-- /The node test text() is true for any text node./
--
-- Note that this is not strictly an 'Axis', but will work with most combinators.
content :: Cursor -> [T.Text]
content :: Cursor -> [Text]
content Cursor
c = case Cursor -> Node
forall node. Cursor node -> node
node Cursor
c of
              (NodeContent Text
v) -> [Text
v]
              Node
_               -> []

-- | Select attributes on the current element (or nothing if it is not an element). XPath:
-- /the attribute axis contains the attributes of the context node; the axis will be empty unless the context node is an element/
--
-- Note that this is not strictly an 'Axis', but will work with most combinators.
--
-- The return list of the generalised axis contains as elements lists of 'Content'
-- elements, each full list representing an attribute value.
attribute :: Name -> Cursor -> [T.Text]
attribute :: Name -> Cursor -> [Text]
attribute Name
n Cursor
c =
    case Cursor -> Node
forall node. Cursor node -> node
node Cursor
c of
        NodeElement Element
e -> Maybe Text -> [Text]
forall a. Maybe a -> [a]
maybeToList (Maybe Text -> [Text]) -> Maybe Text -> [Text]
forall a b. (a -> b) -> a -> b
$ Name -> Map Name Text -> Maybe Text
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Name
n (Map Name Text -> Maybe Text) -> Map Name Text -> Maybe Text
forall a b. (a -> b) -> a -> b
$ Element -> Map Name Text
elementAttributes Element
e
        Node
_             -> []

-- | Select attributes on the current element (or nothing if it is not an element).  Namespace and case are ignored. XPath:
-- /the attribute axis contains the attributes of the context node; the axis will be empty unless the context node is an element/
--
-- Note that this is not strictly an 'Axis', but will work with most combinators.
--
-- The return list of the generalised axis contains as elements lists of 'Content'
-- elements, each full list representing an attribute value.
laxAttribute :: T.Text -> Cursor -> [T.Text]
laxAttribute :: Text -> Cursor -> [Text]
laxAttribute Text
n Cursor
c =
    case Cursor -> Node
forall node. Cursor node -> node
node Cursor
c of
        NodeElement Element
e -> do
            (Name
n', Text
v) <- Map Name Text -> [(Name, Text)]
forall k a. Map k a -> [(k, a)]
Map.toList (Map Name Text -> [(Name, Text)])
-> Map Name Text -> [(Name, Text)]
forall a b. (a -> b) -> a -> b
$ Element -> Map Name Text
elementAttributes Element
e
            Bool -> [()]
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> [()]) -> Bool -> [()]
forall a b. (a -> b) -> a -> b
$ ((Text -> Text -> Bool) -> (Text -> Text) -> Text -> Text -> Bool
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
on Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
(==) Text -> Text
T.toCaseFold) Text
n (Name -> Text
nameLocalName Name
n')
            Text -> [Text]
forall (m :: * -> *) a. Monad m => a -> m a
return Text
v
        Node
_ -> []

-- | Select only those element nodes with the given attribute.
hasAttribute :: Name -> Axis
hasAttribute :: Name -> Axis
hasAttribute Name
n Cursor
c =
    case Cursor -> Node
forall node. Cursor node -> node
node Cursor
c of
        NodeElement (Element Name
_ Map Name Text
as [Node]
_) -> [Cursor] -> (Text -> [Cursor]) -> Maybe Text -> [Cursor]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [] ([Cursor] -> Text -> [Cursor]
forall a b. a -> b -> a
const [Cursor
c]) (Maybe Text -> [Cursor]) -> Maybe Text -> [Cursor]
forall a b. (a -> b) -> a -> b
$ Name -> Map Name Text -> Maybe Text
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Name
n Map Name Text
as
        Node
_ -> []

-- | Select only those element nodes containing the given attribute key/value pair.
attributeIs :: Name -> T.Text -> Axis
attributeIs :: Name -> Text -> Axis
attributeIs Name
n Text
v Cursor
c =
    case Cursor -> Node
forall node. Cursor node -> node
node Cursor
c of
        NodeElement (Element Name
_ Map Name Text
as [Node]
_) -> [ Cursor
c | Text -> Maybe Text
forall a. a -> Maybe a
Just Text
v Maybe Text -> Maybe Text -> Bool
forall a. Eq a => a -> a -> Bool
== Name -> Map Name Text -> Maybe Text
forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Name
n Map Name Text
as]
        Node
_                            -> []

force :: (Exception e, MonadThrow f) => e -> [a] -> f a
force :: e -> [a] -> f a
force e
e []    = e -> f a
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM e
e
force e
_ (a
x:[a]
_) = a -> f a
forall (m :: * -> *) a. Monad m => a -> m a
return a
x

forceM :: (Exception e, MonadThrow f) => e -> [f a] -> f a
forceM :: e -> [f a] -> f a
forceM e
e []    = e -> f a
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM e
e
forceM e
_ (f a
x:[f a]
_) = f a
x