{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE CPP #-}
{- |
This module uses HXT to transverse an HTML document using CSS selectors.

The most important function here is 'findBySelector', it takes a CSS query and
a string containing the HTML to look into,
and it returns a list of the HTML fragments that matched the given query.

Only a subset of the CSS spec is currently supported:

 * By tag name: /table td a/

 * By class names: /.container .content/

 * By Id: /#oneId/

 * By attribute: /[hasIt]/, /[exact=match]/, /[contains*=text]/, /[starts^=with]/, /[ends$=with]/

 * Union: /a, span, p/

 * Immediate children: /div > p/

 * Get jiggy with it: /div[data-attr=yeah] > .mon, .foo.bar div, #oneThing/

-}

module Yesod.Test.TransversingCSS (
  findBySelector,
  findAttributeBySelector,
  HtmlLBS,
  Query,
  -- * For HXT hackers
  -- | These functions expose some low level details that you can blissfully ignore.
  parseQuery,
  runQuery,
  Selector(..),
  SelectorGroup(..)

  )
where

import Yesod.Test.CssQuery
import qualified Data.Text as T
import qualified Control.Applicative
import Text.XML
import Text.XML.Cursor
import qualified Data.ByteString.Lazy as L
import qualified Text.HTML.DOM as HD
import Text.Blaze.Html (toHtml)
import Text.Blaze.Html.Renderer.String (renderHtml)

type Query = T.Text
type HtmlLBS = L.ByteString

-- | Perform a css 'Query' on 'Html'. Returns Either
--
-- * Left: Query parse error.
--
-- * Right: List of matching Html fragments.
findBySelector :: HtmlLBS -> Query -> Either String [String]
findBySelector :: HtmlLBS -> Query -> Either String [String]
findBySelector HtmlLBS
html Query
query =
  (Cursor Node -> String) -> [Cursor Node] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map (Html -> String
renderHtml (Html -> String) -> (Cursor Node -> Html) -> Cursor Node -> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Node -> Html
forall a. ToMarkup a => a -> Html
toHtml (Node -> Html) -> (Cursor Node -> Node) -> Cursor Node -> Html
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Cursor Node -> Node
forall node. Cursor node -> node
node) ([Cursor Node] -> [String])
-> Either String [Cursor Node] -> Either String [String]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
Control.Applicative.<$> HtmlLBS -> Query -> Either String [Cursor Node]
findCursorsBySelector HtmlLBS
html Query
query

-- | Perform a css 'Query' on 'Html'. Returns Either
--
-- * Left: Query parse error.
--
-- * Right: List of matching Cursors
findCursorsBySelector :: HtmlLBS -> Query -> Either String [Cursor]
findCursorsBySelector :: HtmlLBS -> Query -> Either String [Cursor Node]
findCursorsBySelector HtmlLBS
html Query
query =
  Cursor Node -> [[SelectorGroup]] -> [Cursor Node]
runQuery (Document -> Cursor Node
fromDocument (Document -> Cursor Node) -> Document -> Cursor Node
forall a b. (a -> b) -> a -> b
$ HtmlLBS -> Document
HD.parseLBS HtmlLBS
html)
       ([[SelectorGroup]] -> [Cursor Node])
-> Either String [[SelectorGroup]] -> Either String [Cursor Node]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
Control.Applicative.<$> Query -> Either String [[SelectorGroup]]
parseQuery Query
query

-- | Perform a css 'Query' on 'Html'. Returns Either
--
-- * Left: Query parse error.
--
-- * Right: List of matching Cursors
--
-- @since 1.5.7
findAttributeBySelector :: HtmlLBS -> Query -> T.Text -> Either String [[T.Text]]
findAttributeBySelector :: HtmlLBS -> Query -> Query -> Either String [[Query]]
findAttributeBySelector HtmlLBS
html Query
query Query
attr =
  (Cursor Node -> [Query]) -> [Cursor Node] -> [[Query]]
forall a b. (a -> b) -> [a] -> [b]
map (Query -> Cursor Node -> [Query]
laxAttribute Query
attr) ([Cursor Node] -> [[Query]])
-> Either String [Cursor Node] -> Either String [[Query]]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
Control.Applicative.<$> HtmlLBS -> Query -> Either String [Cursor Node]
findCursorsBySelector HtmlLBS
html Query
query


-- Run a compiled query on Html, returning a list of matching Html fragments.
runQuery :: Cursor -> [[SelectorGroup]] -> [Cursor]
runQuery :: Cursor Node -> [[SelectorGroup]] -> [Cursor Node]
runQuery Cursor Node
html [[SelectorGroup]]
query = ([SelectorGroup] -> [Cursor Node])
-> [[SelectorGroup]] -> [Cursor Node]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (Cursor Node -> [SelectorGroup] -> [Cursor Node]
runGroup Cursor Node
html) [[SelectorGroup]]
query

runGroup :: Cursor -> [SelectorGroup] -> [Cursor]
runGroup :: Cursor Node -> [SelectorGroup] -> [Cursor Node]
runGroup Cursor Node
c [] = [Cursor Node
c]
runGroup Cursor Node
c (DirectChildren [Selector]
s:[SelectorGroup]
gs) = (Cursor Node -> [Cursor Node]) -> [Cursor Node] -> [Cursor Node]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap ((Cursor Node -> [SelectorGroup] -> [Cursor Node])
-> [SelectorGroup] -> Cursor Node -> [Cursor Node]
forall a b c. (a -> b -> c) -> b -> a -> c
flip Cursor Node -> [SelectorGroup] -> [Cursor Node]
runGroup [SelectorGroup]
gs) ([Cursor Node] -> [Cursor Node]) -> [Cursor Node] -> [Cursor Node]
forall a b. (a -> b) -> a -> b
$ Cursor Node
c Cursor Node -> (Cursor Node -> [Cursor Node]) -> [Cursor Node]
forall node a. Cursor node -> (Cursor node -> [a]) -> [a]
$/ [Selector] -> Cursor Node -> [Cursor Node]
selectors [Selector]
s
runGroup Cursor Node
c (DeepChildren [Selector]
s:[SelectorGroup]
gs) = (Cursor Node -> [Cursor Node]) -> [Cursor Node] -> [Cursor Node]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap ((Cursor Node -> [SelectorGroup] -> [Cursor Node])
-> [SelectorGroup] -> Cursor Node -> [Cursor Node]
forall a b c. (a -> b -> c) -> b -> a -> c
flip Cursor Node -> [SelectorGroup] -> [Cursor Node]
runGroup [SelectorGroup]
gs) ([Cursor Node] -> [Cursor Node]) -> [Cursor Node] -> [Cursor Node]
forall a b. (a -> b) -> a -> b
$ Cursor Node
c Cursor Node -> (Cursor Node -> [Cursor Node]) -> [Cursor Node]
forall node a. Cursor node -> (Cursor node -> [a]) -> [a]
$// [Selector] -> Cursor Node -> [Cursor Node]
selectors [Selector]
s

selectors :: [Selector] -> Cursor -> [Cursor]
selectors :: [Selector] -> Cursor Node -> [Cursor Node]
selectors [Selector]
ss Cursor Node
c
    | (Selector -> Bool) -> [Selector] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (Cursor Node -> Selector -> Bool
selector Cursor Node
c) [Selector]
ss = [Cursor Node
c]
    | Bool
otherwise = []

selector :: Cursor -> Selector -> Bool
selector :: Cursor Node -> Selector -> Bool
selector Cursor Node
c (ById Query
x) = Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ [Cursor Node] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null ([Cursor Node] -> Bool) -> [Cursor Node] -> Bool
forall a b. (a -> b) -> a -> b
$ Name -> Query -> Cursor Node -> [Cursor Node]
attributeIs Name
"id" Query
x Cursor Node
c
selector Cursor Node
c (ByClass Query
x) =
    case Name -> Cursor Node -> [Query]
attribute Name
"class" Cursor Node
c of
        Query
t:[Query]
_ -> Query
x Query -> [Query] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` Query -> [Query]
T.words Query
t
        [] -> Bool
False
selector Cursor Node
c (ByTagName Query
t) = Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ [Cursor Node] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null ([Cursor Node] -> Bool) -> [Cursor Node] -> Bool
forall a b. (a -> b) -> a -> b
$ Name -> Cursor Node -> [Cursor Node]
element (Query -> Maybe Query -> Maybe Query -> Name
Name Query
t Maybe Query
forall a. Maybe a
Nothing Maybe Query
forall a. Maybe a
Nothing) Cursor Node
c
selector Cursor Node
c (ByAttrExists Query
t) = Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ [Cursor Node] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null ([Cursor Node] -> Bool) -> [Cursor Node] -> Bool
forall a b. (a -> b) -> a -> b
$ Name -> Cursor Node -> [Cursor Node]
hasAttribute (Query -> Maybe Query -> Maybe Query -> Name
Name Query
t Maybe Query
forall a. Maybe a
Nothing Maybe Query
forall a. Maybe a
Nothing) Cursor Node
c
selector Cursor Node
c (ByAttrEquals Query
t Query
v) = Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$ [Cursor Node] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null ([Cursor Node] -> Bool) -> [Cursor Node] -> Bool
forall a b. (a -> b) -> a -> b
$ Name -> Query -> Cursor Node -> [Cursor Node]
attributeIs (Query -> Maybe Query -> Maybe Query -> Name
Name Query
t Maybe Query
forall a. Maybe a
Nothing Maybe Query
forall a. Maybe a
Nothing) Query
v Cursor Node
c
selector Cursor Node
c (ByAttrContains Query
n Query
v) =
    case Name -> Cursor Node -> [Query]
attribute (Query -> Maybe Query -> Maybe Query -> Name
Name Query
n Maybe Query
forall a. Maybe a
Nothing Maybe Query
forall a. Maybe a
Nothing) Cursor Node
c of
        Query
t:[Query]
_ -> Query
v Query -> Query -> Bool
`T.isInfixOf` Query
t
        [] -> Bool
False
selector Cursor Node
c (ByAttrStarts Query
n Query
v) =
    case Name -> Cursor Node -> [Query]
attribute (Query -> Maybe Query -> Maybe Query -> Name
Name Query
n Maybe Query
forall a. Maybe a
Nothing Maybe Query
forall a. Maybe a
Nothing) Cursor Node
c of
        Query
t:[Query]
_ -> Query
v Query -> Query -> Bool
`T.isPrefixOf` Query
t
        [] -> Bool
False
selector Cursor Node
c (ByAttrEnds Query
n Query
v) =
    case Name -> Cursor Node -> [Query]
attribute (Query -> Maybe Query -> Maybe Query -> Name
Name Query
n Maybe Query
forall a. Maybe a
Nothing Maybe Query
forall a. Maybe a
Nothing) Cursor Node
c of
        Query
t:[Query]
_ -> Query
v Query -> Query -> Bool
`T.isSuffixOf` Query
t
        [] -> Bool
False