{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE BangPatterns #-}
{-# OPTIONS_HADDOCK hide #-}
module Text.HTML.Scalpel.Internal.Select (
    SelectContext (..)
,   TagSpec
,   TagInfo (..)

,   select
,   tagsToSpec
) where

import Text.HTML.Scalpel.Internal.Select.Types

import Control.Applicative ((<$>), (<|>))
import Data.Maybe (catMaybes, isJust, fromJust, fromMaybe)

import qualified Data.Map.Strict as Map
import qualified Data.Text as T
import qualified Data.Tree as Tree
import qualified Data.Vector as Vector
import qualified Text.HTML.TagSoup as TagSoup
import qualified Text.StringLike as TagSoup


type Index = Int
type Name = Maybe T.Text
type CloseOffset = Maybe Index

-- | The span of a tag in terms of the index of the opening tag and the index of
-- the closing tag. If there is no closing tag the closing tag is equal to the
-- opening tag.
data Span = Span !Int !Int

-- | A representation of the hierarchal structure of a document. Nodes of the
-- tree are spans which mark the start and end of a tag. The tree is organized
-- such that tags that appear earlier in the parsed string appear earlier in the
-- list of nodes, and that a given node is completely within the span of its
-- parent.
type TagForest = Tree.Forest Span

-- | A tag and associated precomputed meta data that is accessed in tight inner
-- loops during scraping.
data TagInfo str = TagInfo {
                   forall str. TagInfo str -> Tag str
infoTag    :: !(TagSoup.Tag str)
                 , forall str. TagInfo str -> Name
infoName   :: !Name
                 , forall str. TagInfo str -> CloseOffset
infoOffset :: !CloseOffset
                 }

-- | A vector of tags and precomputed meta data. A vector is used because it
-- allows for constant time slicing and sharing memory between the slices.
type TagVector str = Vector.Vector (TagInfo str)

-- | Ephemeral meta-data that each TagSpec is tagged with. This type contains
-- information that is not intrinsic in the sub-tree that corresponds to a given
-- TagSpec.
data SelectContext = SelectContext {
                     -- | `select` generates a list of `TagSpec`s that match a
                     -- selector. This indicates the index in the result list
                     -- that this TagSpec corresponds to.
                     SelectContext -> Index
ctxPosition :: !Index
                     -- | True if the current context is the result of chrooting
                     -- to a sub-tree. This is used by serial scrapers to
                     -- determine how to generate zippered sibling nodes.
                   , SelectContext -> Bool
ctxInChroot :: !Bool
                   }

-- | A structured representation of the parsed tags that provides fast element
-- look up via a vector of tags, and fast traversal via a rose tree of tags.
type TagSpec str = (TagVector str, TagForest, SelectContext)

-- | The 'select' function takes a 'Selectable' value and a list of
-- 'TagSoup.Tag's and returns a list of every subsequence of the given list of
-- Tags that matches the given selector.
select :: (TagSoup.StringLike str)
       => Selector -> TagSpec str -> [TagSpec str]
select :: forall str.
StringLike str =>
Selector -> TagSpec str -> [TagSpec str]
select Selector
s TagSpec str
tagSpec = [TagSpec str]
newSpecs
    where
        (MkSelector [(SelectNode, SelectSettings)]
nodes) = Selector
s
        newSpecs :: [TagSpec str]
newSpecs =
            forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith forall {a} {b} {c}. Index -> (a, b, c) -> (a, b, SelectContext)
applyPosition [Index
0..] (forall str.
StringLike str =>
[(SelectNode, SelectSettings)]
-> TagSpec str -> TagSpec str -> [TagSpec str] -> [TagSpec str]
selectNodes [(SelectNode, SelectSettings)]
nodes TagSpec str
tagSpec TagSpec str
tagSpec [])
        applyPosition :: Index -> (a, b, c) -> (a, b, SelectContext)
applyPosition Index
p (a
tags, b
f, c
_) = (a
tags, b
f, Index -> Bool -> SelectContext
SelectContext Index
p Bool
True)

-- | Creates a TagSpec from a list of tags parsed by TagSoup.
tagsToSpec :: forall str. (TagSoup.StringLike str)
           => [TagSoup.Tag str] -> TagSpec str
tagsToSpec :: forall str. StringLike str => [Tag str] -> TagSpec str
tagsToSpec [Tag str]
tags = (TagVector str
vector, TagForest
tree, SelectContext
ctx)
    where
        vector :: TagVector str
vector = forall str. StringLike str => [Tag str] -> TagVector str
tagsToVector [Tag str]
tags
        tree :: TagForest
tree   = forall str. StringLike str => TagVector str -> TagForest
vectorToTree TagVector str
vector
        ctx :: SelectContext
ctx    = Index -> Bool -> SelectContext
SelectContext Index
0 Bool
False

-- | Annotate each tag with the offset to the corresponding closing tag. This
-- annotating is done in O(n * log(n)).
--
-- The algorithm works on a list of tags annotated with their index. It
-- maintains a map of unclosed open tags keyed by tag name.
--
--      (1) When an open tag is encountered it is pushed onto the list keyed by
--          its name.
--
--      (2) When a closing tag is encountered the corresponding opening tag is
--          popped, the offset between the two are computed, the opening tag is
--          annotated with the offset between the two, and both are added to the
--          result set.
--
--      (3) When any other tag is encountered it is added to the result set
--          immediately.
--
--      (4) After all tags are either in the result set or the state, all
--          unclosed tags from the state are added to the result set without a
--          closing offset.
--
--      (5) The result set is then sorted by their indices.
tagsToVector :: forall str. (TagSoup.StringLike str)
             => [TagSoup.Tag str] -> TagVector str
tagsToVector :: forall str. StringLike str => [Tag str] -> TagVector str
tagsToVector [Tag str]
tags = let indexed :: [(Tag str, Index)]
indexed  = forall a b. [a] -> [b] -> [(a, b)]
zip [Tag str]
tags [Index
0..]
                        total :: Index
total    = forall (t :: * -> *) a. Foldable t => t a -> Index
length [(Tag str, Index)]
indexed
                        unsorted :: [(Index, TagInfo str)]
unsorted = [(Tag str, Index)]
-> Map Text [(Tag str, Index)] -> [(Index, TagInfo str)]
go [(Tag str, Index)]
indexed forall k a. Map k a
Map.empty
                        emptyVec :: Vector a
emptyVec = forall a. Index -> a -> Vector a
Vector.replicate Index
total forall a. HasCallStack => a
undefined
                     in forall {a}. Vector a
emptyVec forall a. Vector a -> [(Index, a)] -> Vector a
Vector.// [(Index, TagInfo str)]
unsorted
    where
        go :: [(TagSoup.Tag str, Index)]
           -> Map.Map T.Text [(TagSoup.Tag str, Index)]
           -> [(Index, TagInfo str)]
        go :: [(Tag str, Index)]
-> Map Text [(Tag str, Index)] -> [(Index, TagInfo str)]
go [] Map Text [(Tag str, Index)]
state =
                forall a b. (a -> b) -> [a] -> [b]
map (\(Tag str
t, Index
i) -> (Index
i, forall str. Tag str -> Name -> CloseOffset -> TagInfo str
TagInfo Tag str
t (forall {str}. StringLike str => Tag str -> Name
maybeName Tag str
t) forall a. Maybe a
Nothing))
                              forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat
                              forall a b. (a -> b) -> a -> b
$ forall k a. Map k a -> [a]
Map.elems Map Text [(Tag str, Index)]
state
            where
                maybeName :: Tag str -> Name
maybeName Tag str
t | forall str. Tag str -> Bool
TagSoup.isTagOpen Tag str
t  = forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall str. StringLike str => Tag str -> Text
getTagName Tag str
t
                            | forall str. Tag str -> Bool
TagSoup.isTagClose Tag str
t = forall a. a -> Maybe a
Just forall a b. (a -> b) -> a -> b
$ forall str. StringLike str => Tag str -> Text
getTagName Tag str
t
                            | Bool
otherwise            = forall a. Maybe a
Nothing
        go ((Tag str
tag, Index
index) : [(Tag str, Index)]
xs) Map Text [(Tag str, Index)]
state
            | forall str. Tag str -> Bool
TagSoup.isTagClose Tag str
tag =
                let maybeOpen :: Maybe (Tag str, Index)
maybeOpen = forall a. [a] -> a
head forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall k a. Ord k => k -> Map k a -> Maybe a
Map.lookup Text
tagName Map Text [(Tag str, Index)]
state
                    state' :: Map Text [(Tag str, Index)]
state'    = forall k a.
Ord k =>
(Maybe a -> Maybe a) -> k -> Map k a -> Map k a
Map.alter forall a. Maybe [a] -> Maybe [a]
popTag Text
tagName Map Text [(Tag str, Index)]
state
                    info :: TagInfo str
info      = forall str. Tag str -> Name -> CloseOffset -> TagInfo str
TagInfo Tag str
tag (forall a. a -> Maybe a
Just Text
tagName) forall a. Maybe a
Nothing
                    res :: [(Index, TagInfo str)]
res       = forall a. [Maybe a] -> [a]
catMaybes [
                                  forall a. a -> Maybe a
Just (Index
index, TagInfo str
info)
                              ,   (Tag str, Index) -> (Index, TagInfo str)
calcOffset forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Maybe (Tag str, Index)
maybeOpen
                              ]
                 in [(Index, TagInfo str)]
res forall a. [a] -> [a] -> [a]
++ [(Tag str, Index)]
-> Map Text [(Tag str, Index)] -> [(Index, TagInfo str)]
go [(Tag str, Index)]
xs Map Text [(Tag str, Index)]
state'
            | forall str. Tag str -> Bool
TagSoup.isTagOpen Tag str
tag =
                [(Tag str, Index)]
-> Map Text [(Tag str, Index)] -> [(Index, TagInfo str)]
go [(Tag str, Index)]
xs (forall k a.
Ord k =>
(Maybe a -> Maybe a) -> k -> Map k a -> Map k a
Map.alter Maybe [(Tag str, Index)] -> Maybe [(Tag str, Index)]
appendTag Text
tagName Map Text [(Tag str, Index)]
state)
            | Bool
otherwise =
                let info :: TagInfo str
info = forall str. Tag str -> Name -> CloseOffset -> TagInfo str
TagInfo Tag str
tag forall a. Maybe a
Nothing forall a. Maybe a
Nothing
                in (Index
index, TagInfo str
info) forall a. a -> [a] -> [a]
: [(Tag str, Index)]
-> Map Text [(Tag str, Index)] -> [(Index, TagInfo str)]
go [(Tag str, Index)]
xs Map Text [(Tag str, Index)]
state
            where
                tagName :: Text
tagName = forall str. StringLike str => Tag str -> Text
getTagName Tag str
tag

                appendTag :: Maybe [(TagSoup.Tag str, Index)]
                          -> Maybe [(TagSoup.Tag str, Index)]
                appendTag :: Maybe [(Tag str, Index)] -> Maybe [(Tag str, Index)]
appendTag Maybe [(Tag str, Index)]
m = ((Tag str
tag, Index
index) forall a. a -> [a] -> [a]
:) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Maybe [(Tag str, Index)]
m forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall a. a -> Maybe a
Just [])

                calcOffset :: (TagSoup.Tag str, Index) -> (Index, TagInfo str)
                calcOffset :: (Tag str, Index) -> (Index, TagInfo str)
calcOffset (Tag str
t, Index
i) =
                    let offset :: Index
offset = Index
index forall a. Num a => a -> a -> a
- Index
i
                        info :: TagInfo str
info   = forall str. Tag str -> Name -> CloseOffset -> TagInfo str
TagInfo Tag str
t (forall a. a -> Maybe a
Just Text
tagName) (forall a. a -> Maybe a
Just Index
offset)
                     in Index
offset seq :: forall a b. a -> b -> b
`seq` (Index
i, TagInfo str
info)

                popTag :: Maybe [a] -> Maybe [a]
                popTag :: forall a. Maybe [a] -> Maybe [a]
popTag (Just (a
_ : a
y : [a]
xs)) = let s :: [a]
s = a
y forall a. a -> [a] -> [a]
: [a]
xs in [a]
s seq :: forall a b. a -> b -> b
`seq` forall a. a -> Maybe a
Just [a]
s
                popTag Maybe [a]
_                   = forall a. Maybe a
Nothing

getTagName :: TagSoup.StringLike str => TagSoup.Tag str -> T.Text
getTagName :: forall str. StringLike str => Tag str -> Text
getTagName (TagSoup.TagOpen str
name [Attribute str]
_) = forall a b. (StringLike a, StringLike b) => a -> b
TagSoup.castString str
name
getTagName (TagSoup.TagClose str
name)  = forall a b. (StringLike a, StringLike b) => a -> b
TagSoup.castString str
name
getTagName Tag str
_                        = forall a. HasCallStack => a
undefined

-- | Builds a forest describing the structure of the tags within a given vector.
-- The nodes of the forest are tag spans which mark the indices within the
-- vector of an open and close pair. The tree is organized such for any node n
-- the parent of node n is the smallest span that completely encapsulates the
-- span of node n.
vectorToTree :: TagSoup.StringLike str => TagVector str -> TagForest
vectorToTree :: forall str. StringLike str => TagVector str -> TagForest
vectorToTree TagVector str
tags = TagForest -> TagForest
fixup forall a b. (a -> b) -> a -> b
$ Index -> Index -> TagForest
forestWithin Index
0 (forall a. Vector a -> Index
Vector.length TagVector str
tags)
    where
        forestWithin :: Int -> Int -> TagForest
        forestWithin :: Index -> Index -> TagForest
forestWithin !Index
lo !Index
hi
            | Index
hi forall a. Ord a => a -> a -> Bool
<= Index
lo   = []
            | Bool
shouldSkip = Index -> Index -> TagForest
forestWithin (Index
lo forall a. Num a => a -> a -> a
+ Index
1) Index
hi
            | Bool
otherwise  = forall a. a -> [Tree a] -> Tree a
Tree.Node (Index -> Index -> Span
Span Index
lo Index
closeIndex) TagForest
subForest
                         forall a. a -> [a] -> [a]
: Index -> Index -> TagForest
forestWithin (Index
closeIndex forall a. Num a => a -> a -> a
+ Index
1) Index
hi
            where
                info :: TagInfo str
info       = TagVector str
tags forall a. Vector a -> Index -> a
Vector.! Index
lo
                isOpen :: Bool
isOpen     = forall str. Tag str -> Bool
TagSoup.isTagOpen forall a b. (a -> b) -> a -> b
$ forall str. TagInfo str -> Tag str
infoTag TagInfo str
info
                isText :: Bool
isText     = forall str. Tag str -> Bool
TagSoup.isTagText forall a b. (a -> b) -> a -> b
$ forall str. TagInfo str -> Tag str
infoTag TagInfo str
info
                shouldSkip :: Bool
shouldSkip = Bool -> Bool
not Bool
isOpen Bool -> Bool -> Bool
&& Bool -> Bool
not Bool
isText
                closeIndex :: Index
closeIndex = Index
lo forall a. Num a => a -> a -> a
+ forall a. a -> Maybe a -> a
fromMaybe Index
0 (forall str. TagInfo str -> CloseOffset
infoOffset TagInfo str
info)
                subForest :: TagForest
subForest  = Index -> Index -> TagForest
forestWithin (Index
lo forall a. Num a => a -> a -> a
+ Index
1) Index
closeIndex

        -- Lifts nodes whose closing tags lay outside their parent tags up to
        -- within a parent node that encompasses the node's entire span.
        fixup :: TagForest -> TagForest
        fixup :: TagForest -> TagForest
fixup [] = []
        fixup (Tree.Node (Span Index
lo Index
hi) TagForest
subForest : TagForest
siblings)
            = forall a. a -> [Tree a] -> Tree a
Tree.Node (Index -> Index -> Span
Span Index
lo Index
hi) TagForest
ok forall a. a -> [a] -> [a]
: TagForest
bad
            where
                (TagForest
ok, TagForest
bad) = TagForest -> TagForest -> (TagForest, TagForest)
malformed (TagForest -> TagForest
fixup TagForest
siblings) forall a b. (a -> b) -> a -> b
$ TagForest -> TagForest
fixup TagForest
subForest

                malformed :: TagForest -- Forest to prepend bad trees on.
                          -> TagForest  -- Remaining trees to examine.
                          -> (TagForest, TagForest)
                malformed :: TagForest -> TagForest -> (TagForest, TagForest)
malformed TagForest
preBad [] = ([], TagForest
preBad)
                malformed TagForest
preBad (n :: Tree Span
n@(Tree.Node (Span Index
_ Index
nHi) TagForest
_) : TagForest
ns)
                    | Index
hi forall a. Ord a => a -> a -> Bool
< Index
nHi  = (TagForest
ok, Tree Span
n forall a. a -> [a] -> [a]
: TagForest
bad)
                    | Bool
otherwise = (Tree Span
n forall a. a -> [a] -> [a]
: TagForest
ok, TagForest
bad)
                    where (TagForest
ok, TagForest
bad) = TagForest -> TagForest -> (TagForest, TagForest)
malformed TagForest
preBad TagForest
ns

-- | Generates a list of 'TagSpec's that match the given list of 'SelectNode's.
-- This is is done in linear time with respect to the number of tags.
--
-- The algorithm is a simple DFS traversal of the tag forest. While traversing
-- the forest if the current SelectNode is satisfied by the current node in the
-- tree the SelectNode is popped and the current node's sub-forest is traversed
-- with the remaining SelectNodes. If there is only a single SelectNode then any
-- node encountered that satisfies the SelectNode is returned as an answer.
selectNodes :: TagSoup.StringLike str
            => [(SelectNode, SelectSettings)]
            -> TagSpec str
            -> TagSpec str
            -> [TagSpec str]
            -> [TagSpec str]
selectNodes :: forall str.
StringLike str =>
[(SelectNode, SelectSettings)]
-> TagSpec str -> TagSpec str -> [TagSpec str] -> [TagSpec str]
selectNodes []  TagSpec str
_          TagSpec str
_ [TagSpec str]
acc = [TagSpec str]
acc
selectNodes [(SelectNode, SelectSettings)
_] (TagVector str
_, [], SelectContext
_) TagSpec str
_ [TagSpec str]
acc = [TagSpec str]
acc
-- Now that there is only a single SelectNode to satisfy, search the remaining
-- forests and generates a TagSpec for each node that satisfies the condition.
selectNodes [(SelectNode, SelectSettings)
n] cur :: TagSpec str
cur@(TagVector str
tags, Tree Span
f : TagForest
fs, SelectContext
ctx) TagSpec str
root [TagSpec str]
acc
    | MatchResult
MatchOk forall a. Eq a => a -> a -> Bool
== MatchResult
matchResult
        = (TagSpec str
shrunkSpec forall a. a -> [a] -> [a]
:)
        forall a b. (a -> b) -> a -> b
$ forall str.
StringLike str =>
[(SelectNode, SelectSettings)]
-> TagSpec str -> TagSpec str -> [TagSpec str] -> [TagSpec str]
selectNodes [(SelectNode, SelectSettings)
n] (TagVector str
tags, TagForest
fs, SelectContext
ctx) TagSpec str
root
        forall a b. (a -> b) -> a -> b
$ forall str.
StringLike str =>
[(SelectNode, SelectSettings)]
-> TagSpec str -> TagSpec str -> [TagSpec str] -> [TagSpec str]
selectNodes [(SelectNode, SelectSettings)
n] (TagVector str
tags, forall a. Tree a -> [Tree a]
Tree.subForest Tree Span
f, SelectContext
ctx) TagSpec str
root [TagSpec str]
acc
    | MatchResult
MatchCull forall a. Eq a => a -> a -> Bool
== MatchResult
matchResult
        = forall str.
StringLike str =>
[(SelectNode, SelectSettings)]
-> TagSpec str -> TagSpec str -> [TagSpec str] -> [TagSpec str]
selectNodes [(SelectNode, SelectSettings)
n] (TagVector str
tags, TagForest
fs, SelectContext
ctx) TagSpec str
root [TagSpec str]
acc
    | Bool
otherwise
        = forall str.
StringLike str =>
[(SelectNode, SelectSettings)]
-> TagSpec str -> TagSpec str -> [TagSpec str] -> [TagSpec str]
selectNodes [(SelectNode, SelectSettings)
n] (TagVector str
tags, forall a. Tree a -> [Tree a]
Tree.subForest Tree Span
f, SelectContext
ctx) TagSpec str
root
        forall a b. (a -> b) -> a -> b
$ forall str.
StringLike str =>
[(SelectNode, SelectSettings)]
-> TagSpec str -> TagSpec str -> [TagSpec str] -> [TagSpec str]
selectNodes [(SelectNode, SelectSettings)
n] (TagVector str
tags, TagForest
fs, SelectContext
ctx) TagSpec str
root [TagSpec str]
acc
    where
        Span Index
lo Index
hi = forall a. Tree a -> a
Tree.rootLabel Tree Span
f
        shrunkSpec :: TagSpec str
shrunkSpec = (
                       forall a. Index -> Index -> Vector a -> Vector a
Vector.slice Index
lo (Index
hi forall a. Num a => a -> a -> a
- Index
lo forall a. Num a => a -> a -> a
+ Index
1) TagVector str
tags
                     , [forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Span -> Span
recenter Tree Span
f]
                     , SelectContext
ctx
                     )
        recenter :: Span -> Span
recenter (Span Index
nLo Index
nHi) = Index -> Index -> Span
Span (Index
nLo forall a. Num a => a -> a -> a
- Index
lo) (Index
nHi forall a. Num a => a -> a -> a
- Index
lo)
        info :: TagInfo str
info = TagVector str
tags forall a. Vector a -> Index -> a
Vector.! Index
lo
        matchResult :: MatchResult
matchResult = forall str.
StringLike str =>
(SelectNode, SelectSettings)
-> TagInfo str -> TagSpec str -> TagSpec str -> MatchResult
nodeMatches (SelectNode, SelectSettings)
n TagInfo str
info TagSpec str
cur TagSpec str
root
-- There are multiple SelectNodes that need to be satisfied. If the current node
-- satisfies the condition, then the current nodes sub-forest is searched for
-- matches of the remaining SelectNodes.
selectNodes ((SelectNode, SelectSettings)
_ : [(SelectNode, SelectSettings)]
_) (TagVector str
_, [], SelectContext
_) TagSpec str
_ [TagSpec str]
acc = [TagSpec str]
acc
selectNodes ((SelectNode, SelectSettings)
n : [(SelectNode, SelectSettings)]
ns) cur :: TagSpec str
cur@(TagVector str
tags, Tree Span
f : TagForest
fs, SelectContext
ctx) TagSpec str
root [TagSpec str]
acc
    | MatchResult
MatchOk forall a. Eq a => a -> a -> Bool
== MatchResult
matchResult
        = forall str.
StringLike str =>
[(SelectNode, SelectSettings)]
-> TagSpec str -> TagSpec str -> [TagSpec str] -> [TagSpec str]
selectNodes [(SelectNode, SelectSettings)]
ns       (TagVector str
tags, forall a. Tree a -> [Tree a]
Tree.subForest Tree Span
f forall a. [a] -> [a] -> [a]
++ TagForest
siblings, SelectContext
ctx)
                               -- The new root node is the just matched node
                               -- plus the lifted siblings.
                               (TagVector str
tags, Tree Span
f forall a. a -> [a] -> [a]
: TagForest
siblings, SelectContext
ctx)
        forall a b. (a -> b) -> a -> b
$ forall str.
StringLike str =>
[(SelectNode, SelectSettings)]
-> TagSpec str -> TagSpec str -> [TagSpec str] -> [TagSpec str]
selectNodes ((SelectNode, SelectSettings)
n forall a. a -> [a] -> [a]
: [(SelectNode, SelectSettings)]
ns) (TagVector str
tags, TagForest
fs, SelectContext
ctx) TagSpec str
root [TagSpec str]
acc
    | MatchResult
MatchCull forall a. Eq a => a -> a -> Bool
== MatchResult
matchResult
        = forall str.
StringLike str =>
[(SelectNode, SelectSettings)]
-> TagSpec str -> TagSpec str -> [TagSpec str] -> [TagSpec str]
selectNodes ((SelectNode, SelectSettings)
n forall a. a -> [a] -> [a]
: [(SelectNode, SelectSettings)]
ns) (TagVector str
tags, TagForest
fs, SelectContext
ctx) TagSpec str
root [TagSpec str]
acc
    | Bool
otherwise
        = forall str.
StringLike str =>
[(SelectNode, SelectSettings)]
-> TagSpec str -> TagSpec str -> [TagSpec str] -> [TagSpec str]
selectNodes ((SelectNode, SelectSettings)
n forall a. a -> [a] -> [a]
: [(SelectNode, SelectSettings)]
ns) (TagVector str
tags, forall a. Tree a -> [Tree a]
Tree.subForest Tree Span
f, SelectContext
ctx) TagSpec str
root
        forall a b. (a -> b) -> a -> b
$ forall str.
StringLike str =>
[(SelectNode, SelectSettings)]
-> TagSpec str -> TagSpec str -> [TagSpec str] -> [TagSpec str]
selectNodes ((SelectNode, SelectSettings)
n forall a. a -> [a] -> [a]
: [(SelectNode, SelectSettings)]
ns) (TagVector str
tags, TagForest
fs, SelectContext
ctx) TagSpec str
root [TagSpec str]
acc
    where
        Span Index
lo Index
hi = forall a. Tree a -> a
Tree.rootLabel Tree Span
f
        info :: TagInfo str
info = TagVector str
tags forall a. Vector a -> Index -> a
Vector.! Index
lo
        matchResult :: MatchResult
matchResult = forall str.
StringLike str =>
(SelectNode, SelectSettings)
-> TagInfo str -> TagSpec str -> TagSpec str -> MatchResult
nodeMatches (SelectNode, SelectSettings)
n TagInfo str
info TagSpec str
cur TagSpec str
root

        -- In the case of a match, it is possible that there are children nested
        -- within the sibling forests that are potentially valid matches for the
        -- current node despite not being direct children. This can happen with
        -- malformed HTML, for example <a><b><c></c><a></b>. In this case <c>
        -- would be a child of <b> which would be a sibling of <a>.
        --
        -- In order to match <c> it must be lifted out of <b>'s sub forest when
        -- matching <a>.
        treeLo :: Tree Span -> Index
treeLo Tree Span
tree | (Span Index
lo Index
_) <- forall a. Tree a -> a
Tree.rootLabel Tree Span
tree = Index
lo
        treeHi :: Tree Span -> Index
treeHi Tree Span
tree | (Span Index
_ Index
hi) <- forall a. Tree a -> a
Tree.rootLabel Tree Span
tree = Index
hi

        liftSiblings :: TagForest -> TagForest -> TagForest
liftSiblings []       TagForest
acc = TagForest
acc
        liftSiblings (Tree Span
t : TagForest
ts) TagForest
acc
          | Index
lo forall a. Ord a => a -> a -> Bool
< Tree Span -> Index
treeLo Tree Span
t Bool -> Bool -> Bool
&& Tree Span -> Index
treeHi Tree Span
t forall a. Ord a => a -> a -> Bool
< Index
hi = Tree Span
t forall a. a -> [a] -> [a]
: TagForest -> TagForest -> TagForest
liftSiblings TagForest
ts TagForest
acc
          | Index
hi forall a. Ord a => a -> a -> Bool
< Tree Span -> Index
treeLo Tree Span
t Bool -> Bool -> Bool
|| Tree Span -> Index
treeHi Tree Span
t forall a. Ord a => a -> a -> Bool
< Index
lo = TagForest -> TagForest -> TagForest
liftSiblings TagForest
ts TagForest
acc
          | Bool
otherwise                      = TagForest -> TagForest -> TagForest
liftSiblings (forall a. Tree a -> [Tree a]
Tree.subForest Tree Span
t)
                                           forall a b. (a -> b) -> a -> b
$ TagForest -> TagForest -> TagForest
liftSiblings TagForest
ts TagForest
acc
        siblings :: TagForest
siblings = TagForest -> TagForest -> TagForest
liftSiblings TagForest
fs []

-- | The result of nodeMatches, can either be a match, a failure, or a failure
-- that culls all children of the current node.
data MatchResult = MatchOk | MatchFail | MatchCull
  deriving (MatchResult -> MatchResult -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: MatchResult -> MatchResult -> Bool
$c/= :: MatchResult -> MatchResult -> Bool
== :: MatchResult -> MatchResult -> Bool
$c== :: MatchResult -> MatchResult -> Bool
Eq)

-- | Ands together two MatchResult values.
andMatch :: MatchResult -> MatchResult -> MatchResult
andMatch :: MatchResult -> MatchResult -> MatchResult
andMatch MatchResult
MatchOk MatchResult
MatchOk = MatchResult
MatchOk
andMatch MatchResult
MatchCull MatchResult
_     = MatchResult
MatchCull
andMatch MatchResult
_ MatchResult
MatchCull     = MatchResult
MatchCull
andMatch MatchResult
_ MatchResult
_             = MatchResult
MatchFail

-- | Turns a boolean value into a MatchResult.
boolMatch :: Bool -> MatchResult
boolMatch :: Bool -> MatchResult
boolMatch Bool
True  = MatchResult
MatchOk
boolMatch Bool
False = MatchResult
MatchFail

-- | Returns True if a tag satisfies a given SelectNode's condition.
nodeMatches :: TagSoup.StringLike str
            => (SelectNode, SelectSettings)
            -> TagInfo str
            -> TagSpec str
            -> TagSpec str
            -> MatchResult
nodeMatches :: forall str.
StringLike str =>
(SelectNode, SelectSettings)
-> TagInfo str -> TagSpec str -> TagSpec str -> MatchResult
nodeMatches (SelectNode Text
node [AttributePredicate]
preds, SelectSettings
settings) TagInfo str
info TagSpec str
cur TagSpec str
root =
    forall str.
StringLike str =>
SelectSettings -> TagSpec str -> TagSpec str -> MatchResult
checkSettings SelectSettings
settings TagSpec str
cur TagSpec str
root MatchResult -> MatchResult -> MatchResult
`andMatch` forall str.
StringLike str =>
Text -> [AttributePredicate] -> TagInfo str -> MatchResult
checkTag Text
node [AttributePredicate]
preds TagInfo str
info
nodeMatches (SelectAny [AttributePredicate]
preds      , SelectSettings
settings) TagInfo str
info TagSpec str
cur TagSpec str
root =
    forall str.
StringLike str =>
SelectSettings -> TagSpec str -> TagSpec str -> MatchResult
checkSettings SelectSettings
settings TagSpec str
cur TagSpec str
root MatchResult -> MatchResult -> MatchResult
`andMatch` forall str.
StringLike str =>
[AttributePredicate] -> Tag str -> MatchResult
checkPreds [AttributePredicate]
preds (forall str. TagInfo str -> Tag str
infoTag TagInfo str
info)
nodeMatches (SelectNode
SelectText           , SelectSettings
settings) TagInfo str
info TagSpec str
cur TagSpec str
root =
    forall str.
StringLike str =>
SelectSettings -> TagSpec str -> TagSpec str -> MatchResult
checkSettings SelectSettings
settings TagSpec str
cur TagSpec str
root MatchResult -> MatchResult -> MatchResult
`andMatch`
    Bool -> MatchResult
boolMatch (forall str. Tag str -> Bool
TagSoup.isTagText forall a b. (a -> b) -> a -> b
$ forall str. TagInfo str -> Tag str
infoTag TagInfo str
info)

-- | Given a SelectSettings, the current node under consideration, and the last
-- matched node, returns true IFF the current node satisfies all of the
-- selection settings.
checkSettings :: TagSoup.StringLike str
              => SelectSettings -> TagSpec str -> TagSpec str -> MatchResult
checkSettings :: forall str.
StringLike str =>
SelectSettings -> TagSpec str -> TagSpec str -> MatchResult
checkSettings (SelectSettings (Just Index
depth))
              (TagVector str
_, Tree Span
curRoot : TagForest
_, SelectContext
_)
              (TagVector str
_, TagForest
root, SelectContext
_)
  | Index
depthOfCur forall a. Ord a => a -> a -> Bool
< Index
depth = MatchResult
MatchFail
  | Index
depthOfCur forall a. Ord a => a -> a -> Bool
> Index
depth = MatchResult
MatchCull
  | Bool
otherwise          = MatchResult
MatchOk
  where
      Span Index
curLo Index
curHi = forall a. Tree a -> a
Tree.rootLabel Tree Span
curRoot
      mapTree :: (a -> b) -> t (Tree a) -> [b]
mapTree a -> b
f = forall a b. (a -> b) -> [a] -> [b]
map a -> b
f forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap forall a. Tree a -> [a]
Tree.flatten
      depthOfCur :: Index
depthOfCur = forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum forall a b. (a -> b) -> a -> b
$ forall {t :: * -> *} {a} {b}.
Foldable t =>
(a -> b) -> t (Tree a) -> [b]
mapTree forall {a}. Num a => Span -> a
oneIfContainsCur TagForest
root
      oneIfContainsCur :: Span -> a
oneIfContainsCur (Span Index
lo Index
hi)
          | Index
lo forall a. Ord a => a -> a -> Bool
< Index
curLo Bool -> Bool -> Bool
&& Index
curHi forall a. Ord a => a -> a -> Bool
< Index
hi = a
1
          | Bool
otherwise = a
0
checkSettings (SelectSettings CloseOffset
_) (TagVector str, TagForest, SelectContext)
_ (TagVector str, TagForest, SelectContext)
_ = MatchResult
MatchOk

-- | Given a tag name and a list of attribute predicates return a function that
-- returns true if a given tag matches the supplied name and predicates.
checkTag :: TagSoup.StringLike str
         => T.Text -> [AttributePredicate] -> TagInfo str -> MatchResult
checkTag :: forall str.
StringLike str =>
Text -> [AttributePredicate] -> TagInfo str -> MatchResult
checkTag Text
name [AttributePredicate]
preds (TagInfo Tag str
tag Name
tagName CloseOffset
_)
      =  Bool -> MatchResult
boolMatch (
          forall str. Tag str -> Bool
TagSoup.isTagOpen Tag str
tag
        Bool -> Bool -> Bool
&& forall a. Maybe a -> Bool
isJust Name
tagName
        Bool -> Bool -> Bool
&& Text
name forall a. Eq a => a -> a -> Bool
== forall a. HasCallStack => Maybe a -> a
fromJust Name
tagName
      ) MatchResult -> MatchResult -> MatchResult
`andMatch` forall str.
StringLike str =>
[AttributePredicate] -> Tag str -> MatchResult
checkPreds [AttributePredicate]
preds Tag str
tag

-- | Returns True if a tag satisfies a list of attribute predicates.
checkPreds :: TagSoup.StringLike str
           => [AttributePredicate] -> TagSoup.Tag str -> MatchResult
checkPreds :: forall str.
StringLike str =>
[AttributePredicate] -> Tag str -> MatchResult
checkPreds []    Tag str
tag = Bool -> MatchResult
boolMatch
                     forall a b. (a -> b) -> a -> b
$ forall str. Tag str -> Bool
TagSoup.isTagOpen Tag str
tag Bool -> Bool -> Bool
|| forall str. Tag str -> Bool
TagSoup.isTagText Tag str
tag
checkPreds [AttributePredicate]
preds Tag str
tag = Bool -> MatchResult
boolMatch
                     forall a b. (a -> b) -> a -> b
$ forall str. Tag str -> Bool
TagSoup.isTagOpen Tag str
tag Bool -> Bool -> Bool
&& forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (forall str.
StringLike str =>
AttributePredicate -> [Attribute str] -> Bool
`checkPred` [Attribute str]
attrs) [AttributePredicate]
preds
    where (TagSoup.TagOpen str
_ [Attribute str]
attrs) = Tag str
tag