module Text.HTML.TagSoup.Tree.Selection (
selectGroup,
select,
selectAll,
canMatch,
matchesWith
) where
import Control.Applicative
import Control.Category ((>>>))
import Data.Maybe
import Text.CSS3.Selectors.Syntax
import Text.HTML.TagSoup
import Text.HTML.TagSoup.Tree
import Text.HTML.TagSoup.Tree.Util
import Text.HTML.TagSoup.Tree.Zipper
import Text.StringLike
import Text.StringLike.Matchable
selectGroup :: Matchable str => SelectorGroup -> TagTree str -> [[Maybe (TagTreePos str)]]
selectGroup (SelectorGroup sel sels) = map (\(pos, bs) -> map (\b -> if b then Just pos else Nothing) bs)
. filter (or . snd) . selectAll (sel : sels)
select :: Matchable str => Selector -> TagTree str -> [TagTreePos str]
select sel = map fst . filter (head . snd) . selectAll [sel]
selectAll :: Matchable str => [Selector] -> TagTree str -> [(TagTreePos str, [Bool])]
selectAll sels = traverseTree (\pos -> [(pos, map (`canMatch` pos) sels)]) . fromTagTree
canMatch :: Matchable str => Selector -> TagTreePos str -> Bool
canMatch sel = (&&) <$> simpleSelSeqPred s <*> matches ss (map combPosList cs)
where
(s : ss, cs) = chain sel
combPosList :: Combinator -> TagTreePos str -> [TagTreePos str]
combPosList c = case c of
Descendant -> iteratePos parent
Child -> take 1 . iteratePos parent
AdjacentSibling -> take 1 . iteratePos prevSibling
GeneralSibling -> iteratePos prevSibling
matches :: Matchable str => [SimpleSelectorSequence] -> [TagTreePos str -> [TagTreePos str]] -> TagTreePos str -> Bool
matches ss cfs = foldr (\(s, cf) rest -> any ((&&) <$> simpleSelSeqPred s <*> rest) . cf) (const True) (zip ss cfs)
chain :: Selector -> ([SimpleSelectorSequence], [Combinator])
chain sel = let (seqs, combs) = chain' sel in (reverse seqs, reverse combs)
where
chain' (Selector seq mbComb) = case mbComb of
Nothing -> (seq : [], [])
Just (comb, seq') -> let (seqs, combs) = chain' seq'
in (seq : seqs, comb : combs)
simpleSelSeqPred :: Matchable str => SimpleSelectorSequence -> TagTreePos str -> Bool
simpleSelSeqPred (SimpleSelectorSequence head tails) = foldr1 (liftA2 (&&)) $ hFilter head : map tFilter tails
where
hFilter :: Matchable str => HeadSimpleSelector -> TagTreePos str -> Bool
hFilter sel = case sel of
TypeSelector n -> content >>> hasTagBranchName (fromString n)
UniversalSelector -> content >>> isTagBranch
tFilter :: Matchable str => TailSimpleSelector -> TagTreePos str -> Bool
tFilter sel = case sel of
AttributeSelector a Nothing -> content >>> hasTagBranchAttr (fromString a)
AttributeSelector a (Just (op, v)) -> content >>> findTagBranchAttr (fromString a)
>>> maybe False (matchesWith op $ fromString v)
ClassSelector v -> content >>> findTagBranchAttr (fromString "class")
>>> maybe False (matchesWith IncludesMatch $ fromString v)
IDSelector v -> content >>> findTagBranchAttr (fromString "id")
>>> maybe False (matchesWith ExactMatch $ fromString v)
PseudoClass p -> pseudo p
pseudo :: Matchable str => PseudoClass -> TagTreePos str -> Bool
pseudo p = case p of
Root -> parents >>> null
NthChild p -> error "simpleSelSeqPred: nth-child not implemented yet"
NthLastChild p -> error "simpleSelSeqPred: nth-last-child not implemented yet"
NthOfType p -> error "simpleSelSeqPred: nth-of-type not implemented yet"
NthLastOfType p -> error "simpleSelSeqPred: nth-last-of-type not implemented yet"
FirstChild -> (&&) <$> (parents >>> not . null) <*> (before >>> null)
LastChild -> (&&) <$> (parents >>> not . null) <*> (after >>> null)
FirstOfType -> \pos -> let name = tagBranchName $ content pos
in (&&) <$> (parents >>> not . null)
<*> (before >>> all (not . hasTagBranchName name)) $ pos
LastOfType -> \pos -> let name = tagBranchName $ content pos
in (&&) <$> (parents >>> not . null)
<*> (after >>> all (not . hasTagBranchName name)) $ pos
OnlyChild -> (&&) <$> pseudo FirstChild <*> pseudo LastChild
OnlyOfType -> (&&) <$> pseudo FirstOfType <*> pseudo LastOfType
Empty -> content >>> children >>> null
Not (Right (PseudoClass (Not _))) -> error "simpleSelSeqPred: nested :not pseudo classes"
Not (Left sel) -> not . hFilter sel
Not (Right sel) -> not . tFilter sel
matchesWith :: Matchable str => AttributeOperator -> str -> str -> Bool
matchesWith op = case op of
ExactMatch -> matchesExactly
IncludesMatch -> matchesWordOf
DashMatch -> \v s -> v `matchesExactly` s || (v `append` fromChar '-') `matchesPrefixOf` s
PrefixMatch -> matchesPrefixOf
SuffixMatch -> matchesSuffixOf
InfixMatch -> matchesInfixOf