module Text.HTML.Scalpel.Internal.Select (
CloseOffset
, select
, select_
, tagWithOffset
) where
import Text.HTML.Scalpel.Internal.Select.Types
import Control.Applicative ((<$>), (<|>))
import Control.Arrow (first)
import Data.List (tails)
import Data.Maybe (catMaybes)
import GHC.Exts (sortWith)
import qualified Data.Map.Strict as Map
import qualified Text.HTML.TagSoup as TagSoup
import qualified Text.StringLike as TagSoup
type CloseOffset = Maybe Int
select :: (Ord str, TagSoup.StringLike str, Selectable s)
=> s
-> [(TagSoup.Tag str, CloseOffset)]
-> [[(TagSoup.Tag str, CloseOffset)]]
select s = selectNodes nodes
where (MkSelector nodes) = toSelector s
select_ :: (Ord str, TagSoup.StringLike str, Selectable s)
=> s
-> [(TagSoup.Tag str, CloseOffset)]
-> [[TagSoup.Tag str]]
select_ s = map (map fst) . select s
tagWithOffset :: forall str. (Ord str, TagSoup.StringLike str)
=> [TagSoup.Tag str] -> [(TagSoup.Tag str, CloseOffset)]
tagWithOffset tags = let indexed = zip tags [0..]
unsorted = go indexed Map.empty
sorted = sortWith snd unsorted
in map fst sorted
where
go :: [(TagSoup.Tag str, Int)]
-> Map.Map str [(TagSoup.Tag str, Int)]
-> [((TagSoup.Tag str, CloseOffset), Int)]
go [] state = map (first (, Nothing)) $ concat $ Map.elems state
go (x@(tag, index) : xs) state
| TagSoup.isTagClose tag =
let maybeOpen = head <$> Map.lookup tagName state
state' = Map.alter popTag tagName state
res = catMaybes [
Just ((tag, Nothing), index)
, calcOffset <$> maybeOpen
]
in res ++ go xs state'
| TagSoup.isTagOpen tag = go xs (Map.alter appendTag tagName state)
| otherwise = ((tag, Nothing), index) : go xs state
where
tagName = getTagName tag
appendTag :: Maybe [(TagSoup.Tag str, Int)]
-> Maybe [(TagSoup.Tag str, Int)]
appendTag m = (x :) <$> (m <|> Just [])
calcOffset :: (t, Int) -> ((t, Maybe Int), Int)
calcOffset (t, i) =
let offset = index i
in offset `seq` ((t, Just offset), i)
popTag :: Maybe [a] -> Maybe [a]
popTag (Just (_ : y : xs)) = let s = y : xs in s `seq` Just s
popTag _ = Nothing
selectNodes :: TagSoup.StringLike str
=> [SelectNode]
-> [(TagSoup.Tag str, CloseOffset)]
-> [[(TagSoup.Tag str, CloseOffset)]]
selectNodes nodes tags = head' $ reverse results
where results = [concatMap (selectNode s) ts | s <- nodes
| ts <- [tags] : results]
head' [] = []
head' (x:_) = x
selectNode :: TagSoup.StringLike str
=> SelectNode
-> [(TagSoup.Tag str, CloseOffset)]
-> [[(TagSoup.Tag str, CloseOffset)]]
selectNode (SelectNode node attributes) tags = concatMap extractTagBlock nodes
where nodes = filter (checkTag node attributes) $ tails tags
selectNode (SelectAny attributes) tags = concatMap extractTagBlock nodes
where nodes = filter (checkPreds attributes) $ tails tags
checkTag :: TagSoup.StringLike str
=> String
-> [AttributePredicate]
-> [(TagSoup.Tag str, CloseOffset)]
-> Bool
checkTag name preds tags@((TagSoup.TagOpen str _, _) : _)
= TagSoup.fromString name == str && checkPreds preds tags
checkTag _ _ _ = False
checkPreds :: TagSoup.StringLike str
=> [AttributePredicate] -> [(TagSoup.Tag str, CloseOffset)] -> Bool
checkPreds preds ((TagSoup.TagOpen _ attrs, _) : _)
= and [or [checkPred p attr | attr <- attrs] | p <- preds]
checkPreds _ _ = False
extractTagBlock :: TagSoup.StringLike str
=> [(TagSoup.Tag str, CloseOffset)]
-> [[(TagSoup.Tag str, CloseOffset)]]
extractTagBlock (ctag@(tag, maybeOffset) : tags)
| not $ TagSoup.isTagOpen tag = []
| Just offset <- maybeOffset = [takeOrClose ctag offset tags]
| otherwise = [[ctag, (closeForOpen tag, Nothing)]]
extractTagBlock _ = []
takeOrClose :: TagSoup.StringLike str
=> (TagSoup.Tag str, CloseOffset)
-> Int
-> [(TagSoup.Tag str, CloseOffset)]
-> [(TagSoup.Tag str, CloseOffset)]
takeOrClose open@(tag, _) offset tags = go offset tags (open :)
where
go 0 _ f = f []
go _ [] _ = [open, (closeForOpen tag, Nothing)]
go i (x : xs) f = go (i 1) xs (f . (x :))
closeForOpen :: TagSoup.StringLike str => TagSoup.Tag str -> TagSoup.Tag str
closeForOpen = TagSoup.TagClose . getTagName
getTagName :: TagSoup.StringLike str => TagSoup.Tag str -> str
getTagName (TagSoup.TagOpen name _) = name
getTagName (TagSoup.TagClose name) = name
getTagName _ = undefined