{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections #-}
{-# LANGUAGE BangPatterns #-}
{-# OPTIONS_HADDOCK hide #-}
module Text.HTML.Scalpel.Internal.Select (
SelectContext (..)
, TagSpec
, TagInfo (..)
, select
, tagsToSpec
) where
import Text.HTML.Scalpel.Internal.Select.Types
import Control.Applicative ((<$>), (<|>))
import Data.Maybe (catMaybes, isJust, fromJust, fromMaybe)
import qualified Data.Map.Strict as Map
import qualified Data.Text as T
import qualified Data.Tree as Tree
import qualified Data.Vector as Vector
import qualified Text.HTML.TagSoup as TagSoup
import qualified Text.StringLike as TagSoup
type Index = Int
type Name = Maybe T.Text
type CloseOffset = Maybe Index
data Span = Span !Int !Int
type TagForest = Tree.Forest Span
data TagInfo str = TagInfo {
infoTag :: !(TagSoup.Tag str)
, infoName :: !Name
, infoOffset :: !CloseOffset
}
type TagVector str = Vector.Vector (TagInfo str)
data SelectContext = SelectContext {
ctxPosition :: !Index
, ctxInChroot :: !Bool
}
type TagSpec str = (TagVector str, TagForest, SelectContext)
select :: (TagSoup.StringLike str)
=> Selector -> TagSpec str -> [TagSpec str]
select s tagSpec = newSpecs
where
(MkSelector nodes) = s
newSpecs =
zipWith applyPosition [0..] (selectNodes nodes tagSpec tagSpec [])
applyPosition p (tags, f, _) = (tags, f, SelectContext p True)
tagsToSpec :: forall str. (TagSoup.StringLike str)
=> [TagSoup.Tag str] -> TagSpec str
tagsToSpec tags = (vector, tree, ctx)
where
vector = tagsToVector tags
tree = vectorToTree vector
ctx = SelectContext 0 False
tagsToVector :: forall str. (TagSoup.StringLike str)
=> [TagSoup.Tag str] -> TagVector str
tagsToVector tags = let indexed = zip tags [0..]
total = length indexed
unsorted = go indexed Map.empty
emptyVec = Vector.replicate total undefined
in emptyVec Vector.// unsorted
where
go :: [(TagSoup.Tag str, Index)]
-> Map.Map T.Text [(TagSoup.Tag str, Index)]
-> [(Index, TagInfo str)]
go [] state =
map (\(t, i) -> (i, TagInfo t (maybeName t) Nothing))
$ concat
$ Map.elems state
where
maybeName t | TagSoup.isTagOpen t = Just $ getTagName t
| TagSoup.isTagClose t = Just $ getTagName t
| otherwise = Nothing
go ((tag, index) : xs) state
| TagSoup.isTagClose tag =
let maybeOpen = head <$> Map.lookup tagName state
state' = Map.alter popTag tagName state
info = TagInfo tag (Just tagName) Nothing
res = catMaybes [
Just (index, info)
, calcOffset <$> maybeOpen
]
in res ++ go xs state'
| TagSoup.isTagOpen tag =
go xs (Map.alter appendTag tagName state)
| otherwise =
let info = TagInfo tag Nothing Nothing
in (index, info) : go xs state
where
tagName = getTagName tag
appendTag :: Maybe [(TagSoup.Tag str, Index)]
-> Maybe [(TagSoup.Tag str, Index)]
appendTag m = ((tag, index) :) <$> (m <|> Just [])
calcOffset :: (TagSoup.Tag str, Index) -> (Index, TagInfo str)
calcOffset (t, i) =
let offset = index - i
info = TagInfo t (Just tagName) (Just offset)
in offset `seq` (i, info)
popTag :: Maybe [a] -> Maybe [a]
popTag (Just (_ : y : xs)) = let s = y : xs in s `seq` Just s
popTag _ = Nothing
getTagName :: TagSoup.StringLike str => TagSoup.Tag str -> T.Text
getTagName (TagSoup.TagOpen name _) = TagSoup.castString name
getTagName (TagSoup.TagClose name) = TagSoup.castString name
getTagName _ = undefined
vectorToTree :: TagSoup.StringLike str => TagVector str -> TagForest
vectorToTree tags = fixup $ forestWithin 0 (Vector.length tags)
where
forestWithin :: Int -> Int -> TagForest
forestWithin !lo !hi
| hi <= lo = []
| shouldSkip = forestWithin (lo + 1) hi
| otherwise = Tree.Node (Span lo closeIndex) subForest
: forestWithin (closeIndex + 1) hi
where
info = tags Vector.! lo
isOpen = TagSoup.isTagOpen $ infoTag info
isText = TagSoup.isTagText $ infoTag info
shouldSkip = not isOpen && not isText
closeIndex = lo + fromMaybe 0 (infoOffset info)
subForest = forestWithin (lo + 1) closeIndex
fixup :: TagForest -> TagForest
fixup [] = []
fixup (Tree.Node (Span lo hi) subForest : siblings)
= Tree.Node (Span lo hi) ok : bad
where
(ok, bad) = malformed (fixup siblings) $ fixup subForest
malformed :: TagForest
-> TagForest
-> (TagForest, TagForest)
malformed preBad [] = ([], preBad)
malformed preBad (n@(Tree.Node (Span _ nHi) _) : ns)
| hi < nHi = (ok, n : bad)
| otherwise = (n : ok, bad)
where (ok, bad) = malformed preBad ns
selectNodes :: TagSoup.StringLike str
=> [(SelectNode, SelectSettings)]
-> TagSpec str
-> TagSpec str
-> [TagSpec str]
-> [TagSpec str]
selectNodes [] _ _ acc = acc
selectNodes [_] (_, [], _) _ acc = acc
selectNodes [n] cur@(tags, f : fs, ctx) root acc
| MatchOk == matchResult
= (shrunkSpec :)
$ selectNodes [n] (tags, fs, ctx) root
$ selectNodes [n] (tags, Tree.subForest f, ctx) root acc
| MatchCull == matchResult
= selectNodes [n] (tags, fs, ctx) root acc
| otherwise
= selectNodes [n] (tags, Tree.subForest f, ctx) root
$ selectNodes [n] (tags, fs, ctx) root acc
where
Span lo hi = Tree.rootLabel f
shrunkSpec = (
Vector.slice lo (hi - lo + 1) tags
, [fmap recenter f]
, ctx
)
recenter (Span nLo nHi) = Span (nLo - lo) (nHi - lo)
info = tags Vector.! lo
matchResult = nodeMatches n info cur root
selectNodes (_ : _) (_, [], _) _ acc = acc
selectNodes (n : ns) cur@(tags, f : fs, ctx) root acc
| MatchOk == matchResult
= selectNodes ns (tags, Tree.subForest f ++ siblings, ctx)
(tags, f : siblings, ctx)
$ selectNodes (n : ns) (tags, fs, ctx) root acc
| MatchCull == matchResult
= selectNodes (n : ns) (tags, fs, ctx) root acc
| otherwise
= selectNodes (n : ns) (tags, Tree.subForest f, ctx) root
$ selectNodes (n : ns) (tags, fs, ctx) root acc
where
Span lo hi = Tree.rootLabel f
info = tags Vector.! lo
matchResult = nodeMatches n info cur root
treeLo tree | (Span lo _) <- Tree.rootLabel tree = lo
treeHi tree | (Span _ hi) <- Tree.rootLabel tree = hi
liftSiblings [] acc = acc
liftSiblings (t : ts) acc
| lo < treeLo t && treeHi t < hi = t : liftSiblings ts acc
| hi < treeLo t || treeHi t < lo = liftSiblings ts acc
| otherwise = liftSiblings (Tree.subForest t)
$ liftSiblings ts acc
siblings = liftSiblings fs []
data MatchResult = MatchOk | MatchFail | MatchCull
deriving (Eq)
andMatch :: MatchResult -> MatchResult -> MatchResult
andMatch MatchOk MatchOk = MatchOk
andMatch MatchCull _ = MatchCull
andMatch _ MatchCull = MatchCull
andMatch _ _ = MatchFail
boolMatch :: Bool -> MatchResult
boolMatch True = MatchOk
boolMatch False = MatchFail
nodeMatches :: TagSoup.StringLike str
=> (SelectNode, SelectSettings)
-> TagInfo str
-> TagSpec str
-> TagSpec str
-> MatchResult
nodeMatches (SelectNode node preds, settings) info cur root =
checkSettings settings cur root `andMatch` checkTag node preds info
nodeMatches (SelectAny preds , settings) info cur root =
checkSettings settings cur root `andMatch` checkPreds preds (infoTag info)
nodeMatches (SelectText , settings) info cur root =
checkSettings settings cur root `andMatch`
boolMatch (TagSoup.isTagText $ infoTag info)
checkSettings :: TagSoup.StringLike str
=> SelectSettings -> TagSpec str -> TagSpec str -> MatchResult
checkSettings (SelectSettings (Just depth))
(_, curRoot : _, _)
(_, root, _)
| depthOfCur < depth = MatchFail
| depthOfCur > depth = MatchCull
| otherwise = MatchOk
where
Span curLo curHi = Tree.rootLabel curRoot
mapTree f = map f . concatMap Tree.flatten
depthOfCur = sum $ mapTree oneIfContainsCur root
oneIfContainsCur (Span lo hi)
| lo < curLo && curHi < hi = 1
| otherwise = 0
checkSettings (SelectSettings _) _ _ = MatchOk
checkTag :: TagSoup.StringLike str
=> T.Text -> [AttributePredicate] -> TagInfo str -> MatchResult
checkTag name preds (TagInfo tag tagName _)
= boolMatch (
TagSoup.isTagOpen tag
&& isJust tagName
&& name == fromJust tagName
) `andMatch` checkPreds preds tag
checkPreds :: TagSoup.StringLike str
=> [AttributePredicate] -> TagSoup.Tag str -> MatchResult
checkPreds [] tag = boolMatch
$ TagSoup.isTagOpen tag || TagSoup.isTagText tag
checkPreds preds tag = boolMatch
$ TagSoup.isTagOpen tag && all (`checkPred` attrs) preds
where (TagSoup.TagOpen _ attrs) = tag