module Webrexp.Exprtypes
(
WebRef (..)
, NodeRange (..)
, Op (..)
, ActionExpr (..)
, WebRexp (..)
, RepeatCount (..)
, BuiltinFunc (..)
, simplifyNodeRanges
, foldWebRexp
, assignWebrexpIndices
, prettyShowWebRef
, packRefFiltering
, isInNodeRange
, isOperatorBoolean
, isActionPredicate
) where
import Data.List( sort, mapAccumR )
data WebRef =
Wildcard
| Elem String
| OfClass WebRef String
| Attrib WebRef String
| OfName WebRef String
deriving Show
data NodeRange =
Index Int
| Interval Int Int
deriving (Eq, Show)
instance Ord NodeRange where
compare (Index a) (Index b) = compare a b
compare (Index a) (Interval b c)
| a < b = LT
| a > c = GT
| otherwise = GT
compare (Interval a _) (Index c)
| a < c = LT
| a > c = GT
| otherwise = LT
compare (Interval a b) (Interval c d) =
case compare a c of
EQ -> compare b d
e -> e
simplifySortedNodeRanges :: [NodeRange] -> [NodeRange]
simplifySortedNodeRanges [] = []
simplifySortedNodeRanges [a] = [a]
simplifySortedNodeRanges (i@(Interval a b):idx@(Index c):xs)
| a <= c && c <= b = simplifySortedNodeRanges (i:xs)
| otherwise = i : simplifySortedNodeRanges (idx:xs)
simplifySortedNodeRanges (i1@(Index a):i2@(Index b):xs)
| a == b = simplifySortedNodeRanges (i1:xs)
| otherwise = i1 : simplifySortedNodeRanges (i2:xs)
simplifySortedNodeRanges (i1@(Index _):i2@(Interval _ _):xs) =
i1 : simplifySortedNodeRanges (i2:xs)
simplifySortedNodeRanges (i1@(Interval a b):i2@(Interval c d):xs)
| a <= c && c <= b =
simplifySortedNodeRanges $ Interval a (max b d) : xs
| otherwise = i1 : simplifySortedNodeRanges (i2:xs)
simplifyNodeRanges :: [NodeRange] -> [NodeRange]
simplifyNodeRanges = simplifySortedNodeRanges . sort . map rangeRearranger
where rangeRearranger i@(Index _) = i
rangeRearranger i@(Interval a b)
| a == b = Index a
| a > b = Interval b a
| otherwise = i
data Op =
OpAdd
| OpSub
| OpMul
| OpDiv
| OpLt
| OpLe
| OpGt
| OpGe
| OpEq
| OpNe
| OpAnd
| OpOr
| OpMatch
| OpContain
| OpBegin
| OpEnd
| OpSubstring
| OpHyphenBegin
| OpConcat
deriving (Eq, Show, Enum)
data BuiltinFunc =
BuiltinTrim
| BuiltinSubsitute
| BuiltinToNum
| BuiltinToString
| BuiltinFormat
| BuiltinSystem
deriving (Eq, Show, Enum)
data ActionExpr =
ActionExprs [ActionExpr]
| BinOp Op ActionExpr ActionExpr
| ARef String
| CstI Int
| CstS String
| NodeReplace ActionExpr
| OutputAction
| Call BuiltinFunc [ActionExpr]
deriving (Eq, Show)
data RepeatCount =
RepeatTimes Int
| RepeatAtLeast Int
| RepeatBetween Int Int
deriving (Show)
data WebRexp =
Branch [WebRexp]
| Unions [WebRexp]
| List [WebRexp]
| Star WebRexp
| Repeat RepeatCount WebRexp
| Alternative WebRexp WebRexp
| Unique Int
| Str String
| Action ActionExpr
| Range Int [NodeRange]
| Ref WebRef
| DirectChild WebRef
| ConstrainedRef WebRef ActionExpr
| DiggLink
| NextSibling
| PreviousSibling
| Parent
deriving Show
isOperatorBoolean :: Op -> Bool
isOperatorBoolean op = op `elem`
[ OpLt, OpLe, OpGt, OpGe, OpEq, OpNe
, OpAnd, OpOr, OpMatch
, OpContain, OpBegin, OpEnd, OpSubstring
, OpHyphenBegin ]
isActionPredicate :: ActionExpr -> Bool
isActionPredicate (BinOp op _ _) = isOperatorBoolean op
isActionPredicate _ = False
foldWebRexp :: (a -> WebRexp -> (a, WebRexp)) -> a -> WebRexp -> (a, WebRexp)
foldWebRexp f acc (Repeat count sub) = f acc' $ Repeat count sub'
where (acc', sub') = foldWebRexp f acc sub
foldWebRexp f acc (Alternative a b) = f acc'' $ Alternative a' b'
where (acc', a') = foldWebRexp f acc a
(acc'', b') = foldWebRexp f acc' b
foldWebRexp f acc (Unions subs) = f acc' $ Unions subs'
where (acc', subs') = mapAccumR (foldWebRexp f) acc subs
foldWebRexp f acc (Branch subs) = f acc' $ Branch subs'
where (acc', subs') = mapAccumR (foldWebRexp f) acc subs
foldWebRexp f acc (List subs) = f acc' $ List subs'
where (acc', subs') = mapAccumR (foldWebRexp f) acc subs
foldWebRexp f acc (Star sub) = f acc' $ Star sub'
where (acc', sub') = foldWebRexp f acc sub
foldWebRexp f acc e@(ConstrainedRef _ _) = f acc e
foldWebRexp f acc e@(DirectChild _) = f acc e
foldWebRexp f acc e@(Unique _) = f acc e
foldWebRexp f acc e@(Str _) = f acc e
foldWebRexp f acc e@(Action _) = f acc e
foldWebRexp f acc e@(Range _ _) = f acc e
foldWebRexp f acc e@(Ref _) = f acc e
foldWebRexp f acc e@DiggLink = f acc e
foldWebRexp f acc e@NextSibling = f acc e
foldWebRexp f acc e@PreviousSibling = f acc e
foldWebRexp f acc e@Parent = f acc e
assignWebrexpIndices :: WebRexp -> (Int, Int, WebRexp)
assignWebrexpIndices expr = (uniqueCount, rangeCount, packRefFiltering fexpr)
where (uniqueCount, expr') = setUniqueIndices expr
(rangeCount, fexpr) = setRangeIndices expr'
packRefFiltering :: WebRexp -> WebRexp
packRefFiltering = snd . foldWebRexp packer ()
where packer () (List lst) = ((), List $ refActionFind lst)
packer () a = ((), a)
refActionFind [] = []
refActionFind (Ref a: Action act: xs) =
case actionSpliter act of
([], _) -> Ref a : Action act : refActionFind xs
(some, []) ->
ConstrainedRef a (actioner some) : refActionFind xs
(some, [rest]) ->
ConstrainedRef a (actioner some) : Action rest
: refActionFind xs
(some, rest) ->
ConstrainedRef a (actioner some) : Action (actioner rest)
: refActionFind xs
refActionFind (x:xs) = x : refActionFind xs
actioner [a] = a
actioner x = ActionExprs x
actionSpliter (ActionExprs exprs) =
break isActionPredicate exprs
actionSpliter a = if isActionPredicate a
then ([a], [])
else ([], [a])
setUniqueIndices :: WebRexp -> (Int, WebRexp)
setUniqueIndices expr = foldWebRexp uniqueCounter 0 expr
where uniqueCounter acc (Unique _) = (acc + 1, Unique acc)
uniqueCounter acc e = (acc, e)
setRangeIndices :: WebRexp -> (Int, WebRexp)
setRangeIndices expr = foldWebRexp uniqueCounter 0 expr
where uniqueCounter acc (Range _ r) = (acc + 1, Range acc r)
uniqueCounter acc e = (acc, e)
prettyShowWebRef :: WebRef -> String
prettyShowWebRef Wildcard = "_"
prettyShowWebRef (Elem s) = s
prettyShowWebRef (OfClass r s) = prettyShowWebRef r ++ "." ++ s
prettyShowWebRef (Attrib r s) = prettyShowWebRef r ++ "@" ++ s
prettyShowWebRef (OfName r s) = prettyShowWebRef r ++ "#" ++ s
isInNodeRange :: Int -> [NodeRange] -> Bool
isInNodeRange _ [] = False
isInNodeRange i (Index ii:xs)
| i == ii = True
| i < ii = False
| otherwise = isInNodeRange i xs
isInNodeRange i (Interval beg end:xs)
| beg <= i && i <= end = True
| beg >= i = False
| otherwise = isInNodeRange i xs