module Text.Webrexp.Exprtypes
(
WebRef (..)
, NodeRange (..)
, Op (..)
, ActionExpr (..)
, WebRexp (..)
, RepeatCount (..)
, BuiltinFunc (..)
, simplifyNodeRanges
, foldWebRexp
, assignWebrexpIndices
, prettyShowWebRef
, packRefFiltering
, isInNodeRange
, isOperatorBoolean
, isActionPredicate
) where
import Data.List( sort, mapAccumR )
import Language.Haskell.TH.Syntax
data WebRef =
Wildcard
| Elem String
| OfClass WebRef String
| Attrib WebRef String
| OfName WebRef String
deriving Show
data NodeRange =
Index Int
| Interval Int Int
deriving (Eq, Show)
instance Ord NodeRange where
compare (Index a) (Index b) = compare a b
compare (Index a) (Interval b c)
| a < b = LT
| a > c = GT
| otherwise = GT
compare (Interval a _) (Index c)
| a < c = LT
| a > c = GT
| otherwise = LT
compare (Interval a b) (Interval c d) =
case compare a c of
EQ -> compare b d
e -> e
simplifySortedNodeRanges :: [NodeRange] -> [NodeRange]
simplifySortedNodeRanges [] = []
simplifySortedNodeRanges [a] = [a]
simplifySortedNodeRanges (i@(Interval a b):idx@(Index c):xs)
| a <= c && c <= b = simplifySortedNodeRanges (i:xs)
| otherwise = i : simplifySortedNodeRanges (idx:xs)
simplifySortedNodeRanges (i1@(Index a):i2@(Index b):xs)
| a == b = simplifySortedNodeRanges (i1:xs)
| otherwise = i1 : simplifySortedNodeRanges (i2:xs)
simplifySortedNodeRanges (i1@(Index _):i2@(Interval _ _):xs) =
i1 : simplifySortedNodeRanges (i2:xs)
simplifySortedNodeRanges (i1@(Interval a b):i2@(Interval c d):xs)
| a <= c && c <= b =
simplifySortedNodeRanges $ Interval a (max b d) : xs
| otherwise = i1 : simplifySortedNodeRanges (i2:xs)
simplifyNodeRanges :: [NodeRange] -> [NodeRange]
simplifyNodeRanges = simplifySortedNodeRanges . sort . map rangeRearranger
where rangeRearranger i@(Index _) = i
rangeRearranger i@(Interval a b)
| a == b = Index a
| a > b = Interval b a
| otherwise = i
data Op =
OpAdd
| OpSub
| OpMul
| OpDiv
| OpLt
| OpLe
| OpGt
| OpGe
| OpEq
| OpNe
| OpAnd
| OpOr
| OpMatch
| OpContain
| OpBegin
| OpEnd
| OpSubstring
| OpHyphenBegin
| OpConcat
deriving (Eq, Show, Enum)
data BuiltinFunc =
BuiltinTrim
| BuiltinSubsitute
| BuiltinToNum
| BuiltinToString
| BuiltinFormat
| BuiltinSystem
deriving (Eq, Show, Enum)
data ActionExpr =
ActionExprs [ActionExpr]
| BinOp Op ActionExpr ActionExpr
| ARef String
| CstI Int
| CstS String
| NodeReplace ActionExpr
| OutputAction
| DeepOutputAction
| NodeNameOutputAction
| Call BuiltinFunc [ActionExpr]
deriving (Eq, Show)
data RepeatCount =
RepeatTimes Int
| RepeatAtLeast Int
| RepeatBetween Int Int
deriving (Show)
data WebRexp =
Branch [WebRexp]
| Unions [WebRexp]
| List [WebRexp]
| Star WebRexp
| Repeat RepeatCount WebRexp
| Alternative WebRexp WebRexp
| Unique Int
| Str String
| Action ActionExpr
| Range Int [NodeRange]
| Ref WebRef
| DirectChild WebRef
| ConstrainedRef WebRef ActionExpr
| DiggLink
| DumpLink
| NextSibling
| PreviousSibling
| Parent
deriving Show
isOperatorBoolean :: Op -> Bool
isOperatorBoolean op = op `elem`
[ OpLt, OpLe, OpGt, OpGe, OpEq, OpNe
, OpAnd, OpOr, OpMatch
, OpContain, OpBegin, OpEnd, OpSubstring
, OpHyphenBegin ]
isActionPredicate :: ActionExpr -> Bool
isActionPredicate (BinOp op _ _) = isOperatorBoolean op
isActionPredicate _ = False
foldWebRexp :: (a -> WebRexp -> (a, WebRexp)) -> a -> WebRexp -> (a, WebRexp)
foldWebRexp f acc (Repeat count sub) = f acc' $ Repeat count sub'
where (acc', sub') = foldWebRexp f acc sub
foldWebRexp f acc (Alternative a b) = f acc'' $ Alternative a' b'
where (acc', a') = foldWebRexp f acc a
(acc'', b') = foldWebRexp f acc' b
foldWebRexp f acc (Unions subs) = f acc' $ Unions subs'
where (acc', subs') = mapAccumR (foldWebRexp f) acc subs
foldWebRexp f acc (Branch subs) = f acc' $ Branch subs'
where (acc', subs') = mapAccumR (foldWebRexp f) acc subs
foldWebRexp f acc (List subs) = f acc' $ List subs'
where (acc', subs') = mapAccumR (foldWebRexp f) acc subs
foldWebRexp f acc (Star sub) = f acc' $ Star sub'
where (acc', sub') = foldWebRexp f acc sub
foldWebRexp f acc e@(ConstrainedRef _ _) = f acc e
foldWebRexp f acc e@(DirectChild _) = f acc e
foldWebRexp f acc e@(Unique _) = f acc e
foldWebRexp f acc e@(Str _) = f acc e
foldWebRexp f acc e@(Action _) = f acc e
foldWebRexp f acc e@(Range _ _) = f acc e
foldWebRexp f acc e@(Ref _) = f acc e
foldWebRexp f acc e@DiggLink = f acc e
foldWebRexp f acc e@DumpLink = f acc e
foldWebRexp f acc e@NextSibling = f acc e
foldWebRexp f acc e@PreviousSibling = f acc e
foldWebRexp f acc e@Parent = f acc e
assignWebrexpIndices :: WebRexp -> (Int, Int, WebRexp)
assignWebrexpIndices expr = (uniqueCount, rangeCount, packRefFiltering fexpr)
where (uniqueCount, expr') = setUniqueIndices expr
(rangeCount, fexpr) = setRangeIndices expr'
packRefFiltering :: WebRexp -> WebRexp
packRefFiltering = snd . foldWebRexp packer ()
where packer () (List lst) = ((), List $ refActionFind lst)
packer () a = ((), a)
refActionFind [] = []
refActionFind (Ref a: Action act: xs) =
case actionSpliter act of
([], _) -> Ref a : Action act : refActionFind xs
(some, []) ->
ConstrainedRef a (actioner some) : refActionFind xs
(some, [rest]) ->
ConstrainedRef a (actioner some) : Action rest
: refActionFind xs
(some, rest) ->
ConstrainedRef a (actioner some) : Action (actioner rest)
: refActionFind xs
refActionFind (x:xs) = x : refActionFind xs
actioner [a] = a
actioner x = ActionExprs x
actionSpliter (ActionExprs exprs) =
break isActionPredicate exprs
actionSpliter a = if isActionPredicate a
then ([a], [])
else ([], [a])
setUniqueIndices :: WebRexp -> (Int, WebRexp)
setUniqueIndices expr = foldWebRexp uniqueCounter 0 expr
where uniqueCounter acc (Unique _) = (acc + 1, Unique acc)
uniqueCounter acc e = (acc, e)
setRangeIndices :: WebRexp -> (Int, WebRexp)
setRangeIndices expr = foldWebRexp uniqueCounter 0 expr
where uniqueCounter acc (Range _ r) = (acc + 1, Range acc r)
uniqueCounter acc e = (acc, e)
prettyShowWebRef :: WebRef -> String
prettyShowWebRef Wildcard = "_"
prettyShowWebRef (Elem s) = s
prettyShowWebRef (OfClass r s) = prettyShowWebRef r ++ "." ++ s
prettyShowWebRef (Attrib r s) = prettyShowWebRef r ++ "@" ++ s
prettyShowWebRef (OfName r s) = prettyShowWebRef r ++ "#" ++ s
isInNodeRange :: Int -> [NodeRange] -> Bool
isInNodeRange _ [] = False
isInNodeRange i (Index ii:xs)
| i == ii = True
| i < ii = False
| otherwise = isInNodeRange i xs
isInNodeRange i (Interval beg end:xs)
| beg <= i && i <= end = True
| beg >= i = False
| otherwise = isInNodeRange i xs
instance Lift WebRef where
lift Wildcard = [| Wildcard |]
lift (Elem str) = [| Elem str |]
lift (OfClass ref str) = [| OfClass ref str |]
lift (Attrib ref str) = [| Attrib ref str |]
lift (OfName ref str) = [| OfName ref str |]
instance Lift NodeRange where
lift (Index i) = [| Index i |]
lift (Interval i1 i2) = [| Interval i1 i2 |]
instance Lift Op where
lift = return . ConE . mkName . show
instance Lift BuiltinFunc where
lift = return . ConE . mkName . show
instance Lift ActionExpr where
lift OutputAction = [| OutputAction |]
lift DeepOutputAction = [| DeepOutputAction |]
lift NodeNameOutputAction = [| NodeNameOutputAction |]
lift (ActionExprs lst) = [| ActionExprs lst |]
lift (ARef str) = [| ARef str |]
lift (CstI i) = [| CstI i |]
lift (CstS str) = [| CstS str |]
lift (NodeReplace a) = [| NodeReplace a |]
lift (Call b lst) = [| Call b lst |]
lift (BinOp op a1 a2) = [| BinOp op a1 a2 |]
instance Lift RepeatCount where
lift (RepeatTimes i) = [| RepeatTimes i |]
lift (RepeatAtLeast i) = [| RepeatAtLeast i |]
lift (RepeatBetween i1 i2) = [| RepeatBetween i1 i2 |]
instance Lift WebRexp where
lift (Branch lst) = [| Branch lst |]
lift (Unions lst) = [| Unions lst |]
lift (List lst) = [| List lst |]
lift (Star w) = [| Star w |]
lift (Repeat count w) = [| Repeat count w |]
lift (Alternative w1 w2) = [| Alternative w1 w2 |]
lift (Unique i) = [| Unique i |]
lift (Str i) = [| Str i |]
lift (Action a) = [| Action a |]
lift (Range i lst) = [| Range i lst |]
lift (Ref ref) = [| Ref ref |]
lift (DirectChild ref) = [| DirectChild ref |]
lift (ConstrainedRef ref action) = [| ConstrainedRef ref action |]
lift a = return . ConE . mkName $ show a