module Text.Webrexp.WebContext
(
WebCrawler
, WebContextT ( .. )
, WebContext
, NodeContext (..)
, EvalState (..)
, BinBlob (..)
, Context
, HistoryPath (..)
, Counter
, SeenCounter
, ValidSeenCounter
, StateNumber
, (^:)
, (^+)
, LogLevel (..)
, setLogLevel
, getUniqueName
, getUserAgent
, setUserAgent
, setOutput
, getOutput
, getHttpDelay
, setHttpDelay
, isVerbose
, evalWithEmptyContext
, executeWithEmptyContext
, repurposeNode
, prepareLogger
, pushCurrentState
, popCurrentState
, recordNode
, popLastRecord
, accumulateCurrentState
, popAccumulation
, pushToBranchContext
, popBranchContext
, addToBranchContext
, setBucketCount
, incrementGetRangeCounter
, hasResourceBeenVisited
, setResourceVisited
, debugLog
, textOutput
)
where
import System.IO
import Control.Applicative
import Control.Arrow( first )
import Control.Monad
import Control.Monad.ST
import Control.Monad.IO.Class
import Control.Monad.Trans.Class
import Data.Functor.Identity
import Data.Array.MArray
import qualified Data.Set as Set
import Control.Exception( IOException )
import qualified Control.Exception as Ex
import qualified Text.Webrexp.ProjectByteString as B
import Text.Webrexp.GraphWalker
import Text.Webrexp.IOMock
type WebCrawler array node rezPath a = WebContextT array node rezPath IO a
type WebContext array node rezPath a = WebContextT array node rezPath Identity a
data HistoryPath node =
ImmutableHistory [(node, Int)]
| MutableHistory [node]
(^+) :: [(node, Int)] -> HistoryPath node -> HistoryPath node
(^+) nodes (MutableHistory hist) = MutableHistory $ map fst nodes ++ hist
(^+) nodes (ImmutableHistory hist) = ImmutableHistory $ nodes ++ hist
(^:) :: (node, Int) -> HistoryPath node -> HistoryPath node
(^:) (node, _) (MutableHistory hist) = MutableHistory $ node : hist
(^:) node (ImmutableHistory hist) = ImmutableHistory $ node : hist
data NodeContext node rezPath = NodeContext
{
parents :: HistoryPath node
, this :: node
, rootRef :: rezPath
}
repurposeNode :: (nodeA -> nodeB) -> NodeContext nodeA rezPath
-> NodeContext nodeB rezPath
repurposeNode f node = NodeContext
{ parents = historyPatcher $ parents node
, this = f $ this node
, rootRef = rootRef node
}
where historyPatcher (ImmutableHistory hist) =
ImmutableHistory $ map (first f) hist
historyPatcher (MutableHistory hist) =
MutableHistory $ map f hist
data BinBlob rezPath = BinBlob
{
sourcePath :: rezPath
, blobData :: B.ByteString
}
data EvalState node rezPath =
Node (NodeContext node rezPath)
| Text String
| Blob (BinBlob rezPath)
data LogLevel = Quiet
| Normal
| Verbose
deriving (Eq)
type Counter = Int
type SeenCounter = Int
type ValidSeenCounter = Int
type StateNumber = Int
data Context array node rezPath = Context
{
contextStack :: [([EvalState node rezPath], Counter)]
, waitingStates :: [(EvalState node rezPath, StateNumber)]
, branchContext :: [(EvalState node rezPath, SeenCounter, ValidSeenCounter)]
, uniqueBucket :: array Int (Set.Set String)
, countBucket :: array Int Counter
, mustGatherData :: Bool
, gatheredData :: [Either String String]
, logLevel :: LogLevel
, httpDelay :: Int
, httpUserAgent :: String
, defaultOutput :: Handle
, uniqueNameCount :: Int
}
newtype (Monad m) => WebContextT array node rezPath m a =
WebContextT { runWebContextT :: Context array node rezPath
-> m (a, Context array node rezPath) }
instance (Functor m, Monad m) => Functor (WebContextT array node rezPath m) where
fmap f a = WebContextT $ \c ->
fmap (first f) $ runWebContextT a c
instance (Functor m, Monad m) => Applicative (WebContextT array node rezPath m) where
pure = return
(<*>) = ap
instance (Monad m) => Monad (WebContextT array node rezPath m) where
return a =
WebContextT $ \c -> return (a, c)
(WebContextT val) >>= f = WebContextT $ \c -> do
(val', c') <- val c
runWebContextT (f val') c'
instance MonadTrans (WebContextT array node rezPath) where
lift m = WebContextT $ \c -> do
a <- m
return (a, c)
instance (MonadIO m) => MonadIO (WebContextT array node rezPath m) where
liftIO = lift . liftIO
instance IOMockable (WebContextT array node rezPath IO) where
performIO act = Just <$> liftIO act
instance IOMockable (WebContextT array node rezPath (ST s)) where
performIO _ = return Nothing
emptyContext :: Context array node rezPath
emptyContext = Context
{ contextStack = []
, waitingStates = []
, branchContext = []
, logLevel = Normal
, httpDelay = 1500
, httpUserAgent = ""
, mustGatherData = False
, gatheredData = []
, defaultOutput = stdout
, uniqueBucket = undefined
, countBucket = undefined
, uniqueNameCount = 1
}
getUniqueName :: (Monad m) => WebContextT array node rezPath m FilePath
getUniqueName = WebContextT $ \c ->
let i = uniqueNameCount c
in return ("file_" ++ show i, c { uniqueNameCount = i + 1 })
setHttpDelay :: (Monad m) => Int -> WebContextT array node rezPath m ()
setHttpDelay delay = WebContextT $ \c ->
return ((), c{ httpDelay = delay })
getHttpDelay :: (Monad m) => WebContextT array node rezPath m Int
getHttpDelay = WebContextT $ \c -> return (httpDelay c, c)
setOutput :: (Monad m) => Handle -> WebContextT array node rezPath m ()
setOutput handle = WebContextT $ \c ->
return ((), c{ defaultOutput = handle })
getOutput :: (Monad m) => WebContextT array node rezPath m Handle
getOutput = WebContextT $ \c -> return (defaultOutput c, c)
isDataOutputedDirectly :: (Monad m) => WebContextT array node rezPath m Bool
isDataOutputedDirectly = WebContextT $ \c ->
return (not $ mustGatherData c, c)
setUserAgent :: (Monad m) => String -> WebContextT array node rezPath m ()
setUserAgent usr = WebContextT $ \c ->
return ((), c{ httpUserAgent = usr })
getUserAgent :: (Monad m) => WebContextT array node rezPath m String
getUserAgent = WebContextT $ \c -> return (httpUserAgent c, c)
setLogLevel :: (Monad m) => LogLevel -> WebContextT array node rezPath m ()
setLogLevel lvl = WebContextT $ \c ->
return ((), c{logLevel = lvl})
isVerbose :: (Monad m) => WebContextT array node rezPath m Bool
isVerbose = WebContextT $ \c ->
return (logLevel c == Verbose, c)
accumulateCurrentState :: (Monad m)
=> ([EvalState node rezPath], StateNumber)
-> WebContextT array node rezPath m ()
accumulateCurrentState lst = WebContextT $ \c ->
return ((), c{ contextStack = lst : contextStack c })
popAccumulation :: (Monad m)
=> WebContextT array node rezPath m ([EvalState node rezPath], StateNumber)
popAccumulation = WebContextT $ \c ->
case contextStack c of
[] -> error "Empty context stack, implementation bug"
(x:xs) -> return (x, c{ contextStack = xs })
pushCurrentState :: (Monad m)
=> [EvalState node rezPath]
-> WebContextT array node rezPath m ()
pushCurrentState lst = WebContextT $ \c ->
return ((), c{ contextStack = (lst, 0) : contextStack c })
popCurrentState :: (Monad m)
=> WebContextT array node rezPath m [EvalState node rezPath]
popCurrentState = WebContextT $ \c ->
case contextStack c of
[] -> error "Empty context stack, implementation bug"
((x,_):xs) ->
return (x, c{ contextStack = xs })
evalWithEmptyContext :: (Monad m)
=> WebContextT array node rezPath m a -> m a
evalWithEmptyContext val = do
(finalVal, _context) <- runWebContextT val emptyContext
return finalVal
executeWithEmptyContext :: (Monad m)
=> WebContextT array node rezPath m a -> m [Either String String]
executeWithEmptyContext val = do
(_, context) <- runWebContextT val (emptyContext { mustGatherData = True })
return $ gatheredData context
prepareLogger :: (Monad m, IOMockable (WebContextT array node rezPath m))
=> WebContextT array node rezPath m
(Logger (WebContextT array node rezPath m)
,Logger (WebContextT array node rezPath m)
,Logger (WebContextT array node rezPath m))
prepareLogger = WebContextT $ \c ->
let silenceLog _ = return ()
errLog msg = performIO (hPutStrLn stderr msg) >> return ()
normalLog = textOutput
in case (mustGatherData c, logLevel c) of
(True, _) -> return ((silenceLog, gatheringLog . Left, silenceLog), c)
(_, Quiet) -> return ((silenceLog, errLog, silenceLog), c)
(_, Normal) -> return ((normalLog, errLog, silenceLog), c)
(_, Verbose) -> return ((normalLog, errLog, normalLog), c)
debugLog :: (IOMockable (WebContextT array node rezPath m), Monad m)
=> String -> WebContextT array node rezPath m ()
debugLog str = do
verb <- isVerbose
when verb (performIO (putStrLn str) >> return ())
textOutput :: (Monad m, IOMockable (WebContextT array node rezPath m))
=> String -> WebContextT array node rezPath m ()
textOutput str = do
direct <- isDataOutputedDirectly
if not direct
then gatheringLog $ Right str
else do handle <- getOutput
_ <- performIO $ Ex.catch (hPutStr handle str)
(\e -> hPutStrLn stderr $ "Writing error : " ++
show (e :: IOException))
return ()
gatheringLog :: (Monad m) => Either String String -> WebContextT array node rezPath m ()
gatheringLog d = WebContextT $ \c ->
return ((), c { gatheredData = gatheredData c ++ [d] })
recordNode :: (Monad m)
=> (EvalState node rezPath, StateNumber) -> WebContextT array node rezPath m ()
recordNode n = WebContextT $ \c ->
return ((), c{ waitingStates = n : waitingStates c })
popLastRecord :: (Monad m)
=> WebContextT array node rezPath m (EvalState node rezPath, StateNumber)
popLastRecord = WebContextT $ \c ->
case waitingStates c of
[] -> error "popLAst Record - Empty stack!!!"
(x:xs) -> return (x, c{ waitingStates = xs })
pushToBranchContext :: (Monad m)
=> (EvalState node rezPath, SeenCounter, ValidSeenCounter)
-> WebContextT array node rezPath m ()
pushToBranchContext cont = WebContextT $ \c ->
return ((), c{ branchContext = cont : branchContext c })
popBranchContext :: (Monad m)
=> WebContextT array node rezPath m ( EvalState node rezPath
, SeenCounter
, ValidSeenCounter )
popBranchContext = WebContextT $ \c ->
case branchContext c of
[] -> error "popBranchContext - empty branch context"
(x:xs) -> return (x, c{ branchContext = xs })
addToBranchContext :: (Monad m)
=> SeenCounter -> ValidSeenCounter
-> WebContextT array node rezPath m ()
addToBranchContext count validCount = WebContextT $ \c ->
case branchContext c of
[] -> error "addToBranchContext - empty context stack"
((e,co,vc):xs) -> return ((), c{ branchContext = (e,co + count
,vc + validCount): xs})
setBucketCount :: ( Monad m
, MArray array Counter m
, MArray array (Set.Set String) m
)
=> Int
-> Int
-> WebContextT array node rezPath m ()
setBucketCount uniquecount rangeCount = WebContextT $ \c -> do
setArray <- newArray (0, uniquecount 1) Set.empty
counterArray <- newArray (0, rangeCount 1) 0
return ((), c{ uniqueBucket = setArray
, countBucket = counterArray } )
incrementGetRangeCounter :: (Monad m, MArray array Counter m)
=> Int -> WebContextT array node rezPath m Int
incrementGetRangeCounter bucket = WebContextT $ \c -> do
num <- countBucket c `readArray` bucket
writeArray (countBucket c) bucket $ num + 1
return (num, c)
hasResourceBeenVisited :: (Monad m, MArray array (Set.Set String) m)
=> Int -> String -> WebContextT array node rezPath m Bool
hasResourceBeenVisited bucketId str = WebContextT $ \c -> do
set <- uniqueBucket c `readArray` bucketId
return (str `Set.member` set, c)
setResourceVisited :: (Monad m, MArray array (Set.Set String) m)
=> Int -> String -> WebContextT array node rezPath m ()
setResourceVisited bucketId str = WebContextT $ \c -> do
set <- uniqueBucket c `readArray` bucketId
let newSet = str `Set.insert` set
writeArray (uniqueBucket c) bucketId newSet
return ((), c)