module Webrexp.WebContext
(
WebCrawler
, WebContextT ( .. )
, WebContext
, NodeContext (..)
, EvalState (..)
, BinBlob (..)
, Context
, HistoryPath (..)
, Counter
, SeenCounter
, ValidSeenCounter
, StateNumber
, (^:)
, (^+)
, LogLevel (..)
, setLogLevel
, getUserAgent
, setUserAgent
, setOutput
, getOutput
, getHttpDelay
, setHttpDelay
, isVerbose
, evalWithEmptyContext
, repurposeNode
, prepareLogger
, pushCurrentState
, popCurrentState
, recordNode
, popLastRecord
, accumulateCurrentState
, popAccumulation
, pushToBranchContext
, popBranchContext
, addToBranchContext
, setBucketCount
, incrementGetRangeCounter
, hasResourceBeenVisited
, setResourceVisited
)
where
import System.IO
import Control.Applicative
import Control.Arrow( first )
import Control.Monad
import Control.Monad.IO.Class
import Control.Monad.Trans.Class
import Data.Functor.Identity
import Data.Array.IO
import qualified Data.Set as Set
import qualified Webrexp.ProjectByteString as B
import Webrexp.GraphWalker
type WebCrawler node rezPath a = WebContextT node rezPath IO a
type WebContext node rezPath a = WebContextT node rezPath Identity a
data HistoryPath node =
ImmutableHistory [(node, Int)]
| MutableHistory [node]
(^+) :: [(node, Int)] -> HistoryPath node -> HistoryPath node
(^+) nodes (MutableHistory hist) = MutableHistory $ map fst nodes ++ hist
(^+) nodes (ImmutableHistory hist) = ImmutableHistory $ nodes ++ hist
(^:) :: (node, Int) -> HistoryPath node -> HistoryPath node
(^:) (node, _) (MutableHistory hist) = MutableHistory $ node : hist
(^:) node (ImmutableHistory hist) = ImmutableHistory $ node : hist
data NodeContext node rezPath = NodeContext
{
parents :: HistoryPath node
, this :: node
, rootRef :: rezPath
}
repurposeNode :: (nodeA -> nodeB) -> NodeContext nodeA rezPath
-> NodeContext nodeB rezPath
repurposeNode f node = NodeContext
{ parents = historyPatcher $ parents node
, this = f $ this node
, rootRef = rootRef node
}
where historyPatcher (ImmutableHistory hist) =
ImmutableHistory $ map (first f) hist
historyPatcher (MutableHistory hist) =
MutableHistory $ map f hist
data BinBlob rezPath = BinBlob
{
sourcePath :: rezPath
, blobData :: B.ByteString
}
data EvalState node rezPath =
Node (NodeContext node rezPath)
| Text String
| Blob (BinBlob rezPath)
data LogLevel = Quiet
| Normal
| Verbose
deriving (Eq)
type Counter = Int
type SeenCounter = Int
type ValidSeenCounter = Int
type StateNumber = Int
data Context node rezPath = Context
{
contextStack :: [([EvalState node rezPath], Counter)]
, waitingStates :: [(EvalState node rezPath, StateNumber)]
, branchContext :: [(EvalState node rezPath, SeenCounter, ValidSeenCounter)]
, uniqueBucket :: IOArray Int (Set.Set String)
, countBucket :: IOUArray Int Counter
, logLevel :: LogLevel
, httpDelay :: Int
, httpUserAgent :: String
, defaultOutput :: Handle
}
newtype (Monad m) => WebContextT node rezPath m a =
WebContextT { runWebContextT :: Context node rezPath
-> m (a, Context node rezPath ) }
instance (Functor m, Monad m) => Functor (WebContextT node rezPath m) where
fmap f a = WebContextT $ \c ->
fmap (first f) $ runWebContextT a c
instance (Functor m, Monad m) => Applicative (WebContextT node rezPath m) where
pure = return
(<*>) = ap
instance (Monad m) => Monad (WebContextT node rezPath m) where
return a =
WebContextT $ \c -> return (a, c)
(WebContextT val) >>= f = WebContextT $ \c -> do
(val', c') <- val c
runWebContextT (f val') c'
instance MonadTrans (WebContextT node rezPath) where
lift m = WebContextT $ \c -> do
a <- m
return (a, c)
instance (MonadIO m) => MonadIO (WebContextT node rezPath m) where
liftIO = lift . liftIO
emptyContext :: Context node rezPath
emptyContext = Context
{ contextStack = []
, waitingStates = []
, branchContext = []
, logLevel = Normal
, httpDelay = 1500
, httpUserAgent = ""
, defaultOutput = stdout
, uniqueBucket = undefined
, countBucket = undefined
}
setHttpDelay :: (Monad m) => Int -> WebContextT node rezPath m ()
setHttpDelay delay = WebContextT $ \c ->
return ((), c{ httpDelay = delay })
getHttpDelay :: (Monad m) => WebContextT node rezPath m Int
getHttpDelay = WebContextT $ \c -> return (httpDelay c, c)
setOutput :: (Monad m) => Handle -> WebContextT node rezPath m ()
setOutput handle = WebContextT $ \c ->
return ((), c{ defaultOutput = handle })
getOutput :: (Monad m) => WebContextT node rezPath m Handle
getOutput = WebContextT $ \c -> return (defaultOutput c, c)
setUserAgent :: (Monad m) => String -> WebContextT node rezPath m ()
setUserAgent usr = WebContextT $ \c ->
return ((), c{ httpUserAgent = usr })
getUserAgent :: (Monad m) => WebContextT node rezPath m String
getUserAgent = WebContextT $ \c -> return (httpUserAgent c, c)
setLogLevel :: (Monad m) => LogLevel -> WebContextT node rezPath m ()
setLogLevel lvl = WebContextT $ \c ->
return ((), c{logLevel = lvl})
isVerbose :: (Monad m) => WebContextT node rezPath m Bool
isVerbose = WebContextT $ \c ->
return (logLevel c == Verbose, c)
accumulateCurrentState :: (Monad m)
=> ([EvalState node rezPath], StateNumber)
-> WebContextT node rezPath m ()
accumulateCurrentState lst = WebContextT $ \c ->
return ((), c{ contextStack = lst : contextStack c })
popAccumulation :: (Monad m)
=> WebContextT node rezPath m ([EvalState node rezPath], StateNumber)
popAccumulation = WebContextT $ \c ->
case contextStack c of
[] -> error "Empty context stack, implementation bug"
(x:xs) -> return (x, c{ contextStack = xs })
pushCurrentState :: (Monad m)
=> [EvalState node rezPath]
-> WebContextT node rezPath m ()
pushCurrentState lst = WebContextT $ \c ->
return ((), c{ contextStack = (lst, 0) : contextStack c })
popCurrentState :: (Monad m)
=> WebContextT node rezPath m [EvalState node rezPath]
popCurrentState = WebContextT $ \c ->
case contextStack c of
[] -> error "Empty context stack, implementation bug"
((x,_):xs) ->
return (x, c{ contextStack = xs })
evalWithEmptyContext :: (Monad m)
=> WebContextT node rezPath m a -> m a
evalWithEmptyContext val = do
(finalVal, _context) <- runWebContextT val emptyContext
return finalVal
prepareLogger :: (Monad m)
=> WebContextT node rezPath m (Logger, Logger, Logger)
prepareLogger = WebContextT $ \c ->
let silenceLog _ = return ()
errLog = hPutStrLn stderr
normalLog = putStrLn
in case logLevel c of
Quiet -> return ((silenceLog, errLog, silenceLog), c)
Normal -> return ((normalLog, errLog, silenceLog), c)
Verbose -> return ((normalLog, errLog, normalLog), c)
recordNode :: (Monad m)
=> (EvalState node rezPath, StateNumber) -> WebContextT node rezPath m ()
recordNode n = WebContextT $ \c ->
return ((), c{ waitingStates = n : waitingStates c })
popLastRecord :: (Monad m)
=> WebContextT node rezPath m (EvalState node rezPath, StateNumber)
popLastRecord = WebContextT $ \c ->
case waitingStates c of
[] -> error "popLAst Record - Empty stack!!!"
(x:xs) -> return (x, c{ waitingStates = xs })
pushToBranchContext :: (Monad m)
=> (EvalState node rezPath, SeenCounter, ValidSeenCounter)
-> WebContextT node rezPath m ()
pushToBranchContext cont = WebContextT $ \c ->
return ((), c{ branchContext = cont : branchContext c })
popBranchContext :: (Monad m)
=> WebContextT node rezPath m (EvalState node rezPath, SeenCounter, ValidSeenCounter)
popBranchContext = WebContextT $ \c ->
case branchContext c of
[] -> error "popBranchContext - empty branch context"
(x:xs) -> return (x, c{ branchContext = xs })
addToBranchContext :: (Monad m)
=> SeenCounter -> ValidSeenCounter -> WebContextT node rezPath m ()
addToBranchContext count validCount = WebContextT $ \c ->
case branchContext c of
[] -> error "addToBranchContext - empty context stack"
((e,co,vc):xs) -> return ((), c{ branchContext = (e,co + count
,vc + validCount): xs})
setBucketCount :: (Monad m, MonadIO m)
=> Int
-> Int
-> WebContextT node rezPath m ()
setBucketCount uniquecount rangeCount = WebContextT $ \c -> do
arr <- liftIO $ newArray (0, uniquecount 1) Set.empty
counter <- liftIO $ newArray (0, rangeCount 1) 0
return ((), c{ uniqueBucket = arr
, countBucket = counter })
incrementGetRangeCounter :: (Monad m, MonadIO m)
=> Int -> WebContextT node rezPath m Int
incrementGetRangeCounter bucket = WebContextT $ \c -> do
num <- liftIO $ countBucket c `readArray`bucket
liftIO . (countBucket c `writeArray` bucket) $ num + 1
return (num, c)
hasResourceBeenVisited :: (Monad m, MonadIO m)
=> Int -> String -> WebContextT node rezPath m Bool
hasResourceBeenVisited bucketId str = WebContextT $ \c -> do
set <- liftIO $ uniqueBucket c `readArray`bucketId
return (str `Set.member` set, c)
setResourceVisited :: (Monad m, MonadIO m)
=> Int -> String -> WebContextT node rezPath m ()
setResourceVisited bucketId str = WebContextT $ \c -> do
set <- liftIO $ uniqueBucket c `readArray` bucketId
let newSet = str `Set.insert` set
liftIO $ (uniqueBucket c `writeArray`bucketId) newSet
return ((), c)