{-# LANGUAGE ScopedTypeVariables #-}
-- | This module define the state carryied during the webrexp
-- evaluation. This state is implemented as a monad transformer
-- on top of 'IO'.
module Webrexp.WebContext
    ( 
    -- * Types
      WebCrawler
    , WebContextT ( .. )
    , WebContext 
    , NodeContext (..)
    , EvalState (..)
    , BinBlob (..)
    , Context
    , HistoryPath (..)

    -- * Aliases
    -- Only used to provide more meaningful type signatures
    , Counter
    , SeenCounter
    , ValidSeenCounter
    , StateNumber

    -- * Node manipulation function/operators
    , (^:)
    , (^+)

    -- * Crawling configuration
    , LogLevel (..)
    , setLogLevel 
    , getUserAgent 
    , setUserAgent 
    , setOutput 
    , getOutput
    , getHttpDelay 
    , setHttpDelay 
    , isVerbose

    -- * User function
    , evalWithEmptyContext
    , repurposeNode 
    
    -- * Implementation info
    -- ** Evaluation function
    , prepareLogger 

    -- ** State manipulation functions
    , pushCurrentState 
    , popCurrentState 

    -- * DFS evaluator
    -- ** Node list
    , recordNode 
    , popLastRecord 

    -- ** Branch context
    , accumulateCurrentState 
    , popAccumulation 

    , pushToBranchContext 
    , popBranchContext 
    , addToBranchContext 

    -- ** Unicity manipulation function
    , setBucketCount 
    , incrementGetRangeCounter 
    , hasResourceBeenVisited
    , setResourceVisited
    )
    where

import System.IO
import Control.Applicative
import Control.Arrow( first )
import Control.Monad
import Control.Monad.IO.Class
import Control.Monad.Trans.Class
import Data.Functor.Identity
import Data.Array.IO
import qualified Data.Set as Set

import qualified Webrexp.ProjectByteString as B
import Webrexp.GraphWalker

-- | Typical use of the WebContextT monad transformer
-- allowing to download information
type WebCrawler node rezPath a = WebContextT node rezPath IO a

-- | WebContext is 'WebContextT' as a simple Monad
type WebContext node rezPath a = WebContextT node rezPath Identity a

-- | Record a graph path in a document, from the last indirect
-- node to this one.
data HistoryPath node =
      -- | A path in an immutable graph. The graph that
      -- doesn't move under our feets, so we store the
      -- index of the followgin node in the path.
      ImmutableHistory [(node, Int)]

      -- | If the graph is suceptible to move under our
      -- feets, we have to search again for the position
      -- of the node in the parent node.
    | MutableHistory   [node]

-- | Fuse two history together, is equivalent to the '++'
-- operator for list.
(^+) :: [(node, Int)] -> HistoryPath node -> HistoryPath node
(^+) nodes (MutableHistory hist) = MutableHistory $ map fst nodes ++ hist
(^+) nodes (ImmutableHistory hist) = ImmutableHistory $ nodes ++ hist

-- | Append at info at the beginning of an history,
-- equivalent to the ':' operator for lists.
(^:) :: (node, Int) -> HistoryPath node -> HistoryPath node
(^:) (node, _) (MutableHistory hist) = MutableHistory $ node : hist
(^:) node (ImmutableHistory hist) = ImmutableHistory $ node : hist

-- | Represent a graph node and the path
-- used to go up to it.
data NodeContext node rezPath = NodeContext
    { -- | Path from the root of the document to
      -- 'this' node.
      parents :: HistoryPath node

      -- | Real node value
    , this :: node              

      -- | The last indirect path used to get to this node.
    , rootRef :: rezPath       
    }

-- | Function useful if used in combination of an union-node :
-- - A function produce a node context for a specific type
-- - You want to generalise it for a complex union
-- - Use this function :)
--
-- For example to produce a simple union node :
--
-- > repurposeNode UnionRight $ initialSimpleNode
repurposeNode :: (nodeA -> nodeB) -> NodeContext nodeA rezPath
              -> NodeContext nodeB rezPath
repurposeNode f node = NodeContext
    { parents = historyPatcher $ parents node
    , this = f $ this node
    , rootRef = rootRef node
    }
  where historyPatcher (ImmutableHistory hist) =
            ImmutableHistory $ map (first f) hist
        historyPatcher (MutableHistory hist) =
            MutableHistory $ map f hist

-- | Represent a binary blob, often downloaded.
data BinBlob rezPath = BinBlob
    { -- | The last indirect path used to get to this blob.
      sourcePath :: rezPath

      -- | The binary data
    , blobData :: B.ByteString
    }

-- | This type represent the temporary results
-- of the evaluation of regexp.
data EvalState node rezPath =
      Node (NodeContext node rezPath)
    | Text String
    | Blob (BinBlob rezPath)

-- | Type used to represent the current logging level.
-- Default is 'Normal'
data LogLevel = Quiet -- ^ Only display the dumped information
              | Normal -- ^ Display dumped information and IOs
              | Verbose -- ^ Display many debugging information
              deriving (Eq)

-- | An int used as a counter
type Counter = Int

-- | Number of elements seen at a state in the automata.
type SeenCounter = Int

-- | Number of elements which arrived by a true transition
-- to a state in the automata.
type ValidSeenCounter = Int

-- | Just an index to a state in the automata.
type StateNumber = Int

-- | Internal data context.
data Context node rezPath = Context
    { -- | Context stack used in breadth-first evaluation
      contextStack :: [([EvalState node rezPath], Counter)]

      -- | State waiting to be executed in a depth-
      -- first execution.
    , waitingStates :: [(EvalState node rezPath, StateNumber)]

      -- | State used to implement branches in the depth
      -- first evaluator.
    , branchContext :: [(EvalState node rezPath, SeenCounter, ValidSeenCounter)]

      -- | Buckets used for uniqueness pruning, all
      -- evaluation kind.
    , uniqueBucket :: IOArray Int (Set.Set String) 

      -- | Counters used for range evaluation in DFS
    , countBucket :: IOUArray Int Counter

      -- | Current log level
    , logLevel :: LogLevel
    , httpDelay :: Int
    , httpUserAgent :: String
    , defaultOutput :: Handle
    }

--------------------------------------------------
----            Monad definitions
--------------------------------------------------
newtype (Monad m) => WebContextT node rezPath m a =
    WebContextT { runWebContextT :: Context node rezPath
                                 -> m (a, Context node rezPath ) }

instance (Functor m, Monad m) => Functor (WebContextT node rezPath m) where
    {-# INLINE fmap #-}
    fmap f a = WebContextT $ \c ->
        fmap (first f) $ runWebContextT a c

instance (Functor m, Monad m) => Applicative (WebContextT node rezPath m) where
    pure = return
    (<*>) = ap

instance (Monad m) => Monad (WebContextT node rezPath m) where
    {-# INLINE return #-}
    return a =
        WebContextT $ \c -> return (a, c)

    {-# INLINE (>>=) #-}
    (WebContextT val) >>= f = WebContextT $ \c -> do
        (val', c') <- val c
        runWebContextT (f val') c'

instance MonadTrans (WebContextT node rezPath) where
    lift m = WebContextT $ \c -> do
        a <- m
        return (a, c)

instance (MonadIO m) => MonadIO (WebContextT node rezPath m) where
    liftIO = lift . liftIO

--------------------------------------------------
----            Context manipulation
--------------------------------------------------

emptyContext :: Context node rezPath
emptyContext = Context
    { contextStack = []
    , waitingStates = []
    , branchContext = []
    , logLevel = Normal
    , httpDelay = 1500
    , httpUserAgent = ""
    , defaultOutput = stdout
    , uniqueBucket = undefined
    , countBucket = undefined
    }

--------------------------------------------------
----            Getter/Setter
--------------------------------------------------

-- | Setter for the wait time between two indirect
-- operations.
--
-- The value is stored but not used yet.
setHttpDelay :: (Monad m) => Int -> WebContextT node rezPath m ()
setHttpDelay delay = WebContextT $ \c ->
        return ((), c{ httpDelay = delay })

-- | return the value set by 'setHttpDelay'
getHttpDelay :: (Monad m) => WebContextT node rezPath m Int
getHttpDelay = WebContextT $ \c -> return (httpDelay c, c)

-- | Define the text output for written text.
setOutput :: (Monad m) => Handle -> WebContextT node rezPath m ()
setOutput handle = WebContextT $ \c ->
        return ((), c{ defaultOutput = handle })

-- | Retrieve the default file output used for text.
getOutput :: (Monad m) => WebContextT node rezPath m Handle
getOutput = WebContextT $ \c -> return (defaultOutput c, c)

-- | Set the user agent which must be used for indirect operations
--
-- The value is stored but not used yet.
setUserAgent :: (Monad m) => String -> WebContextT node rezPath m ()
setUserAgent usr = WebContextT $ \c ->
    return ((), c{ httpUserAgent = usr })

-- | return the value set by 'setUserAgent'
getUserAgent :: (Monad m) => WebContextT node rezPath m String
getUserAgent = WebContextT $ \c -> return (httpUserAgent c, c)

-- | Set the value of the logging level.
setLogLevel :: (Monad m) => LogLevel -> WebContextT node rezPath m ()
setLogLevel lvl = WebContextT $ \c ->
    return ((), c{logLevel = lvl})

-- | Tell if the current 'LoggingLevel' is set to 'Verbose'
isVerbose :: (Monad m) => WebContextT node rezPath m Bool
isVerbose = WebContextT $ \c -> 
    return (logLevel c == Verbose, c)

-- | TODO : write documentation
accumulateCurrentState :: (Monad m)
                       => ([EvalState node rezPath], StateNumber)
                       -> WebContextT node rezPath m ()
accumulateCurrentState lst = WebContextT $ \c ->
        return ((), c{ contextStack = lst : contextStack c })

-- | TODO : write documentation
popAccumulation :: (Monad m)
                => WebContextT node rezPath m ([EvalState node rezPath], StateNumber)
popAccumulation = WebContextT $ \c ->
    case contextStack c of
         []     -> error "Empty context stack, implementation bug"
         (x:xs) -> return (x, c{ contextStack = xs })

-- | Internally the monad store a stack of state : the list
-- of currently evaluated 'EvalState'. Pushing this context
-- with store all the current nodes in it, waiting for later
-- retrieval.
pushCurrentState :: (Monad m)
                 => [EvalState node rezPath]
                 -> WebContextT node rezPath m ()
pushCurrentState lst = WebContextT $ \c ->
        return ((), c{ contextStack = (lst, 0) : contextStack c })

-- | Inverse operation of 'pushCurrentState', retrieve
-- stored nodes.
popCurrentState :: (Monad m)
                => WebContextT node rezPath m [EvalState node rezPath]
popCurrentState = WebContextT $ \c ->
    case contextStack c of
         []     -> error "Empty context stack, implementation bug"
         ((x,_):xs) -> 
            return (x, c{ contextStack = xs })

-- | Helper function used to start the evaluation of a webrexp
-- with a default context, with sane defaults.
evalWithEmptyContext :: (Monad m)
                     => WebContextT node rezPath m a -> m a
evalWithEmptyContext val = do
    (finalVal, _context) <- runWebContextT val emptyContext
    return finalVal

-- | Return normal, error, verbose logger
prepareLogger :: (Monad m)
              => WebContextT node rezPath m (Logger, Logger, Logger)
prepareLogger = WebContextT $ \c ->
    let silenceLog _ = return ()
        errLog = hPutStrLn stderr
        normalLog = putStrLn
    in case logLevel c of
      Quiet -> return ((silenceLog, errLog, silenceLog), c)
      Normal -> return ((normalLog, errLog, silenceLog), c)
      Verbose -> return ((normalLog, errLog, normalLog), c)

--------------------------------------------------
----            Depth First evaluation
--------------------------------------------------

-- | Record a node in the context for the DFS evaluation.
recordNode :: (Monad m) 
           => (EvalState node rezPath, StateNumber) -> WebContextT node rezPath m ()
recordNode n = WebContextT $ \c ->
    return ((), c{ waitingStates = n : waitingStates c })

-- | Get the last record from the top of the stack
popLastRecord :: (Monad m)
              => WebContextT node rezPath m (EvalState node rezPath, StateNumber)
popLastRecord = WebContextT $ \c ->
    case waitingStates c of
      [] -> error "popLAst Record - Empty stack!!!"
      (x:xs) -> return (x, c{ waitingStates = xs })


-- | Add a \'frame\' context to the current DFS evaluation.
-- A frame context possess a node to revert to and two counters.
--
--  * A counter for seen nodes which must be evaluated before
--    backtracking
--
--  * A counter for valid node count, to keep track if the whole
--    frame has a valid result or not.
--
-- You can look at 'popBranchContext' and 'addToBranchContext'
-- for other frame manipulation functions.
pushToBranchContext :: (Monad m)
                    => (EvalState node rezPath, SeenCounter, ValidSeenCounter)
                    -> WebContextT node rezPath m ()
pushToBranchContext cont = WebContextT $ \c ->
    return ((), c{ branchContext = cont : branchContext c })

-- | Retrieve the frame on the top of the stack.
-- for more information regarding frames see 'pushToBranchContext'
popBranchContext :: (Monad m)
                 => WebContextT node rezPath m (EvalState node rezPath, SeenCounter, ValidSeenCounter)
popBranchContext = WebContextT $ \c ->
    case branchContext c of
      [] -> error "popBranchContext - empty branch context"
      (x:xs) -> return (x, c{ branchContext = xs })

-- | Add seen node count and valid node count to the current
-- frame.
--
-- for more information regarding frames see 'pushToBranchContext'
addToBranchContext :: (Monad m)
                   => SeenCounter -> ValidSeenCounter -> WebContextT node rezPath m ()
addToBranchContext count validCount = WebContextT $ \c ->
    case branchContext c of
      [] -> error "addToBranchContext - empty context stack"
      ((e,co,vc):xs) -> return ((), c{ branchContext = (e,co + count
                                                      ,vc + validCount): xs})

--------------------------------------------------
----            Unique bucket
--------------------------------------------------

-- | Initialisation function which must be called before the
-- beginning of a webrexp execution.
--
-- Inform the monad of the number of 'Unique' bucket in the
-- expression, permitting the allocation of the required number
-- of Set to hold them.
setBucketCount :: (Monad m, MonadIO m)
               => Int -- ^ Unique bucket count
               -> Int -- ^ Range counter count
               -> WebContextT node rezPath m ()
setBucketCount uniquecount rangeCount = WebContextT $ \c -> do
    arr <- liftIO $ newArray (0, uniquecount - 1) Set.empty
    counter <- liftIO $ newArray (0, rangeCount - 1) 0
    return ((), c{ uniqueBucket = arr
                 , countBucket = counter })

-- | Used for node range, return the current value of the
-- counter and increment it.
incrementGetRangeCounter :: (Monad m, MonadIO m)
                         => Int -> WebContextT node rezPath m Int
incrementGetRangeCounter bucket = WebContextT $ \c -> do
    num <- liftIO $ countBucket c `readArray`bucket
    liftIO . (countBucket c `writeArray` bucket) $ num + 1
    return (num, c)

-- | Tell if a string has already been recorded for a bucket ID.
-- Used for the implementation of the 'Unique' constructor of a webrexp.
--
-- Return False, unless 'setResourceVisited' has been called with the same
-- string before.
hasResourceBeenVisited :: (Monad m, MonadIO m)
                       => Int -> String -> WebContextT node rezPath m Bool
hasResourceBeenVisited bucketId str = WebContextT $ \c -> do
    set <- liftIO $ uniqueBucket c `readArray`bucketId
    return (str `Set.member` set, c)

-- | Record the visit of a string. 'hasResourceBeenVisited' will return True
-- for the same string after this call.
setResourceVisited :: (Monad m, MonadIO m)
                   => Int -> String -> WebContextT node rezPath m ()
setResourceVisited bucketId str = WebContextT $ \c -> do
    set <- liftIO $ uniqueBucket c `readArray` bucketId
    let newSet = str `Set.insert` set
    liftIO $ (uniqueBucket c `writeArray`bucketId) newSet
    return ((), c)