{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE FlexibleInstances #-}
-- | This module define the state carryied during the webrexp
-- evaluation. This state is implemented as a monad transformer
-- on top of 'IO'.
module Text.Webrexp.WebContext
    ( 
    -- * Types
      WebCrawler
    , WebContextT ( .. )
    , WebContext 
    , NodeContext (..)
    , EvalState (..)
    , BinBlob (..)
    , Context
    , HistoryPath (..)

    -- * Aliases
    -- Only used to provide more meaningful type signatures
    , Counter
    , SeenCounter
    , ValidSeenCounter
    , StateNumber

    -- * Node manipulation function/operators
    , (^:)
    , (^+)

    -- * Crawling configuration
    , LogLevel (..)
    , setLogLevel 
    , getUniqueName
    , getUserAgent 
    , setUserAgent 
    , setOutput 
    , getOutput
    , getHttpDelay 
    , setHttpDelay 
    , isVerbose

    -- * User function
    , evalWithEmptyContext
    , executeWithEmptyContext 
    , repurposeNode 
    
    -- * Implementation info
    -- ** Evaluation function
    , prepareLogger 

    -- ** State manipulation functions
    , pushCurrentState 
    , popCurrentState 

    -- * DFS evaluator
    -- ** Node list
    , recordNode 
    , popLastRecord 

    -- ** Branch context
    , accumulateCurrentState 
    , popAccumulation 

    , pushToBranchContext 
    , popBranchContext 
    , addToBranchContext 

    -- ** Unicity manipulation function
    , setBucketCount 
    , incrementGetRangeCounter 
    , hasResourceBeenVisited
    , setResourceVisited

    -- * Log system
    , debugLog
    , textOutput
    )
    where

import System.IO
import Control.Applicative
import Control.Arrow( first )
import Control.Monad
import Control.Monad.ST
import Control.Monad.IO.Class
import Control.Monad.Trans.Class
import Data.Functor.Identity
import Data.Array.MArray
import qualified Data.Set as Set
import Control.Exception( IOException )
import qualified Control.Exception as Ex

import qualified Text.Webrexp.ProjectByteString as B
import Text.Webrexp.GraphWalker
import Text.Webrexp.IOMock

-- | Typical use of the WebContextT monad transformer
-- allowing to download information
type WebCrawler array node rezPath a = WebContextT array node rezPath IO a

-- | WebContext is 'WebContextT' as a simple Monad
type WebContext array node rezPath a = WebContextT array node rezPath Identity a

-- | Record a graph path in a document, from the last indirect
-- node to this one.
data HistoryPath node =
      -- | A path in an immutable graph. The graph that
      -- doesn't move under our feets, so we store the
      -- index of the followgin node in the path.
      ImmutableHistory [(node, Int)]

      -- | If the graph is suceptible to move under our
      -- feets, we have to search again for the position
      -- of the node in the parent node.
    | MutableHistory   [node]

-- | Fuse two history together, is equivalent to the '++'
-- operator for list.
(^+) :: [(node, Int)] -> HistoryPath node -> HistoryPath node
(^+) nodes (MutableHistory hist) = MutableHistory $ map fst nodes ++ hist
(^+) nodes (ImmutableHistory hist) = ImmutableHistory $ nodes ++ hist

-- | Append at info at the beginning of an history,
-- equivalent to the ':' operator for lists.
(^:) :: (node, Int) -> HistoryPath node -> HistoryPath node
(^:) (node, _) (MutableHistory hist) = MutableHistory $ node : hist
(^:) node (ImmutableHistory hist) = ImmutableHistory $ node : hist

-- | Represent a graph node and the path
-- used to go up to it.
data NodeContext node rezPath = NodeContext
    { -- | Path from the root of the document to
      -- 'this' node.
      parents :: HistoryPath node

      -- | Real node value
    , this :: node              

      -- | The last indirect path used to get to this node.
    , rootRef :: rezPath       
    }

-- | Function useful if used in combination of an union-node :
-- - A function produce a node context for a specific type
-- - You want to generalise it for a complex union
-- - Use this function :)
--
-- For example to produce a simple union node :
--
-- > repurposeNode UnionRight $ initialSimpleNode
repurposeNode :: (nodeA -> nodeB) -> NodeContext nodeA rezPath
              -> NodeContext nodeB rezPath
repurposeNode f node = NodeContext
    { parents = historyPatcher $ parents node
    , this = f $ this node
    , rootRef = rootRef node
    }
  where historyPatcher (ImmutableHistory hist) =
            ImmutableHistory $ map (first f) hist
        historyPatcher (MutableHistory hist) =
            MutableHistory $ map f hist

-- | Represent a binary blob, often downloaded.
data BinBlob rezPath = BinBlob
    { -- | The last indirect path used to get to this blob.
      sourcePath :: rezPath

      -- | The binary data
    , blobData :: B.ByteString
    }

-- | This type represent the temporary results
-- of the evaluation of regexp.
data EvalState node rezPath =
      Node (NodeContext node rezPath)
    | Text String
    | Blob (BinBlob rezPath)

-- | Type used to represent the current logging level.
-- Default is 'Normal'
data LogLevel = Quiet -- ^ Only display the dumped information
              | Normal -- ^ Display dumped information and IOs
              | Verbose -- ^ Display many debugging information
              deriving (Eq)

-- | An int used as a counter
type Counter = Int

-- | Number of elements seen at a state in the automata.
type SeenCounter = Int

-- | Number of elements which arrived by a true transition
-- to a state in the automata.
type ValidSeenCounter = Int

-- | Just an index to a state in the automata.
type StateNumber = Int

-- | Internal data context.
data Context array node rezPath = Context
    { -- | Context stack used in breadth-first evaluation
      contextStack :: [([EvalState node rezPath], Counter)]

      -- | State waiting to be executed in a depth-
      -- first execution.
    , waitingStates :: [(EvalState node rezPath, StateNumber)]

      -- | State used to implement branches in the depth
      -- first evaluator.
    , branchContext :: [(EvalState node rezPath, SeenCounter, ValidSeenCounter)]

      -- | Buckets used for uniqueness pruning, all
      -- evaluation kind.
    , uniqueBucket :: array Int (Set.Set String)

      -- | Counters used for range evaluation in DFS
    , countBucket :: array Int Counter

      -- | Tell if we must keep the found information in memory
      -- instead of directly dumping it on screen.
    , mustGatherData :: Bool

      -- | If you want to run a webrexp in library mode
    , gatheredData :: [Either String String]

      -- | Current log level
    , logLevel :: LogLevel
    , httpDelay :: Int
    , httpUserAgent :: String
    , defaultOutput :: Handle
    , uniqueNameCount :: Int
    }

--------------------------------------------------
----            Monad definitions
--------------------------------------------------
newtype (Monad m) => WebContextT array node rezPath m a =
    WebContextT { runWebContextT :: Context array node rezPath
                                 -> m (a, Context array node rezPath) }

instance (Functor m, Monad m) => Functor (WebContextT array node rezPath m) where
    {-# INLINE fmap #-}
    fmap f a = WebContextT $ \c ->
        fmap (first f) $ runWebContextT a c

instance (Functor m, Monad m) => Applicative (WebContextT array node rezPath m) where
    pure = return
    (<*>) = ap

instance (Monad m) => Monad (WebContextT array node rezPath m) where
    {-# INLINE return #-}
    return a =
        WebContextT $ \c -> return (a, c)

    {-# INLINE (>>=) #-}
    (WebContextT val) >>= f = WebContextT $ \c -> do
        (val', c') <- val c
        runWebContextT (f val') c'

instance MonadTrans (WebContextT array node rezPath) where
    lift m = WebContextT $ \c -> do
        a <- m
        return (a, c)

instance (MonadIO m) => MonadIO (WebContextT array node rezPath m) where
    liftIO = lift . liftIO

instance IOMockable (WebContextT array node rezPath IO) where
    performIO act = Just <$> liftIO act

instance IOMockable (WebContextT array node rezPath (ST s)) where
    performIO _ = return Nothing

--------------------------------------------------
----            Context manipulation
--------------------------------------------------

emptyContext :: Context array node rezPath
emptyContext = Context
    { contextStack = []
    , waitingStates = []
    , branchContext = []
    , logLevel = Normal
    , httpDelay = 1500
    , httpUserAgent = ""
    , mustGatherData = False
    , gatheredData = []
    , defaultOutput = stdout
    , uniqueBucket = undefined
    , countBucket = undefined
    , uniqueNameCount = 1
    }

--------------------------------------------------
----            Getter/Setter
--------------------------------------------------

-- | Return an "pseudo" unique, a filename not used during the
-- run of the current expression.
getUniqueName :: (Monad m) => WebContextT array node rezPath m FilePath
getUniqueName = WebContextT $ \c ->
    let i = uniqueNameCount c
    in return ("file_" ++ show i, c { uniqueNameCount = i + 1 })

-- | Setter for the wait time between two indirect
-- operations.
--
-- The value is stored but not used yet.
setHttpDelay :: (Monad m) => Int -> WebContextT array node rezPath m ()
setHttpDelay delay = WebContextT $ \c ->
        return ((), c{ httpDelay = delay })

-- | return the value set by 'setHttpDelay'
getHttpDelay :: (Monad m) => WebContextT array node rezPath m Int
getHttpDelay = WebContextT $ \c -> return (httpDelay c, c)

-- | Define the text output for written text.
setOutput :: (Monad m) => Handle -> WebContextT array node rezPath m ()
setOutput handle = WebContextT $ \c ->
        return ((), c{ defaultOutput = handle })

-- | Retrieve the default file output used for text.
getOutput :: (Monad m) => WebContextT array node rezPath m Handle
getOutput = WebContextT $ \c -> return (defaultOutput c, c)

isDataOutputedDirectly :: (Monad m) => WebContextT array node rezPath m Bool
isDataOutputedDirectly = WebContextT $ \c ->
        return (not $ mustGatherData c, c)

-- | Set the user agent which must be used for indirect operations
--
-- The value is stored but not used yet.
setUserAgent :: (Monad m) => String -> WebContextT array node rezPath m ()
setUserAgent usr = WebContextT $ \c ->
    return ((), c{ httpUserAgent = usr })

-- | return the value set by 'setUserAgent'
getUserAgent :: (Monad m) => WebContextT array node rezPath m String
getUserAgent = WebContextT $ \c -> return (httpUserAgent c, c)

-- | Set the value of the logging level.
setLogLevel :: (Monad m) => LogLevel -> WebContextT array node rezPath m ()
setLogLevel lvl = WebContextT $ \c ->
    return ((), c{logLevel = lvl})

-- | Tell if the current 'LoggingLevel' is set to 'Verbose'
isVerbose :: (Monad m) => WebContextT array node rezPath m Bool
isVerbose = WebContextT $ \c -> 
    return (logLevel c == Verbose, c)

-- | TODO : write documentation
accumulateCurrentState :: (Monad m)
                       => ([EvalState node rezPath], StateNumber)
                       -> WebContextT array node rezPath m ()
accumulateCurrentState lst = WebContextT $ \c ->
        return ((), c{ contextStack = lst : contextStack c })

-- | TODO : write documentation
popAccumulation :: (Monad m)
                => WebContextT array node rezPath m ([EvalState node rezPath], StateNumber)
popAccumulation = WebContextT $ \c ->
    case contextStack c of
         []     -> error "Empty context stack, implementation bug"
         (x:xs) -> return (x, c{ contextStack = xs })

-- | Internally the monad store a stack of state : the list
-- of currently evaluated 'EvalState'. Pushing this context
-- with store all the current nodes in it, waiting for later
-- retrieval.
pushCurrentState :: (Monad m)
                 => [EvalState node rezPath]
                 -> WebContextT array node rezPath m ()
pushCurrentState lst = WebContextT $ \c ->
        return ((), c{ contextStack = (lst, 0) : contextStack c })

-- | Inverse operation of 'pushCurrentState', retrieve
-- stored nodes.
popCurrentState :: (Monad m)
                => WebContextT array node rezPath m [EvalState node rezPath]
popCurrentState = WebContextT $ \c ->
    case contextStack c of
         []     -> error "Empty context stack, implementation bug"
         ((x,_):xs) -> 
            return (x, c{ contextStack = xs })

-- | Helper function used to start the evaluation of a webrexp
-- with a default context, with sane defaults.
evalWithEmptyContext :: (Monad m)
                     => WebContextT array node rezPath m a -> m a
evalWithEmptyContext val = do
    (finalVal, _context) <- runWebContextT val emptyContext
    return finalVal

-- | Helper function used to evaluate a webrexp and get back
-- data with a default context with sane defaults.
executeWithEmptyContext :: (Monad m)
                        => WebContextT array node rezPath m a -> m [Either String String]
executeWithEmptyContext val = do
    (_, context) <- runWebContextT val (emptyContext { mustGatherData = True })
    return $ gatheredData context

-- | Return normal, error, verbose logger
prepareLogger :: (Monad m, IOMockable (WebContextT array node rezPath m))
              => WebContextT array node rezPath m 
                    (Logger (WebContextT array node rezPath m)
                    ,Logger (WebContextT array node rezPath m)
                    ,Logger (WebContextT array node rezPath m))
prepareLogger = WebContextT $ \c ->
    let silenceLog _ = return ()
        errLog msg = performIO (hPutStrLn stderr msg) >> return ()
        normalLog = textOutput
    in case (mustGatherData c, logLevel c) of
      (True, _) -> return ((silenceLog, gatheringLog . Left, silenceLog), c)
      (_, Quiet) -> return ((silenceLog, errLog, silenceLog), c)
      (_, Normal) -> return ((normalLog, errLog, silenceLog), c)
      (_, Verbose) -> return ((normalLog, errLog, normalLog), c)

-- | Debugging function, only displayed in verbose
-- logging mode.
debugLog :: (IOMockable (WebContextT array node rezPath m), Monad m)
         => String -> WebContextT array node rezPath m ()
debugLog str = do
    verb <- isVerbose
    when verb (performIO (putStrLn str) >> return ())


-- | If a webrexp output some text, it must go through
-- this function. It ensure the writting in the correct
-- file.
textOutput :: (Monad m, IOMockable (WebContextT array node rezPath m)) 
           => String -> WebContextT array node rezPath m ()
textOutput str = do
    direct <- isDataOutputedDirectly 
    if not direct
       then gatheringLog $ Right str
       else do handle <- getOutput
               _ <- performIO $ Ex.catch (hPutStr handle str)
                       (\e -> hPutStrLn stderr $ "Writing error : " ++ 
                                                show (e :: IOException))
               return ()

-- | Keep track of an error or a normal log in the application monad
-- transformer.
gatheringLog :: (Monad m) => Either String String -> WebContextT array node rezPath m ()
gatheringLog d = WebContextT $ \c ->
    return ((), c { gatheredData = gatheredData c ++ [d] })


--------------------------------------------------
----            Depth First evaluation
--------------------------------------------------

-- | Record a node in the context for the DFS evaluation.
recordNode :: (Monad m) 
           => (EvalState node rezPath, StateNumber) -> WebContextT array node rezPath m ()
recordNode n = WebContextT $ \c ->
    return ((), c{ waitingStates = n : waitingStates c })

-- | Get the last record from the top of the stack
popLastRecord :: (Monad m)
              => WebContextT array node rezPath m (EvalState node rezPath, StateNumber)
popLastRecord = WebContextT $ \c ->
    case waitingStates c of
      [] -> error "popLAst Record - Empty stack!!!"
      (x:xs) -> return (x, c{ waitingStates = xs })


-- | Add a \'frame\' context to the current DFS evaluation.
-- A frame context possess a node to revert to and two counters.
--
--  * A counter for seen nodes which must be evaluated before
--    backtracking
--
--  * A counter for valid node count, to keep track if the whole
--    frame has a valid result or not.
--
-- You can look at 'popBranchContext' and 'addToBranchContext'
-- for other frame manipulation functions.
pushToBranchContext :: (Monad m)
                    => (EvalState node rezPath, SeenCounter, ValidSeenCounter)
                    -> WebContextT array node rezPath m ()
pushToBranchContext cont = WebContextT $ \c ->
    return ((), c{ branchContext = cont : branchContext c })

-- | Retrieve the frame on the top of the stack.
-- for more information regarding frames see 'pushToBranchContext'
popBranchContext :: (Monad m)
                 => WebContextT array node rezPath m ( EvalState node rezPath
                                                     , SeenCounter
                                                     , ValidSeenCounter )
popBranchContext = WebContextT $ \c ->
    case branchContext c of
      [] -> error "popBranchContext - empty branch context"
      (x:xs) -> return (x, c{ branchContext = xs })

-- | Add seen node count and valid node count to the current
-- frame.
--
-- for more information regarding frames see 'pushToBranchContext'
addToBranchContext :: (Monad m)
                   => SeenCounter -> ValidSeenCounter 
                   -> WebContextT array node rezPath m ()
addToBranchContext count validCount = WebContextT $ \c ->
    case branchContext c of
      [] -> error "addToBranchContext - empty context stack"
      ((e,co,vc):xs) -> return ((), c{ branchContext = (e,co + count
                                                      ,vc + validCount): xs})

--------------------------------------------------
----            Unique bucket
--------------------------------------------------

-- | Initialisation function which must be called before the
-- beginning of a webrexp execution.
--
-- Inform the monad of the number of 'Unique' bucket in the
-- expression, permitting the allocation of the required number
-- of Set to hold them.
setBucketCount :: ( Monad m
                  , MArray array Counter m
                  , MArray array (Set.Set String) m
                  )
               => Int -- ^ Unique bucket count
               -> Int -- ^ Range counter count
               -> WebContextT array node rezPath m ()
setBucketCount uniquecount rangeCount = WebContextT $ \c -> do
    setArray <- newArray (0, uniquecount - 1) Set.empty
    counterArray <- newArray (0, rangeCount - 1) 0
    return ((), c{ uniqueBucket = setArray
                 , countBucket = counterArray } )

-- | Used for node range, return the current value of the
-- counter and increment it.
incrementGetRangeCounter :: (Monad m, MArray array Counter m)
                         => Int -> WebContextT array node rezPath m Int
incrementGetRangeCounter bucket = WebContextT $ \c -> do
    num <- countBucket c `readArray` bucket
    writeArray (countBucket c) bucket $ num + 1
    return (num, c)

-- | Tell if a string has already been recorded for a bucket ID.
-- Used for the implementation of the 'Unique' constructor of a webrexp.
--
-- Return False, unless 'setResourceVisited' has been called with the same
-- string before.
hasResourceBeenVisited :: (Monad m, MArray array (Set.Set String) m)
                       => Int -> String -> WebContextT array node rezPath m Bool
hasResourceBeenVisited bucketId str = WebContextT $ \c -> do
    set <- uniqueBucket c `readArray` bucketId
    return (str `Set.member` set, c)

-- | Record the visit of a string. 'hasResourceBeenVisited' will return True
-- for the same string after this call.
setResourceVisited :: (Monad m, MArray array (Set.Set String) m)
                   => Int -> String -> WebContextT array node rezPath m ()
setResourceVisited bucketId str = WebContextT $ \c -> do
    set <- uniqueBucket c `readArray` bucketId
    let newSet = str `Set.insert` set
    writeArray (uniqueBucket c) bucketId newSet
    return ((), c)