{-
 -
 - Copyright (c) 2009-2010 Johnny Morrice
 -
 - Permission is hereby granted, free of charge, to any person
 - obtaining a copy of this software and associated documentation 
 - files (the "Software"), to deal in the Software without 
 - restriction, including without limitation the rights to use, copy, 
 - modify, merge, publish, distribute, sublicense, and/or sell copies 
 - of the Software, and to permit persons to whom the Software is 
 - furnished to do so, subject to the following conditions:
 -
 - The above copyright notice and this permission notice shall be 
 - included in all copies or substantial portions of the Software.
 -
 - THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 
 - EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 - MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 
 - NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 - BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 - ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 - CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 - SOFTWARE.
 -
-}

-- | This module describes the state of shpider computations, and provides a monad transformer over it.
module Network.Shpider.State 
   ( module Control.Monad.State
   , ShpiderState (..)
   , Page (..)
   , Shpider
   , emptyPage
   , runShpider
   , runShpiderSt
   )
   where

import Control.Monad.State
import Data.Maybe
import Data.Time
import Network.Curl
import Network.Shpider.Forms
import Network.Shpider.Links
import Text.HTML.TagSoup.Parsec


-- | The shpider state holds all the options for shpider transactions, the current page and all the `CurlOption`s used when calling curl.
--
-- The per-field notes below only state what this module itself shows.
-- Where a field's use lies outside this module the note is marked as an
-- assumption to be confirmed.
data ShpiderState =
   SS { htmlOnlyDownloads :: Bool
      -- ^ Presumably restricts downloads to HTML content; the check itself
      -- is not in this module (TODO confirm). 'False' in the initial state.
      , startPage :: String
      -- ^ Presumably the address the session started from (TODO confirm).
      -- The empty string in the initial state.
      , dontLeaveDomain :: Bool
      -- ^ Presumably keeps requests within the domain of 'startPage'; the
      -- check itself is not in this module (TODO confirm). 'False' in the
      -- initial state.
      , curlOpts :: [ CurlOption ]
      -- ^ The `CurlOption`s used when calling curl. The initial state only
      -- sets a cookie file and a cookie jar, both named "cookies".
      , currentPage :: Page 
      -- ^ The current page. 'emptyPage' in the initial state.
      , visited :: Maybe [ String ]
      -- ^ Presumably the addresses already visited, with 'Nothing' meaning
      -- that visits are not being recorded (TODO confirm against callers).
      -- 'Nothing' in the initial state.
      , downloadThrottle :: Maybe Int
      -- ^ Whether to wait at least N micro-seconds between downloads
      -- or form submissions. Defaults to 'Nothing'.
      , lastDownloadTime :: Maybe UTCTime
      -- ^ Presumably the time of the most recent download, used together
      -- with 'downloadThrottle' (TODO confirm). 'Nothing' in the initial
      -- state.
      }
   deriving Show

-- | The type of Shpider computations.  A state transformer over `ShpiderState` and `IO`.
--
-- Since this is a 'StateT', the 'ShpiderState' is read and changed with the
-- usual "Control.Monad.State" operations, which this module re-exports.
-- Use 'runShpider' or 'runShpiderSt' to execute a computation in 'IO'.
type Shpider =
   StateT ShpiderState IO

-- | Execute a Shpider computation in 'IO', yielding both its result and the
-- 'ShpiderState' it finished with.
--
-- The computation always starts from the module's initial state and the
-- whole run is wrapped in 'withCurlDo'.
runShpiderSt :: Shpider a -> IO ( a , ShpiderState )
runShpiderSt f =
   withCurlDo session
   where
   -- The stateful run itself, seeded with the initial state.
   session =
      runStateT f initialSt

-- | Execute a Shpider computation in 'IO' and keep only its result.
--
-- This is 'runShpiderSt' with the final 'ShpiderState' thrown away.
runShpider :: Shpider a -> IO a
runShpider f =
   runShpiderSt f >>= \ ( result , _finalState ) -> return result

-- | The initial shpider state, from which 'runShpiderSt' starts every
-- computation.
--
-- Cookies are read from and saved to a file called "cookies": the same name
-- is given to both 'CurlCookieFile' and 'CurlCookieJar'.  No curl timeout is
-- configured here; add a 'CurlTimeout' option to 'curlOpts' if one is
-- wanted.
--
-- Everything else starts switched off or empty: both boolean options are
-- 'False', the start page is the empty string, the current page is
-- 'emptyPage', and 'visited', 'downloadThrottle' and 'lastDownloadTime' are
-- all 'Nothing'.
initialSt :: ShpiderState
initialSt =
   SS { htmlOnlyDownloads = False
      , startPage = ""
      , dontLeaveDomain = False
      , curlOpts = cookieOpts
      , currentPage = emptyPage
      , visited = Nothing
      , downloadThrottle = Nothing
      , lastDownloadTime = Nothing
      }
   where
   -- One file serves as both the cookie source and the cookie jar.
   cookieOpts =
      [ CurlCookieFile cookieStore
      , CurlCookieJar cookieStore
      ]
   cookieStore =
      "cookies"

-- | The Page datatype.  Holds `Link`s, `Form`s, the parsed [ `Tag` ], the page source, and the page's absolute URL.
--
-- 'emptyPage' is the value with every field empty.
data Page =
   Page { links :: [ Link ]
        -- ^ The `Link`s held for the page.
        , forms :: [ Form ]
        -- ^ The `Form`s held for the page.
        , tags :: [ Tag String ]
        -- ^ The parsed tags of the page.
        , source :: String
        -- ^ The page source.
        , addr :: String
        -- ^ The page's absolute URL.
        }
   deriving Show

-- | A 'Page' carrying no information at all: no links, no forms, no tags,
-- an empty source and an empty address.
emptyPage :: Page
emptyPage =
   Page
      { links = []
      , forms = []
      , tags = []
      , source = ""
      , addr = ""
      }