-- | This module describes the state of shpider computations, and provides the `Shpider` monad: a state transformer over that state and `IO`.
module Network.Shpider.State 
   ( module Control.Monad.State
   , ShpiderState (..)
   , Page (..)
   , Shpider
   , emptyPage
   , runShpider
   , runShpiderSt
   )
   where

import Control.Monad.State

import Network.Shpider.Curl.Curl

import Data.Maybe

import Text.HTML.TagSoup.Parsec

import Network.Shpider.Forms
import Network.Shpider.Links

-- | The state threaded through shpider computations: the options for shpider
-- transactions, the current page, and the `CurlOption`s used when calling curl.
data ShpiderState = SS
   { htmlOnlyDownloads :: Bool
     -- ^ Transaction option.  NOTE(review): by its name this presumably limits
     -- downloads to HTML content; confirm against the code that reads it.
   , startPage :: String
     -- ^ Transaction option.  NOTE(review): presumably the URL the crawl
     -- starts from; confirm against the code that sets it.
   , dontLeaveDomain :: Bool
     -- ^ Transaction option.  NOTE(review): presumably keeps requests within
     -- the start page's domain; confirm against the code that reads it.
   , curlOpts :: [ CurlOption ]
     -- ^ The `CurlOption`s used when calling curl.
   , currentPage :: Page
     -- ^ The current page.
   , visited :: Maybe [ String ]
     -- ^ NOTE(review): presumably the addresses visited so far; what
     -- 'Nothing' means as opposed to @Just []@ is not visible in this
     -- module and should be confirmed.
   }
   deriving Show

-- | The monad in which shpider computations run: `StateT` carrying a
-- `ShpiderState`, layered over `IO`.
type Shpider = StateT ShpiderState IO

-- | Execute a `Shpider` computation, starting from the initial shpider state
-- and wrapped in `withCurlDo`.  Yields the computation's result paired with
-- the final `ShpiderState`.
runShpiderSt :: Shpider a -> IO ( a , ShpiderState )
runShpiderSt action =
   withCurlDo ( runStateT action initialSt )

-- | Execute a `Shpider` computation via `runShpiderSt` and yield only its
-- result; the final state is discarded.
runShpider :: Shpider a -> IO a
runShpider action =
   runShpiderSt action >>= keepResult
   where
      -- Drop the state half of the pair produced by `runShpiderSt`.
      keepResult ( value , _ ) = return value

-- | The state every shpider computation starts from.
-- Both boolean options are off, the start page is empty, the current page is
-- `emptyPage`, and `visited` is 'Nothing'.
-- For now the curl options are fixed: CurlTimeout is 3, and the cookie file
-- and the cookie jar are both the file called "cookies".
initialSt :: ShpiderState
initialSt =
   SS { htmlOnlyDownloads = False
      , startPage = ""
      , dontLeaveDomain = False
      , curlOpts = defaultCurlOpts
      , currentPage = emptyPage
      , visited = Nothing
      }
   where
      -- The hard-wired curl options described above.
      defaultCurlOpts :: [ CurlOption ]
      defaultCurlOpts =
         [ CurlTimeout 3 , CurlCookieFile "cookies" , CurlCookieJar "cookies" ]

-- | A downloaded page: its `Link`s, its `Form`s, the parsed [ `Tag` ], the
-- page source, and the page's absolute URL.
data Page = Page
   { links :: [ Link ]
     -- ^ The `Link`s held for the page.
   , forms :: [ Form ]
     -- ^ The `Form`s held for the page.
   , tags :: [ Tag ]
     -- ^ The parsed tags of the page.
   , source :: String
     -- ^ The page source.
   , addr :: String
     -- ^ The page's absolute URL.
   }
   deriving Show

-- | A blank `Page`: no links, no forms, no tags, and an empty source and
-- address.
emptyPage :: Page
emptyPage = Page
   { links = []
   , forms = []
   , tags = []
   , source = ""
   , addr = ""
   }