module Network.Shpider.Options where
import Data.Maybe
import Network.Shpider.Curl.Opts
import Network.Shpider.Curl.Types
import Network.Shpider.State
import Network.Shpider.URL
import Network.Shpider.TextUtils
-- | Store the given flag in the 'dontLeaveDomain' field of the current
-- 'Shpider' state. All other state fields are left as they were.
--
-- NOTE(review): presumably @True@ restricts the crawl to the start page's
-- domain; the code that consumes this flag is not in this module — confirm.
stayOnDomain :: Bool -> Shpider ( )
stayOnDomain b =
  get >>= \ st -> put st { dontLeaveDomain = b }
-- | Set the curl timeout option held in the 'curlOpts' field of the state.
--
-- If the option list already contains one or more 'CurlTimeout' entries,
-- each of them is replaced in place (the position of every option is kept).
-- If there is none, a new 'CurlTimeout' is prepended to the list.
--
-- NOTE(review): the unit of @s@ is presumably seconds, as for libcurl's
-- timeout option — confirm against the curl bindings.
setTimeOut :: Long -> Shpider ( )
setTimeOut s = do
  st <- get
  let opts = curlOpts st

      -- Recognise a timeout option regardless of the value it carries.
      isTimeout ( CurlTimeout _ ) = True
      isTimeout _ = False

      -- Swap a timeout option for the new one; pass anything else through.
      retime opt
        | isTimeout opt = CurlTimeout s
        | otherwise = opt

      newOpts
        | any isTimeout opts = map retime opts
        | otherwise = CurlTimeout s : opts
  put st { curlOpts = newOpts }
-- | Record the page the crawl starts from.
--
-- The argument is first passed through 'escapeSpaces'. If the result
-- satisfies 'isAbsoluteUrl' it is written to the 'startPage' field of the
-- state; otherwise 'error' is called with a message saying that the start
-- page must be an absolute URL.
setStartPage :: String -> Shpider ( )
setStartPage uncleanUrl =
  get >>= store
  where
    cleaned = escapeSpaces uncleanUrl

    -- The state is fetched before the check, so the failure happens when
    -- the action is run, not when it is constructed.
    store st
      | isAbsoluteUrl cleaned = put st { startPage = cleaned }
      | otherwise = error "The start page must be an absolute URL"
-- | Return the value currently held in the 'startPage' field of the state.
getStartPage :: Shpider String
getStartPage =
  fmap startPage get
-- | Store the given flag in the 'htmlOnlyDownloads' field of the state.
--
-- NOTE(review): presumably @True@ makes the crawler skip non-HTML
-- responses; the consumer of this flag is not visible here — confirm.
onlyDownloadHtml :: Bool -> Shpider ( )
onlyDownloadHtml b =
  get >>= \ current -> put current { htmlOnlyDownloads = b }
-- | Replace the 'currentPage' field of the state with the given 'Page'.
setCurrentPage :: Page -> Shpider ( )
setCurrentPage p =
  get >>= \ st -> put st { currentPage = p }
-- | Return the 'Page' currently held in the 'currentPage' field of the
-- state.
getCurrentPage :: Shpider Page
getCurrentPage =
  fmap currentPage get
-- | Set the 'visited' field of the state to @Just [ ]@.
--
-- The field is overwritten unconditionally, so any value it held before
-- this call is discarded.
--
-- NOTE(review): presumably a @Just@ value switches on recording of visited
-- pages, starting from an empty list — confirm against the code that reads
-- 'visited'.
keepTrack :: Shpider ( )
keepTrack =
  get >>= \ st -> put st { visited = Just [ ] }