module Network.CrawlChain.Crawling (
crawl,
crawlAndStore, CrawlActionDescriber,
Crawler
) where
import qualified Data.ByteString.Char8 as BC
import qualified Network.Http.Client as C
import Network.CrawlChain.CrawlAction
import Network.CrawlChain.CrawlResult
import Network.CrawlChain.Util
import Network.Http.ClientFacade
type Crawler = CrawlAction -> IO CrawlResult
type CrawlActionDescriber = CrawlAction -> String
crawl :: Crawler
crawl action = delaySeconds 1 >> crawlInternal action
crawlAndStore :: CrawlActionDescriber -> Crawler
crawlAndStore describer = (>>= store) . crawl
where
store :: CrawlResult -> IO CrawlResult
store cr = store' (crawlingResultStatus cr)
where
store' CrawlingOk = storeResult (crawlingContent cr)
store' (CrawlingFailed msg) = storeResult msg
store' status = error $ "unexpected status: " ++ (show status)
storeResult filecontent'= do
writeFile' (describer $ crawlingAction cr) filecontent'
return cr
where
writeFile' n c = do
putStrLn $ "writing to " ++ n
writeFile n c
crawlInternal :: CrawlAction -> IO CrawlResult
crawlInternal action = do
response <- doRequest action
logMsg $ "Crawled " ++ (show action)
return $ CrawlResult action (BC.unpack response) CrawlingOk
where
doRequest :: CrawlAction -> IO (BC.ByteString)
doRequest (GetRequest url) = getRequest (BC.pack url)
doRequest (PostRequest urlString ps pType) = doPost (BC.pack urlString) formParams pType where
formParams = map (\(a, b) -> (BC.pack a, BC.pack b)) ps
doPost :: BC.ByteString -> [(BC.ByteString, BC.ByteString)] -> PostType -> IO BC.ByteString
doPost url params postType = doPost' postType where
doPost' :: PostType -> IO BC.ByteString
doPost' Undefined = doPost' PostForm
doPost' PostForm = C.postForm url params C.concatHandler
doPost' PostAJAX = ajaxRequest url params