module Text.Webrexp (
ParseableType( .. )
, queryDocument
, queryDocumentM
, evalWebRexp
, evalWebRexpDepthFirst
, parseWebRexp
, evalParsedWebRexp
, executeParsedWebRexp
, Conf (..)
, defaultConf
, evalWebRexpWithConf
) where
import Control.Monad
import Control.Monad.IO.Class
import Control.Monad.ST
import Text.Parsec
import System.IO
import System.Exit
import Data.Array.ST
import Data.Array.IO
import Text.Webrexp.Exprtypes
import Text.Webrexp.Parser( webRexpParser )
import Text.Webrexp.HaXmlNode
import Text.Webrexp.HxtNode
import Text.Webrexp.JsonNode
import Text.Webrexp.UnionNode
import Text.Webrexp.DirectoryNode
import Text.Webrexp.GraphWalker
import Text.Webrexp.ResourcePath
import Text.Webrexp.WebContext
import Text.Webrexp.WebRexpAutomata
import Text.Webrexp.Remote.MimeTypes
import qualified Text.Webrexp.ProjectByteString as B
data Conf = Conf
{ hammeringDelay :: Int
, userAgent :: String
, output :: Handle
, verbose :: Bool
, quiet :: Bool
, expr :: String
, showHelp :: Bool
, depthEvaluation :: Bool
, outputGraphViz :: Bool
}
defaultConf :: Conf
defaultConf = Conf
{ hammeringDelay = 1500
, userAgent = ""
, output = stdout
, verbose = False
, quiet = False
, expr = ""
, showHelp = False
, outputGraphViz = False
, depthEvaluation = True
}
type CrawledNode =
UnionNode (UnionNode HxtNode HaXmLNode)
(UnionNode JsonNode DirectoryNode)
type Crawled a = WebCrawler IOArray CrawledNode ResourcePath a
type MemoryCrawl s a = WebContextT (STArray s) CrawledNode ResourcePath (ST s) a
initialState :: IO (EvalState CrawledNode ResourcePath)
initialState = do
node <- currentDirectoryNode
return . Node $ repurposeNode (UnionRight . UnionRight) node
queryDocument :: ParseableType -> B.ByteString -> WebRexp -> [Either String String]
queryDocument docType str query = runST $ queryDocumentM docType str query
queryDocumentM :: forall s . ParseableType -> B.ByteString -> WebRexp
-> ST s [Either String String]
queryDocumentM docType str query = executeWithEmptyContext todo
where ignoreLog _ = return ()
loggers = (ignoreLog, ignoreLog, ignoreLog)
todo :: MemoryCrawl s Bool
todo = do
initialNode <- parseUnion loggers (Just docType) (Local "") str
case initialNode of
AccessError -> return False
DataBlob _ _ -> return False
Result rezPath a ->
let initNode = NodeContext { rootRef = rezPath
, this = a
, parents = ImmutableHistory [] }
in do setLogLevel Quiet
evalDepthFirst (Node initNode) query
parseWebRexp :: String -> Maybe WebRexp
parseWebRexp str =
case runParser webRexpParser () "expr" str of
Left _ -> Nothing
Right e -> Just e
evalParsedWebRexp :: WebRexp -> IO Bool
evalParsedWebRexp wexpr = evalWithEmptyContext crawled
where crawled :: Crawled Bool = evalBreadthFirst (Text "") wexpr
executeParsedWebRexp :: WebRexp -> IO [Either String String]
executeParsedWebRexp wexpr = executeWithEmptyContext crawled
where crawled :: Crawled Bool = evalDepthFirst (Text "") wexpr
evalWebRexp :: String -> IO Bool
evalWebRexp = evalWebRexpWithEvaluator $ evalBreadthFirst (Text "")
evalWebRexpDepthFirst :: String -> IO Bool
evalWebRexpDepthFirst = evalWebRexpWithEvaluator $ evalDepthFirst (Text "")
evalWebRexpWithEvaluator :: (WebRexp -> Crawled Bool) -> String -> IO Bool
evalWebRexpWithEvaluator evaluator str =
case runParser webRexpParser () "expr" str of
Left err -> do
putStrLn $ "Parsing error :\n" ++ show err
return False
Right wexpr ->
let crawled :: Crawled Bool = evaluator wexpr
in evalWithEmptyContext crawled
evalWebRexpWithConf :: Conf -> IO Bool
evalWebRexpWithConf conf =
case runParser webRexpParser () "expr" (expr conf) of
Left err -> do
putStrLn "Parsing error :\n"
print err
return False
Right wexpr -> do
when (outputGraphViz conf)
(do let packed = packRefFiltering wexpr
dumpAutomata (expr conf) stdout $ buildAutomata packed
exitWith ExitSuccess)
when (verbose conf)
(do putStrLn $ "code " ++ show (expr conf)
print wexpr)
let crawled :: Crawled Bool = do
setUserAgent $ userAgent conf
setOutput $ output conf
setHttpDelay $ hammeringDelay conf
when (quiet conf) (setLogLevel Quiet)
when (verbose conf) (setLogLevel Verbose)
initState <- liftIO initialState
if depthEvaluation conf
then evalDepthFirst initState wexpr
else evalBreadthFirst initState wexpr
rez <- evalWithEmptyContext crawled
when (output conf /= stdout)
(hClose $ output conf)
return rez