-- |Crawls an HTML page for JavaScript
module WebBits.JavaScript.Crawl
( getPageJavaScript
) where
import WebBits.Common
import Control.Monad
import Data.Char (toLower)
import Data.Generics
import System.IO
import Text.ParserCombinators.Parsec(parse,setPosition,incSourceColumn,Column,sourceLine,sourceColumn)
import WebBits.Html.Syntax
import qualified WebBits.JavaScript.JavaScript as Js
-- |Returns the source of the script.
scriptSrc:: Js.ParsedJsHtml -> [String]
scriptSrc (Element tag attrs _ _) | (map toLower tag) == "script" =
case attributeValue "src" attrs of -- TODO: Check for type="javascript"?
Just "" -> []
Just url -> [url]
Nothing -> []
scriptSrc _ =
[]
-- |Returns a list of URIs for external Javascript files referenced in the page.
importedScripts:: Js.ParsedJsHtml -> [String]
importedScripts = everything (++) (mkQ [] scriptSrc)
-- |Returns the top-level statements of a script.
scriptText :: Js.ParsedJsHtml -> [Js.ParsedStatement]
scriptText (Script (Js.Script _ stmts) _) = stmts
scriptText _ = []
eventHandlers :: [String]
eventHandlers = ["onload","onclick"];
-- ,"onmousemove","onmouseover","onmousedown","onmouseout","onmouseup","onselectstart", "onkeypress"]
attrScript :: Attribute SourcePos Js.ParsedJavaScript
-> IO [Js.ParsedStatement]
attrScript (Attribute id val loc) | id `elem` eventHandlers = do
let eventId = drop 2 id -- drop the "on" prefix
let scriptText = if "javascript:" `isPrefixOf` val then drop 11 val else val
let eventListenerPrefix = "addEventListener('" ++ eventId ++ "', function(event) { "
let prefixLen = length eventListenerPrefix
let eventListenerText = eventListenerPrefix ++ scriptText ++ " });"
let parser = do
setPosition (incSourceColumn loc (-prefixLen))
Js.parseExpression
case parse parser (sourceName loc) eventListenerText of
Left err -> do
fail $ "Error parsiing JavaScript in an attribute at " ++ show loc ++
"\nThe script was:\n\n" ++ eventListenerText
Right e -> return [Js.ExprStmt loc e]
attrScript _ = return []
inpageAttrScripts :: Js.ParsedJsHtml -> IO [Js.ParsedStatement]
inpageAttrScripts = everything (liftM2 (++)) (mkQ (return []) attrScript)
inpageScripts :: Js.ParsedJsHtml -> [Js.ParsedStatement]
inpageScripts = everything (++) (mkQ [] scriptText)
parseJsFile path = do
text <- readFile path
case Js.parseScriptFromString path text of
Left err -> fail (show err)
Right js -> hPutStrLn stderr ("Read file " ++ path) >> return js
-- |Given an HTML page, crawls all external Javascript files and returns a list
-- of statements, concatenated from all files.
getPageJavascript:: Js.ParsedJsHtml -> IO [Js.ParsedStatement]
getPageJavascript page = do
let importURIs = importedScripts page
let inpageJs = inpageScripts page
attrScripts <- inpageAttrScripts page
importedScripts <- mapM parseJsFile importURIs
return $ (concatMap Js.scriptStatements importedScripts ++ attrScripts) ++ inpageJs
getPageJavaScript:: Js.ParsedJsHtml -> IO [Js.ParsedStatement] -- monomorphism
getPageJavaScript = getPageJavascript