-- |Crawls an HTML page for JavaScript module BrownPLT.JavaScript.Crawl ( getPageJavaScript ) where import Control.Monad import Data.List import Data.Char (toLower) import Data.Generics import System.IO import Text.ParserCombinators.Parsec.Pos (SourcePos, sourceName) import Text.ParserCombinators.Parsec(parse,setPosition,incSourceColumn,Column,sourceLine,sourceColumn) import BrownPLT.Html.Syntax import qualified BrownPLT.JavaScript as Js import BrownPLT.JavaScript.HtmlEmbedding instance Typeable SourcePos where typeOf _ = mkTyConApp (mkTyCon "Text.ParserCombinators.Parsec.Pos.SourcePos") [] -- Complete guesswork. It seems to work. -- This definition is incomplete. instance Data SourcePos where -- We treat source locations as opaque. After all, we don't have access to -- the constructor. gfoldl k z pos = z pos toConstr _ = sourcePosConstr1 where sourcePosConstr1 = mkConstr sourcePosDatatype "SourcePos" [] Prefix sourcePosDatatype = mkDataType "SourcePos" [sourcePosConstr1] gunfold = error "gunfold is not defined for SourcePos" dataTypeOf = error "dataTypeOf is not defined for SourcePos" -- |Returns the source of the script. scriptSrc:: ParsedJsHtml -> [String] scriptSrc (Element tag attrs _ _) | (map toLower tag) == "script" = case attributeValue "src" attrs of -- TODO: Check for type="javascript"? Just "" -> [] Just url -> [url] Nothing -> [] scriptSrc _ = [] -- |Returns a list of URIs for external Javascript files referenced in the page. importedScripts:: ParsedJsHtml -> [String] importedScripts = everything (++) (mkQ [] scriptSrc) -- |Returns the top-level statements of a script. scriptText :: ParsedJsHtml -> [ParsedStatement] scriptText (Script (Js.Script _ stmts) _) = stmts scriptText _ = [] eventHandlers :: [String] eventHandlers = ["onload","onclick"]; -- ,"onmousemove","onmouseover","onmousedown","onmouseout","onmouseup","onselectstart", "onkeypress"] attrScript :: Attribute SourcePos ParsedJavaScript -> IO [ParsedStatement] attrScript (Attribute id val loc) | id `elem` eventHandlers = do let eventId = drop 2 id -- drop the "on" prefix let scriptText = if "javascript:" `isPrefixOf` val then drop 11 val else val let eventListenerPrefix = "addEventListener('" ++ eventId ++ "', function(event) { " let prefixLen = length eventListenerPrefix let eventListenerText = eventListenerPrefix ++ scriptText ++ " });" let parser = do setPosition (incSourceColumn loc (-prefixLen)) Js.parseExpression case parse parser (sourceName loc) eventListenerText of Left err -> do fail $ "Error parsiing JavaScript in an attribute at " ++ show loc ++ "\nThe script was:\n\n" ++ eventListenerText Right e -> return [Js.ExprStmt loc e] attrScript _ = return [] inpageAttrScripts :: ParsedJsHtml -> IO [ParsedStatement] inpageAttrScripts = everything (liftM2 (++)) (mkQ (return []) attrScript) inpageScripts :: ParsedJsHtml -> [ParsedStatement] inpageScripts = everything (++) (mkQ [] scriptText) parseJsFile path = do text <- readFile path case Js.parseScriptFromString path text of Left err -> fail (show err) Right js -> hPutStrLn stderr ("Read file " ++ path) >> return js -- |Given an HTML page, crawls all external Javascript files and returns a list -- of statements, concatenated from all files. getPageJavascript:: ParsedJsHtml -> IO [ParsedStatement] getPageJavascript page = do let importURIs = importedScripts page let inpageJs = inpageScripts page attrScripts <- inpageAttrScripts page importedScripts <- mapM parseJsFile importURIs let unScript (Js.Script _ ss) = ss return $ (concatMap unScript importedScripts ++ attrScripts) ++ inpageJs getPageJavaScript:: ParsedJsHtml -> IO [ParsedStatement] -- monomorphism getPageJavaScript = getPageJavascript