module BrownPLT.JavaScript.Crawl
( getPageJavaScript
) where
import Control.Monad
import Data.List
import Data.Char (toLower)
import Data.Generics
import System.IO
import Text.ParserCombinators.Parsec.Pos (SourcePos, sourceName)
import Text.ParserCombinators.Parsec(parse,setPosition,incSourceColumn,Column,sourceLine,sourceColumn)
import BrownPLT.Html.Syntax
import qualified BrownPLT.JavaScript as Js
instance Typeable SourcePos where
typeOf _ =
mkTyConApp (mkTyCon "Text.ParserCombinators.Parsec.Pos.SourcePos") []
instance Data SourcePos where
gfoldl k z pos = z pos
toConstr _ = sourcePosConstr1 where
sourcePosConstr1 = mkConstr sourcePosDatatype "SourcePos" [] Prefix
sourcePosDatatype = mkDataType "SourcePos" [sourcePosConstr1]
gunfold = error "gunfold is not defined for SourcePos"
dataTypeOf = error "dataTypeOf is not defined for SourcePos"
scriptSrc:: Js.ParsedJsHtml -> [String]
scriptSrc (Element tag attrs _ _) | (map toLower tag) == "script" =
case attributeValue "src" attrs of
Just "" -> []
Just url -> [url]
Nothing -> []
scriptSrc _ =
[]
importedScripts:: Js.ParsedJsHtml -> [String]
importedScripts = everything (++) (mkQ [] scriptSrc)
scriptText :: Js.ParsedJsHtml -> [Js.ParsedStatement]
scriptText (Script (Js.Script _ stmts) _) = stmts
scriptText _ = []
eventHandlers :: [String]
eventHandlers = ["onload","onclick"];
attrScript :: Attribute SourcePos Js.ParsedJavaScript
-> IO [Js.ParsedStatement]
attrScript (Attribute id val loc) | id `elem` eventHandlers = do
let eventId = drop 2 id
let scriptText = if "javascript:" `isPrefixOf` val then drop 11 val else val
let eventListenerPrefix = "addEventListener('" ++ eventId ++ "', function(event) { "
let prefixLen = length eventListenerPrefix
let eventListenerText = eventListenerPrefix ++ scriptText ++ " });"
let parser = do
setPosition (incSourceColumn loc (prefixLen))
Js.parseExpression
case parse parser (sourceName loc) eventListenerText of
Left err -> do
fail $ "Error parsiing JavaScript in an attribute at " ++ show loc ++
"\nThe script was:\n\n" ++ eventListenerText
Right e -> return [Js.ExprStmt loc e]
attrScript _ = return []
inpageAttrScripts :: Js.ParsedJsHtml -> IO [Js.ParsedStatement]
inpageAttrScripts = everything (liftM2 (++)) (mkQ (return []) attrScript)
inpageScripts :: Js.ParsedJsHtml -> [Js.ParsedStatement]
inpageScripts = everything (++) (mkQ [] scriptText)
parseJsFile path = do
text <- readFile path
case Js.parseScriptFromString path text of
Left err -> fail (show err)
Right js -> hPutStrLn stderr ("Read file " ++ path) >> return js
getPageJavascript:: Js.ParsedJsHtml -> IO [Js.ParsedStatement]
getPageJavascript page = do
let importURIs = importedScripts page
let inpageJs = inpageScripts page
attrScripts <- inpageAttrScripts page
importedScripts <- mapM parseJsFile importURIs
let unScript (Js.Script _ ss) = ss
return $ (concatMap unScript importedScripts ++ attrScripts) ++ inpageJs
getPageJavaScript:: Js.ParsedJsHtml -> IO [Js.ParsedStatement]
getPageJavaScript = getPageJavascript