-- |Crawls an HTML page for JavaScript
module WebBits.JavaScript.Crawl 
  ( getPageJavaScript
  ) where

import WebBits.Common
import Control.Monad
import Data.Char (toLower)
import Data.Generics
import System.IO
import Text.ParserCombinators.Parsec(parse,setPosition,incSourceColumn,Column,sourceLine,sourceColumn)

import WebBits.Html.Syntax
import qualified WebBits.JavaScript.JavaScript as Js

-- |Returns the source of the script.
scriptSrc:: Js.ParsedJsHtml -> [String]
scriptSrc (Element tag attrs _ _) | (map toLower tag) == "script" =
  case attributeValue "src" attrs of -- TODO: Check for type="javascript"?
    Just ""  -> []
    Just url -> [url]
    Nothing  -> []
scriptSrc _ =
  []

-- |Returns a list of URIs for external Javascript files referenced in the page.
importedScripts:: Js.ParsedJsHtml -> [String]
importedScripts = everything (++) (mkQ [] scriptSrc)

-- |Returns the top-level statements of a script.
scriptText :: Js.ParsedJsHtml -> [Js.ParsedStatement]
scriptText (Script (Js.Script _ stmts) _) = stmts
scriptText _ = []

eventHandlers :: [String]
eventHandlers = ["onload","onclick"]; 
-- ,"onmousemove","onmouseover","onmousedown","onmouseout","onmouseup","onselectstart", "onkeypress"]

attrScript :: Attribute SourcePos Js.ParsedJavaScript 
           -> IO [Js.ParsedStatement]
attrScript (Attribute id val loc) | id `elem` eventHandlers = do
  let eventId = drop 2 id -- drop the "on" prefix
  let scriptText = if "javascript:" `isPrefixOf` val then drop 11 val else val
  let eventListenerPrefix = "addEventListener('" ++ eventId ++ "', function(event) { "
  let prefixLen = length eventListenerPrefix
  let eventListenerText = eventListenerPrefix ++ scriptText ++ " });"
  let parser = do
        setPosition (incSourceColumn loc (-prefixLen))
        Js.parseExpression
  case parse parser (sourceName loc) eventListenerText of
    Left err -> do
      fail $ "Error parsiing JavaScript in an attribute at " ++ show loc ++
             "\nThe script was:\n\n" ++ eventListenerText
    Right e -> return [Js.ExprStmt loc e]
attrScript _ = return []

inpageAttrScripts :: Js.ParsedJsHtml -> IO [Js.ParsedStatement]
inpageAttrScripts = everything (liftM2 (++)) (mkQ (return []) attrScript)

inpageScripts :: Js.ParsedJsHtml -> [Js.ParsedStatement]
inpageScripts = everything (++) (mkQ [] scriptText)

parseJsFile path = do
  text <- readFile path
  case Js.parseScriptFromString path text of
    Left err -> fail (show err)
    Right js -> hPutStrLn stderr ("Read file " ++ path) >> return js

-- |Given an HTML page, crawls all external Javascript files and returns a list
-- of statements, concatenated from all files.
getPageJavascript:: Js.ParsedJsHtml -> IO [Js.ParsedStatement]
getPageJavascript page = do
  let importURIs = importedScripts page
  let inpageJs   = inpageScripts page
  attrScripts <- inpageAttrScripts page
  importedScripts <- mapM parseJsFile importURIs
  return $ (concatMap Js.scriptStatements importedScripts ++ attrScripts) ++ inpageJs

getPageJavaScript:: Js.ParsedJsHtml -> IO [Js.ParsedStatement] -- monomorphism
getPageJavaScript = getPageJavascript