{-# LANGUAGE TypeSynonymInstances #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Webrexp.HaXmlNode( HaXmLNode ) where

import Control.Applicative
import Control.Monad.IO.Class
import Data.Maybe( catMaybes )
import Network.HTTP
import Network.URI
import Text.XML.HaXml.Types
import Text.XML.HaXml.Posn
import Text.XML.HaXml.Parse
import Text.XML.HaXml.Html.Parse
import System.Directory

import qualified Webrexp.ProjectByteString as B

import Webrexp.GraphWalker
import Webrexp.ResourcePath
import Webrexp.Remote.MimeTypes
import Webrexp.UnionNode

type HaXmLNode = Content Posn

instance PartialGraph HaXmLNode ResourcePath where
    dummyElem = undefined

    isResourceParseable _ _ ParseableHTML = True
    isResourceParseable _ _ ParseableXML = True
    isResourceParseable _ _ _ = False

    parseResource _ ParseableHTML bindata = Just $ CElem e noPos
        where (Document _prolog _ e _) = htmlParse "" $ B.unpack bindata
    parseResource _ ParseableXML bindata =
        case xmlParse' "" $ B.unpack bindata of
            Left _ -> Nothing
            Right (Document _prolog _ e _) -> Just $ CElem e noPos
    parseResource _ _ _ = error "Cannot parse"

instance GraphWalker HaXmLNode ResourcePath where
    accessGraph = loadHtml


    attribOf attrName (CElem (Elem _ attrList _) _) =
        show <$> lookup attrName attrList
    attribOf _ _ = Nothing

    childrenOf = return . pureChildren

    nameOf (CElem (Elem n _ _) _) = Just n
    nameOf _ = Nothing

    indirectLinks n =
        catMaybes [ attribOf "href" n >>= importPath
                  , attribOf "src" n >>= importPath ]

    isHistoryMutable _ = False

    valueOf (CString _ sdata _) = sdata
    valueOf a = case pureChildren a of
       (CString _ txt _:_) -> txt
       _ -> ""

pureChildren :: HaXmLNode -> [HaXmLNode]
pureChildren (CElem (Elem _ _ children) _) = children
pureChildren _ = []

parserOfKind :: Maybe ParseableType
             -> ResourcePath
             -> B.ByteString -> AccessResult HaXmLNode ResourcePath
parserOfKind Nothing datapath = DataBlob datapath
parserOfKind (Just ParseableHTML) datapath = \file ->
    let (Document _prolog _ e _) = htmlParse "" $ B.unpack file
    in Result datapath $ CElem e noPos
parserOfKind (Just ParseableXML) datapath = \file ->
    case xmlParse' "" $ B.unpack file of
         Left _ -> AccessError
         Right (Document _prolog _ e _) -> Result datapath $ CElem e noPos
parserOfKind (Just ParseableJson) datapath = DataBlob datapath

-- | Given a resource path, do the required loading
loadHtml :: (MonadIO m)
         => Loggers -> ResourcePath
         -> m (AccessResult HaXmLNode ResourcePath)
loadHtml (logger, _errLog, _verbose) datapath@(Local s) = do
    liftIO . logger $ "Opening file : '" ++ s ++ "'"
    realFile <- liftIO $ doesFileExist s
    if not realFile
       then return AccessError
       else do file <- liftIO $ B.readFile s
       	       let kind = getParseKind s
       	       return $ parserOfKind kind datapath file

loadHtml loggers@(logger, _, verbose) datapath@(Remote uri) = do
  liftIO . logger $ "Downloading URL : '" ++ show uri ++ "'"
  (u, rsp) <- downloadBinary loggers uri
  let contentType = retrieveHeaders HdrContentType rsp
  case contentType of
    [] -> return AccessError
    (hdr:_) ->
       let logString = "Downloaded (" ++ show u ++ ") ["
                        ++ hdrValue hdr ++ "] "
           
       	   kind = getParseKind (uriPath uri)
       in do liftIO $ verbose logString
             return . parserOfKind kind datapath $ rspBody rsp