module Text.Webrexp.HaXmlNode( HaXmLNode ) where
import Control.Applicative
import Data.Maybe( catMaybes )
import Data.List( find )
import Network.HTTP
import Network.URI
import Text.XML.HaXml.Types
import Text.XML.HaXml.Posn
import Text.XML.HaXml.Parse
import Text.XML.HaXml.Html.Parse
import System.Directory
import qualified Text.Webrexp.ProjectByteString as B
import Text.Webrexp.IOMock
import Text.Webrexp.GraphWalker
import Text.Webrexp.ResourcePath
import Text.Webrexp.Remote.MimeTypes
import Text.Webrexp.UnionNode
type HaXmLNode = Content Posn
instance PartialGraph HaXmLNode ResourcePath where
isResourceParseable _ _ ParseableXML = True
isResourceParseable _ _ _ = False
parseResource _ _ ParseableXML bindata =
case xmlParse' "" $ B.unpack bindata of
Left _ -> return Nothing
Right (Document _prolog _ e _) -> return . Just $ CElem e noPos
parseResource _ _ _ _ = error "Cannot parse"
haxmlNameToString :: QName -> String
haxmlNameToString (N n) = n
haxmlNameToString (QN _ n) = n
instance GraphWalker HaXmLNode ResourcePath where
accessGraph = loadHtml
attribOf attrName (CElem (Elem _ attrList _) _) =
show . snd <$> find nameFinder attrList
where nameFinder (n,_) = haxmlNameToString n == attrName
attribOf _ _ = Nothing
childrenOf = return . pureChildren
nameOf (CElem (Elem n _ _) _) = Just $ haxmlNameToString n
nameOf _ = Nothing
indirectLinks n =
catMaybes [ attribOf "href" n >>= importPath
, attribOf "src" n >>= importPath ]
isHistoryMutable _ = False
valueOf (CString _ sdata _) = sdata
valueOf a = case pureChildren a of
(CString _ txt _:_) -> txt
_ -> ""
rawAccess = accessResourcePath
pureChildren :: HaXmLNode -> [HaXmLNode]
pureChildren (CElem (Elem _ _ children) _) = children
pureChildren _ = []
parserOfKind :: Maybe ParseableType
-> ResourcePath
-> B.ByteString -> AccessResult HaXmLNode ResourcePath
parserOfKind Nothing datapath = DataBlob datapath
parserOfKind (Just ParseableHTML) datapath = \file ->
let (Document _prolog _ e _) = htmlParse "" $ B.unpack file
in Result datapath $ CElem e noPos
parserOfKind (Just ParseableXML) datapath = \file ->
case xmlParse' "" $ B.unpack file of
Left _ -> AccessError
Right (Document _prolog _ e _) -> Result datapath $ CElem e noPos
parserOfKind (Just ParseableJson) datapath = DataBlob datapath
loadHtml :: (Monad m, IOMockable m)
=> Loggers m -> ResourcePath
-> m (AccessResult HaXmLNode ResourcePath)
loadHtml (logger, _errLog, _verbose) datapath@(Local s) = do
logger $ "Opening file : '" ++ s ++ "'"
realFile <- performIO $ doesFileExist s
case realFile of
Just True -> performIO (B.readFile s) >>=
maybe (return AccessError)
(let kind = getParseKind s
in return . parserOfKind kind datapath)
_ -> return AccessError
loadHtml loggers@(logger, _, verbose) datapath@(Remote uri) = do
logger $ "Downloading URL : '" ++ show uri ++ "'"
(u, rsp) <- downloadBinary loggers uri
let contentType = retrieveHeaders HdrContentType rsp
case contentType of
[] -> return AccessError
(hdr:_) ->
let logString = "Downloaded (" ++ show u ++ ") ["
++ hdrValue hdr ++ "] "
kind = getParseKind (uriPath uri)
in do verbose logString
return . parserOfKind kind datapath $ rspBody rsp