module Text.XML.HXT.Parser.XmlInput
( getXmlContents
, getXmlEntityContents
, getUrlContents
, getContentLength
, guessDocEncoding
, runInLocalURIContext
, runInNewURIContext
, getBaseURI
, setBaseURI
, getAbsolutURI
, isStandaloneDocument
)
where
import Text.XML.HXT.DOM.XmlTree
import Text.XML.HXT.DOM.XmlState
import Text.XML.HXT.Parser.DefaultURI
( setDefaultURI
)
import Text.XML.HXT.Parser.XmlParser
( parseXmlDocEncodingSpec
, parseXmlEntityEncodingSpec
, removeEncodingSpec
)
import Text.XML.HXT.DOM.Unicode
( getDecodingFct
, guessEncoding
, normalizeNL
)
import Text.XML.HXT.Parser.XmlOutput
( traceTree
, traceSource
, traceMsg
)
import Text.XML.HXT.Parser.ProtocolHandler
( getProtocolHandler
)
import Network.URI
( parseURIReference
, relativeTo
, uriScheme
)
import Data.Maybe
getXmlContents' :: XmlFilter -> XmlStateFilter a
getXmlContents' parseEncodingSpec
= getContent'
`whenM` isRoot
where
getContent' t'
= ( liftMf (setStatus c_ok "getXmlContents")
.>>
getUrlContents
.>>
liftF parseEncodingSpec
.>>
guessDocEncoding
.>>
traceMsg 1 ("getXmlContents: content read and decoded from " ++ show input')
.>>
traceTree
.>>
traceSource
) t'
where
input' = valueOf a_source t'
getXmlContents :: XmlStateFilter a
getXmlContents
= getXmlContents' parseXmlDocEncodingSpec
.>>
setBaseURIFilter
.>>
setStandAloneFilter
getXmlEntityContents :: XmlStateFilter a
getXmlEntityContents
= getXmlContents' parseXmlEntityEncodingSpec
.>>
liftF (processChildren removeEncodingSpec)
.>>
setBaseURIFilter
setBaseURIFilter :: XmlStateFilter a
setBaseURIFilter
= performAction (\ t -> setBaseURI (valueOf transferURI t))
`whenM` isRoot
runInLocalURIContext :: XmlStateFilter a -> XmlStateFilter a
runInLocalURIContext f t
= do
oldContext <- getBaseURI
trace 2 ("runInLocalURIContext: save base URI " ++ show oldContext)
res <- f t
setBaseURI oldContext
trace 2 ("runInLocalURIContext: restore base URI " ++ show oldContext)
return res
runInNewURIContext :: String -> XmlStateFilter a -> XmlStateFilter a
runInNewURIContext uri f
| null uri
= f
| otherwise
= runInLocalURIContext
( \ t -> ( do
trace 2 ("runInNewURIContext: new base URI " ++ show uri)
setBaseURI uri
f t
)
)
guessDocEncoding :: XmlStateFilter a
guessDocEncoding
= addDocEncoding
`whenM` isRoot
where
addDocEncoding n'
= do
trace 2 ( "guessDocEncoding: encoding is " ++ show guess)
( encFilter (getDecodingFct guess)
.>>
issueError
.>>
liftMf (addAttr transferEncoding guess) ) n'
where
guess :: String
guess = head . filter (not . null)
$ [ (guessEncoding . showXText . getChildren) n'
, valueOf transferEncoding n'
, valueOf a_encoding n'
, utf8
]
encFilter (Just fct)
= decodeDoc fct
encFilter Nothing
= addFatal ("encoding scheme not supported: " ++ show guess)
decodeDoc df n''
| null errs = liftMf (replaceChildren (xtext (normalizeNL res))) $ n''
| otherwise = ( liftMf (replaceChildren (xtext ""))
.>>
( issueDecodingErrs
+++>>
addFatal "decoding errors detected"
)
) n''
where
str = xshow . getChildren $ n''
(res, errs) = df str
issueDecodingErrs = catM . map (issueErr . (guess ++) . (" encoding error" ++)) $ errs
getDefaultURI :: XState state String
getDefaultURI
= do
uri <- getSysParam transferDefaultURI
if null uri
then do
setDefaultURI
getDefaultURI
else return uri
setBaseURI :: String -> XState state ()
setBaseURI str
= do
trace 2 ("setBaseURI: new base URI: " ++ show str)
setSysParam transferURI str
getBaseURI :: XState state String
getBaseURI
= do
uri <- getSysParam transferURI
if null uri
then do
res <- getDefaultURI
setBaseURI res
getBaseURI
else return uri
getAbsolutURI :: String -> XState state String
getAbsolutURI uri
= do
baseUri <- getBaseURI
return $ expandURI uri baseUri
expandURI :: String -> String -> String
expandURI uri base
= fromMaybe "" $ expand
where
expand = do
base' <- parseURIReference base
uri' <- parseURIReference uri
abs' <- relativeTo uri' base'
return $ show abs'
setStandAloneFilter :: XmlStateFilter a
setStandAloneFilter
= performAction setStandAlone
where
setStandAlone t
= do
if null standalone
then return ()
else do
trace 2 ("setStandAloneFilter: standalone=" ++ show standaloneVal)
setSysParam a_standalone standaloneVal
where
standalone = getValue a_standalone t
standaloneVal = showXText standalone
isStandaloneDocument :: XState state Bool
isStandaloneDocument
= do
val <- getSysParam a_standalone
return (val == "yes")
getUrlContents :: XmlStateFilter a
getUrlContents
= getCont
`whenM` isRoot
where
getCont n'
= do
trace 1 ("getUrlContent: reading " ++ show src)
uri <- getAbsolutURI src
if null uri
then urlErr ( "illegal URI for input: " ++ show src )
else let
uri' = fromJust $ parseURIReference uri
proto = init . uriScheme $ uri'
handler = getProtocolHandler proto
in
( liftMf (addAttr transferProtocol proto
.>
addAttr transferURI uri
)
.>>
handler uri'
) $ n'
where
src = valueOf a_source n'
urlErr msg = addFatal msg n'
getContentLength :: XmlFilter
getContentLength
= addAttrl contentLengthAttr
`when` isRoot
where
http_contentLength = httpPrefix ++ "Content-Length"
contentLengthAttr :: XmlFilter
contentLengthAttr t
= choice [ hasAttr a_contentLength
:-> none
, hasAttr http_contentLength
:-> mkXAttr a_contentLength (getValue http_contentLength)
, this
:-> mkXAttr a_contentLength getLength
] t
getLength :: XmlFilter
getLength t
= xtext (show . length . xshow . getChildren $ t)