-- ------------------------------------------------------------ {- | Module : Text.XML.HXT.Arrow.DocumentOutput Copyright : Copyright (C) 2005-9 Uwe Schmidt License : MIT Maintainer : Uwe Schmidt (uwe@fh-wedel.de) Stability : experimental Portability: portable State arrows for document output -} -- ------------------------------------------------------------ module Text.XML.HXT.Arrow.DocumentOutput ( putXmlDocument , putXmlTree , putXmlSource , encodeDocument , encodeDocument' ) where import Control.Arrow import Control.Arrow.ArrowExc import Control.Arrow.ArrowIf import Control.Arrow.ArrowIO import Control.Arrow.ArrowList import Control.Arrow.ArrowTree import Control.Arrow.ListArrow import qualified Data.ByteString.Lazy as BS import Data.Maybe import Data.String.Unicode (getOutputEncodingFct') import Text.XML.HXT.DOM.Interface import qualified Text.XML.HXT.DOM.ShowXml as XS import Text.XML.HXT.Arrow.Edit (addHeadlineToXmlDoc, addXmlPi, addXmlPiEncoding, escapeHtmlRefs, escapeXmlRefs, indentDoc, numberLinesInXmlDoc, treeRepOfXmlDoc) import Text.XML.HXT.Arrow.XmlArrow import Text.XML.HXT.Arrow.XmlState import Text.XML.HXT.Arrow.XmlState.TypeDefs import System.IO (Handle, IOMode (..), hClose, hPutStrLn, hSetBinaryMode, openBinaryFile, openFile, stdout) -- ------------------------------------------------------------ -- -- | Write the contents of a document tree into an output stream (file or stdout). -- -- If textMode is set, writing is done with Haskell string output, else (default) -- writing is done with lazy ByteString output putXmlDocument :: Bool -> String -> IOStateArrow s XmlTree XmlTree putXmlDocument textMode dst = perform putDoc where putDoc = ( if textMode then ( xshow getChildren >>> tryA (arrIO (\ s -> hPutDocument (\h -> hPutStrLn h s))) ) else ( xshowBlob getChildren >>> tryA (arrIO (\ s -> hPutDocument (\h -> do BS.hPutStr h s BS.hPutStr h (stringToBlob "\n") ) ) ) ) ) >>> ( ( traceMsg 1 ("io error, document not written to " ++ outFile) >>> arr show >>> mkError c_fatal >>> filterErrorMsg ) ||| ( traceMsg 2 ("document written to " ++ outFile ++ ", textMode = " ++ show textMode) >>> none ) ) where isStdout = null dst || dst == "-" outFile = if isStdout then "stdout" else show dst hPutDocument :: (Handle -> IO ()) -> IO () hPutDocument action | isStdout = do hSetBinaryMode stdout (not textMode) action stdout hSetBinaryMode stdout False | otherwise = do handle <- ( if textMode then openFile else openBinaryFile ) dst WriteMode action handle hClose handle -- | -- write the tree representation of a document to a file putXmlTree :: String -> IOStateArrow s XmlTree XmlTree putXmlTree dst = perform ( treeRepOfXmlDoc >>> addHeadlineToXmlDoc >>> putXmlDocument True dst ) -- | -- write a document with indentaion and line numers putXmlSource :: String -> IOStateArrow s XmlTree XmlTree putXmlSource dst = perform ( (this ) `whenNot` isRoot >>> indentDoc >>> numberLinesInXmlDoc >>> addHeadlineToXmlDoc >>> putXmlDocument True dst ) -- ------------------------------------------------------------ getEncodingParam :: IOStateArrow s XmlTree String getEncodingParam = catA [ getSysVar theOutputEncoding -- 4. guess: take output encoding parameter from global state , getSysVar theInputEncoding -- 5. guess: take encoding parameter from global state , constA utf8 -- default : utf8 ] >. (head . filter (not . null)) getOutputEncoding :: String -> IOStateArrow s XmlTree String getOutputEncoding defaultEnc = getEC $< getEncodingParam where getEC enc' = fromLA $ getOutputEncoding' defaultEnc enc' encodeDocument :: Bool -> Bool -> String -> IOStateArrow s XmlTree XmlTree encodeDocument quoteXml supressXmlPi defaultEnc = encode $< getOutputEncoding defaultEnc where encode enc = traceMsg 2 ("encodeDocument: encoding is " ++ show enc) >>> ( encodeDocument' quoteXml supressXmlPi enc `orElse` ( issueFatal ("encoding scheme not supported: " ++ show enc) >>> setDocumentStatusFromSystemState "encoding document" ) ) -- ------------------------------------------------------------ isBinaryDoc :: LA XmlTree XmlTree isBinaryDoc = ( ( getAttrValue transferMimeType >>^ stringToLower ) >>> isA (\ t -> not (null t || isTextMimeType t || isXmlMimeType t)) ) `guards` this getOutputEncoding' :: String -> String -> LA XmlTree String getOutputEncoding' defaultEnc defaultEnc2 = catA [ isBinaryDoc >>> -- 0. guess: binary data found: no encoding at all constA isoLatin1 -- the content should usually be a blob -- this handling is like the decoding in DocumentInput, -- there nothing is decoded for non text or non xml contents , getChildren -- 1. guess: evaluate >>> ( ( isPi >>> hasName t_xml ) `guards` getAttrValue a_encoding ) , constA defaultEnc -- 2. guess: explicit parameter, may be "" , getAttrValue a_output_encoding -- 3. guess: take output encoding parameter in root node , constA defaultEnc2 -- default : UNICODE or utf8 ] >. (head . filter (not . null)) -- make the filter deterministic: take 1. entry from list of guesses encodeDocument' :: ArrowXml a => Bool -> Bool -> String -> a XmlTree XmlTree encodeDocument' quoteXml supressXmlPi defaultEnc = fromLA (encode $< getOutputEncoding' defaultEnc utf8) where encode :: String -> LA XmlTree XmlTree encode encodingScheme | encodingScheme == unicodeString = replaceChildren ( (getChildren >. XS.xshow'' cQuot aQuot) >>> mkText ) | isNothing encodeFct = none | otherwise = ( if supressXmlPi then processChildren (none `when` isXmlPi) else ( addXmlPi >>> addXmlPiEncoding encodingScheme ) ) >>> ( isLatin1Blob `orElse` encodeDoc (fromJust encodeFct) ) >>> addAttr a_output_encoding encodingScheme where (cQuot, aQuot) | quoteXml = escapeXmlRefs | otherwise = escapeHtmlRefs encodeFct = getOutputEncodingFct' encodingScheme encodeDoc ef = replaceChildren ( xshowBlobWithEnc cQuot aQuot ef getChildren >>> mkBlob ) xshowBlobWithEnc cenc aenc enc f = f >. XS.xshow' cenc aenc enc -- if encoding scheme is isolatin1 and the contents is a single blob (bytestring) -- the encoding is the identity. -- This optimization enables processing (copying) of none XML contents -- without any conversions from and to strings isLatin1Blob | encodingScheme /= isoLatin1 = none | otherwise = childIsSingleBlob `guards` this where childIsSingleBlob = listA getChildren >>> isA (length >>> (== 1)) >>> unlistA >>> isBlob -- ------------------------------------------------------------