-- ------------------------------------------------------------

{- |
   Module     : Text.XML.HXT.Arrow.WriteDocument
   Copyright  : Copyright (C) 2005-9 Uwe Schmidt
   License    : MIT

   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
   Stability  : experimental
   Portability: portable

   Compound arrow for writing XML documents

-}

-- ------------------------------------------------------------

module Text.XML.HXT.Arrow.WriteDocument
    ( writeDocument
    , writeDocumentToString
    , prepareContents
    )
where

import Control.Arrow				-- arrow classes
import Control.Arrow.ArrowList
import Control.Arrow.ArrowIf
import Control.Arrow.ArrowTree

import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.Arrow.XmlIOStateArrow

import Text.XML.HXT.Arrow.Edit			( escapeHtmlDoc
						, escapeXmlDoc
						, haskellRepOfXmlDoc
						, indentDoc
                                                , addDefaultDTDecl
						, preventEmptyElements
						, removeDocWhiteSpace
						, treeRepOfXmlDoc
						)

import Text.XML.HXT.Arrow.DocumentOutput	( putXmlDocument
						, encodeDocument
						, encodeDocument'
						)

-- ------------------------------------------------------------

{- |
the main filter for writing documents

this filter can be configured by an option list like 'Text.XML.HXT.Arrow.ReadDocument.readDocument'

usage: @ writeDocument optionList destination @

if @ destination @ is the empty string or \"-\", stdout is used as output device

available options are

* 'a_indent' : indent document for readability, (default: no indentation)

- 'a_remove_whitespace' : remove all redundant whitespace for shorten text (default: no removal)

- 'a_output_encoding' : encoding of document, default is 'a_encoding' or 'utf8'

- 'a_output_xml' : (default) issue XML: quote special XML chars \>,\<,\",\',& where neccessary
                   add XML processing instruction
                   and encode document with respect to 'a_output_encoding',
                   if explicitly switched of, the plain text is issued, this is useful
                   for non XML output, e.g. generated Haskell code, LaTex, Java, ...

- 'a_output_html' : issue XHTML: quote alle XML chars, use HTML entity refs or char refs for none ASCII chars

- 'a_no_empty_elements' : do not write the short form \<name .../\> for empty elements. When 'a_output_html' is set,
                          the always empty HTML elements are still written in short form, but not the others, as e.g. the script element.
                          Empty script elements, like \<script href=\"...\"/\>, are always a problem for firefox and others.
                          When XML output is generated with this option, all empty elements are written in the long form.

- 'a_no_empty_elem_for' : do not generate empty elements for the element names given in the comma separated list of this option value.
                          This option overwrites the above described 'a_no_empty_elements' option

- 'a_add_default_dtd' : if the document to be written was build by reading another document containing a Document Type Declaration,
                        this DTD is inserted into the output document (default: no insert)

- 'a_no_xml_pi' : suppress generation of \<?xml ... ?\> processing instruction

- 'a_show_tree' : show tree representation of document (for debugging)

- 'a_show_haskell' : show Haskell representaion of document (for debugging)

 a minimal main program for copying a document
 has the following structure:

> module Main
> where
> 
> import Text.XML.HXT.Arrow
> 
> main        :: IO ()
> main
>     = do
>       runX ( readDocument  [] "hello.xml"
>              >>>
>              writeDocument [] "bye.xml"
>            )
>       return ()

an example for copying a document to standard output with tracing and evaluation of
error code is:

> module Main
> where
> 
> import Text.XML.HXT.Arrow
> import System.Exit
> 
> main        :: IO ()
> main
>     = do
>       [rc] <- runX ( readDocument  [ (a_trace, "1")
>                                    ] "hello.xml"
>                      >>>
>                      writeDocument [ (a_output_encoding, isoLatin1)
>                                    ] "-"        -- output to stdout
>                      >>>
>                      getErrStatus
>                    )
>       exitWith ( if rc >= c_err
>                  then ExitFailure 1
>                  else ExitSuccess
>                )
-}

writeDocument	:: Attributes -> String -> IOStateArrow s XmlTree XmlTree
writeDocument userOptions dst
    = perform ( traceMsg 1 ("writeDocument: destination is " ++ show dst)
		>>>
		prepareContents userOptions encodeDocument
		>>>
		putXmlDocument textMode dst
		>>>
		traceMsg 1 "writeDocument: finished"
	      )
      `when`
      documentStatusOk
    where
    textMode	= optionIsSet a_text_mode userOptions

-- ------------------------------------------------------------

-- |
-- Convert a document into a string. Formating is done the same way
-- and with the same options as in 'writeDocument'. Default output encoding is
-- no encoding, that means the result is a normal unicode encode haskell string.
-- The default may be overwritten with the 'Text.XML.HXT.XmlKeywords.a_output_encoding' option.
-- The XML PI can be suppressed by the 'Text.XML.HXT.XmlKeywords.a_no_xml_pi' option.
--
-- This arrow fails, when the encoding scheme is not supported.
-- The arrow is pure, it does not run in the IO monad.
-- The XML PI is suppressed, if not explicitly turned on with an
-- option @ (a_no_xml_pi, v_0) @

writeDocumentToString	:: ArrowXml a => Attributes  -> a XmlTree String
writeDocumentToString userOptions
    = prepareContents ( addEntries
                        userOptions
                        [ (a_output_encoding, unicodeString)
                        , (a_no_xml_pi, v_1)
                        ]
                      ) encodeDocument'
      >>>
      xshow getChildren

-- ------------------------------------------------------------

-- |
-- indent and format output

prepareContents	:: ArrowXml a => Attributes -> (Bool -> String -> a XmlTree XmlTree) -> a XmlTree XmlTree
prepareContents userOptions encodeDoc
    = indent
      >>>
      addDtd
      >>>
      format
    where
    formatEmptyElems
        | not (null noEmptyElemFor)     = preventEmptyElements noEmptyElemFor
	| hasOption a_no_empty_elements
          ||
          hasOption a_output_xhtml      = preventEmptyElements []
	| otherwise                     = const this
    addDtd
        | hasOption a_add_default_dtd   = addDefaultDTDecl
        | otherwise                     = this
    indent
	| hasOption a_indent		= indentDoc			-- document indentation
	| hasOption a_remove_whitespace	= removeDocWhiteSpace		-- remove all whitespace between tags
	| otherwise			= this

    format
	| hasOption a_show_tree		= treeRepOfXmlDoc
	| hasOption a_show_haskell	= haskellRepOfXmlDoc
	| hasOption a_output_html	= formatEmptyElems True
					  >>>
					  escapeHtmlDoc			-- escape al XML and HTML chars >= 128
					  >>>
					  encodeDoc			-- convert doc into text with respect to output encoding with ASCII as default
					    suppressXmlPi ( lookupDef usAscii a_output_encoding options )
	| hasOption a_output_xml	= formatEmptyElems (hasOption a_output_xhtml)
					  >>>
					  escapeXmlDoc			-- escape lt, gt, amp, quot, 
					  >>>
					  encodeDoc			-- convert doc into text with respect to output encoding
					    suppressXmlPi ( lookupDef "" a_output_encoding options )
	| otherwise			= this

    suppressXmlPi							-- remove <?xml ... ?> when set
	= hasOption a_no_xml_pi

    noEmptyElemFor
        = words
          . map (\ c -> if c == ',' then ' ' else c)
          . lookup1 a_no_empty_elem_for
          $ options

    hasOption n
	= optionIsSet n options

    options = addEntries 
              userOptions 
	      [ ( a_indent,		v_0 )
	      , ( a_remove_whitespace,	v_0 )
	      , ( a_output_xml,		v_1 )
	      , ( a_show_tree,		v_0 )
	      , ( a_show_haskell,	v_0 )
	      , ( a_output_html,	v_0 )
	      , ( a_output_xhtml,	v_0 )
	      , ( a_no_xml_pi,          v_0 )
	      , ( a_no_empty_elements,  v_0 )
              , ( a_no_empty_elem_for,  ""  )
              , ( a_add_default_dtd,    v_0 )
	      ]

-- ------------------------------------------------------------