-- ------------------------------------------------------------

{- |
   Module     : Text.XML.HXT.Arrow.WriteDocument
   Copyright  : Copyright (C) 2005-9 Uwe Schmidt
   License    : MIT

   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
   Stability  : experimental
   Portability: portable

   Compound arrow for writing XML documents

-}

-- ------------------------------------------------------------

module Text.XML.HXT.Arrow.WriteDocument
    ( writeDocument
    , writeDocumentToString
    , prepareContents
    )
where

import Control.Arrow                            -- arrow classes
import Control.Arrow.ArrowList
import Control.Arrow.ArrowIf
import Control.Arrow.ArrowTree

import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.Arrow.XmlIOStateArrow

import Text.XML.HXT.Arrow.Edit                  ( escapeHtmlDoc
                                                , escapeXmlDoc
                                                , haskellRepOfXmlDoc
                                                , indentDoc
                                                , addDefaultDTDecl
                                                , preventEmptyElements
                                                , removeDocWhiteSpace
                                                , treeRepOfXmlDoc
                                                )

import Text.XML.HXT.Arrow.DocumentOutput        ( putXmlDocument
                                                , encodeDocument
                                                , encodeDocument'
                                                )

-- ------------------------------------------------------------

{- |
the main filter for writing documents

this filter can be configured by an option list like 'Text.XML.HXT.Arrow.ReadDocument.readDocument'

usage: @ writeDocument optionList destination @

if @ destination @ is the empty string or \"-\", stdout is used as output device

available options are

* 'a_indent' : indent document for readability, (default: no indentation)

- 'a_remove_whitespace' : remove all redundant whitespace for shorten text (default: no removal)

- 'a_output_encoding' : encoding of document, default is 'a_encoding' or 'utf8'

- 'a_output_xml' : (default) issue XML: quote special XML chars \>,\<,\",\',& where neccessary
                   add XML processing instruction
                   and encode document with respect to 'a_output_encoding',
                   if explicitly switched of, the plain text is issued, this is useful
                   for non XML output, e.g. generated Haskell code, LaTex, Java, ...

- 'a_output_html' : issue XHTML: quote alle XML chars, use HTML entity refs or char refs for none ASCII chars

- 'a_no_empty_elements' : do not write the short form \<name .../\> for empty elements. When 'a_output_html' is set,
                          the always empty HTML elements are still written in short form, but not the others, as e.g. the script element.
                          Empty script elements, like \<script href=\"...\"/\>, are always a problem for firefox and others.
                          When XML output is generated with this option, all empty elements are written in the long form.

- 'a_no_empty_elem_for' : do not generate empty elements for the element names given in the comma separated list of this option value.
                          This option overwrites the above described 'a_no_empty_elements' option

- 'a_add_default_dtd' : if the document to be written was build by reading another document containing a Document Type Declaration,
                        this DTD is inserted into the output document (default: no insert)

- 'a_no_xml_pi' : suppress generation of \<?xml ... ?\> processing instruction

- 'a_show_tree' : show tree representation of document (for debugging)

- 'a_show_haskell' : show Haskell representaion of document (for debugging)

 a minimal main program for copying a document
 has the following structure:

> module Main
> where
>
> import Text.XML.HXT.Arrow
>
> main        :: IO ()
> main
>     = do
>       runX ( readDocument  [] "hello.xml"
>              >>>
>              writeDocument [] "bye.xml"
>            )
>       return ()

an example for copying a document to standard output with tracing and evaluation of
error code is:

> module Main
> where
>
> import Text.XML.HXT.Arrow
> import System.Exit
>
> main        :: IO ()
> main
>     = do
>       [rc] <- runX ( readDocument  [ (a_trace, "1")
>                                    ] "hello.xml"
>                      >>>
>                      writeDocument [ (a_output_encoding, isoLatin1)
>                                    ] "-"        -- output to stdout
>                      >>>
>                      getErrStatus
>                    )
>       exitWith ( if rc >= c_err
>                  then ExitFailure 1
>                  else ExitSuccess
>                )
-}

writeDocument   :: Attributes -> String -> IOStateArrow s XmlTree XmlTree
writeDocument userOptions dst
    = perform ( traceMsg 1 ("writeDocument: destination is " ++ show dst)
                >>>
                prepareContents userOptions encodeDocument
                >>>
                putXmlDocument textMode dst
                >>>
                traceMsg 1 "writeDocument: finished"
              )
      `when`
      documentStatusOk
    where
    textMode    = optionIsSet a_text_mode userOptions

-- ------------------------------------------------------------

-- |
-- Convert a document into a string. Formating is done the same way
-- and with the same options as in 'writeDocument'. Default output encoding is
-- no encoding, that means the result is a normal unicode encode haskell string.
-- The default may be overwritten with the 'Text.XML.HXT.XmlKeywords.a_output_encoding' option.
-- The XML PI can be suppressed by the 'Text.XML.HXT.XmlKeywords.a_no_xml_pi' option.
--
-- This arrow fails, when the encoding scheme is not supported.
-- The arrow is pure, it does not run in the IO monad.
-- The XML PI is suppressed, if not explicitly turned on with an
-- option @ (a_no_xml_pi, v_0) @

writeDocumentToString   :: ArrowXml a => Attributes  -> a XmlTree String
writeDocumentToString userOptions
    = prepareContents ( addEntries
                        userOptions
                        [ (a_output_encoding, unicodeString)
                        , (a_no_xml_pi, v_1)
                        ]
                      ) encodeDocument'
      >>>
      xshow getChildren

-- ------------------------------------------------------------

-- |
-- indent and format output

prepareContents :: ArrowXml a => Attributes -> (Bool -> String -> a XmlTree XmlTree) -> a XmlTree XmlTree
prepareContents userOptions encodeDoc
    = indent
      >>>
      addDtd
      >>>
      format
    where
    formatEmptyElems
        | not (null noEmptyElemFor)     = preventEmptyElements noEmptyElemFor
        | hasOption a_no_empty_elements
          ||
          hasOption a_output_xhtml      = preventEmptyElements []
        | otherwise                     = const this
    addDtd
        | hasOption a_add_default_dtd   = addDefaultDTDecl
        | otherwise                     = this
    indent
        | hasOption a_indent            = indentDoc                     -- document indentation
        | hasOption a_remove_whitespace = removeDocWhiteSpace           -- remove all whitespace between tags
        | otherwise                     = this

    format
        | hasOption a_show_tree         = treeRepOfXmlDoc
        | hasOption a_show_haskell      = haskellRepOfXmlDoc
        | hasOption a_output_html       = formatEmptyElems True
                                          >>>
                                          escapeHtmlDoc                 -- escape al XML and HTML chars >= 128
                                          >>>
                                          encodeDoc                     -- convert doc into text with respect to output encoding with ASCII as default
                                            suppressXmlPi ( lookupDef usAscii a_output_encoding options )
        | hasOption a_output_xml        = formatEmptyElems (hasOption a_output_xhtml)
                                          >>>
                                          escapeXmlDoc                  -- escape lt, gt, amp, quot,
                                          >>>
                                          encodeDoc                     -- convert doc into text with respect to output encoding
                                            suppressXmlPi ( lookupDef "" a_output_encoding options )
        | otherwise                     = this

    suppressXmlPi                                                       -- remove <?xml ... ?> when set
        = hasOption a_no_xml_pi

    noEmptyElemFor
        = words
          . map (\ c -> if c == ',' then ' ' else c)
          . lookup1 a_no_empty_elem_for
          $ options

    hasOption n
        = optionIsSet n options

    options = addEntries
              userOptions
              [ ( a_indent,             v_0 )
              , ( a_remove_whitespace,  v_0 )
              , ( a_output_xml,         v_1 )
              , ( a_show_tree,          v_0 )
              , ( a_show_haskell,       v_0 )
              , ( a_output_html,        v_0 )
              , ( a_output_xhtml,       v_0 )
              , ( a_no_xml_pi,          v_0 )
              , ( a_no_empty_elements,  v_0 )
              , ( a_no_empty_elem_for,  ""  )
              , ( a_add_default_dtd,    v_0 )
              ]

-- ------------------------------------------------------------