-- ------------------------------------------------------------

{- |
   Module     : Text.XML.HXT.Arrow.TagSoupInterface
   Copyright  : Copyright (C) 2010 Uwe Schmidt
   License    : MIT

   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
   Stability  : stable
   Portability: portable

   Interface for TagSoup Parser

-}

-- ------------------------------------------------------------

module Text.XML.HXT.Arrow.TagSoupInterface
where

import Control.Arrow
import Control.Arrow.ArrowList
import Control.Arrow.ArrowTree

import Data.String.Unicode            ( normalizeNL )

import Text.XML.HXT.DOM.Interface

import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.Arrow.XmlState
import Text.XML.HXT.Arrow.XmlState.TypeDefs

import qualified Text.XML.HXT.Parser.TagSoup as TS

-- ------------------------------------------------------------

{- |
   The system config option to enable the tagsoup parser

Here is an example, how to use it:

> ...
> import Text.HXT.XML.Core
> import Text.HXT.XML.TagSoup
> ...
>
> readDocument [ withTagSoup ] "some-file.xml"
> ...

reads the given document and parses it with the lazy tagsoup parser.
There is no validation enabled.
-}


withTagSoup                     :: SysConfig
withTagSoup                     = setS (theTagSoup       .&&&.
                                        theExpat         .&&&.
                                        theTagSoupParser
                                       ) (True, (False, parseHtmlTagSoup))

-- | Turns off tagsoup parsing. The build in HXT parser will be used.

withoutTagSoup                  :: SysConfig
withoutTagSoup                  = setS theTagSoup False

-- ------------------------------------------------------------

-- | The Tagsoup parser arrow

parseHtmlTagSoup                :: IOSArrow XmlTree XmlTree
parseHtmlTagSoup                = parse
                                  $< getSysVar
                                     (theCheckNamespaces .&&&.
                                      theWarnings        .&&&.
                                      thePreserveComment .&&&.
                                      theRemoveWS        .&&&.
                                      theLowerCaseNames
                                     )
    where
    parse (withNamespaces', (withWarnings', (preserveCmt', (removeWS', lowerCaseNames'))))
                                = traceMsg 1 ("parse document with tagsoup " ++
                                              ( if lowerCaseNames' then "HT" else "X" ) ++ "ML parser"
                                             )
                                  >>>
                                  replaceChildren
                                  ( ( getAttrValue a_source               -- get source name
                                      &&&
                                      (xshow getChildren >>^ normalizeNL) -- get string to be parsed and normalize newline char
                                    )
                                    >>>
                                    arr2L (TS.parseHtmlTagSoup withNamespaces' withWarnings' preserveCmt' removeWS' lowerCaseNames')
                                  )

-- ------------------------------------------------------------