-- ------------------------------------------------------------

{- |
   Module     : Text.XML.HXT.Arrow.TagSoupInterface
   Copyright  : Copyright (C) 2005 Uwe Schmidt
   License    : MIT

   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
   Stability  : stable
   Portability: portable

   Interface for TagSoup Parser

-}

-- ------------------------------------------------------------

module Text.XML.HXT.Arrow.TagSoupInterface
where

import Control.Arrow
import Control.Arrow.ArrowList
import Control.Arrow.ArrowTree

import Text.XML.HXT.DOM.Interface

import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.Arrow.XmlState
import Text.XML.HXT.Arrow.XmlState.TypeDefs

import qualified Text.XML.HXT.Parser.TagSoup as TS

-- ------------------------------------------------------------

-- | enable TagSoup parsing

withTagSoup                     :: SysConfig
withTagSoup                     = setS (theTagSoup .&&&. theTagSoupParser) (True, parseHtmlTagSoup)

-- ------------------------------------------------------------

-- | The Tagsoup parser arrow

parseHtmlTagSoup                :: IOSArrow XmlTree XmlTree
parseHtmlTagSoup                = parse
                                  $< getSysVar
                                     (theCheckNamespaces .&&&.
                                      theWarnings        .&&&.
                                      thePreserveComment .&&&.
                                      theRemoveWS        .&&&.
                                      theLowerCaseNames
                                     )
    where
    parse (withNamespaces', (withWarnings', (preserveCmt', (removeWS', lowerCaseNames'))))
                                = traceMsg 1 ("parse document with tagsoup " ++
                                              ( if lowerCaseNames' then "HT" else "X" ) ++ "ML parser"
                                             )
                                  >>>
                                  replaceChildren
                                  ( ( getAttrValue a_source               -- get source name
                                      &&&
                                      xshow getChildren
                                    )                                     -- get string to be parsed
                                    >>>
                                    arr2L (TS.parseHtmlTagSoup withNamespaces' withWarnings' preserveCmt' removeWS' lowerCaseNames')
                                  )

-- ------------------------------------------------------------