module Text.XML.HXT.Arrow.TagSoupInterface
where
import Control.Arrow
import Control.Arrow.ArrowList
import Control.Arrow.ArrowTree
import Data.String.Unicode ( normalizeNL )
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.Arrow.XmlArrow
import Text.XML.HXT.Arrow.XmlState
import Text.XML.HXT.Arrow.XmlState.TypeDefs
import qualified Text.XML.HXT.Parser.TagSoup as TS
withTagSoup :: SysConfig
withTagSoup = setS (theTagSoup .&&&.
theExpat .&&&.
theTagSoupParser
) (True, (False, parseHtmlTagSoup))
withoutTagSoup :: SysConfig
withoutTagSoup = setS theTagSoup False
parseHtmlTagSoup :: IOSArrow XmlTree XmlTree
parseHtmlTagSoup = parse
$< getSysVar
(theCheckNamespaces .&&&.
theWarnings .&&&.
thePreserveComment .&&&.
theRemoveWS .&&&.
theLowerCaseNames
)
where
parse (withNamespaces', (withWarnings', (preserveCmt', (removeWS', lowerCaseNames'))))
= traceMsg 1 ("parse document with tagsoup " ++
( if lowerCaseNames' then "HT" else "X" ) ++ "ML parser"
)
>>>
replaceChildren
( ( getAttrValue a_source
&&&
(xshow getChildren >>^ normalizeNL)
)
>>>
arr2L (TS.parseHtmlTagSoup withNamespaces' withWarnings' preserveCmt' removeWS' lowerCaseNames')
)