module Text.HTML.WraXML.Tree.HXT
(tidy, format, fromHTMLString, fromHTMLStringMetaEncoding,
errorAnnFromHTMLStringMetaEncoding,
errorAnnFromHTMLStringOpt, ErrorMsg,
getMetaHTTPHeaders, ) where
import qualified Text.XML.HXT.DOM.XmlNode as XmlNode
import qualified Text.XML.HXT.Arrow.XmlState.ErrorHandling as HXTError
import qualified Text.XML.HXT.Core as HXT
import Text.XML.HXT.Core (withInputEncoding, isoLatin1, yes)
import Text.XML.HXT.Arrow.XmlState (runX)
import Text.XML.HXT.DOM.QualifiedName (QName, qualifiedName)
import Text.XML.HXT.DOM.TypeDefs (XNode(..))
import Text.XML.HXT.DOM.ShowXml (xshow)
import qualified Data.Tree.Class as HXTTree
import Data.Tree.NTree.TypeDefs (NTree(NTree))
import qualified Text.XML.WraXML.Tree.HXT as WraHXT
import qualified Text.HTML.WraXML.Tree as HtmlTree
import qualified Text.XML.WraXML.Tree as XmlTree
import qualified Text.XML.Basic.Name.LowerCase as NameLC
import qualified Text.XML.Basic.Name as Name
import qualified Text.XML.Basic.Format as Format
import Text.XML.HXT.Parser.HtmlParsec (isEmptyHtmlTag)
import Control.Category ((>>>))
import Control.Monad (msum)
import Data.Maybe (fromMaybe)
tidy :: String -> IO String
tidy :: String -> IO String
tidy String
input =
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\[String
str] -> String
str) forall a b. (a -> b) -> a -> b
$
forall c. IOSArrow XmlTree c -> IO [c]
runX forall a b. (a -> b) -> a -> b
$
forall b s a. IO b -> IOSLA s a b
ioSLAFromIO (String -> IO XmlTree
fromHTMLString String
input)
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall (a :: * -> * -> *).
ArrowXml a =>
SysConfigList -> a XmlTree String
HXT.writeDocumentToString [SysConfig
HXT.withOutputHTML]
ioSLAFromIO :: IO b -> HXT.IOSLA s a b
ioSLAFromIO :: forall b s a. IO b -> IOSLA s a b
ioSLAFromIO IO b
act = forall s a b. (s -> a -> IO (s, [b])) -> IOSLA s a b
HXT.IOSLA forall a b. (a -> b) -> a -> b
$ \s
s a
_ -> forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\b
b -> (s
s,[b
b])) IO b
act
format :: HXT.XmlTree -> String
format :: XmlTree -> String
format XmlTree
leaf = XmlTrees -> ShowS
formatTrees (forall (t :: * -> *) a. Tree t => t a -> [t a]
HXTTree.getChildren XmlTree
leaf) String
""
formatTrees :: HXT.XmlTrees -> ShowS
formatTrees :: XmlTrees -> ShowS
formatTrees = forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr forall b c a. (b -> c) -> (a -> b) -> a -> c
(.) forall a. a -> a
id forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map XmlTree -> ShowS
formatTree
formatTree :: HXT.XmlTree -> ShowS
formatTree :: XmlTree -> ShowS
formatTree XmlTree
leaf =
case XmlTree
leaf of
(NTree (XPi QName
n XmlTrees
al) XmlTrees
_) ->
String -> ShowS
showString String
"<?"
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
QName -> ShowS
formatQName QName
n
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
(forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr forall b c a. (b -> c) -> (a -> b) -> a -> c
(.) forall a. a -> a
id forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map XmlTree -> ShowS
showPiAttr) XmlTrees
al
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
String -> ShowS
showString String
"?>"
where
showPiAttr :: HXT.XmlTree -> String -> String
showPiAttr :: XmlTree -> ShowS
showPiAttr a :: XmlTree
a@(NTree (XAttr QName
an) XmlTrees
cs) =
if QName -> String
qualifiedName QName
an forall a. Eq a => a -> a -> Bool
== String
HXT.a_value
then ShowS
Format.blank forall b c a. (b -> c) -> (a -> b) -> a -> c
. XmlTrees -> ShowS
formatTrees XmlTrees
cs
else XmlTree -> ShowS
formatTree XmlTree
a
showPiAttr XmlTree
_ = forall a. a -> a
id
(NTree (XTag QName
t XmlTrees
al) XmlTrees
cs) ->
if forall (t :: * -> *) a. Foldable t => t a -> Bool
null XmlTrees
cs Bool -> Bool -> Bool
&& String -> Bool
isEmptyHtmlTag (QName -> String
qualifiedName QName
t)
then ShowS
Format.lt forall b c a. (b -> c) -> (a -> b) -> a -> c
. QName -> ShowS
formatQName QName
t forall b c a. (b -> c) -> (a -> b) -> a -> c
. XmlTrees -> ShowS
formatTrees XmlTrees
al forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
Format.gt
else ShowS
Format.lt forall b c a. (b -> c) -> (a -> b) -> a -> c
. QName -> ShowS
formatQName QName
t forall b c a. (b -> c) -> (a -> b) -> a -> c
. XmlTrees -> ShowS
formatTrees XmlTrees
al forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
Format.gt
forall b c a. (b -> c) -> (a -> b) -> a -> c
. XmlTrees -> ShowS
formatTrees XmlTrees
cs
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
Format.lt forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
Format.slash forall b c a. (b -> c) -> (a -> b) -> a -> c
. QName -> ShowS
formatQName QName
t forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
Format.gt
(NTree (XAttr QName
an) XmlTrees
cs) ->
ShowS
Format.blank forall b c a. (b -> c) -> (a -> b) -> a -> c
. QName -> ShowS
formatQName QName
an forall b c a. (b -> c) -> (a -> b) -> a -> c
. ShowS
Format.eq forall b c a. (b -> c) -> (a -> b) -> a -> c
.
String -> ShowS
Format.stringQuoted (XmlTrees -> ShowS
formatTrees XmlTrees
cs String
"")
(NTree (XError Int
l String
e) XmlTrees
_) ->
String -> ShowS
showString String
"<!-- ERROR (" forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ShowS
showString (forall a. Show a => a -> String
show Int
l) forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ShowS
showString String
"):\n"
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ShowS
showString String
e forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ShowS
showString String
"\n-->"
XmlTree
_ -> (XmlTrees -> String
xshow [XmlTree
leaf] forall a. [a] -> [a] -> [a]
++)
formatQName :: QName -> ShowS
formatQName :: QName -> ShowS
formatQName = String -> ShowS
showString forall b c a. (b -> c) -> (a -> b) -> a -> c
. QName -> String
qualifiedName
findMetaEncoding :: String -> IO (Maybe String)
findMetaEncoding :: String -> IO (Maybe String)
findMetaEncoding String
str =
do [T () T String]
htmlTrees <- forall name.
(Tag name, Attribute name) =>
String -> IO [T () name String]
xmlTreesFromHTMLString String
str
forall (m :: * -> *) a. Monad m => a -> m a
return (forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, MonadPlus m) =>
t (m a) -> m a
msum (forall a b. (a -> b) -> [a] -> [b]
map forall name i.
(Tag name, Attribute name) =>
T i name String -> Maybe String
HtmlTree.findMetaEncoding
([T () T String]
htmlTrees :: [XmlTree.T () NameLC.T String])))
getMetaHTTPHeaders :: String -> IO [(String, String)]
String
str =
do [T () T String]
htmlTrees <- forall name.
(Tag name, Attribute name) =>
String -> IO [T () name String]
xmlTreesFromHTMLString String
str
forall (m :: * -> *) a. Monad m => a -> m a
return (forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap forall name i.
(Tag name, Attribute name) =>
T i name String -> [(String, String)]
HtmlTree.getMetaHTTPHeaders
([T () T String]
htmlTrees :: [XmlTree.T () NameLC.T String]))
xmlTreesFromHTMLString ::
(Name.Tag name, Name.Attribute name) =>
String -> IO [XmlTree.T () name String]
xmlTreesFromHTMLString :: forall name.
(Tag name, Attribute name) =>
String -> IO [T () name String]
xmlTreesFromHTMLString String
str =
do XmlTree
hxtTree <- String -> IO XmlTree
fromHTMLString String
str
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$
forall a b. (a -> b) -> [a] -> [b]
map (forall i name. T i name T -> T i name String
XmlTree.unescape forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall name. (Tag name, Attribute name) => XmlTree -> T () name T
WraHXT.toXmlTree)
(forall a. (a -> Bool) -> [a] -> [a]
filter (String -> XNode -> Bool
WraHXT.checkTagName String
"html" forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) a. Tree t => t a -> a
HXTTree.getNode)
(forall (t :: * -> *) a. Tree t => t a -> [t a]
HXTTree.getChildren XmlTree
hxtTree))
fromHTMLStringMetaEncoding :: String -> IO HXT.XmlTree
fromHTMLStringMetaEncoding :: String -> IO XmlTree
fromHTMLStringMetaEncoding String
str =
do Maybe String
enc <- String -> IO (Maybe String)
findMetaEncoding String
str
SysConfigList -> String -> IO XmlTree
fromHTMLStringOpt [String -> SysConfig
withInputEncoding forall a b. (a -> b) -> a -> b
$ forall a. a -> Maybe a -> a
fromMaybe String
isoLatin1 Maybe String
enc] String
str
fromHTMLString :: String -> IO HXT.XmlTree
fromHTMLString :: String -> IO XmlTree
fromHTMLString = SysConfigList -> String -> IO XmlTree
fromHTMLStringOpt [String -> SysConfig
withInputEncoding String
isoLatin1]
fromHTMLStringOpt :: HXT.SysConfigList -> String -> IO HXT.XmlTree
fromHTMLStringOpt :: SysConfigList -> String -> IO XmlTree
fromHTMLStringOpt SysConfigList
options String
input =
do (XmlTree
tree,[ErrorMsg]
_,Maybe Int
_) <- SysConfigList -> String -> IO (XmlTree, [ErrorMsg], Maybe Int)
errorAnnFromHTMLStringOpt SysConfigList
options String
input
forall (m :: * -> *) a. Monad m => a -> m a
return XmlTree
tree
type ErrorMsg = (Int,String)
{-# WARNING errorAnnFromHTMLStringMetaEncoding
"error collection does not work currently" #-}
errorAnnFromHTMLStringMetaEncoding ::
String -> IO (HXT.XmlTree, [ErrorMsg], Maybe Int)
errorAnnFromHTMLStringMetaEncoding :: String -> IO (XmlTree, [ErrorMsg], Maybe Int)
errorAnnFromHTMLStringMetaEncoding String
str = do
Maybe String
enc <- String -> IO (Maybe String)
findMetaEncoding String
str
SysConfigList -> String -> IO (XmlTree, [ErrorMsg], Maybe Int)
errorAnnFromHTMLStringOpt [String -> SysConfig
withInputEncoding forall a b. (a -> b) -> a -> b
$ forall a. a -> Maybe a -> a
fromMaybe String
isoLatin1 Maybe String
enc] String
str
{-# WARNING errorAnnFromHTMLStringOpt
"error collection does not work currently" #-}
errorAnnFromHTMLStringOpt ::
HXT.SysConfigList -> String -> IO (HXT.XmlTree, [ErrorMsg], Maybe Int)
errorAnnFromHTMLStringOpt :: SysConfigList -> String -> IO (XmlTree, [ErrorMsg], Maybe Int)
errorAnnFromHTMLStringOpt = SysConfigList -> String -> IO (XmlTree, [ErrorMsg], Maybe Int)
errorAnnFromHTMLStringInternal
errorAnnFromHTMLStringInternal ::
HXT.SysConfigList -> String -> IO (HXT.XmlTree, [ErrorMsg], Maybe Int)
errorAnnFromHTMLStringInternal :: SysConfigList -> String -> IO (XmlTree, [ErrorMsg], Maybe Int)
errorAnnFromHTMLStringInternal SysConfigList
options String
contents = do
(XmlTree
root_:XmlTrees
errs) <-
forall c. IOSArrow XmlTree c -> IO [c]
runX forall a b. (a -> b) -> a -> b
$
forall s b. IOStateArrow s b b
HXT.errorMsgCollect
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall s b. SysConfigList -> String -> IOStateArrow s b XmlTree
HXT.readString (Bool -> SysConfig
HXT.withParseHTML Bool
yes forall a. a -> [a] -> [a]
: SysConfigList
options) String
contents
forall {k} (cat :: k -> k -> *) (a :: k) (b :: k) (c :: k).
Category cat =>
cat a b -> cat b c -> cat a c
>>>
forall s. IOStateArrow s XmlTree XmlTree
HXTError.filterErrorMsg
let elvl :: Maybe Int
elvl = forall a. XmlNode a => a -> Maybe Int
XmlNode.getErrorLevel XmlTree
root_
let errMsgs :: [ErrorMsg]
errMsgs =
forall a b. (a -> b) -> [a] -> [b]
map ((\(XError Int
level String
msg) -> (Int
level, String
msg)) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (t :: * -> *) a. Tree t => t a -> a
HXTTree.getNode) XmlTrees
errs
forall (m :: * -> *) a. Monad m => a -> m a
return (XmlTree
root_, [ErrorMsg]
errMsgs, Maybe Int
elvl)