module Text.XML.HXT.DOM.EditFilters
( canonicalizeTree
, canonicalizeAllNodes
, canonicalizeForXPath
, collapseXText
, collapseAllXText
, indentDoc
, removeWhiteSpace
, removeAllWhiteSpace
, removeDocWhiteSpace
, removeComment
, removeAllComment
, transfCdata , transfAllCdata
, transfCdataEscaped , transfAllCdataEscaped
, transfCharRef , transfAllCharRef
, escapeXmlDoc
, escapeXmlText
, escapeXmlAttrValue
, unparseXmlDoc
, numberLinesInXmlDoc , numberLines
, treeRepOfXmlDoc
, haskellRepOfXmlDoc
, addHeadlineToXmlDoc
, addXmlPiToDoc
)
where
import Text.XML.HXT.DOM.XmlTree
import Text.XML.HXT.DOM.Unicode
( getOutputEncodingFct)
import Text.XML.HXT.DOM.FormatXmlTree
( formatXmlContents )
import Data.Maybe
removeComment :: XmlFilter
removeComment = none `when` isXCmt
removeAllComment :: XmlFilter
removeAllComment = processBottomUp removeComment
removeWhiteSpace :: XmlFilter
removeWhiteSpace = none `when` isWhiteSpace
removeAllWhiteSpace :: XmlFilter
removeAllWhiteSpace = processBottomUp removeWhiteSpace
transfAllCdataEscaped :: XmlFilter
transfAllCdataEscaped = processBottomUp transfCdataEscaped
transfCdataEscaped :: XmlFilter
transfCdataEscaped (NTree (XCdata str) _)
= xtext str
transfCdataEscaped n
= [n]
transfAllCdata :: XmlFilter
transfAllCdata = processBottomUp transfCdata
transfCdata :: XmlFilter
transfCdata (NTree (XCdata str) _)
= xtext str
transfCdata n
= [n]
transfCharRef :: XmlFilter
transfCharRef (NTree (XCharRef i) _)
= xtext $ [toEnum i]
transfCharRef n
= [n]
transfAllCharRef :: XmlFilter
transfAllCharRef = processBottomUp transfCharRef
canonicalizeTree :: XmlFilter -> XmlFilter
canonicalizeTree toBeRemoved
= processChildren (none `when` isXText)
.>
processBottomUp canonicalize1Node
where
canonicalize1Node :: XmlFilter
canonicalize1Node
= (deep isXPi `when` isXDTD)
.>
(none `when` toBeRemoved)
.>
( processAttr ( processChildren transfCharRef
.>
collapseXText
) `when` isXTag)
.>
transfCdata
.>
transfCharRef
.>
collapseXText
canonicalizeAllNodes :: XmlFilter
canonicalizeAllNodes
= canonicalizeTree (isXCmt
+++
isPi t_xml
)
canonicalizeForXPath :: XmlFilter
canonicalizeForXPath
= canonicalizeTree (isPi t_xml)
collapseXText :: XmlFilter
collapseXText n
= replaceChildren (collapseXText' $ getChildren n) n
where
collapseXText' :: XmlSFilter
collapseXText' ((NTree n1 _) : (NTree n2 _) : zs)
| isXTextNode n1 && isXTextNode n2
= collapseXText' $ (xtext (t1 ++ t2)) ++ zs
where
t1 = textOfXNode n1
t2 = textOfXNode n2
collapseXText' (x:xs)
= x : (collapseXText' xs)
collapseXText' []
= []
collapseAllXText :: XmlFilter
collapseAllXText = processBottomUp collapseXText
indentDoc :: XmlFilter
indentDoc
= modifyChildren (indentRootChildren $$)
`when`
isRoot
where
indentRootChildren :: XmlFilter
indentRootChildren
= insertNL `o` indentChild `o` removeText
where
removeText
= none `when` isXText
insertNL
= this +++ txt "\n"
indentChild
= modifyChildren (indentTrees (insertIndentation 2) False 1)
`whenNot`
isXDTD
removeDocWhiteSpace :: XmlFilter
removeDocWhiteSpace
= processChildren processRootElement
`when`
isRoot
where
processRootElement :: XmlFilter
processRootElement
= removeText .> processChild
where
removeText
= none `when` isWhiteSpace
processChild
= choice [ isXDTD
:-> removeAllWhiteSpace
, this
:-> modifyChildren
(indentTrees insertNothing False 1)
]
indentTrees :: (Int -> XmlFilter) -> Bool -> Int -> XmlTrees -> XmlTrees
indentTrees _ _ _ []
= []
indentTrees indentFilter preserveSpace level ts
= (lsf $$ ls) ++ indentRest rs
where
(ls, rs)
= break (satisfies isXTag) ts
isSignificant :: Bool
isSignificant
= preserveSpace
||
any (satisfies isSignificantPart) ls
isSignificantPart :: XmlFilter
isSignificantPart
= cat [ isXText `guards` neg isWhiteSpace
, isXCdata
, isXCharRef
, isXEntityRef
]
lsf :: XmlFilter
lsf
| isSignificant
= this
| otherwise
= (none `when` isWhiteSpace)
.>
(indentFilter level +++ this)
indentRest :: XmlSFilter
indentRest []
| isSignificant
= []
| otherwise
= indentFilter (level 1) (mkXTextTree "")
indentRest (t':ts')
= ( (modifyChildren indentChildren .> lsf)
`when`
isXTag
$ t'
)
++
( if null ts'
then indentRest ts'
else indentTrees indentFilter preserveSpace level ts'
)
where
xmlSpaceAttrName = "xml:space"
xmlSpaceAttrValue = valueOf xmlSpaceAttrName t'
preserveSpace'
= fromMaybe preserveSpace
.
lookup xmlSpaceAttrValue
$ [ ("preserve", True)
, ("default", False)
]
indentChildren
| all (satisfies isWhiteSpace) $ getChildren t'
= (none $$)
| otherwise
= indentTrees indentFilter preserveSpace' (level + 1)
insertIndentation :: Int -> Int -> XmlFilter
insertIndentation indentWidth level
= txt ('\n' : replicate (level * indentWidth) ' ')
insertNothing :: Int -> XmlFilter
insertNothing _ = none
escapeString :: (Char -> Bool) -> String -> XmlTrees
escapeString _isEsc []
= []
escapeString isEsc (c:s1)
| isEsc c
= mkXCharRefTree (fromEnum c) : escapeString isEsc s1
escapeString isEsc s
= mkXTextTree s1 : escapeString isEsc s2
where
(s1, s2) = break isEsc s
escapeText :: (Char -> Bool) -> XmlFilter
escapeText isEsc (NTree n _)
| isXTextNode n
= escapeString isEsc (textOfXNode n)
escapeText isEsc (NTree (XCmt c) _)
= xcmt (xshow . escapeString isEsc $ c)
escapeText _ t
= [t]
escapeXmlText :: XmlFilter
escapeXmlText = escapeText (`elem` "<&")
escapeXmlAttrValue :: XmlFilter
escapeXmlAttrValue = escapeText (`elem` "<>\"\'&\n\r\t")
escapeXmlDoc :: XmlFilter
escapeXmlDoc
= choice [ isXTag :-> (processChildren (escapeXmlDoc)
.>
processAttr escVal
)
, isXText :-> escapeXmlText
, isXDTD :-> processTopDown escDTD
, this :-> this
]
where
escVal = processChildren escapeXmlAttrValue
escDTD = escVal `when` (isEntity +++ isParameterEntity)
unparseXmlDoc :: XmlFilter
unparseXmlDoc n
= modifyChildren ((modifyText encFct $$) . xmlTreesToText) n
where
encFct = fromMaybe id (getOutputEncodingFct (encSpec n))
encSpec :: XmlTree -> String
encSpec n
= head . filter (not . null)
$ [ valueOf a_output_encoding n
, valueOf a_encoding n
, utf8
]
addXmlPiToDoc :: XmlFilter
addXmlPiToDoc n
= ( modifyChildren addX
`whenNot`
(getChildren .> isPi t_xml)
)
.>
processChildren ( addAttr a_encoding enc
`when`
isPi t_xml
)
$ n
where
enc = encSpec n
addX cs = mkXmlDeclTree (xattr a_version "1.0") : xtext "\n" ++ cs
numberLinesInXmlDoc :: XmlFilter
numberLinesInXmlDoc
= modifyChildren ((modifyText numberLines $$) . xmlTreesToText)
numberLines :: String -> String
numberLines str
= concat $
zipWith (\ n l -> lineNr n ++ l ++ "\n") [1..] (lines str)
where
lineNr :: Int -> String
lineNr n = (reverse (take 6(reverse (show n) ++ replicate 6 ' '))) ++ " "
treeRepOfXmlDoc :: XmlFilter
treeRepOfXmlDoc
= rootTag [getAttrl] [formatXmlContents]
haskellRepOfXmlDoc :: XmlFilter
haskellRepOfXmlDoc n
= rootTag [getAttrl] [txt $ show n] n
addHeadlineToXmlDoc :: XmlFilter
addHeadlineToXmlDoc n
= replaceChildren (xtext title ++ getChildren n ++ xtext "\n") n
where
headline = "content of: " ++ valueOf a_source n
underline = map (\_ -> '=') headline
title = "\n" ++ headline ++ "\n" ++ underline ++ "\n\n"