module Text.XMLParser.XMLParser (
xmlParser
,parseXML
,showXML
,Tag(..)
) where
import Text.ParserCombinators.Parsec
import Control.Monad
import Data.Monoid
import Data.Either
import Data.List
import Data.Char
keys = map fst
escapeCodes =
[("gt",'>')
,("lt",'<')
,("amp",'&')
,("quot",'\"')
,("apos",'\'')]
data Tag = Tag [(String,String)] String (Maybe [Tag])
| TagString String
| TagCData String
deriving (Show,Eq)
spaceOut p = between (many space) (many space) p
attr = do
attr <- spaceOut $ many1 (satisfy (liftM2 ((not.).(||)) isSpace (`elem` "=<>")))
string "="
value <- spaceOut $ between (string "\"") (string "\"") (many1 (noneOf "\""))
<|> between (string "'") (string "'") (many1 (noneOf "'"))
return (attr,value)
many1Till p e = do
notFollowedBy e
x <- p
xs <- manyTill p (try e)
return (x:xs)
many1Till' p e = do
xs <- many1Till p (lookAhead e)
y <- e
return (xs,y)
openTag = do
string "<"
skipMany space
many1Till' anyChar $ do
attrs <- spaceOut $ try (many attr)
string ">"
return attrs
closeTag str = do
x <- string "</"
y <- string str
z <- (string ">")
return y
sparse p t = parse p "" t
sameConstructor (Left _) (Left _) = True
sameConstructor (Right _) (Right _) = True
sameConstructor _ _ = False
tagWithoutContent = do
(x,a) <- tagWithoutContent'
return (Tag a x Nothing)
tagWithoutContent' = do
string "<"
skipMany space
many1Till' anyChar $ do
attrs <- spaceOut $ try (many (try attr))
string "/>"
return attrs
groupEithers :: [Either a b] -> [Either [a] [b]]
groupEithers xs = map (\xs@(x:_) -> either (const (Left (lefts xs))) (const (Right (rights xs))) x) (groupBy sameConstructor xs)
closedTag = do
(x,a) <- openTag
let cd = fmap Right cdata'
let frag = fmap Left fragmentParse
let text = fmap Right (noneOf "><" >>= return . (:[]) )
y <- manyTill ( try cd <|> try frag <|> text) (try $ closeTag x)
let s = map (either head ( TagString . replaceEntities . concat )) (groupEithers y)
return (Tag a x (Just s))
fragmentParse = do
b <- closedTag
return b
xmlParser = do
x <- try tagWithoutContent <|> fragmentParse
eof
return x
showAttribute (x,y) = x++"="++"\""++y++"\""
showAttributes atts = concat $ intersperse " " $ map showAttribute atts
showXML (Tag attr str inner) = case inner of
Nothing -> "<"++str++attr''++"/>"
(Just xs) -> "<"++str++attr''++">"++(concatMap showXML xs)++"</"++str++">"
where attr'' = if null attr' then attr'
else " "++attr'
attr' = showAttributes attr
showXML (TagString str) = str
showXML (TagCData str) =str
escapeCode = do
char '&'
x <- choice (map (\(k,v) -> string k >> return v ) escapeCodes)
char ';'
return [x]
w = parse escapeCode "" "<"
cdata = do
x <- cdata'
return (TagCData x)
cdata' = do
string "<![CDATA["
many1Till anyChar (string "]]>")
q = parse cdata "" "<![CDATA[ hi ]]>"
replaceEntities = (\(Right x) -> x) . parse (replaceParser' escapeCode) ""
replaceParser parser = (\(Right x) ->x) . parse (replaceParser' parser) ""
replaceParser' parser = replaceParser'' parser (anyChar >>=(return . (:[])))
replaceParser'' parser p2 = do
xs <- many (try parser <|> p2 )
return (concat xs)
parseXML = parse xmlParser ""
main = do
let str = "<one green=\"blue\"> < five <two> <![CDATA[ hello </two> ]]> </two> </one>"
print str
let rs = map (parse xmlParser "") [str,"<single/>","<nocont></nocont>"]
mapM_ (either (const (print "fail")) (\x -> mapM_ putStrLn [showXML x,show x] )) rs