module Text.XML.HaXml.XmlContent.Parser
(
Document(..)
, Element(..)
, ElemTag(..)
, Content(..)
, Attribute()
, AttValue(..)
, Prolog(..)
, Reference(..)
, XmlContent(..)
, XmlAttributes(..)
, XmlAttrType(..)
, module Text.ParserCombinators.Poly
, XMLParser
, content, posnElement, element, interior, inElement, text, attributes
, posnElementWith, elementWith, inElementWith
, choice, definite
, mkElem, mkElemC, mkAttr
, toText, toCData
, maybeToAttr, defaultToAttr
, definiteA, defaultA, possibleA, fromAttrToStr, toAttrFrStr
, Defaultable(..)
, str2attr, attr2str, attval
, catMaybes
, module Text.XML.HaXml.TypeMapping
, List1(..)
, ANYContent(..)
) where
import Maybe (catMaybes)
import Char (chr, isSpace)
import List (isPrefixOf, isSuffixOf)
import Text.PrettyPrint.HughesPJ (render)
import qualified Text.XML.HaXml.Pretty as PP
import Text.XML.HaXml.Types
import Text.XML.HaXml.TypeMapping
import Text.XML.HaXml.Posn (Posn, posInNewCxt)
import Text.XML.HaXml.Pretty (document)
import Text.XML.HaXml.Parse (xmlParse)
import Text.XML.HaXml.Verbatim (Verbatim(verbatim))
import qualified Text.XML.HaXml.ByteStringPP as FPS (document)
import qualified Data.ByteString.Lazy.Char8 as FPS
import Text.ParserCombinators.Poly
#if defined(DEBUG)
import Debug.Trace(trace)
debug :: a -> String -> a
v `debug` s = trace s v
#else
debug :: t -> t1 -> t
v `debug` _ = v
#endif
attval :: (Read a) => Element i -> a
attval (Elem _ [("value",v@(AttValue _))] []) = read (show v)
mkAttr :: String -> String -> Attribute
mkAttr n v = (n, AttValue [Left v])
mkElem :: XmlContent a => a -> [Content ()] -> Content ()
mkElem x cs = CElem (Elem (showHType (toHType x) "") [] cs) ()
mkElemC :: String -> [Content ()] -> Content ()
mkElemC x cs = CElem (Elem x [] cs) ()
toText :: String -> [Content ()]
toText s = [CString False s ()]
toCData :: String -> [Content ()]
toCData s = [CString True s ()]
type XMLParser a = Parser (Content Posn) a
content :: String -> XMLParser (Content Posn)
content word = next `adjustErr` (++" when expecting "++word)
posnElementWith :: (String->String->Bool) -> [String]
-> XMLParser (Posn, Element Posn)
posnElementWith match tags = do
{ c <- content (formatted tags)
; case c of
CElem e@(Elem t _ _) pos
| any (match t) tags -> return (pos, e)
| otherwise -> fail ("Found a <"++t++">, but expected "
++formatted tags++"\nat "++show pos)
CString b s pos
| not b && all isSpace s -> posnElementWith match tags
| otherwise -> fail ("Found text content, but expected "
++formatted tags++"\ntext is: "++s
++"\nat "++show pos)
CRef r pos -> fail ("Found reference, but expected "
++formatted tags++"\nreference is: "++verbatim r
++"\nat "++show pos)
CMisc _ _ -> posnElementWith match tags
}
where
formatted [t] = "a <"++t++">"
formatted tgs = "one of"++ concatMap (\t->" <"++t++">") tgs
posnElement :: [String] -> XMLParser (Posn, Element Posn)
posnElement = posnElementWith (==)
element :: [String] -> XMLParser (Element Posn)
element tags = fmap snd (posnElement tags)
`debug` ("Element: "++unwords tags++"\n")
elementWith :: (String->String->Bool) -> [String] -> XMLParser (Element Posn)
elementWith match tags = fmap snd (posnElementWith match tags)
`debug` ("Element: "++unwords tags++"\n")
interior :: Element Posn -> XMLParser a -> XMLParser a
interior (Elem e _ cs) p =
case runParser p cs of
(Left msg, _) -> fail msg
(Right x, []) -> return x
(Right x, ds@(d:_))
| all onlyMisc ds -> return x
| otherwise -> fail ("Too many elements inside <"++e++"> at\n"
++show (info d)++"\n"
++"Found excess: "++verbatim d)
where onlyMisc (CMisc _ _) = True
onlyMisc (CString False s _) | all isSpace s = True
onlyMisc _ = False
inElement :: String -> XMLParser a -> XMLParser a
inElement tag p = do { e <- element [tag]; commit (interior e p) }
inElementWith :: (String->String->Bool) -> String -> XMLParser a -> XMLParser a
inElementWith match tag p = do { e <- elementWith match [tag]
; commit (interior e p) }
attributes :: XmlAttributes a => Element Posn -> XMLParser a
attributes (Elem _ as _) = return (fromAttrs as)
text :: XMLParser String
text = text' []
where text' acc =
do { c <- content "plain text"
; case c of
CString _ s _ -> text' (s:acc)
CRef (RefChar s) _ -> text' (("&#"++show s++";") :acc)
CRef (RefEntity s) _ -> text' (('&':s++";"):acc)
CMisc _ _ -> text' acc
CElem _ _ -> do { reparse [c]
; if null acc then fail "empty string"
else return (concat (reverse acc))
}
}
`onFail` ( if null acc then fail "empty string"
else return (concat (reverse acc)) )
choice :: XmlContent a => (a -> b) -> XMLParser b -> XMLParser b
choice cons (P other) =
P (\cs-> case runParser parseContents cs of
(Left _, _) -> other cs
(Right x, cs') -> Success cs' (cons x) )
definite :: XmlContent a => XMLParser a -> String -> String -> XMLParser a
definite p inner tag = P (\cs-> case runParser p cs of
(Left _, cs') -> Failure cs' msg'
(Right x, cs') -> Success cs' x )
where msg' = "content error: expected "++inner++" inside <"++tag
++"> element\n"
class HTypeable a => XmlContent a where
parseContents :: XMLParser a
toContents :: a -> [Content ()]
xToChar :: a -> Char
xFromChar :: Char -> a
xToChar = error "HaXml.XmlContent.xToChar used in error"
xFromChar = error "HaXml.XmlContent.xFromChar used in error"
class XmlAttributes a where
fromAttrs :: [Attribute] -> a
toAttrs :: a -> [Attribute]
class XmlAttrType a where
fromAttrToTyp :: String -> Attribute -> Maybe a
toAttrFrTyp :: String -> a -> Maybe Attribute
instance (XmlContent a, XmlContent b) => XmlContent (a,b) where
toContents (a,b) = toContents a ++ toContents b
parseContents = do
{ a <- parseContents
; b <- parseContents
; return (a,b)
}
instance (XmlContent a, XmlContent b, XmlContent c) => XmlContent (a,b,c) where
toContents (a,b,c) = toContents a ++ toContents b ++ toContents c
parseContents = do
{ a <- parseContents
; b <- parseContents
; c <- parseContents
; return (a,b,c)
}
instance (XmlContent a, XmlContent b, XmlContent c, XmlContent d) =>
XmlContent (a,b,c,d) where
toContents (a,b,c,d) = toContents a ++ toContents b ++ toContents c
++ toContents d
parseContents = do
{ a <- parseContents
; b <- parseContents
; c <- parseContents
; d <- parseContents
; return (a,b,c,d)
}
instance ( XmlContent a, XmlContent b, XmlContent c, XmlContent d
, XmlContent e ) =>
XmlContent (a,b,c,d,e) where
toContents (a,b,c,d,e) = toContents a ++ toContents b ++ toContents c
++ toContents d ++ toContents e
parseContents = do
{ a <- parseContents
; b <- parseContents
; c <- parseContents
; d <- parseContents
; e <- parseContents
; return (a,b,c,d,e)
}
instance ( XmlContent a, XmlContent b, XmlContent c, XmlContent d
, XmlContent e, XmlContent f ) =>
XmlContent (a,b,c,d,e,f) where
toContents (a,b,c,d,e,f) = toContents a ++ toContents b ++ toContents c
++ toContents d ++ toContents e ++ toContents f
parseContents = do
{ a <- parseContents
; b <- parseContents
; c <- parseContents
; d <- parseContents
; e <- parseContents
; f <- parseContents
; return (a,b,c,d,e,f)
}
instance ( XmlContent a, XmlContent b, XmlContent c, XmlContent d
, XmlContent e, XmlContent f, XmlContent g ) =>
XmlContent (a,b,c,d,e,f,g) where
toContents (a,b,c,d,e,f,g)
= toContents a ++ toContents b ++ toContents c ++ toContents d
++ toContents e ++ toContents f ++ toContents g
parseContents = do
{ a <- parseContents
; b <- parseContents
; c <- parseContents
; d <- parseContents
; e <- parseContents
; f <- parseContents
; g <- parseContents
; return (a,b,c,d,e,f,g)
}
instance ( XmlContent a, XmlContent b, XmlContent c, XmlContent d
, XmlContent e, XmlContent f, XmlContent g, XmlContent h ) =>
XmlContent (a,b,c,d,e,f,g,h) where
toContents (a,b,c,d,e,f,g,h)
= toContents a ++ toContents b ++ toContents c ++ toContents d
++ toContents e ++ toContents f ++ toContents g ++ toContents h
parseContents = do
{ a <- parseContents
; b <- parseContents
; c <- parseContents
; d <- parseContents
; e <- parseContents
; f <- parseContents
; g <- parseContents
; h <- parseContents
; return (a,b,c,d,e,f,g,h)
}
instance ( XmlContent a, XmlContent b, XmlContent c, XmlContent d
, XmlContent e, XmlContent f, XmlContent g, XmlContent h
, XmlContent i ) =>
XmlContent (a,b,c,d,e,f,g,h,i) where
toContents (a,b,c,d,e,f,g,h,i)
= toContents a ++ toContents b ++ toContents c ++ toContents d
++ toContents e ++ toContents f ++ toContents g ++ toContents h
++ toContents i
parseContents = do
{ a <- parseContents
; b <- parseContents
; c <- parseContents
; d <- parseContents
; e <- parseContents
; f <- parseContents
; g <- parseContents
; h <- parseContents
; i <- parseContents
; return (a,b,c,d,e,f,g,h,i)
}
instance ( XmlContent a, XmlContent b, XmlContent c, XmlContent d
, XmlContent e, XmlContent f, XmlContent g, XmlContent h
, XmlContent i, XmlContent j ) =>
XmlContent (a,b,c,d,e,f,g,h,i,j) where
toContents (a,b,c,d,e,f,g,h,i,j)
= toContents a ++ toContents b ++ toContents c ++ toContents d
++ toContents e ++ toContents f ++ toContents g ++ toContents h
++ toContents i ++ toContents j
parseContents = do
{ a <- parseContents
; b <- parseContents
; c <- parseContents
; d <- parseContents
; e <- parseContents
; f <- parseContents
; g <- parseContents
; h <- parseContents
; i <- parseContents
; j <- parseContents
; return (a,b,c,d,e,f,g,h,i,j)
}
instance ( XmlContent a, XmlContent b, XmlContent c, XmlContent d
, XmlContent e, XmlContent f, XmlContent g, XmlContent h
, XmlContent i, XmlContent j, XmlContent k ) =>
XmlContent (a,b,c,d,e,f,g,h,i,j,k) where
toContents (a,b,c,d,e,f,g,h,i,j,k)
= toContents a ++ toContents b ++ toContents c ++ toContents d
++ toContents e ++ toContents f ++ toContents g ++ toContents h
++ toContents i ++ toContents j ++ toContents k
parseContents = do
{ a <- parseContents
; b <- parseContents
; c <- parseContents
; d <- parseContents
; e <- parseContents
; f <- parseContents
; g <- parseContents
; h <- parseContents
; i <- parseContents
; j <- parseContents
; k <- parseContents
; return (a,b,c,d,e,f,g,h,i,j,k)
}
instance ( XmlContent a, XmlContent b, XmlContent c, XmlContent d
, XmlContent e, XmlContent f, XmlContent g, XmlContent h
, XmlContent i, XmlContent j, XmlContent k, XmlContent l ) =>
XmlContent (a,b,c,d,e,f,g,h,i,j,k,l) where
toContents (a,b,c,d,e,f,g,h,i,j,k,l)
= toContents a ++ toContents b ++ toContents c ++ toContents d
++ toContents e ++ toContents f ++ toContents g ++ toContents h
++ toContents i ++ toContents j ++ toContents k ++ toContents l
parseContents = do
{ a <- parseContents
; b <- parseContents
; c <- parseContents
; d <- parseContents
; e <- parseContents
; f <- parseContents
; g <- parseContents
; h <- parseContents
; i <- parseContents
; j <- parseContents
; k <- parseContents
; l <- parseContents
; return (a,b,c,d,e,f,g,h,i,j,k,l)
}
instance ( XmlContent a, XmlContent b, XmlContent c, XmlContent d
, XmlContent e, XmlContent f, XmlContent g, XmlContent h
, XmlContent i, XmlContent j, XmlContent k, XmlContent l
, XmlContent m ) =>
XmlContent (a,b,c,d,e,f,g,h,i,j,k,l,m) where
toContents (a,b,c,d,e,f,g,h,i,j,k,l,m)
= toContents a ++ toContents b ++ toContents c ++ toContents d
++ toContents e ++ toContents f ++ toContents g ++ toContents h
++ toContents i ++ toContents j ++ toContents k ++ toContents l
++ toContents m
parseContents = do
{ a <- parseContents
; b <- parseContents
; c <- parseContents
; d <- parseContents
; e <- parseContents
; f <- parseContents
; g <- parseContents
; h <- parseContents
; i <- parseContents
; j <- parseContents
; k <- parseContents
; l <- parseContents
; m <- parseContents
; return (a,b,c,d,e,f,g,h,i,j,k,l,m)
}
instance ( XmlContent a, XmlContent b, XmlContent c, XmlContent d
, XmlContent e, XmlContent f, XmlContent g, XmlContent h
, XmlContent i, XmlContent j, XmlContent k, XmlContent l
, XmlContent m, XmlContent n ) =>
XmlContent (a,b,c,d,e,f,g,h,i,j,k,l,m,n) where
toContents (a,b,c,d,e,f,g,h,i,j,k,l,m,n)
= toContents a ++ toContents b ++ toContents c ++ toContents d
++ toContents e ++ toContents f ++ toContents g ++ toContents h
++ toContents i ++ toContents j ++ toContents k ++ toContents l
++ toContents m ++ toContents n
parseContents = do
{ a <- parseContents
; b <- parseContents
; c <- parseContents
; d <- parseContents
; e <- parseContents
; f <- parseContents
; g <- parseContents
; h <- parseContents
; i <- parseContents
; j <- parseContents
; k <- parseContents
; l <- parseContents
; m <- parseContents
; n <- parseContents
; return (a,b,c,d,e,f,g,h,i,j,k,l,m,n)
}
instance ( XmlContent a, XmlContent b, XmlContent c, XmlContent d
, XmlContent e, XmlContent f, XmlContent g, XmlContent h
, XmlContent i, XmlContent j, XmlContent k, XmlContent l
, XmlContent m, XmlContent n, XmlContent o ) =>
XmlContent (a,b,c,d,e,f,g,h,i,j,k,l,m,n,o) where
toContents (a,b,c,d,e,f,g,h,i,j,k,l,m,n,o)
= toContents a ++ toContents b ++ toContents c ++ toContents d
++ toContents e ++ toContents f ++ toContents g ++ toContents h
++ toContents i ++ toContents j ++ toContents k ++ toContents l
++ toContents m ++ toContents n ++ toContents o
parseContents = do
{ a <- parseContents
; b <- parseContents
; c <- parseContents
; d <- parseContents
; e <- parseContents
; f <- parseContents
; g <- parseContents
; h <- parseContents
; i <- parseContents
; j <- parseContents
; k <- parseContents
; l <- parseContents
; m <- parseContents
; n <- parseContents
; o <- parseContents
; return (a,b,c,d,e,f,g,h,i,j,k,l,m,n,o)
}
data Defaultable a = Default a | NonDefault a deriving (Eq,Show)
searchMaybe :: (a -> Maybe b) -> [a] -> Maybe b
searchMaybe _ [] = Nothing
searchMaybe f (x:xs) =
let fx = f x in
case fx of
Nothing -> searchMaybe f xs
(Just _) -> fx
maybeToAttr :: (String->a->Maybe Attribute) -> String -> Maybe a
-> Maybe Attribute
maybeToAttr _ _ Nothing = Nothing
maybeToAttr to n (Just v) = to n v
defaultToAttr :: (String->a->Maybe Attribute) -> String -> Defaultable a
-> Maybe Attribute
defaultToAttr _ _ (Default _) = Nothing
defaultToAttr to n (NonDefault v) = to n v
definiteA :: (String->Attribute->Maybe a) -> String -> String
-> [Attribute] -> a
definiteA from tag at as =
case searchMaybe (from at) as of
Nothing -> error ("missing attribute "++at++" in tag <"++tag++">")
(Just a) -> a
defaultA :: (String->Attribute->Maybe a) -> a -> String
-> [Attribute] -> Defaultable a
defaultA from def at as =
case searchMaybe (from at) as of
Nothing -> Default def
(Just a) -> NonDefault a
possibleA :: (String->Attribute->Maybe a) -> String -> [Attribute] -> Maybe a
possibleA from at as = searchMaybe (from at) as
fromAttrToStr :: String -> Attribute -> Maybe String
fromAttrToStr n (n0,v)
| n == n0 = Just (attr2str v)
| otherwise = Nothing
toAttrFrStr :: String -> String -> Maybe Attribute
toAttrFrStr n v = Just (n, str2attr v)
str2attr :: String -> AttValue
str2attr s =
let f t =
let (l,r) = span (\c-> not (elem c "\"&<>'")) t
in if null r then [Left l]
else Left l: Right (g (head r)): f (tail r)
g '"' = RefEntity "quot"
g '&' = RefEntity "amp"
g '<' = RefEntity "lt"
g '>' = RefEntity "gt"
g '\'' = RefEntity "apos"
in AttValue (f s)
attr2str :: AttValue -> String
attr2str (AttValue xs) =
let f (Left s) = s
f (Right (RefChar i)) = [chr i]
f (Right (RefEntity "quot")) = "\""
f (Right (RefEntity "amp")) = "&"
f (Right (RefEntity "lt")) = "<"
f (Right (RefEntity "gt")) = ">"
f (Right (RefEntity "apos")) = "'"
f (Right _) = "*"
in concatMap f xs
data ANYContent = forall a . (XmlContent a, Show a) => ANYContent a
| UnConverted [Content Posn]
instance Show ANYContent where
show (UnConverted c) = "UnConverted " ++ (show $ map verbatim c)
show (ANYContent a) = "ANYContent " ++ (show a)
instance Eq ANYContent where
a == b = show a == show b
data List1 a = NonEmpty [a] deriving (Eq, Show)
instance (HTypeable a) => HTypeable (List1 a) where
toHType m = Defined "List1" [hx]
[Constr "NonEmpty" [hx] [List hx] ]
where (NonEmpty x) = m
hx = toHType x
instance (XmlContent a) => XmlContent (List1 a) where
toContents (NonEmpty xs) = concatMap toContents xs
parseContents = fmap NonEmpty $ many1 parseContents
instance HTypeable ANYContent where
toHType _ = Prim "ANYContent" "ANY"
instance XmlContent ANYContent where
toContents (ANYContent a) = toContents a
toContents (UnConverted s) = map (fmap (const ())) s
parseContents = P (\cs -> Success [] (UnConverted cs))