module Text.XML.HXT.Parser.XmlDTDParser
( parseXmlDTDdecl
, parseXmlDTDdeclPart
, parseXmlDTDEntityValue
, elementDecl
, attlistDecl
, entityDecl
, notationDecl
)
where
import Data.Maybe
import Text.ParserCombinators.Parsec
import Text.ParserCombinators.Parsec.Pos
import Text.XML.HXT.DOM.XmlTree hiding (choice)
import Text.XML.HXT.DOM.Util (stringToUpper)
import qualified Text.XML.HXT.Parser.XmlTokenParser as XT
import qualified Text.XML.HXT.Parser.XmlCharParser as XC(xmlSpaceChar)
import qualified Text.XML.HXT.Parser.XmlDTDTokenParser as XD(dtdToken)
type LocalState = (Int, [(Int, String, SourcePos)])
type SParser a = GenParser Char LocalState a
initialLocalState :: SourcePos -> LocalState
initialLocalState p = (0, [(0, sourceName p, p)])
pushPar :: String -> SParser ()
pushPar n = do
p <- getPosition
updateState (\ (i, s) -> (i+1, (i+1, n, p) : s))
setPosition ( newPos (sourceName p ++ " (line " ++ show (sourceLine p) ++ ", column " ++ show (sourceColumn p) ++ ") in content of parameter entity ref %" ++ n ++ ";") 1 1)
popPar :: SParser ()
popPar = do
oldPos <- getPos
updateState pop
setPosition oldPos
where
pop (i, [(_, s, p)]) = (i+1, [(i+1, s, p)])
pop (i, _t:s) = (i, s)
pop (_i, []) = undefined
getParNo :: SParser Int
getParNo = do
(_i, (top, _n, _p) : _s) <- getState
return top
getPos :: SParser SourcePos
getPos = do
(_i, (_top, _n, p) : _s) <- getState
return p
delPE :: SParser ()
delPE = do
char '\0'
return ()
startPE :: SParser ()
startPE
= do
try ( do
delPE
n <- many1 (satisfy (/= '\0'))
delPE
pushPar n
)
endPE :: SParser ()
endPE
= do
try (do
delPE
delPE
popPar
)
inSamePE :: SParser a -> SParser a
inSamePE p
= do
i <- getParNo
r <- p
j <- getParNo
if (i == j)
then return r
else fail $ "parameter entity contents does not fit into the structure of a DTD declarations"
xmlSpaceChar :: SParser ()
xmlSpaceChar = ( do
XC.xmlSpaceChar
return ()
)
<|>
startPE
<|>
endPE
<?> "white space"
skipS :: SParser ()
skipS
= do
skipMany1 xmlSpaceChar
return ()
skipS0 :: SParser ()
skipS0
= do
skipMany xmlSpaceChar
return ()
name :: SParser XmlTree
name
= do
n <- XT.name
return (mkXDTDTree NAME [(a_name, n)] [])
nmtoken :: SParser XmlTree
nmtoken
= do
n <- XT.nmtoken
return (mkXDTDTree NAME [(a_name, n)] [])
elementDecl :: SParser XmlTrees
elementDecl
= between (try $ string "<!ELEMENT") (char '>') elementDeclBody
elementDeclBody :: SParser XmlTrees
elementDeclBody
= do
skipS
n <- XT.name
skipS
(al, cl) <- contentspec
skipS0
return [mkXDTDTree ELEMENT ((a_name, n) : al) cl]
contentspec :: SParser (Attributes, XmlTrees)
contentspec
= simplespec k_empty v_empty
<|>
simplespec k_any v_any
<|>
inSamePE mixed
<|>
inSamePE children
<?> "content specification"
where
simplespec kw v
= do
XT.keyword kw
return ([(a_type, v)], [])
children :: SParser (Attributes, XmlTrees)
children
= ( do
(al, cl) <- choiceOrSeq
modifier <- optOrRep
return ([(a_type, v_children)], [mkXDTDTree CONTENT (modifier ++ al) cl])
)
<?> "element content"
optOrRep :: SParser Attributes
optOrRep
= do
m <- option "" (XT.mkList (oneOf "?*+"))
return [(a_modifier, m)]
choiceOrSeq :: SParser (Attributes, XmlTrees)
choiceOrSeq
= inSamePE $
do
cl <- try ( do
lpar
choiceOrSeqBody
)
rpar
return cl
choiceOrSeqBody :: SParser (Attributes, XmlTrees)
choiceOrSeqBody
= do
cp1 <- cp
choiceOrSeq1 cp1
where
choiceOrSeq1 :: XmlTree -> SParser (Attributes, XmlTrees)
choiceOrSeq1 c1
= ( do
bar
c2 <- cp
cl <- many ( do
bar
cp
)
return ([(a_kind, v_choice)], (c1 : c2 : cl))
)
<|>
( do
cl <- many ( do
comma
cp
)
return ([(a_kind, v_seq)], (c1 : cl))
)
<?> "sequence or choice"
cp :: SParser XmlTree
cp
= ( do
n <- name
m <- optOrRep
return ( case m of
[(_, "")] -> n
_ -> mkXDTDTree CONTENT (m ++ [(a_kind, v_seq)]) [n]
)
)
<|>
( do
(al, cl) <- choiceOrSeq
m <- optOrRep
return (mkXDTDTree CONTENT (m ++ al) cl)
)
mixed :: SParser (Attributes, XmlTrees)
mixed
= ( do
try ( do
lpar
string k_pcdata
)
nl <- many ( do
bar
name
)
rpar
if null nl
then do
option ' ' (char '*')
return ( [ (a_type, v_pcdata) ]
, []
)
else do
char '*' <?> "closing parent for mixed content (\")*\")"
return ( [ (a_type, v_mixed) ]
, [ mkXDTDTree CONTENT [ (a_modifier, "*")
, (a_kind, v_choice)
] nl
]
)
)
<?> "mixed content"
attlistDecl :: SParser XmlTrees
attlistDecl
= between (try $ string "<!ATTLIST") (char '>') attlistDeclBody
attlistDeclBody :: SParser XmlTrees
attlistDeclBody
= do
skipS
n <- XT.name
al <- many attDef
skipS0
return (map (mkDTree n) al)
where
mkDTree n' (al, cl)
= mkXDTDTree ATTLIST ((a_name, n') : al) cl
attDef :: SParser (Attributes, XmlTrees)
attDef
= do
n <- try ( do
skipS
XT.name
) <?> "attribute name"
skipS
(t, cl) <- attType
skipS
d <- defaultDecl
return (((a_value, n) : d) ++ t, cl)
attType :: SParser (Attributes, XmlTrees)
attType
= tokenizedOrStringType
<|>
enumeration
<|>
notationType
<?> "attribute type"
tokenizedOrStringType :: SParser (Attributes, XmlTrees)
tokenizedOrStringType
= do
n <- choice $ map XT.keyword typl
return ([(a_type, n)], [])
where
typl = [ k_cdata
, k_idrefs
, k_idref
, k_id
, k_enitity
, k_entities
, k_nmtokens
, k_nmtoken
]
enumeration :: SParser (Attributes, XmlTrees)
enumeration
= do
nl <- inSamePE (between lpar rpar (sepBy1 nmtoken bar))
return ([(a_type, k_enumeration)], nl)
notationType :: SParser (Attributes, XmlTrees)
notationType
= do
XT.keyword k_notation
skipS
nl <- inSamePE (between lpar rpar ( sepBy1 name bar ))
return ([(a_type, k_notation)], nl)
defaultDecl :: SParser Attributes
defaultDecl
= ( do
str <- try $ string k_required
return [(a_kind, str)]
)
<|>
( do
str <- try $ string k_implied
return [(a_kind, str)]
)
<|>
( do
l <- fixed
v <- XT.attrValueT
return ((a_default, xshow v) : l)
)
<?> "default declaration"
where
fixed = option [(a_kind, k_default)]
( do
try $ string k_fixed
skipS
return [(a_kind, k_fixed)]
)
entityDecl :: SParser XmlTrees
entityDecl
= between ( try $ string "<!ENTITY" ) (char '>') entityDeclBody
entityDeclBody :: SParser XmlTrees
entityDeclBody
= do
skipS
( peDecl
<|>
geDecl
<?> "entity declaration" )
geDecl :: SParser XmlTrees
geDecl
= do
n <- XT.name
skipS
(al, cl) <- entityDef
skipS0
return [mkXDTDTree ENTITY ((a_name, n) : al) cl]
entityDef :: SParser (Attributes, XmlTrees)
entityDef
= entityValue
<|>
externalEntitySpec
externalEntitySpec :: SParser (Attributes, XmlTrees)
externalEntitySpec
= do
al <- externalID
nd <- option [] nDataDecl
return ((al ++ nd), [])
peDecl :: SParser XmlTrees
peDecl
= do
char '%'
skipS
n <- XT.name
skipS
(al, cs) <- peDef
skipS0
return [mkXDTDTree PENTITY ((a_name, n) : al) cs]
peDef :: SParser (Attributes, XmlTrees)
peDef
= entityValue
<|>
do
al <- externalID
return (al, [])
entityValue :: GenParser Char state (Attributes, XmlTrees)
entityValue
= do
v <- XT.entityValueT
return ([], v)
externalID :: SParser Attributes
externalID
= ( do
XT.keyword k_system
skipS
lit <- XT.systemLiteral
return [(k_system, lit)]
)
<|>
( do
XT.keyword k_public
skipS
pl <- XT.pubidLiteral
skipS
sl <- XT.systemLiteral
return [ (k_system, sl)
, (k_public, pl) ]
)
<?> "SYSTEM or PUBLIC declaration"
nDataDecl :: SParser Attributes
nDataDecl
= do
try ( do
skipS
XT.keyword k_ndata
)
skipS
n <- XT.name
return [(k_ndata, n)]
notationDecl :: SParser XmlTrees
notationDecl
= between (try $ string "<!NOTATION") (char '>' <?> "notation declaration") notationDeclBody
notationDeclBody :: SParser XmlTrees
notationDeclBody
= do
skipS
n <- XT.name
skipS
eid <- ( try externalID
<|>
publicID
)
skipS0
return [mkXDTDTree NOTATION ((a_name, n) : eid) []]
publicID :: SParser Attributes
publicID
= do
XT.keyword k_public
skipS
l <- XT.pubidLiteral
return [(k_public, l)]
condSectCondBody :: SParser XmlTrees
condSectCondBody
= do
skipS0
n <- XT.name
skipS0
let n' = stringToUpper n
if n' `elem` [k_include, k_ignore]
then return (xtext n')
else fail $ "INCLUDE or IGNORE expected in conditional section"
separator :: Char -> SParser ()
separator c
= do
try ( do
skipS0
char c
)
skipS0
<?> [c]
bar, comma, lpar, rpar :: SParser ()
bar = separator '|'
comma = separator ','
lpar
= do
char '('
skipS0
rpar
= do
skipS0
char ')'
return ()
parseXmlDTDEntityValue :: XmlFilter
parseXmlDTDEntityValue (NTree (XDTD PEREF al) cl)
= ( either (xerr . (++ "\n") . show)
(\cl' -> if null cl'
then [mkXTextTree ""]
else cl'
)
.
parse parser source
) input
where
parser = XT.entityTokensT "%&"
source = "value of parameter entity " ++ lookupDef "" a_peref al
input = xshow cl
parseXmlDTDEntityValue n
= error ("parseXmlDTDEntityValue: illegal argument: " ++ show n)
parseXmlDTDdeclPart :: XmlFilter
parseXmlDTDdeclPart n@(NTree (XDTD PEREF al) cl)
= ( either (xerr . (++ "\n") . show)
(\cl' -> replaceChildren cl' n)
.
parse parser source
) input
where
parser = many XD.dtdToken
source = "value of parameter entity " ++ lookupDef "" a_peref al
input = xshow cl
parseXmlDTDdeclPart n
= error ("parseXmlDTDdeclPart: illegal argument: " ++ show n)
parseXmlDTDdecl :: XmlFilter
parseXmlDTDdecl (NTree (XDTD dtdElem al) cl)
= ( either (xerr . (++ "\n") . show) id
.
runParser parser (initialLocalState pos) source
) input
where
dtdParsers
= [ (ELEMENT, elementDeclBody)
, (ATTLIST, attlistDeclBody)
, (ENTITY, entityDeclBody)
, (NOTATION, notationDeclBody)
, (CONDSECT, condSectCondBody)
]
source = lookupDef "DTD declaration" a_source al
line = lookupDef "1" a_line al
column = lookupDef "1" a_column al
pos = newPos source (read line) (read column)
parser = do
setPosition pos
res <- fromJust . lookup dtdElem $ dtdParsers
eof
return res
input = concatMap collectText cl
parseXmlDTDdecl _
= []
collectText :: XmlTree -> String
collectText (NTree (XText s) _)
= s
collectText (NTree (XDTD PEREF al) cl)
= prefixPe ++ concatMap collectText cl ++ suffixPe
where
delPe = "\0"
prefixPe = delPe ++ lookupDef "???" a_peref al ++ delPe
suffixPe = delPe ++ delPe
collectText _
= ""