module Text.XML.HXT.Parser.XmlTokenParser
( allBut
, allBut1
, asciiLetter
, attrValue
, bar
, charRef
, comma
, dq
, encName
, entityRef
, entityValue
, eq
, gt
, keyword
, keywords
, lpar
, lt
, name
, names
, ncName
, nmtoken
, nmtokens
, peReference
, pubidLiteral
, qName
, quoted
, reference
, rpar
, semi
, separator
, singleChar
, singleChars
, skipS
, skipS0
, sPace
, sPace0
, sq
, systemLiteral
, versionNum
, concRes
, mkList
, nameT
, nmtokenT
, entityValueT
, entityTokensT
, entityCharT
, attrValueT
, attrValueT'
, referenceT
, charRefT
, entityRefT
, peReferenceT
, singleCharsT
)
where
import Text.ParserCombinators.Parsec
import Text.XML.HXT.DOM.Interface
import Text.XML.HXT.DOM.XmlNode
( mkDTDElem
, mkText
, mkCharRef
, mkEntityRef
)
import Text.XML.HXT.DOM.Unicode
( isXmlChar
, intToCharRef
, intToCharRefHex
)
import Text.XML.HXT.DOM.Util
( hexStringToInt
, decimalStringToInt
)
import Text.XML.HXT.Parser.XmlCharParser
( xmlNameChar
, xmlNameStartChar
, xmlNCNameChar
, xmlNCNameStartChar
, xmlSpaceChar
)
sPace :: GenParser Char state String
sPace
= many1 xmlSpaceChar
sPace0 :: GenParser Char state String
sPace0
= many xmlSpaceChar
skipS :: GenParser Char state ()
skipS
= skipMany1 xmlSpaceChar
skipS0 :: GenParser Char state ()
skipS0
= skipMany xmlSpaceChar
asciiLetter :: GenParser Char state Char
asciiLetter
= satisfy (\ c -> ( c >= 'A' && c <= 'Z' ||
c >= 'a' && c <= 'z' )
)
<?> "ASCII letter"
name :: GenParser Char state String
name
= try ( do
s1 <- xmlNameStartChar
sl <- many xmlNameChar
return (s1 : sl)
)
<?> "Name"
ncName :: GenParser Char state String
ncName
= try ( do
s1 <- xmlNCNameStartChar
sl <- many xmlNCNameChar
return (s1 : sl)
)
<?> "NCName"
qName :: GenParser Char state (String, String)
qName
= do
s1 <- ncName
s2 <- option "" ( do
char ':'
ncName
)
return ( if null s2 then (s2, s1) else (s1, s2) )
nmtoken :: GenParser Char state String
nmtoken
= try (many1 xmlNameChar)
<?> "Nmtoken"
names :: GenParser Char state [String]
names
= sepBy1 name sPace
nmtokens :: GenParser Char state [String]
nmtokens
= sepBy1 nmtoken sPace
singleChar :: String -> GenParser Char state Char
singleChar notAllowed
= satisfy (\ c -> isXmlChar c && not (c `elem` notAllowed))
singleChars :: String -> GenParser Char state String
singleChars notAllowed
= many1 (singleChar notAllowed)
entityValue :: GenParser Char state String
entityValue
= attrValue
attrValueDQ :: GenParser Char state String
attrValueDQ
= between dq dq (concRes $ many $ attrChar "<&\"")
attrValueSQ :: GenParser Char state String
attrValueSQ
= between sq sq (concRes $ many $ attrChar "<&\'")
attrValue :: GenParser Char state String
attrValue
= ( do
v <- attrValueDQ
return ("\"" ++ v ++ "\"")
)
<|>
( do
v <- attrValueSQ
return ("'" ++ v ++ "'")
)
<?> "attribute value (in quotes)"
attrChar :: String -> GenParser Char state String
attrChar notAllowed
= reference
<|>
mkList (singleChar notAllowed)
<?> "legal attribute character or reference"
systemLiteral :: GenParser Char state String
systemLiteral
= between dq dq (many $ noneOf "\"")
<|>
between sq sq (many $ noneOf "\'")
<?> "system literal (in quotes)"
pubidLiteral :: GenParser Char state String
pubidLiteral
= between dq dq (many $ pubidChar "\'")
<|>
between sq sq (many $ pubidChar "")
<?> "pubid literal (in quotes)"
where
pubidChar :: String -> GenParser Char state Char
pubidChar quoteChars
= asciiLetter
<|>
digit
<|>
oneOf " \r\n"
<|>
oneOf "-()+,./:=?;!*#@$_%"
<|>
oneOf quoteChars
reference :: GenParser Char state String
reference
= ( do
i <- charRef
return ("&#" ++ show i ++ ";")
)
<|>
( do
n <- entityRef
return ("&" ++ n ++ ";")
)
checkCharRef :: Int -> GenParser Char state Int
checkCharRef i
= if ( i <= fromEnum (maxBound::Char)
&& isXmlChar (toEnum i)
)
then return i
else unexpected ("illegal value in character reference: " ++ intToCharRef i ++ " , in hex: " ++ intToCharRefHex i)
charRef :: GenParser Char state Int
charRef
= do
try (string "&#x")
d <- many1 hexDigit
semi
checkCharRef (hexStringToInt d)
<|>
do
try (string "&#")
d <- many1 digit
semi
checkCharRef (decimalStringToInt d)
<?> "character reference"
entityRef :: GenParser Char state String
entityRef
= do
char '&'
n <- name
semi
return n
<?> "entity reference"
peReference :: GenParser Char state String
peReference
= try ( do
char '%'
n <- name
semi
return n
)
<?> "parameter-entity reference"
encName :: GenParser Char state String
encName
= do
c <- asciiLetter
r <- many (asciiLetter <|> digit <|> oneOf "._-")
return (c:r)
versionNum :: GenParser Char state String
versionNum
= many1 xmlNameChar
keyword :: String -> GenParser Char state String
keyword kw
= try ( do
n <- name
if n == kw
then return n
else unexpected n
)
<?> kw
keywords :: [String] -> GenParser Char state String
keywords
= foldr1 (<|>) . map keyword
quoted :: GenParser Char state a -> GenParser Char state a
quoted p
= between dq dq p
<|>
between sq sq p
dq, sq, lt, gt, semi :: GenParser Char state Char
dq = char '\"'
sq = char '\''
lt = char '<'
gt = char '>'
semi = char ';'
separator :: Char -> GenParser Char state ()
separator c
= do
try ( do
skipS0
char c
)
skipS0
<?> [c]
bar, comma, eq, lpar, rpar :: GenParser Char state ()
bar = separator '|'
comma = separator ','
eq = separator '='
lpar
= do
char '('
skipS0
rpar
= do
skipS0
char ')'
return ()
allBut :: (GenParser Char state Char -> GenParser Char state String) -> String -> GenParser Char state String
allBut p str
= allBut1 p (const True) str
allBut1 :: (GenParser Char state Char -> GenParser Char state String) -> (Char -> Bool) -> String -> GenParser Char state String
allBut1 p prd (c:rest)
= p ( satisfy (\ x -> isXmlChar x && prd x && not (x == c) )
<|>
try ( do
char c
notFollowedBy ( do
try (string rest)
return c
)
return c
)
)
allBut1 _p _prd str
= error ("allBut1 _ _ " ++ show str ++ " is undefined")
concRes :: GenParser Char state [[a]] -> GenParser Char state [a]
concRes p
= do
sl <- p
return (concat sl)
mkList :: GenParser Char state a -> GenParser Char state [a]
mkList p
= do
r <- p
return [r]
nameT :: GenParser Char state XmlTree
nameT
= do
n <- name
return (mkDTDElem NAME [(a_name, n)] [])
nmtokenT :: GenParser Char state XmlTree
nmtokenT
= do
n <- nmtoken
return (mkDTDElem NAME [(a_name, n)] [])
entityValueT :: GenParser Char state XmlTrees
entityValueT
= do
sl <- between dq dq (entityTokensT "%&\"")
return sl
<|>
do
sl <- between sq sq (entityTokensT "%&\'")
return sl
<?> "entity value (in quotes)"
entityTokensT :: String -> GenParser Char state XmlTrees
entityTokensT notAllowed
= many (entityCharT notAllowed)
entityCharT :: String -> GenParser Char state XmlTree
entityCharT notAllowed
= peReferenceT
<|>
charRefT
<|>
bypassedEntityRefT
<|>
( do
cs <- many1 (singleChar notAllowed)
return (mkText cs)
)
attrValueT :: GenParser Char state XmlTrees
attrValueT
= between dq dq (attrValueT' "<&\"")
<|>
between sq sq (attrValueT' "<&\'")
<?> "attribute value (in quotes)"
attrValueT' :: String -> GenParser Char state XmlTrees
attrValueT' notAllowed
= many ( referenceT <|> singleCharsT notAllowed)
singleCharsT :: String -> GenParser Char state XmlTree
singleCharsT notAllowed
= do
cs <- singleChars notAllowed
return (mkText cs)
referenceT :: GenParser Char state XmlTree
referenceT
= charRefT
<|>
entityRefT
charRefT :: GenParser Char state XmlTree
charRefT
= do
i <- charRef
return $! (mkCharRef $! i)
entityRefT :: GenParser Char state XmlTree
entityRefT
= do
n <- entityRef
return $! (mkEntityRef $! n)
bypassedEntityRefT :: GenParser Char state XmlTree
bypassedEntityRefT
= do
n <- entityRef
return $! (mkText ("&" ++ n ++ ";"))
peReferenceT :: GenParser Char state XmlTree
peReferenceT
= do
r <- peReference
return $! (mkDTDElem PEREF [(a_peref, r)] [])