module Text.XML.HaXml.Validate
( validate
, partialValidate
) where
import Prelude hiding (elem,rem,mod,sequence)
import qualified Prelude (elem)
import Text.XML.HaXml.Types
import Text.XML.HaXml.Combinators (multi,tag,iffind,literal,none,o)
import Text.XML.HaXml.XmlContent (attr2str)
import Maybe (fromMaybe,isNothing,fromJust)
import List (intersperse,nub,(\\))
import Char (isSpace)
#if __GLASGOW_HASKELL__ >= 604 || __NHC__ >= 118 || defined(__HUGS__)
import qualified Data.Map as Map
type FiniteMap a b = Map.Map a b
listToFM :: Ord a => [(a,b)] -> FiniteMap a b
listToFM = Map.fromList
lookupFM :: Ord a => FiniteMap a b -> a -> Maybe b
lookupFM = flip Map.lookup
#elif __GLASGOW_HASKELL__ >= 504 || __NHC__ > 114
import Data.FiniteMap
#else
type FiniteMap a b = [(a,b)]
listToFM :: Eq a => [(a,b)] -> FiniteMap a b
listToFM = id
lookupFM :: Eq a => FiniteMap a b -> a -> Maybe b
lookupFM fm k = lookup k fm
#endif
data SimpleDTD = SimpleDTD
{ elements :: FiniteMap Name ContentSpec
, attributes :: FiniteMap (Name,Name) AttType
, required :: FiniteMap Name [Name]
, ids :: [(Name,Name)]
, idrefs :: [(Name,Name)]
}
simplifyDTD :: DocTypeDecl -> SimpleDTD
simplifyDTD (DTD _ _ decls) =
SimpleDTD
{ elements = listToFM [ (name,content)
| Element (ElementDecl name content) <- decls ]
, attributes = listToFM [ ((elem,attr),typ)
| AttList (AttListDecl elem attdefs) <- decls
, AttDef attr typ _ <- attdefs ]
, required = listToFM [ (elem, [ attr
| AttDef attr _ REQUIRED <- attdefs ])
| AttList (AttListDecl elem attdefs) <- decls ]
, ids = [ (elem,attr)
| Element (ElementDecl elem _) <- decls
, AttList (AttListDecl name attdefs) <- decls
, elem == name
, AttDef attr (TokenizedType ID) _ <- attdefs ]
, idrefs = []
}
gives :: Bool -> a -> [a]
True `gives` x = [x]
False `gives` _ = []
validate :: DocTypeDecl -> Element i -> [String]
validate dtd' elem = root dtd' elem ++ partialValidate dtd' elem
where
root (DTD name _ _) (Elem name' _ _) =
(name/=name') `gives` ("Document type should be <"++name
++"> but appears to be <"++name'++">.")
partialValidate :: DocTypeDecl -> Element i -> [String]
partialValidate dtd' elem = valid elem ++ checkIDs elem
where
dtd = simplifyDTD dtd'
valid (Elem name attrs contents) =
let spec = lookupFM (elements dtd) name in
(isNothing spec) `gives` ("Element <"++name++"> not known.")
++ (let dups = duplicates (map fst attrs) in
not (null dups) `gives`
("Element <"++name++"> has duplicate attributes: "
++concat (intersperse "," dups)++"."))
++ concatMap (checkAttr name) attrs
++ concatMap (checkRequired name attrs)
(fromMaybe [] (lookupFM (required dtd) name))
++ checkContentSpec name (fromMaybe ANY spec) contents
++ concatMap valid [ elem | CElem elem _ <- contents ]
checkAttr elem (attr, val) =
let typ = lookupFM (attributes dtd) (elem,attr)
attval = attr2str val in
if isNothing typ then ["Attribute \""++attr
++"\" not known for element <"++elem++">."]
else
case fromJust typ of
EnumeratedType e ->
case e of
Enumeration es ->
(not (attval `Prelude.elem` es)) `gives`
("Value \""++attval++"\" of attribute \""
++attr++"\" in element <"++elem
++"> is not in the required enumeration range: "
++unwords es)
_ -> []
_ -> []
checkRequired elem attrs req =
(not (req `Prelude.elem` map fst attrs)) `gives`
("Element <"++elem++"> requires the attribute \""++req
++"\" but it is missing.")
checkContentSpec _elem ANY _ = []
checkContentSpec _elem EMPTY [] = []
checkContentSpec elem EMPTY (_:_) =
["Element <"++elem++"> is not empty but should be."]
checkContentSpec elem (Mixed PCDATA) cs = concatMap (checkMixed elem []) cs
checkContentSpec elem (Mixed (PCDATAplus names)) cs =
concatMap (checkMixed elem names) cs
checkContentSpec elem (ContentSpec cp) cs = excludeText elem cs ++
(let (errs,rest) = checkCP elem cp (flatten cs) in
case rest of [] -> errs
_ -> errs++["Element <"++elem++"> contains extra "
++"elements beyond its content spec."])
checkMixed elem permitted (CElem (Elem name _ _) _)
| not (name `Prelude.elem` permitted) =
["Element <"++elem++"> contains an element <"++name
++"> but should not."]
checkMixed _elem _permitted _ = []
flatten (CElem (Elem name _ _) _: cs) = name: flatten cs
flatten (_: cs) = flatten cs
flatten [] = []
excludeText elem (CElem _ _: cs) = excludeText elem cs
excludeText elem (CMisc _ _: cs) = excludeText elem cs
excludeText elem (CString _ s _: cs) | all isSpace s = excludeText elem cs
excludeText elem (_: cs) =
["Element <"++elem++"> contains text/references but should not."]
excludeText _elem [] = []
checkCP :: Name -> CP -> [Name] -> ([String],[Name])
checkCP elem cp@(TagName n None) [] = (cpError elem cp, [])
checkCP elem cp@(TagName n None) (n':ns)
| n==n' = ([], ns)
| otherwise = (cpError elem cp, n':ns)
checkCP _elem cp@(TagName n Query) [] = ([],[])
checkCP _elem cp@(TagName n Query) (n':ns)
| n==n' = ([], ns)
| otherwise = ([], n':ns)
checkCP _elem cp@(TagName n Star) [] = ([],[])
checkCP elem cp@(TagName n Star) (n':ns)
| n==n' = checkCP elem (TagName n Star) ns
| otherwise = ([], n':ns)
checkCP elem cp@(TagName n Plus) [] = (cpError elem cp, [])
checkCP elem cp@(TagName n Plus) (n':ns)
| n==n' = checkCP elem (TagName n Star) ns
| otherwise = (cpError elem cp, n':ns)
checkCP elem cp@(Choice cps None) ns =
let next = choice elem ns cps in
if null next then (cpError elem cp, ns)
else ([], head next)
checkCP _elem cp@(Choice cps Query) [] = ([],[])
checkCP elem cp@(Choice cps Query) ns =
let next = choice elem ns cps in
if null next then ([],ns)
else ([], head next)
checkCP _elem cp@(Choice cps Star) [] = ([],[])
checkCP elem cp@(Choice cps Star) ns =
let next = choice elem ns cps in
if null next then ([],ns)
else checkCP elem (Choice cps Star) (head next)
checkCP elem cp@(Choice cps Plus) [] = (cpError elem cp, [])
checkCP elem cp@(Choice cps Plus) ns =
let next = choice elem ns cps in
if null next then (cpError elem cp, ns)
else checkCP elem (Choice cps Star) (head next)
checkCP elem cp@(Seq cps None) ns =
let (errs,next) = sequence elem ns cps in
if null errs then ([],next)
else (cpError elem cp++errs, ns)
checkCP _elem cp@(Seq cps Query) [] = ([],[])
checkCP elem cp@(Seq cps Query) ns =
let (errs,next) = sequence elem ns cps in
if null errs then ([],next)
else ([], ns)
checkCP _elem cp@(Seq cps Star) [] = ([],[])
checkCP elem cp@(Seq cps Star) ns =
let (errs,next) = sequence elem ns cps in
if null errs then checkCP elem (Seq cps Star) next
else ([], ns)
checkCP elem cp@(Seq cps Plus) [] = (cpError elem cp, [])
checkCP elem cp@(Seq cps Plus) ns =
let (errs,next) = sequence elem ns cps in
if null errs then checkCP elem (Seq cps Star) next
else (cpError elem cp++errs, ns)
choice elem ns cps =
[ rem | ([],rem) <- map (\cp-> checkCP elem (definite cp) ns) cps ]
++ [ ns | all possEmpty cps ]
where definite (TagName n Query) = TagName n None
definite (Choice cps Query) = Choice cps None
definite (Seq cps Query) = Seq cps None
definite (TagName n Star) = TagName n Plus
definite (Choice cps Star) = Choice cps Plus
definite (Seq cps Star) = Seq cps Plus
definite x = x
possEmpty (TagName _ mod) = mod `Prelude.elem` [Query,Star]
possEmpty (Choice cps None) = all possEmpty cps
possEmpty (Choice _ mod) = mod `Prelude.elem` [Query,Star]
possEmpty (Seq cps None) = all possEmpty cps
possEmpty (Seq _ mod) = mod `Prelude.elem` [Query,Star]
sequence elem ns cps =
foldl (\(es,ns) cp-> let (es',ns') = checkCP elem cp ns
in (es++es', ns'))
([],ns) cps
checkIDs elem =
let celem = CElem elem undefined
showAttr a = iffind a literal none
idElems = concatMap (\(name,at)-> multi (showAttr at `o` tag name)
celem)
(ids dtd)
badIds = duplicates (map (\(CString _ s _)->s) idElems)
in not (null badIds) `gives`
("These attribute values of type ID are not unique: "
++concat (intersperse "," badIds)++".")
cpError :: Name -> CP -> [String]
cpError elem cp =
["Element <"++elem++"> should contain "++display cp++" but does not."]
display :: CP -> String
display (TagName name mod) = name ++ modifier mod
display (Choice cps mod) = "(" ++ concat (intersperse "|" (map display cps))
++ ")" ++ modifier mod
display (Seq cps mod) = "(" ++ concat (intersperse "," (map display cps))
++ ")" ++ modifier mod
modifier :: Modifier -> String
modifier None = ""
modifier Query = "?"
modifier Star = "*"
modifier Plus = "+"
duplicates :: Eq a => [a] -> [a]
duplicates xs = xs \\ (nub xs)