module Text.XML.HXQ.Types where
import Char(isDigit,isSpace)
import List(isSuffixOf,inits,tails)
import Text.XML.HXQ.Parser
import Text.XML.HXQ.XTree
import XMLParse(XMLEvent(..),parseDocument)
import HXML(Name,AttList)
buildInTypes :: [(String,String)]
buildInTypes
= [("untypedAtomic","anyAtomicType"),
("dateTime","anyAtomicType"),
("date","anyAtomicType"),
("time","anyAtomicType"),
("duration","anyAtomicType"),
("float","anyAtomicType"),
("double","anyAtomicType"),
("decimal","anyAtomicType"),
("gYearMonth","anyAtomicType"),
("gYear","anyAtomicType"),
("gMonthDay","anyAtomicType"),
("gDay","anyAtomicType"),
("gMonth","anyAtomicType"),
("boolean","anyAtomicType"),
("base64Binary","anyAtomicType"),
("hexBinary","anyAtomicType"),
("anyURI","anyAtomicType"),
("QName","anyAtomicType"),
("NOTATION","anyAtomicType"),
("yearMonthDuration","duration"),
("dayTmeDuration","duration"),
("integer","decimal"),
("nonPositiveInteger","integer"),
("negativeInteger","nonPositiveInteger"),
("long","integer"),
("int","long"),
("short","int"),
("byte","short"),
("nonNegativeInteger","integer"),
("unsignedLong","nonNegativeInteger"),
("unsignedInt","unsignedLong"),
("unsignedShort","unsignedInt"),
("unsignedByte","unsignedShort"),
("positiveInteger","nonNegativeInteger"),
("string","anyAtomicType"),
("normalizedString","string"),
("token","normalizedString"),
("language","token"),
("NMTOKEN","token"),
("Name","token"),
("NCName","Name"),
("ID","NCName"),
("IDREF","NCName"),
("ENTITY","NCName")]
typeHierarchy :: [(String,String)]
typeHierarchy
= [("anyAtomicType","item"),
("node","item"),
("attribute","node"),
("comment","node"),
("document","node"),
("element","node"),
("processing-instruction","node"),
("text","node")]
isBuildInType :: String -> Bool
isBuildInType "anyAtomicType" = True
isBuildInType "numeric" = True
isBuildInType name = memA name buildInTypes
baseType s = TBase (QName "xs" xsNamespace s)
tString = baseType "string"
tInt = baseType "integer"
tFloat = baseType "float"
tBool = baseType "boolean"
tAtomic = baseType "anyAtomicType"
tUAtomic = baseType "untypedAtomic"
tItem = TItem "item"
tNode = TItem "node"
tText = TItem "text"
tStar t = TQualified t '*'
tPlus t = TQualified t '+'
tOptional t = TQualified t '?'
tNumeric = TBase (QName "" xsNamespace "numeric")
findA var ((n,b):_) | n==var = b
findA var (_:xs) = findA var xs
findA var _ = error ("Undefined variable: "++var)
memA var ((n,_):_) | n==var = True
memA var (_:xs) = memA var xs
memA _ _ = False
toString :: XTree -> String
toString x
= case x of
XElem _ _ _ _ zs
-> concatMap toString zs
XAttr _ v -> v
XText x -> x
XInt n -> show n
XFloat n -> show n
XBool b -> if b then "true" else "false"
_ -> ""
readNum :: String -> Maybe XTree
readNum cs
= let readInt ('+':rest) = span isDigit rest
readInt ('-':rest) = let (s,rest1) = span isDigit rest
in ('-':s,rest1)
readInt rest = span isDigit rest
readExp ('e':cs) = readInt cs
readExp ('E':cs) = readInt cs
readExp cs = ("",cs)
(si,rest) = readInt cs
in case rest of
'.':rest1
-> let (sd,rest2) = span isDigit rest1
in case readExp rest2 of
("",[]) -> Just $ XFloat (read $ si ++ "." ++ sd)
(exp,[]) -> Just $ XFloat (read $ si ++ "." ++ sd ++ "e" ++ exp)
_ -> Nothing
_ -> case readExp rest of
("",[]) -> Just $ XInt (read si)
(exp,[]) -> Just $ XFloat (read $ si ++ "e" ++ exp)
_ -> Nothing
toNum :: XTree -> Maybe XTree
toNum (XElem _ _ _ _ [x]) = toNum x
toNum (XText s) = readNum s
toNum (x@(XInt n)) = Just x
toNum (x@(XFloat n)) = Just x
toNum (XBool b) = Just $ XInt (if b then 1 else 0)
toNum (XAttr _ v) = toNum (XText v)
toNum _ = Nothing
toInt :: XTree -> Maybe XTree
toInt (XElem _ _ _ _ [x]) = toInt x
toInt (XText s) = case readNum s of
Just (XFloat n) -> Just $ XInt (floor n)
x -> x
toInt (x@(XInt n)) = Just x
toInt (XFloat n) = Just $ XInt (floor n)
toInt (XBool b) = Just $ XInt (if b then 1 else 0)
toInt (XAttr _ v) = toInt (XText v)
toInt _ = Nothing
toFloat :: XTree -> Maybe XTree
toFloat (XElem _ _ _ _ [x]) = toFloat x
toFloat (XText s) = case readNum s of
Just (XInt n) -> Just $ XFloat $ fromIntegral n
x -> x
toFloat (XInt n) = Just $ XFloat $ fromIntegral n
toFloat (x@(XFloat n)) = Just x
toFloat (XBool b) = Just $ XFloat (if b then 1 else 0)
toFloat (XAttr _ v) = toFloat (XText v)
toFloat _ = Nothing
toBool :: XTree -> Maybe XTree
toBool (XText s) = Just $ XBool $ s /= ""
toBool (XInt n) = Just $ XBool $ n /= 0
toBool (XFloat n) = Just $ XBool $ n /= 0
toBool (XAttr _ v) = Just $ XBool $ v /= ""
toBool (XBool b) = Just $ XBool b
toBool x = Nothing
casts :: [(String,XTree->Maybe XTree)]
casts = [("anyAtomicType",Just . id),
("string",Just . XText . toString),
("float",toFloat),
("integer",toInt),
("nonNegativeInteger",\x -> do XInt n <- toInt x
return $! XInt $ abs n),
("boolean",toBool)]
instanceOf :: XSeq -> Type -> Bool
instanceOf e tp
= case typeValidate tp e of
Success _ -> True
_ -> False
castAs :: XSeq -> Type -> XSeq
castAs [] (TQualified _ '?') = []
castAs [XElem _ _ _ _ xs] tp = castAs xs tp
castAs [x] (TBase (QName nm u tname))
| u == xsNamespace
= if memA tname casts
then case findA tname casts x of
Just v -> [v]
Nothing -> error $ "Value "++show x++" cannot be cast to the atomic type: "++tname
else if memA tname buildInTypes
then castAs [x] (TBase (QName nm u $ findA tname buildInTypes))
else error $ "Unrecognized build-in type: "++tname
castAs xs tp = error $ "Value "++show xs++" cannot be cast to the type "++show tp
castableAs :: XSeq -> Type -> Bool
castableAs [] (TQualified _ '?') = True
castableAs [XElem _ _ _ _ xs] tp = castableAs xs tp
castableAs [x] (TBase (QName nm u tname))
| u == xsNamespace
= if memA tname casts
then findA tname casts x /= Nothing
else if memA tname buildInTypes
then castableAs [x] (TBase (QName nm u $ findA tname buildInTypes))
else error $ "Unrecognized build-in type: "++tname
castableAs _ _ = False
expandQName :: QName -> NS -> Type
expandQName tag ns
= es $ expand tag
where expand (QName n uri ln)
= let u = if uri == "" then defaultElementNS ns else uri
in case lookup u (schemas ns) of
Just s -> case lookup ln s of
Just t -> t
_ -> error ("Tag "++show tag++" is not defined in XML Schema "++u)
_ -> error ("Undefined schema "++n++"="++u)
es (TNamed tag)
| uri tag /= xsNamespace
= es $ expand tag
es (TElement n t) = TElement n $! (es t)
es (TAttribute a t) = TAttribute a $! (es t)
es (TSequence t1 t2) = (TSequence $! (es t1)) $! (es t2)
es (TInterleaving t1 t2) = (TInterleaving $! (es t1)) $! (es t2)
es (TChoice t1 t2) = (TChoice $! (es t1)) $! (es t2)
es (TQualified t c) = (TQualified $! (es t)) c
es t = t
toType :: Ast -> NS -> Type
toType e ns
= case e of
Avar "any" -> tItem
Avar "numeric" -> tNumeric
Avar s -> TBase (tag s ns)
Ast "item" [] -> tItem
Ast "node" [] -> tNode
Ast "text" [] -> tText
Ast "empty-sequence" [] -> TEmpty
Ast "element" []
-> TElement "*" TAny
Ast "element" [Avar tag]
-> TElement tag TAny
Ast "element" [Avar tag,tp]
-> TElement tag (toType tp ns)
Ast "attribute" []
-> TAttribute "*" TAny
Ast "attribute" [Avar tag]
-> TAttribute tag TAny
Ast "attribute" [Avar tag,tp]
-> TAttribute tag (toType tp ns)
Ast "*" [tp]
-> TQualified (toType tp ns) '*'
Ast "+" [tp]
-> TQualified (toType tp ns) '+'
Ast "?" [tp]
-> TQualified (toType tp ns) '?'
Ast item []
-> TItem item
_ -> error $ "Unknown type "++show e
schema2Type :: XTree -> NS -> (Type,XMLSchema)
schema2Type x ns = s2t x
where s2t (XElem tag _ _ _ ts)
| tag == documentRootTag
= (TAny,concat (map (snd . s2t) (filter isElem ts)))
s2t (XElem tag al _ _ ts)
| uri tag == xsNamespace
= case (localName tag,filter isElem ts) of
("element",[])
-> case lookup "ref" al of
Just s -> qualify al (named s,[])
_ -> let t = case lookup "type" al of Just s -> defined s; _ -> TAny
n = case lookup "name" al of Just n -> n; _ -> "*"
in qualify al (TElement n t,[])
("element",ts)
-> let tt = map s2t ts
t = makeSequence $ map fst tt
n = case lookup "name" al of Just n -> n; _ -> "*"
in qualify al (TElement n t, concatMap snd tt)
("attribute",[])
-> let t = case lookup "type" al of Just s -> defined s; _ -> TAny
n = case lookup "name" al of Just n -> n; _ -> "*"
in qualify al (case lookup "use" al of
Just "required" -> TAttribute n t
_ -> TQualified (TAttribute n t) '?',[])
("attribute",ts)
-> let tt = map s2t ts
t = makeSequence $ map fst tt
n = case lookup "name" al of Just n -> n; _ -> "*"
in qualify al (case lookup "use" al of
Just "required" -> TAttribute n t
_ -> TQualified (TAttribute n t) '?',
concatMap snd tt)
("complexType",[])
-> case (lookup "type" al, lookup "name" al) of
(Just t, Just n) -> qualify al (defined t,[])
(_, Just n) -> qualify al (named n,[])
(Just t, _) -> qualify al (defined t,[])
_ -> case lookup "mixed" al of
Just "true" -> qualify al (TQualified TAny '*',[])
_ -> error $ "wrong complexType: "++show x
("complexType",ts)
-> let tt = map s2t ts
t = makeSequence $ map fst tt
in case lookup "mixed" al of
Just "true" -> qualify al (makeSequence [TQualified TAny '*',t],
concatMap snd tt)
_ -> qualify al (t, concatMap snd tt)
("sequence",ts)
-> let tt = map s2t ts
t = makeSequence $ map fst tt
in qualify al (t, concatMap snd tt)
("choice",ts)
-> let tt = map s2t ts
t = foldl1 TChoice $ map fst tt
in qualify al (t, concatMap snd tt)
("list",[])
-> case lookup "itemType" al of
Just s -> (TQualified (defined s) '*', [])
_ -> error $ "wrong list: "++show x
("simpleType",ts)
-> let tt = map s2t ts
t = makeSequence $ map fst tt
in qualify al (t, concatMap snd tt)
("extension",ts)
-> let tt = map s2t ts
t = makeSequence $ map fst tt
in case lookup "base" al of
Just s -> qualify al (makeSequence [defined s,t],
concatMap snd tt)
_ -> qualify al (t, concatMap snd tt)
("restriction",_)
-> case lookup "base" al of
Just s -> qualify al (defined s, [])
_ -> error $ "wrong restriction: "++show x
("simpleContent",[t])
-> qualify al $ s2t t
("schema",ts)
-> let tt1 = map s2t (filter isType ts)
tt2 = map s2t (filter (not . isType) ts)
in (TAny, concatMap snd tt1++concatMap snd tt2)
(_,ts)
-> let tt = map s2t ts
t = makeSequence $ map fst tt
in (t, concatMap snd tt)
s2t _ = (TEmpty,[])
qualify al (t,binds)
= let nt = case (lookup "minOccurs" al,lookup "maxOccurs" al) of
(Nothing, Nothing) -> t
(Just "0", Just "1") -> TQualified t '?'
(Just "1", _) -> TQualified t '+'
(_,_) -> TQualified t '*'
in case lookup "name" al of
Just n -> (nt,(n,nt):binds)
_ -> (nt,binds)
makeSequence ts
= ms $ filter (/= TEmpty) ts
where ms [] = TEmpty
ms [t] = t
ms (t:ts) = TSequence t (ms ts)
lookup s ((QName _ _ m,v):_) | s==m = Just v
lookup s (_:xs) = lookup s xs
lookup s [] = Nothing
isElem (XElem s _ _ _ _) = True
isElem _ = False
isType (XElem s _ _ _ _) | elem (localName s) ["complexType","simpleType"] = True
isType _ = False
named s = TNamed (tag s ns)
defined s = let qn = tag s ns
in if isBuildInType (localName qn)
then TBase qn
else TNamed qn
data Match a
= Success a
| Failure [(Type,XTree)]
instance Monad Match where
return s = Success s
Success s >>= f = f s
Failure ts >>= _ = Failure ts
typeValidate :: Type -> XSeq -> Match XSeq
typeValidate tp xs
= v tp xs []
where allV t xs as
= case v t xs as of
x@(Success []) -> x
Success xs'
-> allV t xs' as
Failure _ -> Success xs
sp (XText s:xs)
| all isSpace s
= sp xs
sp x = x
v t (XText s:xs) as
| all isSpace s
= v t xs as
v t xs as
= case t of
TEmpty -> Success xs
TAny -> Success []
TQualified (TAttribute name t') '?'
-> if memA name as
then do v t' [XText $ findA name as] []
return xs
else Success xs
TAttribute name t'
-> if memA name as
then do v t' [XText $ findA name as] []
return xs
else Failure []
TSequence t1 t2
-> do xs' <- v t1 xs as
v t2 xs' as
TInterleaving t1 t2
-> case v t1 xs as of
Success xs'
-> v t2 xs' as
_ -> do xs' <- v t2 xs as
v t1 xs' as
TChoice t1 t2
-> case v t1 xs as of
Failure ts
-> v t2 xs as
x -> x
TQualified t '?'
-> case v t xs as of
Failure _ -> Success xs
x -> x
TQualified t '+'
-> do xs' <- v t xs as
allV t xs' as
TQualified t '*'
-> allV t xs as
_ -> case xs of
[] -> Failure [(t,XText "Unexpected end of element content")]
x:xs'-> case t of
TBase s
-> if uri s == xsNamespace
&& castableAs [x] (TBase s)
then Success xs'
else Failure [(t,x)]
TAny -> Success xs'
TItem "item"
-> Success xs'
TItem "node"
-> case x of
XElem _ _ _ _ _ -> Success xs'
_ -> Failure [(t,x)]
TItem _ -> error ("Unexpected named type: "++show t)
TElement tag t'
-> case x of
XElem tag' as' _ _ xs''
-> if tag == localName tag' || tag == "*"
then case v t' xs'' (map (\(q,v) -> (localName q,v)) as') of
Failure ts -> Failure ((t,x):ts)
Success s
-> case sp s of
[] -> Success xs'
(z:_) -> Failure [(t,z)]
else Failure [(t,x)]
_ -> Failure [(t,x)]
_ -> Failure [(t,x)]
validateXSeq :: XSeq -> NS -> Match XSeq
validateXSeq xs ns
= vs xs
where vs (XElem rt _ _ _ xs':xs)
| rt == documentRootTag
= vs xs'
vs (x@(XElem tag _ _ _ _):xs)
= case expandQName tag ns of
t@(TElement _ _)
-> do xs' <- typeValidate t [x]
vs xs
t -> do xs' <- typeValidate (TElement (localName tag) t) [x]
vs xs
vs (_:xs) = vs xs
vs [] = Success []
validate :: XSeq -> XSeq -> NS -> XSeq
validate xs [xt] ns
= let (_,t) = schema2Type xt ns
n = ns{ defaultElementNS="default", schemas=("default",t):schemas ns }
in case validateXSeq xs n of
Failure ((_,x):_)
-> [x]
Success [] -> []
Success [XText s]
| all isSpace s
-> []
_ -> [XBool False]
parseSchema :: String -> NS -> IO XMLSchema
parseSchema schema ns
= do sch <- readFile schema
let x = materialize False (parseDocument sch)
(_,t) = schema2Type x ns
return t
validateFile :: FilePath -> FilePath -> IO Bool
validateFile file schema
= do sch <- readFile schema
let ns = initialNS
(_,t) = schema2Type (materialize False (parseDocument sch)) ns
doc <- readFile file
let xs = materialize False (parseDocument doc)
n = ns{ defaultElementNS="default", schemas=("default",t):schemas ns }
case validateXSeq [xs] n of
Failure ts
-> error ("Failed to validate the file against the schema: "
++show ts)
Success [] -> return True
Success [XText s]
| all isSpace s
-> return True
_ -> error "Failed to validate the file against the schema"