{------------------------------------------------------------------------------------- - - The XQuery type system - Programmer: Leonidas Fegaras - Email: fegaras@cse.uta.edu - Web: http://lambda.uta.edu/ - Creation: 01/16/09, last update: 09/29/09 - - Copyright (c) 2009 by Leonidas Fegaras, the University of Texas at Arlington. All rights reserved. - This material is provided as is, with absolutely no warranty expressed or implied. - Any use is at your own risk. Permission is hereby granted to use or copy this program - for any purpose, provided the above notices are retained on all copies. - --------------------------------------------------------------------------------------} {-# OPTIONS_GHC -funbox-strict-fields #-} {-# LANGUAGE BangPatterns #-} module Text.XML.HXQ.Types where import Char(isDigit,isSpace) import List(isSuffixOf,inits,tails) import Text.XML.HXQ.Parser import Text.XML.HXQ.XTree import XMLParse(XMLEvent(..),parseDocument) import HXML(Name,AttList) import DeepSeq buildInTypes :: [(String,String)] buildInTypes = [("untypedAtomic","anyAtomicType"), ("dateTime","anyAtomicType"), ("date","anyAtomicType"), ("time","anyAtomicType"), ("duration","anyAtomicType"), ("float","anyAtomicType"), ("double","anyAtomicType"), ("decimal","anyAtomicType"), ("gYearMonth","anyAtomicType"), ("gYear","anyAtomicType"), ("gMonthDay","anyAtomicType"), ("gDay","anyAtomicType"), ("gMonth","anyAtomicType"), ("boolean","anyAtomicType"), ("base64Binary","anyAtomicType"), ("hexBinary","anyAtomicType"), ("anyURI","anyAtomicType"), ("QName","anyAtomicType"), ("NOTATION","anyAtomicType"), ("yearMonthDuration","duration"), ("dayTmeDuration","duration"), ("integer","decimal"), ("nonPositiveInteger","integer"), ("negativeInteger","nonPositiveInteger"), ("long","integer"), ("int","long"), ("short","int"), ("byte","short"), ("nonNegativeInteger","integer"), ("unsignedLong","nonNegativeInteger"), ("unsignedInt","unsignedLong"), ("unsignedShort","unsignedInt"), ("unsignedByte","unsignedShort"), ("positiveInteger","nonNegativeInteger"), ("string","anyAtomicType"), ("normalizedString","string"), ("token","normalizedString"), ("language","token"), ("NMTOKEN","token"), ("Name","token"), ("NCName","Name"), ("ID","NCName"), ("IDREF","NCName"), ("ENTITY","NCName")] typeHierarchy :: [(String,String)] typeHierarchy = [("anyAtomicType","item"), ("node","item"), ("attribute","node"), ("comment","node"), ("document","node"), ("element","node"), ("processing-instruction","node"), ("text","node")] isBuildInType :: String -> Bool isBuildInType "anyAtomicType" = True isBuildInType "numeric" = True isBuildInType name = memA name buildInTypes baseType s = TBase (QName "xs" xsNamespace s) tString = baseType "string" tInt = baseType "integer" tFloat = baseType "float" tBool = baseType "boolean" tAtomic = baseType "anyAtomicType" tUAtomic = baseType "untypedAtomic" tItem = TItem "item" tNode = TItem "node" tText = TItem "text" tStar t = TQualified t '*' tPlus t = TQualified t '+' tOptional t = TQualified t '?' tNumeric = TBase (QName "" xsNamespace "numeric") -- (integer,decimal,float,double) -- find the value of a variable in an association list findA var ((n,b):_) | n==var = b findA var (_:xs) = findA var xs findA var _ = error ("Undefined variable: "++var) -- is the variable defined in the association list? memA var ((n,_):_) | n==var = True memA var (_:xs) = memA var xs memA _ _ = False -- xs:string casting. Much like string() toString :: XTree -> String toString x = case x of XElem _ _ _ _ zs -> concatMap toString zs XAttr _ v -> v XText x -> x XInt n -> show n XFloat n -> show n XBool b -> if b then "true" else "false" _ -> "" -- parse a numeral (int or float) from a string readNum :: String -> Maybe XTree readNum cs = let readInt ('+':rest) = span isDigit rest readInt ('-':rest) = let (s,rest1) = span isDigit rest in ('-':s,rest1) readInt rest = span isDigit rest readExp ('e':cs) = readInt cs readExp ('E':cs) = readInt cs readExp cs = ("",cs) (si,rest) = readInt cs in case rest of '.':rest1 -> let (sd,rest2) = span isDigit rest1 in case readExp rest2 of ("",[]) -> Just $ XFloat (read $ si ++ "." ++ sd) (exp,[]) -> Just $ XFloat (read $ si ++ "." ++ sd ++ "e" ++ exp) _ -> Nothing _ -> case readExp rest of ("",[]) -> Just $ XInt (read si) (exp,[]) -> Just $ XFloat (read $ si ++ "e" ++ exp) _ -> Nothing -- casting to any kind of numeral toNum :: XTree -> Maybe XTree toNum (XElem _ _ _ _ [x]) = toNum x toNum (XText s) = readNum s toNum (x@(XInt n)) = Just x toNum (x@(XFloat n)) = Just x toNum (XBool b) = Just $ XInt (if b then 1 else 0) toNum (XAttr _ v) = toNum (XText v) toNum _ = Nothing -- xs:int casting toInt :: XTree -> Maybe XTree toInt (XElem _ _ _ _ [x]) = toInt x toInt (XText s) = case readNum s of Just (XFloat n) -> Just $ XInt (floor n) x -> x toInt (x@(XInt n)) = Just x toInt (XFloat n) = Just $ XInt (floor n) toInt (XBool b) = Just $ XInt (if b then 1 else 0) toInt (XAttr _ v) = toInt (XText v) toInt _ = Nothing -- xs:float casting toFloat :: XTree -> Maybe XTree toFloat (XElem _ _ _ _ [x]) = toFloat x toFloat (XText s) = case readNum s of Just (XInt n) -> Just $ XFloat $ fromIntegral n x -> x toFloat (XInt n) = Just $ XFloat $ fromIntegral n toFloat (x@(XFloat n)) = Just x toFloat (XBool b) = Just $ XFloat (if b then 1 else 0) toFloat (XAttr _ v) = toFloat (XText v) toFloat _ = Nothing -- xs:boolean casting toBool :: XTree -> Maybe XTree toBool (XText s) = Just $ XBool $ s /= "" toBool (XInt n) = Just $ XBool $ n /= 0 toBool (XFloat n) = Just $ XBool $ n /= 0 toBool (XAttr _ v) = Just $ XBool $ v /= "" toBool (XBool b) = Just $ XBool b toBool x = Nothing -- all casting functions casts :: [(String,XTree->Maybe XTree)] casts = [("anyAtomicType",Just . id), ("string",Just . XText . toString), ("float",toFloat), ("integer",toInt), ("nonNegativeInteger",\x -> do XInt n <- toInt x return $! XInt $ abs n), ("boolean",toBool)] -- implements: expr instance of type instanceOf :: XSeq -> Type -> Bool instanceOf e tp = case typeValidate tp e of Success _ -> True _ -> False -- implements: expr cast as type castAs :: XSeq -> Type -> XSeq castAs [] (TQualified _ '?') = [] castAs [XElem _ _ _ _ xs] tp = castAs xs tp castAs [x] (TBase (QName nm u tname)) | u == xsNamespace = if memA tname casts then case findA tname casts x of Just v -> [v] Nothing -> error $ "Value "++show x++" cannot be cast to the atomic type: "++tname else if memA tname buildInTypes then castAs [x] (TBase (QName nm u $ findA tname buildInTypes)) else error $ "Unrecognized build-in type: "++tname castAs xs tp = error $ "Value "++show xs++" cannot be cast to the type "++show tp -- implements: expr castable as type castableAs :: XSeq -> Type -> Bool castableAs [] (TQualified _ '?') = True castableAs [XElem _ _ _ _ xs] tp = castableAs xs tp castableAs [x] (TBase (QName nm u tname)) | u == xsNamespace = if memA tname casts then findA tname casts x /= Nothing else if memA tname buildInTypes then castableAs [x] (TBase (QName nm u $ findA tname buildInTypes)) else error $ "Unrecognized build-in type: "++tname castableAs _ _ = False -- inline element and type references in an XML Schema expandQName :: QName -> NS -> Type expandQName tag ns = es $ expand tag where expand (QName n uri ln) = let u = if uri == "" then defaultElementNS ns else uri in case lookup u (schemas ns) of Just s -> case lookup ln s of Just t -> t _ -> error ("Tag "++show tag++" is not defined in XML Schema "++u) _ -> error ("Undefined schema "++n++"="++u) es (TNamed tag) | uri tag /= xsNamespace = es $ expand tag es (TElement n t) = TElement n $! (es t) es (TAttribute a t) = TAttribute a $! (es t) es (TSequence t1 t2) = (TSequence $! (es t1)) $! (es t2) es (TInterleaving t1 t2) = (TInterleaving $! (es t1)) $! (es t2) es (TChoice t1 t2) = (TChoice $! (es t1)) $! (es t2) es (TQualified t c) = (TQualified $! (es t)) c es t = t -- Convert an XQuery type representation to an XML Schema toType :: Ast -> NS -> Type toType e ns = case e of Avar "any" -> tItem Avar "numeric" -> tNumeric Avar s -> TBase (tag s ns) Ast "item" [] -> tItem Ast "node" [] -> tNode Ast "text" [] -> tText Ast "empty-sequence" [] -> TEmpty Ast "element" [] -> TElement "*" TAny Ast "element" [Avar tag] -> TElement tag TAny Ast "element" [Avar tag,tp] -> TElement tag (toType tp ns) Ast "attribute" [] -> TAttribute "*" TAny Ast "attribute" [Avar tag] -> TAttribute tag TAny Ast "attribute" [Avar tag,tp] -> TAttribute tag (toType tp ns) Ast "*" [tp] -> TQualified (toType tp ns) '*' Ast "+" [tp] -> TQualified (toType tp ns) '+' Ast "?" [tp] -> TQualified (toType tp ns) '?' Ast item [] -> TItem item _ -> error $ "Unknown type "++show e -- Derive the XML Schema from its XML representation schema2Type :: XTree -> NS -> (Type,XMLSchema) schema2Type x ns = s2t x where s2t (XElem tag _ _ _ ts) | tag == documentRootTag = (TAny,concat (map (snd . s2t) (filter isElem ts))) s2t (XElem tag al _ _ ts) | uri tag == xsNamespace = case (localName tag,filter isElem ts) of ("element",[]) -> case lookup "ref" al of Just s -> qualify al (named s,[]) _ -> let t = case lookup "type" al of Just s -> defined s; _ -> TAny n = case lookup "name" al of Just n -> n; _ -> "*" in qualify al (TElement n t,[]) ("element",ts) -> let tt = map s2t ts t = makeSequence $ map fst tt n = case lookup "name" al of Just n -> n; _ -> "*" in qualify al (TElement n t, concatMap snd tt) ("attribute",[]) -> let t = case lookup "type" al of Just s -> defined s; _ -> TAny n = case lookup "name" al of Just n -> n; _ -> "*" in qualify al (case lookup "use" al of Just "required" -> TAttribute n t _ -> TQualified (TAttribute n t) '?',[]) ("attribute",ts) -> let tt = map s2t ts t = makeSequence $ map fst tt n = case lookup "name" al of Just n -> n; _ -> "*" in qualify al (case lookup "use" al of Just "required" -> TAttribute n t _ -> TQualified (TAttribute n t) '?', concatMap snd tt) ("complexType",[]) -> case (lookup "type" al, lookup "name" al) of (Just t, Just n) -> qualify al (defined t,[]) (_, Just n) -> qualify al (named n,[]) (Just t, _) -> qualify al (defined t,[]) _ -> case lookup "mixed" al of Just "true" -> qualify al (TQualified TAny '*',[]) _ -> error $ "wrong complexType: "++show x ("complexType",ts) -> let tt = map s2t ts t = makeSequence $ map fst tt in case lookup "mixed" al of Just "true" -> qualify al (makeSequence [TQualified TAny '*',t], concatMap snd tt) _ -> qualify al (t, concatMap snd tt) ("sequence",ts) -> let tt = map s2t ts t = makeSequence $ map fst tt in qualify al (t, concatMap snd tt) ("choice",ts) -> let tt = map s2t ts t = foldl1 TChoice $ map fst tt in qualify al (t, concatMap snd tt) ("list",[]) -> case lookup "itemType" al of Just s -> (TQualified (defined s) '*', []) _ -> error $ "wrong list: "++show x ("simpleType",ts) -> let tt = map s2t ts t = makeSequence $ map fst tt in qualify al (t, concatMap snd tt) ("extension",ts) -> let tt = map s2t ts t = makeSequence $ map fst tt in case lookup "base" al of Just s -> qualify al (makeSequence [defined s,t], concatMap snd tt) _ -> qualify al (t, concatMap snd tt) ("restriction",_) -> case lookup "base" al of Just s -> qualify al (defined s, []) _ -> error $ "wrong restriction: "++show x ("simpleContent",[t]) -> qualify al $ s2t t ("schema",ts) -> let tt1 = map s2t (filter isType ts) tt2 = map s2t (filter (not . isType) ts) in (TAny, concatMap snd tt1++concatMap snd tt2) (_,ts) -> let tt = map s2t ts t = makeSequence $ map fst tt in (t, concatMap snd tt) s2t _ = (TEmpty,[]) qualify al (t,binds) = let nt = case (lookup "minOccurs" al,lookup "maxOccurs" al) of (Nothing, Nothing) -> t (Just "0", Just "1") -> TQualified t '?' (Just "1", _) -> TQualified t '+' (_,_) -> TQualified t '*' in case lookup "name" al of Just n -> (nt,(n,nt):binds) _ -> (nt,binds) makeSequence ts = ms $ filter (/= TEmpty) ts where ms [] = TEmpty ms [t] = t ms (t:ts) = TSequence t (ms ts) lookup s ((QName _ _ m,v):_) | s==m = Just v lookup s (_:xs) = lookup s xs lookup s [] = Nothing isElem (XElem s _ _ _ _) = True isElem _ = False isType (XElem s _ _ _ _) | elem (localName s) ["complexType","simpleType"] = True isType _ = False named s = TNamed (tag s ns) defined s = let qn = tag s ns in if isBuildInType (localName qn) then TBase qn else TNamed qn data Match a = Success a -- a = value to be matched after success | Failure [(Type,XTree)] -- failure history (trace) instance Monad Match where return s = Success s Success s >>= f = f s Failure ts >>= _ = Failure ts -- validate the XML sequence xs against the XML Schema type tp typeValidate :: Type -> XSeq -> Match XSeq typeValidate !tp xs = v tp xs [] where allV t xs as = case v t xs as of x@(Success []) -> x Success xs' -> allV t xs' as Failure _ -> Success xs sp (XText s:xs) | all isSpace s = sp xs sp x = x v !t (XText s:xs) as | all isSpace s = v t xs as v !t xs as = case t of TEmpty -> Success xs TAny -> Success [] TQualified (TAttribute name t') '?' -> if memA name as then do v t' [XText $ findA name as] [] return xs else Success xs TAttribute name t' -> if memA name as then do v t' [XText $ findA name as] [] return xs else Failure [] TSequence t1 t2 -> do xs' <- v t1 xs as v t2 xs' as TInterleaving t1 t2 -> case v t1 xs as of Success xs' -> v t2 xs' as _ -> do xs' <- v t2 xs as v t1 xs' as TChoice t1 t2 -> case v t1 xs as of Failure ts -> v t2 xs as x -> x TQualified t '?' -> case v t xs as of Failure _ -> Success xs x -> x TQualified t '+' -> do xs' <- v t xs as allV t xs' as TQualified t '*' -> allV t xs as _ -> case xs of [] -> Failure [(t,XText "Unexpected end of element content")] x:xs'-> case t of TBase s -> if uri s == xsNamespace && castableAs [x] (TBase s) then Success xs' else Failure [(t,x)] TAny -> Success xs' TItem "item" -> Success xs' TItem "node" -> case x of XElem _ _ _ _ _ -> Success xs' _ -> Failure [(t,x)] TItem _ -> error ("Unexpected named type: "++show t) TElement tag t' -> case x of XElem tag' as' _ _ xs'' -> if tag == localName tag' || tag == "*" then case v t' xs'' (map (\(q,v) -> (localName q,v)) as') of Failure ts -> Failure ((t,x):ts) Success s -> case sp s of [] -> Success xs' (z:_) -> Failure [(t,z)] else Failure [(t,x)] _ -> Failure [(t,x)] _ -> Failure [(t,x)] -- Validate the XML data using XML Schema validateXSeq :: XSeq -> NS -> Match XSeq validateXSeq xs !ns = vs xs where vs (XElem rt _ _ _ xs':xs) | rt == documentRootTag = vs xs' vs (x@(XElem tag _ _ _ _):xs) = case expandQName tag ns of t@(TElement _ _) -> do xs' <- typeValidate t [x] vs xs t -> do xs' <- typeValidate (TElement (localName tag) t) [x] vs xs vs (_:xs) = vs xs vs [] = Success [] validate :: XSeq -> XSeq -> NS -> XSeq validate xs [xt] ns = let (_,t) = schema2Type xt ns n = ns{ defaultElementNS="default", schemas=("default",t):schemas ns } in case validateXSeq xs n of Failure ((_,x):_) -> [x] Success [] -> [] Success [XText s] | all isSpace s -> [] _ -> [XBool False] parseSchema :: String -> NS -> IO XMLSchema parseSchema schema ns = do sch <- readFile schema let x = materialize False (parseDocument sch) (_,t) = schema2Type x ns return t -- | Validate the XML document against the XML Schema. Also done using the validate XQuery form. validateFile :: FilePath -> FilePath -> IO Bool validateFile file schema = do sch <- readFile schema let ns = initialNS (_,!t) = schema2Type (materialize False (parseDocument sch)) ns -- putStrLn (show t) doc <- readFile file let xs = materialize False (parseDocument doc) n = deepSeq t $ ns{ defaultElementNS="default", schemas=("default",t):schemas ns } case validateXSeq [xs] n of Failure ts -> error ("Failed to validate the file against the schema: " ++show ts) Success [] -> return True Success [XText s] | all isSpace s -> return True _ -> error "Failed to validate the file against the schema"