{-------------------------------------------------------------------------------------
-
- The XQuery type system
- Programmer: Leonidas Fegaras
- Email: fegaras@cse.uta.edu
- Web: http://lambda.uta.edu/
- Creation: 01/16/09, last update: 09/29/09
- 
- Copyright (c) 2009 by Leonidas Fegaras, the University of Texas at Arlington. All rights reserved.
- This material is provided as is, with absolutely no warranty expressed or implied.
- Any use is at your own risk. Permission is hereby granted to use or copy this program
- for any purpose, provided the above notices are retained on all copies.
-
--------------------------------------------------------------------------------------}


module Text.XML.HXQ.Types where

import Char(isDigit,isSpace)
import List(isSuffixOf,inits,tails)
import Text.XML.HXQ.Parser
import Text.XML.HXQ.XTree
import XMLParse(XMLEvent(..),parseDocument)
import HXML(Name,AttList)


buildInTypes :: [(String,String)]
buildInTypes
    = [("untypedAtomic","anyAtomicType"),
       ("dateTime","anyAtomicType"),
       ("date","anyAtomicType"),
       ("time","anyAtomicType"),
       ("duration","anyAtomicType"),
       ("float","anyAtomicType"),
       ("double","anyAtomicType"),
       ("decimal","anyAtomicType"),
       ("gYearMonth","anyAtomicType"),
       ("gYear","anyAtomicType"),
       ("gMonthDay","anyAtomicType"),
       ("gDay","anyAtomicType"),
       ("gMonth","anyAtomicType"),
       ("boolean","anyAtomicType"),
       ("base64Binary","anyAtomicType"),
       ("hexBinary","anyAtomicType"),
       ("anyURI","anyAtomicType"),
       ("QName","anyAtomicType"),
       ("NOTATION","anyAtomicType"),
       ("yearMonthDuration","duration"),
       ("dayTmeDuration","duration"),
       ("integer","decimal"),
       ("nonPositiveInteger","integer"),
       ("negativeInteger","nonPositiveInteger"),
       ("long","integer"),
       ("int","long"),
       ("short","int"),
       ("byte","short"),
       ("nonNegativeInteger","integer"),
       ("unsignedLong","nonNegativeInteger"),
       ("unsignedInt","unsignedLong"),
       ("unsignedShort","unsignedInt"),
       ("unsignedByte","unsignedShort"),
       ("positiveInteger","nonNegativeInteger"),
       ("string","anyAtomicType"),
       ("normalizedString","string"),
       ("token","normalizedString"),
       ("language","token"),
       ("NMTOKEN","token"),
       ("Name","token"),
       ("NCName","Name"),
       ("ID","NCName"),
       ("IDREF","NCName"),
       ("ENTITY","NCName")]


typeHierarchy :: [(String,String)]
typeHierarchy
    = [("anyAtomicType","item"),
       ("node","item"),
       ("attribute","node"),
       ("comment","node"),
       ("document","node"),
       ("element","node"),
       ("processing-instruction","node"),
       ("text","node")]


isBuildInType :: String -> Bool
isBuildInType "anyAtomicType" = True
isBuildInType "numeric" = True
isBuildInType name = memA name buildInTypes


baseType s = TBase (QName "xs" xsNamespace s)

tString = baseType "string"
tInt = baseType "integer"
tFloat = baseType "float"
tBool = baseType "boolean"
tAtomic = baseType "anyAtomicType"
tUAtomic = baseType "untypedAtomic"
tItem = TItem "item"
tNode = TItem "node"
tText = TItem "text"
tStar t = TQualified t '*'
tPlus t = TQualified t '+'
tOptional t = TQualified t '?'
tNumeric = TBase (QName "" xsNamespace "numeric")   -- (integer,decimal,float,double)


-- find the value of a variable in an association list
findA var ((n,b):_) | n==var = b
findA var (_:xs) = findA var xs
findA var _ = error ("Undefined variable: "++var)


-- is the variable defined in the association list?
memA var ((n,_):_) | n==var = True
memA var (_:xs) = memA var xs
memA _ _ = False


-- xs:string casting. Much like string()
toString :: XTree -> String
toString x
    = case x of
        XElem _ _ _ _ zs
            -> concatMap toString zs
        XAttr _ v -> v
        XText x -> x
        XInt n -> show n
        XFloat n -> show n
        XBool b -> if b then "true" else "false"
        _ -> ""


-- parse a numeral (int or float) from a string
readNum :: String -> Maybe XTree
readNum cs
    = let readInt ('+':rest) = span isDigit rest
          readInt ('-':rest) = let (s,rest1) = span isDigit rest
                               in ('-':s,rest1)
          readInt rest = span isDigit rest
          readExp ('e':cs) = readInt cs
          readExp ('E':cs) = readInt cs
          readExp cs = ("",cs)
          (si,rest) = readInt cs
      in case rest of
           '.':rest1
               -> let (sd,rest2) = span isDigit rest1
                  in case readExp rest2 of
                       ("",[]) -> Just $ XFloat (read $ si ++ "." ++ sd)
                       (exp,[]) -> Just $ XFloat (read $ si ++ "." ++ sd ++ "e" ++ exp)
                       _ -> Nothing
           _ -> case readExp rest of
                  ("",[]) -> Just $ XInt (read si)
                  (exp,[]) -> Just $ XFloat (read $ si ++ "e" ++ exp)
                  _ -> Nothing


-- casting to any kind of numeral
toNum :: XTree -> Maybe XTree
toNum (XElem _ _ _ _ [x]) = toNum x
toNum (XText s) = readNum s
toNum (x@(XInt n)) = Just x
toNum (x@(XFloat n)) = Just x
toNum (XBool b) = Just $ XInt (if b then 1 else 0)
toNum (XAttr _ v) = toNum (XText v)
toNum _ = Nothing


-- xs:int casting
toInt :: XTree -> Maybe XTree
toInt (XElem _ _ _ _ [x]) = toInt x
toInt (XText s) = case readNum s of
                    Just (XFloat n) -> Just $ XInt (floor n)
                    x -> x
toInt (x@(XInt n)) = Just x
toInt (XFloat n) = Just $ XInt (floor n)
toInt (XBool b) = Just $ XInt (if b then 1 else 0)
toInt (XAttr _ v) = toInt (XText v)
toInt _ = Nothing


-- xs:float casting
toFloat :: XTree -> Maybe XTree
toFloat (XElem _ _ _ _ [x]) = toFloat x
toFloat (XText s) = case readNum s of
                      Just (XInt n) -> Just $ XFloat $ fromIntegral n
                      x -> x
toFloat (XInt n) = Just $ XFloat $ fromIntegral n
toFloat (x@(XFloat n)) = Just x
toFloat (XBool b) = Just $ XFloat (if b then 1 else 0)
toFloat (XAttr _ v) = toFloat (XText v)
toFloat _ = Nothing


-- xs:boolean casting
toBool :: XTree -> Maybe XTree
toBool (XText s) = Just $ XBool $ s /= ""
toBool (XInt n) = Just $ XBool $ n /= 0
toBool (XFloat n) = Just $ XBool $ n /= 0
toBool (XAttr _ v) = Just $ XBool $ v /= ""
toBool (XBool b) = Just $ XBool b
toBool x = Nothing


-- all casting functions
casts :: [(String,XTree->Maybe XTree)]
casts = [("anyAtomicType",Just . id),
         ("string",Just . XText . toString),
         ("float",toFloat),
         ("integer",toInt),
         ("nonNegativeInteger",\x -> do XInt n <- toInt x
                                        return $! XInt $ abs n),
         ("boolean",toBool)]


-- implements: expr instance of type
instanceOf :: XSeq -> Type -> Bool
instanceOf e tp
    = case typeValidate tp e of
        Success _ -> True
        _ -> False


-- implements: expr cast as type
castAs :: XSeq -> Type -> XSeq
castAs [] (TQualified _ '?') = []
castAs [XElem _ _ _ _ xs] tp = castAs xs tp
castAs [x] (TBase (QName nm u tname))
    | u == xsNamespace
    = if memA tname casts
      then case findA tname casts x of
             Just v -> [v]
             Nothing -> error $ "Value "++show x++" cannot be cast to the atomic type: "++tname
      else if memA tname buildInTypes
           then castAs [x] (TBase (QName nm u $ findA tname buildInTypes))
           else error $ "Unrecognized build-in type: "++tname
castAs xs tp = error $ "Value "++show xs++" cannot be cast to the type "++show tp


-- implements: expr castable as type
castableAs :: XSeq -> Type -> Bool
castableAs [] (TQualified _ '?') = True
castableAs [XElem _ _ _ _ xs] tp = castableAs xs tp
castableAs [x] (TBase (QName nm u tname))
    | u == xsNamespace
    = if memA tname casts
      then findA tname casts x /= Nothing
      else if memA tname buildInTypes
           then castableAs [x] (TBase (QName nm u $ findA tname buildInTypes))
           else error $ "Unrecognized build-in type: "++tname
castableAs _ _ = False


-- inline element and type references in an XML Schema
expandQName :: QName -> NS -> Type
expandQName tag ns
    = es $ expand tag
      where expand (QName n uri ln)
                = let u = if uri == "" then defaultElementNS ns else uri
                  in case lookup u (schemas ns) of
                       Just s -> case lookup ln s of
                                   Just t -> t
                                   _ -> error ("Tag "++show tag++" is not defined in XML Schema "++u)
                       _ -> error ("Undefined schema "++n++"="++u)
            es (TNamed tag)
                | uri tag /= xsNamespace
                = es $ expand tag
            es (TElement n t) = TElement n $! (es t)
            es (TAttribute a t) = TAttribute a $! (es t)
            es (TSequence t1 t2) = (TSequence $! (es t1)) $! (es t2)
            es (TInterleaving t1 t2) = (TInterleaving $! (es t1)) $! (es t2)
            es (TChoice t1 t2) = (TChoice $! (es t1)) $! (es t2)
            es (TQualified t c) = (TQualified $! (es t)) c
            es t = t


-- Convert an XQuery type representation to an XML Schema
toType :: Ast -> NS -> Type
toType e ns
    = case e of
        Avar "any" -> tItem
        Avar "numeric" -> tNumeric
        Avar s -> TBase (tag s ns)
        Ast "item" [] -> tItem
        Ast "node" [] -> tNode
        Ast "text" [] -> tText
        Ast "empty-sequence" [] -> TEmpty
        Ast "element" []
            -> TElement "*" TAny
        Ast "element" [Avar tag]
            -> TElement tag TAny
        Ast "element" [Avar tag,tp]
            -> TElement tag (toType tp ns)
        Ast "attribute" []
            -> TAttribute "*" TAny
        Ast "attribute" [Avar tag]
            -> TAttribute tag TAny
        Ast "attribute" [Avar tag,tp]
            -> TAttribute tag (toType tp ns)
        Ast "*" [tp]
            -> TQualified (toType tp ns) '*'
        Ast "+" [tp]
            -> TQualified (toType tp ns) '+'
        Ast "?" [tp]
            -> TQualified (toType tp ns) '?'
        Ast item []
            -> TItem item
        _ -> error $ "Unknown type "++show e


-- Derive the XML Schema from its XML representation
schema2Type :: XTree -> NS -> (Type,XMLSchema)
schema2Type x ns = s2t x
    where s2t (XElem tag _ _ _ ts)
             | tag == documentRootTag
             = (TAny,concat (map (snd . s2t) (filter isElem ts)))
          s2t (XElem tag al _ _ ts)
             | uri tag == xsNamespace
             = case (localName tag,filter isElem ts) of
                 ("element",[])
                     -> case lookup "ref" al of
                          Just s -> qualify al (named s,[])
                          _ -> let t = case lookup "type" al of Just s -> defined s; _ -> TAny
                                   n = case lookup "name" al of Just n -> n; _ -> "*"
                               in qualify al (TElement n t,[])
                 ("element",ts)
                     -> let tt = map s2t ts
                            t = makeSequence $ map fst tt
                            n = case lookup "name" al of Just n -> n; _ -> "*"
                        in qualify al (TElement n t, concatMap snd tt)
                 ("attribute",[])
                     -> let t = case lookup "type" al of Just s -> defined s; _ -> TAny
                            n = case lookup "name" al of Just n -> n; _ -> "*"
                        in qualify al (case lookup "use" al of
                                         Just "required" -> TAttribute n t
                                         _ -> TQualified (TAttribute n t) '?',[])
                 ("attribute",ts)
                     -> let tt = map s2t ts
                            t = makeSequence $ map fst tt
                            n = case lookup "name" al of Just n -> n; _ -> "*"
                        in qualify al (case lookup "use" al of
                                         Just "required" -> TAttribute n t
                                         _ -> TQualified (TAttribute n t) '?',
                                       concatMap snd tt)
                 ("complexType",[])
                     -> case (lookup "type" al, lookup "name" al) of
                          (Just t, Just n) -> qualify al (defined t,[])
                          (_, Just n) -> qualify al (named n,[])
                          (Just t, _) -> qualify al (defined t,[])
                          _ -> case lookup "mixed" al of
                                 Just "true" -> qualify al (TQualified TAny '*',[])
                                 _ -> error $ "wrong complexType: "++show x
                 ("complexType",ts)
                     -> let tt = map s2t ts
                            t = makeSequence $ map fst tt
                        in case lookup "mixed" al of
                             Just "true" -> qualify al (makeSequence [TQualified TAny '*',t],
                                                        concatMap snd tt)
                             _ -> qualify al (t, concatMap snd tt)
                 ("sequence",ts)
                     -> let tt = map s2t ts
                            t = makeSequence $ map fst tt
                        in qualify al (t, concatMap snd tt)
                 ("choice",ts)
                     -> let tt = map s2t ts
                            t = foldl1 TChoice $ map fst tt
                        in qualify al (t, concatMap snd tt)
                 ("list",[])
                     -> case lookup "itemType" al of
                          Just s -> (TQualified (defined s) '*', [])
                          _ -> error $ "wrong list: "++show x
                 ("simpleType",ts)
                     -> let tt = map s2t ts
                            t = makeSequence $ map fst tt
                        in qualify al (t, concatMap snd tt)
                 ("extension",ts)
                     -> let tt = map s2t ts
                            t = makeSequence $ map fst tt
                        in case lookup "base" al of
                             Just s -> qualify al (makeSequence [defined s,t],
                                                   concatMap snd tt)
                             _ -> qualify al (t, concatMap snd tt)
                 ("restriction",_)
                     -> case lookup "base" al of
                          Just s -> qualify al (defined s, [])
                          _ -> error $ "wrong restriction: "++show x
                 ("simpleContent",[t])
                     -> qualify al $ s2t t
                 ("schema",ts)
                     -> let tt1 = map s2t (filter isType ts)
                            tt2 = map s2t (filter (not . isType) ts)
                        in (TAny, concatMap snd tt1++concatMap snd tt2)
                 (_,ts)
                     -> let tt = map s2t ts
                            t = makeSequence $ map fst tt
                        in (t, concatMap snd tt)
          s2t _ = (TEmpty,[])
          qualify al (t,binds)
                = let nt = case (lookup "minOccurs" al,lookup "maxOccurs" al) of
                             (Nothing, Nothing) -> t
                             (Just "0", Just "1") -> TQualified t '?'
                             (Just "1", _) -> TQualified t '+'
                             (_,_) -> TQualified t '*'
                  in case lookup "name" al of
                       Just n -> (nt,(n,nt):binds)
                       _ -> (nt,binds)
          makeSequence ts
                = ms $ filter (/= TEmpty) ts
                where ms [] = TEmpty
                      ms [t] = t
                      ms (t:ts) = TSequence t (ms ts)
          lookup s ((QName _ _ m,v):_) | s==m = Just v
          lookup s (_:xs) = lookup s xs
          lookup s [] = Nothing
          isElem (XElem s _ _ _ _) = True
          isElem _ = False
          isType (XElem s _ _ _ _) | elem (localName s) ["complexType","simpleType"] = True
          isType _ = False
          named s = TNamed (tag s ns)
          defined s = let qn = tag s ns
                      in if isBuildInType (localName qn)
                         then TBase qn
                         else TNamed qn


data Match a
    = Success a                  -- a = value to be matched after success
    | Failure [(Type,XTree)]     -- failure history (trace)


instance Monad Match where
    return s = Success s
    Success s >>= f = f s
    Failure ts >>= _ = Failure ts


-- validate the XML sequence xs against the XML Schema type tp
typeValidate :: Type -> XSeq -> Match XSeq
typeValidate tp xs
    = v tp xs []
      where allV t xs as
                = case v t xs as of
                    x@(Success []) -> x
                    Success xs'
                        -> allV t xs' as
                    Failure _ -> Success xs
            sp (XText s:xs)
                | all isSpace s
                = sp xs
            sp x = x
            v t (XText s:xs) as
                | all isSpace s
                = v t xs as
            v t xs as
                = case t of
                    TEmpty -> Success xs
                    TAny -> Success []
                    TQualified (TAttribute name t') '?'
                        -> if memA name as
                           then do v t' [XText $ findA name as] []
                                   return xs
                           else Success xs
                    TAttribute name t'
                        -> if memA name as
                           then do v t' [XText $ findA name as] []
                                   return xs
                           else Failure []
                    TSequence t1 t2
                        -> do xs' <- v t1 xs as
                              v t2 xs' as
                    TInterleaving t1 t2
                        -> case v t1 xs as of
                             Success xs'
                                 -> v t2 xs' as
                             _ -> do xs' <- v t2 xs as
                                     v t1 xs' as
                    TChoice t1 t2
                        -> case v t1 xs as of
                             Failure ts
                                 -> v t2 xs as
                             x -> x
                    TQualified t '?'
                        -> case v t xs as of
                             Failure _ -> Success xs
                             x -> x
                    TQualified t '+'
                        -> do xs' <- v t xs as
                              allV t xs' as
                    TQualified t '*'
                        -> allV t xs as
                    _ -> case xs of
                           [] -> Failure [(t,XText "Unexpected end of element content")]
                           x:xs'-> case t of
                                     TBase s
                                         -> if uri s == xsNamespace
                                               && castableAs [x] (TBase s)
                                            then Success xs'
                                            else Failure [(t,x)]
                                     TAny -> Success xs'
                                     TItem "item"
                                         -> Success xs'
                                     TItem "node"
                                         -> case x of
                                              XElem _ _ _ _ _ -> Success xs'
                                              _ -> Failure [(t,x)]
                                     TItem _ -> error ("Unexpected named type: "++show t)
                                     TElement tag t'
                                         -> case x of
                                              XElem tag' as' _ _ xs''
                                                  -> if tag == localName tag' || tag == "*"
                                                     then case v t' xs'' (map (\(q,v) -> (localName q,v)) as') of
                                                            Failure ts -> Failure ((t,x):ts)
                                                            Success s
                                                                -> case sp s of
                                                                     [] -> Success xs'
                                                                     (z:_) -> Failure [(t,z)]
                                                     else Failure [(t,x)]
                                              _ -> Failure [(t,x)]
                                     _ -> Failure [(t,x)]


-- Validate the XML data using XML Schema
validateXSeq :: XSeq -> NS -> Match XSeq
validateXSeq xs ns
    = vs xs
      where vs (XElem rt _ _ _ xs':xs)
                | rt == documentRootTag
                = vs xs'
            vs (x@(XElem tag _ _ _ _):xs)
                = case expandQName tag ns of
                    t@(TElement _ _)
                        -> do xs' <- typeValidate t [x]
                              vs xs
                    t -> do xs' <- typeValidate (TElement (localName tag) t) [x]
                            vs xs
            vs (_:xs) = vs xs
            vs [] = Success []


validate :: XSeq -> XSeq -> NS -> XSeq
validate xs [xt] ns
    = let (_,t) = schema2Type xt ns
          n = ns{ defaultElementNS="default", schemas=("default",t):schemas ns }
      in case validateXSeq xs n of
           Failure ((_,x):_)
               -> [x]
           Success [] -> []
           Success [XText s]
               | all isSpace s
               -> []
           _ -> [XBool False]


parseSchema :: String -> NS -> IO XMLSchema
parseSchema schema ns
    = do sch <- readFile schema
         let x = materialize False (parseDocument sch)
             (_,t) = schema2Type x ns
         return t


-- | Validate the XML document against the XML Schema. Also done using the validate XQuery form.
validateFile :: FilePath -> FilePath -> IO Bool
validateFile file schema
    = do sch <- readFile schema
         let ns = initialNS
             (_,t) = schema2Type (materialize False (parseDocument sch)) ns
         doc <- readFile file
         let xs = materialize False (parseDocument doc)
             n = ns{ defaultElementNS="default", schemas=("default",t):schemas ns }
         case validateXSeq [xs] n of
           Failure ts
               -> error ("Failed to validate the file against the schema: "
                         ++show ts)
           Success [] -> return True
           Success [XText s]
               | all isSpace s
               -> return True
           _ -> error "Failed to validate the file against the schema"