-- |
-- This module provides functions for validating XML Documents represented as
-- XmlTree.

-- Unlike other popular XML validation tools the validation process returns
-- a list of errors instead of aborting after the first error was found.


-- Before the document is validated, a lookup-table is build on the basis of
-- the DTD which maps element names to their validation functions.
-- After this initialization phase the whole document is traversed in preorder
-- and every element is validated by the XmlFilter from the lookup-table.

-- Special namings in source code:
--
--  - nd - XDTD node
--
--  - n  - XTag node
--
-- Author : .\\artin Schmidt

module Text.XML.HXT.Validator.DocValidation
    ( validateDoc
    )
where

import Text.XML.HXT.DOM.XmlTree

import Text.XML.HXT.Validator.AttributeValueValidation
import Text.XML.HXT.Validator.XmlRE

-- |
-- Lookup-table which maps element names to their validation functions. The
-- validation functions are XmlFilters.

type ValiEnvTable	= [ValiEnv]
type ValiEnv 		= (ElemName, ValFct)
type ElemName		= String
type ValFct		= XmlFilter



-- ------------------------------------------------------------


-- |
-- Validate a document.
--
--    * 1.parameter dtdPart :  the DTD subset (Node @DOCTYPE@) of the XmlTree
--
--    - 2.parameter doc :  the document subset of the XmlTree
--
--    - returns : a list of errors

validateDoc :: XmlTree -> XmlTree -> XmlTrees
validateDoc dtdPart doc
    = traverseTree valTable doc
    where
    valTable = {-# SCC "buildAllValFcts" #-} buildAllValidationFunctions dtdPart


-- |
-- Traverse the XmlTree in preorder.
--
--    * 1.parameter valiEnv :  lookup-table which maps element names to their validation functions
--
--    - returns : list of errors

traverseTree :: ValiEnvTable -> XmlFilter
traverseTree valiEnv n@(NTree (XTag name _) cs)
    = (valFct n) ++ concatMap (traverseTree valiEnv) cs
      where
      valFct :: XmlFilter
      valFct
          = case (lookup (qualifiedName name) valiEnv) of
	    Nothing -> err ("Element " ++ show (qualifiedName name) ++ " not declared in DTD.")
	    Just f  -> f

traverseTree _ _ = []



-- ------------------------------------------------------------


-- |
-- Build all validation functions.
--
--    * 1.parameter dtdPart :  DTD subset, root node should be of type @DOCTYPE@
--
--    - returns : lookup-table which maps element names to their validation functions

buildAllValidationFunctions :: XmlTree -> ValiEnvTable
buildAllValidationFunctions dtdPart
    = buildValidateRoot dtdPart
      :
      -- construct a list of validation filters for all element declarations
      map (buildValidateFunctions dtdNodes) (isElement $$ dtdNodes)
      where
      dtdNodes = getChildren dtdPart


-- |
-- Build a validation function for the document root. By root node @\/@
-- is meant, which is the topmost dummy created by the parser.
--
--    * 1.parameter dtdPart :  DTD subset, root node should be of type @DOCTYPE@
--
--    - returns : entry for the lookup-table

buildValidateRoot :: XmlTree -> ValiEnv
buildValidateRoot (NTree (XDTD DOCTYPE al) _)
    = (t_root, valFct)
      where
      name = lookup1 a_name al

      valFct :: XmlFilter
      valFct nd@(NTree (XTag _ _) cs)
          = if msg == ""
            then []
            else err ("Root Element must be " ++ show name ++ ". " ++ msg) nd
	    where
	    re = re_sym (name)
	    msg = checkRE (matches re cs)

      valFct n = error ("buildValidateRoot: illegeal parameter:\n" ++ show n)

buildValidateRoot nd
    = error ("buildValidateRoot: illegeal parameter:\n" ++ show nd)


-- |
-- Build validation functions for an element.
--
--    * 1.parameter dtdPart :  the children of the @DOCTYPE@ node
--
--    - 2.parameter nd :  element declaration for which the validation functions are
--                   created
--
--    - returns : entry for the lookup-table

buildValidateFunctions :: XmlTrees -> XmlTree -> ValiEnv

buildValidateFunctions dtdPart nd@(NTree (XDTD ELEMENT al) _)
    = (elemName, valFct)
      where
      elemName = lookup1 a_name al

      valFct :: XmlFilter
      valFct = buildContentValidation nd
               +++
	       buildAttributeValidation dtdPart nd

buildValidateFunctions _ nd
    = error ("buildValidateFunctions: illegeal parameter:\n" ++ show nd)


-- ------------------------------------------------------------


-- |
-- Build validation functions for the content model of an element.
-- Validity constraint: Element Valid (3 \/ p.18 in Spec)
--
--    * 1.parameter nd :  element declaration for which the content validation functions
--                  are built
--
--    - returns : a function which takes an element (XTag), checks if its
--                  children match its content model and returns a list of errors

buildContentValidation :: XmlTree -> XmlFilter
buildContentValidation nd@(NTree (XDTD ELEMENT al) _)
    = contentValidation attrType nd
      where
      attrType = lookup1 a_type al


      -- Delegates construction of the validation function on the basis of the
      -- content model type
      contentValidation :: String -> XmlTree -> XmlFilter
      contentValidation typ (NTree (XDTD ELEMENT _) cs)
          | typ == k_pcdata   = contentValidationPcdata
          | typ == k_empty    = contentValidationEmpty
          | typ == k_any      = contentValidationAny
          | typ == v_children = contentValidationChildren cs
          | typ == v_mixed    = contentValidationMixed cs
          | otherwise         = error ("contentValidation: unknown type: " ++ show typ)

      contentValidation _ nd'
          = error ("contentValidation: illegeal parameter:\n" ++ show nd')


      -- Checks #PCDATA content models
      contentValidationPcdata :: XmlFilter
      contentValidationPcdata n@(NTree (XTag name _) cs)
          = if msg == ""
            then []
            else err ("The content of element "++ show (qualifiedName name) ++
	              " must match (#PCDATA). "++ msg) n
	    where
	    re = re_rep (re_sym k_pcdata)
	    msg = checkRE (matches re cs)

      contentValidationPcdata n
          = error ("contentValidationPcdata: illegeal parameter:\n" ++ show n)


      -- Checks EMPTY content models
      contentValidationEmpty :: XmlFilter
      contentValidationEmpty n@(NTree (XTag name _) cs)
          = if msg == ""
            then []
            else err ("The content of element " ++ show (qualifiedName name) ++
	              " must match EMPTY. " ++ msg) n
	    where
	    re = re_unit
	    msg = checkRE (matches re cs)

      contentValidationEmpty n
          = error ("contentValidationEmpty: illegeal parameter:\n" ++ show n)


      -- Checks ANY content models
      contentValidationAny :: XmlFilter
      contentValidationAny n@(NTree (XTag name _) cs)
          = if msg == ""
            then []
            else err ("The content of element " ++ show (qualifiedName name) ++
	              " must match ANY. " ++ msg) n
	    where
	    re = re_rep (re_dot)
	    msg = checkRE (matches re cs)

      contentValidationAny n
          = error ("contentValidationAny: illegeal parameter:\n" ++ show n)


      -- Checks "children" content models
      contentValidationChildren :: XmlTrees -> XmlFilter
      contentValidationChildren cm n@(NTree (XTag name _) cs)
          = if msg == ""
            then []
            else err ("The content of element " ++ show (qualifiedName name) ++
	              " must match " ++ printRE re ++ ". " ++ msg) n
	    where
	    re = createRE (head cm)
	    msg = checkRE (matches re cs)

      contentValidationChildren _ n
          = error ("contentValidationChildren: illegeal parameter:\n" ++ show n)


      -- Checks "mixed content" content models
      contentValidationMixed :: XmlTrees -> XmlFilter
      contentValidationMixed cm n@(NTree (XTag name _) cs)
          = if msg == ""
            then []
            else err ("The content of element "++ show (qualifiedName name) ++
	              " must match " ++ printRE re ++ ". " ++ msg) n
	    where
	    re = re_rep (re_alt (re_sym k_pcdata) (createRE (head cm)))
	    msg = checkRE (matches re cs)

      contentValidationMixed _ n
          = error ("contentValidationMixed: illegeal parameter:\n" ++ show n)


buildContentValidation nd
    = error ("buildContentValidation: illegeal parameter:\n" ++ show nd)



-- |
-- Build a regular expression from the content model. The regular expression
-- is provided by the module XmlRE.
--
--    * 1.parameter nd :  node of the content model. Expected: @CONTENT@ or
--              @NAME@
--
--    - returns : regular expression of the content model

createRE ::  XmlTree -> RE String
createRE (NTree (XDTD CONTENT al) cs)
    = processModifier modifier
      where
      modifier = lookup1 a_modifier al
      kind     = lookup1 a_kind al

      processModifier :: String -> RE String
      processModifier m
          | m == v_plus	  = re_plus (processKind kind)
	  | m == v_star	  = re_rep  (processKind kind)
	  | m == v_option = re_opt  (processKind kind)
	  | m == v_null	  = processKind kind
	  | otherwise     = error ("Unknown modifier: " ++ show m)

      processKind :: String -> RE String
      processKind k
          | k == v_seq	  = makeSequence cs
	  | k == v_choice = makeChoice cs
	  | otherwise	  = error ("Unknown kind: " ++ show k)

      makeSequence :: XmlTrees -> RE String
      makeSequence []     = re_unit
      makeSequence (x:xs) = re_seq (createRE x) (makeSequence xs)

      makeChoice :: XmlTrees -> RE String
      makeChoice []       = re_zero ""
      makeChoice (x:xs)   = re_alt (createRE x) (makeChoice xs)

createRE (NTree (XDTD NAME al) _)
    = re_sym (lookup1 a_name al)

createRE nd
    = error ("createRE: illegeal parameter:\n" ++ show nd)



-- ------------------------------------------------------------



-- |
-- Build validation functions for the attributes of an element.
--
--    * 1.parameter dtdPart :  the children of the @DOCTYPE@ node
--
--    - 2.parameter nd :  element declaration for which the attribute validation functions
--                  are created
--
--    - returns : a function which takes an element (XTag), checks if its
--                  attributes are valid and returns a list of errors

buildAttributeValidation :: XmlTrees -> XmlTree -> XmlFilter
buildAttributeValidation dtdPart nd =
    noDoublicateAttributes
    +++
    checkNotDeclardAttributes attrDecls nd
    +++
    checkRequiredAttributes attrDecls nd
    +++
    checkFixedAttributes attrDecls nd
    +++
    checkValuesOfAttributes attrDecls dtdPart nd
    where
    attrDecls = isAttlist $$ dtdPart


-- |
-- Validate that all attributes of an element are unique.
-- Well-formdness constraint: Unique AttSpec (3.1 \/ p.19 in Spec)
--
--    - returns : a function which takes an element (XTag), checks if its
--                  attributes are unique and returns a list of errors

noDoublicateAttributes :: XmlFilter
noDoublicateAttributes n@(NTree (XTag _ _) _)
    = doubles . reverse $ names
      where
      tagname = nameOf n
      names   = map nameOf . getAttrl $ n

      doubles :: [String] -> XmlTrees
      doubles []
	  = []
      doubles (n1:ns)
	  = ( if n1 `elem` ns
	      then err ("Attribute " ++ show n1 ++ " was already specified for element " ++ show tagname ++ ".") n
	      else []
	    ) ++ doubles ns

noDoublicateAttributes n
    = error ("noDoublicateAttributes: illegeal parameter:\n" ++ show n)


-- |
-- Validate that all \#REQUIRED attributes are provided.
-- Validity constraint: Required Attributes (3.3.2 \/ p.28 in Spec)
--
--    * 1.parameter dtdPart :  the children of the @DOCTYPE@ node
--
--    - 2.parameter nd :  element declaration which attributes have to be checked
--
--    - returns : a function which takes an element (XTag), checks if all
--                  required attributes are provided and returns a list of errors

checkRequiredAttributes :: XmlTrees -> XmlTree -> XmlFilter
checkRequiredAttributes attrDecls (NTree (XDTD ELEMENT al) _)
    = checkRequired requiredAtts
      where
      elemName     = lookup1 a_name al
      requiredAtts = isRequiredAttrKind $$ (isAttlistOfElement elemName $$ attrDecls)


      checkRequired :: XmlTrees -> XmlFilter
      checkRequired ((NTree (XDTD ATTLIST al') _):xs) n@(NTree (XTag name _) _)
          = if satisfies (hasAttr attName) n
	    then checkRequired xs n
	    else err ("Attribute " ++ show attName ++ " must be declared for element type " ++
	             show (qualifiedName name) ++ ".") n
	         ++
		 checkRequired xs n
	    where
	    attName = lookup1 a_value al'

      checkRequired [] _ = []

      checkRequired nd n
          = error ("checkRequired: illegeal parameter:\n" ++ show nd ++ show n)


checkRequiredAttributes _ nd
    = error ("checkRequiredAttributes: illegeal parameter:\n" ++ show nd)



-- |
-- Validate that \#FIXED attributes match the default value.
-- Validity constraint: Fixed Attribute Default (3.3.2 \/ p.28 in Spec)
--
--    * 1.parameter dtdPart :  the children of the @DOCTYPE@ node
--
--    - 2.parameter nd :  element declaration which attributes have to be checked
--
--    - returns : a function which takes an element (XTag), checks if all
--                  fixed attributes match the default value and returns a list of errors

checkFixedAttributes :: XmlTrees -> XmlTree -> XmlFilter
checkFixedAttributes attrDecls (NTree (XDTD ELEMENT al) _)
    = checkFixed fixedAtts
      where
      elemName  = lookup1 a_name al
      fixedAtts = isFixedAttrKind $$ (isAttlistOfElement elemName $$ attrDecls)


      checkFixed :: XmlTrees -> XmlFilter
      checkFixed (x@(NTree (XDTD ATTLIST al') _):xs) n@(NTree (XTag name _) _)
          = if satisfies (hasAttr attName) n
	    then if attValue == fixedValue
	         then checkFixed xs n
	         else err ("Attribute " ++ show attName ++ " of element " ++ show (qualifiedName name) ++
		           " with value " ++ show attValue ++ " must have a value of " ++
			   show fixedValue ++ ".") n
	              ++
		      checkFixed xs n
	    else checkFixed xs n
	    where
	    attName    = lookup1 a_value al'
	    fixedValue = normalizeAttributeValue (Just x) (lookup1 a_default al')
	    attValue   = normalizeAttributeValue (Just x) (valueOf attName n)

      checkFixed [] _ = []

      checkFixed nd n
          = error ("checkFixed: illegeal parameter:\n" ++ show nd ++ show n)


checkFixedAttributes _ nd
    = error ("checkFixedAttributes: illegeal parameter:\n" ++ show nd)


-- |
-- Validate that an element has no attributes which are not declared.
-- Validity constraint: Attribute Value Type (3.1 \/ p.19 in Spec)
--
--    * 1.parameter dtdPart :  the children of the @DOCTYPE@ node
--
--    - 2.parameter nd :  element declaration which attributes have to be checked
--
--    - returns : a function which takes an element (XTag), checks if all
--                  attributes are declared and returns a list of errors

checkNotDeclardAttributes :: XmlTrees -> XmlTree -> XmlFilter
checkNotDeclardAttributes attrDecls elemDescr
    = checkNotDeclared
      where
      elemName = valueOfDTD a_name elemDescr
      decls    = isAttlistOfElement elemName $$ attrDecls

      checkNotDeclared :: XmlFilter
      checkNotDeclared n
	  = ( isXTag
	      `guards`
	      cat (map (searchForDeclaredAtt elemName decls) (getAttrl n))
	    ) n

      searchForDeclaredAtt :: String -> XmlTrees -> XmlTree -> XmlFilter
      searchForDeclaredAtt name ((NTree (XDTD ATTLIST al') _):xs) att
          = if (lookup1 a_value al') == nameOf att
            then none
	    else searchForDeclaredAtt name xs att

      searchForDeclaredAtt name [] (NTree (XAttr attrName) _)
          = err ("Attribute " ++ show (qualifiedName attrName) ++ " of element " ++ show name ++ " is not declared in DTD.")

      searchForDeclaredAtt _ nd a
          = error ("searchForDeclaredAtt: illegeal paramter:\n" ++ show nd ++ show a)

-- |
-- Validate that the attribute value meets the lexical constraints of its type.
-- Validity constaint: Attribute Value Type (3.1 \/ p.19 in Spec)
--
--    * 1.parameter dtdPart :  the children of the @DOCTYPE@ node
--
--    - 2.parameter nd :  element declaration which attributes have to be checked
--
--    - returns : a function which takes an element (XTag), checks if all
--                  attributes meet the lexical constraints and returns a list of errors

checkValuesOfAttributes :: XmlTrees -> XmlTrees -> XmlTree -> XmlFilter
checkValuesOfAttributes attrDecls dtdPart elemDescr
    = checkValues
      where
      elemName = valueOfDTD a_name elemDescr
      decls    = isAttlistOfElement elemName $$ attrDecls

      checkValues :: XmlFilter
      checkValues n
          = ( isXTag
	      `guards`
	      cat (map (checkValue decls) (getAttrl n))
	    ) n

      checkValue :: XmlTrees -> XmlTree -> XmlFilter
      checkValue (attrDecl@(NTree (XDTD ATTLIST al') _):xs) att
          = if (lookup1 a_value al') == nameOf att
            then checkAttributeValue dtdPart attrDecl
	    else checkValue xs att

      checkValue [] _
          = none  -- undeclared attribute, reported by separate function

      checkValue n _
          = error ("checkValue: illegeal parameter:\n" ++ show n)