-- ------------------------------------------------------------

{- |
   Module     : Text.XML.HXT.DTDValidation.TypeDefs
   Copyright  : Copyright (C) 2008 Uwe Schmidt
   License    : MIT

   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
   Stability  : experimental
   Portability: portable

   This module provides functions for validating XML Documents represented as
   XmlTree.

   Unlike other popular XML validation tools the validation process returns
   a list of errors instead of aborting after the first error was found.

   Before the document is validated, a lookup-table is build on the basis of
   the DTD which maps element names to their validation functions.
   After this initialization phase the whole document is traversed in preorder
   and every element is validated by the XmlFilter from the lookup-table.

-}

-- ------------------------------------------------------------

module Text.XML.HXT.DTDValidation.DocValidation
    ( validateDoc
    )
where

import Text.XML.HXT.DTDValidation.TypeDefs

import Text.XML.HXT.DTDValidation.AttributeValueValidation
import Text.XML.HXT.DTDValidation.XmlRE

-- ------------------------------------------------------------

-- |
-- Lookup-table which maps element names to their validation functions. The
-- validation functions are XmlArrows.

type ValiEnvTable       = [ValiEnv]
type ValiEnv            = (ElemName, ValFct)
type ElemName           = String
type ValFct             = XmlArrow


-- ------------------------------------------------------------

-- |
-- Validate a document.
--
--    * 1.parameter dtdPart :  the DTD subset (Node @DOCTYPE@) of the XmlTree
--
--    - 2.parameter doc :  the document subset of the XmlTree
--
--    - returns : a list of errors

validateDoc     :: XmlTree -> XmlArrow
validateDoc dtdPart
    = traverseTree valTable
    where
    valTable = buildAllValidationFunctions dtdPart


-- |
-- Traverse the XmlTree in preorder.
--
--    * 1.parameter valiEnv :  lookup-table which maps element names to their validation functions
--
--    - returns : list of errors

traverseTree    :: ValiEnvTable -> XmlArrow
traverseTree valiEnv
    = choiceA [ isElem  :-> (valFct $< getQName)
              , this    :-> none
              ]
      <+>
      ( getChildren >>> traverseTree valiEnv )
    where
    valFct      :: QName -> XmlArrow
    valFct name = case (lookup (qualifiedName name) valiEnv) of
                  Nothing -> err ("Element " ++ show (qualifiedName name) ++ " not declared in DTD.")
                  Just f  -> f

-- ------------------------------------------------------------

-- |
-- Build all validation functions.
--
--    * 1.parameter dtdPart :  DTD subset, root node should be of type @DOCTYPE@
--
--    - returns : lookup-table which maps element names to their validation functions

buildAllValidationFunctions :: XmlTree -> ValiEnvTable
buildAllValidationFunctions dtdPart
    = concat $
      buildValidateRoot dtdPart :             -- construct a list of validation filters for all element declarations
      map (buildValidateFunctions dtdNodes) dtdNodes
      where
      dtdNodes = runLA getChildren dtdPart

-- |
-- Build a validation function for the document root. By root node @\/@
-- is meant, which is the topmost dummy created by the parser.
--
--    * 1.parameter dtdPart :  DTD subset, root node should be of type @DOCTYPE@
--
--    - returns : entry for the lookup-table

buildValidateRoot :: XmlTree -> [ValiEnv]
buildValidateRoot dn
    | isDTDDoctypeNode dn       = [(t_root, valFct)]
    | otherwise                 = []
      where
      name      = dtd_name . getDTDAttributes $ dn

      valFct    :: XmlArrow
      valFct    = isElem
                  `guards`
                  ( checkRegex (re_sym name)
                    >>>
                    msgToErr (("Root Element must be " ++ show name ++ ". ") ++)
                  )

checkRegex      :: RE String -> LA XmlTree String
checkRegex re   = listA getChildren
                  >>> arr (\ cs -> checkRE (matches re cs))

-- |
-- Build validation functions for an element.
--
--    * 1.parameter dtdPart :  the children of the @DOCTYPE@ node
--
--    - 2.parameter nd :  element declaration for which the validation functions are
--                   created
--
--    - returns : entry for the lookup-table

buildValidateFunctions :: XmlTrees -> XmlTree -> [ValiEnv]

buildValidateFunctions dtdPart dn
    | isDTDElementNode dn       = [(elemName, valFct)]
    | otherwise                 = []
      where
      elemName = dtd_name . getDTDAttributes $ dn

      valFct :: XmlArrow
      valFct = buildContentValidation dn
               <+>
               buildAttributeValidation dtdPart dn

-- ------------------------------------------------------------


-- |
-- Build validation functions for the content model of an element.
-- Validity constraint: Element Valid (3 \/ p.18 in Spec)
--
--    * 1.parameter nd :  element declaration for which the content validation functions
--                  are built
--
--    - returns : a function which takes an element (XTag), checks if its
--                  children match its content model and returns a list of errors

buildContentValidation :: XmlTree -> XmlArrow
buildContentValidation nd
    = contentValidation attrType nd
      where
      attrType = dtd_type . getDTDAttributes $ nd


      -- Delegates construction of the validation function on the basis of the
      -- content model type
      contentValidation :: String -> XmlTree -> XmlArrow
      contentValidation typ dn
          | typ == k_pcdata   = contentValidationPcdata
          | typ == k_empty    = contentValidationEmpty
          | typ == k_any      = contentValidationAny
          | typ == v_children = contentValidationChildren cs
          | typ == v_mixed    = contentValidationMixed cs
          | otherwise         = none
          where
          cs = runLA getChildren dn

      -- Checks #PCDATA content models
      contentValidationPcdata :: XmlArrow
      contentValidationPcdata
          = isElem `guards` (contentVal $< getQName)
            where
            contentVal name
                = checkRegex (re_rep (re_sym k_pcdata))
                  >>>
                  msgToErr ( ( "The content of element " ++
                               show (qualifiedName name) ++
                               " must match (#PCDATA). "
                             ) ++
                           )

      -- Checks EMPTY content models
      contentValidationEmpty :: XmlArrow
      contentValidationEmpty
          = isElem `guards` (contentVal $< getQName)
            where
            contentVal name
                = checkRegex re_unit
                  >>>
                  msgToErr ( ( "The content of element " ++
                                 show (qualifiedName name) ++
                                 " must match EMPTY. "
                             ) ++
                           )

      -- Checks ANY content models
      contentValidationAny :: XmlArrow
      contentValidationAny
          = isElem `guards` (contentVal $< getName)
            where
            contentVal name
                = checkRegex (re_rep (re_dot))
                  >>>
                  msgToErr ( ( "The content of element " ++
                               show name ++
                               " must match ANY. "
                             ) ++
                           )

      -- Checks "children" content models
      contentValidationChildren :: XmlTrees -> XmlArrow
      contentValidationChildren cm
          = isElem `guards` (contentVal $< getName)
            where
            contentVal name
                = checkRegex re
                  >>>
                  msgToErr ( ( "The content of element " ++
                               show name ++
                               " must match " ++ printRE re ++ ". "
                             ) ++
                           )
            re = createRE (head cm)

      -- Checks "mixed content" content models
      contentValidationMixed :: XmlTrees -> XmlArrow
      contentValidationMixed cm
          = isElem `guards` (contentVal $< getName)
            where
            contentVal name
                = checkRegex re
                  >>>
                  msgToErr ( ( "The content of element " ++
                               show name ++
                               " must match " ++ printRE re ++ ". "
                             ) ++
                           )
            re = re_rep (re_alt (re_sym k_pcdata) (createRE (head cm)))

-- |
-- Build a regular expression from the content model. The regular expression
-- is provided by the module XmlRE.
--
--    * 1.parameter nd :  node of the content model. Expected: @CONTENT@ or
--              @NAME@
--
--    - returns : regular expression of the content model

createRE        ::  XmlTree -> RE String
createRE dn
    | isDTDContentNode dn
        = processModifier modifier
    | isDTDNameNode dn
        = re_sym name
    | otherwise
        = error ("createRE: illegeal parameter:\n" ++ show dn)
    where
    al          = getDTDAttributes dn
    name        = dtd_name     al
    modifier    = dtd_modifier al
    kind        = dtd_kind     al
    cs          = runLA getChildren dn

    processModifier :: String -> RE String
    processModifier m
        | m == v_plus     = re_plus (processKind kind)
        | m == v_star     = re_rep  (processKind kind)
        | m == v_option   = re_opt  (processKind kind)
        | m == v_null     = processKind kind
        | otherwise       = error ("Unknown modifier: " ++ show m)

    processKind :: String -> RE String
    processKind k
        | k == v_seq      = makeSequence cs
        | k == v_choice   = makeChoice cs
        | otherwise       = error ("Unknown kind: " ++ show k)

    makeSequence :: XmlTrees -> RE String
    makeSequence []     = re_unit
    makeSequence (x:xs) = re_seq (createRE x) (makeSequence xs)

    makeChoice :: XmlTrees -> RE String
    makeChoice []       = re_zero ""
    makeChoice (x:xs)   = re_alt (createRE x) (makeChoice xs)

-- ------------------------------------------------------------

-- |
-- Build validation functions for the attributes of an element.
--
--    * 1.parameter dtdPart :  the children of the @DOCTYPE@ node
--
--    - 2.parameter nd :  element declaration for which the attribute validation functions
--                  are created
--
--    - returns : a function which takes an element (XTag), checks if its
--                  attributes are valid and returns a list of errors

buildAttributeValidation :: XmlTrees -> XmlTree -> XmlArrow
buildAttributeValidation dtdPart nd =
    noDoublicateAttributes
    <+>
    checkNotDeclardAttributes attrDecls nd
    <+>
    checkRequiredAttributes attrDecls nd
    <+>
    checkFixedAttributes attrDecls nd
    <+>
    checkValuesOfAttributes attrDecls dtdPart nd
    where
    attrDecls = isDTDAttlist $$ dtdPart


-- |
-- Validate that all attributes of an element are unique.
-- Well-formdness constraint: Unique AttSpec (3.1 \/ p.19 in Spec)
--
--    - returns : a function which takes an element (XTag), checks if its
--                  attributes are unique and returns a list of errors

noDoublicateAttributes  :: XmlArrow
noDoublicateAttributes
    = isElem
      `guards`
      ( noDoubles' $< getName )
    where
    noDoubles' elemName
        = listA (getAttrl >>> getName)
          >>> applyA (arr (catA . map toErr . doubles . reverse))
        where
        toErr n1 = err ( "Attribute " ++ show n1 ++
                         " was already specified for element " ++
                         show elemName ++ "."
                       )

-- |
-- Validate that all \#REQUIRED attributes are provided.
-- Validity constraint: Required Attributes (3.3.2 \/ p.28 in Spec)
--
--    * 1.parameter dtdPart :  the children of the @DOCTYPE@ node
--
--    - 2.parameter nd :  element declaration which attributes have to be checked
--
--    - returns : a function which takes an element (XTag), checks if all
--                  required attributes are provided and returns a list of errors

checkRequiredAttributes :: XmlTrees -> XmlTree -> XmlArrow
checkRequiredAttributes attrDecls dn
    | isDTDElementNode dn
        = isElem
          `guards`
          ( checkRequired $< getName )
    | otherwise
        = none
      where
      elemName     = dtd_name . getDTDAttributes $ dn
      requiredAtts = (isAttlistOfElement elemName >>> isRequiredAttrKind) $$ attrDecls

      checkRequired :: String -> XmlArrow
      checkRequired name
          = catA . map checkReq $ requiredAtts
          where
          checkReq      :: XmlTree -> XmlArrow
          checkReq attrDecl
              = neg (hasAttr attName)
                `guards`
                err ( "Attribute " ++ show attName ++ " must be declared for element type " ++
                      show name ++ "." )
              where
              attName = dtd_value . getDTDAttributes $ attrDecl

-- |
-- Validate that \#FIXED attributes match the default value.
-- Validity constraint: Fixed Attribute Default (3.3.2 \/ p.28 in Spec)
--
--    * 1.parameter dtdPart :  the children of the @DOCTYPE@ node
--
--    - 2.parameter nd :  element declaration which attributes have to be checked
--
--    - returns : a function which takes an element (XTag), checks if all
--                  fixed attributes match the default value and returns a list of errors

checkFixedAttributes :: XmlTrees -> XmlTree -> XmlArrow
checkFixedAttributes attrDecls dn
    | isDTDElementNode dn
        = isElem
          `guards`
          ( checkFixed $< getName )
    | otherwise
        = none
      where
      elemName  = dtd_name . getDTDAttributes $ dn
      fixedAtts = (isAttlistOfElement elemName >>> isFixedAttrKind) $$ attrDecls

      checkFixed :: String -> XmlArrow
      checkFixed name
          = catA . map checkFix $ fixedAtts
          where
          checkFix      :: XmlTree -> XmlArrow
          checkFix an
              |  isDTDAttlistNode an
                  = checkFixedVal $< getAttrValue attName
              | otherwise
                  = none
              where
              al'       = getDTDAttributes an
              attName   = dtd_value   al'
              defa      = dtd_default al'
              fixedValue = normalizeAttributeValue (Just an) defa

              checkFixedVal     :: String -> XmlArrow
              checkFixedVal val
                  = ( ( hasAttr attName
                        >>>
                        isA (const (attValue /= fixedValue))
                      )
                      `guards`
                      err ( "Attribute " ++ show attName ++ " of element " ++ show name ++
                            " with value " ++ show attValue ++ " must have a value of " ++
                            show fixedValue ++ "." )
                    )
                  where
                  attValue   = normalizeAttributeValue (Just an) val

-- |
-- Validate that an element has no attributes which are not declared.
-- Validity constraint: Attribute Value Type (3.1 \/ p.19 in Spec)
--
--    * 1.parameter dtdPart :  the children of the @DOCTYPE@ node
--
--    - 2.parameter nd :  element declaration which attributes have to be checked
--
--    - returns : a function which takes an element (XTag), checks if all
--                  attributes are declared and returns a list of errors

checkNotDeclardAttributes :: XmlTrees -> XmlTree -> XmlArrow
checkNotDeclardAttributes attrDecls elemDescr
    = checkNotDeclared
      where
      elemName = valueOfDTD a_name elemDescr
      decls    = isAttlistOfElement elemName $$ attrDecls

      checkNotDeclared :: XmlArrow
      checkNotDeclared
          = isElem
            `guards`
            ( getAttrl >>> searchForDeclaredAtt elemName decls )

      searchForDeclaredAtt :: String -> XmlTrees -> XmlArrow
      searchForDeclaredAtt name (dn : xs)
          | isDTDAttlistNode dn
              = ( getName >>> isA ( (dtd_value . getDTDAttributes $ dn) /= ) )
                `guards`
                searchForDeclaredAtt name xs
          | otherwise
              = searchForDeclaredAtt name xs

      searchForDeclaredAtt name []
          = mkErr $< getName
            where
            mkErr n = err ( "Attribute " ++ show n ++ " of element " ++
                            show name ++ " is not declared in DTD." )

-- |
-- Validate that the attribute value meets the lexical constraints of its type.
-- Validity constaint: Attribute Value Type (3.1 \/ p.19 in Spec)
--
--    * 1.parameter dtdPart :  the children of the @DOCTYPE@ node
--
--    - 2.parameter nd :  element declaration which attributes have to be checked
--
--    - returns : a function which takes an element (XTag), checks if all
--                  attributes meet the lexical constraints and returns a list of errors

checkValuesOfAttributes :: XmlTrees -> XmlTrees -> XmlTree -> XmlArrow
checkValuesOfAttributes attrDecls dtdPart elemDescr
    = checkValues
      where
      elemName  = dtd_name . getDTDAttributes $ elemDescr
      decls     = isAttlistOfElement elemName $$ attrDecls

      checkValues :: XmlArrow
      checkValues
          = isElem
            `guards`
            ( checkValue $< getAttrl )

      checkValue att
          = catA . map checkVal $ decls
            where
            checkVal :: XmlTree -> XmlArrow
            checkVal attrDecl
                | isDTDAttlistNode attrDecl
                  &&
                  nameOfAttr att == dtd_value al'
                      = checkAttributeValue dtdPart attrDecl
                | otherwise
                    = none
                where
                al' = getDTDAttributes attrDecl

-- ------------------------------------------------------------