-- ------------------------------------------------------------

{- |
   Module     : Text.XML.HXT.DTDValidation.TypeDefs
   Copyright  : Copyright (C) 2008 Uwe Schmidt
   License    : MIT

   Maintainer : Uwe Schmidt (uwe@fh-wedel.de)
   Stability  : experimental
   Portability: portable

   This module provides functions for validating attributes.

   The main functions are:

    - Check if the attribute value meets the lexical constraints of its type

    - Normalization of an attribute value
-}

-- ------------------------------------------------------------

-- Special namings in source code:
--
--  - nd - XDTD node
--
--  - n  - XTag node
--

module Text.XML.HXT.DTDValidation.AttributeValueValidation
    ( checkAttributeValue
    , normalizeAttributeValue
    )
where

import Text.XML.HXT.Parser.XmlParsec
    ( parseNMToken
    , parseName
    )

import Text.XML.HXT.DTDValidation.TypeDefs

-- ------------------------------------------------------------

-- |
-- Checks if the attribute value meets the lexical constraints of its type.
--
--    * 1.parameter dtdPart :  the children of the @DOCTYPE@ node
--
--    - 2.parameter attrDecl :  the declaration of the attribute from the DTD
--
--    - returns : a function which takes an element (XTag or XDTD ATTLIST),
--                    checks if the attribute value meets the lexical constraints
--                    of its type and returns a list of errors

checkAttributeValue :: XmlTrees -> XmlTree -> XmlArrow
checkAttributeValue dtdPart attrDecl
    | isDTDAttlistNode attrDecl
        = choiceA
          [ isElem       :-> ( checkAttrVal $< getAttrValue attrName )
          , isDTDAttlist :-> ( checkAttrVal $< (getDTDAttrl >>^ dtd_default) )
          , this             :-> none
          ]
    | otherwise
        = none
      where
      al        = getDTDAttributes attrDecl
      attrName  = dtd_value al
      attrType  = dtd_type  al
      checkAttrVal attrValue
          = checkValue attrType dtdPart normalizedVal attrDecl
            where
            normalizedVal = normalizeAttributeValue (Just attrDecl) attrValue

-- |
-- Dispatches the attibute check by the attribute type.
--
--    * 1.parameter typ :  the attribute type
--
--    - 2.parameter dtdPart :  the children of the @DOCTYPE@ node
--
--    - 3.parameter attrValue :  the normalized attribute value to be checked
--
--    - 4.parameter attrDecl :  the declaration of the attribute from the DTD
--
--    - returns : a functions which takes an element (XTag or XDTD ATTLIST),
--                        checks if the attribute value meets the lexical constraints
--                        of its type and returns a list of errors

checkValue :: String -> XmlTrees -> String -> XmlTree -> XmlArrow
checkValue typ dtdPart attrValue attrDecl
        | typ == k_cdata        = none
        | typ == k_enumeration  = checkValueEnumeration attrDecl attrValue
        | typ == k_entity       = checkValueEntity dtdPart attrDecl attrValue
        | typ == k_entities     = checkValueEntities dtdPart attrDecl attrValue
        | typ == k_id           = checkValueId attrDecl attrValue
        | typ == k_idref        = checkValueIdref attrDecl attrValue
        | typ == k_idrefs       = checkValueIdrefs attrDecl attrValue
        | typ == k_nmtoken      = checkValueNmtoken attrDecl attrValue
        | typ == k_nmtokens     = checkValueNmtokens attrDecl attrValue
        | typ == k_notation     = checkValueEnumeration attrDecl attrValue
        | otherwise             = error ("Attribute type " ++ show typ ++ " unknown.")

-- |
-- Checks the value of Enumeration attribute types. (3.3.1 \/ p.27 in Spec)
--
--    * 1.parameter attrDecl :  the declaration of the attribute from the DTD
--
--    - 2.parameter attrValue :  the normalized attribute value to be checked

checkValueEnumeration :: XmlTree -> String -> XmlArrow
checkValueEnumeration attrDecl attrValue
    | isDTDAttlistNode attrDecl
      &&
      attrValue `notElem` enumVals
        = err ( "Attribute " ++ show (dtd_value al) ++ " for element " ++ show (dtd_name al) ++
                " must have a value from list "++ show enumVals {- ++ " but has value " ++ show attrValue-} ++ ".")
    | otherwise
        = none
      where
      al        = getDTDAttributes attrDecl

      enumVals :: [String]
      enumVals = map (dtd_name . getDTDAttributes) $ (runLA getChildren attrDecl)

-- |
-- Checks the value of ENTITY attribute types. (3.3.1 \/ p.26 in Spec)
--
--    * 1.parameter dtdPart :  the children of the @DOCTYPE@ node, to get the
--                    unparsed entity declarations
--
--    - 2.parameter attrDecl :  the declaration of the attribute from the DTD
--
--    - 3.parameter attrValue :  the normalized attribute value to be checked

checkValueEntity :: XmlTrees -> XmlTree -> String -> XmlArrow
checkValueEntity dtdPart attrDecl attrValue
    | isDTDAttlistNode attrDecl
      &&
      attrValue `notElem` upEntities
        = err ( "Entity " ++ show attrValue ++ " of attribute " ++ show (dtd_value al) ++
                " for element " ++ show (dtd_name al) ++ " is not unparsed. " ++
                "The following unparsed entities exist: " ++ show upEntities ++ ".")
    | otherwise
        = none
      where
      al        = getDTDAttributes attrDecl

      upEntities :: [String]
      upEntities = map (dtd_name . getDTDAttributes) (isUnparsedEntity $$ dtdPart)

-- |
-- Checks the value of ENTITIES attribute types. (3.3.1 \/ p.26 in Spec)
--
--    * 1.parameter dtdPart :  the children of the @DOCTYPE@ node, to get the
--                    unparsed entity declarations
--
--    - 2.parameter attrDecl :  the declaration of the attribute from the DTD
--
--    - 3.parameter attrValue :  the normalized attribute value to be checked

checkValueEntities ::XmlTrees -> XmlTree -> String -> XmlArrow
checkValueEntities dtdPart attrDecl attrValue
    | isDTDAttlistNode attrDecl
        = if null valueList
          then err ("Attribute " ++ show (dtd_value al) ++ " of element " ++
                    show (dtd_name al) ++ " must be one or more names.")
          else catA . map (checkValueEntity dtdPart attrDecl) $ valueList
    | otherwise
        = none
      where
      al        = getDTDAttributes attrDecl
      valueList = words attrValue

-- |
-- Checks the value of NMTOKEN attribute types. (3.3.1 \/ p.26 in Spec)
--
--    * 1.parameter attrDecl :  the declaration of the attribute from the DTD
--
--    - 2.parameter attrValue :  the normalized attribute value to be checked

checkValueNmtoken :: XmlTree -> String -> XmlArrow
checkValueNmtoken attrDecl attrValue
    | isDTDAttlistNode attrDecl
        = constA attrValue >>> checkNmtoken
    | otherwise
        = none
      where
      al        = getDTDAttributes attrDecl
      checkNmtoken
          = mkText >>> arrL (parseNMToken "")
            >>>
            isError
            >>>
            getErrorMsg
            >>>
            arr (\ s -> ( "Attribute value " ++ show attrValue ++ " of attribute " ++ show (dtd_value al) ++
                          " for element " ++ show (dtd_name al) ++ " must be a name token, "++ (lines s) !! 1 ++".") )
            >>>
            mkError c_err

-- |
-- Checks the value of NMTOKENS attribute types. (3.3.1 \/ p.26 in Spec)
--
--    * 1.parameter attrDecl :  the declaration of the attribute from the DTD
--
--    - 2.parameter attrValue :  the normalized attribute value to be checked

checkValueNmtokens :: XmlTree -> String -> XmlArrow
checkValueNmtokens attrDecl attrValue
    | isDTDAttlistNode attrDecl
        = if null valueList
          then err ( "Attribute "++ show (dtd_value al) ++" of element " ++
                     show (dtd_name al) ++ " must be one or more name tokens.")
          else catA . map (checkValueNmtoken attrDecl) $ valueList
    | otherwise
        = none
      where
      al        = getDTDAttributes attrDecl
      valueList = words attrValue

-- |
-- Checks the value of ID attribute types. (3.3.1 \/ p.25 in Spec)
--
--    * 1.parameter attrDecl :  the declaration of the attribute from the DTD
--
--    - 2.parameter attrValue :  the normalized attribute value to be checked

checkValueId :: XmlTree -> String -> XmlArrow
checkValueId attrDecl attrValue
    = checkForName "Attribute value" attrDecl attrValue


-- |
-- Checks the value of IDREF attribute types. (3.3.1 \/ p.26 in Spec)
--
--    * 1.parameter attrDecl :  the declaration of the attribute from the DTD
--
--    - 2.parameter attrValue :  the normalized attribute value to be checked

checkValueIdref :: XmlTree -> String -> XmlArrow
checkValueIdref attrDecl attrValue
    = checkForName "Attribute value" attrDecl attrValue


-- |
-- Checks the value of IDREFS attribute types. (3.3.1 \/ p.26 in Spec)
--
--    * 1.parameter attrDecl :  the declaration of the attribute from the DTD
--
--    - 2.parameter attrValue :  the normalized attribute value to be checked

checkValueIdrefs :: XmlTree -> String -> XmlArrow
checkValueIdrefs attrDecl attrValue
    = catA . map (checkValueIdref attrDecl) . words $ attrValue



-- -----------------------------------------------------------------------------
-- General helper functions for checking attribute values
--

-- |
-- Checks if the value of an attribute is a name.
--
--    * 1.parameter msg :  error message, should be "Entity" or "Attribute value"
--
--    - 2.parameter attrDecl :  the declaration of the attribute from the DTD
--
--    - 3.parameter attrValue :  the normalized attribute value to be checked

checkForName ::  String -> XmlTree -> String -> XmlArrow
checkForName msg attrDecl attrValue
    | isDTDAttlistNode attrDecl
        = constA attrValue >>> checkName
    | otherwise
        = none
    where
    al  = getDTDAttributes attrDecl
    checkName
        = mkText >>> arrL (parseName "")
          >>>
          isError
          >>>
          getErrorMsg
          >>>
          arr (\s -> ( msg ++ " " ++ show attrValue ++" of attribute " ++ show (dtd_value al) ++
                       " for element "++ show (dtd_name al) ++" must be a name, " ++ (lines s) !! 1 ++ ".") )
          >>>
          mkError c_err

-- -----------------------------------------------------------------------------

-- |
-- Normalizes an attribute value with respect to its type. (3.3.3 \/ p.29 in Spec)
--
--    * 1.parameter attrDecl :  the declaration of the attribute from the DTD. Expected
--                   is a list. If the list is empty, no declaration exists.
--
--    - 2.parameter value :  the attribute value to be normalized
--
--    - returns : the normalized value
--
normalizeAttributeValue :: Maybe XmlTree -> String -> String
normalizeAttributeValue (Just attrDecl) value
    = normalizeAttribute attrType
      where
      al             = getDTDAttributes attrDecl
      attrType = dtd_type al

      normalizeAttribute :: String -> String
      normalizeAttribute typ
          | typ == k_cdata      = cdataNormalization value
          | otherwise           = otherNormalization value

-- Attribute not declared in DTD, normalization as CDATA
normalizeAttributeValue Nothing value
    = cdataNormalization value

-- ------------------------------------------------------------
-- Helper functions for normalization

-- |
-- Normalization of CDATA attribute values.
-- is already done when parsing
-- during entity substituion for attribute values

cdataNormalization :: String -> String
cdataNormalization = id

-- | Normalization of attribute values other than CDATA.

otherNormalization :: String -> String
otherNormalization = reduceWSSequences . stringTrim . cdataNormalization

-- | Reduce whitespace sequences to a single whitespace.

reduceWSSequences :: String -> String
reduceWSSequences str = unwords (words str)

-- ------------------------------------------------------------