-- |
-- A module for regular expression matching, adapted for XML DTDs.
--
-- This module is based on the module RE.
--
-- Author .\\artin Schmidt

module Text.XML.HXT.Validator.XmlRE
    ( RE

    , checkRE
    , matches
    , printRE

    , re_unit
    , re_zero
    , re_sym
    , re_rep
    , re_plus
    , re_opt
    , re_seq
    , re_alt
    , re_dot
    )
where

-- import Debug.Trace(trace)

import Text.XML.HXT.Validator.RE hiding (matches)

import Text.XML.HXT.DOM.XmlTree

import Text.XML.HXT.DOM.EditFilters
    ( removeComment
    , removeWhiteSpace
    )

-- |
-- Derives a regular expression with respect to a list of elements.
--
--    * 1.parameter re :  regular expression
--
--    - 2.parameter list :  list of elements to which the regular expression is applied
--
--    - returns : the derived regular expression

matches :: RE String -> XmlTrees -> RE String
matches re list
    = foldl delta re (removeUnimportantStuff $$ list)
      where
      removeUnimportantStuff :: XmlFilter
      removeUnimportantStuff = processBottomUp (removeWhiteSpace `o` removeComment)
      -- trace of growth of REs
      -- delta' re el = delta (trace (("RE : " ++) . (++ "\n" ) . show $ re) re) el


-- |
-- Derives a regular expression with respect to one element.
--
-- L(delta e x) = x \ L(e)
--
--    * 1.parameter re :  regular expression to be derived
--
--    - 2.parameter el :  the element on which the regular expression is applied
--
--    - returns : the derived regular expression

delta :: RE String -> XmlTree -> RE String
delta re el
    | not (allowed el) = re
    | otherwise        = case re of
        RE_ZERO m		 -> re_zero m
        RE_UNIT			 -> re_zero (elemName el ++" unexpected.")
        RE_SYM sym
    	    | sym == k_pcdata	 -> let node = getNode el in
	                            if ((isXTextNode node) || (isXCdataNode node))
				    then re_unit
				    else re_zero ("Character data expected, but "++ elemName el ++" found.")
       	    | expectedNode el sym -> re_unit
	    | otherwise           -> re_zero ("Element "++ show sym ++" expected, but "++ elemName el ++" found.")
        RE_REP e                  -> re_seq (delta e el) (re_rep e)
        RE_PLUS e                 -> re_seq (delta e el) (re_rep e)
        RE_OPT e                  -> delta e el
        RE_SEQ e f
       	    | nullable e          -> re_alt (re_seq (delta e el) f) (delta f el)
       	    | otherwise           -> re_seq (delta e el) f
        RE_ALT e f                -> re_alt (delta e el) (delta f el)
        RE_DOT                    -> re_unit

    where
    expectedNode :: XmlTree -> String -> Bool
    expectedNode (NTree (XTag n _) _) sym = (qualifiedName n) == sym
    expectedNode _ _			  = False

    elemName :: XmlTree -> String
    elemName (NTree (XTag n _) _)	= "element "++ show (qualifiedName n)
    elemName _				= "character data"

    allowed :: XmlTree -> Bool
    allowed (NTree (XTag _ _) _)	= True
    allowed (NTree (XText _) _)		= True
    allowed (NTree (XCdata _) _)	= True
    allowed _				= False