module Text.XML.HXT.Arrow.XmlRegex
    ( XmlRegex
    , mkZero
    , mkUnit
    , mkPrim
    , mkPrimA
    , mkDot
    , mkStar
    , mkAlt
    , mkSeq
    , mkRep
    , mkRng
    , mkOpt
    , nullable
    , delta
    , matchXmlRegex
    , splitXmlRegex
    , scanXmlRegex
    , matchRegexA
    , splitRegexA
    , scanRegexA
    )
where
import Control.Arrow.ListArrows
import Data.Maybe
import Text.XML.HXT.DOM.Interface
matchRegexA		:: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree XmlTrees
matchRegexA re ts	= ts >>. (\ s -> maybe [] (const [s]) . matchXmlRegex re $ s)
splitRegexA		:: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree (XmlTrees, XmlTrees)
splitRegexA re ts	= ts >>. (maybeToList . splitXmlRegex re)
scanRegexA		:: XmlRegex -> LA XmlTree XmlTree -> LA XmlTree XmlTrees
scanRegexA re ts	= ts >>. (fromMaybe [] . scanXmlRegex re)
data XmlRegex	= Zero String
		| Unit
		| Sym (XmlTree -> Bool)
		| Dot
		| Star XmlRegex
		| Alt XmlRegex XmlRegex
		| Seq XmlRegex XmlRegex
		| Rep Int XmlRegex		
		| Rng Int Int XmlRegex	
mkZero		:: String -> XmlRegex
mkZero		= Zero
mkUnit		:: XmlRegex
mkUnit		= Unit
mkPrim		:: (XmlTree -> Bool) -> XmlRegex
mkPrim		= Sym
mkPrimA		:: LA XmlTree XmlTree -> XmlRegex
mkPrimA a	= mkPrim (not . null . runLA a)
mkDot	:: XmlRegex
mkDot	= Dot
mkStar			:: XmlRegex -> XmlRegex
mkStar (Zero _)		= mkUnit		
mkStar e@Unit		= e			
mkStar e@(Star _e1)	= e			
mkStar (Rep 1 e1)	= mkStar e1		
mkStar e@(Alt _ _)	= Star (rmStar e)	
mkStar e		= Star e
rmStar	:: XmlRegex -> XmlRegex
rmStar (Alt e1 e2)	= mkAlt (rmStar e1) (rmStar e2)
rmStar (Star e1)	= rmStar e1
rmStar (Rep 1 e1)	= rmStar e1
rmStar e1		= e1
mkAlt					:: XmlRegex -> XmlRegex -> XmlRegex
mkAlt e1            (Zero _)		= e1				
mkAlt (Zero _)      e2			= e2				
mkAlt e1@(Star Dot) _e2			= e1				
mkAlt _e1           e2@(Star Dot)	= e2				
mkAlt (Sym p1)      (Sym p2)		= mkPrim $ \ x -> p1 x || p2 x	
mkAlt e1            e2@(Sym _)		= mkAlt e2 e1			
mkAlt e1@(Sym _)    (Alt e2@(Sym _) e3)	= mkAlt (mkAlt e1 e2) e3	
mkAlt (Alt e1 e2)   e3			= mkAlt e1 (mkAlt e2 e3)	
mkAlt e1 e2				= Alt e1 e2
mkSeq				:: XmlRegex -> XmlRegex -> XmlRegex
mkSeq e1@(Zero _) _e2		= e1
mkSeq _e1         e2@(Zero _)	= e2
mkSeq Unit        e2		= e2
mkSeq e1          Unit		= e1
mkSeq (Seq e1 e2) e3		= mkSeq e1 (mkSeq e2 e3)
mkSeq e1 e2			= Seq e1 e2
mkRep		:: Int -> XmlRegex -> XmlRegex
mkRep 0 e			= mkStar e
mkRep _ e@(Zero _)		= e
mkRep _ e@Unit			= e
mkRep i e			= Rep i e
mkRng	:: Int -> Int -> XmlRegex -> XmlRegex
mkRng 0  0  _e			= mkUnit
mkRng 1  1  e			= e
mkRng lb ub _e
    | lb > ub			= Zero $
				  "illegal range " ++
				  show lb ++ ".." ++ show ub
mkRng _l _u e@(Zero _)		= e
mkRng _l _u e@Unit		= e
mkRng lb ub e			= Rng lb ub e
mkOpt	:: XmlRegex -> XmlRegex
mkOpt	= mkRng 0 1
instance Show XmlRegex where
    show (Zero s)	= "{err:" ++ s ++ "}"
    show Unit		= "()"
    show (Sym _p)	= "{single tree pred}"
    show Dot		= "."
    show (Star e)	= "(" ++ show e ++ ")*"
    show (Alt e1 e2)	= "(" ++ show e1 ++ "|" ++ show e2 ++ ")"
    show (Seq e1 e2)	= show e1 ++ show e2
    show (Rep 1 e)	= "(" ++ show e ++ ")+"
    show (Rep i e)	= "(" ++ show e ++ "){" ++ show i ++ ",}"
    show (Rng 0 1 e)	= "(" ++ show e ++ ")?"
    show (Rng i j e)	= "(" ++ show e ++ "){" ++ show i ++ "," ++ show j ++ "}"
nullable	:: XmlRegex -> Bool
nullable (Zero _)	= False
nullable Unit		= True
nullable (Sym _p)	= False		
nullable Dot		= False
nullable (Star _)	= True
nullable (Alt e1 e2)	= nullable e1 ||
			  nullable e2
nullable (Seq e1 e2)	= nullable e1 &&
			  nullable e2
nullable (Rep _i e)	= nullable e
nullable (Rng i _ e)	= i == 0 ||
			  nullable e
delta	:: XmlRegex -> XmlTree -> XmlRegex
delta e@(Zero _)  _	= e
delta Unit        c	= mkZero $
			  "unexpected char " ++ show c
delta (Sym p)     c
    | p c		= mkUnit
    | otherwise		= mkZero $
			  "unexpected tree " ++ show c
delta Dot         _	= mkUnit
delta e@(Star e1) c	= mkSeq (delta e1 c) e
delta (Alt e1 e2) c	= mkAlt (delta e1 c) (delta e2 c)
delta (Seq e1 e2) c
    | nullable e1	= mkAlt (mkSeq (delta e1 c) e2) (delta e2 c)
    | otherwise		= mkSeq (delta e1 c) e2
delta (Rep i e)   c	= mkSeq (delta e c) (mkRep (i1) e)
delta (Rng i j e) c	= mkSeq (delta e c) (mkRng ((i1) `max` 0) (j1) e)
delta'		:: XmlRegex -> XmlTrees -> XmlRegex
delta'		= foldl delta
matchXmlRegex		:: XmlRegex -> XmlTrees -> Maybe String
matchXmlRegex e
    = res . delta' e
    where
    res (Zero er)	= Just er
    res re
	| nullable re	= Nothing	
	| otherwise	= Just $ "input does not match " ++ show e
splitXmlRegex		:: XmlRegex -> XmlTrees -> Maybe (XmlTrees, XmlTrees)
splitXmlRegex re	= splitXmlRegex' re []
splitXmlRegex' 		:: XmlRegex -> XmlTrees -> XmlTrees -> Maybe (XmlTrees, XmlTrees)
splitXmlRegex' re res []
    | nullable re	= Just (reverse res, [])
    | otherwise		= Nothing
splitXmlRegex' (Zero _) _ _
			= Nothing
splitXmlRegex' re res xs@(x:xs')
    | isJust res'	= res'
    | nullable re	= Just (reverse res, xs)
    | otherwise		= Nothing
    where
    re'  = delta re x
    res' = splitXmlRegex' re' (x:res) xs'
scanXmlRegex				:: XmlRegex -> XmlTrees -> Maybe [XmlTrees]
scanXmlRegex re ts			= scanXmlRegex' re (splitXmlRegex re ts)
scanXmlRegex'				:: XmlRegex -> Maybe (XmlTrees, XmlTrees) -> Maybe [XmlTrees]
scanXmlRegex' _  Nothing		= Nothing
scanXmlRegex' _  (Just (rs, []))	= Just [rs]
scanXmlRegex' _  (Just ([], _))		= Nothing	
							
scanXmlRegex' re (Just (rs, rest))
    | isNothing res			= Nothing
    | otherwise				= Just (rs : fromJust res)
    where
    res = scanXmlRegex' re (splitXmlRegex re rest)