-- | This "Text.Regex.DFA.Pattern" module provides the 'Pattern' data
-- type and its subtypes.  This 'Pattern' type is used to represent
-- the parsed form of a Regular Expression and is syntax independent.
--
-- It is possible to construct values of 'Pattern' that are invalid
-- regular expressions.
--
-- There are also several 
module Text.Regex.DFA.Pattern
    (Pattern(..)
    ,PatternSet(..)
    ,PatternSetCharacterClass(..),PatternSetCollatingElement(..),PatternSetEquivalenceClass(..)
    ,PatternIndex
    ,showPattern
    ,showPatternP
    -- ** Pattern DoPa
    ,DoPa(..)
    ,newDoPa
    ) where

{- By Chris Kuklewicz, 2006. BSD License, see the LICENSE file. -}

import Data.List(intersperse,partition)
import qualified Data.Set as Set(toAscList,toList)
import Data.Set(Set)

data DoPa = DoPa {dopaIndex :: Int} deriving (Eq,Ord)
instance Show DoPa where
  show (DoPa {dopaIndex=i}) = show i

newDoPa :: Int -> DoPa
newDoPa i = DoPa i

data Pattern = PEmpty
             | PGroup  PatternIndex Pattern
             | POr     [Pattern]
             | PConcat [Pattern]
             | PQuest  Pattern
             | PPlus   Pattern
             | PStar   Pattern
             | PBound  Int (Maybe Int) Pattern
             -- The rest of these need an index of where in the regex string it is from
             | PCarat  DoPa
             | PDollar DoPa
             -- The following test and accept a single character
             | PDot    DoPa            -- Any character (newline?) at all
             | PAny    DoPa PatternSet -- Square bracketed things
             | PAnyNot DoPa PatternSet -- Inverted square bracketed things
             | PEscape DoPa Char       -- Backslashed Character
             | PChar   DoPa Char       -- Specific Character
               deriving (Eq,Show)

showPattern :: Pattern -> String
showPattern pIn =
  case pIn of
    PEmpty -> "()"
    PGroup _ p -> ('(':showPattern p)++")"
    POr ps -> concat $ intersperse "|" (map showPattern ps)
    PConcat ps -> concatMap showPattern ps
    PQuest p -> (showPattern p)++"?"
    PPlus p -> (showPattern p)++"+"
    PStar p -> (showPattern p)++"*"
    PBound i (Just j) p | i==j -> showPattern p ++ ('{':show i)++"}"
    PBound i mj p -> showPattern p ++ ('{':show i) ++ maybe ",}" (\j -> ',':show j++"}") mj
    --
    PCarat _ -> "^"
    PDollar _ -> "$"
    PDot _ -> "."
    PAny _ (PatternSet s scc sce sec) ->
        let (special,normal) = maybe ("","") ((partition (`elem` "]-")) . Set.toAscList) s
            charSpec = (if ']' `elem` special then (']':) else id) (byRange normal)
            scc' = maybe "" ((concatMap (\ss -> "[:"++unSCC ss++":]")) . Set.toList) scc
            sce' = maybe "" ((concatMap (\ss -> "[."++unSCE ss++".]")) . Set.toList) sce
            sec' = maybe "" ((concatMap (\ss -> "[="++unSEC ss++"=]")) . Set.toList) sec
        in concat ['[':charSpec,scc',sce',sec',if '-' `elem` special then "-]" else "]"]
    PAnyNot _ (PatternSet s scc sce sec) ->
        let (special,normal) = maybe ("","") ((partition (`elem` "]-")) . Set.toAscList) s
            charSpec = (if ']' `elem` special then (']':) else id) (byRange normal)
            scc' = maybe "" ((concatMap (\ss -> "[:"++unSCC ss++":]")) . Set.toList) scc
            sce' = maybe "" ((concatMap (\ss -> "[."++unSCE ss++".]")) . Set.toList) sce
            sec' = maybe "" ((concatMap (\ss -> "[="++unSEC ss++"=]")) . Set.toList) sec
        in concat ["[^",charSpec,scc',sce',sec',if '-' `elem` special then "-]" else "]"]
    PEscape _ c -> '\\':c:[]
    PChar _ c -> [c]
  where byRange xAll@(x:xs) | length xAll <=3 = xAll
                            | otherwise = groupRange x 1 xs
        byRange _ = undefined
        groupRange x n (y:ys) = if (fromEnum y)-(fromEnum x) == n then groupRange x (succ n) ys
                                else (if n <=3 then take n [x..]
                                      else x:'-':(toEnum (pred n+fromEnum x)):[]) ++ groupRange y 1 ys
        groupRange x n [] = if n <=3 then take n [x..]
                            else x:'-':(toEnum (pred n+fromEnum x)):[]

showPatternP :: Pattern -> String
showPatternP pIn =
  case pIn of
    PEmpty -> paren $ ""
    PGroup _ p -> paren $ showPatternP p
    POr ps -> paren $ concat $ intersperse "|" (map showPatternP ps)
    PConcat ps -> paren $ concatMap showPatternP ps
    PQuest p -> (showPatternP p)++"?"
    PPlus p -> (showPatternP p)++"+"
    PStar p -> ( showPatternP p)++"*"
    PBound i (Just j) p | i==j -> showPatternP p ++ ('{':show i)++"}"
    PBound i mj p -> showPatternP p ++ ('{':show i) ++ maybe ",}" (\j -> ',':show j++"}") mj
    --
    PCarat _ -> "^"
    PDollar _ -> "$"
    PDot _ -> "."
    PAny _ (PatternSet s scc sce sec) ->
        let (special,normal) = maybe ("","") ((partition (`elem` "]-")) . Set.toAscList) s
            charSpec = (if ']' `elem` special then (']':) else id) (byRange normal)
            scc' = maybe "" ((concatMap (\ss -> "[:"++unSCC ss++":]")) . Set.toList) scc
            sce' = maybe "" ((concatMap (\ss -> "[."++unSCE ss++".]")) . Set.toList) sce
            sec' = maybe "" ((concatMap (\ss -> "[="++unSEC ss++"=]")) . Set.toList) sec
        in concat ['[':charSpec,scc',sce',sec',if '-' `elem` special then "-]" else "]"]
    PAnyNot _ (PatternSet s scc sce sec) ->
        let (special,normal) = maybe ("","") ((partition (`elem` "]-")) . Set.toAscList) s
            charSpec = (if ']' `elem` special then (']':) else id) (byRange normal)
            scc' = maybe "" ((concatMap (\ss -> "[:"++unSCC ss++":]")) . Set.toList) scc
            sce' = maybe "" ((concatMap (\ss -> "[."++unSCE ss++".]")) . Set.toList) sce
            sec' = maybe "" ((concatMap (\ss -> "[="++unSEC ss++"=]")) . Set.toList) sec
        in concat ["[^",charSpec,scc',sce',sec',if '-' `elem` special then "-]" else "]"]
    PEscape _ c -> '\\':c:[]
    PChar _ c -> [c]
  where byRange xAll@(x:xs) | length xAll <=3 = xAll
                            | otherwise = groupRange x 1 xs
        byRange _ = undefined
        groupRange x n (y:ys) = if (fromEnum y)-(fromEnum x) == n then groupRange x (succ n) ys
                                else (if n <=3 then take n [x..]
                                      else x:'-':(toEnum (pred n+fromEnum x)):[]) ++ groupRange y 1 ys
        groupRange x n [] = if n <=3 then take n [x..]
                            else x:'-':(toEnum (pred n+fromEnum x)):[]
        paren s = ('(':s)++")"

data PatternSet = PatternSet (Maybe (Set Char))  (Maybe (Set (PatternSetCharacterClass)))
   (Maybe (Set PatternSetCollatingElement)) (Maybe (Set PatternSetEquivalenceClass)) deriving (Eq,Show)

newtype PatternSetCharacterClass   = PatternSetCharacterClass   {unSCC::String} deriving (Eq,Ord,Show) -- [: :]
newtype PatternSetCollatingElement = PatternSetCollatingElement {unSCE::String} deriving (Eq,Ord,Show) -- [. .]
newtype PatternSetEquivalenceClass = PatternSetEquivalenceClass {unSEC::String} deriving (Eq,Ord,Show) -- [= =]

-- | PatternIndex is for indexing submatches from  parenthesized groups (PGroup)
type PatternIndex = Int

{-
-- helper function
isPostAtom :: Pattern -> Bool
isPostAtom p = case p of
                 PQuest _ -> True
                 PPlus _ -> True
                 PStar _ -> True
                 PBound _ _ _ -> True
                 _ -> False
-}