{-# OPTIONS_GHC -fno-warn-incomplete-uni-patterns #-}

-- | This "Text.Regex.TDFA.Pattern" module provides the 'Pattern' data
-- type and its subtypes.  This 'Pattern' type is used to represent
-- the parsed form of a regular expression.

module Text.Regex.TDFA.Pattern
    (Pattern(..)
    ,PatternSet(..)
    ,PatternSetCharacterClass(..)
    ,PatternSetCollatingElement(..)
    ,PatternSetEquivalenceClass(..)
    ,GroupIndex
    ,DoPa(..)
    ,decodeCharacterClass, decodePatternSet
    ,showPattern
-- ** Internal use
    ,starTrans
-- ** Internal use, operations to support debugging under @ghci@
    ,starTrans',simplify',dfsPattern
    ) where

{- By Chris Kuklewicz, 2007. BSD License, see the LICENSE file. -}

import Data.List(intersperse,partition)
import qualified Data.Set as Set
import Data.Set (Set)
import Text.Regex.TDFA.Common(DoPa(..),GroupIndex,common_error)

err :: String -> a
err :: String -> a
err = String -> String -> a
forall a. String -> String -> a
common_error String
"Text.Regex.TDFA.Pattern"

-- | 'Pattern' is the type returned by the regular expression parser 'parseRegex'.
-- This is consumed by the "Text.Regex.TDFA.CorePattern" module and the tender leaves
-- are nibbled by the "Text.Regex.TDFA.TNFA" module.
--
-- The 'DoPa' field is the index of the component in the regex string @r@.
data Pattern
  = PEmpty
      -- ^ @()@, matches the empty string.
  | PGroup  (Maybe GroupIndex) Pattern
      -- ^ Group @(r)@.  @Nothing@ indicates non-matching 'PGroup'
      -- (never produced by parser 'parseRegex').
  | POr     [Pattern]
      -- ^ Alternative @r|s@ (flattened by 'starTrans').
  | PConcat [Pattern]
      -- ^ Sequence @rs@ (flattened by 'starTrans').
  | PQuest  Pattern
      -- ^ Zero or one repetitions @r?@ (eliminated by 'starTrans').
  | PPlus   Pattern
      -- ^ One or more repetitions @r+@ (eliminated by 'starTrans').
  | PStar   Bool Pattern
      -- ^ Zero or more repetitions @r*@.
      -- @True@ (default) means may accept the empty string on its first iteration.
  | PBound  Int (Maybe Int) Pattern
      -- ^ Given number or repetitions @r{n}@ or @r{n,m}@
      -- (eliminated by 'starTrans').

  -- The rest of these need an index of where in the regex string it is from
  | PCarat  { Pattern -> DoPa
getDoPa :: DoPa }
      -- ^ @^@ matches beginning of input.
  | PDollar { getDoPa :: DoPa }
      -- ^ @$@ matches end of input.

  -- The following test and accept a single character
  | PDot    { getDoPa :: DoPa }
      -- ^ @.@ matches any character.
  | PAny    { getDoPa :: DoPa, Pattern -> PatternSet
getPatternSet :: PatternSet }
      -- ^ Bracket expression @[...]@.
  | PAnyNot { getDoPa :: DoPa, getPatternSet :: PatternSet }
      -- ^ Inverted bracket expression @[^...]@.
  | PEscape { getDoPa :: DoPa, Pattern -> Char
getPatternChar :: Char }
      -- ^ Backslashed character @\c@, may have special meaning.
  | PChar   { getDoPa :: DoPa, getPatternChar :: Char }
      -- ^ Single character, matches given character.

  -- The following are semantic tags created in starTrans, not the parser
  | PNonCapture Pattern
     -- ^ Tag for internal use, introduced by 'starTrans'.
  | PNonEmpty Pattern
     -- ^ Tag for internal use, introduced by 'starTrans'.
  deriving (Pattern -> Pattern -> Bool
(Pattern -> Pattern -> Bool)
-> (Pattern -> Pattern -> Bool) -> Eq Pattern
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Pattern -> Pattern -> Bool
$c/= :: Pattern -> Pattern -> Bool
== :: Pattern -> Pattern -> Bool
$c== :: Pattern -> Pattern -> Bool
Eq, Int -> Pattern -> ShowS
[Pattern] -> ShowS
Pattern -> String
(Int -> Pattern -> ShowS)
-> (Pattern -> String) -> ([Pattern] -> ShowS) -> Show Pattern
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Pattern] -> ShowS
$cshowList :: [Pattern] -> ShowS
show :: Pattern -> String
$cshow :: Pattern -> String
showsPrec :: Int -> Pattern -> ShowS
$cshowsPrec :: Int -> Pattern -> ShowS
Show)

-- Andreas Abel, 2022-07-18, issue #47:
-- The following claim is FALSE:
--
-- I have not been checking, but this should have the property that
-- parsing the resulting string should result in an identical 'Pattern'.
-- This is not true if 'starTrans' has created 'PNonCapture' and 'PNonEmpty'
-- values or a @'PStar' False@.  The contents of a @[...]@ grouping are
-- always shown in a sorted canonical order.
showPattern :: Pattern -> String
showPattern :: Pattern -> String
showPattern Pattern
pIn =
  case Pattern
pIn of
    Pattern
PEmpty -> String
"()"
    PGroup Maybe Int
_ Pattern
p -> ShowS
paren (Pattern -> String
showPattern Pattern
p)
    POr [Pattern]
ps -> [String] -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([String] -> String) -> [String] -> String
forall a b. (a -> b) -> a -> b
$ String -> [String] -> [String]
forall a. a -> [a] -> [a]
intersperse String
"|" ((Pattern -> String) -> [Pattern] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map Pattern -> String
showPattern [Pattern]
ps)
    PConcat [Pattern]
ps -> (Pattern -> String) -> [Pattern] -> String
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Pattern -> String
showPattern [Pattern]
ps
    PQuest Pattern
p -> (Pattern -> String
showPattern Pattern
p)String -> ShowS
forall a. [a] -> [a] -> [a]
++String
"?"
    PPlus Pattern
p -> (Pattern -> String
showPattern Pattern
p)String -> ShowS
forall a. [a] -> [a] -> [a]
++String
"+"
    -- If PStar has mayFirstBeNull False then reparsing will forget this flag
    PStar Bool
_ Pattern
p -> (Pattern -> String
showPattern Pattern
p)String -> ShowS
forall a. [a] -> [a] -> [a]
++String
"*"
    PBound Int
i (Just Int
j) Pattern
p | Int
iInt -> Int -> Bool
forall a. Eq a => a -> a -> Bool
==Int
j -> Pattern -> String
showPattern Pattern
p String -> ShowS
forall a. [a] -> [a] -> [a]
++ (Char
'{'Char -> ShowS
forall a. a -> [a] -> [a]
:Int -> String
forall a. Show a => a -> String
show Int
i)String -> ShowS
forall a. [a] -> [a] -> [a]
++String
"}"
    PBound Int
i Maybe Int
mj Pattern
p -> Pattern -> String
showPattern Pattern
p String -> ShowS
forall a. [a] -> [a] -> [a]
++ (Char
'{'Char -> ShowS
forall a. a -> [a] -> [a]
:Int -> String
forall a. Show a => a -> String
show Int
i) String -> ShowS
forall a. [a] -> [a] -> [a]
++ String -> (Int -> String) -> Maybe Int -> String
forall b a. b -> (a -> b) -> Maybe a -> b
maybe String
",}" (\Int
j -> Char
','Char -> ShowS
forall a. a -> [a] -> [a]
:Int -> String
forall a. Show a => a -> String
show Int
jString -> ShowS
forall a. [a] -> [a] -> [a]
++String
"}") Maybe Int
mj
    --
    PCarat DoPa
_ -> String
"^"
    PDollar DoPa
_ -> String
"$"
    PDot DoPa
_ -> String
"."
    PAny DoPa
_ PatternSet
ps -> (Char
'['Char -> ShowS
forall a. a -> [a] -> [a]
:PatternSet -> String
forall a. Show a => a -> String
show PatternSet
ps)String -> ShowS
forall a. [a] -> [a] -> [a]
++String
"]"
    PAnyNot DoPa
_ PatternSet
ps ->  (Char
'['Char -> ShowS
forall a. a -> [a] -> [a]
:Char
'^'Char -> ShowS
forall a. a -> [a] -> [a]
:PatternSet -> String
forall a. Show a => a -> String
show PatternSet
ps)String -> ShowS
forall a. [a] -> [a] -> [a]
++String
"]"
    PEscape DoPa
_ Char
c -> Char
'\\'Char -> ShowS
forall a. a -> [a] -> [a]
:Char
cChar -> ShowS
forall a. a -> [a] -> [a]
:[]
    PChar DoPa
_ Char
c -> [Char
c]
    -- The following were not directly from the parser, and will not be parsed in properly
    PNonCapture Pattern
p -> Pattern -> String
showPattern Pattern
p
    PNonEmpty Pattern
p -> Pattern -> String
showPattern Pattern
p
  where {-
        groupRange x n (y:ys) = if (fromEnum y)-(fromEnum x) == n then groupRange x (succ n) ys
                                else (if n <=3 then take n [x..]
                                      else x:'-':(toEnum (pred n+fromEnum x)):[]) ++ groupRange y 1 ys
        groupRange x n [] = if n <=3 then take n [x..]
                            else x:'-':(toEnum (pred n+fromEnum x)):[]
-}
        paren :: ShowS
paren String
s = (Char
'('Char -> ShowS
forall a. a -> [a] -> [a]
:String
s)String -> ShowS
forall a. [a] -> [a] -> [a]
++String
")"

-- | Content of a bracket expression @[...]@ organized into
-- characters,
-- POSIX character classes (e.g. @[[:alnum:]]@),
-- collating elements (e.g. @[.ch.]@, unused), and
-- equivalence classes (e.g. @[=a=]@, treated as characters).
--
data PatternSet = PatternSet (Maybe (Set Char))
                             (Maybe (Set PatternSetCharacterClass))
                             (Maybe (Set PatternSetCollatingElement))
                             (Maybe (Set PatternSetEquivalenceClass))
                             deriving (PatternSet -> PatternSet -> Bool
(PatternSet -> PatternSet -> Bool)
-> (PatternSet -> PatternSet -> Bool) -> Eq PatternSet
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: PatternSet -> PatternSet -> Bool
$c/= :: PatternSet -> PatternSet -> Bool
== :: PatternSet -> PatternSet -> Bool
$c== :: PatternSet -> PatternSet -> Bool
Eq)

-- | Hand-rolled implementation, giving textual rather than Haskell representation.
instance Show PatternSet where
  showsPrec :: Int -> PatternSet -> ShowS
showsPrec Int
i (PatternSet Maybe (Set Char)
s Maybe (Set PatternSetCharacterClass)
scc Maybe (Set PatternSetCollatingElement)
sce Maybe (Set PatternSetEquivalenceClass)
sec) =
    let (String
special,String
normal) = (String, String)
-> (Set Char -> (String, String))
-> Maybe (Set Char)
-> (String, String)
forall b a. b -> (a -> b) -> Maybe a -> b
maybe (String
"",String
"") (((Char -> Bool) -> String -> (String, String)
forall a. (a -> Bool) -> [a] -> ([a], [a])
partition (Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
"]-")) (String -> (String, String))
-> (Set Char -> String) -> Set Char -> (String, String)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Set Char -> String
forall a. Set a -> [a]
Set.toAscList) Maybe (Set Char)
s
        charSpec :: String
charSpec = (if Char
']' Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
special then (Char
']'Char -> ShowS
forall a. a -> [a] -> [a]
:) else ShowS
forall a. a -> a
id) (ShowS
byRange String
normal)
        scc' :: String
scc' = String
-> (Set PatternSetCharacterClass -> String)
-> Maybe (Set PatternSetCharacterClass)
-> String
forall b a. b -> (a -> b) -> Maybe a -> b
maybe String
"" (((PatternSetCharacterClass -> String)
-> [PatternSetCharacterClass] -> String
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap PatternSetCharacterClass -> String
forall a. Show a => a -> String
show) ([PatternSetCharacterClass] -> String)
-> (Set PatternSetCharacterClass -> [PatternSetCharacterClass])
-> Set PatternSetCharacterClass
-> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Set PatternSetCharacterClass -> [PatternSetCharacterClass]
forall a. Set a -> [a]
Set.toList) Maybe (Set PatternSetCharacterClass)
scc
        sce' :: String
sce' = String
-> (Set PatternSetCollatingElement -> String)
-> Maybe (Set PatternSetCollatingElement)
-> String
forall b a. b -> (a -> b) -> Maybe a -> b
maybe String
"" (((PatternSetCollatingElement -> String)
-> [PatternSetCollatingElement] -> String
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap PatternSetCollatingElement -> String
forall a. Show a => a -> String
show) ([PatternSetCollatingElement] -> String)
-> (Set PatternSetCollatingElement -> [PatternSetCollatingElement])
-> Set PatternSetCollatingElement
-> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Set PatternSetCollatingElement -> [PatternSetCollatingElement]
forall a. Set a -> [a]
Set.toList) Maybe (Set PatternSetCollatingElement)
sce
        sec' :: String
sec' = String
-> (Set PatternSetEquivalenceClass -> String)
-> Maybe (Set PatternSetEquivalenceClass)
-> String
forall b a. b -> (a -> b) -> Maybe a -> b
maybe String
"" (((PatternSetEquivalenceClass -> String)
-> [PatternSetEquivalenceClass] -> String
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap PatternSetEquivalenceClass -> String
forall a. Show a => a -> String
show) ([PatternSetEquivalenceClass] -> String)
-> (Set PatternSetEquivalenceClass -> [PatternSetEquivalenceClass])
-> Set PatternSetEquivalenceClass
-> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Set PatternSetEquivalenceClass -> [PatternSetEquivalenceClass]
forall a. Set a -> [a]
Set.toList) Maybe (Set PatternSetEquivalenceClass)
sec
    in String -> ShowS
forall a. Show a => a -> ShowS
shows String
charSpec
       ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> String -> ShowS
forall a. Show a => Int -> a -> ShowS
showsPrec Int
i String
scc' ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> String -> ShowS
forall a. Show a => Int -> a -> ShowS
showsPrec Int
i String
sce' ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> String -> ShowS
forall a. Show a => Int -> a -> ShowS
showsPrec Int
i String
sec'
       ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. if Char
'-' Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` String
special then Char -> ShowS
showChar Char
'-' else ShowS
forall a. a -> a
id
    where byRange :: ShowS
byRange xAll :: String
xAll@(~(Char
x:String
xs))
            | String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length String
xAll Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<=Int
3 = String
xAll
            | Bool
otherwise       = Char -> Int -> ShowS
groupRange Char
x Int
1 String
xs
          groupRange :: Char -> Int -> ShowS
groupRange Char
x Int
n (Char
y:String
ys) = if (Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
y)Int -> Int -> Int
forall a. Num a => a -> a -> a
-(Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
x) Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
n then Char -> Int -> ShowS
groupRange Char
x (Int -> Int
forall a. Enum a => a -> a
succ Int
n) String
ys
                                  else (if Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<=Int
3 then Int -> ShowS
forall a. Int -> [a] -> [a]
take Int
n [Char
x..]
                                        else Char
xChar -> ShowS
forall a. a -> [a] -> [a]
:Char
'-'Char -> ShowS
forall a. a -> [a] -> [a]
:(Int -> Char
forall a. Enum a => Int -> a
toEnum (Int -> Int
forall a. Enum a => a -> a
pred Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
x))Char -> ShowS
forall a. a -> [a] -> [a]
:[]) String -> ShowS
forall a. [a] -> [a] -> [a]
++ Char -> Int -> ShowS
groupRange Char
y Int
1 String
ys
          groupRange Char
x Int
n [] = if Int
n Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<=Int
3 then Int -> ShowS
forall a. Int -> [a] -> [a]
take Int
n [Char
x..]
                              else Char
xChar -> ShowS
forall a. a -> [a] -> [a]
:Char
'-'Char -> ShowS
forall a. a -> [a] -> [a]
:(Int -> Char
forall a. Enum a => Int -> a
toEnum (Int -> Int
forall a. Enum a => a -> a
pred Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
+Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
x))Char -> ShowS
forall a. a -> [a] -> [a]
:[]

-- | Content of @[: :]@, e.g. @"alnum"@ for @[:alnum:]@.
newtype PatternSetCharacterClass   = PatternSetCharacterClass   {PatternSetCharacterClass -> String
unSCC::String}
  deriving (PatternSetCharacterClass -> PatternSetCharacterClass -> Bool
(PatternSetCharacterClass -> PatternSetCharacterClass -> Bool)
-> (PatternSetCharacterClass -> PatternSetCharacterClass -> Bool)
-> Eq PatternSetCharacterClass
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: PatternSetCharacterClass -> PatternSetCharacterClass -> Bool
$c/= :: PatternSetCharacterClass -> PatternSetCharacterClass -> Bool
== :: PatternSetCharacterClass -> PatternSetCharacterClass -> Bool
$c== :: PatternSetCharacterClass -> PatternSetCharacterClass -> Bool
Eq,Eq PatternSetCharacterClass
Eq PatternSetCharacterClass
-> (PatternSetCharacterClass
    -> PatternSetCharacterClass -> Ordering)
-> (PatternSetCharacterClass -> PatternSetCharacterClass -> Bool)
-> (PatternSetCharacterClass -> PatternSetCharacterClass -> Bool)
-> (PatternSetCharacterClass -> PatternSetCharacterClass -> Bool)
-> (PatternSetCharacterClass -> PatternSetCharacterClass -> Bool)
-> (PatternSetCharacterClass
    -> PatternSetCharacterClass -> PatternSetCharacterClass)
-> (PatternSetCharacterClass
    -> PatternSetCharacterClass -> PatternSetCharacterClass)
-> Ord PatternSetCharacterClass
PatternSetCharacterClass -> PatternSetCharacterClass -> Bool
PatternSetCharacterClass -> PatternSetCharacterClass -> Ordering
PatternSetCharacterClass
-> PatternSetCharacterClass -> PatternSetCharacterClass
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: PatternSetCharacterClass
-> PatternSetCharacterClass -> PatternSetCharacterClass
$cmin :: PatternSetCharacterClass
-> PatternSetCharacterClass -> PatternSetCharacterClass
max :: PatternSetCharacterClass
-> PatternSetCharacterClass -> PatternSetCharacterClass
$cmax :: PatternSetCharacterClass
-> PatternSetCharacterClass -> PatternSetCharacterClass
>= :: PatternSetCharacterClass -> PatternSetCharacterClass -> Bool
$c>= :: PatternSetCharacterClass -> PatternSetCharacterClass -> Bool
> :: PatternSetCharacterClass -> PatternSetCharacterClass -> Bool
$c> :: PatternSetCharacterClass -> PatternSetCharacterClass -> Bool
<= :: PatternSetCharacterClass -> PatternSetCharacterClass -> Bool
$c<= :: PatternSetCharacterClass -> PatternSetCharacterClass -> Bool
< :: PatternSetCharacterClass -> PatternSetCharacterClass -> Bool
$c< :: PatternSetCharacterClass -> PatternSetCharacterClass -> Bool
compare :: PatternSetCharacterClass -> PatternSetCharacterClass -> Ordering
$ccompare :: PatternSetCharacterClass -> PatternSetCharacterClass -> Ordering
$cp1Ord :: Eq PatternSetCharacterClass
Ord)

-- | Content of @[. .]@, e.g. @"ch"@ for @[.ch.]@.
newtype PatternSetCollatingElement = PatternSetCollatingElement {PatternSetCollatingElement -> String
unSCE::String}
  deriving (PatternSetCollatingElement -> PatternSetCollatingElement -> Bool
(PatternSetCollatingElement -> PatternSetCollatingElement -> Bool)
-> (PatternSetCollatingElement
    -> PatternSetCollatingElement -> Bool)
-> Eq PatternSetCollatingElement
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: PatternSetCollatingElement -> PatternSetCollatingElement -> Bool
$c/= :: PatternSetCollatingElement -> PatternSetCollatingElement -> Bool
== :: PatternSetCollatingElement -> PatternSetCollatingElement -> Bool
$c== :: PatternSetCollatingElement -> PatternSetCollatingElement -> Bool
Eq,Eq PatternSetCollatingElement
Eq PatternSetCollatingElement
-> (PatternSetCollatingElement
    -> PatternSetCollatingElement -> Ordering)
-> (PatternSetCollatingElement
    -> PatternSetCollatingElement -> Bool)
-> (PatternSetCollatingElement
    -> PatternSetCollatingElement -> Bool)
-> (PatternSetCollatingElement
    -> PatternSetCollatingElement -> Bool)
-> (PatternSetCollatingElement
    -> PatternSetCollatingElement -> Bool)
-> (PatternSetCollatingElement
    -> PatternSetCollatingElement -> PatternSetCollatingElement)
-> (PatternSetCollatingElement
    -> PatternSetCollatingElement -> PatternSetCollatingElement)
-> Ord PatternSetCollatingElement
PatternSetCollatingElement -> PatternSetCollatingElement -> Bool
PatternSetCollatingElement
-> PatternSetCollatingElement -> Ordering
PatternSetCollatingElement
-> PatternSetCollatingElement -> PatternSetCollatingElement
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: PatternSetCollatingElement
-> PatternSetCollatingElement -> PatternSetCollatingElement
$cmin :: PatternSetCollatingElement
-> PatternSetCollatingElement -> PatternSetCollatingElement
max :: PatternSetCollatingElement
-> PatternSetCollatingElement -> PatternSetCollatingElement
$cmax :: PatternSetCollatingElement
-> PatternSetCollatingElement -> PatternSetCollatingElement
>= :: PatternSetCollatingElement -> PatternSetCollatingElement -> Bool
$c>= :: PatternSetCollatingElement -> PatternSetCollatingElement -> Bool
> :: PatternSetCollatingElement -> PatternSetCollatingElement -> Bool
$c> :: PatternSetCollatingElement -> PatternSetCollatingElement -> Bool
<= :: PatternSetCollatingElement -> PatternSetCollatingElement -> Bool
$c<= :: PatternSetCollatingElement -> PatternSetCollatingElement -> Bool
< :: PatternSetCollatingElement -> PatternSetCollatingElement -> Bool
$c< :: PatternSetCollatingElement -> PatternSetCollatingElement -> Bool
compare :: PatternSetCollatingElement
-> PatternSetCollatingElement -> Ordering
$ccompare :: PatternSetCollatingElement
-> PatternSetCollatingElement -> Ordering
$cp1Ord :: Eq PatternSetCollatingElement
Ord)

-- | Content of @[= =]@, e.g. @"a"@ for @[=a=]@.
newtype PatternSetEquivalenceClass = PatternSetEquivalenceClass {PatternSetEquivalenceClass -> String
unSEC::String}
  deriving (PatternSetEquivalenceClass -> PatternSetEquivalenceClass -> Bool
(PatternSetEquivalenceClass -> PatternSetEquivalenceClass -> Bool)
-> (PatternSetEquivalenceClass
    -> PatternSetEquivalenceClass -> Bool)
-> Eq PatternSetEquivalenceClass
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: PatternSetEquivalenceClass -> PatternSetEquivalenceClass -> Bool
$c/= :: PatternSetEquivalenceClass -> PatternSetEquivalenceClass -> Bool
== :: PatternSetEquivalenceClass -> PatternSetEquivalenceClass -> Bool
$c== :: PatternSetEquivalenceClass -> PatternSetEquivalenceClass -> Bool
Eq,Eq PatternSetEquivalenceClass
Eq PatternSetEquivalenceClass
-> (PatternSetEquivalenceClass
    -> PatternSetEquivalenceClass -> Ordering)
-> (PatternSetEquivalenceClass
    -> PatternSetEquivalenceClass -> Bool)
-> (PatternSetEquivalenceClass
    -> PatternSetEquivalenceClass -> Bool)
-> (PatternSetEquivalenceClass
    -> PatternSetEquivalenceClass -> Bool)
-> (PatternSetEquivalenceClass
    -> PatternSetEquivalenceClass -> Bool)
-> (PatternSetEquivalenceClass
    -> PatternSetEquivalenceClass -> PatternSetEquivalenceClass)
-> (PatternSetEquivalenceClass
    -> PatternSetEquivalenceClass -> PatternSetEquivalenceClass)
-> Ord PatternSetEquivalenceClass
PatternSetEquivalenceClass -> PatternSetEquivalenceClass -> Bool
PatternSetEquivalenceClass
-> PatternSetEquivalenceClass -> Ordering
PatternSetEquivalenceClass
-> PatternSetEquivalenceClass -> PatternSetEquivalenceClass
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: PatternSetEquivalenceClass
-> PatternSetEquivalenceClass -> PatternSetEquivalenceClass
$cmin :: PatternSetEquivalenceClass
-> PatternSetEquivalenceClass -> PatternSetEquivalenceClass
max :: PatternSetEquivalenceClass
-> PatternSetEquivalenceClass -> PatternSetEquivalenceClass
$cmax :: PatternSetEquivalenceClass
-> PatternSetEquivalenceClass -> PatternSetEquivalenceClass
>= :: PatternSetEquivalenceClass -> PatternSetEquivalenceClass -> Bool
$c>= :: PatternSetEquivalenceClass -> PatternSetEquivalenceClass -> Bool
> :: PatternSetEquivalenceClass -> PatternSetEquivalenceClass -> Bool
$c> :: PatternSetEquivalenceClass -> PatternSetEquivalenceClass -> Bool
<= :: PatternSetEquivalenceClass -> PatternSetEquivalenceClass -> Bool
$c<= :: PatternSetEquivalenceClass -> PatternSetEquivalenceClass -> Bool
< :: PatternSetEquivalenceClass -> PatternSetEquivalenceClass -> Bool
$c< :: PatternSetEquivalenceClass -> PatternSetEquivalenceClass -> Bool
compare :: PatternSetEquivalenceClass
-> PatternSetEquivalenceClass -> Ordering
$ccompare :: PatternSetEquivalenceClass
-> PatternSetEquivalenceClass -> Ordering
$cp1Ord :: Eq PatternSetEquivalenceClass
Ord)

-- | Hand-rolled implementation, giving textual rather than Haskell representation.
instance Show PatternSetCharacterClass where
  showsPrec :: Int -> PatternSetCharacterClass -> ShowS
showsPrec Int
_ PatternSetCharacterClass
p = Char -> ShowS
showChar Char
'[' ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> ShowS
showChar Char
':' ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ShowS
forall a. Show a => a -> ShowS
shows (PatternSetCharacterClass -> String
unSCC PatternSetCharacterClass
p) ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> ShowS
showChar Char
':' ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> ShowS
showChar Char
']'

-- | Hand-rolled implementation, giving textual rather than Haskell representation.
instance Show PatternSetCollatingElement where
  showsPrec :: Int -> PatternSetCollatingElement -> ShowS
showsPrec Int
_ PatternSetCollatingElement
p = Char -> ShowS
showChar Char
'[' ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> ShowS
showChar Char
'.' ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ShowS
forall a. Show a => a -> ShowS
shows (PatternSetCollatingElement -> String
unSCE PatternSetCollatingElement
p) ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> ShowS
showChar Char
'.' ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> ShowS
showChar Char
']'

-- | Hand-rolled implementation, giving textual rather than Haskell representation.
instance Show PatternSetEquivalenceClass where
  showsPrec :: Int -> PatternSetEquivalenceClass -> ShowS
showsPrec Int
_ PatternSetEquivalenceClass
p = Char -> ShowS
showChar Char
'[' ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> ShowS
showChar Char
'=' ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ShowS
forall a. Show a => a -> ShowS
shows (PatternSetEquivalenceClass -> String
unSEC PatternSetEquivalenceClass
p) ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> ShowS
showChar Char
'=' ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> ShowS
showChar Char
']'

-- | @decodePatternSet@ cannot handle collating element and treats
-- equivalence classes as just their definition and nothing more.
--
-- @since 1.3.2
decodePatternSet :: PatternSet -> Set Char
decodePatternSet :: PatternSet -> Set Char
decodePatternSet (PatternSet Maybe (Set Char)
msc Maybe (Set PatternSetCharacterClass)
mscc Maybe (Set PatternSetCollatingElement)
_ Maybe (Set PatternSetEquivalenceClass)
msec) =
  let baseMSC :: Set Char
baseMSC = Set Char -> (Set Char -> Set Char) -> Maybe (Set Char) -> Set Char
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Set Char
forall a. Set a
Set.empty Set Char -> Set Char
forall a. a -> a
id Maybe (Set Char)
msc
      withMSCC :: Set Char
withMSCC = (Set Char -> Char -> Set Char) -> Set Char -> String -> Set Char
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl ((Char -> Set Char -> Set Char) -> Set Char -> Char -> Set Char
forall a b c. (a -> b -> c) -> b -> a -> c
flip Char -> Set Char -> Set Char
forall a. Ord a => a -> Set a -> Set a
Set.insert) Set Char
baseMSC  (String
-> (Set PatternSetCharacterClass -> String)
-> Maybe (Set PatternSetCharacterClass)
-> String
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [] ((PatternSetCharacterClass -> String)
-> [PatternSetCharacterClass] -> String
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap PatternSetCharacterClass -> String
decodeCharacterClass ([PatternSetCharacterClass] -> String)
-> (Set PatternSetCharacterClass -> [PatternSetCharacterClass])
-> Set PatternSetCharacterClass
-> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Set PatternSetCharacterClass -> [PatternSetCharacterClass]
forall a. Set a -> [a]
Set.toAscList) Maybe (Set PatternSetCharacterClass)
mscc)
      withMSEC :: Set Char
withMSEC = (Set Char -> Char -> Set Char) -> Set Char -> String -> Set Char
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl ((Char -> Set Char -> Set Char) -> Set Char -> Char -> Set Char
forall a b c. (a -> b -> c) -> b -> a -> c
flip Char -> Set Char -> Set Char
forall a. Ord a => a -> Set a -> Set a
Set.insert) Set Char
withMSCC (String
-> (Set PatternSetEquivalenceClass -> String)
-> Maybe (Set PatternSetEquivalenceClass)
-> String
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [] ((PatternSetEquivalenceClass -> String)
-> [PatternSetEquivalenceClass] -> String
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap PatternSetEquivalenceClass -> String
unSEC ([PatternSetEquivalenceClass] -> String)
-> (Set PatternSetEquivalenceClass -> [PatternSetEquivalenceClass])
-> Set PatternSetEquivalenceClass
-> String
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Set PatternSetEquivalenceClass -> [PatternSetEquivalenceClass]
forall a. Set a -> [a]
Set.toAscList) Maybe (Set PatternSetEquivalenceClass)
msec)
  in Set Char
withMSEC

-- | This returns the strictly ascending list of characters
-- represented by @[: :]@ POSIX character classes.
-- Unrecognized class names return an empty string.
--
-- @since 1.3.2
decodeCharacterClass :: PatternSetCharacterClass -> String
decodeCharacterClass :: PatternSetCharacterClass -> String
decodeCharacterClass (PatternSetCharacterClass String
s) =
  case String
s of
    String
"alnum"  -> [Char
'0'..Char
'9']String -> ShowS
forall a. [a] -> [a] -> [a]
++[Char
'A'..Char
'Z']String -> ShowS
forall a. [a] -> [a] -> [a]
++[Char
'a'..Char
'z']
    String
"digit"  -> [Char
'0'..Char
'9']
    String
"punct"  -> [Char
'\33'..Char
'\47']String -> ShowS
forall a. [a] -> [a] -> [a]
++[Char
'\58'..Char
'\64']String -> ShowS
forall a. [a] -> [a] -> [a]
++[Char
'\91'..Char
'\96']String -> ShowS
forall a. [a] -> [a] -> [a]
++[Char
'\123'..Char
'\126']
    String
"alpha"  -> [Char
'A'..Char
'Z']String -> ShowS
forall a. [a] -> [a] -> [a]
++[Char
'a'..Char
'z']
    String
"graph"  -> [Char
'\41'..Char
'\126']
    String
"space"  -> String
"\t\n\v\f\r "
    String
"blank"  -> String
"\t "
    String
"lower"  -> [Char
'a'..Char
'z']
    String
"upper"  -> [Char
'A'..Char
'Z']
    String
"cntrl"  -> [Char
'\0'..Char
'\31']String -> ShowS
forall a. [a] -> [a] -> [a]
++String
"\127" -- with NUL
    String
"print"  -> [Char
'\32'..Char
'\126']
    String
"xdigit" -> [Char
'0'..Char
'9']String -> ShowS
forall a. [a] -> [a] -> [a]
++[Char
'A'..Char
'F']String -> ShowS
forall a. [a] -> [a] -> [a]
++[Char
'a'..Char
'f']
    String
"word"   -> [Char
'0'..Char
'9']String -> ShowS
forall a. [a] -> [a] -> [a]
++[Char
'A'..Char
'Z']String -> ShowS
forall a. [a] -> [a] -> [a]
++String
"_"String -> ShowS
forall a. [a] -> [a] -> [a]
++[Char
'a'..Char
'z']
    String
_ -> []

-- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- == -- ==

-- | Do the transformation and simplification in a single traversal.
-- This removes the 'PPlus', 'PQuest', and 'PBound' values, changing to 'POr'
-- and 'PEmpty' and 'PStar'.  For some 'PBound' values it adds
-- 'PNonEmpty' and 'PNonCapture' semantic marker.  It also simplifies to
-- flatten out nested 'POr' and 'PConcat' instances and eliminate some
-- unneeded 'PEmpty' values.
starTrans :: Pattern -> Pattern
starTrans :: Pattern -> Pattern
starTrans = (Pattern -> Pattern) -> Pattern -> Pattern
dfsPattern (Pattern -> Pattern
simplify' (Pattern -> Pattern) -> (Pattern -> Pattern) -> Pattern -> Pattern
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Pattern -> Pattern
starTrans')

-- | Apply a 'Pattern' transformation function depth first.
dfsPattern :: (Pattern -> Pattern)  -- ^ The transformation function.
           -> Pattern               -- ^ The 'Pattern' to transform.
           -> Pattern               -- ^ The transformed 'Pattern'.
dfsPattern :: (Pattern -> Pattern) -> Pattern -> Pattern
dfsPattern Pattern -> Pattern
f = Pattern -> Pattern
dfs
 where unary :: (Pattern -> Pattern) -> Pattern -> Pattern
unary Pattern -> Pattern
c = Pattern -> Pattern
f (Pattern -> Pattern) -> (Pattern -> Pattern) -> Pattern -> Pattern
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Pattern -> Pattern
c (Pattern -> Pattern) -> (Pattern -> Pattern) -> Pattern -> Pattern
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Pattern -> Pattern
dfs
       dfs :: Pattern -> Pattern
dfs Pattern
pattern = case Pattern
pattern of
                       POr [Pattern]
ps -> Pattern -> Pattern
f ([Pattern] -> Pattern
POr ((Pattern -> Pattern) -> [Pattern] -> [Pattern]
forall a b. (a -> b) -> [a] -> [b]
map Pattern -> Pattern
dfs [Pattern]
ps))
                       PConcat [Pattern]
ps -> Pattern -> Pattern
f ([Pattern] -> Pattern
PConcat ((Pattern -> Pattern) -> [Pattern] -> [Pattern]
forall a b. (a -> b) -> [a] -> [b]
map Pattern -> Pattern
dfs [Pattern]
ps))
                       PGroup Maybe Int
i Pattern
p -> (Pattern -> Pattern) -> Pattern -> Pattern
unary (Maybe Int -> Pattern -> Pattern
PGroup Maybe Int
i) Pattern
p
                       PQuest Pattern
p -> (Pattern -> Pattern) -> Pattern -> Pattern
unary Pattern -> Pattern
PQuest Pattern
p
                       PPlus Pattern
p -> (Pattern -> Pattern) -> Pattern -> Pattern
unary Pattern -> Pattern
PPlus Pattern
p
                       PStar Bool
i Pattern
p -> (Pattern -> Pattern) -> Pattern -> Pattern
unary (Bool -> Pattern -> Pattern
PStar Bool
i) Pattern
p
                       PBound Int
i Maybe Int
mi Pattern
p -> (Pattern -> Pattern) -> Pattern -> Pattern
unary (Int -> Maybe Int -> Pattern -> Pattern
PBound Int
i Maybe Int
mi) Pattern
p
                       Pattern
_ -> Pattern -> Pattern
f Pattern
pattern

{- Replace by PNonCapture
unCapture = dfsPattern unCapture' where
  unCapture' (PGroup (Just _) p) = PGroup Nothing p
  unCapture' x = x
-}
reGroup :: Pattern -> Pattern
reGroup :: Pattern -> Pattern
reGroup p :: Pattern
p@(PConcat [Pattern]
xs) | Int
2 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= [Pattern] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Pattern]
xs = Maybe Int -> Pattern -> Pattern
PGroup Maybe Int
forall a. Maybe a
Nothing Pattern
p
reGroup p :: Pattern
p@(POr [Pattern]
xs)     | Int
2 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= [Pattern] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Pattern]
xs = Maybe Int -> Pattern -> Pattern
PGroup Maybe Int
forall a. Maybe a
Nothing Pattern
p
reGroup Pattern
p = Pattern
p

starTrans' :: Pattern -> Pattern
starTrans' :: Pattern -> Pattern
starTrans' Pattern
pIn =
  case Pattern
pIn of -- We know that "p" has been simplified in each of these cases:
    PQuest Pattern
p -> [Pattern] -> Pattern
POr [Pattern
p,Pattern
PEmpty]

{- The PStar should not capture 0 characters on its first iteration,
   so set its mayFirstBeNull flag to False
 -}
    PPlus Pattern
p | Pattern -> Bool
canOnlyMatchNull Pattern
p -> Pattern
p
            | Bool
otherwise -> Pattern -> Pattern
asGroup (Pattern -> Pattern) -> Pattern -> Pattern
forall a b. (a -> b) -> a -> b
$ [Pattern] -> Pattern
PConcat [Pattern -> Pattern
reGroup Pattern
p,Bool -> Pattern -> Pattern
PStar Bool
False Pattern
p]

{- "An ERE matching a single character repeated by an '*' , '?' , or
   an interval expression shall not match a null expression unless
   this is the only match for the repetition or it is necessary to
   satisfy the exact or minimum number of occurrences for the interval
   expression."
 -}
{- p? is p|PEmpty which prefers even a 0-character match for p
   p{0,1} is p? is POr [p,PEmpty]
   p{0,2} is (pp?)? NOT p?p?
   p{0,3} is (p(pp?)?)?
   p{1,2} is like pp{0,1} is like pp? but see below
   p{2,5} is ppp{0,3} is pp(p(pp?)?)?

   But this is not always right.  Because if the second use of p in
   p?p? matches 0 characters then the perhaps non 0 character match of
   the first p is overwritten.

   We need a new operation "p!" that means "p?" unless "p" match 0
   characters, in which case skip p as if it failed in "p?".  Thus
   when p cannot accept 0 characters p! and p? are equivalent.  And
   when p can only match 0 characters p! is PEmpty.  So for
   simplicity, only use ! when p can match 0 characters but not only 0
   characters.

   Call this (PNonEmpty p) in the Pattern type.
   p! is PNonEmpty p is POr [PEmpty,p]
   IS THIS TRUE?  Use QuickCheck?

   Note that if p cannot match 0 characters then p! is p? and vice versa

   The p{0,1} is still always p? and POr [p,PEmpty]
   Now p{0,2} means p?p! or (pp!)? and p{0,3} means (p(pp!)!)? or p?p!p!
   Equivalently p?p! and p?p!p!
   And p{2,2} is p'p and p{3,3} is p'p'p and p{4} is p'p'p'p
   The p{1,2} is pp! and p{1,3} is pp!p! or p(pp!)!
   And p{2,4} means p'pp!p! and p{3,6} is p'p'pp!p!p! or p'p'p(p(pp!)!)!

   But this second form still has a problem: the (pp!)! can have the first
   p match 0 and the second p match non-zero. This showed up for (.|$){1,3}
   since ($.!)! should not be a valid path but altered the qt_win commands.

   Thus only p'p'pp!p!p! has the right semantics.  For completeness:

   if p can only match only 0 characters then the cases are
   p{0,0} is (), p{0,_} = p?, p{_,_} is p

   if p can match 0 or non-zero characters then cases are
   p{0,0} is (), p{0,1} is (p)?, p{0,2} is (pp!)?, p{0,3} is (pp!p!)?
   p{1,1} is p, p{1,2} is pp!, p{1,3} is pp!p!, p{1,4} is pp!p!p!
   p{2,2} is p'p,
   p{2,3} is p'pp!,
   p{2,4} is p'pp!p! or p'p(pp!)!
   p{2,5} is p'pp!p!p! or p'p(p(pp!)!)!
   p{3,3} is p'p'p, p{3,4} is p'p'pp!, p{3,5} is p'p'pp!p!, p{3,6} is p'p'pp!p!p!

   if p can only match 1 or more characters then cases are
   p{0,0} is ()
   p{0,1} is p?, p{0,2} is (pp?)?, p{0,3} is (p(pp?)?)?, p{0,4} is (pp{0,3})?
   p{1,1} is p, p{1,j} is pp{0,pred j}
   p{2,2} is p'p, p{2,3} is p'pp?, p{2,4} is p'p(pp?)?, p{2,5} = p'p{1,4} = p'(pp{0,3})
   p{3,3} is p'p'p, p{3,4} is p'p'pp?, p{3,5} is p'p'p(pp?)?, p{3,6} is

   And by this logic, the PStar False is really p*!  So p{0,} is p*
   and p{1,} is pp*! and p{2,} is p'pp*! and p{3,} is p'p'pp*!

   The (nonEmpty' p) below is the only way PNonEmpty is introduced
   into the Pattern.  It is always preceded by p inside a PConcat
   list.  The p involved never simplifies to PEmpty.  Thus it is
   impossible to have PNonEmpty directly nested, i.e. (PNonEmpty
   (PNonEmpty _)) never occurs even after simplifications.

   The (nonCapture' p) below is the only way PNonCapture is
   introduced into the Pattern. It is always followed by p inside a
   PConcat list.

-}
-- Easy cases
    PBound Int
i Maybe Int
_        Pattern
_ | Int
iInt -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<Int
0 -> Pattern
PEmpty  -- impossibly malformed
    PBound Int
i (Just Int
j) Pattern
_ | Int
iInt -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>Int
j -> Pattern
PEmpty  -- impossibly malformed
    PBound Int
_ (Just Int
0) Pattern
_ -> Pattern
PEmpty
-- Medium cases
    PBound Int
0 Maybe Int
Nothing  Pattern
p | Pattern -> Bool
canOnlyMatchNull Pattern
p -> Pattern -> Pattern
quest Pattern
p
                        | Bool
otherwise -> Bool -> Pattern -> Pattern
PStar Bool
True Pattern
p
    PBound Int
0 (Just Int
1) Pattern
p -> Pattern -> Pattern
quest Pattern
p
-- Hard cases
    PBound Int
i Maybe Int
Nothing  Pattern
p | Pattern -> Bool
canOnlyMatchNull Pattern
p -> Pattern
p
                        | Bool
otherwise -> Pattern -> Pattern
asGroup (Pattern -> Pattern)
-> ([Pattern] -> Pattern) -> [Pattern] -> Pattern
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Pattern] -> Pattern
PConcat ([Pattern] -> Pattern) -> [Pattern] -> Pattern
forall a b. (a -> b) -> a -> b
$ ([Pattern] -> [Pattern]) -> Int -> [Pattern] -> [Pattern]
forall a. (a -> a) -> Int -> a -> a
apply (Pattern
nc'pPattern -> [Pattern] -> [Pattern]
forall a. a -> [a] -> [a]
:) (Int -> Int
forall a. Enum a => a -> a
pred Int
i) [Pattern -> Pattern
reGroup Pattern
p,Bool -> Pattern -> Pattern
PStar Bool
False Pattern
p]
      where nc'p :: Pattern
nc'p = Pattern -> Pattern
nonCapture' Pattern
p
    PBound Int
0 (Just Int
j) Pattern
p | Pattern -> Bool
canOnlyMatchNull Pattern
p -> Pattern -> Pattern
quest Pattern
p
                        -- The first operation is quest NOT nonEmpty. This can be tested with
                        -- "a\nb" "((^)?|b){0,3}" and "a\nb" "((^)|b){0,3}"
                        | Bool
otherwise -> Pattern -> Pattern
quest (Pattern -> Pattern) -> (Pattern -> Pattern) -> Pattern -> Pattern
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Pattern -> Pattern -> Pattern
concat' Pattern
p) (Pattern -> Pattern) -> Pattern -> Pattern
forall a b. (a -> b) -> a -> b
$
                                        (Pattern -> Pattern) -> Int -> Pattern -> Pattern
forall a. (a -> a) -> Int -> a -> a
apply (Pattern -> Pattern
nonEmpty' (Pattern -> Pattern) -> (Pattern -> Pattern) -> Pattern -> Pattern
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Pattern -> Pattern -> Pattern
concat' Pattern
p)) (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
2) (Pattern -> Pattern
nonEmpty' Pattern
p)
{- 0.99.6 remove
| cannotMatchNull p -> apply (quest' . (concat' p)) (pred j) (quest' p)
| otherwise -> POr [ simplify' (PConcat (p : replicate (pred j) (nonEmpty' p))) , PEmpty ]
-}
{- 0.99.6 add, 0.99.7 remove
    PBound i (Just j) p | canOnlyMatchNull p -> p
                        | i == j -> PConcat $ apply (p':) (pred i) [p]
                        | otherwise -> PConcat $ apply (p':) (pred i)
                                        [p,apply (nonEmpty' . (concat' p)) (j-i-1) (nonEmpty' p) ]
      where p' = nonCapture' p
-}
{- 0.99.7 add -}
    PBound Int
i (Just Int
j) Pattern
p | Pattern -> Bool
canOnlyMatchNull Pattern
p -> Pattern
p
                        | Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
j -> Pattern -> Pattern
asGroup (Pattern -> Pattern)
-> ([Pattern] -> Pattern) -> [Pattern] -> Pattern
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Pattern] -> Pattern
PConcat ([Pattern] -> Pattern) -> [Pattern] -> Pattern
forall a b. (a -> b) -> a -> b
$ ([Pattern] -> [Pattern]) -> Int -> [Pattern] -> [Pattern]
forall a. (a -> a) -> Int -> a -> a
apply (Pattern
nc'pPattern -> [Pattern] -> [Pattern]
forall a. a -> [a] -> [a]
:) (Int -> Int
forall a. Enum a => a -> a
pred Int
i) [Pattern -> Pattern
reGroup Pattern
p]
                        | Bool
otherwise -> Pattern -> Pattern
asGroup (Pattern -> Pattern)
-> ([Pattern] -> Pattern) -> [Pattern] -> Pattern
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Pattern] -> Pattern
PConcat ([Pattern] -> Pattern) -> [Pattern] -> Pattern
forall a b. (a -> b) -> a -> b
$ ([Pattern] -> [Pattern]) -> Int -> [Pattern] -> [Pattern]
forall a. (a -> a) -> Int -> a -> a
apply (Pattern
nc'pPattern -> [Pattern] -> [Pattern]
forall a. a -> [a] -> [a]
:) (Int -> Int
forall a. Enum a => a -> a
pred Int
i)
                                        [Pattern -> Pattern
reGroup Pattern
p,(Pattern -> Pattern) -> Int -> Pattern -> Pattern
forall a. (a -> a) -> Int -> a -> a
apply (Pattern -> Pattern
nonEmpty' (Pattern -> Pattern) -> (Pattern -> Pattern) -> Pattern -> Pattern
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Pattern -> Pattern -> Pattern
concat' Pattern
p)) (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1) (Pattern
ne'p) ]
      where nc'p :: Pattern
nc'p = Pattern -> Pattern
nonCapture' Pattern
p
            ne'p :: Pattern
ne'p = Pattern -> Pattern
nonEmpty' Pattern
p
{- 0.99.6
| cannotMatchNull p -> PConcat $ apply (p':) (pred i) $ (p:) $
  [apply (quest' . (concat' p)) (pred (j-i)) (quest' p)]
| otherwise -> PConcat $ (replicate (pred i) p') ++ p : (replicate (j-i) (nonEmpty' p))
-}
    PStar Bool
mayFirstBeNull Pattern
p | Pattern -> Bool
canOnlyMatchNull Pattern
p -> if Bool
mayFirstBeNull then Pattern -> Pattern
quest Pattern
p
                                                                    else Pattern
PEmpty
                           | Bool
otherwise -> Pattern
pass
    -- Left intact
    Pattern
PEmpty -> Pattern
pass
    PGroup {} -> Pattern
pass
    POr {} -> Pattern
pass
    PConcat {} -> Pattern
pass
    PCarat {} -> Pattern
pass
    PDollar {} -> Pattern
pass
    PDot {} -> Pattern
pass
    PAny {} -> Pattern
pass
    PAnyNot {} -> Pattern
pass
    PEscape {} -> Pattern
pass
    PChar {} -> Pattern
pass
    PNonCapture {} -> Pattern
pass
    PNonEmpty {} -> Pattern
pass -- TODO : remove PNonEmpty from program
  where
    quest :: Pattern -> Pattern
quest = (\ Pattern
p -> [Pattern] -> Pattern
POr [Pattern
p,Pattern
PEmpty])  -- require p to have been simplified
--    quest' = (\ p -> simplify' $ POr [p,PEmpty])  -- require p to have been simplified
    concat' :: Pattern -> Pattern -> Pattern
concat' Pattern
a Pattern
b = Pattern -> Pattern
simplify' (Pattern -> Pattern) -> Pattern -> Pattern
forall a b. (a -> b) -> a -> b
$ [Pattern] -> Pattern
PConcat [Pattern -> Pattern
reGroup Pattern
a,Pattern -> Pattern
reGroup Pattern
b]      -- require a and b to have been simplified
    nonEmpty' :: Pattern -> Pattern
nonEmpty' = (\ Pattern
p -> Pattern -> Pattern
simplify' (Pattern -> Pattern) -> Pattern -> Pattern
forall a b. (a -> b) -> a -> b
$ [Pattern] -> Pattern
POr [Pattern
PEmpty,Pattern
p]) -- 2009-01-19 : this was PNonEmpty
    nonCapture' :: Pattern -> Pattern
nonCapture' = Pattern -> Pattern
PNonCapture
    apply :: (a -> a) -> Int -> a -> a
apply a -> a
f Int
n a
x = ((a -> a) -> a -> a) -> a -> [a -> a] -> a
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (a -> a) -> a -> a
forall a b. (a -> b) -> a -> b
($) a
x (Int -> (a -> a) -> [a -> a]
forall a. Int -> a -> [a]
replicate Int
n a -> a
f) -- function f applied n times to x : f^n(x)
    asGroup :: Pattern -> Pattern
asGroup Pattern
p = Maybe Int -> Pattern -> Pattern
PGroup Maybe Int
forall a. Maybe a
Nothing (Pattern -> Pattern
simplify' Pattern
p)
    pass :: Pattern
pass = Pattern
pIn

-- | Function to transform a pattern into an equivalent, but less
-- redundant form.  Nested 'POr' and 'PConcat' are flattened. 'PEmpty'
-- is propagated.
simplify' :: Pattern -> Pattern
simplify' :: Pattern -> Pattern
simplify' x :: Pattern
x@(POr [Pattern]
_) =
  let ps' :: [Pattern]
ps' = case (Pattern -> Bool) -> [Pattern] -> ([Pattern], [Pattern])
forall a. (a -> Bool) -> [a] -> ([a], [a])
span Pattern -> Bool
notPEmpty (Pattern -> [Pattern]
flatten Pattern
x) of
              ([Pattern]
notEmpty,[]) -> [Pattern]
notEmpty
              ([Pattern]
notEmpty,Pattern
_:[Pattern]
rest) -> [Pattern]
notEmpty [Pattern] -> [Pattern] -> [Pattern]
forall a. [a] -> [a] -> [a]
++ (Pattern
PEmptyPattern -> [Pattern] -> [Pattern]
forall a. a -> [a] -> [a]
:(Pattern -> Bool) -> [Pattern] -> [Pattern]
forall a. (a -> Bool) -> [a] -> [a]
filter Pattern -> Bool
notPEmpty [Pattern]
rest) -- keep 1st PEmpty only
  in case [Pattern]
ps' of
       [] -> Pattern
PEmpty
       [Pattern
p] -> Pattern
p
       [Pattern]
_ -> [Pattern] -> Pattern
POr [Pattern]
ps'
simplify' x :: Pattern
x@(PConcat [Pattern]
_) =
  let ps' :: [Pattern]
ps' = (Pattern -> Bool) -> [Pattern] -> [Pattern]
forall a. (a -> Bool) -> [a] -> [a]
filter Pattern -> Bool
notPEmpty (Pattern -> [Pattern]
flatten Pattern
x)
  in case [Pattern]
ps' of
       [] -> Pattern
PEmpty
       [Pattern
p] -> Pattern
p
       [Pattern]
_ -> [Pattern] -> Pattern
PConcat [Pattern]
ps' -- PConcat ps'
simplify' (PStar Bool
_ Pattern
PEmpty) = Pattern
PEmpty
simplify' (PNonCapture Pattern
PEmpty) = Pattern
PEmpty -- 2009, perhaps useful
--simplify' (PNonEmpty PEmpty) = err "simplify' (PNonEmpty PEmpty) = should be Impossible!" -- 2009
simplify' Pattern
other = Pattern
other

-- | Function to flatten nested 'POr' or nested 'PConcat' applicataions.
flatten :: Pattern -> [Pattern]
flatten :: Pattern -> [Pattern]
flatten (POr [Pattern]
ps) = ((Pattern -> [Pattern]) -> [Pattern] -> [Pattern]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (\Pattern
x -> case Pattern
x of
                                       POr [Pattern]
ps' -> [Pattern]
ps'
                                       Pattern
p -> [Pattern
p]) [Pattern]
ps)
flatten (PConcat [Pattern]
ps) = ((Pattern -> [Pattern]) -> [Pattern] -> [Pattern]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (\Pattern
x -> case Pattern
x of
                                           PConcat [Pattern]
ps' -> [Pattern]
ps'
                                           Pattern
p -> [Pattern
p]) [Pattern]
ps)
flatten Pattern
_ = String -> [Pattern]
forall a. String -> a
err String
"flatten can only be applied to POr or PConcat"

notPEmpty :: Pattern -> Bool
notPEmpty :: Pattern -> Bool
notPEmpty Pattern
PEmpty = Bool
False
notPEmpty Pattern
_      = Bool
True

-- | Determines if 'Pattern' will fail or accept @[]@ and never accept any
-- characters. Treat 'PCarat' and 'PDollar' as @True@.
canOnlyMatchNull :: Pattern -> Bool
canOnlyMatchNull :: Pattern -> Bool
canOnlyMatchNull Pattern
pIn =
  case Pattern
pIn of
    Pattern
PEmpty -> Bool
True
    PGroup Maybe Int
_ Pattern
p -> Pattern -> Bool
canOnlyMatchNull Pattern
p
    POr [Pattern]
ps -> (Pattern -> Bool) -> [Pattern] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Pattern -> Bool
canOnlyMatchNull [Pattern]
ps
    PConcat [Pattern]
ps -> (Pattern -> Bool) -> [Pattern] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Pattern -> Bool
canOnlyMatchNull [Pattern]
ps
    PQuest Pattern
p -> Pattern -> Bool
canOnlyMatchNull Pattern
p
    PPlus Pattern
p -> Pattern -> Bool
canOnlyMatchNull Pattern
p
    PStar Bool
_ Pattern
p -> Pattern -> Bool
canOnlyMatchNull Pattern
p
    PBound Int
_ (Just Int
0) Pattern
_ -> Bool
True
    PBound Int
_ Maybe Int
_ Pattern
p -> Pattern -> Bool
canOnlyMatchNull Pattern
p
    PCarat DoPa
_ -> Bool
True
    PDollar DoPa
_ -> Bool
True
    PNonCapture Pattern
p -> Pattern -> Bool
canOnlyMatchNull Pattern
p
--    PNonEmpty p -> canOnlyMatchNull p -- like PQuest
    Pattern
_ ->Bool
False

{-

-- | If 'cannotMatchNull' returns 'True' then it is known that the
-- 'Pattern' will never accept an empty string.  If 'cannotMatchNull'
-- returns 'False' then it is possible but not definite that the
-- 'Pattern' could accept an empty string.
cannotMatchNull :: Pattern -> Bool
cannotMatchNull pIn =
  case pIn of
    PEmpty -> False
    PGroup _ p -> cannotMatchNull p
    POr [] -> False
    POr ps -> all cannotMatchNull ps
    PConcat [] -> False
    PConcat ps -> any cannotMatchNull ps
    PQuest _ -> False
    PPlus p -> cannotMatchNull p
    PStar {} -> False
    PBound 0 _ _ -> False
    PBound _ _ p -> cannotMatchNull p
    PCarat _ -> False
    PDollar _ -> False
    PNonCapture p -> cannotMatchNull p
--    PNonEmpty _ -> False -- like PQuest
    _ -> True
-}