-----------------------------------------------------------------------------
-- |
-- Module    : Data.SBV.RegExp
-- Copyright : (c) Levent Erkok
-- License   : BSD3
-- Maintainer: erkokl@gmail.com
-- Stability : experimental
--
-- A collection of regular-expression related utilities. The recommended
-- workflow is to import this module qualified as the names of the functions
-- are specifically chosen to be common identifiers. Also, it is recommended
-- you use the @OverloadedStrings@ extension to allow literal strings to be
-- used as symbolic-strings and regular-expressions when working with
-- this module.
-----------------------------------------------------------------------------

{-# LANGUAGE FlexibleInstances   #-}
{-# LANGUAGE OverloadedStrings   #-}
{-# LANGUAGE Rank2Types          #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeApplications    #-}

{-# OPTIONS_GHC -Wall -Werror #-}

module Data.SBV.RegExp (
        -- * Regular expressions
        RegExp(..)
        -- * Matching
        -- $matching
        , RegExpMatchable(..)
        -- * Constructing regular expressions
        -- ** Literals
        , exactly
        -- ** A class of characters
        , oneOf
        -- ** Spaces
        , newline, whiteSpaceNoNewLine, whiteSpace
        -- ** Separators
        , tab, punctuation
        -- ** Letters
        , asciiLetter, asciiLower, asciiUpper
        -- ** Digits
        , digit, octDigit, hexDigit
        -- ** Numbers
        , decimal, octal, hexadecimal, floating
        -- ** Identifiers
        , identifier
        ) where

import Prelude hiding (length, take, elem, notElem, head)

import qualified Prelude   as P
import qualified Data.List as L

import Data.SBV.Core.Data
import Data.SBV.Core.Model () -- instances only

import Data.SBV.String
import qualified Data.Char as C

import Data.Proxy

-- For testing only
import Data.SBV.Char

-- For doctest use only
--
-- $setup
-- >>> import Prelude hiding (length, take, elem, notElem, head)
-- >>> import Data.SBV.Provers.Prover (prove, sat)
-- >>> import Data.SBV.Core.Model
-- >>> :set -XOverloadedStrings
-- >>> :set -XScopedTypeVariables

-- | Matchable class. Things we can match against a 'RegExp'.
--
-- For instance, you can generate valid-looking phone numbers like this:
--
-- >>> :set -XOverloadedStrings
-- >>> let dig09 = Range '0' '9'
-- >>> let dig19 = Range '1' '9'
-- >>> let pre   = dig19 * Loop 2 2 dig09
-- >>> let post  = dig19 * Loop 3 3 dig09
-- >>> let phone = pre * "-" * post
-- >>> sat $ \s -> (s :: SString) `match` phone
-- Satisfiable. Model:
--   s0 = "386-3868" :: String
class RegExpMatchable a where
   -- | @`match` s r@ checks whether @s@ is in the language generated by @r@.
   match :: a -> RegExp -> SBool

-- | Matching a character simply means the singleton string matches the regex.
instance RegExpMatchable SChar where
   match :: SChar -> RegExp -> SBool
match = SString -> RegExp -> SBool
forall a. RegExpMatchable a => a -> RegExp -> SBool
match (SString -> RegExp -> SBool)
-> (SChar -> SString) -> SChar -> RegExp -> SBool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. SChar -> SString
singleton

-- | Matching symbolic strings.
instance RegExpMatchable SString where
   match :: SString -> RegExp -> SBool
match SString
input RegExp
regExp = StrOp -> Maybe (String -> Bool) -> SString -> SBool
forall a b.
(SymVal a, SymVal b) =>
StrOp -> Maybe (a -> b) -> SBV a -> SBV b
lift1 (RegExp -> StrOp
StrInRe RegExp
regExp) ((String -> Bool) -> Maybe (String -> Bool)
forall a. a -> Maybe a
Just (RegExp -> (String -> Bool) -> String -> Bool
go RegExp
regExp String -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
P.null)) SString
input
     where -- This isn't super efficient, but it gets the job done.
           go :: RegExp -> (String -> Bool) -> String -> Bool
           go :: RegExp -> (String -> Bool) -> String -> Bool
go (Literal String
l)    String -> Bool
k String
s      = String
l String -> String -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`L.isPrefixOf` String
s Bool -> Bool -> Bool
&& String -> Bool
k (Int -> String -> String
forall a. Int -> [a] -> [a]
P.drop (String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
P.length String
l) String
s)
           go RegExp
All            String -> Bool
_ String
_      = Bool
True
           go RegExp
None           String -> Bool
_ String
_      = Bool
False
           go (Range Char
_ Char
_)    String -> Bool
_ []     = Bool
False
           go (Range Char
a Char
b)    String -> Bool
k (Char
c:String
cs) = Char
a Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
c Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
b Bool -> Bool -> Bool
&& String -> Bool
k String
cs
           go (Conc [])      String -> Bool
k String
s      = String -> Bool
k String
s
           go (Conc (RegExp
r:[RegExp]
rs))  String -> Bool
k String
s      = RegExp -> (String -> Bool) -> String -> Bool
go RegExp
r (RegExp -> (String -> Bool) -> String -> Bool
go ([RegExp] -> RegExp
Conc [RegExp]
rs) String -> Bool
k) String
s
           go (KStar RegExp
r)      String -> Bool
k String
s      = String -> Bool
k String
s Bool -> Bool -> Bool
|| RegExp -> (String -> Bool) -> String -> Bool
go RegExp
r (Int -> (String -> Bool) -> String -> Bool
forall (t :: * -> *) a.
Foldable t =>
Int -> (t a -> Bool) -> t a -> Bool
smaller (String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
P.length String
s) (RegExp -> (String -> Bool) -> String -> Bool
go (RegExp -> RegExp
KStar RegExp
r) String -> Bool
k)) String
s
           go (KPlus RegExp
r)      String -> Bool
k String
s      = RegExp -> (String -> Bool) -> String -> Bool
go ([RegExp] -> RegExp
Conc [RegExp
r, RegExp -> RegExp
KStar RegExp
r]) String -> Bool
k String
s
           go (Opt RegExp
r)        String -> Bool
k String
s      = String -> Bool
k String
s Bool -> Bool -> Bool
|| RegExp -> (String -> Bool) -> String -> Bool
go RegExp
r String -> Bool
k String
s
           go (Loop Int
i Int
j RegExp
r)   String -> Bool
k String
s      = RegExp -> (String -> Bool) -> String -> Bool
go ([RegExp] -> RegExp
Conc (Int -> RegExp -> [RegExp]
forall a. Int -> a -> [a]
replicate Int
i RegExp
r [RegExp] -> [RegExp] -> [RegExp]
forall a. [a] -> [a] -> [a]
++ Int -> RegExp -> [RegExp]
forall a. Int -> a -> [a]
replicate (Int
j Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
i) (RegExp -> RegExp
Opt RegExp
r))) String -> Bool
k String
s
           go (Union [])     String -> Bool
_ String
_      = Bool
False
           go (Union [RegExp
x])    String -> Bool
k String
s      = RegExp -> (String -> Bool) -> String -> Bool
go RegExp
x String -> Bool
k String
s
           go (Union (RegExp
x:[RegExp]
xs)) String -> Bool
k String
s      = RegExp -> (String -> Bool) -> String -> Bool
go RegExp
x String -> Bool
k String
s Bool -> Bool -> Bool
|| RegExp -> (String -> Bool) -> String -> Bool
go ([RegExp] -> RegExp
Union [RegExp]
xs) String -> Bool
k String
s
           go (Inter RegExp
a RegExp
b)    String -> Bool
k String
s      = RegExp -> (String -> Bool) -> String -> Bool
go RegExp
a String -> Bool
k String
s Bool -> Bool -> Bool
&& RegExp -> (String -> Bool) -> String -> Bool
go RegExp
b String -> Bool
k String
s

           -- In the KStar case, make sure the continuation is called with something
           -- smaller to avoid infinite recursion!
           smaller :: Int -> (t a -> Bool) -> t a -> Bool
smaller Int
orig t a -> Bool
k t a
inp = t a -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
P.length t a
inp Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
orig Bool -> Bool -> Bool
&& t a -> Bool
k t a
inp

-- | A literal regular-expression, matching the given string exactly. Note that
-- with @OverloadedStrings@ extension, you can simply use a Haskell
-- string to mean the same thing, so this function is rarely needed.
--
-- >>> prove $ \(s :: SString) -> s `match` exactly "LITERAL" .<=> s .== "LITERAL"
-- Q.E.D.
exactly :: String -> RegExp
exactly :: String -> RegExp
exactly = String -> RegExp
Literal

-- | Helper to define a character class.
--
-- >>> prove $ \(c :: SChar) -> c `match` oneOf "ABCD" .<=> sAny (c .==) (map literal "ABCD")
-- Q.E.D.
oneOf :: String -> RegExp
oneOf :: String -> RegExp
oneOf String
xs = [RegExp] -> RegExp
Union [String -> RegExp
exactly [Char
x] | Char
x <- String
xs]

-- | Recognize a newline. Also includes carriage-return and form-feed.
--
-- >>> newline
-- (re.union (str.to.re "\n") (str.to.re "\r") (str.to.re "\f"))
-- >>> prove $ \c -> c `match` newline .=> isSpaceL1 c
-- Q.E.D.
newline :: RegExp
newline :: RegExp
newline = String -> RegExp
oneOf String
"\n\r\f"

-- | Recognize a tab.
--
-- >>> tab
-- (str.to.re "\t")
-- >>> prove $ \c -> c `match` tab .=> c .== literal '\t'
-- Q.E.D.
tab :: RegExp
tab :: RegExp
tab = String -> RegExp
oneOf String
"\t"

-- | Lift a char function to a regular expression that recognizes it.
liftPredL1 :: (Char -> Bool) -> RegExp
liftPredL1 :: (Char -> Bool) -> RegExp
liftPredL1 Char -> Bool
predicate = String -> RegExp
oneOf (String -> RegExp) -> String -> RegExp
forall a b. (a -> b) -> a -> b
$ (Char -> Bool) -> String -> String
forall a. (a -> Bool) -> [a] -> [a]
filter Char -> Bool
predicate ((Int -> Char) -> [Int] -> String
forall a b. (a -> b) -> [a] -> [b]
map Int -> Char
C.chr [Int
0 .. Int
255])

-- | Recognize white-space, but without a new line.
--
-- >>> prove $ \c -> c `match` whiteSpaceNoNewLine .=> c `match` whiteSpace .&& c ./= literal '\n'
-- Q.E.D.
whiteSpaceNoNewLine :: RegExp
whiteSpaceNoNewLine :: RegExp
whiteSpaceNoNewLine = (Char -> Bool) -> RegExp
liftPredL1 (\Char
c -> Char -> Bool
C.isSpace Char
c Bool -> Bool -> Bool
&& Char
c Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`P.notElem` (String
"\n" :: String))

-- | Recognize white space.
--
-- >>> prove $ \c -> c `match` whiteSpace .=> isSpaceL1 c
-- Q.E.D.
whiteSpace :: RegExp
whiteSpace :: RegExp
whiteSpace = (Char -> Bool) -> RegExp
liftPredL1 Char -> Bool
C.isSpace

-- | Recognize a punctuation character.
--
-- >>> prove $ \c -> c `match` punctuation .=> isPunctuationL1 c
-- Q.E.D.
punctuation :: RegExp
punctuation :: RegExp
punctuation = (Char -> Bool) -> RegExp
liftPredL1 Char -> Bool
C.isPunctuation

-- | Recognize an alphabet letter, i.e., @A@..@Z@, @a@..@z@.
asciiLetter :: RegExp
asciiLetter :: RegExp
asciiLetter = RegExp
asciiLower RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
+ RegExp
asciiUpper

-- | Recognize an ASCII lower case letter
--
-- >>> asciiLower
-- (re.range "a" "z")
-- >>> prove $ \c -> (c :: SChar) `match` asciiLower  .=> c `match` asciiLetter
-- Q.E.D.
-- >>> prove $ \c -> c `match` asciiLower  .=> toUpperL1 c `match` asciiUpper
-- Q.E.D.
-- >>> prove $ \c -> c `match` asciiLetter .=> toLowerL1 c `match` asciiLower
-- Q.E.D.
asciiLower :: RegExp
asciiLower :: RegExp
asciiLower = Char -> Char -> RegExp
Range Char
'a' Char
'z'

-- | Recognize an upper case letter
--
-- >>> asciiUpper
-- (re.range "A" "Z")
-- >>> prove $ \c -> (c :: SChar) `match` asciiUpper  .=> c `match` asciiLetter
-- Q.E.D.
-- >>> prove $ \c -> c `match` asciiUpper  .=> toLowerL1 c `match` asciiLower
-- Q.E.D.
-- >>> prove $ \c -> c `match` asciiLetter .=> toUpperL1 c `match` asciiUpper
-- Q.E.D.
asciiUpper :: RegExp
asciiUpper :: RegExp
asciiUpper = Char -> Char -> RegExp
Range Char
'A' Char
'Z'

-- | Recognize a digit. One of @0@..@9@.
--
-- >>> digit
-- (re.range "0" "9")
-- >>> prove $ \c -> c `match` digit .<=> let v = digitToInt c in 0 .<= v .&& v .< 10
-- Q.E.D.
digit :: RegExp
digit :: RegExp
digit = Char -> Char -> RegExp
Range Char
'0' Char
'9'

-- | Recognize an octal digit. One of @0@..@7@.
--
-- >>> octDigit
-- (re.range "0" "7")
-- >>> prove $ \c -> c `match` octDigit .<=> let v = digitToInt c in 0 .<= v .&& v .< 8
-- Q.E.D.
-- >>> prove $ \(c :: SChar) -> c `match` octDigit .=> c `match` digit
-- Q.E.D.
octDigit :: RegExp
octDigit :: RegExp
octDigit = Char -> Char -> RegExp
Range Char
'0' Char
'7'

-- | Recognize a hexadecimal digit. One of @0@..@9@, @a@..@f@, @A@..@F@.
--
-- >>> hexDigit
-- (re.union (re.range "0" "9") (re.range "a" "f") (re.range "A" "F"))
-- >>> prove $ \c -> c `match` hexDigit .<=> let v = digitToInt c in 0 .<= v .&& v .< 16
-- Q.E.D.
-- >>> prove $ \(c :: SChar) -> c `match` digit .=> c `match` hexDigit
-- Q.E.D.
hexDigit :: RegExp
hexDigit :: RegExp
hexDigit = RegExp
digit RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
+ Char -> Char -> RegExp
Range Char
'a' Char
'f' RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
+ Char -> Char -> RegExp
Range Char
'A' Char
'F'

-- | Recognize a decimal number.
--
-- >>> decimal
-- (re.+ (re.range "0" "9"))
-- >>> prove $ \s -> (s::SString) `match` decimal .=> sNot (s `match` KStar asciiLetter)
-- Q.E.D.
decimal :: RegExp
decimal :: RegExp
decimal = RegExp -> RegExp
KPlus RegExp
digit

-- | Recognize an octal number. Must have a prefix of the form @0o@\/@0O@.
--
-- >>> octal
-- (re.++ (re.union (str.to.re "0o") (str.to.re "0O")) (re.+ (re.range "0" "7")))
-- >>> prove $ \s -> s `match` octal .=> sAny (.== take 2 s) ["0o", "0O"]
-- Q.E.D.
octal :: RegExp
octal :: RegExp
octal = (RegExp
"0o" RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
+ RegExp
"0O") RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
* RegExp -> RegExp
KPlus RegExp
octDigit

-- | Recognize a hexadecimal number. Must have a prefix of the form @0x@\/@0X@.
--
-- >>> hexadecimal
-- (re.++ (re.union (str.to.re "0x") (str.to.re "0X")) (re.+ (re.union (re.range "0" "9") (re.range "a" "f") (re.range "A" "F"))))
-- >>> prove $ \s -> s `match` hexadecimal .=> sAny (.== take 2 s) ["0x", "0X"]
-- Q.E.D.
hexadecimal :: RegExp
hexadecimal :: RegExp
hexadecimal = (RegExp
"0x" RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
+ RegExp
"0X") RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
* RegExp -> RegExp
KPlus RegExp
hexDigit

-- | Recognize a floating point number. The exponent part is optional if a fraction
-- is present. The exponent may or may not have a sign.
--
-- >>> prove $ \s -> s `match` floating .=> length s .>= 3
-- Q.E.D.
floating :: RegExp
floating :: RegExp
floating = RegExp
withFraction RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
+ RegExp
withoutFraction
  where withFraction :: RegExp
withFraction    = RegExp
decimal RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
* RegExp
"." RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
* RegExp
decimal RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
* RegExp -> RegExp
Opt RegExp
expt
        withoutFraction :: RegExp
withoutFraction = RegExp
decimal RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
* RegExp
expt
        expt :: RegExp
expt            = (RegExp
"e" RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
+ RegExp
"E") RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
* RegExp -> RegExp
Opt (String -> RegExp
oneOf String
"+-") RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
* RegExp
decimal

-- | For the purposes of this regular expression, an identifier consists of a letter
-- followed by zero or more letters, digits, underscores, and single quotes. The first
-- letter must be lowercase.
--
-- >>> prove $ \s -> s `match` identifier .=> isAsciiLower (head s)
-- Q.E.D.
-- >>> prove $ \s -> s `match` identifier .=> length s .>= 1
-- Q.E.D.
identifier :: RegExp
identifier :: RegExp
identifier = RegExp
asciiLower RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
* RegExp -> RegExp
KStar (RegExp
asciiLetter RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
+ RegExp
digit RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
+ RegExp
"_" RegExp -> RegExp -> RegExp
forall a. Num a => a -> a -> a
+ RegExp
"'")

-- | Lift a unary operator over strings.
lift1 :: forall a b. (SymVal a, SymVal b) => StrOp -> Maybe (a -> b) -> SBV a -> SBV b
lift1 :: StrOp -> Maybe (a -> b) -> SBV a -> SBV b
lift1 StrOp
w Maybe (a -> b)
mbOp SBV a
a
  | Just SBV b
cv <- Maybe (a -> b) -> SBV a -> Maybe (SBV b)
forall a b.
(SymVal a, SymVal b) =>
Maybe (a -> b) -> SBV a -> Maybe (SBV b)
concEval1 Maybe (a -> b)
mbOp SBV a
a
  = SBV b
cv
  | Bool
True
  = SVal -> SBV b
forall a. SVal -> SBV a
SBV (SVal -> SBV b) -> SVal -> SBV b
forall a b. (a -> b) -> a -> b
$ Kind -> Either CV (Cached SV) -> SVal
SVal Kind
k (Either CV (Cached SV) -> SVal) -> Either CV (Cached SV) -> SVal
forall a b. (a -> b) -> a -> b
$ Cached SV -> Either CV (Cached SV)
forall a b. b -> Either a b
Right (Cached SV -> Either CV (Cached SV))
-> Cached SV -> Either CV (Cached SV)
forall a b. (a -> b) -> a -> b
$ (State -> IO SV) -> Cached SV
forall a. (State -> IO a) -> Cached a
cache State -> IO SV
r
  where k :: Kind
k = Proxy b -> Kind
forall a. HasKind a => a -> Kind
kindOf (Proxy b
forall k (t :: k). Proxy t
Proxy @b)
        r :: State -> IO SV
r State
st = do SV
sva <- State -> SBV a -> IO SV
forall a. State -> SBV a -> IO SV
sbvToSV State
st SBV a
a
                  State -> Kind -> SBVExpr -> IO SV
newExpr State
st Kind
k (Op -> [SV] -> SBVExpr
SBVApp (StrOp -> Op
StrOp StrOp
w) [SV
sva])

-- | Concrete evaluation for unary ops
concEval1 :: (SymVal a, SymVal b) => Maybe (a -> b) -> SBV a -> Maybe (SBV b)
concEval1 :: Maybe (a -> b) -> SBV a -> Maybe (SBV b)
concEval1 Maybe (a -> b)
mbOp SBV a
a = b -> SBV b
forall a. SymVal a => a -> SBV a
literal (b -> SBV b) -> Maybe b -> Maybe (SBV b)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Maybe (a -> b)
mbOp Maybe (a -> b) -> Maybe a -> Maybe b
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> SBV a -> Maybe a
forall a. SymVal a => SBV a -> Maybe a
unliteral SBV a
a)

-- | Quiet GHC about testing only imports
__unused :: a
__unused :: a
__unused = (SChar -> SBool)
-> (SString -> SInteger)
-> (SInteger -> SString -> SString)
-> (SChar -> SString -> SBool)
-> (SChar -> SString -> SBool)
-> (SString -> SChar)
-> a
forall a. HasCallStack => a
undefined SChar -> SBool
isSpaceL1 SString -> SInteger
length SInteger -> SString -> SString
take SChar -> SString -> SBool
elem SChar -> SString -> SBool
notElem SString -> SChar
head

{- $matching
A symbolic string or a character ('SString' or 'SChar') can be matched against a regular-expression. Note
that the regular-expression itself is not a symbolic object: It's a fully concrete representation, as
captured by the 'RegExp' class. The 'RegExp' class is an instance of the @IsString@ class, which makes writing
literal matches easier. The 'RegExp' type also has a (somewhat degenerate) 'Num' instance: Concatenation
corresponds to multiplication, union corresponds to addition, and @0@ corresponds to the empty language.

Note that since `match` is a method of 'RegExpMatchable' class, both 'SChar' and 'SString' can be used as
an argument for matching. In practice, this means you might have to disambiguate with a type-ascription
if it is not deducible from context.

>>> prove $ \s -> (s :: SString) `match` "hello" .<=> s .== "hello"
Q.E.D.
>>> prove $ \s -> s `match` Loop 2 5 "xyz" .=> length s .>= 6
Q.E.D.
>>> prove $ \s -> s `match` Loop 2 5 "xyz" .=> length s .<= 15
Q.E.D.
>>> prove $ \s -> match s (Loop 2 5 "xyz") .=> length s .>= 7
Falsifiable. Counter-example:
  s0 = "xyzxyz" :: String
>>> prove $ \s -> (s :: SString) `match` "hello" .=> s `match` ("hello" + "world")
Q.E.D.
>>> prove $ \s -> sNot $ (s::SString) `match` ("so close" * 0)
Q.E.D.
>>> prove $ \c -> (c :: SChar) `match` oneOf "abcd" .=> ord c .>= ord (literal 'a') .&& ord c .<= ord (literal 'd')
Q.E.D.
-}