-----------------------------------------------------------------------------
-- |
-- Module    : Data.SBV.Char
-- Copyright : (c) Levent Erkok
-- License   : BSD3
-- Maintainer: erkokl@gmail.com
-- Stability : experimental
--
-- A collection of character utilities, follows the namings
-- in "Data.Char" and is intended to be imported qualified.
-- Also, it is recommended you use the @OverloadedStrings@
-- extension to allow literal strings to be used as
-- symbolic-strings when working with symbolic characters
-- and strings.
--
-- 'SChar' type only covers all unicode characters, following the specification
-- in <http://smtlib.cs.uiowa.edu/theories-UnicodeStrings.shtml>.
-- However, some of the recognizers only support the Latin1 subset, suffixed
-- by @L1@. The reason for this is that there is no performant way of performing
-- these functions for the entire unicode set. As SMTLib's capabilities increase,
-- we will provide full unicode versions as well.
-----------------------------------------------------------------------------

{-# LANGUAGE OverloadedStrings   #-}
{-# LANGUAGE Rank2Types          #-}
{-# LANGUAGE ScopedTypeVariables #-}

{-# OPTIONS_GHC -Wall -Werror #-}

module Data.SBV.Char (
        -- * Occurrence in a string
        elem, notElem
        -- * Conversion to\/from 'SInteger'
        , ord, chr
        -- * Conversion to upper\/lower case
        , toLowerL1, toUpperL1
        -- * Converting digits to ints and back
        , digitToInt, intToDigit
        -- * Character classification
        , isControlL1, isSpaceL1, isLowerL1, isUpperL1, isAlphaL1, isAlphaNumL1, isPrintL1, isDigit, isOctDigit, isHexDigit
        , isLetterL1, isMarkL1, isNumberL1, isPunctuationL1, isSymbolL1, isSeparatorL1
        -- * Subranges
        , isAscii, isLatin1, isAsciiUpper, isAsciiLower
        ) where

import Prelude hiding (elem, notElem)
import qualified Prelude as P

import Data.SBV.Core.Data
import Data.SBV.Core.Model

import qualified Data.Char as C

import Data.SBV.String (isInfixOf, singleton)

-- $setup
-- >>> -- For doctest purposes only:
-- >>> import Data.SBV
-- >>> import Data.SBV.String (isInfixOf, singleton)
-- >>> import Prelude hiding(elem, notElem)
-- >>> :set -XOverloadedStrings

-- | Is the character in the string?
--
-- >>> :set -XOverloadedStrings
-- >>> prove $ \c -> c `elem` singleton c
-- Q.E.D.
-- >>> prove $ \c -> sNot (c `elem` "")
-- Q.E.D.
elem :: SChar -> SString -> SBool
SChar
c elem :: SChar -> SString -> SBool
`elem` SString
s
 | Just String
cs <- SString -> Maybe String
forall a. SymVal a => SBV a -> Maybe a
unliteral SString
s, Just Char
cc <- SChar -> Maybe Char
forall a. SymVal a => SBV a -> Maybe a
unliteral SChar
c
 = Bool -> SBool
forall a. SymVal a => a -> SBV a
literal (Char
cc Char -> String -> Bool
forall a. Eq a => a -> [a] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`P.elem` String
cs)
 | Just String
cs <- SString -> Maybe String
forall a. SymVal a => SBV a -> Maybe a
unliteral SString
s                            -- If only the second string is concrete, element-wise checking is still much better!
 = (SChar -> SBool) -> [SChar] -> SBool
forall a. (a -> SBool) -> [a] -> SBool
sAny (SChar
c SChar -> SChar -> SBool
forall a. EqSymbolic a => a -> a -> SBool
.==) ([SChar] -> SBool) -> [SChar] -> SBool
forall a b. (a -> b) -> a -> b
$ (Char -> SChar) -> String -> [SChar]
forall a b. (a -> b) -> [a] -> [b]
map Char -> SChar
forall a. SymVal a => a -> SBV a
literal String
cs
 | Bool
True
 = SChar -> SString
singleton SChar
c SString -> SString -> SBool
`isInfixOf` SString
s

-- | Is the character not in the string?
--
-- >>> prove $ \c s -> c `elem` s .<=> sNot (c `notElem` s)
-- Q.E.D.
notElem :: SChar -> SString -> SBool
SChar
c notElem :: SChar -> SString -> SBool
`notElem` SString
s = SBool -> SBool
sNot (SChar
c SChar -> SString -> SBool
`elem` SString
s)

-- | The 'ord' of a character.
ord :: SChar -> SInteger
ord :: SChar -> SInteger
ord SChar
c
 | Just Char
cc <- SChar -> Maybe Char
forall a. SymVal a => SBV a -> Maybe a
unliteral SChar
c
 = Integer -> SInteger
forall a. SymVal a => a -> SBV a
literal (Int -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
C.ord Char
cc))
 | Bool
True
 = SVal -> SInteger
forall a. SVal -> SBV a
SBV (SVal -> SInteger) -> SVal -> SInteger
forall a b. (a -> b) -> a -> b
$ Kind -> Either CV (Cached SV) -> SVal
SVal Kind
KUnbounded (Either CV (Cached SV) -> SVal) -> Either CV (Cached SV) -> SVal
forall a b. (a -> b) -> a -> b
$ Cached SV -> Either CV (Cached SV)
forall a b. b -> Either a b
Right (Cached SV -> Either CV (Cached SV))
-> Cached SV -> Either CV (Cached SV)
forall a b. (a -> b) -> a -> b
$ (State -> IO SV) -> Cached SV
forall a. (State -> IO a) -> Cached a
cache State -> IO SV
r
 where r :: State -> IO SV
r State
st = do SV
csv <- State -> SChar -> IO SV
forall a. State -> SBV a -> IO SV
sbvToSV State
st SChar
c
                 State -> Kind -> SBVExpr -> IO SV
newExpr State
st Kind
KUnbounded (Op -> [SV] -> SBVExpr
SBVApp (StrOp -> Op
StrOp StrOp
StrToCode) [SV
csv])

-- | Conversion from an integer to a character.
--
-- >>> prove $ \x -> 0 .<= x .&& x .< 256 .=> ord (chr x) .== x
-- Q.E.D.
-- >>> prove $ \x -> chr (ord x) .== x
-- Q.E.D.
chr :: SInteger -> SChar
chr :: SInteger -> SChar
chr SInteger
w
 | Just Integer
cw <- SInteger -> Maybe Integer
forall a. SymVal a => SBV a -> Maybe a
unliteral SInteger
w
 = Char -> SChar
forall a. SymVal a => a -> SBV a
literal (Int -> Char
C.chr (Integer -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
cw))
 | Bool
True
 = SVal -> SChar
forall a. SVal -> SBV a
SBV (SVal -> SChar) -> SVal -> SChar
forall a b. (a -> b) -> a -> b
$ Kind -> Either CV (Cached SV) -> SVal
SVal Kind
KChar (Either CV (Cached SV) -> SVal) -> Either CV (Cached SV) -> SVal
forall a b. (a -> b) -> a -> b
$ Cached SV -> Either CV (Cached SV)
forall a b. b -> Either a b
Right (Cached SV -> Either CV (Cached SV))
-> Cached SV -> Either CV (Cached SV)
forall a b. (a -> b) -> a -> b
$ (State -> IO SV) -> Cached SV
forall a. (State -> IO a) -> Cached a
cache State -> IO SV
r
 where r :: State -> IO SV
r State
st = do SV
wsv <- State -> SInteger -> IO SV
forall a. State -> SBV a -> IO SV
sbvToSV State
st SInteger
w
                 State -> Kind -> SBVExpr -> IO SV
newExpr State
st Kind
KChar (Op -> [SV] -> SBVExpr
SBVApp (StrOp -> Op
StrOp StrOp
StrFromCode) [SV
wsv])

-- | Lift a char function to a symbolic version. If the given char is
-- not in the class recognized by predicate, the output is the same as the input.
-- Only works for the Latin1 set, i.e., the first 256 characters. If the given
-- character is outside this range, it's returned unchanged.
liftFunL1 :: (Char -> Char) -> SChar -> SChar
liftFunL1 :: (Char -> Char) -> SChar -> SChar
liftFunL1 Char -> Char
f SChar
c = String -> SChar
walk String
kernel
  where kernel :: String
kernel = [Char
g | Char
g <- (Int -> Char) -> [Int] -> String
forall a b. (a -> b) -> [a] -> [b]
map Int -> Char
C.chr [Int
0 .. Int
255], Char
g Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char -> Char
f Char
g]
        walk :: String -> SChar
walk []     = SChar
c
        walk (Char
d:String
ds) = SBool -> SChar -> SChar -> SChar
forall a. Mergeable a => SBool -> a -> a -> a
ite (Char -> SChar
forall a. SymVal a => a -> SBV a
literal Char
d SChar -> SChar -> SBool
forall a. EqSymbolic a => a -> a -> SBool
.== SChar
c) (Char -> SChar
forall a. SymVal a => a -> SBV a
literal (Char -> Char
f Char
d)) (String -> SChar
walk String
ds)

-- | Lift a char predicate to a symbolic version. Only works for the Latin1 set, i.e., the
-- first 256 characters.
liftPredL1 :: (Char -> Bool) -> SChar -> SBool
liftPredL1 :: (Char -> Bool) -> SChar -> SBool
liftPredL1 Char -> Bool
predicate SChar
c = SChar
c SChar -> [SChar] -> SBool
forall a. EqSymbolic a => a -> [a] -> SBool
`sElem` [Char -> SChar
forall a. SymVal a => a -> SBV a
literal Char
g | Char
g <- (Int -> Char) -> [Int] -> String
forall a b. (a -> b) -> [a] -> [b]
map Int -> Char
C.chr [Int
0 .. Int
255], Char -> Bool
predicate Char
g]

-- | Convert to lower-case. Only works for the Latin1 subset, otherwise returns its argument unchanged.
--
-- >>> prove $ \c -> toLowerL1 (toLowerL1 c) .== toLowerL1 c
-- Q.E.D.
-- >>> prove $ \c -> isLowerL1 c .&& c `notElem` "\181\255" .=> toLowerL1 (toUpperL1 c) .== c
-- Q.E.D.
toLowerL1 :: SChar -> SChar
toLowerL1 :: SChar -> SChar
toLowerL1 = (Char -> Char) -> SChar -> SChar
liftFunL1 Char -> Char
C.toLower

-- | Convert to upper-case. Only works for the Latin1 subset, otherwise returns its argument unchanged.
--
-- >>> prove $ \c -> toUpperL1 (toUpperL1 c) .== toUpperL1 c
-- Q.E.D.
-- >>> prove $ \c -> isUpperL1 c .=> toUpperL1 (toLowerL1 c) .== c
-- Q.E.D.
toUpperL1 :: SChar -> SChar
toUpperL1 :: SChar -> SChar
toUpperL1 = (Char -> Char) -> SChar -> SChar
liftFunL1 Char -> Char
C.toUpper

-- | Convert a digit to an integer. Works for hexadecimal digits too. If the input isn't a digit,
-- then return -1.
--
-- >>> prove $ \c -> isDigit c .|| isHexDigit c .=> digitToInt c .>= 0 .&& digitToInt c .<= 15
-- Q.E.D.
-- >>> prove $ \c -> sNot (isDigit c .|| isHexDigit c) .=> digitToInt c .== -1
-- Q.E.D.
digitToInt :: SChar -> SInteger
digitToInt :: SChar -> SInteger
digitToInt SChar
c = SBool -> SInteger -> SInteger -> SInteger
forall a. Mergeable a => SBool -> a -> a -> a
ite (SChar
uc SChar -> SString -> SBool
`elem` SString
"0123456789") (SInteger -> SInteger
forall a b.
(Integral a, HasKind a, Num a, SymVal a, HasKind b, Num b,
 SymVal b) =>
SBV a -> SBV b
sFromIntegral (SInteger
o SInteger -> SInteger -> SInteger
forall a. Num a => a -> a -> a
- SChar -> SInteger
ord (Char -> SChar
forall a. SymVal a => a -> SBV a
literal Char
'0')))
             (SInteger -> SInteger) -> SInteger -> SInteger
forall a b. (a -> b) -> a -> b
$ SBool -> SInteger -> SInteger -> SInteger
forall a. Mergeable a => SBool -> a -> a -> a
ite (SChar
uc SChar -> SString -> SBool
`elem` SString
"ABCDEF")     (SInteger -> SInteger
forall a b.
(Integral a, HasKind a, Num a, SymVal a, HasKind b, Num b,
 SymVal b) =>
SBV a -> SBV b
sFromIntegral (SInteger
o SInteger -> SInteger -> SInteger
forall a. Num a => a -> a -> a
- SChar -> SInteger
ord (Char -> SChar
forall a. SymVal a => a -> SBV a
literal Char
'A') SInteger -> SInteger -> SInteger
forall a. Num a => a -> a -> a
+ SInteger
10))
             (SInteger -> SInteger) -> SInteger -> SInteger
forall a b. (a -> b) -> a -> b
$ -SInteger
1
  where uc :: SChar
uc = SChar -> SChar
toUpperL1 SChar
c
        o :: SInteger
o  = SChar -> SInteger
ord SChar
uc

-- | Convert an integer to a digit, inverse of 'digitToInt'. If the integer is out of
-- bounds, we return the arbitrarily chosen space character. Note that for hexadecimal
-- letters, we return the corresponding lowercase letter.
--
-- >>> prove $ \i -> i .>= 0 .&& i .<= 15 .=> digitToInt (intToDigit i) .== i
-- Q.E.D.
-- >>> prove $ \i -> i .<  0 .|| i .>  15 .=> digitToInt (intToDigit i) .== -1
-- Q.E.D.
-- >>> prove $ \c -> digitToInt c .== -1 .<=> intToDigit (digitToInt c) .== literal ' '
-- Q.E.D.
intToDigit :: SInteger -> SChar
intToDigit :: SInteger -> SChar
intToDigit SInteger
i = SBool -> SChar -> SChar -> SChar
forall a. Mergeable a => SBool -> a -> a -> a
ite (SInteger
i SInteger -> SInteger -> SBool
forall a. OrdSymbolic a => a -> a -> SBool
.>=  SInteger
0 SBool -> SBool -> SBool
.&& SInteger
i SInteger -> SInteger -> SBool
forall a. OrdSymbolic a => a -> a -> SBool
.<=  SInteger
9) (SInteger -> SChar
chr (SInteger -> SInteger
forall a b.
(Integral a, HasKind a, Num a, SymVal a, HasKind b, Num b,
 SymVal b) =>
SBV a -> SBV b
sFromIntegral SInteger
i SInteger -> SInteger -> SInteger
forall a. Num a => a -> a -> a
+ SChar -> SInteger
ord (Char -> SChar
forall a. SymVal a => a -> SBV a
literal Char
'0')))
             (SChar -> SChar) -> SChar -> SChar
forall a b. (a -> b) -> a -> b
$ SBool -> SChar -> SChar -> SChar
forall a. Mergeable a => SBool -> a -> a -> a
ite (SInteger
i SInteger -> SInteger -> SBool
forall a. OrdSymbolic a => a -> a -> SBool
.>= SInteger
10 SBool -> SBool -> SBool
.&& SInteger
i SInteger -> SInteger -> SBool
forall a. OrdSymbolic a => a -> a -> SBool
.<= SInteger
15) (SInteger -> SChar
chr (SInteger -> SInteger
forall a b.
(Integral a, HasKind a, Num a, SymVal a, HasKind b, Num b,
 SymVal b) =>
SBV a -> SBV b
sFromIntegral SInteger
i SInteger -> SInteger -> SInteger
forall a. Num a => a -> a -> a
+ SChar -> SInteger
ord (Char -> SChar
forall a. SymVal a => a -> SBV a
literal Char
'a') SInteger -> SInteger -> SInteger
forall a. Num a => a -> a -> a
- SInteger
10))
             (SChar -> SChar) -> SChar -> SChar
forall a b. (a -> b) -> a -> b
$ Char -> SChar
forall a. SymVal a => a -> SBV a
literal Char
' '

-- | Is this a control character? Control characters are essentially the non-printing characters. Only works for the Latin1 subset, otherwise returns 'sFalse'.
isControlL1 :: SChar -> SBool
isControlL1 :: SChar -> SBool
isControlL1 = (Char -> Bool) -> SChar -> SBool
liftPredL1 Char -> Bool
C.isControl

-- | Is this white-space? Only works for the Latin1 subset, otherwise returns 'sFalse'.
isSpaceL1 :: SChar -> SBool
isSpaceL1 :: SChar -> SBool
isSpaceL1 = (Char -> Bool) -> SChar -> SBool
liftPredL1 Char -> Bool
C.isSpace

-- | Is this a lower-case character? Only works for the Latin1 subset, otherwise returns 'sFalse'.
--
-- >>> prove $ \c -> isUpperL1 c .=> isLowerL1 (toLowerL1 c)
-- Q.E.D.
isLowerL1 :: SChar -> SBool
isLowerL1 :: SChar -> SBool
isLowerL1 = (Char -> Bool) -> SChar -> SBool
liftPredL1 Char -> Bool
C.isLower

-- | Is this an upper-case character? Only works for the Latin1 subset, otherwise returns 'sFalse'.
--
-- >>> prove $ \c -> sNot (isLowerL1 c .&& isUpperL1 c)
-- Q.E.D.
isUpperL1 :: SChar -> SBool
isUpperL1 :: SChar -> SBool
isUpperL1 = (Char -> Bool) -> SChar -> SBool
liftPredL1 Char -> Bool
C.isUpper

-- | Is this an alphabet character? That is lower-case, upper-case and title-case letters, plus letters of caseless scripts and modifiers letters.
-- Only works for the Latin1 subset, otherwise returns 'sFalse'.
isAlphaL1 :: SChar -> SBool
isAlphaL1 :: SChar -> SBool
isAlphaL1 = (Char -> Bool) -> SChar -> SBool
liftPredL1 Char -> Bool
C.isAlpha

-- | Is this an alphabetical character or a digit? Only works for the Latin1 subset, otherwise returns 'sFalse'.
--
-- >>> prove $ \c -> isAlphaNumL1 c .<=> isAlphaL1 c .|| isNumberL1 c
-- Q.E.D.
isAlphaNumL1 :: SChar -> SBool
isAlphaNumL1 :: SChar -> SBool
isAlphaNumL1 = (Char -> Bool) -> SChar -> SBool
liftPredL1 Char -> Bool
C.isAlphaNum

-- | Is this a printable character? Only works for the Latin1 subset, otherwise returns 'sFalse'.
isPrintL1 :: SChar -> SBool
isPrintL1 :: SChar -> SBool
isPrintL1 = (Char -> Bool) -> SChar -> SBool
liftPredL1 Char -> Bool
C.isPrint

-- | Is this an ASCII digit, i.e., one of @0@..@9@. Note that this is a subset of 'isNumberL1' 
--
-- >>> prove $ \c -> isDigit c .=> isNumberL1 c
-- Q.E.D.
isDigit :: SChar -> SBool
isDigit :: SChar -> SBool
isDigit = (Char -> Bool) -> SChar -> SBool
liftPredL1 Char -> Bool
C.isDigit

-- | Is this an Octal digit, i.e., one of @0@..@7@.
isOctDigit :: SChar -> SBool
isOctDigit :: SChar -> SBool
isOctDigit = (Char -> Bool) -> SChar -> SBool
liftPredL1 Char -> Bool
C.isOctDigit

-- | Is this a Hex digit, i.e, one of @0@..@9@, @a@..@f@, @A@..@F@.
--
-- >>> prove $ \c -> isHexDigit c .=> isAlphaNumL1 c
-- Q.E.D.
isHexDigit :: SChar -> SBool
isHexDigit :: SChar -> SBool
isHexDigit = (Char -> Bool) -> SChar -> SBool
liftPredL1 Char -> Bool
C.isHexDigit

-- | Is this an alphabet character. Only works for the Latin1 subset, otherwise returns 'sFalse'.
--
-- >>> prove $ \c -> isLetterL1 c .<=> isAlphaL1 c
-- Q.E.D.
isLetterL1 :: SChar -> SBool
isLetterL1 :: SChar -> SBool
isLetterL1 = (Char -> Bool) -> SChar -> SBool
liftPredL1 Char -> Bool
C.isLetter

-- | Is this a mark? Only works for the Latin1 subset, otherwise returns 'sFalse'.
--
-- Note that there are no marks in the Latin1 set, so this function always returns false!
--
-- >>> prove $ sNot . isMarkL1
-- Q.E.D.
isMarkL1 :: SChar -> SBool
isMarkL1 :: SChar -> SBool
isMarkL1 = (Char -> Bool) -> SChar -> SBool
liftPredL1 Char -> Bool
C.isMark

-- | Is this a number character? Only works for the Latin1 subset, otherwise returns 'sFalse'.
isNumberL1 :: SChar -> SBool
isNumberL1 :: SChar -> SBool
isNumberL1 = (Char -> Bool) -> SChar -> SBool
liftPredL1 Char -> Bool
C.isNumber

-- | Is this a punctuation mark? Only works for the Latin1 subset, otherwise returns 'sFalse'.
isPunctuationL1 :: SChar -> SBool
isPunctuationL1 :: SChar -> SBool
isPunctuationL1 = (Char -> Bool) -> SChar -> SBool
liftPredL1 Char -> Bool
C.isPunctuation

-- | Is this a symbol? Only works for the Latin1 subset, otherwise returns 'sFalse'.
isSymbolL1 :: SChar -> SBool
isSymbolL1 :: SChar -> SBool
isSymbolL1 = (Char -> Bool) -> SChar -> SBool
liftPredL1 Char -> Bool
C.isSymbol

-- | Is this a separator? Only works for the Latin1 subset, otherwise returns 'sFalse'.
--
-- >>> prove $ \c -> isSeparatorL1 c .=> isSpaceL1 c
-- Q.E.D.
isSeparatorL1 :: SChar -> SBool
isSeparatorL1 :: SChar -> SBool
isSeparatorL1 = (Char -> Bool) -> SChar -> SBool
liftPredL1 Char -> Bool
C.isSeparator

-- | Is this an ASCII character, i.e., the first 128 characters.
isAscii :: SChar -> SBool
isAscii :: SChar -> SBool
isAscii SChar
c = SChar -> SInteger
ord SChar
c SInteger -> SInteger -> SBool
forall a. OrdSymbolic a => a -> a -> SBool
.< SInteger
128

-- | Is this a Latin1 character?
isLatin1 :: SChar -> SBool
isLatin1 :: SChar -> SBool
isLatin1 SChar
c = SChar -> SInteger
ord SChar
c SInteger -> SInteger -> SBool
forall a. OrdSymbolic a => a -> a -> SBool
.< SInteger
256

-- | Is this an ASCII Upper-case letter? i.e., @A@ thru @Z@
--
-- >>> prove $ \c -> isAsciiUpper c .<=> ord c .>= ord (literal 'A') .&& ord c .<= ord (literal 'Z')
-- Q.E.D.
-- >>> prove $ \c -> isAsciiUpper c .<=> isAscii c .&& isUpperL1 c
-- Q.E.D.
isAsciiUpper :: SChar -> SBool
isAsciiUpper :: SChar -> SBool
isAsciiUpper = (Char -> Bool) -> SChar -> SBool
liftPredL1 Char -> Bool
C.isAsciiUpper

-- | Is this an ASCII Lower-case letter? i.e., @a@ thru @z@
--
-- >>> prove $ \c -> isAsciiLower c .<=> ord c .>= ord (literal 'a') .&& ord c .<= ord (literal 'z')
-- Q.E.D.
-- >>> prove $ \c -> isAsciiLower c .<=> isAscii c .&& isLowerL1 c
-- Q.E.D.
isAsciiLower :: SChar -> SBool
isAsciiLower :: SChar -> SBool
isAsciiLower = (Char -> Bool) -> SChar -> SBool
liftPredL1 Char -> Bool
C.isAsciiLower