-----------------------------------------------------------------------------
-- |
-- Module    : Data.SBV.Char
-- Copyright : (c) Levent Erkok
-- License   : BSD3
-- Maintainer: erkokl@gmail.com
-- Stability : experimental
--
-- A collection of character utilities, follows the namings
-- in "Data.Char" and is intended to be imported qualified.
-- Also, it is recommended you use the @OverloadedStrings@
-- extension to allow literal strings to be used as
-- symbolic-strings when working with symbolic characters
-- and strings.
--
-- Note that currently 'SChar' type only covers Latin1 (i.e., the first 256
-- characters), as opposed to Haskell's Unicode character support. However,
-- there is a pending SMTLib proposal to extend this set to Unicode, at
-- which point we will update these functions to match the implementations.
-- For details, see: <http://smtlib.cs.uiowa.edu/theories-UnicodeStrings.shtml>
-----------------------------------------------------------------------------

{-# LANGUAGE OverloadedStrings   #-}
{-# LANGUAGE Rank2Types          #-}
{-# LANGUAGE ScopedTypeVariables #-}

{-# OPTIONS_GHC -Wall -Werror #-}

module Data.SBV.Char (
        -- * Occurrence in a string
        elem, notElem
        -- * Conversion to\/from 'SInteger'
        , ord, chr
        -- * Conversion to upper\/lower case
        , toLower, toUpper
        -- * Converting digits to ints and back
        , digitToInt, intToDigit
        -- * Character classification
        , isControl, isSpace, isLower, isUpper, isAlpha, isAlphaNum, isPrint, isDigit, isOctDigit, isHexDigit, isLetter, isMark, isNumber, isPunctuation, isSymbol, isSeparator
        -- * Subranges
        , isAscii, isLatin1, isAsciiLetter, isAsciiUpper, isAsciiLower
        ) where

import Prelude hiding (elem, notElem)
import qualified Prelude as P

import Data.SBV.Core.Data
import Data.SBV.Core.Model

import qualified Data.Char as C

import Data.SBV.String (isInfixOf, singleton)

-- For doctest use only
--
-- $setup
-- >>> import Data.SBV.Provers.Prover (prove, sat)
-- >>> :set -XOverloadedStrings

-- | Is the character in the string?
--
-- >>> :set -XOverloadedStrings
-- >>> prove $ \c -> c `elem` singleton c
-- Q.E.D.
-- >>> prove $ \c -> sNot (c `elem` "")
-- Q.E.D.
elem :: SChar -> SString -> SBool
SChar
c elem :: SChar -> SString -> SBool
`elem` SString
s
 | Just String
cs <- SString -> Maybe String
forall a. SymVal a => SBV a -> Maybe a
unliteral SString
s, Just Char
cc <- SChar -> Maybe Char
forall a. SymVal a => SBV a -> Maybe a
unliteral SChar
c
 = Bool -> SBool
forall a. SymVal a => a -> SBV a
literal (Char
cc Char -> String -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`P.elem` String
cs)
 | Just String
cs <- SString -> Maybe String
forall a. SymVal a => SBV a -> Maybe a
unliteral SString
s                            -- If only the second string is concrete, element-wise checking is still much better!
 = (SChar -> SBool) -> [SChar] -> SBool
forall a. (a -> SBool) -> [a] -> SBool
sAny (SChar
c SChar -> SChar -> SBool
forall a. EqSymbolic a => a -> a -> SBool
.==) ([SChar] -> SBool) -> [SChar] -> SBool
forall a b. (a -> b) -> a -> b
$ (Char -> SChar) -> String -> [SChar]
forall a b. (a -> b) -> [a] -> [b]
map Char -> SChar
forall a. SymVal a => a -> SBV a
literal String
cs
 | Bool
True
 = SChar -> SString
singleton SChar
c SString -> SString -> SBool
`isInfixOf` SString
s

-- | Is the character not in the string?
--
-- >>> prove $ \c s -> c `elem` s .<=> sNot (c `notElem` s)
-- Q.E.D.
notElem :: SChar -> SString -> SBool
SChar
c notElem :: SChar -> SString -> SBool
`notElem` SString
s = SBool -> SBool
sNot (SChar
c SChar -> SString -> SBool
`elem` SString
s)

-- | The 'ord' of a character.
ord :: SChar -> SInteger
ord :: SChar -> SInteger
ord SChar
c
 | Just Char
cc <- SChar -> Maybe Char
forall a. SymVal a => SBV a -> Maybe a
unliteral SChar
c
 = Integer -> SInteger
forall a. SymVal a => a -> SBV a
literal (Int -> Integer
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
C.ord Char
cc))
 | Bool
True
 = SVal -> SInteger
forall a. SVal -> SBV a
SBV (SVal -> SInteger) -> SVal -> SInteger
forall a b. (a -> b) -> a -> b
$ Kind -> Either CV (Cached SV) -> SVal
SVal Kind
kTo (Either CV (Cached SV) -> SVal) -> Either CV (Cached SV) -> SVal
forall a b. (a -> b) -> a -> b
$ Cached SV -> Either CV (Cached SV)
forall a b. b -> Either a b
Right (Cached SV -> Either CV (Cached SV))
-> Cached SV -> Either CV (Cached SV)
forall a b. (a -> b) -> a -> b
$ (State -> IO SV) -> Cached SV
forall a. (State -> IO a) -> Cached a
cache State -> IO SV
r
 where kFrom :: Kind
kFrom = Bool -> Int -> Kind
KBounded Bool
False Int
8
       kTo :: Kind
kTo   = Kind
KUnbounded
       r :: State -> IO SV
r State
st = do SV
csv <- State -> SChar -> IO SV
forall a. State -> SBV a -> IO SV
sbvToSV State
st SChar
c
                 State -> Kind -> SBVExpr -> IO SV
newExpr State
st Kind
kTo (Op -> [SV] -> SBVExpr
SBVApp (Kind -> Kind -> Op
KindCast Kind
kFrom Kind
kTo) [SV
csv])

-- | Conversion from an integer to a character.
--
-- >>> prove $ \x -> 0 .<= x .&& x .< 256 .=> ord (chr x) .== x
-- Q.E.D.
-- >>> prove $ \x -> chr (ord x) .== x
-- Q.E.D.
chr :: SInteger -> SChar
chr :: SInteger -> SChar
chr SInteger
w
 | Just Integer
cw <- SInteger -> Maybe Integer
forall a. SymVal a => SBV a -> Maybe a
unliteral SInteger
w
 = Char -> SChar
forall a. SymVal a => a -> SBV a
literal (Int -> Char
C.chr (Integer -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
cw))
 | Bool
True
 = SVal -> SChar
forall a. SVal -> SBV a
SBV (SVal -> SChar) -> SVal -> SChar
forall a b. (a -> b) -> a -> b
$ Kind -> Either CV (Cached SV) -> SVal
SVal Kind
KChar (Either CV (Cached SV) -> SVal) -> Either CV (Cached SV) -> SVal
forall a b. (a -> b) -> a -> b
$ Cached SV -> Either CV (Cached SV)
forall a b. b -> Either a b
Right (Cached SV -> Either CV (Cached SV))
-> Cached SV -> Either CV (Cached SV)
forall a b. (a -> b) -> a -> b
$ (State -> IO SV) -> Cached SV
forall a. (State -> IO a) -> Cached a
cache State -> IO SV
r
 where w8 :: SWord8
       w8 :: SWord8
w8 = SInteger -> SWord8
forall a b.
(Integral a, HasKind a, Num a, SymVal a, HasKind b, Num b,
 SymVal b) =>
SBV a -> SBV b
sFromIntegral SInteger
w
       r :: State -> IO SV
r State
st = do SV Kind
_ NodeId
n <- State -> SWord8 -> IO SV
forall a. State -> SBV a -> IO SV
sbvToSV State
st SWord8
w8
                 SV -> IO SV
forall (m :: * -> *) a. Monad m => a -> m a
return (SV -> IO SV) -> SV -> IO SV
forall a b. (a -> b) -> a -> b
$ Kind -> NodeId -> SV
SV Kind
KChar NodeId
n

-- | Convert to lower-case.
--
-- >>> prove $ \c -> toLower (toLower c) .== toLower c
-- Q.E.D.
-- >>> prove $ \c -> isLower c .=> toLower (toUpper c) .== c
-- Q.E.D.
toLower :: SChar -> SChar
toLower :: SChar -> SChar
toLower SChar
c = SBool -> SChar -> SChar -> SChar
forall a. Mergeable a => SBool -> a -> a -> a
ite (SChar -> SBool
isUpper SChar
c) (SInteger -> SChar
chr (SChar -> SInteger
ord SChar
c SInteger -> SInteger -> SInteger
forall a. Num a => a -> a -> a
+ SInteger
32)) SChar
c

-- | Convert to upper-case. N.B. There are three special cases!
--
--   * The character \223 is special. It corresponds to the German Eszett, it is considered lower-case,
--     and furthermore it's upper-case maps back to itself within our character-set. So, we leave it
--     untouched.
--
--   * The character \181 maps to upper-case \924, which is beyond our character set. We leave it
--     untouched. (This is the A with an acute accent.)
--
--   * The character \255 maps to upper-case \376, which is beyond our character set. We leave it
--     untouched. (This is the non-breaking space character.)
--
-- >>> prove $ \c -> toUpper (toUpper c) .== toUpper c
-- Q.E.D.
-- >>> prove $ \c -> isUpper c .=> toUpper (toLower c) .== c
-- Q.E.D.
toUpper :: SChar -> SChar
toUpper :: SChar -> SChar
toUpper SChar
c = SBool -> SChar -> SChar -> SChar
forall a. Mergeable a => SBool -> a -> a -> a
ite (SChar -> SBool
isLower SChar
c SBool -> SBool -> SBool
.&& SChar
c SChar -> SString -> SBool
`notElem` SString
"\181\223\255") (SInteger -> SChar
chr (SChar -> SInteger
ord SChar
c SInteger -> SInteger -> SInteger
forall a. Num a => a -> a -> a
- SInteger
32)) SChar
c

-- | Convert a digit to an integer. Works for hexadecimal digits too. If the input isn't a digit,
-- then return -1.
--
-- >>> prove $ \c -> isDigit c .|| isHexDigit c .=> digitToInt c .>= 0 .&& digitToInt c .<= 15
-- Q.E.D.
-- >>> prove $ \c -> sNot (isDigit c .|| isHexDigit c) .=> digitToInt c .== -1
-- Q.E.D.
digitToInt :: SChar -> SInteger
digitToInt :: SChar -> SInteger
digitToInt SChar
c = SBool -> SInteger -> SInteger -> SInteger
forall a. Mergeable a => SBool -> a -> a -> a
ite (SChar
uc SChar -> SString -> SBool
`elem` SString
"0123456789") (SInteger -> SInteger
forall a b.
(Integral a, HasKind a, Num a, SymVal a, HasKind b, Num b,
 SymVal b) =>
SBV a -> SBV b
sFromIntegral (SInteger
o SInteger -> SInteger -> SInteger
forall a. Num a => a -> a -> a
- SChar -> SInteger
ord (Char -> SChar
forall a. SymVal a => a -> SBV a
literal Char
'0')))
             (SInteger -> SInteger) -> SInteger -> SInteger
forall a b. (a -> b) -> a -> b
$ SBool -> SInteger -> SInteger -> SInteger
forall a. Mergeable a => SBool -> a -> a -> a
ite (SChar
uc SChar -> SString -> SBool
`elem` SString
"ABCDEF")     (SInteger -> SInteger
forall a b.
(Integral a, HasKind a, Num a, SymVal a, HasKind b, Num b,
 SymVal b) =>
SBV a -> SBV b
sFromIntegral (SInteger
o SInteger -> SInteger -> SInteger
forall a. Num a => a -> a -> a
- SChar -> SInteger
ord (Char -> SChar
forall a. SymVal a => a -> SBV a
literal Char
'A') SInteger -> SInteger -> SInteger
forall a. Num a => a -> a -> a
+ SInteger
10))
             (SInteger -> SInteger) -> SInteger -> SInteger
forall a b. (a -> b) -> a -> b
$ -SInteger
1
  where uc :: SChar
uc = SChar -> SChar
toUpper SChar
c
        o :: SInteger
o  = SChar -> SInteger
ord SChar
uc

-- | Convert an integer to a digit, inverse of 'digitToInt'. If the integer is out of
-- bounds, we return the arbitrarily chosen space character. Note that for hexadecimal
-- letters, we return the corresponding lowercase letter.
--
-- >>> prove $ \i -> i .>= 0 .&& i .<= 15 .=> digitToInt (intToDigit i) .== i
-- Q.E.D.
-- >>> prove $ \i -> i .<  0 .|| i .>  15 .=> digitToInt (intToDigit i) .== -1
-- Q.E.D.
-- >>> prove $ \c -> digitToInt c .== -1 .<=> intToDigit (digitToInt c) .== literal ' '
-- Q.E.D.
intToDigit :: SInteger -> SChar
intToDigit :: SInteger -> SChar
intToDigit SInteger
i = SBool -> SChar -> SChar -> SChar
forall a. Mergeable a => SBool -> a -> a -> a
ite (SInteger
i SInteger -> SInteger -> SBool
forall a. OrdSymbolic a => a -> a -> SBool
.>=  SInteger
0 SBool -> SBool -> SBool
.&& SInteger
i SInteger -> SInteger -> SBool
forall a. OrdSymbolic a => a -> a -> SBool
.<=  SInteger
9) (SInteger -> SChar
chr (SInteger -> SInteger
forall a b.
(Integral a, HasKind a, Num a, SymVal a, HasKind b, Num b,
 SymVal b) =>
SBV a -> SBV b
sFromIntegral SInteger
i SInteger -> SInteger -> SInteger
forall a. Num a => a -> a -> a
+ SChar -> SInteger
ord (Char -> SChar
forall a. SymVal a => a -> SBV a
literal Char
'0')))
             (SChar -> SChar) -> SChar -> SChar
forall a b. (a -> b) -> a -> b
$ SBool -> SChar -> SChar -> SChar
forall a. Mergeable a => SBool -> a -> a -> a
ite (SInteger
i SInteger -> SInteger -> SBool
forall a. OrdSymbolic a => a -> a -> SBool
.>= SInteger
10 SBool -> SBool -> SBool
.&& SInteger
i SInteger -> SInteger -> SBool
forall a. OrdSymbolic a => a -> a -> SBool
.<= SInteger
15) (SInteger -> SChar
chr (SInteger -> SInteger
forall a b.
(Integral a, HasKind a, Num a, SymVal a, HasKind b, Num b,
 SymVal b) =>
SBV a -> SBV b
sFromIntegral SInteger
i SInteger -> SInteger -> SInteger
forall a. Num a => a -> a -> a
+ SChar -> SInteger
ord (Char -> SChar
forall a. SymVal a => a -> SBV a
literal Char
'a') SInteger -> SInteger -> SInteger
forall a. Num a => a -> a -> a
- SInteger
10))
             (SChar -> SChar) -> SChar -> SChar
forall a b. (a -> b) -> a -> b
$ Char -> SChar
forall a. SymVal a => a -> SBV a
literal Char
' '

-- | Is this a control character? Control characters are essentially the non-printing characters.
isControl :: SChar -> SBool
isControl :: SChar -> SBool
isControl = (SChar -> SString -> SBool
`elem` SString
controls)
  where controls :: SString
controls = SString
"\NUL\SOH\STX\ETX\EOT\ENQ\ACK\a\b\t\n\v\f\r\SO\SI\DLE\DC1\DC2\DC3\DC4\NAK\SYN\ETB\CAN\EM\SUB\ESC\FS\GS\RS\US\DEL\128\129\130\131\132\133\134\135\136\137\138\139\140\141\142\143\144\145\146\147\148\149\150\151\152\153\154\155\156\157\158\159"

-- | Is this white-space? That is, one of "\t\n\v\f\r \160".
isSpace :: SChar -> SBool
isSpace :: SChar -> SBool
isSpace = (SChar -> SString -> SBool
`elem` SString
spaces)
  where spaces :: SString
spaces = SString
"\t\n\v\f\r \160"

-- | Is this a lower-case character?
--
-- >>> prove $ \c -> isUpper c .=> isLower (toLower c)
-- Q.E.D.
isLower :: SChar -> SBool
isLower :: SChar -> SBool
isLower = (SChar -> SString -> SBool
`elem` SString
lower)
  where lower :: SString
lower = SString
"abcdefghijklmnopqrstuvwxyz\181\223\224\225\226\227\228\229\230\231\232\233\234\235\236\237\238\239\240\241\242\243\244\245\246\248\249\250\251\252\253\254\255"

-- | Is this an upper-case character?
--
-- >>> prove $ \c -> sNot (isLower c .&& isUpper c)
-- Q.E.D.
isUpper :: SChar -> SBool
isUpper :: SChar -> SBool
isUpper = (SChar -> SString -> SBool
`elem` SString
upper)
  where upper :: SString
upper = SString
"ABCDEFGHIJKLMNOPQRSTUVWXYZ\192\193\194\195\196\197\198\199\200\201\202\203\204\205\206\207\208\209\210\211\212\213\214\216\217\218\219\220\221\222"

-- | Is this an alphabet character? That is lower-case, upper-case and title-case letters, plus letters of caseless scripts and modifiers letters.
isAlpha :: SChar -> SBool
isAlpha :: SChar -> SBool
isAlpha = (SChar -> SString -> SBool
`elem` SString
alpha)
  where alpha :: SString
alpha = SString
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz\170\181\186\192\193\194\195\196\197\198\199\200\201\202\203\204\205\206\207\208\209\210\211\212\213\214\216\217\218\219\220\221\222\223\224\225\226\227\228\229\230\231\232\233\234\235\236\237\238\239\240\241\242\243\244\245\246\248\249\250\251\252\253\254\255"

-- | Is this an 'isAlpha' or 'isNumber'.
--
-- >>> prove $ \c -> isAlphaNum c .<=> isAlpha c .|| isNumber c
-- Q.E.D.
isAlphaNum :: SChar -> SBool
isAlphaNum :: SChar -> SBool
isAlphaNum SChar
c = SChar -> SBool
isAlpha SChar
c SBool -> SBool -> SBool
.|| SChar -> SBool
isNumber SChar
c

-- | Is this a printable character? Essentially the complement of 'isControl', with one
-- exception. The Latin-1 character \173 is neither control nor printable. Go figure.
--
-- >>> prove $ \c -> c .== literal '\173' .|| isControl c .<=> sNot (isPrint c)
-- Q.E.D.
isPrint :: SChar -> SBool
isPrint :: SChar -> SBool
isPrint SChar
c = SChar
c SChar -> SChar -> SBool
forall a. EqSymbolic a => a -> a -> SBool
./= Char -> SChar
forall a. SymVal a => a -> SBV a
literal Char
'\173' SBool -> SBool -> SBool
.&& SBool -> SBool
sNot (SChar -> SBool
isControl SChar
c)

-- | Is this an ASCII digit, i.e., one of @0@..@9@. Note that this is a subset of 'isNumber'
--
-- >>> prove $ \c -> isDigit c .=> isNumber c
-- Q.E.D.
isDigit :: SChar -> SBool
isDigit :: SChar -> SBool
isDigit = (SChar -> SString -> SBool
`elem` SString
"0123456789")

-- | Is this an Octal digit, i.e., one of @0@..@7@.
--
-- >>> prove $ \c -> isOctDigit c .=> isDigit c
-- Q.E.D.
isOctDigit :: SChar -> SBool
isOctDigit :: SChar -> SBool
isOctDigit = (SChar -> SString -> SBool
`elem` SString
"01234567")

-- | Is this a Hex digit, i.e, one of @0@..@9@, @a@..@f@, @A@..@F@.
--
-- >>> prove $ \c -> isHexDigit c .=> isAlphaNum c
-- Q.E.D.
isHexDigit :: SChar -> SBool
isHexDigit :: SChar -> SBool
isHexDigit = (SChar -> SString -> SBool
`elem` SString
"0123456789abcdefABCDEF")

-- | Is this an alphabet character. Note that this function is equivalent to 'isAlpha'.
--
-- >>> prove $ \c -> isLetter c .<=> isAlpha c
-- Q.E.D.
isLetter :: SChar -> SBool
isLetter :: SChar -> SBool
isLetter = SChar -> SBool
isAlpha

-- | Is this a mark? Note that the Latin-1 subset doesn't have any marks; so this function
-- is simply constant false for the time being.
--
-- >>> prove $ sNot . isMark
-- Q.E.D.
isMark :: SChar -> SBool
isMark :: SChar -> SBool
isMark = SBool -> SChar -> SBool
forall a b. a -> b -> a
const SBool
sFalse

-- | Is this a number character? Note that this set contains not only the digits, but also
-- the codes for a few numeric looking characters like 1/2 etc. Use 'isDigit' for the digits @0@ through @9@.
isNumber :: SChar -> SBool
isNumber :: SChar -> SBool
isNumber = (SChar -> SString -> SBool
`elem` SString
"0123456789\178\179\185\188\189\190")

-- | Is this a punctuation mark?
isPunctuation :: SChar -> SBool
isPunctuation :: SChar -> SBool
isPunctuation = (SChar -> SString -> SBool
`elem` SString
"!\"#%&'()*,-./:;?@[\\]_{}\161\167\171\182\183\187\191")

-- | Is this a symbol?
isSymbol :: SChar -> SBool
isSymbol :: SChar -> SBool
isSymbol = (SChar -> SString -> SBool
`elem` SString
"$+<=>^`|~\162\163\164\165\166\168\169\172\174\175\176\177\180\184\215\247")

-- | Is this a separator?
--
-- >>> prove $ \c -> isSeparator c .=> isSpace c
-- Q.E.D.
isSeparator :: SChar -> SBool
isSeparator :: SChar -> SBool
isSeparator = (SChar -> SString -> SBool
`elem` SString
" \160")

-- | Is this an ASCII character, i.e., the first 128 characters.
isAscii :: SChar -> SBool
isAscii :: SChar -> SBool
isAscii SChar
c = SChar -> SInteger
ord SChar
c SInteger -> SInteger -> SBool
forall a. OrdSymbolic a => a -> a -> SBool
.< SInteger
128

-- | Is this a Latin1 character? Note that this function is always true since 'SChar' corresponds
-- precisely to Latin1 for the time being.
--
-- >>> prove isLatin1
-- Q.E.D.
isLatin1 :: SChar -> SBool
isLatin1 :: SChar -> SBool
isLatin1 = SBool -> SChar -> SBool
forall a b. a -> b -> a
const SBool
sTrue

-- | Is this an ASCII letter?
--
-- >>> prove $ \c -> isAsciiLetter c .<=> isAsciiUpper c .|| isAsciiLower c
-- Q.E.D.
isAsciiLetter :: SChar -> SBool
isAsciiLetter :: SChar -> SBool
isAsciiLetter SChar
c = SChar -> SBool
isAsciiUpper SChar
c SBool -> SBool -> SBool
.|| SChar -> SBool
isAsciiLower SChar
c

-- | Is this an ASCII Upper-case letter? i.e., @A@ thru @Z@
--
-- >>> prove $ \c -> isAsciiUpper c .<=> ord c .>= ord (literal 'A') .&& ord c .<= ord (literal 'Z')
-- Q.E.D.
-- >>> prove $ \c -> isAsciiUpper c .<=> isAscii c .&& isUpper c
-- Q.E.D.
isAsciiUpper :: SChar -> SBool
isAsciiUpper :: SChar -> SBool
isAsciiUpper = (SChar -> SString -> SBool
`elem` String -> SString
forall a. SymVal a => a -> SBV a
literal [Char
'A' .. Char
'Z'])

-- | Is this an ASCII Lower-case letter? i.e., @a@ thru @z@
--
-- >>> prove $ \c -> isAsciiLower c .<=> ord c .>= ord (literal 'a') .&& ord c .<= ord (literal 'z')
-- Q.E.D.
-- >>> prove $ \c -> isAsciiLower c .<=> isAscii c .&& isLower c
-- Q.E.D.
isAsciiLower :: SChar -> SBool
isAsciiLower :: SChar -> SBool
isAsciiLower = (SChar -> SString -> SBool
`elem` String -> SString
forall a. SymVal a => a -> SBV a
literal [Char
'a' .. Char
'z'])