------------------------------------------------------------------------
-- |
-- Module           : What4.Utils.StringLiteral
-- Description      : Utility definitions for strings
-- Copyright        : (c) Galois, Inc 2019-2020
-- License          : BSD3
-- Maintainer       : Rob Dockins <rdockins@galois.com>
-- Stability        : provisional
------------------------------------------------------------------------

{-# LANGUAGE DataKinds #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE KindSignatures #-}
{-# LANGUAGE GADTs #-}

module What4.Utils.StringLiteral
( StringLiteral(..)
, stringLiteralInfo
, fromUnicodeLit
, fromChar8Lit
, fromChar16Lit
, stringLitEmpty
, stringLitLength
, stringLitNull
, stringLitBounds
, stringLitContains
, stringLitIsPrefixOf
, stringLitIsSuffixOf
, stringLitSubstring
, stringLitIndexOf
) where


import           Data.Kind
import           Data.Parameterized.Classes
import qualified Data.ByteString as BS
import           Data.String
import qualified Data.Text as T

import           What4.BaseTypes
import qualified What4.Utils.Word16String as WS


------------------------------------------------------------------------
-- String literals

data StringLiteral (si::StringInfo) :: Type where
  UnicodeLiteral :: !T.Text -> StringLiteral Unicode
  Char8Literal   :: !BS.ByteString -> StringLiteral Char8
  Char16Literal  :: !WS.Word16String -> StringLiteral Char16

stringLiteralInfo :: StringLiteral si -> StringInfoRepr si
stringLiteralInfo :: forall (si :: StringInfo). StringLiteral si -> StringInfoRepr si
stringLiteralInfo UnicodeLiteral{} = StringInfoRepr 'Unicode
UnicodeRepr
stringLiteralInfo Char16Literal{}  = StringInfoRepr 'Char16
Char16Repr
stringLiteralInfo Char8Literal{}   = StringInfoRepr 'Char8
Char8Repr

fromUnicodeLit :: StringLiteral Unicode -> T.Text
fromUnicodeLit :: StringLiteral 'Unicode -> Text
fromUnicodeLit (UnicodeLiteral Text
x) = Text
x

fromChar8Lit :: StringLiteral Char8 -> BS.ByteString
fromChar8Lit :: StringLiteral 'Char8 -> ByteString
fromChar8Lit (Char8Literal ByteString
x) = ByteString
x

fromChar16Lit :: StringLiteral Char16 -> WS.Word16String
fromChar16Lit :: StringLiteral 'Char16 -> Word16String
fromChar16Lit (Char16Literal Word16String
x) = Word16String
x

instance TestEquality StringLiteral where
  testEquality :: forall (a :: StringInfo) (b :: StringInfo).
StringLiteral a -> StringLiteral b -> Maybe (a :~: b)
testEquality (UnicodeLiteral Text
x) (UnicodeLiteral Text
y) =
    if Text
x forall a. Eq a => a -> a -> Bool
== Text
y then forall a. a -> Maybe a
Just forall {k} (a :: k). a :~: a
Refl else forall a. Maybe a
Nothing
  testEquality (Char16Literal Word16String
x) (Char16Literal Word16String
y) =
    if Word16String
x forall a. Eq a => a -> a -> Bool
== Word16String
y then forall a. a -> Maybe a
Just forall {k} (a :: k). a :~: a
Refl else forall a. Maybe a
Nothing
  testEquality (Char8Literal ByteString
x) (Char8Literal ByteString
y) =
    if ByteString
x forall a. Eq a => a -> a -> Bool
== ByteString
y then forall a. a -> Maybe a
Just forall {k} (a :: k). a :~: a
Refl else forall a. Maybe a
Nothing

  testEquality StringLiteral a
_ StringLiteral b
_ = forall a. Maybe a
Nothing

instance Eq (StringLiteral si) where
  StringLiteral si
x == :: StringLiteral si -> StringLiteral si -> Bool
== StringLiteral si
y = forall a. Maybe a -> Bool
isJust (forall {k} (f :: k -> Type) (a :: k) (b :: k).
TestEquality f =>
f a -> f b -> Maybe (a :~: b)
testEquality StringLiteral si
x StringLiteral si
y)

instance OrdF StringLiteral where
  compareF :: forall (x :: StringInfo) (y :: StringInfo).
StringLiteral x -> StringLiteral y -> OrderingF x y
compareF (UnicodeLiteral Text
x) (UnicodeLiteral Text
y) =
    case forall a. Ord a => a -> a -> Ordering
compare Text
x Text
y of
      Ordering
LT -> forall {k} (x :: k) (y :: k). OrderingF x y
LTF
      Ordering
EQ -> forall {k} (x :: k). OrderingF x x
EQF
      Ordering
GT -> forall {k} (x :: k) (y :: k). OrderingF x y
GTF
  compareF UnicodeLiteral{} StringLiteral y
_ = forall {k} (x :: k) (y :: k). OrderingF x y
LTF
  compareF StringLiteral x
_ UnicodeLiteral{} = forall {k} (x :: k) (y :: k). OrderingF x y
GTF

  compareF (Char16Literal Word16String
x) (Char16Literal Word16String
y) =
    case forall a. Ord a => a -> a -> Ordering
compare Word16String
x Word16String
y of
      Ordering
LT -> forall {k} (x :: k) (y :: k). OrderingF x y
LTF
      Ordering
EQ -> forall {k} (x :: k). OrderingF x x
EQF
      Ordering
GT -> forall {k} (x :: k) (y :: k). OrderingF x y
GTF
  compareF Char16Literal{} StringLiteral y
_ = forall {k} (x :: k) (y :: k). OrderingF x y
LTF
  compareF StringLiteral x
_ Char16Literal{} = forall {k} (x :: k) (y :: k). OrderingF x y
GTF

  compareF (Char8Literal ByteString
x) (Char8Literal ByteString
y) =
    case forall a. Ord a => a -> a -> Ordering
compare ByteString
x ByteString
y of
      Ordering
LT -> forall {k} (x :: k) (y :: k). OrderingF x y
LTF
      Ordering
EQ -> forall {k} (x :: k). OrderingF x x
EQF
      Ordering
GT -> forall {k} (x :: k) (y :: k). OrderingF x y
GTF

instance Ord (StringLiteral si) where
  compare :: StringLiteral si -> StringLiteral si -> Ordering
compare StringLiteral si
x StringLiteral si
y = forall {k} (x :: k) (y :: k). OrderingF x y -> Ordering
toOrdering (forall k (ktp :: k -> Type) (x :: k) (y :: k).
OrdF ktp =>
ktp x -> ktp y -> OrderingF x y
compareF StringLiteral si
x StringLiteral si
y)

instance ShowF StringLiteral where
  showF :: forall (tp :: StringInfo). StringLiteral tp -> String
showF (UnicodeLiteral Text
x) = forall a. Show a => a -> String
show Text
x
  showF (Char16Literal Word16String
x) = forall a. Show a => a -> String
show Word16String
x
  showF (Char8Literal ByteString
x) = forall a. Show a => a -> String
show ByteString
x

instance Show (StringLiteral si) where
  show :: StringLiteral si -> String
show = forall k (f :: k -> Type) (tp :: k). ShowF f => f tp -> String
showF


instance HashableF StringLiteral where
  hashWithSaltF :: forall (tp :: StringInfo). Int -> StringLiteral tp -> Int
hashWithSaltF Int
s (UnicodeLiteral Text
x) = forall a. Hashable a => Int -> a -> Int
hashWithSalt (forall a. Hashable a => Int -> a -> Int
hashWithSalt Int
s (Int
1::Int)) Text
x
  hashWithSaltF Int
s (Char16Literal Word16String
x)  = forall a. Hashable a => Int -> a -> Int
hashWithSalt (forall a. Hashable a => Int -> a -> Int
hashWithSalt Int
s (Int
2::Int)) Word16String
x
  hashWithSaltF Int
s (Char8Literal ByteString
x)   = forall a. Hashable a => Int -> a -> Int
hashWithSalt (forall a. Hashable a => Int -> a -> Int
hashWithSalt Int
s (Int
3::Int)) ByteString
x

instance Hashable (StringLiteral si) where
  hashWithSalt :: Int -> StringLiteral si -> Int
hashWithSalt = forall k (f :: k -> Type) (tp :: k).
HashableF f =>
Int -> f tp -> Int
hashWithSaltF

stringLitLength :: StringLiteral si -> Integer
stringLitLength :: forall (si :: StringInfo). StringLiteral si -> Integer
stringLitLength (UnicodeLiteral Text
x) = forall a. Integral a => a -> Integer
toInteger (Text -> Int
T.length Text
x)
stringLitLength (Char16Literal Word16String
x)  = forall a. Integral a => a -> Integer
toInteger (Word16String -> Int
WS.length Word16String
x)
stringLitLength (Char8Literal ByteString
x)   = forall a. Integral a => a -> Integer
toInteger (ByteString -> Int
BS.length ByteString
x)

stringLitEmpty :: StringInfoRepr si -> StringLiteral si
stringLitEmpty :: forall (si :: StringInfo). StringInfoRepr si -> StringLiteral si
stringLitEmpty StringInfoRepr si
UnicodeRepr = Text -> StringLiteral 'Unicode
UnicodeLiteral forall a. Monoid a => a
mempty
stringLitEmpty StringInfoRepr si
Char16Repr  = Word16String -> StringLiteral 'Char16
Char16Literal forall a. Monoid a => a
mempty
stringLitEmpty StringInfoRepr si
Char8Repr   = ByteString -> StringLiteral 'Char8
Char8Literal forall a. Monoid a => a
mempty

stringLitNull :: StringLiteral si -> Bool
stringLitNull :: forall (si :: StringInfo). StringLiteral si -> Bool
stringLitNull (UnicodeLiteral Text
x) = Text -> Bool
T.null Text
x
stringLitNull (Char16Literal Word16String
x)  = Word16String -> Bool
WS.null Word16String
x
stringLitNull (Char8Literal ByteString
x)   = ByteString -> Bool
BS.null ByteString
x

stringLitContains :: StringLiteral si -> StringLiteral si -> Bool
stringLitContains :: forall (si :: StringInfo).
StringLiteral si -> StringLiteral si -> Bool
stringLitContains (UnicodeLiteral Text
x) (UnicodeLiteral Text
y) = Text -> Text -> Bool
T.isInfixOf Text
y Text
x
stringLitContains (Char16Literal Word16String
x) (Char16Literal Word16String
y) = Word16String -> Word16String -> Bool
WS.isInfixOf Word16String
y Word16String
x
stringLitContains (Char8Literal ByteString
x) (Char8Literal ByteString
y) = ByteString -> ByteString -> Bool
BS.isInfixOf ByteString
y ByteString
x

stringLitIsPrefixOf :: StringLiteral si -> StringLiteral si -> Bool
stringLitIsPrefixOf :: forall (si :: StringInfo).
StringLiteral si -> StringLiteral si -> Bool
stringLitIsPrefixOf (UnicodeLiteral Text
x) (UnicodeLiteral Text
y) = Text -> Text -> Bool
T.isPrefixOf Text
x Text
y
stringLitIsPrefixOf (Char16Literal Word16String
x) (Char16Literal Word16String
y) = Word16String -> Word16String -> Bool
WS.isPrefixOf Word16String
x Word16String
y
stringLitIsPrefixOf (Char8Literal ByteString
x) (Char8Literal ByteString
y) = ByteString -> ByteString -> Bool
BS.isPrefixOf ByteString
x ByteString
y

stringLitIsSuffixOf :: StringLiteral si -> StringLiteral si -> Bool
stringLitIsSuffixOf :: forall (si :: StringInfo).
StringLiteral si -> StringLiteral si -> Bool
stringLitIsSuffixOf (UnicodeLiteral Text
x) (UnicodeLiteral Text
y) = Text -> Text -> Bool
T.isSuffixOf Text
x Text
y
stringLitIsSuffixOf (Char16Literal Word16String
x) (Char16Literal Word16String
y) = Word16String -> Word16String -> Bool
WS.isSuffixOf Word16String
x Word16String
y
stringLitIsSuffixOf (Char8Literal ByteString
x) (Char8Literal ByteString
y) = ByteString -> ByteString -> Bool
BS.isSuffixOf ByteString
x ByteString
y

stringLitSubstring :: StringLiteral si -> Integer -> Integer -> StringLiteral si
stringLitSubstring :: forall (si :: StringInfo).
StringLiteral si -> Integer -> Integer -> StringLiteral si
stringLitSubstring (UnicodeLiteral Text
x) Integer
len Integer
off
  | Integer
off forall a. Ord a => a -> a -> Bool
< Integer
0 Bool -> Bool -> Bool
|| Integer
len forall a. Ord a => a -> a -> Bool
< Integer
0 = Text -> StringLiteral 'Unicode
UnicodeLiteral Text
T.empty
  | Bool
otherwise = Text -> StringLiteral 'Unicode
UnicodeLiteral forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
T.take (forall a. Num a => Integer -> a
fromInteger Integer
len)  forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
T.drop (forall a. Num a => Integer -> a
fromInteger Integer
off) Text
x
stringLitSubstring (Char16Literal Word16String
x) Integer
len Integer
off
  | Integer
off forall a. Ord a => a -> a -> Bool
< Integer
0 Bool -> Bool -> Bool
|| Integer
len forall a. Ord a => a -> a -> Bool
< Integer
0 = Word16String -> StringLiteral 'Char16
Char16Literal Word16String
WS.empty
  | Bool
otherwise = Word16String -> StringLiteral 'Char16
Char16Literal forall a b. (a -> b) -> a -> b
$ Int -> Word16String -> Word16String
WS.take (forall a. Num a => Integer -> a
fromInteger Integer
len) forall a b. (a -> b) -> a -> b
$ Int -> Word16String -> Word16String
WS.drop (forall a. Num a => Integer -> a
fromInteger Integer
off) Word16String
x
stringLitSubstring (Char8Literal ByteString
x) Integer
len Integer
off
  | Integer
off forall a. Ord a => a -> a -> Bool
< Integer
0 Bool -> Bool -> Bool
|| Integer
len forall a. Ord a => a -> a -> Bool
< Integer
0 = ByteString -> StringLiteral 'Char8
Char8Literal ByteString
BS.empty
  | Bool
otherwise = ByteString -> StringLiteral 'Char8
Char8Literal forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
BS.take (forall a b. (Integral a, Num b) => a -> b
fromIntegral Integer
len) forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
BS.drop (forall a. Num a => Integer -> a
fromInteger Integer
off) ByteString
x

-- | Index of first occurrence of second string in first one starting at
--   the position specified by the third argument.
--   @stringLitIndexOf s t k@, with @0 <= k <= |s|@ is the position of the first
--   occurrence of @t@ in @s@ at or after position @k@, if any.
--   Otherwise, it is @-1@. Note that the result is @k@ whenever @k@ is within
--   the range @[0, |s|]@ and @t@ is empty.
stringLitIndexOf :: StringLiteral si -> StringLiteral si -> Integer -> Integer
stringLitIndexOf :: forall (si :: StringInfo).
StringLiteral si -> StringLiteral si -> Integer -> Integer
stringLitIndexOf (UnicodeLiteral Text
x) (UnicodeLiteral Text
y) Integer
k
   | Integer
k forall a. Ord a => a -> a -> Bool
< Integer
0    = -Integer
1
   | Integer
k forall a. Ord a => a -> a -> Bool
> forall a. Integral a => a -> Integer
toInteger (Text -> Int
T.length Text
x) = -Integer
1
   | Text -> Bool
T.null Text
y = Integer
k
   | Text -> Bool
T.null Text
b = -Integer
1
   | Bool
otherwise = forall a. Integral a => a -> Integer
toInteger (Text -> Int
T.length Text
a) forall a. Num a => a -> a -> a
+ Integer
k
  where (Text
a,Text
b) = Text -> Text -> (Text, Text)
T.breakOn Text
y (Int -> Text -> Text
T.drop (forall a. Num a => Integer -> a
fromInteger Integer
k) Text
x)

stringLitIndexOf (Char16Literal Word16String
x) (Char16Literal Word16String
y) Integer
k
  | Integer
k forall a. Ord a => a -> a -> Bool
< Integer
0 = -Integer
1
  | Integer
k forall a. Ord a => a -> a -> Bool
> forall a. Integral a => a -> Integer
toInteger (Word16String -> Int
WS.length Word16String
x) = -Integer
1
  | Word16String -> Bool
WS.null Word16String
y = Integer
k
  | Bool
otherwise =
      case Word16String -> Word16String -> Maybe Int
WS.findSubstring Word16String
y (Int -> Word16String -> Word16String
WS.drop (forall a. Num a => Integer -> a
fromInteger Integer
k) Word16String
x) of
        Maybe Int
Nothing -> -Integer
1
        Just Int
n  -> forall a. Integral a => a -> Integer
toInteger Int
n forall a. Num a => a -> a -> a
+ Integer
k

stringLitIndexOf (Char8Literal ByteString
x) (Char8Literal ByteString
y) Integer
k
  | Integer
k forall a. Ord a => a -> a -> Bool
< Integer
0 = -Integer
1
  | Integer
k forall a. Ord a => a -> a -> Bool
> forall a. Integral a => a -> Integer
toInteger (ByteString -> Int
BS.length ByteString
x) = -Integer
1
  | ByteString -> Bool
BS.null ByteString
y = Integer
k
  | Bool
otherwise =
      case ByteString -> ByteString -> Maybe Int
bsFindSubstring ByteString
y (Int -> ByteString -> ByteString
BS.drop (forall a. Num a => Integer -> a
fromInteger Integer
k) ByteString
x) of
        Maybe Int
Nothing -> -Integer
1
        Just Int
n  -> forall a. Integral a => a -> Integer
toInteger Int
n forall a. Num a => a -> a -> a
+ Integer
k

-- | Get the first index of a substring in another string,
--   or 'Nothing' if the string is not found.
--
--   Copy/pasted from the old `bytestring` implementation because it was
--   deprecated/removed for some reason.
bsFindSubstring :: BS.ByteString -- ^ String to search for.
              -> BS.ByteString -- ^ String to seach in.
              -> Maybe Int
bsFindSubstring :: ByteString -> ByteString -> Maybe Int
bsFindSubstring ByteString
pat ByteString
src
    | ByteString -> Bool
BS.null ByteString
pat Bool -> Bool -> Bool
&& ByteString -> Bool
BS.null ByteString
src = forall a. a -> Maybe a
Just Int
0
    | ByteString -> Bool
BS.null ByteString
b = forall a. Maybe a
Nothing
    | Bool
otherwise = forall a. a -> Maybe a
Just (ByteString -> Int
BS.length ByteString
a)
  where (ByteString
a, ByteString
b) = ByteString -> ByteString -> (ByteString, ByteString)
BS.breakSubstring ByteString
pat ByteString
src

stringLitBounds :: StringLiteral si -> Maybe (Int, Int)
stringLitBounds :: forall (si :: StringInfo). StringLiteral si -> Maybe (Int, Int)
stringLitBounds StringLiteral si
si =
  case StringLiteral si
si of
    UnicodeLiteral Text
t -> forall a. (a -> Char -> a) -> a -> Text -> a
T.foldl' forall a. Enum a => Maybe (Int, Int) -> a -> Maybe (Int, Int)
f forall a. Maybe a
Nothing Text
t
    Char16Literal Word16String
ws -> forall a. (a -> Word16 -> a) -> a -> Word16String -> a
WS.foldl' forall a. Enum a => Maybe (Int, Int) -> a -> Maybe (Int, Int)
f forall a. Maybe a
Nothing Word16String
ws
    Char8Literal ByteString
bs  -> forall a. (a -> Word8 -> a) -> a -> ByteString -> a
BS.foldl' forall a. Enum a => Maybe (Int, Int) -> a -> Maybe (Int, Int)
f forall a. Maybe a
Nothing ByteString
bs

 where
 f :: Enum a =>  Maybe (Int,Int) -> a -> Maybe (Int, Int)
 f :: forall a. Enum a => Maybe (Int, Int) -> a -> Maybe (Int, Int)
f Maybe (Int, Int)
Nothing a
c = forall a. a -> Maybe a
Just (forall a. Enum a => a -> Int
fromEnum a
c, forall a. Enum a => a -> Int
fromEnum a
c)
 f (Just (Int
lo, Int
hi)) a
c = Int
lo' seq :: forall a b. a -> b -> b
`seq` Int
hi' seq :: forall a b. a -> b -> b
`seq` forall a. a -> Maybe a
Just (Int
lo',Int
hi')
    where
    lo' :: Int
lo' = forall a. Ord a => a -> a -> a
min Int
lo (forall a. Enum a => a -> Int
fromEnum a
c)
    hi' :: Int
hi' = forall a. Ord a => a -> a -> a
max Int
hi (forall a. Enum a => a -> Int
fromEnum a
c)


instance Semigroup (StringLiteral si) where
  UnicodeLiteral Text
x <> :: StringLiteral si -> StringLiteral si -> StringLiteral si
<> UnicodeLiteral Text
y = Text -> StringLiteral 'Unicode
UnicodeLiteral (Text
x forall a. Semigroup a => a -> a -> a
<> Text
y)
  Char16Literal Word16String
x  <> Char16Literal Word16String
y  = Word16String -> StringLiteral 'Char16
Char16Literal (Word16String
x forall a. Semigroup a => a -> a -> a
<> Word16String
y)
  Char8Literal ByteString
x   <> Char8Literal ByteString
y   = ByteString -> StringLiteral 'Char8
Char8Literal (ByteString
x forall a. Semigroup a => a -> a -> a
<> ByteString
y)

instance IsString (StringLiteral Unicode) where
  fromString :: String -> StringLiteral 'Unicode
fromString = Text -> StringLiteral 'Unicode
UnicodeLiteral forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Text
T.pack