{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE BinaryLiterals #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE NumericUnderscores #-}
{-# LANGUAGE TypeApplications #-}

-- | This module treats 'Bytes' data as holding ASCII text. Providing bytes
-- outside the ASCII range (@U+0000@ -- @U+007F@) may cause a failure or
-- unspecified results, but such bytes will never be inspected.
--
-- For functions that can operate on ASCII-compatible encodings, see
-- 'Data.Bytes.Text.AsciiExt'.
module Data.Bytes.Text.Ascii
  ( fromString
  , decodeDecWord
  , equalsCStringCaseInsensitive
  , toShortText
  , toShortTextU
#if MIN_VERSION_text(2,0,0)
  , toText
#endif
  ) where

import Data.Bits ((.&.))
import Data.ByteString.Short.Internal (ShortByteString(SBS))
import Data.Bytes.Text.Latin1 (decodeDecWord)
import Data.Bytes.Types (Bytes(Bytes))
import Data.Char (ord)
import Data.Primitive (ByteArray)
import Data.Text (Text)
import Data.Text.Short (ShortText)
import Data.Word (Word8)
import Foreign.C.String (CString)
import Foreign.Ptr (Ptr,plusPtr,castPtr)

import qualified Data.Bytes.Pure as Bytes
import qualified Data.Primitive as PM
import qualified Data.Primitive.Ptr as PM
import qualified Data.Text.Array as A
import qualified Data.Text.Internal as I
import qualified Data.Text.Short.Unsafe as TS
import qualified GHC.Exts as Exts

-- | Convert a 'String' consisting of only characters in the ASCII block
-- to a byte sequence. Any character with a codepoint above @U+007F@ is
-- replaced by @U+0000@.
fromString :: String -> Bytes
fromString :: String -> Bytes
fromString = ByteArray -> Bytes
Bytes.fromByteArray
  forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall l. IsList l => [Item l] -> l
Exts.fromList
  forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map (\Char
c -> let i :: Int
i = Char -> Int
ord Char
c in if Int
i forall a. Ord a => a -> a -> Bool
< Int
128 then forall a b. (Integral a, Num b) => a -> b
fromIntegral @Int @Word8 Int
i else Word8
0)

-- TODO presumably also fromText and fromShortText

toShortText :: Bytes -> Maybe ShortText
{-# inline toShortText #-}
toShortText :: Bytes -> Maybe ShortText
toShortText !Bytes
b = case forall a. (Word8 -> a -> a) -> a -> Bytes -> a
Bytes.foldr (\Word8
w Bool
acc -> Word8
w forall a. Ord a => a -> a -> Bool
< Word8
128 Bool -> Bool -> Bool
&& Bool
acc) Bool
True Bytes
b of
  Bool
True -> forall a. a -> Maybe a
Just (ShortByteString -> ShortText
TS.fromShortByteStringUnsafe (Bytes -> ShortByteString
Bytes.toShortByteString Bytes
b))
  Bool
False -> forall a. Maybe a
Nothing

toShortTextU :: ByteArray -> Maybe ShortText
{-# inline toShortTextU #-}
toShortTextU :: ByteArray -> Maybe ShortText
toShortTextU !ByteArray
b = case forall a. (Word8 -> a -> a) -> a -> Bytes -> a
Bytes.foldr (\Word8
w Bool
acc -> Word8
w forall a. Ord a => a -> a -> Bool
< Word8
128 Bool -> Bool -> Bool
&& Bool
acc) Bool
True (ByteArray -> Bytes
Bytes.fromByteArray ByteArray
b) of
  Bool
True -> forall a. a -> Maybe a
Just (ShortByteString -> ShortText
TS.fromShortByteStringUnsafe (case ByteArray
b of {PM.ByteArray ByteArray#
x -> ByteArray# -> ShortByteString
SBS ByteArray#
x}))
  Bool
False -> forall a. Maybe a
Nothing

#if MIN_VERSION_text(2,0,0)
-- | Interpret byte sequence as ASCII codepoints.
-- Only available when building with @text-2.0@ and newer.
-- Returns 'Nothing' if any of the bytes are outside of the
-- range @0x00-0x7F@
toText :: Bytes -> Maybe Text
{-# inline toText #-}
toText !b@(Bytes (PM.ByteArray arr) off len) = case Bytes.foldr (\w acc -> w < 128 && acc) True b of
  True -> Just (I.Text (A.ByteArray arr) off len)
  False -> Nothing
#endif

-- | Is the byte sequence equal to the @NUL@-terminated C String?
-- The C string must be a constant.
equalsCStringCaseInsensitive :: CString -> Bytes -> Bool
{-# inline equalsCStringCaseInsensitive #-}
equalsCStringCaseInsensitive :: CString -> Bytes -> Bool
equalsCStringCaseInsensitive !CString
ptr0 (Bytes ByteArray
arr Int
off0 Int
len0) = forall {t}. (Eq t, Num t) => Ptr Word8 -> Int -> t -> Bool
go (forall a b. Ptr a -> Ptr b
castPtr CString
ptr0 :: Ptr Word8) Int
off0 Int
len0 where
  go :: Ptr Word8 -> Int -> t -> Bool
go !Ptr Word8
ptr !Int
off !t
len = case t
len of
    t
0 -> forall a. Prim a => Ptr a -> Int -> a
PM.indexOffPtr Ptr Word8
ptr Int
0 forall a. Eq a => a -> a -> Bool
== (Word8
0 :: Word8)
    t
_ -> case forall a. Prim a => Ptr a -> Int -> a
PM.indexOffPtr Ptr Word8
ptr Int
0 of
      Word8
0 -> Bool
False
      Word8
c ->
        (Word8
c forall a. Bits a => a -> a -> a
.&. Word8
0b1101_1111) forall a. Eq a => a -> a -> Bool
== (forall a. Prim a => ByteArray -> Int -> a
PM.indexByteArray ByteArray
arr Int
off forall a. Bits a => a -> a -> a
.&. Word8
0b1101_1111)
        Bool -> Bool -> Bool
&&
        Ptr Word8 -> Int -> t -> Bool
go (forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
ptr Int
1) (Int
off forall a. Num a => a -> a -> a
+ Int
1) (t
len forall a. Num a => a -> a -> a
- t
1)