{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE TypeApplications #-}

-- | This module treats 'Bytes' data as holding ASCII text. Providing bytes
-- outside the ASCII range (@U+0000@ -- @U+007F@) may cause a failure or
-- unspecified results, but such bytes will never be inspected.
--
-- For functions that can operate on ASCII-compatible encodings, see
-- 'Data.Bytes.Text.AsciiExt'.
module Data.Bytes.Text.Ascii
  ( fromString
  , decodeDecWord
  , toShortText
  , toShortTextU
#if MIN_VERSION_text(2,0,0)
  , toText
#endif
  ) where

import Data.ByteString.Short.Internal (ShortByteString(SBS))
import Data.Bytes.Text.Latin1 (decodeDecWord)
import Data.Bytes.Types (Bytes(Bytes))
import Data.Char (ord)
import Data.Primitive (ByteArray)
import Data.Text (Text)
import Data.Text.Short (ShortText)
import Data.Word (Word8)

import qualified Data.Bytes.Pure as Bytes
import qualified Data.Primitive as PM
import qualified Data.Text.Array as A
import qualified Data.Text.Internal as I
import qualified Data.Text.Short.Unsafe as TS
import qualified GHC.Exts as Exts

-- | Convert a 'String' consisting of only characters in the ASCII block
-- to a byte sequence. Any character with a codepoint above @U+007F@ is
-- replaced by @U+0000@.
fromString :: String -> Bytes
fromString :: String -> Bytes
fromString = ByteArray -> Bytes
Bytes.fromByteArray
  forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall l. IsList l => [Item l] -> l
Exts.fromList
  forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map (\Char
c -> let i :: Int
i = Char -> Int
ord Char
c in if Int
i forall a. Ord a => a -> a -> Bool
< Int
128 then forall a b. (Integral a, Num b) => a -> b
fromIntegral @Int @Word8 Int
i else Word8
0)

-- TODO presumably also fromText and fromShortText

toShortText :: Bytes -> Maybe ShortText
{-# inline toShortText #-}
toShortText :: Bytes -> Maybe ShortText
toShortText !Bytes
b = case forall a. (Word8 -> a -> a) -> a -> Bytes -> a
Bytes.foldr (\Word8
w Bool
acc -> Word8
w forall a. Ord a => a -> a -> Bool
< Word8
128 Bool -> Bool -> Bool
&& Bool
acc) Bool
True Bytes
b of
  Bool
True -> forall a. a -> Maybe a
Just (ShortByteString -> ShortText
TS.fromShortByteStringUnsafe (Bytes -> ShortByteString
Bytes.toShortByteString Bytes
b))
  Bool
False -> forall a. Maybe a
Nothing

toShortTextU :: ByteArray -> Maybe ShortText
{-# inline toShortTextU #-}
toShortTextU :: ByteArray -> Maybe ShortText
toShortTextU !ByteArray
b = case forall a. (Word8 -> a -> a) -> a -> Bytes -> a
Bytes.foldr (\Word8
w Bool
acc -> Word8
w forall a. Ord a => a -> a -> Bool
< Word8
128 Bool -> Bool -> Bool
&& Bool
acc) Bool
True (ByteArray -> Bytes
Bytes.fromByteArray ByteArray
b) of
  Bool
True -> forall a. a -> Maybe a
Just (ShortByteString -> ShortText
TS.fromShortByteStringUnsafe (case ByteArray
b of {PM.ByteArray ByteArray#
x -> ByteArray# -> ShortByteString
SBS ByteArray#
x}))
  Bool
False -> forall a. Maybe a
Nothing

#if MIN_VERSION_text(2,0,0)
-- | Interpret byte sequence as ASCII codepoints.
-- Only available when building with @text-2.0@ and newer.
-- Returns 'Nothing' if any of the bytes are outside of the
-- range @0x00-0x7F@
toText :: Bytes -> Maybe Text
{-# inline toText #-}
toText !b@(Bytes (PM.ByteArray arr) off len) = case Bytes.foldr (\w acc -> w < 128 && acc) True b of
  True -> Just (I.Text (A.ByteArray arr) off len)
  False -> Nothing
#endif