{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE RankNTypes #-}

-- For functions that can fail for bytes outside the ASCII range, see
-- 'Data.Bytes.Ascii'. For functions that can inspect bytes outside ASCII, see
-- any of the modules for ASCII-compatible encodings (e.g. 'Data.Bytes.Utf8',
-- 'Data.Bytes.Latin1', and so on).

{- | This module contains functions which operate on supersets of 'Bytes' containing ASCII-encoded text.
That is, none of the functions here inspect bytes with a value greater than 127, and do not fail due to the presence of such bytes.
-}
module Data.Bytes.Text.AsciiExt
  ( -- * Line-Oriented IO
    hFoldLines
  , hForLines_

    -- ** Standard Handles
  , forLines_
  , foldLines

    -- * Text Manipulation
  , toLowerU
  ) where

import Control.Monad.ST (ST)
import Control.Monad.ST.Run (runByteArrayST)
import Data.Bytes.Types (Bytes (..))
import Data.Primitive (ByteArray)
import Data.Word (Word8)
import System.IO (Handle, hIsEOF, stdin)

import qualified Data.ByteString.Char8 as BC8
import qualified Data.Bytes.Pure as Bytes
import qualified Data.Primitive as PM

-- | `hForLines_` over `stdin`
forLines_ :: (Bytes -> IO a) -> IO ()
{-# INLINEABLE forLines_ #-}
forLines_ :: forall a. (Bytes -> IO a) -> IO ()
forLines_ = Handle -> (Bytes -> IO a) -> IO ()
forall a. Handle -> (Bytes -> IO a) -> IO ()
hForLines_ Handle
stdin

-- | `hFoldLines` over `stdin`
foldLines :: a -> (a -> Bytes -> IO a) -> IO a
{-# INLINEABLE foldLines #-}
foldLines :: forall a. a -> (a -> Bytes -> IO a) -> IO a
foldLines = Handle -> a -> (a -> Bytes -> IO a) -> IO a
forall a. Handle -> a -> (a -> Bytes -> IO a) -> IO a
hFoldLines Handle
stdin

{- | Perform an action on each line of the input, discarding results.
To maintain a running state, see 'hFoldLines'.

Lines are extracted with with 'BC8.hGetLine', which does not document its
detection algorithm. As of writing (bytestring v0.11.1.0), lines are
delimited by a single @\n@ character (UNIX-style, as all things should be).
-}
hForLines_ :: Handle -> (Bytes -> IO a) -> IO ()
hForLines_ :: forall a. Handle -> (Bytes -> IO a) -> IO ()
hForLines_ Handle
h Bytes -> IO a
body = IO ()
loop
 where
  loop :: IO ()
loop =
    Handle -> IO Bool
hIsEOF Handle
h IO Bool -> (Bool -> IO ()) -> IO ()
forall a b. IO a -> (a -> IO b) -> IO b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
      Bool
False -> do
        Bytes
line <- ByteString -> Bytes
Bytes.fromByteString (ByteString -> Bytes) -> IO ByteString -> IO Bytes
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Handle -> IO ByteString
BC8.hGetLine Handle
h
        a
_ <- Bytes -> IO a
body Bytes
line
        IO ()
loop
      Bool
True -> () -> IO ()
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()

{- | Perform an action on each line of the input, threading state through the computation.
If you do not need to keep a state, see `hForLines_`.

Lines are extracted with with 'BC8.hGetLine', which does not document its
detection algorithm. As of writing (bytestring v0.11.1.0), lines are
delimited by a single @\n@ character (UNIX-style, as all things should be).
-}
hFoldLines :: Handle -> a -> (a -> Bytes -> IO a) -> IO a
hFoldLines :: forall a. Handle -> a -> (a -> Bytes -> IO a) -> IO a
hFoldLines Handle
h a
z a -> Bytes -> IO a
body = a -> IO a
loop a
z
 where
  loop :: a -> IO a
loop !a
x =
    Handle -> IO Bool
hIsEOF Handle
h IO Bool -> (Bool -> IO a) -> IO a
forall a b. IO a -> (a -> IO b) -> IO b
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
      Bool
False -> do
        Bytes
line <- ByteString -> Bytes
Bytes.fromByteString (ByteString -> Bytes) -> IO ByteString -> IO Bytes
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Handle -> IO ByteString
BC8.hGetLine Handle
h
        a
x' <- a -> Bytes -> IO a
body a
x Bytes
line
        a -> IO a
loop a
x'
      Bool
True -> a -> IO a
forall a. a -> IO a
forall (f :: * -> *) a. Applicative f => a -> f a
pure a
x

{- | /O(n)/ Convert ASCII letters to lowercase. This adds @0x20@ to bytes in the
range @[0x41,0x5A]@ (@A-Z@ ⇒ @a-z@) and leaves all other bytes alone.
Unconditionally copies the bytes.
-}
toLowerU :: Bytes -> ByteArray
toLowerU :: Bytes -> ByteArray
toLowerU (Bytes ByteArray
src Int
off0 Int
len0) =
  (forall s. ST s ByteArray) -> ByteArray
runByteArrayST ST s ByteArray
forall s. ST s ByteArray
action
 where
  action :: forall s. ST s ByteArray
  action :: forall s. ST s ByteArray
action = do
    MutableByteArray (PrimState (ST s))
dst <- Int -> ST s (MutableByteArray (PrimState (ST s)))
forall (m :: * -> *).
PrimMonad m =>
Int -> m (MutableByteArray (PrimState m))
PM.newByteArray Int
len0
    let go :: Int -> Int -> t -> ST s ()
go !Int
off !Int
ix !t
len =
          if t
len t -> t -> Bool
forall a. Eq a => a -> a -> Bool
== t
0
            then () -> ST s ()
forall a. a -> ST s a
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
            else do
              let w :: Word8
w = ByteArray -> Int -> Word8
forall a. Prim a => ByteArray -> Int -> a
PM.indexByteArray ByteArray
src Int
off :: Word8
                  w' :: Word8
w' =
                    if Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
0x41 Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
<= Word8
0x5A
                      then Word8
w Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
+ Word8
32
                      else Word8
w
              MutableByteArray (PrimState (ST s)) -> Int -> Word8 -> ST s ()
forall a (m :: * -> *).
(Prim a, PrimMonad m) =>
MutableByteArray (PrimState m) -> Int -> a -> m ()
PM.writeByteArray MutableByteArray (PrimState (ST s))
dst Int
ix Word8
w'
              Int -> Int -> t -> ST s ()
go (Int
off Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Int
ix Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (t
len t -> t -> t
forall a. Num a => a -> a -> a
- t
1)
    Int -> Int -> Int -> ST s ()
forall {t}. (Eq t, Num t) => Int -> Int -> t -> ST s ()
go Int
off0 Int
0 Int
len0
    MutableByteArray (PrimState (ST s)) -> ST s ByteArray
forall (m :: * -> *).
PrimMonad m =>
MutableByteArray (PrimState m) -> m ByteArray
PM.unsafeFreezeByteArray MutableByteArray (PrimState (ST s))
dst