{-# LANGUAGE CPP, BangPatterns #-}
#if __GLASGOW_HASKELL__ >= 701
{-# LANGUAGE Trustworthy #-}
#endif
--
-- |
-- Module      :  Data.ByteString.Lazy.UTF8
-- Copyright   :  (c) Iavor S. Diatchki 2009
-- License     :  BSD3-style (see LICENSE)
--
-- Maintainer  :  emertens@galois.com
-- Stability   :  experimental
-- Portability :  portable
--
--   This module provides fast, validated encoding and decoding functions
--   between 'ByteString's and 'String's. For invalid encodings its output
--   does not exactly match that of "Codec.Binary.UTF8.String", as the
--   number of replacement characters is sometimes larger.
module Data.ByteString.Lazy.UTF8
  ( B.ByteString
  , decode
  , replacement_char
  , uncons
  , splitAt
  , take
  , drop
  , span
  , break
  , fromString
  , toString
  , foldl
  , foldr
  , length
  , lines
  , lines'
  ) where

import Data.Bits
import Data.Word
import Data.Int
import Foreign.Storable
import Foreign.Ptr
import Foreign.ForeignPtr
import Data.Char        (ord)
import Control.Exception        (assert)
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Internal as B
import qualified Data.ByteString.Internal as S
import Prelude hiding (take,drop,splitAt,span,break,foldr,foldl,length,lines)

import Codec.Binary.UTF8.Generic (buncons)

#if MIN_VERSION_base(4,4,0)
import System.IO.Unsafe (unsafeDupablePerformIO)
#else
import GHC.IO (unsafeDupablePerformIO)
#endif

---------------------------------------------------------------------
-- ENCODING

-- | Converts a Haskell string into a UTF8 encoded bytestring.
--
-- The result is produced lazily as a sequence of strict chunks.  The first
-- chunk buffer holds 32 bytes; every following buffer is twice as large as
-- the previous one, capped at 'B.smallChunkSize'.
--
-- Note that surrogate code points (U+D800..U+DFFF) are not special-cased:
-- they take the generic three byte branch like any other character up to
-- U+FFFF.
fromString :: String -> B.ByteString
fromString []  = B.empty
fromString xs0 = packChunks 32 xs0
  where
    -- Fill one buffer of @n@ bytes and lazily continue with the characters
    -- that did not fit.
    packChunks :: Int -> String -> B.ByteString
    packChunks n xs = case packUptoLenBytes n xs of
        (bs, [] ) -> B.chunk bs B.Empty
        (bs, xs') -> B.Chunk bs (packChunks (min (n * 2) B.smallChunkSize) xs')

    -- Encode as many characters as fit into a fresh buffer of @len@ bytes.
    -- Returns the filled strict chunk together with the unconsumed input.
    packUptoLenBytes :: Int -> String -> (S.ByteString, String)
    packUptoLenBytes len xs = unsafeCreateUptoN' len $ \ptr -> do
        (end, xs') <- go ptr (ptr `plusPtr` (len - 4)) xs
        return (end `minusPtr` ptr, xs')

    -- end is the last position at which you can write a whole 4 byte sequence safely
    go :: Ptr Word8 -> Ptr Word8 -> String -> IO (Ptr Word8, String)
    -- Buffer (possibly) full: stop and hand back the remaining input.
    go !ptr !end xs | ptr > end = return (ptr, xs)
    -- Input exhausted.
    go !ptr !_   [] = return (ptr, [])
    go !ptr !end (x:xs)
        -- One byte: 0xxxxxxx
        | x <= '\x7f' = poke ptr (S.c2w x) >> go (plusPtr ptr 1) end xs
        | otherwise = case ord x of
            -- Two bytes: 110xxxxx 10xxxxxx
            oc | oc <= 0x7ff -> do
                    poke ptr $ fromIntegral $ 0xc0 + (oc `shiftR` 6)
                    pokeElemOff ptr 1 $ fromIntegral $ 0x80 + (oc .&. 0x3f)
                    go (plusPtr ptr 2) end xs
               -- Three bytes: 1110xxxx 10xxxxxx 10xxxxxx
               | oc <= 0xffff -> do
                    poke ptr $ fromIntegral $ 0xe0 + (oc `shiftR` 12)
                    pokeElemOff ptr 1 $ fromIntegral $ 0x80 + ((oc `shiftR` 6) .&. 0x3f)
                    pokeElemOff ptr 2 $ fromIntegral $ 0x80 + (oc .&. 0x3f)
                    go (plusPtr ptr 3) end xs
               -- Four bytes: 11110xxx 10xxxxxx 10xxxxxx 10xxxxxx
               | otherwise -> do
                    poke ptr $ fromIntegral $ 0xf0 + (oc `shiftR` 18)
                    pokeElemOff ptr 1 $ fromIntegral $ 0x80 + ((oc `shiftR` 12) .&. 0x3f)
                    pokeElemOff ptr 2 $ fromIntegral $ 0x80 + ((oc `shiftR` 6) .&. 0x3f)
                    pokeElemOff ptr 3 $ fromIntegral $ 0x80 + (oc .&. 0x3f)
                    go (plusPtr ptr 4) end xs


---------------------------------------------------------------------
-- DECODING

-- | Convert a UTF8 encoded bytestring into a Haskell string.
-- Invalid characters are replaced with @\'\\0xFFFD\'@.
--
-- The string is built with this module's right-biased 'foldr', so
-- characters are decoded on demand.
toString :: B.ByteString -> String
toString bs = foldr (:) [] bs

-- | This character is used to mark errors in a UTF8 encoded string.
-- It is the code point U+FFFD.
replacement_char :: Char
replacement_char = '\xfffd'

-- | Try to extract a character from a byte string.
-- Returns 'Nothing' if there are no more bytes in the byte string.
-- Otherwise, it returns a decoded character and the number of
-- bytes used in its representation.
-- Errors are replaced by character @\'\\0xFFFD\'@.

-- XXX: Should we combine sequences of errors into a single replacement
-- character?
decode :: B.ByteString -> Maybe (Char,Int64)
decode bs = do (c,cs) <- buncons bs
               return (choose (fromEnum c) cs)
  where
  -- Dispatch on the lead byte; @cs@ is the input after that byte.
  choose :: Int -> B.ByteString -> (Char, Int64)
  choose c cs
    | c < 0x80  = (toEnum c, 1)                 -- plain ASCII
    | c < 0xc0  = (replacement_char, 1)         -- stray continuation byte
    | c < 0xe0  = bytes2 (mask c 0x1f) cs
    | c < 0xf0  = bytes3 (mask c 0x0f) cs
    | c < 0xf8  = bytes4 (mask c 0x07) cs
    | otherwise = (replacement_char, 1)         -- 0xf8..0xff never start a sequence

  -- Keep only the payload bits of a lead byte.
  mask :: Int -> Int -> Int
  mask c m = c .&. m

  -- Append the six payload bits of a continuation byte to the accumulator.
  combine :: Int -> Word8 -> Int
  combine acc r = shiftL acc 6 .|. fromEnum (r .&. 0x3f)

  -- Accept @r@ only if it has the continuation byte shape 10xxxxxx.
  follower :: Int -> Word8 -> Maybe Int
  follower acc r | r .&. 0xc0 == 0x80 = Just (combine acc r)
  follower _ _                        = Nothing

  -- Read one continuation byte; fails on end of input or a malformed byte.
  {-# INLINE get_follower #-}
  get_follower :: Int -> B.ByteString -> Maybe (Int, B.ByteString)
  get_follower acc cs = do (x,xs) <- buncons cs
                           acc1 <- follower acc x
                           return (acc1,xs)

  -- Two byte sequence.  A value below 0x80 is an overlong encoding; in that
  -- case, and when the continuation byte is missing or malformed, only the
  -- lead byte is consumed.
  bytes2 :: Int -> B.ByteString -> (Char, Int64)
  bytes2 c cs = case get_follower c cs of
                  Just (d, _) | d >= 0x80  -> (toEnum d, 2)
                              | otherwise  -> (replacement_char, 1)
                  _ -> (replacement_char, 1)

  -- Three byte sequence.  Overlong values, the range 0xd800..0xdfff and the
  -- values 0xfffe and 0xffff are rejected but still consume all three bytes.
  -- A truncated sequence consumes only the bytes that were well formed.
  bytes3 :: Int -> B.ByteString -> (Char, Int64)
  bytes3 c cs =
    case get_follower c cs of
      Just (d1, cs1) ->
        case get_follower d1 cs1 of
          Just (d, _) | (d >= 0x800 && d < 0xd800) ||
                        (d > 0xdfff && d < 0xfffe) -> (toEnum d, 3)
                      | otherwise -> (replacement_char, 3)
          _ -> (replacement_char, 2)
      _ -> (replacement_char, 1)

  -- Four byte sequence.  Only values in 0x10000..0x10ffff are accepted;
  -- anything else is rejected but still consumes all four bytes.
  bytes4 :: Int -> B.ByteString -> (Char, Int64)
  bytes4 c cs =
    case get_follower c cs of
      Just (d1, cs1) ->
        case get_follower d1 cs1 of
          Just (d2, cs2) ->
            case get_follower d2 cs2 of
              Just (d,_) | d >= 0x10000 && d < 0x110000 -> (toEnum d, 4)
                         | otherwise                    -> (replacement_char, 4)
              _ -> (replacement_char, 3)
          _ -> (replacement_char, 2)
      _ -> (replacement_char, 1)
{-# INLINE decode #-}


-- | Split after a given number of characters.
-- Negative values are treated as if they are 0.
--
-- If the input holds fewer characters than requested, the whole input is
-- returned as the first component and the second one is empty.
splitAt :: Int64 -> B.ByteString -> (B.ByteString,B.ByteString)
splitAt x bs = loop 0 x bs
  where
    -- @a@ is the byte offset reached so far, @n@ the number of characters
    -- still to skip, and the last argument the input not yet decoded.
    loop :: Int64 -> Int64 -> B.ByteString -> (B.ByteString, B.ByteString)
    loop !a n _ | n <= 0 = B.splitAt a bs
    loop !a n bs1 = case decode bs1 of
                      Just (_,y) -> loop (a+y) (n-1) (B.drop y bs1)
                      Nothing    -> (bs, B.empty)

-- | @take n s@ returns the first @n@ characters of @s@.
-- If @s@ has less than @n@ characters, then we return the whole of @s@.
take :: Int64 -> B.ByteString -> B.ByteString
take x bs = loop 0 x bs
  where
    -- @a@ is the byte length of the prefix decoded so far, @n@ the number
    -- of characters still wanted.
    loop :: Int64 -> Int64 -> B.ByteString -> B.ByteString
    loop !a n _ | n <= 0 = B.take a bs
    loop !a n bs1 = case decode bs1 of
                      Just (_,y) -> loop (a+y) (n-1) (B.drop y bs1)
                      Nothing    -> bs

-- | @drop n s@ returns the @s@ without its first @n@ characters.
-- If @s@ has less than @n@ characters, then we return an empty string.
drop :: Int64 -> B.ByteString -> B.ByteString
drop x bs = loop 0 x bs
  where
    -- @a@ is the number of bytes to discard so far, @n@ the number of
    -- characters still to skip.
    loop :: Int64 -> Int64 -> B.ByteString -> B.ByteString
    loop !a n _ | n <= 0 = B.drop a bs
    loop !a n bs1 = case decode bs1 of
                      Just (_,y) -> loop (a+y) (n-1) (B.drop y bs1)
                      Nothing    -> B.empty

-- | Split a string into two parts:  the first is the longest prefix
-- that contains only characters that satisfy the predicate; the second
-- part is the rest of the string.
-- Invalid characters are passed as @\'\\0xFFFD\'@ to the predicate.
span :: (Char -> Bool) -> B.ByteString -> (B.ByteString, B.ByteString)
span p bs = loop 0 bs
  where
    -- @a@ is the byte length of the accepted prefix.  It is kept strict,
    -- like the offset accumulators of 'splitAt', 'take' and 'drop', so that
    -- no chain of pending additions builds up on long inputs.
    loop :: Int64 -> B.ByteString -> (B.ByteString, B.ByteString)
    loop !a cs = case decode cs of
                   Just (c,n) | p c -> loop (a+n) (B.drop n cs)
                   _ -> B.splitAt a bs

-- | Split a string into two parts:  the first is the longest prefix
-- that contains only characters that do not satisfy the predicate; the second
-- part is the rest of the string.
-- Invalid characters are passed as @\'\\0xFFFD\'@ to the predicate.
--
-- Implemented as 'span' with the negated predicate.
break :: (Char -> Bool) -> B.ByteString -> (B.ByteString, B.ByteString)
break p bs = span (not . p) bs

-- | Get the first character of a byte string, if any.
-- Malformed characters are replaced by @\'\\0xFFFD\'@.
--
-- The second component is the input without the bytes that 'decode'
-- reported as consumed.
uncons :: B.ByteString -> Maybe (Char,B.ByteString)
uncons bs = do (c,n) <- decode bs
               return (c, B.drop n bs)

-- | Traverse a bytestring (right biased).
--
-- The recursive call sits in the second argument of @cons@, so a @cons@
-- that is lazy in that argument can stop the traversal early.
foldr :: (Char -> a -> a) -> a -> B.ByteString -> a
foldr cons nil cs = case uncons cs of
                      Just (a,as) -> cons a (foldr cons nil as)
                      Nothing     -> nil

-- | Traverse a bytestring (left biased).
-- This function is strict in the accumulator.
foldl :: (a -> Char -> a) -> a -> B.ByteString -> a
foldl add acc cs  = case uncons cs of
                      -- Force the new accumulator before recursing.
                      Just (a,as) -> let v = add acc a
                                     in seq v (foldl add v as)
                      Nothing     -> acc

-- | Counts the number of characters encoded in the bytestring.
-- Note that this includes replacement characters.
length :: B.ByteString -> Int
length b = loop 0 b
  where
    -- The counter is strict so that no chain of pending @+1@ thunks is
    -- built while walking a long input.
    loop :: Int -> B.ByteString -> Int
    loop !n xs = case decode xs of
                   Just (_,m) -> loop (n+1) (B.drop m xs)
                   Nothing    -> n

-- | Split a string into a list of lines.
-- Lines are terminated by @\'\\n\'@ or the end of the string.
-- Empty lines may not be terminated by the end of the string.
-- See also 'lines''.
lines :: B.ByteString -> [B.ByteString]
lines bs | B.null bs  = []
lines bs = case B.elemIndex 10 bs of
             Just x -> let (xs,ys) = B.splitAt x bs
                       -- @ys@ starts with the newline found at index @x@;
                       -- drop that single byte before continuing.
                       in xs : lines (B.drop 1 ys)
             Nothing -> [bs]

-- | Split a string into a list of lines.
-- Lines are terminated by @\'\\n\'@ or the end of the string.
-- Empty lines may not be terminated by the end of the string.
-- This function preserves the terminators.
-- See also 'lines'.
lines' :: B.ByteString -> [B.ByteString]
lines' bs | B.null bs  = []
lines' bs = case B.elemIndex 10 bs of
              -- Split just after the newline so that it stays with its line.
              Just x -> let (xs,ys) = B.splitAt (x+1) bs
                        in xs : lines' ys
              Nothing -> [bs]


---------------------------------------------------------------------
-- COPIED FROM BYTESTRING
-- These functions are copied verbatim from Data.ByteString.Internal
-- I suspect their lack of export is an oversight

-- | Pure wrapper around 'createUptoN''.
--
-- The action is run with 'unsafeDupablePerformIO', so it may be executed
-- more than once or be abandoned part-way.  NOTE(review): this is only
-- sound if @f@ does nothing but write into the buffer it is handed and
-- compute its result from its own arguments -- confirm for every caller.
unsafeCreateUptoN' :: Int -> (Ptr Word8 -> IO (Int, a)) -> (S.ByteString, a)
unsafeCreateUptoN' l f = unsafeDupablePerformIO (createUptoN' l f)
{-# INLINE unsafeCreateUptoN' #-}

-- | Create ByteString of up to size @l@ and use action @f@ to fill its
-- contents.  @f@ receives a pointer to the start of the buffer and returns
-- the number of bytes actually used together with an extra result, which is
-- passed through unchanged.
--
-- The reported size must not exceed @l@; this is checked with 'assert'.
createUptoN' :: Int -> (Ptr Word8 -> IO (Int, a)) -> IO (S.ByteString, a)
createUptoN' l f = do
    fp <- S.mallocByteString l
    (l', res) <- withForeignPtr fp $ \p -> f p
    -- bytestring 0.11 dropped the offset field from the constructor.
#if MIN_VERSION_bytestring(0,11,0)
    let bs = S.BS fp l'
#else
    let bs = S.PS fp 0 l'
#endif
    assert (l' <= l) $ return (bs, res)
{-# INLINE createUptoN' #-}