{-
(c) The University of Glasgow 2006
(c) The University of Glasgow, 1997-2006


Buffers for scanning string input stored in external arrays.
-}

{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE CPP #-}
{-# LANGUAGE MagicHash #-}
{-# LANGUAGE UnboxedTuples #-}

{-# OPTIONS_GHC -O2 #-}
-- We always optimise this, otherwise performance of a non-optimised
-- compiler is severely affected

module GHC.Data.StringBuffer
       (
        StringBuffer(..),
        -- non-abstract for vs\/HaskellService

         -- * Creation\/destruction
        hGetStringBuffer,
        hGetStringBufferBlock,
        hPutStringBuffer,
        appendStringBuffers,
        stringToStringBuffer,
        stringBufferFromByteString,

        -- * Inspection
        nextChar,
        currentChar,
        prevChar,
        atEnd,
        fingerprintStringBuffer,

        -- * Moving and comparison
        stepOn,
        offsetBytes,
        byteDiff,
        atLine,

        -- * Conversion
        lexemeToString,
        lexemeToFastString,
        decodePrevNChars,

         -- * Parsing integers
        parseUnsignedInteger,

        -- * Checking for bi-directional format characters
        containsBidirectionalFormatChar,
        bidirectionalFormatChars
        ) where

import GHC.Prelude

import GHC.Data.FastString
import GHC.Utils.Encoding
import GHC.Utils.IO.Unsafe
import GHC.Utils.Panic.Plain
import GHC.Utils.Exception      ( bracket_ )
import GHC.Fingerprint

import Data.Maybe
import System.IO
import System.IO.Unsafe         ( unsafePerformIO )
import GHC.IO.Encoding.UTF8     ( mkUTF8 )
import GHC.IO.Encoding.Failure  ( CodingFailureMode(IgnoreCodingFailure) )

import qualified Data.ByteString.Internal as BS
import qualified Data.ByteString as BS
import Data.ByteString ( ByteString )

import GHC.Exts

import Foreign
#if MIN_VERSION_base(4,15,0)
import GHC.ForeignPtr (unsafeWithForeignPtr)
#else
unsafeWithForeignPtr :: ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr :: ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr = ForeignPtr a -> (Ptr a -> IO b) -> IO b
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr
#endif

-- -----------------------------------------------------------------------------
-- The StringBuffer type

-- |A StringBuffer is an internal pointer to a sized chunk of bytes.
-- The bytes are intended to be *immutable*.  There are pure
-- operations to read the contents of a StringBuffer.
--
-- A StringBuffer may have a finalizer, depending on how it was
-- obtained.
--
data StringBuffer
 = StringBuffer {
     StringBuffer -> ForeignPtr Word8
buf :: {-# UNPACK #-} !(ForeignPtr Word8),
     StringBuffer -> Int
len :: {-# UNPACK #-} !Int,        -- length
     StringBuffer -> Int
cur :: {-# UNPACK #-} !Int         -- current pos
  }
  -- The buffer is assumed to be UTF-8 encoded, and furthermore
  -- we add three @\'\\0\'@ bytes to the end as sentinels so that the
  -- decoder doesn't have to check for overflow at every single byte
  -- of a multibyte sequence.

instance Show StringBuffer where
        showsPrec :: Int -> StringBuffer -> ShowS
showsPrec Int
_ StringBuffer
s = String -> ShowS
showString String
"<stringbuffer("
                      ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> ShowS
forall a. Show a => a -> ShowS
shows (StringBuffer -> Int
len StringBuffer
s) ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ShowS
showString String
"," ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> ShowS
forall a. Show a => a -> ShowS
shows (StringBuffer -> Int
cur StringBuffer
s)
                      ShowS -> ShowS -> ShowS
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ShowS
showString String
")>"

-- -----------------------------------------------------------------------------
-- Creation / Destruction

-- | Read a file into a 'StringBuffer'.  The resulting buffer is automatically
-- managed by the garbage collector.
hGetStringBuffer :: FilePath -> IO StringBuffer
hGetStringBuffer :: String -> IO StringBuffer
hGetStringBuffer String
fname = do
   Handle
h <- String -> IOMode -> IO Handle
openBinaryFile String
fname IOMode
ReadMode
   Integer
size_i <- Handle -> IO Integer
hFileSize Handle
h
   Integer
offset_i <- Handle -> Integer -> Integer -> IO Integer
skipBOM Handle
h Integer
size_i Integer
0  -- offset is 0 initially
   let size :: Int
size = Integer -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Integer -> Int) -> Integer -> Int
forall a b. (a -> b) -> a -> b
$ Integer
size_i Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
- Integer
offset_i
   ForeignPtr Word8
buf <- Int -> IO (ForeignPtr Word8)
forall a. Storable a => Int -> IO (ForeignPtr a)
mallocForeignPtrArray (Int
sizeInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
3)
   ForeignPtr Word8
-> (Ptr Word8 -> IO StringBuffer) -> IO StringBuffer
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
buf ((Ptr Word8 -> IO StringBuffer) -> IO StringBuffer)
-> (Ptr Word8 -> IO StringBuffer) -> IO StringBuffer
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
ptr -> do
     Int
r <- if Int
size Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 then Int -> IO Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
0 else Handle -> Ptr Word8 -> Int -> IO Int
forall a. Handle -> Ptr a -> Int -> IO Int
hGetBuf Handle
h Ptr Word8
ptr Int
size
     Handle -> IO ()
hClose Handle
h
     if (Int
r Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
size)
        then IOError -> IO StringBuffer
forall a. IOError -> IO a
ioError (String -> IOError
userError String
"short read of file")
        else ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer
newUTF8StringBuffer ForeignPtr Word8
buf Ptr Word8
ptr Int
size

hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer
hGetStringBufferBlock :: Handle -> Int -> IO StringBuffer
hGetStringBufferBlock Handle
handle Int
wanted
    = do Integer
size_i <- Handle -> IO Integer
hFileSize Handle
handle
         Integer
offset_i <- Handle -> IO Integer
hTell Handle
handle IO Integer -> (Integer -> IO Integer) -> IO Integer
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Handle -> Integer -> Integer -> IO Integer
skipBOM Handle
handle Integer
size_i
         let size :: Int
size = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
wanted (Integer -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Integer -> Int) -> Integer -> Int
forall a b. (a -> b) -> a -> b
$ Integer
size_iInteger -> Integer -> Integer
forall a. Num a => a -> a -> a
-Integer
offset_i)
         ForeignPtr Word8
buf <- Int -> IO (ForeignPtr Word8)
forall a. Storable a => Int -> IO (ForeignPtr a)
mallocForeignPtrArray (Int
sizeInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
3)
         ForeignPtr Word8
-> (Ptr Word8 -> IO StringBuffer) -> IO StringBuffer
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
buf ((Ptr Word8 -> IO StringBuffer) -> IO StringBuffer)
-> (Ptr Word8 -> IO StringBuffer) -> IO StringBuffer
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
ptr ->
             do Int
r <- if Int
size Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 then Int -> IO Int
forall (m :: * -> *) a. Monad m => a -> m a
return Int
0 else Handle -> Ptr Word8 -> Int -> IO Int
forall a. Handle -> Ptr a -> Int -> IO Int
hGetBuf Handle
handle Ptr Word8
ptr Int
size
                if Int
r Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
size
                   then IOError -> IO StringBuffer
forall a. IOError -> IO a
ioError (String -> IOError
userError (String -> IOError) -> String -> IOError
forall a b. (a -> b) -> a -> b
$ String
"short read of file: "String -> ShowS
forall a. [a] -> [a] -> [a]
++(Int, Int, Integer, Handle) -> String
forall a. Show a => a -> String
show(Int
r,Int
size,Integer
size_i,Handle
handle))
                   else ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer
newUTF8StringBuffer ForeignPtr Word8
buf Ptr Word8
ptr Int
size

hPutStringBuffer :: Handle -> StringBuffer -> IO ()
hPutStringBuffer :: Handle -> StringBuffer -> IO ()
hPutStringBuffer Handle
hdl (StringBuffer ForeignPtr Word8
buf Int
len Int
cur)
    = ForeignPtr Any -> (Ptr Any -> IO ()) -> IO ()
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr (ForeignPtr Word8 -> Int -> ForeignPtr Any
forall a b. ForeignPtr a -> Int -> ForeignPtr b
plusForeignPtr ForeignPtr Word8
buf Int
cur) ((Ptr Any -> IO ()) -> IO ()) -> (Ptr Any -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Ptr Any
ptr ->
          Handle -> Ptr Any -> Int -> IO ()
forall a. Handle -> Ptr a -> Int -> IO ()
hPutBuf Handle
hdl Ptr Any
ptr Int
len

-- | Skip the byte-order mark if there is one (see #1744 and #6016),
-- and return the new position of the handle in bytes.
--
-- This is better than treating #FEFF as whitespace,
-- because that would mess up layout.  We don't have a concept
-- of zero-width whitespace in Haskell: all whitespace codepoints
-- have a width of one column.
skipBOM :: Handle -> Integer -> Integer -> IO Integer
skipBOM :: Handle -> Integer -> Integer -> IO Integer
skipBOM Handle
h Integer
size Integer
offset =
  -- Only skip BOM at the beginning of a file.
  if Integer
size Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
> Integer
0 Bool -> Bool -> Bool
&& Integer
offset Integer -> Integer -> Bool
forall a. Eq a => a -> a -> Bool
== Integer
0
    then do
      -- Validate assumption that handle is in binary mode.
      IO Bool -> IO ()
forall (m :: * -> *). (HasCallStack, Monad m) => m Bool -> m ()
assertM (Handle -> IO (Maybe TextEncoding)
hGetEncoding Handle
h IO (Maybe TextEncoding)
-> (Maybe TextEncoding -> IO Bool) -> IO Bool
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Bool -> IO Bool
forall (m :: * -> *) a. Monad m => a -> m a
return (Bool -> IO Bool)
-> (Maybe TextEncoding -> Bool) -> Maybe TextEncoding -> IO Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Maybe TextEncoding -> Bool
forall a. Maybe a -> Bool
isNothing)
      -- Temporarily select utf8 encoding with error ignoring,
      -- to make `hLookAhead` and `hGetChar` return full Unicode characters.
      IO () -> IO () -> IO Integer -> IO Integer
forall a b c. IO a -> IO b -> IO c -> IO c
bracket_ (Handle -> TextEncoding -> IO ()
hSetEncoding Handle
h TextEncoding
safeEncoding) (Handle -> Bool -> IO ()
hSetBinaryMode Handle
h Bool
True) (IO Integer -> IO Integer) -> IO Integer -> IO Integer
forall a b. (a -> b) -> a -> b
$ do
        Char
c <- Handle -> IO Char
hLookAhead Handle
h
        if Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\xfeff'
          then Handle -> IO Char
hGetChar Handle
h IO Char -> IO Integer -> IO Integer
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Handle -> IO Integer
hTell Handle
h
          else Integer -> IO Integer
forall (m :: * -> *) a. Monad m => a -> m a
return Integer
offset
    else Integer -> IO Integer
forall (m :: * -> *) a. Monad m => a -> m a
return Integer
offset
  where
    safeEncoding :: TextEncoding
safeEncoding = CodingFailureMode -> TextEncoding
mkUTF8 CodingFailureMode
IgnoreCodingFailure

newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer
newUTF8StringBuffer :: ForeignPtr Word8 -> Ptr Word8 -> Int -> IO StringBuffer
newUTF8StringBuffer ForeignPtr Word8
buf Ptr Word8
ptr Int
size = do
  Ptr Word8 -> [Word8] -> IO ()
forall a. Storable a => Ptr a -> [a] -> IO ()
pokeArray (Ptr Word8
ptr Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
size :: Ptr Word8) [Word8
0,Word8
0,Word8
0]
  -- sentinels for UTF-8 decoding
  StringBuffer -> IO StringBuffer
forall (m :: * -> *) a. Monad m => a -> m a
return (StringBuffer -> IO StringBuffer)
-> StringBuffer -> IO StringBuffer
forall a b. (a -> b) -> a -> b
$ ForeignPtr Word8 -> Int -> Int -> StringBuffer
StringBuffer ForeignPtr Word8
buf Int
size Int
0

appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer
appendStringBuffers :: StringBuffer -> StringBuffer -> IO StringBuffer
appendStringBuffers StringBuffer
sb1 StringBuffer
sb2
    = do ForeignPtr Word8
newBuf <- Int -> IO (ForeignPtr Word8)
forall a. Storable a => Int -> IO (ForeignPtr a)
mallocForeignPtrArray (Int
sizeInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
3)
         ForeignPtr Word8
-> (Ptr Word8 -> IO StringBuffer) -> IO StringBuffer
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
newBuf ((Ptr Word8 -> IO StringBuffer) -> IO StringBuffer)
-> (Ptr Word8 -> IO StringBuffer) -> IO StringBuffer
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
ptr ->
          ForeignPtr Word8
-> (Ptr Word8 -> IO StringBuffer) -> IO StringBuffer
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr (StringBuffer -> ForeignPtr Word8
buf StringBuffer
sb1) ((Ptr Word8 -> IO StringBuffer) -> IO StringBuffer)
-> (Ptr Word8 -> IO StringBuffer) -> IO StringBuffer
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
sb1Ptr ->
           ForeignPtr Word8
-> (Ptr Word8 -> IO StringBuffer) -> IO StringBuffer
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr (StringBuffer -> ForeignPtr Word8
buf StringBuffer
sb2) ((Ptr Word8 -> IO StringBuffer) -> IO StringBuffer)
-> (Ptr Word8 -> IO StringBuffer) -> IO StringBuffer
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
sb2Ptr ->
             do Ptr Word8 -> Ptr Word8 -> Int -> IO ()
forall a. Storable a => Ptr a -> Ptr a -> Int -> IO ()
copyArray Ptr Word8
ptr (Ptr Word8
sb1Ptr Ptr Word8 -> Int -> Ptr Word8
forall a. Storable a => Ptr a -> Int -> Ptr a
`advancePtr` StringBuffer -> Int
cur StringBuffer
sb1) Int
sb1_len
                Ptr Word8 -> Ptr Word8 -> Int -> IO ()
forall a. Storable a => Ptr a -> Ptr a -> Int -> IO ()
copyArray (Ptr Word8
ptr Ptr Word8 -> Int -> Ptr Word8
forall a. Storable a => Ptr a -> Int -> Ptr a
`advancePtr` Int
sb1_len) (Ptr Word8
sb2Ptr Ptr Word8 -> Int -> Ptr Word8
forall a. Storable a => Ptr a -> Int -> Ptr a
`advancePtr` StringBuffer -> Int
cur StringBuffer
sb2) Int
sb2_len
                Ptr Word8 -> [Word8] -> IO ()
forall a. Storable a => Ptr a -> [a] -> IO ()
pokeArray (Ptr Word8
ptr Ptr Word8 -> Int -> Ptr Word8
forall a. Storable a => Ptr a -> Int -> Ptr a
`advancePtr` Int
size) [Word8
0,Word8
0,Word8
0]
                StringBuffer -> IO StringBuffer
forall (m :: * -> *) a. Monad m => a -> m a
return (ForeignPtr Word8 -> Int -> Int -> StringBuffer
StringBuffer ForeignPtr Word8
newBuf Int
size Int
0)
    where sb1_len :: Int
sb1_len = StringBuffer -> Int
calcLen StringBuffer
sb1
          sb2_len :: Int
sb2_len = StringBuffer -> Int
calcLen StringBuffer
sb2
          calcLen :: StringBuffer -> Int
calcLen StringBuffer
sb = StringBuffer -> Int
len StringBuffer
sb Int -> Int -> Int
forall a. Num a => a -> a -> a
- StringBuffer -> Int
cur StringBuffer
sb
          size :: Int
size =  Int
sb1_len Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
sb2_len

-- | Encode a 'String' into a 'StringBuffer' as UTF-8.  The resulting buffer
-- is automatically managed by the garbage collector.
stringToStringBuffer :: String -> StringBuffer
stringToStringBuffer :: String -> StringBuffer
stringToStringBuffer String
str =
 IO StringBuffer -> StringBuffer
forall a. IO a -> a
unsafePerformIO (IO StringBuffer -> StringBuffer)
-> IO StringBuffer -> StringBuffer
forall a b. (a -> b) -> a -> b
$ do
  let size :: Int
size = String -> Int
utf8EncodedLength String
str
  ForeignPtr Word8
buf <- Int -> IO (ForeignPtr Word8)
forall a. Storable a => Int -> IO (ForeignPtr a)
mallocForeignPtrArray (Int
sizeInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
3)
  ForeignPtr Word8 -> (Ptr Word8 -> IO ()) -> IO ()
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
buf ((Ptr Word8 -> IO ()) -> IO ()) -> (Ptr Word8 -> IO ()) -> IO ()
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
ptr -> do
    Ptr Word8 -> String -> IO ()
utf8EncodeStringPtr Ptr Word8
ptr String
str
    Ptr Word8 -> [Word8] -> IO ()
forall a. Storable a => Ptr a -> [a] -> IO ()
pokeArray (Ptr Word8
ptr Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
size :: Ptr Word8) [Word8
0,Word8
0,Word8
0]
    -- sentinels for UTF-8 decoding
  StringBuffer -> IO StringBuffer
forall (m :: * -> *) a. Monad m => a -> m a
return (ForeignPtr Word8 -> Int -> Int -> StringBuffer
StringBuffer ForeignPtr Word8
buf Int
size Int
0)

-- | Convert a UTF-8 encoded 'ByteString' into a 'StringBuffer. This really
-- relies on the internals of both 'ByteString' and 'StringBuffer'.
--
-- /O(n)/ (but optimized into a @memcpy@ by @bytestring@ under the hood)
stringBufferFromByteString :: ByteString -> StringBuffer
stringBufferFromByteString :: ByteString -> StringBuffer
stringBufferFromByteString ByteString
bs =
  let BS.PS ForeignPtr Word8
fp Int
off Int
len = ByteString -> ByteString -> ByteString
BS.append ByteString
bs ([Word8] -> ByteString
BS.pack [Word8
0,Word8
0,Word8
0])
  in StringBuffer :: ForeignPtr Word8 -> Int -> Int -> StringBuffer
StringBuffer { buf :: ForeignPtr Word8
buf = ForeignPtr Word8
fp, len :: Int
len = Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
3, cur :: Int
cur = Int
off }

-- -----------------------------------------------------------------------------
-- Grab a character

-- | Return the first UTF-8 character of a nonempty 'StringBuffer' and as well
-- the remaining portion (analogous to 'Data.List.uncons').  __Warning:__ The
-- behavior is undefined if the 'StringBuffer' is empty.  The result shares
-- the same buffer as the original.  Similar to 'utf8DecodeChar', if the
-- character cannot be decoded as UTF-8, @\'\\0\'@ is returned.
{-# INLINE nextChar #-}
nextChar :: StringBuffer -> (Char,StringBuffer)
nextChar :: StringBuffer -> (Char, StringBuffer)
nextChar (StringBuffer ForeignPtr Word8
buf Int
len (I# Int#
cur#)) =
  -- Getting our fingers dirty a little here, but this is performance-critical
  IO (Char, StringBuffer) -> (Char, StringBuffer)
forall a. IO a -> a
inlinePerformIO (IO (Char, StringBuffer) -> (Char, StringBuffer))
-> IO (Char, StringBuffer) -> (Char, StringBuffer)
forall a b. (a -> b) -> a -> b
$
    ForeignPtr Word8
-> (Ptr Word8 -> IO (Char, StringBuffer))
-> IO (Char, StringBuffer)
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
buf ((Ptr Word8 -> IO (Char, StringBuffer)) -> IO (Char, StringBuffer))
-> (Ptr Word8 -> IO (Char, StringBuffer))
-> IO (Char, StringBuffer)
forall a b. (a -> b) -> a -> b
$ \(Ptr Addr#
a#) ->
        case Addr# -> Int# -> (# Char#, Int# #)
utf8DecodeCharAddr# (Addr#
a# Addr# -> Int# -> Addr#
`plusAddr#` Int#
cur#) Int#
0# of
          (# Char#
c#, Int#
nBytes# #) ->
             let cur' :: Int
cur' = Int# -> Int
I# (Int#
cur# Int# -> Int# -> Int#
+# Int#
nBytes#) in
             (Char, StringBuffer) -> IO (Char, StringBuffer)
forall (m :: * -> *) a. Monad m => a -> m a
return (Char# -> Char
C# Char#
c#, ForeignPtr Word8 -> Int -> Int -> StringBuffer
StringBuffer ForeignPtr Word8
buf Int
len Int
cur')


bidirectionalFormatChars :: [(Char,String)]
bidirectionalFormatChars :: [(Char, String)]
bidirectionalFormatChars =
  [ (Char
'\x202a' , String
"U+202A LEFT-TO-RIGHT EMBEDDING (LRE)")
  , (Char
'\x202b' , String
"U+202B RIGHT-TO-LEFT EMBEDDING (RLE)")
  , (Char
'\x202c' , String
"U+202C POP DIRECTIONAL FORMATTING (PDF)")
  , (Char
'\x202d' , String
"U+202D LEFT-TO-RIGHT OVERRIDE (LRO)")
  , (Char
'\x202e' , String
"U+202E RIGHT-TO-LEFT OVERRIDE (RLO)")
  , (Char
'\x2066' , String
"U+2066 LEFT-TO-RIGHT ISOLATE (LRI)")
  , (Char
'\x2067' , String
"U+2067 RIGHT-TO-LEFT ISOLATE (RLI)")
  , (Char
'\x2068' , String
"U+2068 FIRST STRONG ISOLATE (FSI)")
  , (Char
'\x2069' , String
"U+2069 POP DIRECTIONAL ISOLATE (PDI)")
  ]

{-| Returns true if the buffer contains Unicode bi-directional formatting
characters.

https://www.unicode.org/reports/tr9/#Bidirectional_Character_Types

Bidirectional format characters are one of
'\x202a' : "U+202A LEFT-TO-RIGHT EMBEDDING (LRE)"
'\x202b' : "U+202B RIGHT-TO-LEFT EMBEDDING (RLE)"
'\x202c' : "U+202C POP DIRECTIONAL FORMATTING (PDF)"
'\x202d' : "U+202D LEFT-TO-RIGHT OVERRIDE (LRO)"
'\x202e' : "U+202E RIGHT-TO-LEFT OVERRIDE (RLO)"
'\x2066' : "U+2066 LEFT-TO-RIGHT ISOLATE (LRI)"
'\x2067' : "U+2067 RIGHT-TO-LEFT ISOLATE (RLI)"
'\x2068' : "U+2068 FIRST STRONG ISOLATE (FSI)"
'\x2069' : "U+2069 POP DIRECTIONAL ISOLATE (PDI)"

This list is encoded in 'bidirectionalFormatChars'

-}
{-# INLINE containsBidirectionalFormatChar #-}
containsBidirectionalFormatChar :: StringBuffer -> Bool
containsBidirectionalFormatChar :: StringBuffer -> Bool
containsBidirectionalFormatChar (StringBuffer ForeignPtr Word8
buf (I# Int#
len#) (I# Int#
cur#))
  = IO Bool -> Bool
forall a. IO a -> a
inlinePerformIO (IO Bool -> Bool) -> IO Bool -> Bool
forall a b. (a -> b) -> a -> b
$ ForeignPtr Word8 -> (Ptr Word8 -> IO Bool) -> IO Bool
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
buf ((Ptr Word8 -> IO Bool) -> IO Bool)
-> (Ptr Word8 -> IO Bool) -> IO Bool
forall a b. (a -> b) -> a -> b
$ \(Ptr Addr#
a#) -> do
  let go :: Int# -> Bool
      go :: Int# -> Bool
go Int#
i | Int# -> Bool
isTrue# (Int#
i Int# -> Int# -> Int#
>=# Int#
len#) = Bool
False
           | Bool
otherwise = case Addr# -> Int# -> (# Char#, Int# #)
utf8DecodeCharAddr# Addr#
a# Int#
i of
                (# Char#
'\x202a'#  , Int#
_ #) -> Bool
True
                (# Char#
'\x202b'#  , Int#
_ #) -> Bool
True
                (# Char#
'\x202c'#  , Int#
_ #) -> Bool
True
                (# Char#
'\x202d'#  , Int#
_ #) -> Bool
True
                (# Char#
'\x202e'#  , Int#
_ #) -> Bool
True
                (# Char#
'\x2066'#  , Int#
_ #) -> Bool
True
                (# Char#
'\x2067'#  , Int#
_ #) -> Bool
True
                (# Char#
'\x2068'#  , Int#
_ #) -> Bool
True
                (# Char#
'\x2069'#  , Int#
_ #) -> Bool
True
                (# Char#
_, Int#
bytes #) -> Int# -> Bool
go (Int#
i Int# -> Int# -> Int#
+# Int#
bytes)
  Bool -> IO Bool
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Bool -> IO Bool) -> Bool -> IO Bool
forall a b. (a -> b) -> a -> b
$! Int# -> Bool
go Int#
cur#

-- | Return the first UTF-8 character of a nonempty 'StringBuffer' (analogous
-- to 'Data.List.head').  __Warning:__ The behavior is undefined if the
-- 'StringBuffer' is empty.  Similar to 'utf8DecodeChar', if the character
-- cannot be decoded as UTF-8, @\'\\0\'@ is returned.
currentChar :: StringBuffer -> Char
currentChar :: StringBuffer -> Char
currentChar = (Char, StringBuffer) -> Char
forall a b. (a, b) -> a
fst ((Char, StringBuffer) -> Char)
-> (StringBuffer -> (Char, StringBuffer)) -> StringBuffer -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. StringBuffer -> (Char, StringBuffer)
nextChar

prevChar :: StringBuffer -> Char -> Char
prevChar :: StringBuffer -> Char -> Char
prevChar (StringBuffer ForeignPtr Word8
_   Int
_   Int
0)   Char
deflt = Char
deflt
prevChar (StringBuffer ForeignPtr Word8
buf Int
_   Int
cur) Char
_     =
  IO Char -> Char
forall a. IO a -> a
inlinePerformIO (IO Char -> Char) -> IO Char -> Char
forall a b. (a -> b) -> a -> b
$
    ForeignPtr Word8 -> (Ptr Word8 -> IO Char) -> IO Char
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
buf ((Ptr Word8 -> IO Char) -> IO Char)
-> (Ptr Word8 -> IO Char) -> IO Char
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
p -> do
      Ptr Word8
p' <- Ptr Word8 -> IO (Ptr Word8)
utf8PrevChar (Ptr Word8
p Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
cur)
      Char -> IO Char
forall (m :: * -> *) a. Monad m => a -> m a
return ((Char, Int) -> Char
forall a b. (a, b) -> a
fst (Ptr Word8 -> (Char, Int)
utf8DecodeChar Ptr Word8
p'))

-- -----------------------------------------------------------------------------
-- Moving

-- | Return a 'StringBuffer' with the first UTF-8 character removed (analogous
-- to 'Data.List.tail').  __Warning:__ The behavior is undefined if the
-- 'StringBuffer' is empty.  The result shares the same buffer as the
-- original.
stepOn :: StringBuffer -> StringBuffer
stepOn :: StringBuffer -> StringBuffer
stepOn StringBuffer
s = (Char, StringBuffer) -> StringBuffer
forall a b. (a, b) -> b
snd (StringBuffer -> (Char, StringBuffer)
nextChar StringBuffer
s)

-- | Return a 'StringBuffer' with the first @n@ bytes removed.  __Warning:__
-- If there aren't enough characters, the returned 'StringBuffer' will be
-- invalid and any use of it may lead to undefined behavior.  The result
-- shares the same buffer as the original.
offsetBytes :: Int                      -- ^ @n@, the number of bytes
            -> StringBuffer
            -> StringBuffer
offsetBytes :: Int -> StringBuffer -> StringBuffer
offsetBytes Int
i StringBuffer
s = StringBuffer
s { cur :: Int
cur = StringBuffer -> Int
cur StringBuffer
s Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i }

-- | Compute the difference in offset between two 'StringBuffer's that share
-- the same buffer.  __Warning:__ The behavior is undefined if the
-- 'StringBuffer's use separate buffers.
byteDiff :: StringBuffer -> StringBuffer -> Int
byteDiff :: StringBuffer -> StringBuffer -> Int
byteDiff StringBuffer
s1 StringBuffer
s2 = StringBuffer -> Int
cur StringBuffer
s2 Int -> Int -> Int
forall a. Num a => a -> a -> a
- StringBuffer -> Int
cur StringBuffer
s1

-- | Check whether a 'StringBuffer' is empty (analogous to 'Data.List.null').
atEnd :: StringBuffer -> Bool
atEnd :: StringBuffer -> Bool
atEnd (StringBuffer ForeignPtr Word8
_ Int
l Int
c) = Int
l Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
c

-- | Computes a hash of the contents of a 'StringBuffer'.
fingerprintStringBuffer :: StringBuffer -> Fingerprint
fingerprintStringBuffer :: StringBuffer -> Fingerprint
fingerprintStringBuffer (StringBuffer ForeignPtr Word8
buf Int
len Int
cur) =
  IO Fingerprint -> Fingerprint
forall a. IO a -> a
unsafePerformIO (IO Fingerprint -> Fingerprint) -> IO Fingerprint -> Fingerprint
forall a b. (a -> b) -> a -> b
$
    ForeignPtr Word8 -> (Ptr Word8 -> IO Fingerprint) -> IO Fingerprint
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr Word8
buf ((Ptr Word8 -> IO Fingerprint) -> IO Fingerprint)
-> (Ptr Word8 -> IO Fingerprint) -> IO Fingerprint
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
ptr ->
      Ptr Word8 -> Int -> IO Fingerprint
fingerprintData (Ptr Word8
ptr Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
cur) Int
len

-- | Computes a 'StringBuffer' which points to the first character of the
-- wanted line. Lines begin at 1.
atLine :: Int -> StringBuffer -> Maybe StringBuffer
atLine :: Int -> StringBuffer -> Maybe StringBuffer
atLine Int
line sb :: StringBuffer
sb@(StringBuffer ForeignPtr Word8
buf Int
len Int
_) =
  IO (Maybe StringBuffer) -> Maybe StringBuffer
forall a. IO a -> a
inlinePerformIO (IO (Maybe StringBuffer) -> Maybe StringBuffer)
-> IO (Maybe StringBuffer) -> Maybe StringBuffer
forall a b. (a -> b) -> a -> b
$
    ForeignPtr Word8
-> (Ptr Word8 -> IO (Maybe StringBuffer))
-> IO (Maybe StringBuffer)
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
buf ((Ptr Word8 -> IO (Maybe StringBuffer)) -> IO (Maybe StringBuffer))
-> (Ptr Word8 -> IO (Maybe StringBuffer))
-> IO (Maybe StringBuffer)
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
p -> do
      Ptr Word8
p' <- Int -> Int -> Ptr Word8 -> IO (Ptr Word8)
skipToLine Int
line Int
len Ptr Word8
p
      if Ptr Word8
p' Ptr Word8 -> Ptr Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Ptr Word8
forall a. Ptr a
nullPtr
        then Maybe StringBuffer -> IO (Maybe StringBuffer)
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe StringBuffer
forall a. Maybe a
Nothing
        else
          let
            delta :: Int
delta = Ptr Word8
p' Ptr Word8 -> Ptr Word8 -> Int
forall a b. Ptr a -> Ptr b -> Int
`minusPtr` Ptr Word8
p
          in Maybe StringBuffer -> IO (Maybe StringBuffer)
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe StringBuffer -> IO (Maybe StringBuffer))
-> Maybe StringBuffer -> IO (Maybe StringBuffer)
forall a b. (a -> b) -> a -> b
$ StringBuffer -> Maybe StringBuffer
forall a. a -> Maybe a
Just (StringBuffer
sb { cur :: Int
cur = Int
delta
                               , len :: Int
len = Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
delta
                               })

skipToLine :: Int -> Int -> Ptr Word8 -> IO (Ptr Word8)
skipToLine :: Int -> Int -> Ptr Word8 -> IO (Ptr Word8)
skipToLine !Int
line !Int
len !Ptr Word8
op0 = Int -> Ptr Word8 -> IO (Ptr Word8)
go Int
1 Ptr Word8
op0
  where
    !opend :: Ptr Word8
opend = Ptr Word8
op0 Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
len

    go :: Int -> Ptr Word8 -> IO (Ptr Word8)
go !Int
i_line !Ptr Word8
op
      | Ptr Word8
op Ptr Word8 -> Ptr Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Ptr Word8
opend    = Ptr Word8 -> IO (Ptr Word8)
forall (f :: * -> *) a. Applicative f => a -> f a
pure Ptr Word8
forall a. Ptr a
nullPtr
      | Int
i_line Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
line = Ptr Word8 -> IO (Ptr Word8)
forall (f :: * -> *) a. Applicative f => a -> f a
pure Ptr Word8
op
      | Bool
otherwise      = do
          Word8
w <- Ptr Word8 -> IO Word8
forall a. Storable a => Ptr a -> IO a
peek Ptr Word8
op :: IO Word8
          case Word8
w of
            Word8
10 -> Int -> Ptr Word8 -> IO (Ptr Word8)
go (Int
i_line Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
op Int
1)
            Word8
13 -> do
              -- this is safe because a 'StringBuffer' is
              -- guaranteed to have 3 bytes sentinel values.
              Word8
w' <- Ptr Word8 -> IO Word8
forall a. Storable a => Ptr a -> IO a
peek (Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
op Int
1) :: IO Word8
              case Word8
w' of
                Word8
10 -> Int -> Ptr Word8 -> IO (Ptr Word8)
go (Int
i_line Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
op Int
2)
                Word8
_  -> Int -> Ptr Word8 -> IO (Ptr Word8)
go (Int
i_line Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
op Int
1)
            Word8
_  -> Int -> Ptr Word8 -> IO (Ptr Word8)
go Int
i_line (Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
plusPtr Ptr Word8
op Int
1)

-- -----------------------------------------------------------------------------
-- Conversion

-- | Decode the first @n@ bytes of a 'StringBuffer' as UTF-8 into a 'String'.
-- Similar to 'utf8DecodeChar', if the character cannot be decoded as UTF-8,
-- they will be replaced with @\'\\0\'@.
lexemeToString :: StringBuffer
               -> Int                   -- ^ @n@, the number of bytes
               -> String
lexemeToString :: StringBuffer -> Int -> String
lexemeToString StringBuffer
_ Int
0 = String
""
lexemeToString (StringBuffer ForeignPtr Word8
buf Int
_ Int
cur) Int
bytes =
  ForeignPtr Word8 -> Int -> Int -> String
utf8DecodeStringLazy ForeignPtr Word8
buf Int
cur Int
bytes

lexemeToFastString :: StringBuffer
                   -> Int               -- ^ @n@, the number of bytes
                   -> FastString
lexemeToFastString :: StringBuffer -> Int -> FastString
lexemeToFastString StringBuffer
_ Int
0 = FastString
nilFS
lexemeToFastString (StringBuffer ForeignPtr Word8
buf Int
_ Int
cur) Int
len =
   IO FastString -> FastString
forall a. IO a -> a
inlinePerformIO (IO FastString -> FastString) -> IO FastString -> FastString
forall a b. (a -> b) -> a -> b
$
     ForeignPtr Word8 -> (Ptr Word8 -> IO FastString) -> IO FastString
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
buf ((Ptr Word8 -> IO FastString) -> IO FastString)
-> (Ptr Word8 -> IO FastString) -> IO FastString
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
ptr ->
       FastString -> IO FastString
forall (m :: * -> *) a. Monad m => a -> m a
return (FastString -> IO FastString) -> FastString -> IO FastString
forall a b. (a -> b) -> a -> b
$! Ptr Word8 -> Int -> FastString
mkFastStringBytes (Ptr Word8
ptr Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` Int
cur) Int
len

-- | Return the previous @n@ characters (or fewer if we are less than @n@
-- characters into the buffer.
decodePrevNChars :: Int -> StringBuffer -> String
decodePrevNChars :: Int -> StringBuffer -> String
decodePrevNChars Int
n (StringBuffer ForeignPtr Word8
buf Int
_ Int
cur) =
    IO String -> String
forall a. IO a -> a
inlinePerformIO (IO String -> String) -> IO String -> String
forall a b. (a -> b) -> a -> b
$ ForeignPtr Word8 -> (Ptr Word8 -> IO String) -> IO String
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
unsafeWithForeignPtr ForeignPtr Word8
buf ((Ptr Word8 -> IO String) -> IO String)
-> (Ptr Word8 -> IO String) -> IO String
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
p0 ->
      Ptr Word8 -> Int -> String -> Ptr Word8 -> IO String
go Ptr Word8
p0 Int
n String
"" (Ptr Word8
p0 Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` (Int
cur Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1))
  where
    go :: Ptr Word8 -> Int -> String -> Ptr Word8 -> IO String
    go :: Ptr Word8 -> Int -> String -> Ptr Word8 -> IO String
go Ptr Word8
buf0 Int
n String
acc Ptr Word8
p | Int
n Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 Bool -> Bool -> Bool
|| Ptr Word8
buf0 Ptr Word8 -> Ptr Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Ptr Word8
p = String -> IO String
forall (m :: * -> *) a. Monad m => a -> m a
return String
acc
    go Ptr Word8
buf0 Int
n String
acc Ptr Word8
p = do
        Ptr Word8
p' <- Ptr Word8 -> IO (Ptr Word8)
utf8PrevChar Ptr Word8
p
        let (Char
c,Int
_) = Ptr Word8 -> (Char, Int)
utf8DecodeChar Ptr Word8
p'
        Ptr Word8 -> Int -> String -> Ptr Word8 -> IO String
go Ptr Word8
buf0 (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) (Char
cChar -> ShowS
forall a. a -> [a] -> [a]
:String
acc) Ptr Word8
p'

-- -----------------------------------------------------------------------------
-- Parsing integer strings in various bases
parseUnsignedInteger :: StringBuffer -> Int -> Integer -> (Char->Int) -> Integer
parseUnsignedInteger :: StringBuffer -> Int -> Integer -> (Char -> Int) -> Integer
parseUnsignedInteger (StringBuffer ForeignPtr Word8
buf Int
_ Int
cur) Int
len Integer
radix Char -> Int
char_to_int
  = IO Integer -> Integer
forall a. IO a -> a
inlinePerformIO (IO Integer -> Integer) -> IO Integer -> Integer
forall a b. (a -> b) -> a -> b
$ ForeignPtr Word8 -> (Ptr Word8 -> IO Integer) -> IO Integer
forall a b. ForeignPtr a -> (Ptr a -> IO b) -> IO b
withForeignPtr ForeignPtr Word8
buf ((Ptr Word8 -> IO Integer) -> IO Integer)
-> (Ptr Word8 -> IO Integer) -> IO Integer
forall a b. (a -> b) -> a -> b
$ \Ptr Word8
ptr -> Integer -> IO Integer
forall (m :: * -> *) a. Monad m => a -> m a
return (Integer -> IO Integer) -> Integer -> IO Integer
forall a b. (a -> b) -> a -> b
$! let
    go :: Int -> Integer -> Integer
go Int
i Integer
x | Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
len  = Integer
x
           | Bool
otherwise = case (Char, Int) -> Char
forall a b. (a, b) -> a
fst (Ptr Word8 -> (Char, Int)
utf8DecodeChar (Ptr Word8
ptr Ptr Word8 -> Int -> Ptr Word8
forall a b. Ptr a -> Int -> Ptr b
`plusPtr` (Int
cur Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i))) of
               Char
'_'  -> Int -> Integer -> Integer
go (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) Integer
x    -- skip "_" (#14473)
               Char
char -> Int -> Integer -> Integer
go (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1) (Integer
x Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
* Integer
radix Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
+ Int -> Integer
forall a. Integral a => a -> Integer
toInteger (Char -> Int
char_to_int Char
char))
  in Int -> Integer -> Integer
go Int
0 Integer
0