{-# language BangPatterns #-} {-# language BinaryLiterals #-} {-# language DataKinds #-} {-# language KindSignatures #-} {-# language LambdaCase #-} {-# language MagicHash #-} {-# language RankNTypes #-} {-# language ScopedTypeVariables #-} {-# language TypeApplications #-} {-# language TypeOperators #-} {-# language UnboxedTuples #-} -- | The functions in this module are explict about the maximum number -- of bytes they require. module Data.ByteArray.Builder.Bounded ( -- * Builder Builder -- * Execute , run , pasteGrowST -- * Combine , empty , append -- * Bounds Manipulation , weaken , substitute -- * Encode Integral Types -- ** Human-Readable , word64Dec , word32Dec , word16Dec , word8Dec , wordDec , int64Dec , int32Dec , int16Dec , int8Dec , intDec -- * Unsigned Words -- ** 64-bit , word64PaddedUpperHex -- ** 32-bit , word32PaddedUpperHex -- ** 16-bit , word16PaddedLowerHex , word16PaddedUpperHex , word16LowerHex , word16UpperHex -- ** 8-bit , word8PaddedUpperHex , word8LowerHex , ascii , char -- ** Machine-Readable -- *** One , word8 -- **** Big Endian , word64BE , word32BE , word16BE , int64BE , int32BE , int16BE -- **** Little Endian , word64LE , word32LE , word16LE , int64LE , int32LE , int16LE -- * Encode Floating-Point Types , doubleDec ) where import Arithmetic.Types (type (<=), type (:=:)) import Control.Monad.Primitive import Control.Monad.ST (ST) import Control.Monad.ST.Run (runByteArrayST) import Data.Bits import Data.ByteArray.Builder.Bounded.Unsafe (Builder(..)) import Data.Char (ord) import Data.Primitive import Data.Primitive.ByteArray.Offset (MutableByteArrayOffset(..)) import GHC.Exts import GHC.Int (Int64(I64#),Int32(I32#),Int16(I16#),Int8(I8#)) import GHC.ST (ST(ST)) import GHC.TypeLits (type (+)) import GHC.Word (Word8(W8#),Word16(W16#),Word32(W32#),Word64(W64#)) import qualified Arithmetic.Types as Arithmetic import qualified Arithmetic.Nat as Nat import qualified Arithmetic.Lte as Lte import qualified Data.ByteArray.Builder.Bounded.Unsafe as Unsafe import qualified Data.Primitive as PM -- | Execute the bounded builder. If the size is a constant, -- use @Arithmetic.Nat.constant@ as the first argument to let -- GHC conjure up this value for you. run :: Arithmetic.Nat n -> Builder n -- ^ Builder -> ByteArray {-# inline run #-} run n b = runByteArrayST $ do arr <- newByteArray (Nat.demote n) len <- Unsafe.pasteST b arr 0 shrinkMutableByteArray arr len unsafeFreezeByteArray arr -- | Paste the builder into the byte array starting at offset zero. -- This reallocates the byte array if it cannot accomodate the builder, -- growing it by the minimum amount necessary. pasteGrowST :: Arithmetic.Nat n -> Builder n -> MutableByteArrayOffset s -- ^ Initial buffer, used linearly. Do not reuse this argument. -> ST s (MutableByteArrayOffset s) -- ^ Final buffer that accomodated the builder. {-# inline pasteGrowST #-} pasteGrowST n b !(MutableByteArrayOffset{array=arr0,offset=off0}) = do sz0 <- PM.getSizeofMutableByteArray arr0 let req = Nat.demote n let sz1 = off0 + req if sz1 <= sz0 then do off1 <- Unsafe.pasteST b arr0 off0 pure (MutableByteArrayOffset arr0 off1) else do arr1 <- PM.resizeMutableByteArray arr0 sz1 off1 <- Unsafe.pasteST b arr1 off0 pure (MutableByteArrayOffset arr1 off1) -- | The monoidal unit of `append` empty :: Builder 0 empty = Builder $ \_ off0 s0 -> (# s0, off0 #) infixr 9 `append` -- | Concatenate two builders. append :: Builder m -> Builder n -> Builder (m + n) append (Builder f) (Builder g) = Builder $ \arr off0 s0 -> case f arr off0 s0 of (# s1, r #) -> g arr r s1 -- | Weaken the bound on the maximum number of bytes required. For example, -- to use two builders with unequal bounds in a disjunctive setting: -- -- > import qualified Arithmetic.Lte as Lte -- > -- > buildNumber :: Either Double Word64 -> Builder 32 -- > buildNumber = \case -- > Left d -> doubleDec d -- > Right w -> weaken (Lte.constant @19 @32) (word64Dec w) weaken :: forall m n. (m <= n) -> Builder m -> Builder n weaken !_ (Builder f) = Builder f -- | Replace the upper bound on size with an equal number. substitute :: forall m n. (m :=: n) -> Builder m -> Builder n substitute !_ (Builder f) = Builder f -- | Encode a double-floating-point number, using decimal notation or -- scientific notation depending on the magnitude. This has undefined -- behavior when representing @+inf@, @-inf@, and @NaN@. It will not -- crash, but the generated numbers will be nonsense. doubleDec :: Double -> Builder 32 doubleDec (D# d) = Builder (\arr off0 s0 -> doubleDec# d arr off0 s0) -- | Requires up to 19 bytes. Encodes an unsigned 64-bit integer as decimal. -- This encoding never starts with a zero unless the argument was zero. word64Dec :: Word64 -> Builder 19 word64Dec (W64# w) = wordCommonDec# w -- | Requires up to 10 bytes. Encodes an unsigned 32-bit integer as decimal. -- This encoding never starts with a zero unless the argument was zero. word32Dec :: Word32 -> Builder 10 word32Dec (W32# w) = wordCommonDec# w -- | Requires up to 5 bytes. Encodes an unsigned 16-bit integer as decimal. -- This encoding never starts with a zero unless the argument was zero. word16Dec :: Word16 -> Builder 5 word16Dec (W16# w) = wordCommonDec# w -- | Requires up to 3 bytes. Encodes an unsigned 8-bit integer as decimal. -- This encoding never starts with a zero unless the argument was zero. word8Dec :: Word8 -> Builder 3 word8Dec (W8# w) = -- We unroll the loop when encoding Word8s. This speeds things -- up IPv4 encoding by about 10% in the @ip@ library. We can -- encode Word8s at twice this speed by using a lookup table. -- However, I (Andrew Martin) am concerned that although lookup -- table perform very well in microbenchmarks, they can thrash -- L1 cache in real applications. word8Dec# w -- | Requires up to 19 bytes. Encodes an unsigned machine-sized integer -- as decimal. This encoding never starts with a zero unless the argument -- was zero. wordDec :: Word -> Builder 19 wordDec (W# w) = wordCommonDec# w -- | Requires up to 20 bytes. Encodes a signed 64-bit integer as decimal. -- This encoding never starts with a zero unless the argument was zero. -- Negative numbers are preceded by a minus sign. Positive numbers -- are not preceded by anything. int64Dec :: Int64 -> Builder 20 int64Dec (I64# w) = intCommonDec# w -- | Requires up to 11 bytes. Encodes a signed 32-bit integer as decimal. -- This encoding never starts with a zero unless the argument was zero. -- Negative numbers are preceded by a minus sign. Positive numbers -- are not preceded by anything. int32Dec :: Int32 -> Builder 11 int32Dec (I32# w) = intCommonDec# w -- | Requires up to 6 bytes. Encodes a signed 16-bit integer as decimal. -- This encoding never starts with a zero unless the argument was zero. -- Negative numbers are preceded by a minus sign. Positive numbers -- are not preceded by anything. int16Dec :: Int16 -> Builder 6 int16Dec (I16# w) = intCommonDec# w -- | Requires up to 4 bytes. Encodes a signed 8-bit integer as decimal. -- This encoding never starts with a zero unless the argument was zero. -- Negative numbers are preceded by a minus sign. Positive numbers -- are not preceded by anything. int8Dec :: Int8 -> Builder 4 int8Dec (I8# w) = intCommonDec# w -- | Requires up to 20 bytes. Encodes a signed machine-sized integer -- as decimal. This encoding never starts with a zero unless the -- argument was zero. Negative numbers are preceded by a minus sign. -- Positive numbers are not preceded by anything. intDec :: Int -> Builder 20 intDec (I# w) = intCommonDec# w word8Dec# :: Word# -> Builder 3 {-# noinline word8Dec# #-} word8Dec# w# = Unsafe.construct $ \arr off0 -> do let !(I# off0# ) = off0 !(!x,!ones) = quotRem w 10 !(hundreds@(W# hundreds# ),tens@(W# tens# )) = quotRem x 10 writeByteArray arr off0 (fromIntegral (hundreds + 0x30) :: Word8) let !hasHundreds = gtWord# hundreds# 0## !off1@(I# off1# ) = I# (off0# +# hasHundreds) writeByteArray arr off1 (fromIntegral (tens + 0x30) :: Word8) let !off2 = I# (off1# +# (orI# hasHundreds (gtWord# tens# 0## ))) writeByteArray arr off2 (fromIntegral (ones + 0x30) :: Word8) pure (off2 + 1) where w = W# w# -- Requires a number of bytes that is bounded by the size of -- the word. This is only used internally. wordCommonDec# :: Word# -> Builder n {-# noinline wordCommonDec# #-} wordCommonDec# w# = Unsafe.construct $ \arr off0 -> if w /= 0 then internalWordLoop arr off0 (W# w#) else do writeByteArray arr off0 (c2w '0') pure (off0 + 1) where w = W64# w# internalWordLoop :: MutableByteArray s -> Int -> Word -> ST s Int {-# inline internalWordLoop #-} internalWordLoop arr off0 x0 = go off0 x0 where go !off !(x :: Word) = if x > 0 then do let (y,z) = quotRem x 10 writeByteArray arr off (fromIntegral (z + 0x30) :: Word8) go (off + 1) y else do reverseBytes arr off0 (off - 1) pure off -- Requires up to 20 bytes. Can be less depending on what the -- size of the argument is known to be. Unsafe. intCommonDec# :: Int# -> Builder n {-# noinline intCommonDec# #-} intCommonDec# w# = Unsafe.construct $ \arr off0 -> case compare w 0 of GT -> internalWordLoop arr off0 (fromIntegral w) EQ -> do writeByteArray arr off0 (c2w '0') pure (off0 + 1) LT -> do writeByteArray arr off0 (c2w '-') internalWordLoop arr (off0 + 1) (fromIntegral (negate w)) where w = I64# w# -- Convert a number between 0 and 16 to the ASCII -- representation of its hexadecimal character. -- The use of fromIntegral causes us to incur an -- unneeded bitmask. This actually needs a Word64 -- argument. toHexUpper :: Word -> Word8 toHexUpper w' = fromIntegral $ (complement theMask .&. loSolved) .|. (theMask .&. hiSolved) where w = w' .&. 0xF -- This is all ones if the value was >= 10 theMask = (1 .&. unsafeShiftR (w - 10) 63) - 1 loSolved = w + 48 hiSolved = w + 55 toHexLower :: Word -> Word8 toHexLower w' = fromIntegral $ (complement theMask .&. loSolved) .|. (theMask .&. hiSolved) where w = w' .&. 0xF -- This is all ones if the value was >= 10 theMask = (1 .&. unsafeShiftR (w - 10) 63) - 1 loSolved = w + 48 hiSolved = w + 87 -- | Requires exactly 16 bytes. Encodes a 64-bit unsigned integer as -- hexadecimal, zero-padding the encoding to 16 digits. This uses -- uppercase for the alphabetical digits. For example, this encodes the -- number 1022 as @00000000000003FE@. word64PaddedUpperHex :: Word64 -> Builder 16 word64PaddedUpperHex (W64# w) = word64PaddedUpperHex# w -- | Requires exactly 8 bytes. Encodes a 32-bit unsigned integer as -- hexadecimal, zero-padding the encoding to 8 digits. This uses -- uppercase for the alphabetical digits. word32PaddedUpperHex :: Word32 -> Builder 8 word32PaddedUpperHex (W32# w) = word32PaddedUpperHex# w -- | Requires exactly 4 bytes. Encodes a 16-bit unsigned integer as -- hexadecimal, zero-padding the encoding to 4 digits. This uses -- uppercase for the alphabetical digits. -- -- >>> word16PaddedUpperHex 0xab0 -- 0AB0 word16PaddedUpperHex :: Word16 -> Builder 4 word16PaddedUpperHex (W16# w) = word16PaddedUpperHex# w -- | Requires exactly 4 bytes. Encodes a 16-bit unsigned integer as -- hexadecimal, zero-padding the encoding to 4 digits. This uses -- lowercase for the alphabetical digits. -- -- >>> word16PaddedLowerHex 0xab0 -- 0ab0 word16PaddedLowerHex :: Word16 -> Builder 4 word16PaddedLowerHex (W16# w) = word16PaddedLowerHex# w -- | Requires at most 4 bytes. Encodes a 16-bit unsigned integer as -- hexadecimal. No leading zeroes are displayed. Letters are presented -- in lowercase. If the number is zero, a single zero digit is used. -- -- >>> word16LowerHex 0xab0 -- ab0 word16LowerHex :: Word16 -> Builder 4 word16LowerHex (W16# w) = word16LowerHex# w -- | Requires at most 4 bytes. Encodes a 16-bit unsigned integer as -- hexadecimal. No leading zeroes are displayed. Letters are presented -- in uppercase. If the number is zero, a single zero digit is used. -- -- >>> word16UpperHex 0xab0 -- AB0 word16UpperHex :: Word16 -> Builder 4 word16UpperHex (W16# w) = word16UpperHex# w -- | Requires at most 2 bytes. Encodes a 8-bit unsigned integer as -- hexadecimal. No leading zeroes are displayed. If the number is zero, -- a single zero digit is used. word8LowerHex :: Word8 -> Builder 2 word8LowerHex (W8# w) = word8LowerHex# w -- | Requires exactly 2 bytes. Encodes a 8-bit unsigned integer as -- hexadecimal, zero-padding the encoding to 2 digits. This uses -- uppercase for the alphabetical digits. word8PaddedUpperHex :: Word8 -> Builder 2 word8PaddedUpperHex (W8# w) = word8PaddedUpperHex# w -- TODO: Is it actually worth unrolling this loop. I suspect that it -- might not be. Benchmark this. word64PaddedUpperHex# :: Word# -> Builder 16 {-# noinline word64PaddedUpperHex# #-} word64PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexUpper (unsafeShiftR w 60)) writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 56)) writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 52)) writeByteArray arr (off + 3) (toHexUpper (unsafeShiftR w 48)) writeByteArray arr (off + 4) (toHexUpper (unsafeShiftR w 44)) writeByteArray arr (off + 5) (toHexUpper (unsafeShiftR w 40)) writeByteArray arr (off + 6) (toHexUpper (unsafeShiftR w 36)) writeByteArray arr (off + 7) (toHexUpper (unsafeShiftR w 32)) writeByteArray arr (off + 8) (toHexUpper (unsafeShiftR w 28)) writeByteArray arr (off + 9) (toHexUpper (unsafeShiftR w 24)) writeByteArray arr (off + 10) (toHexUpper (unsafeShiftR w 20)) writeByteArray arr (off + 11) (toHexUpper (unsafeShiftR w 16)) writeByteArray arr (off + 12) (toHexUpper (unsafeShiftR w 12)) writeByteArray arr (off + 13) (toHexUpper (unsafeShiftR w 8)) writeByteArray arr (off + 14) (toHexUpper (unsafeShiftR w 4)) writeByteArray arr (off + 15) (toHexUpper (unsafeShiftR w 0)) pure (off + 16) where w = W# w# word32PaddedUpperHex# :: Word# -> Builder 8 {-# noinline word32PaddedUpperHex# #-} word32PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexUpper (unsafeShiftR w 28)) writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 24)) writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 20)) writeByteArray arr (off + 3) (toHexUpper (unsafeShiftR w 16)) writeByteArray arr (off + 4) (toHexUpper (unsafeShiftR w 12)) writeByteArray arr (off + 5) (toHexUpper (unsafeShiftR w 8)) writeByteArray arr (off + 6) (toHexUpper (unsafeShiftR w 4)) writeByteArray arr (off + 7) (toHexUpper (unsafeShiftR w 0)) pure (off + 8) where w = W# w# -- Not sure if it is beneficial to inline this. We just let -- GHC make the decision. Open an issue on github if this is -- a problem. word16PaddedUpperHex# :: Word# -> Builder 4 word16PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexUpper (unsafeShiftR w 12)) writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 8)) writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 4)) writeByteArray arr (off + 3) (toHexUpper (unsafeShiftR w 0)) pure (off + 4) where w = W# w# word16PaddedLowerHex# :: Word# -> Builder 4 word16PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexLower (unsafeShiftR w 12)) writeByteArray arr (off + 1) (toHexLower (unsafeShiftR w 8)) writeByteArray arr (off + 2) (toHexLower (unsafeShiftR w 4)) writeByteArray arr (off + 3) (toHexLower (unsafeShiftR w 0)) pure (off + 4) where w = W# w# word12PaddedLowerHex# :: Word# -> Builder 3 word12PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexLower (unsafeShiftR w 8)) writeByteArray arr (off + 1) (toHexLower (unsafeShiftR w 4)) writeByteArray arr (off + 2) (toHexLower (unsafeShiftR w 0)) pure (off + 3) where w = W# w# word12PaddedUpperHex# :: Word# -> Builder 3 word12PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexUpper (unsafeShiftR w 8)) writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 4)) writeByteArray arr (off + 2) (toHexUpper (unsafeShiftR w 0)) pure (off + 3) where w = W# w# -- Definitely want this to inline. It's maybe a dozen instructions total. word8PaddedUpperHex# :: Word# -> Builder 2 {-# inline word8PaddedUpperHex# #-} word8PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexUpper (unsafeShiftR w 4)) writeByteArray arr (off + 1) (toHexUpper (unsafeShiftR w 0)) pure (off + 2) where w = W# w# word8PaddedLowerHex# :: Word# -> Builder 2 {-# inline word8PaddedLowerHex# #-} word8PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexLower (unsafeShiftR w 4)) writeByteArray arr (off + 1) (toHexLower (unsafeShiftR w 0)) pure (off + 2) where w = W# w# word4PaddedLowerHex# :: Word# -> Builder 1 {-# inline word4PaddedLowerHex# #-} word4PaddedLowerHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexLower w) pure (off + 1) where w = W# w# word4PaddedUpperHex# :: Word# -> Builder 1 {-# inline word4PaddedUpperHex# #-} word4PaddedUpperHex# w# = Unsafe.construct $ \arr off -> do writeByteArray arr off (toHexUpper w) pure (off + 1) where w = W# w# word16UpperHex# :: Word# -> Builder 4 word16UpperHex# w# | w <= 0xF = weaken Lte.constant (word4PaddedUpperHex# w#) | w <= 0xFF = weaken Lte.constant (word8PaddedUpperHex# w#) | w <= 0xFFF = weaken Lte.constant (word12PaddedUpperHex# w#) | otherwise = word16PaddedUpperHex# w# where w = W# w# word16LowerHex# :: Word# -> Builder 4 word16LowerHex# w# | w <= 0xF = weaken Lte.constant (word4PaddedLowerHex# w#) | w <= 0xFF = weaken Lte.constant (word8PaddedLowerHex# w#) | w <= 0xFFF = weaken Lte.constant (word12PaddedLowerHex# w#) | otherwise = word16PaddedLowerHex# w# where w = W# w# -- Precondition: argument less than 256 word8LowerHex# :: Word# -> Builder 2 word8LowerHex# w# | w <= 0xF = weaken Lte.constant (word4PaddedLowerHex# w#) | otherwise = weaken Lte.constant (word8PaddedLowerHex# w#) where w = W# w# -- | Encode an ASCII character. -- Precondition: Input must be an ASCII character. This is not checked. ascii :: Char -> Builder 1 ascii (C# c) = Unsafe.construct $ \(MutableByteArray arr) (I# off) -> do primitive_ (writeCharArray# arr off c) pure (I# (off +# 1# )) -- | Encode a character as UTF-8. This only uses as much space as is required. char :: Char -> Builder 4 char c | codepoint < 0x80 = Unsafe.construct $ \arr off -> do writeByteArray arr off (unsafeWordToWord8 codepoint) pure (off + 1) | codepoint < 0x800 = Unsafe.construct $ \arr off -> do writeByteArray arr off (unsafeWordToWord8 (byteTwoOne codepoint)) writeByteArray arr (off + 1) (unsafeWordToWord8 (byteTwoTwo codepoint)) return (off + 2) | codepoint >= 0xD800 && codepoint < 0xE000 = Unsafe.construct $ \arr off -> do -- Codepoint U+FFFD writeByteArray arr off (0xEF :: Word8) writeByteArray arr (off + 1) (0xBF :: Word8) writeByteArray arr (off + 2) (0xBD :: Word8) return (off + 3) | codepoint < 0x10000 = Unsafe.construct $ \arr off -> do writeByteArray arr off (unsafeWordToWord8 (byteThreeOne codepoint)) writeByteArray arr (off + 1) (unsafeWordToWord8 (byteThreeTwo codepoint)) writeByteArray arr (off + 2) (unsafeWordToWord8 (byteThreeThree codepoint)) return (off + 3) | otherwise = Unsafe.construct $ \arr off -> do writeByteArray arr off (unsafeWordToWord8 (byteFourOne codepoint)) writeByteArray arr (off + 1) (unsafeWordToWord8 (byteFourTwo codepoint)) writeByteArray arr (off + 2) (unsafeWordToWord8 (byteFourThree codepoint)) writeByteArray arr (off + 3) (unsafeWordToWord8 (byteFourFour codepoint)) return (off + 4) where codepoint :: Word codepoint = fromIntegral (ord c) unsafeWordToWord8 :: Word -> Word8 unsafeWordToWord8 (W# w) = W8# w -- precondition: codepoint is less than 0x800 byteTwoOne :: Word -> Word byteTwoOne w = unsafeShiftR w 6 .|. 0b11000000 byteTwoTwo :: Word -> Word byteTwoTwo w = (w .&. 0b00111111) .|. 0b10000000 -- precondition: codepoint is less than 0x1000 byteThreeOne :: Word -> Word byteThreeOne w = unsafeShiftR w 12 .|. 0b11100000 byteThreeTwo :: Word -> Word byteThreeTwo w = (0b00111111 .&. unsafeShiftR w 6) .|. 0b10000000 byteThreeThree :: Word -> Word byteThreeThree w = (w .&. 0b00111111) .|. 0b10000000 -- precondition: codepoint is less than 0x110000 byteFourOne :: Word -> Word byteFourOne w = unsafeShiftR w 18 .|. 0b11110000 byteFourTwo :: Word -> Word byteFourTwo w = (0b00111111 .&. unsafeShiftR w 12) .|. 0b10000000 byteFourThree :: Word -> Word byteFourThree w = (0b00111111 .&. unsafeShiftR w 6) .|. 0b10000000 byteFourFour :: Word -> Word byteFourFour w = (0b00111111 .&. w) .|. 0b10000000 int64BE :: Int64 -> Builder 8 int64BE (I64# i) = word64BE (W64# (int2Word# i)) int32BE :: Int32 -> Builder 4 int32BE (I32# i) = word32BE (W32# (int2Word# i)) int16BE :: Int16 -> Builder 2 int16BE (I16# i) = word16BE (W16# (int2Word# i)) int64LE :: Int64 -> Builder 8 int64LE (I64# i) = word64LE (W64# (int2Word# i)) int32LE :: Int32 -> Builder 4 int32LE (I32# i) = word32LE (W32# (int2Word# i)) int16LE :: Int16 -> Builder 2 int16LE (I16# i) = word16LE (W16# (int2Word# i)) -- | Requires exactly 8 bytes. Dump the octets of a 64-bit -- word in a little-endian fashion. word64LE :: Word64 -> Builder 8 word64LE w = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 7) (fromIntegral @Word64 @Word8 (unsafeShiftR w 56)) writeByteArray arr (off + 6) (fromIntegral @Word64 @Word8 (unsafeShiftR w 48)) writeByteArray arr (off + 5) (fromIntegral @Word64 @Word8 (unsafeShiftR w 40)) writeByteArray arr (off + 4) (fromIntegral @Word64 @Word8 (unsafeShiftR w 32)) writeByteArray arr (off + 3) (fromIntegral @Word64 @Word8 (unsafeShiftR w 24)) writeByteArray arr (off + 2) (fromIntegral @Word64 @Word8 (unsafeShiftR w 16)) writeByteArray arr (off + 1) (fromIntegral @Word64 @Word8 (unsafeShiftR w 8)) writeByteArray arr (off ) (fromIntegral @Word64 @Word8 w) pure (off + 8) -- | Requires exactly 8 bytes. Dump the octets of a 64-bit -- word in a big-endian fashion. word64BE :: Word64 -> Builder 8 word64BE w = Unsafe.construct $ \arr off -> do writeByteArray arr (off ) (fromIntegral @Word64 @Word8 (unsafeShiftR w 56)) writeByteArray arr (off + 1) (fromIntegral @Word64 @Word8 (unsafeShiftR w 48)) writeByteArray arr (off + 2) (fromIntegral @Word64 @Word8 (unsafeShiftR w 40)) writeByteArray arr (off + 3) (fromIntegral @Word64 @Word8 (unsafeShiftR w 32)) writeByteArray arr (off + 4) (fromIntegral @Word64 @Word8 (unsafeShiftR w 24)) writeByteArray arr (off + 5) (fromIntegral @Word64 @Word8 (unsafeShiftR w 16)) writeByteArray arr (off + 6) (fromIntegral @Word64 @Word8 (unsafeShiftR w 8)) writeByteArray arr (off + 7) (fromIntegral @Word64 @Word8 w) pure (off + 8) -- | Requires exactly 4 bytes. Dump the octets of a 32-bit -- word in a little-endian fashion. word32LE :: Word32 -> Builder 4 word32LE w = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 3) (fromIntegral @Word32 @Word8 (unsafeShiftR w 24)) writeByteArray arr (off + 2) (fromIntegral @Word32 @Word8 (unsafeShiftR w 16)) writeByteArray arr (off + 1) (fromIntegral @Word32 @Word8 (unsafeShiftR w 8)) writeByteArray arr (off ) (fromIntegral @Word32 @Word8 w) pure (off + 4) -- | Requires exactly 4 bytes. Dump the octets of a 32-bit -- word in a big-endian fashion. word32BE :: Word32 -> Builder 4 word32BE w = Unsafe.construct $ \arr off -> do writeByteArray arr (off ) (fromIntegral @Word32 @Word8 (unsafeShiftR w 24)) writeByteArray arr (off + 1) (fromIntegral @Word32 @Word8 (unsafeShiftR w 16)) writeByteArray arr (off + 2) (fromIntegral @Word32 @Word8 (unsafeShiftR w 8)) writeByteArray arr (off + 3) (fromIntegral @Word32 @Word8 w) pure (off + 4) -- | Requires exactly 2 bytes. Dump the octets of a 16-bit -- word in a little-endian fashion. word16LE :: Word16 -> Builder 2 word16LE w = Unsafe.construct $ \arr off -> do writeByteArray arr (off + 1) (fromIntegral @Word16 @Word8 (unsafeShiftR w 8)) writeByteArray arr (off ) (fromIntegral @Word16 @Word8 w) pure (off + 2) -- | Requires exactly 2 bytes. Dump the octets of a 16-bit -- word in a big-endian fashion. word16BE :: Word16 -> Builder 2 word16BE w = Unsafe.construct $ \arr off -> do writeByteArray arr (off ) (fromIntegral @Word16 @Word8 (unsafeShiftR w 8)) writeByteArray arr (off + 1) (fromIntegral @Word16 @Word8 w) pure (off + 2) word8 :: Word8 -> Builder 1 word8 w = Unsafe.construct $ \arr off -> do writeByteArray arr off w pure (off + 1) -- Reverse the bytes in the designated slice. This takes -- an inclusive start offset and an inclusive end offset. reverseBytes :: MutableByteArray s -> Int -> Int -> ST s () {-# inline reverseBytes #-} reverseBytes arr begin end = go begin end where go ixA ixB = if ixA < ixB then do a :: Word8 <- readByteArray arr ixA b :: Word8 <- readByteArray arr ixB writeByteArray arr ixA b writeByteArray arr ixB a go (ixA + 1) (ixB - 1) else pure () c2w :: Char -> Word8 c2w = fromIntegral . ord shrinkMutableByteArray :: MutableByteArray s -> Int -> ST s () shrinkMutableByteArray (MutableByteArray arr) (I# sz) = primitive_ (shrinkMutableByteArray# arr sz) -- This is adapted from androider's code in https://stackoverflow.com/a/7097567 -- The checks for infinity and NaN have been removed. Note that this is a little -- inaccurate. This is very visible when encoding a number like 2.25, which -- is perfectly represented as an IEEE 754 floating point number but is goofed -- up by this function. -- If you modify this function, please take a took at the resulting core. -- It currently performs no boxing at all, and it would be nice to keep -- it that way. doubleDec# :: forall s. Double# -> MutableByteArray# s -> Int# -> State# s -> (# State# s, Int# #) {-# noinline doubleDec# #-} doubleDec# d# marr# off# s0 = unIntST s0 $ do let marr = MutableByteArray marr# let d0 = D# d# let off0 = I# off# if d0 == 0 then do writeByteArray marr off0 (c2w '0') pure (off0 + 1) else do let neg = d0 < 0 off1 <- if neg then do writeByteArray marr off0 (c2w '-') pure (off0 + 1) else pure off0 let d1 = abs d0 let mag0 = floor (logBase10 d1) :: Int let useExp = (mag0 >= 14 || (neg && mag0 >= 9) || mag0 <= (-9)) -- This straightforward adaptation of the C code is awkward -- in Haskell. Binding the triple where mag1 might not even -- get used is strange. let !(!d2,!mag1,!mag0A) = if useExp then let mag0' = if mag0 < 0 then mag0 - 1 else mag0 in (d1 / (10.0 ** fromIntegral @Int @Double mag0'), mag0', 0) else (d1,0,mag0) let mag0B = if mag0A < 1 then 0 else mag0A let goNum :: Double -> Int -> Int -> ST s Int goNum !dA0 !mag !offA0 = if (dA0 > doublePrecision || mag >= 0) then do let weight = 10.0 ** (fromIntegral @Int @Double mag) -- We should actually check weight with isinf here, -- but we do not. (dA1,offA1) <- if weight > 0 then do -- TODO: use a better floor function let digit = ((floor :: Double -> Int) (dA0 / weight)) let discard = fromIntegral @Int @Double digit * weight writeByteArray marr offA0 (fromIntegral @Int @Word8 (digit + ord '0')) pure (dA0 - discard,offA0 + 1) else pure (dA0,offA0) offA2 <- if mag == 0 && dA1 > 0 then do writeByteArray marr offA1 (c2w '.') pure (offA1 + 1) else pure offA1 goNum dA1 (mag - 1) offA2 else pure offA0 !off2 <- goNum d2 mag0B off1 off3 <- if useExp then do writeByteArray marr off2 (c2w 'e') !mag2 <- if mag1 > 0 then do writeByteArray marr (off2 + 1) (c2w '+') pure mag1 else do writeByteArray marr (off2 + 1) (c2w '-') pure (-mag1) let goMag !mag !off = if mag > 0 then do let (q,r) = quotRem mag 10 writeByteArray marr off (fromIntegral @Int @Word8 (ord '0' + r)) goMag q (off + 1) else pure off !off3 <- goMag mag2 (off2 + 2) reverseBytes marr (off2 + 2) (off3 - 1) pure off3 else pure off2 pure off3 doublePrecision :: Double doublePrecision = 0.00000000000001 unIntST :: State# s -> ST s Int -> (# State# s, Int# #) {-# inline unIntST #-} unIntST s0 (ST f) = case f s0 of (# s1, I# i #) -> (# s1, i #) -- This is slightly inaccurate. I think this can actually cause -- problems in some situations. The log10 function from C would -- be better. The inaccuracy here cause the logarithm to be slightly -- larger than it should be. There might actually be a simple way to -- fix this by just using recursion to compute it. We just floor the -- result anyway. Hmm... logBase10 :: Double -> Double logBase10 d = log d / 2.30258509299