{-# language BangPatterns #-}
{-# language BinaryLiterals #-}
{-# language TypeApplications #-}

-- | Parse numbers that have been encoded with <https://en.wikipedia.org/wiki/LEB128 LEB-128>.
-- LEB-128 allows arbitrarily large numbers to be encoded. Parsers in this
-- module will fail if the number they attempt to parse is outside the
-- range of what their target type can handle. The parsers for signed
-- numbers assume that the numbers have been
-- <https://developers.google.com/protocol-buffers/docs/encoding zigzig encoded>.
module Data.Bytes.Parser.Leb128
  ( -- * Unsigned
    word16
  , word32
  , word64
    -- * Signed (Zig-zag)
  , int16
  , int32
  , int64
  ) where

import Data.Bits (testBit,(.&.),unsafeShiftR,xor,complement)
import Data.Bits (unsafeShiftL,(.|.))
import Data.Bytes.Parser (Parser)
import Data.Int (Int16,Int32,Int64)
import Data.Word (Word8,Word16,Word32,Word64)

import qualified Data.Bytes.Parser as P

-- | Parse a LEB-128-encoded number. If the number is larger
-- than @0xFFFF@, fails with the provided error.
word16 :: e -> Parser e s Word16
word16 :: e -> Parser e s Word16
word16 e
e = do
  Word64
w <- e -> Int -> Word64 -> Int -> Parser e s Word64
forall e s. e -> Int -> Word64 -> Int -> Parser e s Word64
stepBoundedWord e
e Int
16 Word64
0 Int
0
  Word16 -> Parser e s Word16
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Word64 -> Word16
forall a b. (Integral a, Num b) => a -> b
fromIntegral @Word64 @Word16 Word64
w)

-- | Parse a LEB-128-encoded number. If the number is larger
-- than @0xFFFFFFFF@, fails with the provided error.
word32 :: e -> Parser e s Word32
word32 :: e -> Parser e s Word32
word32 e
e = do
  Word64
w <- e -> Int -> Word64 -> Int -> Parser e s Word64
forall e s. e -> Int -> Word64 -> Int -> Parser e s Word64
stepBoundedWord e
e Int
32 Word64
0 Int
0
  Word32 -> Parser e s Word32
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Word64 -> Word32
forall a b. (Integral a, Num b) => a -> b
fromIntegral @Word64 @Word32 Word64
w)

-- | Parse a LEB-128-encoded number. If the number is larger
-- than @0xFFFFFFFFFFFFFFFF@, fails with the provided error.
word64 :: e -> Parser e s Word64
word64 :: e -> Parser e s Word64
word64 e
e = e -> Int -> Word64 -> Int -> Parser e s Word64
forall e s. e -> Int -> Word64 -> Int -> Parser e s Word64
stepBoundedWord e
e Int
64 Word64
0 Int
0

-- | Parse a LEB-128-zigzag-encoded signed number. If the encoded
-- number is outside the range @[-32768,32767]@, this fails with
-- the provided error.
int16 :: e -> Parser e s Int16
int16 :: e -> Parser e s Int16
int16 = (Word16 -> Int16) -> Parser e s Word16 -> Parser e s Int16
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Word16 -> Int16
zigzagDecode16 (Parser e s Word16 -> Parser e s Int16)
-> (e -> Parser e s Word16) -> e -> Parser e s Int16
forall b c a. (b -> c) -> (a -> b) -> a -> c
. e -> Parser e s Word16
forall e s. e -> Parser e s Word16
word16

-- | Parse a LEB-128-zigzag-encoded signed number. If the encoded
-- number is outside the range @[-2147483648,2147483647]@, this
-- fails with the provided error.
int32 :: e -> Parser e s Int32
int32 :: e -> Parser e s Int32
int32 = (Word32 -> Int32) -> Parser e s Word32 -> Parser e s Int32
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Word32 -> Int32
zigzagDecode32 (Parser e s Word32 -> Parser e s Int32)
-> (e -> Parser e s Word32) -> e -> Parser e s Int32
forall b c a. (b -> c) -> (a -> b) -> a -> c
. e -> Parser e s Word32
forall e s. e -> Parser e s Word32
word32

-- | Parse a LEB-128-zigzag-encoded signed number. If the encoded
-- number is outside the range @[-9223372036854775808,9223372036854775807]@,
-- this fails with the provided error.
int64 :: e -> Parser e s Int64
int64 :: e -> Parser e s Int64
int64 = (Word64 -> Int64) -> Parser e s Word64 -> Parser e s Int64
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Word64 -> Int64
zigzagDecode64 (Parser e s Word64 -> Parser e s Int64)
-> (e -> Parser e s Word64) -> e -> Parser e s Int64
forall b c a. (b -> c) -> (a -> b) -> a -> c
. e -> Parser e s Word64
forall e s. e -> Parser e s Word64
word64

-- What these parameters are:
--
-- bitLimit: number of bits in the target word size
-- accShift: shift amount, increases by 7 at a time
stepBoundedWord :: e -> Int -> Word64 -> Int -> Parser e s Word64
stepBoundedWord :: e -> Int -> Word64 -> Int -> Parser e s Word64
stepBoundedWord e
e !Int
bitLimit !Word64
acc0 !Int
accShift = do
  Word8
raw <- e -> Parser e s Word8
forall e s. e -> Parser e s Word8
P.any e
e
  let number :: Word8
number = Word8
raw Word8 -> Word8 -> Word8
forall a. Bits a => a -> a -> a
.&. Word8
0x7F
      acc1 :: Word64
acc1 = Word64
acc0 Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.|.
        Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
unsafeShiftL (Word8 -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral @Word8 @Word64 Word8
number) Int
accShift
      accShift' :: Int
accShift' = Int
accShift Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
7
  if Int
accShift' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
bitLimit
    then if Word8 -> Int -> Bool
forall a. Bits a => a -> Int -> Bool
testBit Word8
raw Int
7
      then e -> Int -> Word64 -> Int -> Parser e s Word64
forall e s. e -> Int -> Word64 -> Int -> Parser e s Word64
stepBoundedWord e
e Int
bitLimit Word64
acc1 Int
accShift'
      else Word64 -> Parser e s Word64
forall (f :: * -> *) a. Applicative f => a -> f a
pure Word64
acc1
    else if Word8 -> Word
forall a b. (Integral a, Num b) => a -> b
fromIntegral @Word8 @Word Word8
raw Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
< Int -> Word
twoExp (Int
bitLimit Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
accShift)
      then Word64 -> Parser e s Word64
forall (f :: * -> *) a. Applicative f => a -> f a
pure Word64
acc1 -- TODO: no need to mask upper bit in number
      else e -> Parser e s Word64
forall e s a. e -> Parser e s a
P.fail e
e

twoExp :: Int -> Word
twoExp :: Int -> Word
twoExp Int
x = Word -> Int -> Word
forall a. Bits a => a -> Int -> a
unsafeShiftL Word
1 Int
x

-- Zigzag decode strategy taken from https://stackoverflow.com/a/2211086/1405768
-- The accepted answer is a little bit, so an answer further down was used:
--
-- > zigzag_decode(value) = ( value >> 1 ) ^ ( ~( value & 1 ) + 1 )
zigzagDecode16 :: Word16 -> Int16
zigzagDecode16 :: Word16 -> Int16
zigzagDecode16 Word16
n =
  Word16 -> Int16
forall a b. (Integral a, Num b) => a -> b
fromIntegral ((Word16 -> Int -> Word16
forall a. Bits a => a -> Int -> a
unsafeShiftR Word16
n Int
1) Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
`xor` (Word16 -> Word16
forall a. Bits a => a -> a
complement (Word16
n Word16 -> Word16 -> Word16
forall a. Bits a => a -> a -> a
.&. Word16
1) Word16 -> Word16 -> Word16
forall a. Num a => a -> a -> a
+ Word16
1))

zigzagDecode32 :: Word32 -> Int32
zigzagDecode32 :: Word32 -> Int32
zigzagDecode32 Word32
n =
  Word32 -> Int32
forall a b. (Integral a, Num b) => a -> b
fromIntegral ((Word32 -> Int -> Word32
forall a. Bits a => a -> Int -> a
unsafeShiftR Word32
n Int
1) Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
`xor` (Word32 -> Word32
forall a. Bits a => a -> a
complement (Word32
n Word32 -> Word32 -> Word32
forall a. Bits a => a -> a -> a
.&. Word32
1) Word32 -> Word32 -> Word32
forall a. Num a => a -> a -> a
+ Word32
1))

zigzagDecode64 :: Word64 -> Int64
zigzagDecode64 :: Word64 -> Int64
zigzagDecode64 Word64
n =
  Word64 -> Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral ((Word64 -> Int -> Word64
forall a. Bits a => a -> Int -> a
unsafeShiftR Word64
n Int
1) Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
`xor` (Word64 -> Word64
forall a. Bits a => a -> a
complement (Word64
n Word64 -> Word64 -> Word64
forall a. Bits a => a -> a -> a
.&. Word64
1) Word64 -> Word64 -> Word64
forall a. Num a => a -> a -> a
+ Word64
1))