{-# OPTIONS_GHC -Wall -fwarn-tabs #-}
{-# LANGUAGE BangPatterns #-}
----------------------------------------------------------------
--                                                    2024-04-11
-- |
-- Module      :  Data.ByteString.Lex.Internal
-- Copyright   :  Copyright (c) 2010--2024 wren gayle romano
-- License     :  BSD2
-- Maintainer  :  wren@cpan.org
-- Stability   :  provisional
-- Portability :  BangPatterns
--
-- Some functions we want to share across the other modules without
-- actually exposing them to the user.
----------------------------------------------------------------
module Data.ByteString.Lex.Internal
    (
    -- * Character-based bit-bashing
      isNotPeriod
    , isNotE
    , isDecimal
    , isDecimalZero
    , toDigit
    , addDigit
    -- * Integral logarithms
    , numDigits
    , numTwoPowerDigits
    , numDecimalDigits
    ) where

import Data.Word (Word8, Word64)
import Data.Bits (Bits(shiftR))

----------------------------------------------------------------
----------------------------------------------------------------
----- Character-based bit-bashing

{-# INLINE isNotPeriod #-}
isNotPeriod :: Word8 -> Bool
isNotPeriod :: Word8 -> Bool
isNotPeriod Word8
w = Word8
w Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0x2E

{-# INLINE isNotE #-}
isNotE :: Word8 -> Bool
isNotE :: Word8 -> Bool
isNotE Word8
w = Word8
w Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0x65 Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
/= Word8
0x45

{-# INLINE isDecimal #-}
isDecimal :: Word8 -> Bool
isDecimal :: Word8 -> Bool
isDecimal Word8
w = Word8
0x39 Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
w Bool -> Bool -> Bool
&& Word8
w Word8 -> Word8 -> Bool
forall a. Ord a => a -> a -> Bool
>= Word8
0x30

{-# INLINE isDecimalZero #-}
isDecimalZero :: Word8 -> Bool
isDecimalZero :: Word8 -> Bool
isDecimalZero Word8
w = Word8
w Word8 -> Word8 -> Bool
forall a. Eq a => a -> a -> Bool
== Word8
0x30

{-# INLINE toDigit #-}
toDigit :: (Integral a) => Word8 -> a
toDigit :: forall a. Integral a => Word8 -> a
toDigit Word8
w = Word8 -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word8
w Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
- Word8
0x30)

{-# INLINE addDigit #-}
addDigit :: Int -> Word8 -> Int
addDigit :: Int -> Word8 -> Int
addDigit Int
n Word8
w = Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
* Int
10 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Word8 -> Int
forall a. Integral a => Word8 -> a
toDigit Word8
w

----------------------------------------------------------------
----- Integral logarithms

-- TODO: cf. integer-gmp:GHC.Integer.Logarithms made available in
-- version 0.3.0.0 (ships with GHC 7.2.1).
-- <http://haskell.org/ghc/docs/7.2.1/html/libraries/integer-gmp-0.3.0.0/GHC-Integer-Logarithms.html>


-- This implementation is derived from
-- <http://www.haskell.org/pipermail/haskell-cafe/2009-August/065854.html>
-- modified to use 'quot' instead of 'div', to ensure strictness,
-- and using more guard notation (but this last one's compiled
-- away). See @./bench/BenchNumDigits.hs@ for other implementation
-- choices.
--
-- | @numDigits b n@ computes the number of base-@b@ digits required
-- to represent the number @n@. N.B., this implementation is unsafe
-- and will throw errors if the base is @(<= 1)@, or if the number
-- is negative. If the base happens to be a power of 2, then see
-- 'numTwoPowerDigits' for a more efficient implementation.
--
-- We must be careful about the input types here. When using small
-- unsigned types or very large values, the repeated squaring can
-- overflow causing the function to loop. (E.g., the fourth squaring
-- of 10 overflows 32-bits (==1874919424) which is greater than the
-- third squaring. For 64-bit, the 5th squaring overflows, but it's
-- negative so will be caught.) Forcing the type to Integer ensures
-- correct behavior, but makes it substantially slower.

numDigits :: Integer -> Integer -> Int
{-# INLINE numDigits #-}
numDigits :: Integer -> Integer -> Int
numDigits !Integer
b0 !Integer
n0
    | Integer
b0 Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
<= Integer
1   = [Char] -> Int
forall a. HasCallStack => [Char] -> a
error ([Char]
_numDigits [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
_nonpositiveBase)
    | Integer
n0 Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
<  Integer
0   = [Char] -> Int
forall a. HasCallStack => [Char] -> a
error ([Char]
_numDigits [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
_negativeNumber)
    -- BUG: need to check n0 to be sure we won't overflow Int
    | Bool
otherwise = ND -> Int
finish (Integer -> Integer -> ND
ilog Integer
b0 Integer
n0)
    where
    finish :: ND -> Int
finish (ND Int
e Integer
_) = Int
1 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
e
    ilog :: Integer -> Integer -> ND
ilog !Integer
b !Integer
n
        | Integer
n Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
< Integer
b     = Int -> Integer -> ND
ND Int
0 Integer
n
        -- TODO(2024-04-11): Check core to see whether these @(2*)@
        -- ops are properly weakened to shifts.
        | Integer
r Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
< Integer
b     = Int -> Integer -> ND
ND (Int
2Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
e) Integer
r
        | Bool
otherwise = Int -> Integer -> ND
ND (Int
2Int -> Int -> Int
forall a. Num a => a -> a -> a
*Int
eInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) (Integer
r Integer -> Integer -> Integer
forall a. Integral a => a -> a -> a
`quot` Integer
b)
        where
        -- TODO(2024-04-11): Benchmark this lazy-pattern matching,
        -- vs using a strict pattern (and alas less guard-notation,
        -- to ensure we only evaluate it when needed).
        ND Int
e Integer
r = Integer -> Integer -> ND
ilog (Integer
bInteger -> Integer -> Integer
forall a. Num a => a -> a -> a
*Integer
b) Integer
n

-- TODO(2024-04-11): Benchmark this change in the implementation
-- (relative to using @(,)@ and @($!)@).  Also, need to re-run all
-- the benchmarks anyways, to see how things've changed on newer GHC.
data ND = ND {-#UNPACK#-}!Int !Integer


-- | Compute the number of base-@2^p@ digits required to represent a
-- number @n@. N.B., this implementation is unsafe and will throw
-- errors if the base power is non-positive, or if the number is
-- negative. For bases which are not a power of 2, see 'numDigits'
-- for a more general implementation.
numTwoPowerDigits :: (Integral a, Bits a) => Int -> a -> Int
{-# INLINE numTwoPowerDigits #-}
numTwoPowerDigits :: forall a. (Integral a, Bits a) => Int -> a -> Int
numTwoPowerDigits !Int
p !a
n0
    | Int
p  Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
0   = [Char] -> Int
forall a. HasCallStack => [Char] -> a
error ([Char]
_numTwoPowerDigits [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
_nonpositiveBase)
    | a
n0 a -> a -> Bool
forall a. Ord a => a -> a -> Bool
<  a
0   = [Char] -> Int
forall a. HasCallStack => [Char] -> a
error ([Char]
_numTwoPowerDigits [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
_negativeNumber)
    | a
n0 a -> a -> Bool
forall a. Eq a => a -> a -> Bool
== a
0   = Int
1
    -- BUG: need to check n0 to be sure we won't overflow Int
    | Bool
otherwise = Int -> a -> Int
forall {t} {t}. (Ord t, Num t, Num t, Bits t) => t -> t -> t
go Int
0 a
n0
    where
    go :: t -> t -> t
go !t
d !t
n
        | t
n t -> t -> Bool
forall a. Ord a => a -> a -> Bool
> t
0     = t -> t -> t
go (t
dt -> t -> t
forall a. Num a => a -> a -> a
+t
1) (t
n t -> Int -> t
forall a. Bits a => a -> Int -> a
`shiftR` Int
p)
        | Bool
otherwise = t
d


-- This implementation is from:
-- <http://www.serpentine.com/blog/2013/03/20/whats-good-for-c-is-good-for-haskell/>
--
-- | Compute the number of base-@10@ digits required to represent
-- a number @n@. N.B., this implementation is unsafe and will throw
-- errors if the number is negative.
numDecimalDigits :: (Integral a) => a -> Int
{-# INLINE numDecimalDigits #-}
numDecimalDigits :: forall a. Integral a => a -> Int
numDecimalDigits a
n0
    | a
n0 a -> a -> Bool
forall a. Ord a => a -> a -> Bool
< a
0     = [Char] -> Int
forall a. HasCallStack => [Char] -> a
error ([Char]
_numDecimalDigits [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
_negativeNumber)
    -- Unfortunately this causes significant (1.2x) slowdown since
    -- GHC can't see it will always fail for types other than Integer...
    -- TODO(2024-04-11): See if we can't do more static-analysis
    -- code to optimize this path (a~la my C++ safe comparisons)
    | a
n0 a -> a -> Bool
forall a. Ord a => a -> a -> Bool
> a
limit = Integer -> Integer -> Int
numDigits Integer
10 (a -> Integer
forall a. Integral a => a -> Integer
toInteger a
n0)
    | Bool
otherwise  = Int -> Word64 -> Int
forall {t} {t}. (Num t, Integral t) => t -> t -> t
go Int
1 (a -> Word64
forall a b. (Integral a, Num b) => a -> b
fromIntegral a
n0 :: Word64)
    where
    limit :: a
limit = Word64 -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word64
forall a. Bounded a => a
maxBound :: Word64)

    fin :: a -> a -> a
fin a
n a
bound = if a
n a -> a -> Bool
forall a. Ord a => a -> a -> Bool
>= a
bound then a
1 else a
0
    go :: t -> t -> t
go !t
k !t
n
        | t
n t -> t -> Bool
forall a. Ord a => a -> a -> Bool
< t
10        = t
k
        | t
n t -> t -> Bool
forall a. Ord a => a -> a -> Bool
< t
100       = t
k t -> t -> t
forall a. Num a => a -> a -> a
+ t
1
        | t
n t -> t -> Bool
forall a. Ord a => a -> a -> Bool
< t
1000      = t
k t -> t -> t
forall a. Num a => a -> a -> a
+ t
2
        | t
n t -> t -> Bool
forall a. Ord a => a -> a -> Bool
< t
1000000000000 =
            t
k t -> t -> t
forall a. Num a => a -> a -> a
+ if t
n t -> t -> Bool
forall a. Ord a => a -> a -> Bool
< t
100000000
                then if t
n t -> t -> Bool
forall a. Ord a => a -> a -> Bool
< t
1000000
                    then if t
n t -> t -> Bool
forall a. Ord a => a -> a -> Bool
< t
10000
                        then t
3
                        else t
4 t -> t -> t
forall a. Num a => a -> a -> a
+ t -> t -> t
forall {a} {a}. (Ord a, Num a) => a -> a -> a
fin t
n t
100000
                    else t
6 t -> t -> t
forall a. Num a => a -> a -> a
+ t -> t -> t
forall {a} {a}. (Ord a, Num a) => a -> a -> a
fin t
n t
10000000
                else if t
n t -> t -> Bool
forall a. Ord a => a -> a -> Bool
< t
10000000000
                    then t
8 t -> t -> t
forall a. Num a => a -> a -> a
+ t -> t -> t
forall {a} {a}. (Ord a, Num a) => a -> a -> a
fin t
n t
1000000000
                    else t
10 t -> t -> t
forall a. Num a => a -> a -> a
+ t -> t -> t
forall {a} {a}. (Ord a, Num a) => a -> a -> a
fin t
n t
100000000000
        | Bool
otherwise = t -> t -> t
go (t
k t -> t -> t
forall a. Num a => a -> a -> a
+ t
12) (t
n t -> t -> t
forall a. Integral a => a -> a -> a
`quot` t
1000000000000)


_numDigits :: String
_numDigits :: [Char]
_numDigits = [Char]
"numDigits"
{-# NOINLINE _numDigits #-}

_numTwoPowerDigits :: String
_numTwoPowerDigits :: [Char]
_numTwoPowerDigits = [Char]
"numTwoPowerDigits"
{-# NOINLINE _numTwoPowerDigits #-}

_numDecimalDigits :: String
_numDecimalDigits :: [Char]
_numDecimalDigits = [Char]
"numDecimalDigits"
{-# NOINLINE _numDecimalDigits #-}

_nonpositiveBase :: String
_nonpositiveBase :: [Char]
_nonpositiveBase = [Char]
": base must be greater than one"
{-# NOINLINE _nonpositiveBase #-}

_negativeNumber :: String
_negativeNumber :: [Char]
_negativeNumber = [Char]
": number must be non-negative"
{-# NOINLINE _negativeNumber #-}

----------------------------------------------------------------
----------------------------------------------------------- fin.