{-# LANGUAGE BangPatterns #-}

{-|
Module      : Std.Data.Parser.Numeric
Description : Textual numeric parsers.
Copyright   : (c) Dong Han, 2017-2019
License     : BSD
Maintainer  : winterland1989@gmail.com
Stability   : experimental
Portability : non-portable

Textual numeric parsers.

-}

module Std.Data.Parser.Numeric
  ( -- * decimal
    uint, int
    -- * hex
  , hex
    -- * fractional
  , rational
  , float, double
  , scientific
  , scientifically
  ) where

import           Control.Applicative
import           Control.Monad
import           Data.Bits
import           Data.Int
import qualified Data.Primitive.PrimArray as A
import qualified Data.Scientific          as Sci
import           Data.Word
import           Data.Word8               (isDigit, isHexDigit)
import           Foreign.Ptr              (IntPtr)
import           Std.Data.Parser.Base     (Parser)
import qualified Std.Data.Parser.Base     as P
import qualified Std.Data.Vector.Base     as V
import qualified Std.Data.Vector.Extra    as V

minus, plus, littleE, bigE, dot :: Word8
minus    = 45
plus     = 43
littleE = 101
bigE    = 69
dot      = 46

-- | Parse and decode an unsigned hex number.  The hex digits
-- @\'a\'@ through @\'f\'@ may be upper or lower case.
--
-- This parser does not accept a leading @\"0x\"@ string, and consider
-- sign bit part of the binary hex nibbles, i.e.
-- 'parse hex "0xFF" == Right (-1 :: Int8)'
--
hex :: (Integral a, Bits a) => Parser a
{-# INLINE hex #-}
{-# SPECIALIZE INLINE hex :: Parser Int    #-}
{-# SPECIALIZE INLINE hex :: Parser Int64  #-}
{-# SPECIALIZE INLINE hex :: Parser Int32  #-}
{-# SPECIALIZE INLINE hex :: Parser Int16  #-}
{-# SPECIALIZE INLINE hex :: Parser Int8   #-}
{-# SPECIALIZE INLINE hex :: Parser Word   #-}
{-# SPECIALIZE INLINE hex :: Parser Word64 #-}
{-# SPECIALIZE INLINE hex :: Parser Word32 #-}
{-# SPECIALIZE INLINE hex :: Parser Word16 #-}
{-# SPECIALIZE INLINE hex :: Parser Word8  #-}
{-# SPECIALIZE INLINE hex :: Parser IntPtr #-}
hex = do
    (V.Vec arr s l) <- P.takeWhile1 isHexDigit
    return $! hexLoop arr s (l-1) 0
  where
    hexLoop arr !i !j !acc
        | j == 0 = acc .|. w2iHex (A.indexPrimArray arr i)
        | otherwise =
            let acc' = acc .|. w2iHex (A.indexPrimArray arr i) `unsafeShiftL` (j*4)
            in hexLoop arr (i+1) (j-1) acc'

w2iHex :: (Integral a) => Word8 -> a
{-# INLINE w2iHex #-}
w2iHex w
    | w <= 57              = fromIntegral w - 48
    | 65 <= w && w <= 70   = fromIntegral w - 55
    | 97 <= w && w <= 102  = fromIntegral w - 87


-- | Parse and decode an unsigned decimal number.
uint :: Integral a => Parser a
{-# INLINE uint #-}
{-# SPECIALIZE INLINE uint :: Parser Int    #-}
{-# SPECIALIZE INLINE uint :: Parser Int64  #-}
{-# SPECIALIZE INLINE uint :: Parser Int32  #-}
{-# SPECIALIZE INLINE uint :: Parser Int16  #-}
{-# SPECIALIZE INLINE uint :: Parser Int8   #-}
{-# SPECIALIZE INLINE uint :: Parser Word   #-}
{-# SPECIALIZE INLINE uint :: Parser Word64 #-}
{-# SPECIALIZE INLINE uint :: Parser Word32 #-}
{-# SPECIALIZE INLINE uint :: Parser Word16 #-}
{-# SPECIALIZE INLINE uint :: Parser Word8  #-}
uint = do
    (V.Vec arr s l) <- P.takeWhile1 isDigit
    return $! decLoop arr s (l-1) 0
  where
    decLoop arr !i !j !acc
        | j == 0 = acc*10 + w2iDec (A.indexPrimArray arr i)
        | otherwise =
            let acc' = acc*10 + w2iDec (A.indexPrimArray arr i)
            in decLoop arr (i+1) (j-1) acc'

w2iDec :: (Integral a) => Word8 -> a
{-# INLINE w2iDec #-}
w2iDec w = fromIntegral w - 48

-- | Parse a decimal number with an optional leading @\'+\'@ or @\'-\'@ sign
-- character.
int :: Integral a => Parser a
{-# INLINE int #-}
{-# SPECIALIZE INLINE int :: Parser Int    #-}
{-# SPECIALIZE INLINE int :: Parser Int64  #-}
{-# SPECIALIZE INLINE int :: Parser Int32  #-}
{-# SPECIALIZE INLINE int :: Parser Int16  #-}
{-# SPECIALIZE INLINE int :: Parser Int8   #-}
{-# SPECIALIZE INLINE int :: Parser Word   #-}
{-# SPECIALIZE INLINE int :: Parser Word64 #-}
{-# SPECIALIZE INLINE int :: Parser Word32 #-}
{-# SPECIALIZE INLINE int :: Parser Word16 #-}
{-# SPECIALIZE INLINE int :: Parser Word8  #-}
int = do
    w <- P.peek
    if w == minus
        then P.skip 1 >> negate <$> uint
        else if w == plus then P.skip 1 >> uint else uint

-- | Parse a rational number.
--
-- The syntax accepted by this parser is the same as for 'double'.
--
-- /Note/: this parser is not safe for use with inputs from untrusted
-- sources.  An input with a suitably large exponent such as
-- @"1e1000000000"@ will cause a huge 'Integer' to be allocated,
-- resulting in what is effectively a denial-of-service attack.
--
-- In most cases, it is better to use 'double' or 'scientific'
-- instead.
--
rational :: Fractional a => Parser a
{-# INLINE rational #-}
rational = scientifically realToFrac

-- | Parse a rational number and round to 'Double'.
--
-- This parser accepts an optional leading sign character, followed by
-- at least one decimal digit.  The syntax similar to that accepted by
-- the 'read' function, with the exception that a trailing @\'.\'@ or
-- @\'e\'@ /not/ followed by a number is not consumed.
--
-- Examples with behaviour identical to 'read':
--
-- >parseOnly double "3"     == Right ("",1,3.0)
-- >parseOnly double "3.1"   == Right ("",3,3.1)
-- >parseOnly double "3e4"   == Right ("",3,30000.0)
-- >parseOnly double "3.1e4" == Right ("",5,31000.0)
--
-- >parseOnly double ".3"    == Left (".3",0,"takeWhile1")
-- >parseOnly double "e3"    == Left ("e3",0,"takeWhile1")
--
-- Examples of differences from 'read':
--
-- >parseOnly double "3.foo" == Right (".foo",1,3.0)
-- >parseOnly double "3e"    == Right ("e",1,3.0)
--
-- This function does not accept string representations of \"NaN\" or
-- \"Infinity\".
--
double :: Parser Double
{-# INLINE double #-}
double = scientifically Sci.toRealFloat

-- | Parse a rational number and round to 'Float'.
--
-- Single precision version of 'double'.
float :: Parser Float
{-# INLINE float #-}
float = scientifically Sci.toRealFloat

-- | Parse a scientific number.
--
-- The syntax accepted by this parser is the same as for 'double'.
--
scientific :: Parser Sci.Scientific
{-# INLINE scientific #-}
scientific = scientifically id

-- | Parse a scientific number and convert to result using a user supply function.
--
-- The syntax accepted by this parser is the same as for 'double'.
--
scientifically :: (Sci.Scientific -> a) -> Parser a
{-# INLINE scientifically #-}
scientifically h = do
    sign <- P.peek
    when (sign == plus || sign == minus) (P.skip 1)
    intPart <- uint
    sci <- (do (V.Vec arr s l) <- P.word8 dot >> P.takeWhile1 isDigit
               let intPart' = intPart * (10 ^ l)
                   fracPart = decLoop arr s (l-1) 0
               parseE (intPart' + fracPart) l
           ) <|> (parseE intPart 0)

    if sign /= minus then return $! h sci else return $! h (negate sci)
  where
    {-# INLINE parseE #-}
    parseE c e =
        (do _ <- P.satisfy (\w -> w ==  littleE || w == bigE)
            (Sci.scientific c . (subtract e) <$> int)) <|> return (Sci.scientific c (negate e))

    decLoop arr !i !j !acc
        | j == 0 = acc*10 + w2iDec (A.indexPrimArray arr i)
        | otherwise =
            let acc' = acc*10 + w2iDec (A.indexPrimArray arr i)
            in decLoop arr (i+1) (j-1) acc'