{- |
Module      :  Text/ParserCombinators/Parsec/Number.hs
Description :  portable number parsers
Copyright   :  (c) C. Maeder 2011
License     :  BSD

Maintainer  :  chr.maeder@web.de
Stability   :  provisional
Portability :  portable

adjusted and portable number parsers stolen from
Text.ParserCombinators.Parsec.Token

The basic top-level number parsers are 'decimal', 'nat', 'int', 'fractional',
'decimalFract', 'natFract', 'floating', 'decimalFloat', 'natFloat'.

`natFloat` parses numeric literals as defined for Haskell. All numbers are
unsigned, i.e. non-negative. Leading zeros are allowed. At least a single
digit is required. A decimal point must be preceded and followed by at least
one digit.

A result type @(Either Integer Double)@ can be converted to a final @Double@
using @(either fromIntegral id)@.

The parser 'nat', 'natFract' and 'natFloat' parse hexadecimal and octal
 integrals (beginning with @0x@, @0X@, @0o@ or @0O@) that are disallowed when
using 'decimal', 'decimalFract' and 'decimalFloat'.

The parsers 'decimalFract' and 'natFract' only allow a decimal point, whereas
'decimalFloat' and 'natFloat' also allow the exponent notation using @e@ or
@E@.

The parser 'fractional' requires a decimal point between at least two
digits and 'floating' requires either a decimal point or the exponent
notation using @e@ or @E@. (Both parsers do not return integral values and do
not support hexadecimal or octal values).

Signed numbers can be parsed using \"'Control.Monad.ap' 'sign'\" as is done
for the 'int' parser.

Note that all top-level parsers succeed on a string like \"@1.0e-100@\", but
only the floating point parsers consume the whole string. The fractional
parsers stop before the exponent and the integral parsers before the decimal
point. You may which to check for the end of a string using
'Text.ParserCombinators.Parsec.eof', i.e. @liftM2 const nat eof@.

-}

module Text.ParserCombinators.Parsec.Number where

import Text.ParserCombinators.Parsec
import Data.Char (digitToInt)
import Control.Monad (liftM, ap)

-- * floats

-- | parse a decimal unsigned floating point number containing a dot, e or E
floating :: Floating f => CharParser st f
floating = do
  n <- decimal
  fractExponent (n :: Integer)

{- | same as 'floating' but returns a non-negative integral wrapped by Left if
a fractional part and exponent is missing -}
decimalFloat :: (Integral i, Floating f) => CharParser st (Either i f)
decimalFloat = do
  n <- decimal
  option (Left n) (fractFloat n)

-- | parse hexadecimal, octal or decimal integrals or 'floating'
natFloat :: (Integral i, Floating f) => CharParser st (Either i f)
natFloat = (char '0' >> zeroNumFloat) <|> decimalFloat

-- ** float parts

{- | parse any hexadecimal, octal, decimal or floating point number following
a zero -}
zeroNumFloat :: (Integral i, Floating f) => CharParser st (Either i f)
zeroNumFloat =
  liftM Left (hexadecimal <|> octal)
  <|> decimalFloat
  <|> fractFloat 0
  <|> return (Left 0)

-- | same as 'fractExponent', returns a f wrapped by Right
fractFloat :: (Integral i, Floating f) => i -> CharParser st (Either i f)
fractFloat = liftM Right . fractExponent

-- | parse a floating point number given the number before a dot, e or E
fractExponent :: (Integral i, Floating f) => i -> CharParser st f
fractExponent i = case fromIntegral i of
  d -> ap (liftM (flip id . (d +)) fraction) (option id exponentFactor)
    <|> liftM ($ d) exponentFactor

-- | parse a floating point exponent starting with e or E
exponentFactor :: Floating f => CharParser st (f -> f)
exponentFactor = oneOf "eE"
  >> liftM (flip (*) . exponentValue) (ap sign (decimal <?> "exponent"))
  <?> "exponent"

{- | compute the factor given by the number following e or E. This
implementation uses @**@ rather than @^@ for more efficiency for large
integers. -}
exponentValue :: Floating f => Integer -> f
exponentValue = (10 **) . fromIntegral

-- * fractional numbers (with just a decimal point between digits)

-- | parse a fractional number containing a decimal dot
fractional :: Fractional f => CharParser st f
fractional = do
  n <- decimal
  fractFract (n :: Integer)

{- | same as 'fractional' but returns a non-negative integral wrapped by Left if
a fractional part is missing -}
decimalFract :: (Integral i, Fractional f) => CharParser st (Either i f)
decimalFract = do
  n <- decimal
  option (Left n) $ liftM Right $ fractFract n

-- | parse hexadecimal, octal or decimal integrals or 'fractional'
natFract :: (Integral i, Fractional f) => CharParser st (Either i f)
natFract = (char '0' >> zeroNumFract) <|> decimalFract

{- | parse any hexadecimal, octal, decimal or fractional number following
a zero -}
zeroNumFract :: (Integral i, Fractional f) => CharParser st (Either i f)
zeroNumFract =
  liftM Left (hexadecimal <|> octal)
  <|> decimalFract
  <|> liftM Right (fractFract (0 :: Integer))
  <|> return (Left 0)

-- ** fractional parts

-- | parse a fractional number given the number before the dot
fractFract :: (Integral i, Fractional f) => i -> CharParser st f
fractFract i = liftM (fromIntegral i +) fraction

-- | parse a dot followed by digits as fractional part
fraction :: Fractional f => CharParser st f
fraction =
  char '.' >> liftM fractionValue (many1 digit <?> "fraction") <?> "fraction"

-- | compute the fraction given by a sequence of digits following the dot
fractionValue :: Fractional f => String -> f
fractionValue = foldr (\ d -> (/ 10) . (+ fromIntegral (digitToInt d))) 0

-- * integers and naturals

{- | parse an optional 'sign' immediately followed by a 'nat'. Note, that in
Daan Leijen's code the sign was wrapped as lexeme in order to skip comments
and spaces in between. -}
int :: Integral i => CharParser st i
int = ap sign nat

-- | parse an optional plus or minus sign, returning 'negate' or 'id'
sign :: Num a => CharParser st (a -> a)
sign = (char '-' >> return negate) <|> (optional (char '+') >> return id)

{- | parse plain non-negative decimal numbers given by a non-empty sequence
of digits -}
decimal :: Integral i => CharParser st i
decimal = number 10 digit

-- | parse non-negative hexadecimal, octal or decimal numbers
nat :: Integral i => CharParser st i
nat = zeroNumber <|> decimal

-- ** natural parts

-- | parse a 'nat' syntactically starting with a zero
zeroNumber :: Integral i => CharParser st i
zeroNumber =
  char '0' >> (hexadecimal <|> octal <|> decimal <|> return 0) <?> ""

-- | parse a hexadecimal number preceded by an x or X character
hexadecimal :: Integral i => CharParser st i
hexadecimal = oneOf "xX" >> number 16 hexDigit

-- | parse an octal number preceded by an o or O character
octal :: Integral i => CharParser st i
octal = oneOf "oO" >> number 8 octDigit

-- | parse a non-negative number given a base and a parser for the digits
number :: Integral i => Int -> GenParser tok st Char -> GenParser tok st i
number base baseDigit = do
  n <- liftM (numberValue base) (many1 baseDigit)
  seq n (return n)

-- | compute the value from a string of digits using a base
numberValue :: Integral i => Int -> String -> i
numberValue base =
  foldl (\ x -> (fromIntegral base * x +) . fromIntegral . digitToInt) 0