Text/Parsec/Number.hs

{-# LANGUAGE FlexibleContexts #-}
{- |
Module      :  Text/Parsec/Number.hs
Description :  portable number parsers
Copyright   :  (c) C. Maeder 2013
License     :  BSD

Maintainer  :  chr.maeder@web.de
Stability   :  provisional
Portability :  non-portable (FlexibleContexts)

a copy of Text.ParserCombinators.Parsec.Number from the parsec-number
package adjusted for parsec3.


The basic top-level number parsers are 'decimal', 'nat', 'int', 'fractional',
'decimalFract', 'natFract', 'floating', 'decimalFloat', 'natFloat'.

`natFloat` parses numeric literals as defined for Haskell. All numbers are
unsigned, i.e. non-negative. Leading zeros are allowed. At least a single
digit is required. A decimal point must be preceded and followed by at least
one digit.

A result type @(Either Integer Double)@ can be converted to a final @Double@
using @(either fromInteger id)@ as is done for the parsers 'fractional2' and
'floating2'.

The parser 'nat', 'natFract' and 'natFloat' parse hexadecimal and octal
 integrals (beginning with @0x@, @0X@, @0o@ or @0O@) that are disallowed when
using 'decimal', 'decimalFract' and 'decimalFloat'.

The parsers 'decimalFract' and 'natFract' only allow a decimal point, whereas
'decimalFloat' and 'natFloat' also allow the exponent notation using @e@ or
@E@.

The parser 'fractional' requires a decimal point between at least two
digits and 'floating' requires either a decimal point or the exponent
notation using @e@ or @E@. (Both parsers do not return integral values and do
not support hexadecimal or octal values).

Signed numbers can be parsed using \"'Control.Monad.ap' 'sign'\" as is done
for the 'int' parser.

A couple of parsers have been added that take a @Bool@ argument, where @False@
does not require any digit following the decimal dot. The parsers
'fractional3' and 'floating3' allow even to start a number with the decimal
dot. Also parsers 'hexFract' and 'hexFloat' for hexadecimal fractions and
floats have been added.

Note that most top-level parsers succeed on a string like \"@1.0e-100@\", but
only the floating point parsers consume the whole string. The fractional
parsers stop before the exponent and the integral parsers before the decimal
point. You may wish to check for the end of a string using
'Text.ParserCombinators.Parsec.eof', i.e. \"@liftM2 const nat eof@\".

The returned values may be inaccurate. 'Int' may overflow. Fractional numbers
should be accurate as only one division is performed. Floating point numbers
with decimal exponents may be inaccurate due to using '**'. Rational numbers
are needed for correct conversions, but large positive or negative exponents
may be a problem and the class `RealFloat` is needed to check for minimal and
maximal exponents.

-}

module Text.Parsec.Number where

import Text.Parsec
import Data.Char (digitToInt)
import Control.Monad (liftM, ap)

-- * floats

-- | parse a decimal unsigned floating point number containing a dot, e or E
floating :: (Floating f, Stream s m Char) => ParsecT s u m f
floating = do
  n <- decimal
  fractExponent n

-- | parse a floating point number possibly containing a decimal dot, e or E
floating2 :: (Floating f, Stream s m Char) => Bool -> ParsecT s u m f
floating2 = liftM (either fromInteger id) . decFloat

{- | parse a floating point number possibly starting with a decimal dot.
Note, that a single decimal point or a number starting with @.E@ is illegal.
-}
floating3 :: (Floating f, Stream s m Char) => Bool -> ParsecT s u m f
floating3 b = genFractAndExp 0 (fraction True) exponentFactor <|> floating2 b

{- | same as 'floating' but returns a non-negative integral wrapped by Left if
a fractional part and exponent is missing -}
decimalFloat :: (Integral i, Floating f, Stream s m Char)
  => ParsecT s u m (Either i f)
decimalFloat = decFloat True

{- | same as 'floating' but returns a non-negative integral wrapped by Left if
a fractional part and exponent is missing -}
decFloat :: (Integral i, Floating f, Stream s m Char)
  => Bool -> ParsecT s u m (Either i f)
decFloat b = do
  n <- decimal
  option (Left n) $ liftM Right $ fractExp (toInteger n) b

-- | parse a hexadecimal floating point number
hexFloat :: (Integral i, Floating f, Stream s m Char)
  => Bool -> ParsecT s u m (Either i f)
hexFloat b = do
  n <- hexnum
  option (Left n) $ liftM Right $ hexFractExp (toInteger n) b

-- | parse hexadecimal, octal or decimal integrals or 'floating'
natFloat :: (Integral i, Floating f, Stream s m Char)
  => ParsecT s u m (Either i f)
natFloat = (char '0' >> zeroNumFloat) <|> decimalFloat

-- ** float parts

{- | parse any hexadecimal, octal, decimal or floating point number following
a zero -}
zeroNumFloat :: (Integral i, Floating f, Stream s m Char)
  => ParsecT s u m (Either i f)
zeroNumFloat =
  liftM Left hexOrOct
  <|> decimalFloat
  <|> liftM Right (fractExponent 0)
  <|> return (Left 0)

-- | parse a floating point number given the number before a dot, e or E
fractExponent :: (Floating f, Stream s m Char) => Integer -> ParsecT s u m f
fractExponent i = fractExp i True

-- | parse a hex floating point number given the number before a dot or p
hexFractExp :: (Floating f, Stream s m Char) => Integer -> Bool
  -> ParsecT s u m f
hexFractExp i b = genFractExp i (hexFraction b) hexExponentFactor

-- | parse a floating point number given the number before a dot, e or E
fractExp :: (Floating f, Stream s m Char) => Integer -> Bool
  -> ParsecT s u m f
fractExp i b = genFractExp i (fraction b) exponentFactor

{- | parse a floating point number given the number before the fraction and
exponent -}
genFractExp :: (Floating f, Stream s m Char) => Integer -> ParsecT s u m f
  -> ParsecT s u m (f -> f) -> ParsecT s u m f
genFractExp i frac expo = case fromInteger i of
  f -> genFractAndExp f frac expo <|> liftM ($ f) expo

{- | parse a floating point number given the number before the fraction and
exponent that must follow the fraction -}
genFractAndExp :: (Floating f, Stream s m Char) => f -> ParsecT s u m f
  -> ParsecT s u m (f -> f) -> ParsecT s u m f
genFractAndExp f frac = ap (liftM (flip id . (f +)) frac) . option id

-- | parse a floating point exponent starting with e or E
exponentFactor :: (Floating f, Stream s m Char) => ParsecT s u m (f -> f)
exponentFactor = oneOf "eE" >> extExponentFactor 10 <?> "exponent"

-- | pare a hexadecimal floating point starting with p (IEEE 754)
hexExponentFactor :: (Floating f, Stream s m Char) => ParsecT s u m (f -> f)
hexExponentFactor = char 'p' >> extExponentFactor 2 <?> "hex-exponent"

{- | parse a signed decimal and compute the exponent factor given a base.
For hexadecimal exponential notation (IEEE 754) the base is 2 and the
leading character a p. -}
extExponentFactor :: (Floating f, Stream s m Char)
  => Int -> ParsecT s u m (f -> f)
extExponentFactor base =
  liftM (flip (*) . exponentValue base) (ap sign (decimal <?> "exponent"))

{- | compute the factor given by the number following e or E. This
implementation uses @**@ rather than @^@ for more efficiency for large
integers. -}
exponentValue :: Floating f => Int -> Integer -> f
exponentValue base = (fromIntegral base **) . fromInteger

-- * fractional numbers (with just a decimal point between digits)

-- | parse a fractional number containing a decimal dot
fractional :: (Fractional f, Stream s m Char) => ParsecT s u m f
fractional = do
  n <- decimal
  fractFract n True

-- | parse a fractional number possibly containing a decimal dot
fractional2 :: (Fractional f, Stream s m Char) => Bool -> ParsecT s u m f
fractional2 = liftM (either fromInteger id) . decFract

-- | parse a fractional number possibly starting with a decimal dot
fractional3 :: (Fractional f, Stream s m Char) => Bool -> ParsecT s u m f
fractional3 b = fractFract 0 True <|> fractional2 b

-- | a decimal fractional
decFract :: (Integral i, Fractional f, Stream s m Char)
  => Bool -> ParsecT s u m (Either i f)
decFract b = do
  n <- decimal
  option (Left n) $ liftM Right $ fractFract (toInteger n) b

-- | a hexadecimal fractional
hexFract :: (Integral i, Fractional f, Stream s m Char)
  => Bool -> ParsecT s u m (Either i f)
hexFract b = do
  n <- hexnum
  option (Left n) $ liftM Right $ genFractFract (toInteger n) $ hexFraction b

{- | same as 'fractional' but returns a non-negative integral wrapped by Left if
a fractional part is missing -}
decimalFract :: (Integral i, Fractional f, Stream s m Char)
  => ParsecT s u m (Either i f)
decimalFract = decFract True

-- | parse hexadecimal, octal or decimal integrals or 'fractional'
natFract :: (Integral i, Fractional f, Stream s m Char)
  => ParsecT s u m (Either i f)
natFract = (char '0' >> zeroNumFract) <|> decimalFract

{- | parse any hexadecimal, octal, decimal or fractional number following
a zero -}
zeroNumFract :: (Integral i, Fractional f, Stream s m Char)
  => ParsecT s u m (Either i f)
zeroNumFract =
  liftM Left hexOrOct
  <|> decimalFract
  <|> liftM Right (fractFract 0 True)
  <|> return (Left 0)

-- ** fractional parts

-- | parse a fractional number given the number before the dot
fractFract :: (Fractional f, Stream s m Char) => Integer -> Bool
  -> ParsecT s u m f
fractFract i = genFractFract i . fraction

{- | combine the given number before the dot with a parser for the fractional
part -}
genFractFract :: (Fractional f, Stream s m Char) => Integer -> ParsecT s u m f
  -> ParsecT s u m f
genFractFract i = liftM (fromInteger i +)

-- | parse a dot followed by decimal digits as fractional part
fraction :: (Fractional f, Stream s m Char) => Bool -> ParsecT s u m f
fraction b = baseFraction b 10 digit

-- | parse a dot followed by hexadecimal digits as fractional part
hexFraction :: (Fractional f, Stream s m Char) => Bool -> ParsecT s u m f
hexFraction b = baseFraction b 16 hexDigit

-- | parse a dot followed by base dependent digits as fractional part
baseFraction :: (Fractional f, Stream s m Char) => Bool -> Int
  -> ParsecT s u m Char -> ParsecT s u m f
baseFraction requireDigit base baseDigit = char '.' >>
  liftM (fractionValue base)
    ((if requireDigit then many1 else many) baseDigit <?> "fraction")
  <?> "fraction"

{- | compute the fraction given by a sequence of digits following the dot.
Only one division is performed and trailing zeros are ignored. -}
fractionValue :: Fractional f => Int -> String -> f
fractionValue base = uncurry (/)
  . foldl (\ (s, p) d ->
           (p * fromIntegral (digitToInt d) + s, p * fromIntegral base))
    (0, 1) . dropWhile (== '0') . reverse

-- * integers and naturals

{- | parse an optional 'sign' immediately followed by a 'nat'. Note, that in
Daan Leijen's code the sign was wrapped as lexeme in order to skip comments
and spaces in between. -}
int :: (Integral i, Stream s m Char) => ParsecT s u m i
int = ap sign nat

-- | parse an optional plus or minus sign, returning 'negate' or 'id'
sign :: (Num a, Stream s m Char) => ParsecT s u m (a -> a)
sign = (char '-' >> return negate) <|> (optional (char '+') >> return id)

{- | parse plain non-negative decimal numbers given by a non-empty sequence
of digits -}
decimal :: (Integral i, Stream s m Char) => ParsecT s u m i
decimal = number 10 digit

-- | parse a binary number
binary :: (Integral i, Stream s m Char) => ParsecT s u m i
binary = number 2 $ oneOf "01"

-- | parse non-negative hexadecimal, octal or decimal numbers
nat :: (Integral i, Stream s m Char) => ParsecT s u m i
nat = zeroNumber <|> decimal

-- ** natural parts

-- | parse a 'nat' syntactically starting with a zero
zeroNumber :: (Integral i, Stream s m Char) => ParsecT s u m i
zeroNumber =
  char '0' >> (hexOrOct <|> decimal <|> return 0) <?> ""

-- | hexadecimal or octal number
hexOrOct :: (Integral i, Stream s m Char) => ParsecT s u m i
hexOrOct = hexadecimal <|> octal

-- | parse a hexadecimal number preceded by an x or X character
hexadecimal :: (Integral i, Stream s m Char) => ParsecT s u m i
hexadecimal = oneOf "xX" >> hexnum

-- | parse a hexadecimal number
hexnum :: (Integral i, Stream s m Char) => ParsecT s u m i
hexnum = number 16 hexDigit

-- | parse an octal number preceded by an o or O character
octal :: (Integral i, Stream s m Char) => ParsecT s u m i
octal = oneOf "oO" >> number 8 octDigit

-- | parse a non-negative number given a base and a parser for the digits
number :: (Integral i, Stream s m t) => Int -> ParsecT s u m Char
  -> ParsecT s u m i
number base baseDigit = do
  n <- liftM (numberValue base) (many1 baseDigit)
  seq n (return n)

-- | compute the value from a string of digits using a base
numberValue :: Integral i => Int -> String -> i
numberValue base =
  foldl (\ x -> ((fromIntegral base * x) +) . fromIntegral . digitToInt) 0