Text/Trifecta/Parser/Token/Prim.hs

-----------------------------------------------------------------------------
-- |
-- Module      :  Text.Trifecta.Parser.Token.Prim
-- Copyright   :  (c) Edward Kmett 2011,
--                (c) Daan Leijen 1999-2001
-- License     :  BSD3
-- 
-- Maintainer  :  ekmett@gmail.com
-- Stability   :  provisional
-- Portability :  non-portable
-- 
-----------------------------------------------------------------------------
module Text.Trifecta.Parser.Token.Prim
  ( charLiteral'
  , characterChar
  , stringLiteral'
  , natural'
  , integer'
  , double'
  , naturalOrDouble'
  , decimal
  , hexadecimal
  , octal
  ) where

import Data.Char (digitToInt)
import Data.Foldable
import Control.Applicative
import Text.Trifecta.Parser.Class
import Text.Trifecta.Parser.Char
import Text.Trifecta.Parser.Combinators
import Text.Trifecta.Highlight.Prim

-- | This parser parses a single literal character. Returns the
-- literal character value. This parsers deals correctly with escape
-- sequences. The literal character is parsed according to the grammar
-- rules defined in the Haskell report (which matches most programming
-- languages quite closely). 
--
-- This parser does NOT swallow trailing whitespace. 
charLiteral' :: MonadParser m => m Char
charLiteral' = highlight CharLiteral (between (char '\'') (char '\'' <?> "end of character") characterChar)
          <?> "character" 

characterChar, charEscape, charLetter :: MonadParser m => m Char
characterChar = charLetter <|> charEscape
            <?> "literal character"
charEscape = highlight EscapeCode $ char '\\' *> escapeCode
charLetter = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026'))

-- | This parser parses a literal string. Returns the literal
-- string value. This parsers deals correctly with escape sequences and
-- gaps. The literal string is parsed according to the grammar rules
-- defined in the Haskell report (which matches most programming
-- languages quite closely). 
--
-- This parser does NOT swallow trailing whitespace
stringLiteral' :: MonadParser m => m String
stringLiteral' = highlight StringLiteral lit where
  lit = Prelude.foldr (maybe id (:)) "" <$> between (char '"') (char '"' <?> "end of string") (many stringChar) 
    <?> "literal string"
  stringChar = Just <$> stringLetter 
           <|> stringEscape 
       <?> "string character"
  stringLetter    = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026'))

  stringEscape = highlight EscapeCode $ char '\\' *> esc where
    esc = Nothing <$ escapeGap 
      <|> Nothing <$ escapeEmpty 
      <|> Just <$> escapeCode
  escapeEmpty = char '&'
  escapeGap = do skipSome space
                 char '\\' <?> "end of string gap"

escapeCode :: MonadParser m => m Char
escapeCode = (charEsc <|> charNum <|> charAscii <|> charControl) <?> "escape code"
  where 
  charControl = (\c -> toEnum (fromEnum c - fromEnum 'A')) <$> (char '^' *> upper)
  charNum     = toEnum . fromInteger <$> num where
    num = decimal 
      <|> (char 'o' *> number 8 octDigit)
      <|> (char 'x' *> number 16 hexDigit)
  charEsc = choice $ parseEsc <$> escMap
  parseEsc (c,code) = code <$ char c
  escMap = zip ("abfnrtv\\\"\'") ("\a\b\f\n\r\t\v\\\"\'")
  charAscii = choice $ parseAscii <$> asciiMap
  parseAscii (asc,code) = try $ code <$ string asc
  asciiMap = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2)
  ascii2codes, ascii3codes :: [String]
  ascii2codes = [ "BS","HT","LF","VT","FF","CR","SO"
                , "SI","EM","FS","GS","RS","US","SP"]
  ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK"
                ,"BEL","DLE","DC1","DC2","DC3","DC4","NAK"
                ,"SYN","ETB","CAN","SUB","ESC","DEL"]
  ascii2, ascii3 :: [Char]
  ascii2 = ['\BS','\HT','\LF','\VT','\FF','\CR','\SO','\SI'
           ,'\EM','\FS','\GS','\RS','\US','\SP']
  ascii3 = ['\NUL','\SOH','\STX','\ETX','\EOT','\ENQ','\ACK'
           ,'\BEL','\DLE','\DC1','\DC2','\DC3','\DC4','\NAK'
           ,'\SYN','\ETB','\CAN','\SUB','\ESC','\DEL']
  

-- | This parser parses a natural number (a positive whole
-- number). Returns the value of the number. The number can be
-- specified in 'decimal', 'hexadecimal' or
-- 'octal'. The number is parsed according to the grammar
-- rules in the Haskell report. 
--
-- This parser does NOT swallow trailing whitespace. 
natural' :: MonadParser m => m Integer
natural' = highlight Number nat <?> "natural"

number :: MonadParser m => Integer -> m Char -> m Integer
number base baseDigit = do
  digits <- some baseDigit
  return $! foldl' (\x d -> base*x + toInteger (digitToInt d)) 0 digits

-- | This parser parses an integer (a whole number). This parser
-- is like 'natural' except that it can be prefixed with
-- sign (i.e. \'-\' or \'+\'). Returns the value of the number. The
-- number can be specified in 'decimal', 'hexadecimal'
-- or 'octal'. The number is parsed according
-- to the grammar rules in the Haskell report. 
--
-- This parser does NOT swallow trailing whitespace. 
--
-- Also, unlike the 'integer' parser, this parser does not admit spaces
-- between the sign and the number.
        
integer' :: MonadParser m => m Integer
integer' = int <?> "integer"

sign :: MonadParser m => m (Integer -> Integer)
sign = highlight Operator
     $ negate <$ char '-'
   <|> id <$ char '+'
   <|> pure id

int :: MonadParser m => m Integer
int = {-lexeme-} sign <*> highlight Number nat
nat, zeroNumber :: MonadParser m => m Integer
nat = zeroNumber <|> decimal
zeroNumber = char '0' *> (hexadecimal <|> octal <|> decimal <|> return 0) <?> ""

-- | This parser parses a floating point value. Returns the value
-- of the number. The number is parsed according to the grammar rules
-- defined in the Haskell report. 
--
-- This parser does NOT swallow trailing whitespace. 

double' :: MonadParser m => m Double
double' = highlight Number floating <?> "double"

floating :: MonadParser m => m Double
floating = decimal >>= fractExponent

fractExponent :: MonadParser m => Integer -> m Double
fractExponent n = (\fract expo -> (fromInteger n + fract) * expo) <$> fraction <*> option 1.0 exponent'
              <|> (fromInteger n *) <$> exponent' where
  fraction = Prelude.foldr op 0.0 <$> (char '.' *> (some digit <?> "fraction"))
  op d f = (f + fromIntegral (digitToInt d))/10.0
  exponent' = do
       _ <- oneOf "eE"
       f <- sign
       e <- decimal <?> "exponent"
       return (power (f e))
    <?> "exponent"
  power e  
    | e < 0     = 1.0/power(-e)
    | otherwise = fromInteger (10^e)


-- | This parser parses either 'natural' or a 'double'.
-- Returns the value of the number. This parsers deals with
-- any overlap in the grammar rules for naturals and floats. The number
-- is parsed according to the grammar rules defined in the Haskell report. 
--
-- This parser does NOT swallow trailing whitespace. 

naturalOrDouble' :: MonadParser m => m (Either Integer Double)
naturalOrDouble' = highlight Number natDouble <?> "number"

natDouble, zeroNumFloat, decimalFloat :: MonadParser m => m (Either Integer Double)
natDouble 
    = char '0' *> zeroNumFloat
  <|> decimalFloat
zeroNumFloat
    = Left <$> (hexadecimal <|> octal)
  <|> decimalFloat
  <|> fractFloat 0
  <|> return (Left 0)
decimalFloat = do 
  n <- decimal
  option (Left n) (fractFloat n)

fractFloat :: MonadParser m => Integer -> m (Either Integer Double)
fractFloat n = Right <$> fractExponent n

-- | Parses a positive whole number in the decimal system. Returns the
-- value of the number. 

decimal :: MonadParser m => m Integer
decimal = number 10 digit

-- | Parses a positive whole number in the hexadecimal system. The number
-- should be prefixed with \"x\" or \"X\". Returns the value of the
-- number. 

hexadecimal :: MonadParser m => m Integer
hexadecimal = oneOf "xX" *> number 16 hexDigit

-- | Parses a positive whole number in the octal system. The number
-- should be prefixed with \"o\" or \"O\". Returns the value of the
-- number. 

octal :: MonadParser m => m Integer
octal = oneOf "oO" *> number 8 octDigit