-- |
-- Module      :  Text.Megaparsec.Byte.Lexer
-- Copyright   :  © 2015–2018 Megaparsec contributors
-- License     :  FreeBSD
--
-- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
-- Stability   :  experimental
-- Portability :  portable
--
-- Stripped-down version of "Text.Megaparsec.Char.Lexer" for streams of
-- bytes.
--
-- This module is intended to be imported qualified:
--
-- > import qualified Text.Megaparsec.Byte.Lexer as L

{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TypeFamilies        #-}

module Text.Megaparsec.Byte.Lexer
  ( -- * White space
    space
  , lexeme
  , symbol
  , symbol'
  , skipLineComment
  , skipBlockComment
  , skipBlockCommentNested
    -- * Numbers
  , decimal
  , binary
  , octal
  , hexadecimal
  , scientific
  , float
  , signed )
where

import Control.Applicative
import Data.Functor (void)
import Data.List (foldl')
import Data.Proxy
import Data.Scientific (Scientific)
import Data.Word (Word8)
import Text.Megaparsec
import Text.Megaparsec.Lexer
import qualified Data.Scientific      as Sci
import qualified Text.Megaparsec.Byte as B

----------------------------------------------------------------------------
-- White space

-- | Given comment prefix this function returns a parser that skips line
-- comments. Note that it stops just before the newline character but
-- doesn't consume the newline. Newline is either supposed to be consumed by
-- 'space' parser or picked up manually.

skipLineComment :: (MonadParsec e s m, Token s ~ Word8)
  => Tokens s          -- ^ Line comment prefix
  -> m ()
skipLineComment prefix =
  B.string prefix *> void (takeWhileP (Just "character") (/= 10))
{-# INLINEABLE skipLineComment #-}

-- | @'skipBlockComment' start end@ skips non-nested block comment starting
-- with @start@ and ending with @end@.

skipBlockComment :: (MonadParsec e s m, Token s ~ Word8)
  => Tokens s          -- ^ Start of block comment
  -> Tokens s          -- ^ End of block comment
  -> m ()
skipBlockComment start end = p >> void (manyTill anySingle n)
  where
    p = B.string start
    n = B.string end
{-# INLINEABLE skipBlockComment #-}

-- | @'skipBlockCommentNested' start end@ skips possibly nested block
-- comment starting with @start@ and ending with @end@.
--
-- @since 5.0.0

skipBlockCommentNested :: (MonadParsec e s m, Token s ~ Word8)
  => Tokens s          -- ^ Start of block comment
  -> Tokens s          -- ^ End of block comment
  -> m ()
skipBlockCommentNested start end = p >> void (manyTill e n)
  where
    e = skipBlockCommentNested start end <|> void anySingle
    p = B.string start
    n = B.string end
{-# INLINEABLE skipBlockCommentNested #-}

----------------------------------------------------------------------------
-- Numbers

-- | Parse an integer in decimal representation according to the format of
-- integer literals described in the Haskell report.
--
-- If you need to parse signed integers, see the 'signed' combinator.

decimal
  :: forall e s m a. (MonadParsec e s m, Token s ~ Word8, Integral a)
  => m a
decimal = decimal_ <?> "integer"
{-# INLINEABLE decimal #-}

-- | A non-public helper to parse decimal integers.

decimal_
  :: forall e s m a. (MonadParsec e s m, Token s ~ Word8, Integral a)
  => m a
decimal_ = mkNum <$> takeWhile1P (Just "digit") isDigit
  where
    mkNum    = foldl' step 0 . chunkToTokens (Proxy :: Proxy s)
    step a w = a * 10 + fromIntegral (w - 48)
{-# INLINE decimal_ #-}

-- | Parse an integer in binary representation. Binary number is expected to
-- be a non-empty sequence of zeroes “0” and ones “1”.
--
-- You could of course parse some prefix before the actual number:
--
-- > binary = char 48 >> char' 98 >> L.binary
--
-- @since 7.0.0

binary
  :: forall e s m a. (MonadParsec e s m, Token s ~ Word8, Integral a)
  => m a
binary = mkNum
  <$> takeWhile1P Nothing isBinDigit
  <?> "binary integer"
  where
    mkNum        = foldl' step 0 . chunkToTokens (Proxy :: Proxy s)
    step a w     = a * 2 + fromIntegral (w - 48)
    isBinDigit w = w == 48 || w == 49
{-# INLINEABLE binary #-}

-- | Parse an integer in octal representation. Representation of octal
-- number is expected to be according to the Haskell report except for the
-- fact that this parser doesn't parse “0o” or “0O” prefix. It is a
-- responsibility of the programmer to parse correct prefix before parsing
-- the number itself.
--
-- For example you can make it conform to the Haskell report like this:
--
-- > octal = char 48 >> char' 111 >> L.octal

octal
  :: forall e s m a. (MonadParsec e s m, Token s ~ Word8, Integral a)
  => m a
octal = mkNum
  <$> takeWhile1P Nothing isOctDigit
  <?> "octal integer"
  where
    mkNum        = foldl' step 0 . chunkToTokens (Proxy :: Proxy s)
    step a w     = a * 8 + fromIntegral (w - 48)
    isOctDigit w = w - 48 < 8
{-# INLINEABLE octal #-}

-- | Parse an integer in hexadecimal representation. Representation of
-- hexadecimal number is expected to be according to the Haskell report
-- except for the fact that this parser doesn't parse “0x” or “0X” prefix.
-- It is a responsibility of the programmer to parse correct prefix before
-- parsing the number itself.
--
-- For example you can make it conform to the Haskell report like this:
--
-- > hexadecimal = char 48 >> char' 120 >> L.hexadecimal

hexadecimal
  :: forall e s m a. (MonadParsec e s m, Token s ~ Word8, Integral a)
  => m a
hexadecimal = mkNum
  <$> takeWhile1P Nothing isHexDigit
  <?> "hexadecimal integer"
  where
    mkNum        = foldl' step 0 . chunkToTokens (Proxy :: Proxy s)
    step a w
      | w >= 48 && w <= 57 = a * 16 + fromIntegral (w - 48)
      | w >= 97            = a * 16 + fromIntegral (w - 87)
      | otherwise          = a * 16 + fromIntegral (w - 55)
    isHexDigit w =
      (w >= 48 && w <= 57)  ||
      (w >= 97 && w <= 102) ||
      (w >= 65 && w <= 70)
{-# INLINEABLE hexadecimal #-}

-- | Parse a floating point value as a 'Scientific' number. 'Scientific' is
-- great for parsing of arbitrary precision numbers coming from an untrusted
-- source. See documentation in "Data.Scientific" for more information.
--
-- The parser can be used to parse integers or floating point values. Use
-- functions like 'Data.Scientific.floatingOrInteger' from "Data.Scientific"
-- to test and extract integer or real values.
--
-- This function does not parse sign, if you need to parse signed numbers,
-- see 'signed'.

scientific
  :: forall e s m. (MonadParsec e s m, Token s ~ Word8)
  => m Scientific
scientific = do
  c'      <- decimal_
  SP c e' <- option (SP c' 0) (try $ dotDecimal_ (Proxy :: Proxy s) c')
  e       <- option e' (try $ exponent_ e')
  return (Sci.scientific c e)
{-# INLINEABLE scientific #-}

data SP = SP !Integer {-# UNPACK #-} !Int

-- | Parse a floating point number according to the syntax for floating
-- point literals described in the Haskell report.
--
-- This function does not parse sign, if you need to parse signed numbers,
-- see 'signed'.
--
-- __Note__: in versions 6.0.0–6.1.1 this function accepted plain integers.

float :: (MonadParsec e s m, Token s ~ Word8, RealFloat a) => m a
float = do
  c' <- decimal_
  Sci.toRealFloat <$>
    ((do SP c e' <- dotDecimal_ (Proxy :: Proxy s) c'
         e       <- option e' (try $ exponent_ e')
         return (Sci.scientific c e))
     <|> (Sci.scientific c' <$> exponent_ 0))
{-# INLINEABLE float #-}

dotDecimal_ :: (MonadParsec e s m, Token s ~ Word8)
  => Proxy s
  -> Integer
  -> m SP
dotDecimal_ pxy c' = do
  void (B.char 46)
  let mkNum    = foldl' step (SP c' 0) . chunkToTokens pxy
      step (SP a e') w = SP
        (a * 10 + fromIntegral (w - 48))
        (e' - 1)
  mkNum <$> takeWhile1P (Just "digit") isDigit
{-# INLINE dotDecimal_ #-}

exponent_ :: (MonadParsec e s m, Token s ~ Word8)
  => Int
  -> m Int
exponent_ e' = do
  void (B.char' 101)
  (+ e') <$> signed (return ()) decimal_
{-# INLINE exponent_ #-}

-- | @'signed' space p@ parser parses an optional sign character (“+” or
-- “-”), then if there is a sign it consumes optional white space (using
-- @space@ parser), then it runs parser @p@ which should return a number.
-- Sign of the number is changed according to the previously parsed sign
-- character.
--
-- For example, to parse signed integer you can write:
--
-- > lexeme        = L.lexeme spaceConsumer
-- > integer       = lexeme L.decimal
-- > signedInteger = L.signed spaceConsumer integer

signed :: (MonadParsec e s m, Token s ~ Word8, Num a)
  => m ()              -- ^ How to consume white space after the sign
  -> m a               -- ^ How to parse the number itself
  -> m a               -- ^ Parser for signed numbers
signed spc p = option id (lexeme spc sign) <*> p
  where
    sign = (id <$ B.char 43) <|> (negate <$ B.char 45)
{-# INLINEABLE signed #-}

----------------------------------------------------------------------------
-- Helpers

-- | A fast predicate to check if given 'Word8' is a digit in ASCII.

isDigit :: Word8 -> Bool
isDigit w = w - 48 < 10
{-# INLINE isDigit #-}