{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE Rank2Types #-}
module Text.PariPari.Internal.CharCombinators (
  digitByte
  , integer
  , integer'
  , decimal
  , octal
  , hexadecimal
  , digit
  , sign
  , signed
  , fractionHex
  , fractionDec
  , char'
  , notChar
  , anyChar
  , anyAsciiByte
  , alphaNumChar
  , digitChar
  , letterChar
  , lowerChar
  , upperChar
  , symbolChar
  , categoryChar
  , punctuationChar
  , spaceChar
  , asciiChar
  , satisfy
  , asciiSatisfy
  , skipChars
  , takeChars
  , skipCharsWhile
  , takeCharsWhile
  , skipCharsWhile1
  , takeCharsWhile1
  , scanChars
  , scanChars1
  , string
) where

import Control.Applicative ((<|>), optional)
import Control.Monad.Combinators (option, skipCount, skipMany)
import Data.Functor (void)
import Data.Maybe (fromMaybe)
import Data.Semigroup ((<>))
import Data.Word (Word8)
import Text.PariPari.Internal.Chunk
import Text.PariPari.Internal.Class
import Text.PariPari.Internal.ElementCombinators ((<?>))
import qualified Data.Char as C

type CharP k a  = (forall p. CharParser k p => p a)

-- | Parse a digit byte for the given base.
-- Bases 2 to 36 are supported.
digitByte :: CharParser k p => Int -> p Word8
digitByte base = asciiSatisfy (isDigit base)
{-# INLINE digitByte #-}

isDigit :: Int -> Word8 -> Bool
isDigit base b
  | base >= 2 && base <= 10 = b >= asc_0 && b <= asc_0 + fromIntegral base - 1
  | base <= 36 = (b >= asc_0 && b <= asc_9)
                 || ((fromIntegral b :: Word) - fromIntegral asc_A) < fromIntegral (base - 10)
                 || ((fromIntegral b :: Word) - fromIntegral asc_a) < fromIntegral (base - 10)
  |otherwise = error "Text.PariPari.Internal.Combinators.isDigit: Bases 2 to 36 are supported"
{-# INLINE isDigit #-}

digitToInt :: Int -> Word8 -> Word
digitToInt base b
  | n <- (fromIntegral b :: Word) - fromIntegral asc_0, base <= 10 || n <= 9  = n
  | n <- (fromIntegral b :: Word) - fromIntegral asc_A, n               <= 26 = n + 10
  | n <- (fromIntegral b :: Word) - fromIntegral asc_a                        = n + 10
{-# INLINE digitToInt #-}

-- | Parse a single digit of the given base and return its value.
-- Bases 2 to 36 are supported.
digit :: CharParser k p => Int -> p Word
digit base = digitToInt base <$> asciiSatisfy (isDigit base)
{-# INLINE digit #-}

-- | Parse an integer of the given base.
-- Returns the integer and the number of digits.
-- Bases 2 to 36 are supported.
-- Digits can be separated by separator, e.g. `optional (char '_')`.
-- Signs are not parsed by this combinator.
integer' :: (Num a, CharParser k p) => p sep -> Int -> p (a, Int)
integer' sep base = label (integerLabel base) $ do
  d <- digit base
  accum 1 $ fromIntegral d
  where accum !i !n = next i n <|> pure (n, i)
        next !i !n = do
          void $ sep
          d <- digit base
          accum (i + 1) $ n * fromIntegral base + fromIntegral d
{-# INLINE integer' #-}

-- | Parse an integer of the given base.
-- Bases 2 to 36 are supported.
-- Digits can be separated by separator, e.g. `optional (char '_')`.
-- Signs are not parsed by this combinator.
integer :: (Num a, CharParser k p) => p sep -> Int -> p a
integer sep base = label (integerLabel base) $ do
  d <- digit base
  accum $ fromIntegral d
  where accum !n = next n <|> pure n
        next !n = do
          void $ sep
          d <- digit base
          accum $ n * fromIntegral base + fromIntegral d
{-# INLINE integer #-}

integerLabel :: Int -> String
integerLabel 2  = "binary integer"
integerLabel 8  = "octal integer"
integerLabel 10 = "decimal integer"
integerLabel 16 = "hexadecimal integer"
integerLabel b  = "integer of base " <> show b

-- | Parses a decimal integer.
-- Signs are not parsed by this combinator.
decimal :: Num a => CharP k a
decimal = integer (pure ()) 10
{-# INLINE decimal #-}

-- | Parses an octal integer.
-- Signs are not parsed by this combinator.
octal :: Num a => CharP k a
octal = integer (pure ()) 8
{-# INLINE octal #-}

-- | Parses a hexadecimal integer.
-- Signs are not parsed by this combinator.
hexadecimal :: Num a => CharP k a
hexadecimal = integer (pure ()) 16
{-# INLINE hexadecimal #-}

-- | Parse plus or minus sign
sign :: (CharParser k f, Num a) => f (a -> a)
sign = (negate <$ asciiByte asc_minus) <|> (id <$ optional (asciiByte asc_plus))
{-# INLINE sign #-}

-- | Parse a number with a plus or minus sign.
signed :: (Num a, CharParser k p) => p a -> p a
signed p = ($) <$> sign <*> p
{-# INLINE signed #-}

fractionExp :: (Num a, CharParser k p) => p expSep -> p digitSep -> p (Maybe a)
fractionExp expSep digitSep = do
  e <- optional expSep
  case e of
    Nothing{} -> pure Nothing
    Just{} -> Just <$> signed (integer digitSep 10)
{-# INLINE fractionExp #-}

-- | Parse a fraction of arbitrary exponent base and mantissa base.
-- 'fractionDec' and 'fractionHex' should be used instead probably.
-- Returns either an integer in 'Left' or a fraction in 'Right'.
-- Signs are not parsed by this combinator.
fraction :: (Num a, CharParser k p) => p expSep -> Int -> Int -> p digitSep -> p (Either a (a, Int, a))
fraction expSep expBase mantBasePow digitSep = do
  let mantBase = expBase ^ mantBasePow
  mant <- integer digitSep mantBase
  frac <- optional $ asciiByte asc_point *> option (0, 0) (integer' digitSep mantBase)
  expn <- fractionExp expSep digitSep
  let (fracVal, fracLen) = fromMaybe (0, 0) frac
      expVal = fromMaybe 0 expn
  pure $ case (frac, expn) of
           (Nothing, Nothing) -> Left mant
           _ -> Right ( mant * fromIntegral mantBase ^ fracLen + fracVal
                      , expBase
                      , expVal - fromIntegral (fracLen * mantBasePow))
{-# INLINE fraction #-}

-- | Parse a decimal fraction, e.g., 123.456e-78, returning (mantissa, 10, exponent),
-- corresponding to mantissa * 10^exponent.
-- Digits can be separated by separator, e.g. `optional (char '_')`.
-- Signs are not parsed by this combinator.
fractionDec :: (Num a, CharParser k p) => p digitSep -> p (Either a (a, Int, a))
fractionDec sep = fraction (asciiSatisfy (\b -> b == asc_E || b == asc_e)) 10 1 sep <?> "fraction"
{-# INLINE fractionDec #-}

-- | Parse a hexadecimal fraction, e.g., co.ffeep123, returning (mantissa, 2, exponent),
-- corresponding to mantissa * 2^exponent.
-- Digits can be separated by separator, e.g. `optional (char '_')`.
-- Signs are not parsed by this combinator.
fractionHex :: (Num a, CharParser k p) => p digitSep -> p (Either a (a, Int, a))
fractionHex sep = fraction (asciiSatisfy (\b -> b == asc_P || b == asc_p)) 2 4 sep <?> "hexadecimal fraction"
{-# INLINE fractionHex #-}

-- | Parse a case-insensitive character
char' :: CharParser k p => Char -> p Char
char' x =
  let l = C.toLower x
      u = C.toUpper x
  in satisfy (\c -> c == l || c == u)
{-# INLINE char' #-}

-- | Parse a character different from the given one.
notChar :: CharParser k p => Char -> p Char
notChar c = satisfy (/= c)
{-# INLINE notChar #-}

-- | Parse an arbitrary character.
anyChar :: CharP k Char
anyChar = satisfy (const True)
{-# INLINE anyChar #-}

-- | Parse an arbitrary ASCII byte.
anyAsciiByte :: CharP k Word8
anyAsciiByte = asciiSatisfy (const True)
{-# INLINE anyAsciiByte #-}

-- | Parse an alphanumeric character, including Unicode.
alphaNumChar :: CharP k Char
alphaNumChar = satisfy C.isAlphaNum <?> "alphanumeric character"
{-# INLINE alphaNumChar #-}

-- | Parse a letter character, including Unicode.
letterChar :: CharP k Char
letterChar = satisfy C.isLetter <?> "letter"
{-# INLINE letterChar #-}

-- | Parse a lowercase letter, including Unicode.
lowerChar :: CharP k Char
lowerChar = satisfy C.isLower <?> "lowercase letter"
{-# INLINE lowerChar #-}

-- | Parse a uppercase letter, including Unicode.
upperChar :: CharP k Char
upperChar = satisfy C.isUpper <?> "uppercase letter"
{-# INLINE upperChar #-}

-- | Parse a space character, including Unicode.
spaceChar :: CharP k Char
spaceChar = satisfy C.isSpace <?> "space"
{-# INLINE spaceChar #-}

-- | Parse a symbol character, including Unicode.
symbolChar :: CharP k Char
symbolChar = satisfy C.isSymbol <?> "symbol"
{-# INLINE symbolChar #-}

-- | Parse a punctuation character, including Unicode.
punctuationChar :: CharP k Char
punctuationChar = satisfy C.isPunctuation <?> "punctuation"
{-# INLINE punctuationChar #-}

-- | Parse a digit character of the given base.
-- Bases 2 to 36 are supported.
digitChar :: CharParser k p => Int -> p Char
digitChar base = unsafeAsciiToChar <$> digitByte base
{-# INLINE digitChar #-}

-- | Parse a character beloning to the ASCII charset (< 128)
asciiChar :: CharP k Char
asciiChar = unsafeAsciiToChar <$> anyAsciiByte
{-# INLINE asciiChar #-}

-- | Parse a character belonging to the given Unicode category
categoryChar :: CharParser k p => C.GeneralCategory -> p Char
categoryChar cat = satisfy ((== cat) . C.generalCategory) <?> untitle (show cat)
{-# INLINE categoryChar #-}

untitle :: String -> String
untitle []     = []
untitle (x:xs) = C.toLower x : go xs
  where go [] = ""
        go (y:ys) | C.isUpper y = ' ' : C.toLower y : untitle ys
                  | otherwise   = y : ys

-- | Skip the next n characters
skipChars :: CharParser k p => Int -> p ()
skipChars n = skipCount n anyChar
{-# INLINE skipChars #-}

-- | Skip char while predicate is true
skipCharsWhile :: CharParser k p => (Char -> Bool) -> p ()
skipCharsWhile f = skipMany (satisfy f)
{-# INLINE skipCharsWhile #-}

-- | Skip at least one char while predicate is true
skipCharsWhile1 :: CharParser k p => (Char -> Bool) -> p ()
skipCharsWhile1 f = satisfy f *> skipCharsWhile f
{-# INLINE skipCharsWhile1 #-}

-- | Take the next n characters and advance the position by n characters
takeChars :: CharParser k p => Int -> p k
takeChars n = asChunk (skipChars n) <?> "string of length " <> show n
{-# INLINE takeChars #-}

-- | Take chars while predicate is true
takeCharsWhile :: CharParser k p => (Char -> Bool) -> p k
takeCharsWhile f = asChunk (skipCharsWhile f)
{-# INLINE takeCharsWhile #-}

-- | Take at least one byte while predicate is true
takeCharsWhile1 :: CharParser k p => (Char -> Bool) -> p k
takeCharsWhile1 f = asChunk (skipCharsWhile1 f)
{-# INLINE takeCharsWhile1 #-}

-- | Parse a single character with the given predicate
satisfy :: CharParser k p => (Char -> Bool) -> p Char
satisfy f = scan $ \c -> if f c then Just c else Nothing
{-# INLINE satisfy #-}

-- | Parse a single character within the ASCII charset with the given predicate
asciiSatisfy :: CharParser k p => (Word8 -> Bool) -> p Word8
asciiSatisfy f = asciiScan $ \b -> if f b then Just b else Nothing
{-# INLINE asciiSatisfy #-}

scanChars :: CharParser k p => (s -> Char -> Maybe s) -> s -> p s
scanChars f = go
  where go s = (scan (f s) >>= go) <|> pure s
{-# INLINE scanChars #-}

scanChars1 :: CharParser k p => (s -> Char -> Maybe s) -> s -> p s
scanChars1 f s = scan (f s) >>= scanChars f
{-# INLINE scanChars1 #-}