-- | This module provides some higher-level types and infrastructure to  make it easier to use.

{-# LANGUAGE PatternGuards, ScopedTypeVariables, NoMonomorphismRestriction,FlexibleInstances,  FlexibleContexts, RankNTypes, ScopedTypeVariables #-}
-- {-# LANGUAGE  MultiParamTypeClasses, TypeSynonymInstances, FlexibleContexts #-}

module Text.ParserCombinators.UU.Utils (
   -- * Single-char parsers
  pCR,
  pLF,
  pLower,
  pUpper,
  pLetter,
  pAscii,
  pDigit,
  pDigitAsNum,
  pAnySym,

  -- * Whitespace and comments (comments - not yet supported)
  pSpaces, -- This should not be used very often. In general
           -- you may want to use it to skip initial whitespace
           -- at the start of all input, but after that you
           -- should rely on Lexeme parsers to skip whitespace
           -- as needed. (This is the same as the strategy used
           -- by Parsec).

  -- * Lexeme parsers (as opposed to 'Raw' parsers)
  lexeme,
  pDot,
  pComma,
  pDQuote,
  pLParen,
  pRParen,
  pLBracket,
  pRBracket,
  pLBrace,
  pRBrace,
  pSymbol,

  -- * Raw parsers for numbers
  pNaturalRaw,
  pIntegerRaw,
  pDoubleRaw,

  -- * Lexeme parsers for numbers
  pNatural,
  pInteger,
  pDouble,
  pPercent,

  -- * Parsers for Enums
  pEnumRaw,
  pEnum,
  pEnumStrs,

  -- * Parenthesized parsers
  pParens,
  pBraces,
  pBrackets,
  listParser,
  tupleParser,
  pTuple,

  -- * Lexeme parsers for `Date`-s
  pDay,
  pDayMonthYear,

  -- * Lexeme parser for quoted `String`-s
  pParentheticalString,
  pQuotedString,

  -- * Read-compatability
  parserReadsPrec,
  
  -- * Basic facility for runninga parser, getting at most a single error message
  execParser,
  runParser
)
where

import Data.Char
import Data.List
import Data.Time
import Text.ParserCombinators.UU.Core
import Text.ParserCombinators.UU.BasicInstances
import Text.ParserCombinators.UU.Derived 
import Text.Printf
import qualified Data.ListLike  as LL

------------------------------------------------------------------------

--  Single Char parsers

pCR :: Parser Char
pCR       = pSym '\r'

pLF :: Parser Char
pLF       = pSym '\n'

pLower :: Parser Char
pLower  = pRange ('a','z')

pUpper :: Parser Char
pUpper  = pRange ('A','Z')

pLetter:: Parser Char
pLetter = pUpper <|> pLower

pAscii :: Parser Char
pAscii = pRange ('\000', '\254')

pDigit :: Parser Char
pDigit  = pRange ('0','9')


pDigitAsNum ::  Num a => Parser a
pDigitAsNum =
  digit2Int <$> pDigit
  where
  digit2Int a = fromInteger $ toInteger $ ord a - ord '0'

pAnySym ::  (IsLocationUpdatedBy loc Char, LL.ListLike state Char) => String -> P (Str Char state loc) Char
pAnySym = pAny pSym

-- * Dealing with Whitespace
pSpaces :: Parser String
pSpaces = pList $ pAnySym " \r\n\t" <?> "Whitespace"

-- | Lexeme Parsers skip trailing whitespace (this terminology comes from Parsec)
lexeme :: ParserTrafo a a
lexeme p = p <* pSpaces

pDot, pComma, pDQuote, pLParen, pRParen, pLBracket, pRBracket, pLBrace, pRBrace :: Parser Char
pDot      = lexeme $ pSym '.'
pComma    = lexeme $ pSym ','
pDQuote   = lexeme $ pSym '"'
pLParen   = lexeme $ pSym '('
pRParen   = lexeme $ pSym ')'
pLBracket = lexeme $ pSym '['
pRBracket = lexeme $ pSym ']'
pLBrace   = lexeme $ pSym '{'
pRBrace   = lexeme $ pSym '}'

pSymbol :: (IsLocationUpdatedBy loc Char, LL.ListLike state Char) => String -> P (Str Char state loc)  String
pSymbol   = lexeme . pToken

-- * Parsers for Numbers
-- ** Raw (non lexeme) parsers
pNaturalRaw :: (Num a) => Parser a
pNaturalRaw = foldl (\a b -> a * 10 + b) 0 <$> pList1 pDigitAsNum <?> "Natural"

pIntegerRaw :: (Num a) => Parser a
pIntegerRaw = pSign <*> pNaturalRaw <?> "Integer"

pDoubleRaw :: (Read a) => Parser a
pDoubleRaw = read <$> pDoubleStr

pDoubleStr :: Parser  [Char]
pDoubleStr = pOptSign <*> (pToken "Infinity" <|> pPlainDouble)
             <?> "Double (eg -3.4e-5)"
  where
    pPlainDouble = (++) <$> ((++) <$> pList1 pDigit <*> (pFraction `opt` [])) <*> pExponent
    pFraction = (:) <$> pSym '.' <*> pList1 pDigit
    pExponent = ((:) <$> pAnySym "eE" <*> (pOptSign <*> pList1 pDigit)) `opt` []
    pOptSign = ((('+':) <$ (pSym '+')) <|> (('-':) <$ (pSym '-'))) `opt` id

-- | NB - At present this is /not/ a lexeme parser, hence we don't
--   support @- 7.0@, @- 7@, @+ 7.0@ etc.
--   It's also currently private - ie local to this module.
pSign :: (Num a) => Parser (a -> a)
pSign = (id <$ (pSym '+')) <|> (negate <$ (pSym '-')) `opt` id

pPercentRaw ::Parser Double
pPercentRaw = (/ 100.0) . read <$> pDoubleStr <* pSym '%' <?> "Double%"

pPctOrDbl = pPercentRaw <|> pDoubleRaw

-- ** Lexeme Parsers for Numbers

pNatural :: Num a => Parser a
pNatural = lexeme pNaturalRaw

pInteger :: Num a => Parser a
pInteger = lexeme pIntegerRaw

pDouble :: Parser Double
pDouble = lexeme pDoubleRaw

pPercent :: Parser Double
pPercent = lexeme pPctOrDbl

-- * Parsers for Enums

pEnumRaw :: forall a . ((Enum a, Show a)=> Parser  a)
pEnumRaw = foldr (\ c r -> c <$ pToken (show c) <|> r) pFail enumerated
           <?> (printf "Enum (eg %s or ... %s)" (show (head enumerated)) (show (last enumerated)))
            -- unless it is an empty data decl we will always have a head/last (even if the same)
            -- if it is empty, you cannot use it anyhow...
  where
    enumerated :: [a]
    enumerated = [toEnum 0..] 
--    pToken :: Provides st s s => [s] -> P st [s]
--    pToken []     = pure []
--    pToken (a:as) = (:) <$> pSym a <*> pToken as

pEnum ::  (Enum a, Show a) => Parser a
pEnum = lexeme pEnumRaw

pEnumStrs :: [String]-> Parser String
pEnumStrs xs = pAny (\t -> pSpaces *> pToken t <* pSpaces) xs <?> "enumerated value in " ++ show xs


-- * Parenthesized structures
pParens :: ParserTrafo a a
pParens p = pLParen *> p <* pRParen

pBraces ::  ParserTrafo a a
pBraces p = pLBrace *> p <* pRBrace

pBrackets ::  ParserTrafo a a
pBrackets p = pLBracket *> p <* pRBracket

-- * Lists and tuples
-- | eg [1,2,3]
listParser :: ParserTrafo a [a]
listParser = pBrackets . pListSep pComma

-- | eg (1,2,3)
tupleParser :: ParserTrafo a [a]
tupleParser = pParens . pListSep pComma

pTuple :: (IsLocationUpdatedBy loc Char, LL.ListLike state Char) => [P (Str Char state loc) a] -> P (Str Char state loc) [a]
pTuple []     = [] <$ pParens pSpaces
pTuple (p:ps) = pParens $ (:) <$> lexeme p <*> mapM ((pComma *>) . lexeme) ps

-- * Lexeme parsers for Dates

data Month = Jan | Feb | Mar | Apr | May | Jun | Jul | Aug | Sep | Oct | Nov | Dec
           deriving (Enum, Bounded, Eq, Show, Ord)

pDayMonthYear :: (Num d, Num y) => Parser (d, Int, y)
pDayMonthYear = lexeme $ (,,) <$> pDayNum <*> (pSym '-' *> pMonthNum) <*> (pSym '-' *> pYearNum)
  where
    pMonthNum = ((+1) . (fromEnum :: Month -> Int)) <$> pEnumRaw <?> "Month (eg Jan)"
    pDayNum   = pNaturalRaw <?> "Day (1-31)"
    pYearNum  = pNaturalRaw <?> "Year (eg 2019)"

pDay :: Parser Day
pDay = (\(d,m,y) -> fromGregorian y m d) <$> pDayMonthYear

-- * Quoted Strings

pParentheticalString :: Char -> Parser String

pParentheticalString d = lexeme $ pSym d *> pList pNonQuoteVChar <* pSym d
  where
    pNonQuoteVChar = pSatisfy (\c -> visibleChar c && c /= d) 
                              (Insertion  "Character in a string set off from main text by delimiter, e.g. double-quotes or comment token" 'y' 5)
    -- visibleChar :: Char -> Bool
    visibleChar c = '\032' <= c && c <= '\126'

pQuotedString :: Parser String
pQuotedString = pParentheticalString '"'

-- * Read-compatability

-- | Converts a UU Parser into a read-style one.
--
-- This is intended to facilitate migration from read-style
-- parsers to UU-based ones.
parserReadsPrec :: Parser a -> Int -> ReadS a
parserReadsPrec p _ s = [parse ((,) <$> p <*> pMunch (const True)) . createStr (0::Int) $ s]


-- * Running parsers straightforwardly

-- | The lower-level interface. Returns all errors. 
execParser :: Parser a -> String -> (a, [Error LineColPos])
execParser p = parse_h ((,) <$> p <*> pEnd) . createStr (LineColPos 0 0 0)

-- | The higher-level interface. (Calls 'error' with a simplified error).  
--   Runs the parser; if the complete input is accepted without problems  return the
--   result else fail with reporting unconsumed tokens
runParser :: String -> Parser a -> String -> a
runParser inputName p s | (a,b) <- execParser p s =
    if null b
    then a
    else error (printf "Failed parsing '%s' :\n%s\n" inputName (pruneError s b))
         -- We do 'pruneError' above because otherwise you can end
         -- up reporting huge correction streams, and that's
         -- generally not helpful... but the pruning does discard info...
    where -- | Produce a single simple, user-friendly error message
          pruneError :: String -> [Error LineColPos] -> String
          pruneError _ [] = ""
          pruneError _ (DeletedAtEnd x     : _) = printf "Unexpected '%s' at end." x
          pruneError s (Inserted _ pos exp : _) = prettyError s exp pos
          pruneError s (Deleted  _ pos exp : _) = prettyError s exp pos
          prettyError :: String -> [String] -> LineColPos -> String
          prettyError s exp p@(LineColPos line c abs) = printf "Expected %s at %s :\n%s\n%s\n%s\n"
                                                           (show_expecting p exp)
                                                           (show p)
                                                           aboveString
                                                           inputFrag
                                                           belowString
                             where
                                s' = map (\c -> if c=='\n' || c=='\r' || c=='\t' then ' ' else c) s
                                aboveString = replicate 30 ' ' ++ "v"
                                belowString = replicate 30 ' ' ++ "^"
                                inputFrag   = replicate (30 - c) ' ' ++ (take 71 $ drop (c - 30) s')