{-# LANGUAGE PatternSynonyms #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE StandaloneDeriving #-} {-# LANGUAGE DeriveLift #-} {-# LANGUAGE GADTs #-} {-# LANGUAGE KindSignatures #-} {-# LANGUAGE DataKinds #-} {- | This module provides a parser for . -} module PyF.Internal.PythonSyntax ( parsePythonFormatString , parseGenericFormatString , Item(..) , FormatMode(..) , Padding(..) , Precision(..) , TypeFormat(..) , AlternateForm(..) , pattern DefaultFormatMode ) where import Language.Haskell.TH.Syntax import Text.Megaparsec import qualified Text.Megaparsec.Char.Lexer as L import Text.Megaparsec.Char import Data.Void (Void) import qualified Data.Char import Data.Maybe (fromMaybe) import qualified Data.Set as Set -- For fancyFailure import PyF.Formatters type Parser t = Parsec Void String t {- -- TODO: - Better parsing of integer - Recursive replacement field, so "{string:.{precision}} can be parsed - f_expression / conversion - Not (Yet) implemented: - types: n -} {- f_string ::= (literal_char | "{{" | "}}" | replacement_field)* replacement_field ::= "{" f_expression ["!" conversion] [":" format_spec] "}" f_expression ::= (conditional_expression | "*" or_expr) ("," conditional_expression | "," "*" or_expr)* [","] | yield_expression conversion ::= "s" | "r" | "a" format_spec ::= (literal_char | NULL | replacement_field)* literal_char ::= -} -- | A format string is composed of many chunks of raw string or replacement data Item = Raw String -- ^ A raw string | Replacement String (Maybe FormatMode) -- ^ A replacement string, composed of an arbitrary Haskell expression followed by an optional formatter deriving (Show) {- | Parse a string, returns a list of raw string or replacement fields >>> import Text.Megaparsec >>> parse parsePythonFormatString "" "hello {1+1:>10.2f}" Right [ Raw "hello ", Replacement "1+1" ( Just (FormatMode (Padding 10 (Just (Nothing,AnyAlign AlignRight))) (FixedF (Precision 2) NormalForm Minus) Nothing))] -} parsePythonFormatString :: Parser [Item] parsePythonFormatString = parseGenericFormatString ('{', '}') parseGenericFormatString :: (Char, Char) -> Parser [Item] parseGenericFormatString delimiters = many (rawString delimiters <|> escapedParenthesis delimiters <|> replacementField delimiters) rawString :: (Char, Char) -> Parser Item rawString (openingChar,closingChar) = Raw . escapeChars <$> some (noneOf ([openingChar, closingChar])) escapedParenthesis :: (Char, Char) -> Parser Item escapedParenthesis (openingChar, closingChar) = Raw <$> (parseRaw openingChar <|> parseRaw closingChar) where parseRaw c = c:[] <$ string (replicate 2 c) {- | Replace escape chars with their value >>> escapeChars "hello \\n" "hello \n" -} escapeChars :: String -> String escapeChars "" = "" escapeChars s = case Data.Char.readLitChar s of [] -> "" ((c, xs):_) -> c : escapeChars xs replacementField :: (Char, Char) -> Parser Item replacementField (charOpening, charClosing) = do _ <- char charOpening expr <- many (noneOf (charClosing:":")) fmt <- optional $ do _ <- char ':' format_spec _ <- char charClosing pure (Replacement expr fmt) -- | Default formating mode, no padding, default precision, no grouping, no sign handling pattern DefaultFormatMode :: FormatMode pattern DefaultFormatMode = FormatMode PaddingDefault (DefaultF PrecisionDefault Minus) Nothing -- | A Formatter, listing padding, format and and grouping char data FormatMode = FormatMode Padding TypeFormat (Maybe Char) deriving (Show) -- | Padding, containing the padding width, the padding char and the alignement mode data Padding = PaddingDefault | Padding Integer (Maybe (Maybe Char, AnyAlign)) deriving (Show) -- | Floating point precision data Precision = PrecisionDefault | Precision Integer deriving (Show) {- Python format mini language format_spec ::= [[fill]align][sign][#][0][width][grouping_option][.precision][type] fill ::= align ::= "<" | ">" | "=" | "^" sign ::= "+" | "-" | " " width ::= integer grouping_option ::= "_" | "," precision ::= integer type ::= "b" | "c" | "d" | "e" | "E" | "f" | "F" | "g" | "G" | "n" | "o" | "s" | "x" | "X" | "%" -} data TypeFlag = Flagb | Flagc | Flagd | Flage | FlagE | Flagf | FlagF | Flagg | FlagG | Flagn | Flago | Flags | Flagx | FlagX | FlagPercent deriving (Show) -- | All formating type data TypeFormat = DefaultF Precision SignMode -- ^ Default, depends on the infered type of the expression | BinaryF AlternateForm SignMode -- ^ Binary, such as `0b0121` | CharacterF -- ^ Character, will convert an integer to its character representation | DecimalF SignMode -- ^ Decimal, base 10 integer formatting | ExponentialF Precision AlternateForm SignMode -- ^ Exponential notation for floatting points | ExponentialCapsF Precision AlternateForm SignMode -- ^ Exponential notation with capitalised 'e' | FixedF Precision AlternateForm SignMode -- ^ Fixed number of digits floating point | FixedCapsF Precision AlternateForm SignMode -- ^ Capitalized version of the previous | GeneralF Precision AlternateForm SignMode -- ^ General formatting: `FixedF` or `ExponentialF` depending on the number magnitude | GeneralCapsF Precision AlternateForm SignMode -- ^ Same as `GeneralF` but with upper case 'E' and infinite / NaN | OctalF AlternateForm SignMode -- ^ Octal, such as 00245 | StringF Precision -- ^ Simple string | HexF AlternateForm SignMode -- ^ Hexadecimal, such as 0xaf3e | HexCapsF AlternateForm SignMode -- ^ Hexadecimal with capitalized letters, such as 0XAF3E | PercentF Precision AlternateForm SignMode -- ^ Percent representation deriving (Show) -- | If the formatter use its alternate form data AlternateForm = AlternateForm | NormalForm deriving (Show) lastCharFailed :: String -> Parser t lastCharFailed err = do (SourcePos name line col) <- getPosition -- This is right as long as there is not line break in the string setPosition (SourcePos name line (mkPos (unPos col - 1))) fancyFailure (Set.singleton (ErrorFail err)) overrideAlignmentIfZero :: Bool -> Maybe (Maybe Char, AnyAlign) -> Maybe (Maybe Char, AnyAlign) overrideAlignmentIfZero True Nothing = Just (Just '0', AnyAlign AlignInside) overrideAlignmentIfZero True (Just (Nothing, al)) = Just (Just '0', al) overrideAlignmentIfZero _ v = v format_spec :: Parser FormatMode format_spec = do al' <- optional alignment s <- optional sign alternateForm <- option NormalForm (AlternateForm <$ char '#') hasZero <- option False (True <$ char '0') let al = overrideAlignmentIfZero hasZero al' w <- optional width grouping <- optional grouping_option prec <- option PrecisionDefault (char '.' *> (Precision <$> precision)) t <- optional type_ let padding = case w of Just p -> Padding p al Nothing -> PaddingDefault case t of Nothing -> pure (FormatMode padding (DefaultF prec (fromMaybe Minus s)) grouping) Just flag -> case evalFlag flag prec alternateForm s of Right fmt -> pure (FormatMode padding fmt grouping) Left typeError -> do lastCharFailed typeError evalFlag :: TypeFlag -> Precision -> AlternateForm -> Maybe SignMode -> Either String TypeFormat evalFlag Flagb prec alt s = failIfPrec prec (BinaryF alt (defSign s)) evalFlag Flagc prec alt s = failIfS s =<< failIfPrec prec =<< failIfAlt alt CharacterF evalFlag Flagd prec alt s = failIfPrec prec =<< failIfAlt alt (DecimalF (defSign s)) evalFlag Flage prec alt s = pure $ExponentialF prec alt (defSign s) evalFlag FlagE prec alt s = pure $ ExponentialCapsF prec alt (defSign s) evalFlag Flagf prec alt s = pure $ FixedF prec alt (defSign s) evalFlag FlagF prec alt s = pure $ FixedCapsF prec alt (defSign s) evalFlag Flagg prec alt s = pure $ GeneralF prec alt (defSign s) evalFlag FlagG prec alt s = pure $ GeneralCapsF prec alt (defSign s) evalFlag Flagn _prec _alt _s = Left ("Type 'n' not handled (yet). " ++ errgGn) evalFlag Flago prec alt s = failIfPrec prec $ OctalF alt (defSign s) evalFlag Flags prec alt s = failIfS s =<< (failIfAlt alt $ StringF prec) evalFlag Flagx prec alt s = failIfPrec prec $ HexF alt (defSign s) evalFlag FlagX prec alt s = failIfPrec prec $ HexCapsF alt (defSign s) evalFlag FlagPercent prec alt s = pure $ PercentF prec alt (defSign s) defSign :: Maybe SignMode -> SignMode defSign Nothing = Minus defSign (Just s) = s errgGn :: String errgGn = "Use one of {'b', 'c', 'd', 'e', 'E', 'f', 'F', 'g', 'G', 'n', 'o', 's', 'x', 'X', '%'}." failIfPrec :: Precision -> TypeFormat -> Either String TypeFormat failIfPrec PrecisionDefault i = Right i failIfPrec (Precision i) _ = Left ("Type incompatible with precision (." ++ show i ++ "), use any of {'e', 'E', 'f', 'F', 'g', 'G', 'n', 's', '%'} or remove the precision field.") failIfAlt :: AlternateForm -> TypeFormat -> Either String TypeFormat failIfAlt NormalForm i = Right i failIfAlt _ _ = Left "Type incompatible with alternative form (#), use any of {'e', 'E', 'f', 'F', 'g', 'G', 'n', 'o', 'x', 'X', '%'} or remove the alternative field." failIfS :: Maybe SignMode -> TypeFormat -> Either String TypeFormat failIfS Nothing i = Right i failIfS (Just s) _ = Left ("Type incompatible with sign field (" ++ [toSignMode s] ++ "), use any of {'b', 'd', 'e', 'E', 'f', 'F', 'g', 'G', 'n', 'o', 'x', 'X', '%'} or remove the sign field.") toSignMode :: SignMode -> Char toSignMode Plus = '+' toSignMode Minus = '-' toSignMode Space = ' ' alignment :: Parser (Maybe Char, AnyAlign) alignment = choice [ try $ do c <- fill mode <- align pure (Just c, mode) , do mode <- align pure (Nothing, mode) ] fill :: Parser Char fill = anyChar align :: Parser AnyAlign align = choice [ AnyAlign AlignLeft <$ char '<', AnyAlign AlignRight <$ char '>', AnyAlign AlignCenter <$ char '^', AnyAlign AlignInside <$ char '=' ] sign :: Parser SignMode sign = choice [Plus <$ char '+', Minus <$ char '-', Space <$ char ' ' ] width :: Parser Integer width = integer integer :: Parser Integer integer = L.decimal -- incomplete: see: https://docs.python.org/3/reference/lexical_analysis.html#grammar-token-integer grouping_option :: Parser Char grouping_option = oneOf ("_," :: [Char]) precision :: Parser Integer precision = integer type_ :: Parser TypeFlag type_ = choice [ Flagb <$ char 'b', Flagc <$ char 'c', Flagd <$ char 'd', Flage <$ char 'e', FlagE <$ char 'E', Flagf <$ char 'f', FlagF <$ char 'F', Flagg <$ char 'g', FlagG <$ char 'G', Flagn <$ char 'n', Flago <$ char 'o', Flags <$ char 's', Flagx <$ char 'x', FlagX <$ char 'X', FlagPercent <$ char '%' ] -- TODO: remove ! deriving instance Lift Precision deriving instance Lift Padding