{-|
Module      : Toml.Lexer.Token
Description : Lexical tokens
Copyright   : (c) Eric Mertens, 2023
License     : ISC
Maintainer  : emertens@gmail.com

This module provides the datatype for the lexical
syntax of TOML files. These tokens will drive the
parser in the "Parser" module.

-}
module Toml.Lexer.Token (
    Token(..),
    
    mkBasicString,
    mkLiteralString,
    mkMlBasicString,
    mkMlLiteralString,
    
    -- * integer literals
    mkBinInteger,
    mkDecInteger,
    mkOctInteger,
    mkHexInteger,

    -- * float literals
    mkFloat,

    -- * date and time patterns
    localDatePatterns,
    localTimePatterns,
    localDateTimePatterns,
    offsetDateTimePatterns,

    -- * errors
    mkError,
    ) where

import Data.Char (chr, isSpace)
import Data.Time (Day, LocalTime, TimeOfDay, ZonedTime)
import Numeric (readBin, readHex, readOct)

-- | Lexical token
data Token
    = TokTrue                       -- ^ @true@
    | TokFalse                      -- ^ @false@
    | TokComma                      -- ^ @','@
    | TokEquals                     -- ^ @'='@
    | TokNewline                    -- ^ @'\\n'@
    | TokPeriod                     -- ^ @'.'@
    | TokSquareO                    -- ^ @'['@
    | TokSquareC                    -- ^ @']'@
    | Tok2SquareO                   -- ^ @'[['@
    | Tok2SquareC                   -- ^ @']]'@
    | TokCurlyO                     -- ^ @'{'@
    | TokCurlyC                     -- ^ @'}'@
    | TokBareKey String             -- ^ bare key
    | TokString String              -- ^ string literal
    | TokMlString String            -- ^ multiline string literal
    | TokInteger !Integer           -- ^ integer literal
    | TokFloat !Double              -- ^ floating-point literal
    | TokOffsetDateTime !ZonedTime  -- ^ date-time with timezone offset
    | TokLocalDateTime !LocalTime   -- ^ local date-time
    | TokLocalDate !Day             -- ^ local date
    | TokLocalTime !TimeOfDay       -- ^ local time
    | TokError String               -- ^ lexical error
    | TokEOF                        -- ^ end of file
    deriving (ReadPrec [Token]
ReadPrec Token
Int -> ReadS Token
ReadS [Token]
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [Token]
$creadListPrec :: ReadPrec [Token]
readPrec :: ReadPrec Token
$creadPrec :: ReadPrec Token
readList :: ReadS [Token]
$creadList :: ReadS [Token]
readsPrec :: Int -> ReadS Token
$creadsPrec :: Int -> ReadS Token
Read, Int -> Token -> ShowS
[Token] -> ShowS
Token -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Token] -> ShowS
$cshowList :: [Token] -> ShowS
show :: Token -> String
$cshow :: Token -> String
showsPrec :: Int -> Token -> ShowS
$cshowsPrec :: Int -> Token -> ShowS
Show)

-- | Remove underscores from number literals
scrub :: String -> String
scrub :: ShowS
scrub = forall a. (a -> Bool) -> [a] -> [a]
filter (Char
'_' forall a. Eq a => a -> a -> Bool
/=)

-- | Construct a 'TokInteger' from a decimal integer literal lexeme.
mkDecInteger :: String -> Token
mkDecInteger :: String -> Token
mkDecInteger (Char
'+':String
xs) = Integer -> Token
TokInteger (forall a. Read a => String -> a
read (ShowS
scrub String
xs))
mkDecInteger String
xs = Integer -> Token
TokInteger (forall a. Read a => String -> a
read (ShowS
scrub String
xs))

-- | Construct a 'TokInteger' from a hexadecimal integer literal lexeme.
mkHexInteger :: String -> Token
mkHexInteger :: String -> Token
mkHexInteger (Char
'0':Char
'x':String
xs) = Integer -> Token
TokInteger (forall a b. (a, b) -> a
fst (forall a. [a] -> a
head (forall a. (Eq a, Num a) => ReadS a
readHex (ShowS
scrub String
xs))))
mkHexInteger String
_ = forall a. HasCallStack => String -> a
error String
"processHex: bad input"

-- | Construct a 'TokInteger' from a octal integer literal lexeme.
mkOctInteger :: String -> Token
mkOctInteger :: String -> Token
mkOctInteger (Char
'0':Char
'o':String
xs) = Integer -> Token
TokInteger (forall a b. (a, b) -> a
fst (forall a. [a] -> a
head (forall a. (Eq a, Num a) => ReadS a
readOct (ShowS
scrub String
xs))))
mkOctInteger String
_ = forall a. HasCallStack => String -> a
error String
"processHex: bad input"

-- | Construct a 'TokInteger' from a binary integer literal lexeme.
mkBinInteger :: String -> Token
mkBinInteger :: String -> Token
mkBinInteger (Char
'0':Char
'b':String
xs) = Integer -> Token
TokInteger (forall a b. (a, b) -> a
fst (forall a. [a] -> a
head (forall a. (Eq a, Num a) => ReadS a
readBin (ShowS
scrub String
xs))))
mkBinInteger String
_ = forall a. HasCallStack => String -> a
error String
"processHex: bad input"

-- | Construct a 'TokFloat' from a floating-point literal lexeme.
mkFloat :: String -> Token
mkFloat :: String -> Token
mkFloat String
"nan"   = Double -> Token
TokFloat (Double
0forall a. Fractional a => a -> a -> a
/Double
0)
mkFloat String
"+nan"  = Double -> Token
TokFloat (Double
0forall a. Fractional a => a -> a -> a
/Double
0)
mkFloat String
"-nan"  = Double -> Token
TokFloat (Double
0forall a. Fractional a => a -> a -> a
/Double
0)
mkFloat String
"inf"   = Double -> Token
TokFloat (Double
1forall a. Fractional a => a -> a -> a
/Double
0)
mkFloat String
"+inf"  = Double -> Token
TokFloat (Double
1forall a. Fractional a => a -> a -> a
/Double
0)
mkFloat String
"-inf"  = Double -> Token
TokFloat (-Double
1forall a. Fractional a => a -> a -> a
/Double
0)
mkFloat (Char
'+':String
x) = Double -> Token
TokFloat (forall a. Read a => String -> a
read (ShowS
scrub String
x))
mkFloat String
x       = Double -> Token
TokFloat (forall a. Read a => String -> a
read (ShowS
scrub String
x))

-- | Construct a 'TokString' from a literal string lexeme.
mkLiteralString :: String -> Token
mkLiteralString :: String -> Token
mkLiteralString = String -> Token
TokString forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> [a]
tail forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> [a]
init

-- | Construct a 'TokString' from a basic string lexeme.
mkBasicString :: String -> Token
mkBasicString :: String -> Token
mkBasicString String
"" = forall a. HasCallStack => String -> a
error String
"processBasic: missing initializer"
mkBasicString (Char
_:String
start) = (String -> Token) -> String -> Token
enforceScalar String -> Token
TokString (ShowS
go String
start)
    where
        go :: ShowS
go [] = forall a. HasCallStack => String -> a
error String
"processBasic: missing terminator"
        go String
"\"" = String
""
        go (Char
'\\':Char
'"':String
xs) = Char
'"' forall a. a -> [a] -> [a]
: ShowS
go String
xs
        go (Char
'\\':Char
'\\':String
xs) = Char
'\\' forall a. a -> [a] -> [a]
: ShowS
go String
xs
        go (Char
'\\':Char
'b':String
xs) = Char
'\b' forall a. a -> [a] -> [a]
: ShowS
go String
xs
        go (Char
'\\':Char
'f':String
xs) = Char
'\f' forall a. a -> [a] -> [a]
: ShowS
go String
xs
        go (Char
'\\':Char
'n':String
xs) = Char
'\n' forall a. a -> [a] -> [a]
: ShowS
go String
xs
        go (Char
'\\':Char
'r':String
xs) = Char
'\r' forall a. a -> [a] -> [a]
: ShowS
go String
xs
        go (Char
'\\':Char
't':String
xs) = Char
'\t' forall a. a -> [a] -> [a]
: ShowS
go String
xs
        go (Char
'\\':Char
'u':Char
a:Char
b:Char
c:Char
d:String
xs) = Int -> Char
chr (forall a b. (a, b) -> a
fst (forall a. [a] -> a
head (forall a. (Eq a, Num a) => ReadS a
readHex [Char
a,Char
b,Char
c,Char
d]))) forall a. a -> [a] -> [a]
: ShowS
go String
xs
        go (Char
'\\':Char
'U':Char
a:Char
b:Char
c:Char
d:Char
e:Char
f:Char
g:Char
h:String
xs) = Int -> Char
chr (forall a b. (a, b) -> a
fst (forall a. [a] -> a
head (forall a. (Eq a, Num a) => ReadS a
readHex [Char
a,Char
b,Char
c,Char
d,Char
e,Char
f,Char
g,Char
h]))) forall a. a -> [a] -> [a]
: ShowS
go String
xs
        go (Char
x:String
xs) = Char
x forall a. a -> [a] -> [a]
: ShowS
go String
xs

-- | Construct a 'TokMlString' from a basic multi-line string lexeme.
mkMlBasicString :: String -> Token
mkMlBasicString :: String -> Token
mkMlBasicString String
str =
    (String -> Token) -> String -> Token
enforceScalar String -> Token
TokMlString
    case String
str of
        Char
'"':Char
'"':Char
'"':Char
'\r':Char
'\n':String
start -> ShowS
go String
start
        Char
'"':Char
'"':Char
'"':Char
'\n':String
start -> ShowS
go String
start
        Char
'"':Char
'"':Char
'"':String
start -> ShowS
go String
start
        String
_ -> forall a. HasCallStack => String -> a
error String
"processMlBasic: missing initializer"
    where
      go :: ShowS
go String
"\"\"\"" = String
""
      go (Char
'\\':Char
'"':String
xs) = Char
'"' forall a. a -> [a] -> [a]
: ShowS
go String
xs
      go (Char
'\\':Char
'\\':String
xs) = Char
'\\' forall a. a -> [a] -> [a]
: ShowS
go String
xs
      go (Char
'\\':Char
'b':String
xs) = Char
'\b' forall a. a -> [a] -> [a]
: ShowS
go String
xs
      go (Char
'\\':Char
'f':String
xs) = Char
'\f' forall a. a -> [a] -> [a]
: ShowS
go String
xs
      go (Char
'\\':Char
'n':String
xs) = Char
'\n' forall a. a -> [a] -> [a]
: ShowS
go String
xs
      go (Char
'\\':Char
'r':String
xs) = Char
'\r' forall a. a -> [a] -> [a]
: ShowS
go String
xs
      go (Char
'\\':Char
't':String
xs) = Char
'\t' forall a. a -> [a] -> [a]
: ShowS
go String
xs
      go (Char
'\\':Char
'u':Char
a:Char
b:Char
c:Char
d:String
xs) = Int -> Char
chr (forall a b. (a, b) -> a
fst (forall a. [a] -> a
head (forall a. (Eq a, Num a) => ReadS a
readHex [Char
a,Char
b,Char
c,Char
d]))) forall a. a -> [a] -> [a]
: ShowS
go String
xs
      go (Char
'\\':Char
'U':Char
a:Char
b:Char
c:Char
d:Char
e:Char
f:Char
g:Char
h:String
xs) = Int -> Char
chr (forall a b. (a, b) -> a
fst (forall a. [a] -> a
head (forall a. (Eq a, Num a) => ReadS a
readHex [Char
a,Char
b,Char
c,Char
d,Char
e,Char
f,Char
g,Char
h]))) forall a. a -> [a] -> [a]
: ShowS
go String
xs
      go (Char
'\\':Char
'\r':String
xs) = ShowS
go (forall a. (a -> Bool) -> [a] -> [a]
dropWhile Char -> Bool
isSpace String
xs)
      go (Char
'\\':Char
'\n':String
xs) = ShowS
go (forall a. (a -> Bool) -> [a] -> [a]
dropWhile Char -> Bool
isSpace String
xs)
      go (Char
'\\':Char
' ':String
xs)  = ShowS
go (forall a. (a -> Bool) -> [a] -> [a]
dropWhile Char -> Bool
isSpace String
xs)
      go (Char
'\\':Char
'\t':String
xs) = ShowS
go (forall a. (a -> Bool) -> [a] -> [a]
dropWhile Char -> Bool
isSpace String
xs)
      go (Char
x:String
xs) = Char
x forall a. a -> [a] -> [a]
: ShowS
go String
xs
      go [] = forall a. HasCallStack => String -> a
error String
"processMlBasic: missing terminator"

-- | Construct a 'TokMlString' from a literal multi-line string lexeme.
mkMlLiteralString :: String -> Token
mkMlLiteralString :: String -> Token
mkMlLiteralString String
str =
    String -> Token
TokMlString
    case String
str of
        Char
'\'':Char
'\'':Char
'\'':Char
'\r':Char
'\n':String
start -> ShowS
go String
start
        Char
'\'':Char
'\'':Char
'\'':Char
'\n':String
start -> ShowS
go String
start
        Char
'\'':Char
'\'':Char
'\'':String
start -> ShowS
go String
start
        String
_ -> forall a. HasCallStack => String -> a
error String
"processMlLiteral: mising initializer"
    where
        go :: ShowS
go String
"'''" = String
""
        go (Char
x:String
xs) = Char
x forall a. a -> [a] -> [a]
: ShowS
go String
xs
        go String
"" = forall a. HasCallStack => String -> a
error String
"processMlLiteral: missing terminator"

enforceScalar :: (String -> Token) -> String -> Token
enforceScalar :: (String -> Token) -> String -> Token
enforceScalar String -> Token
f String
str
    | forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any Char -> Bool
isInvalid String
str = String -> Token
TokError String
"string literal controls non-scalar value"
    | Bool
otherwise = String -> Token
f String
str
    where
        isInvalid :: Char -> Bool
isInvalid Char
x = Char
'\xd800' forall a. Ord a => a -> a -> Bool
<= Char
x Bool -> Bool -> Bool
&& Char
x forall a. Ord a => a -> a -> Bool
< Char
'\xe000'

-- | Make a 'TokError' from a lexical error message.
mkError :: String -> Token
mkError :: String -> Token
mkError String
str = String -> Token
TokError (String
"Lexical error: " forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show (forall a. [a] -> a
head String
str))

-- | Format strings for local date lexemes.
localDatePatterns :: [String]
localDatePatterns :: [String]
localDatePatterns = [String
"%Y-%m-%d"]

-- | Format strings for local time lexemes.
localTimePatterns :: [String]
localTimePatterns :: [String]
localTimePatterns = [String
"%H:%M:%S%Q"]

-- | Format strings for local datetime lexemes.
localDateTimePatterns :: [String]
localDateTimePatterns :: [String]
localDateTimePatterns =
    [String
"%Y-%m-%dT%H:%M:%S%Q",
    String
"%Y-%m-%d %H:%M:%S%Q"]

-- | Format strings for offset datetime lexemes.
offsetDateTimePatterns :: [String]
offsetDateTimePatterns :: [String]
offsetDateTimePatterns =
    [String
"%Y-%m-%dT%H:%M:%S%Q%Ez",String
"%Y-%m-%dT%H:%M:%S%QZ",
    String
"%Y-%m-%d %H:%M:%S%Q%Ez",String
"%Y-%m-%d %H:%M:%S%QZ"]