{-| The code to lex string and character literals. Basically the same code
    as in GHC.
-}
module Agda.Syntax.Parser.StringLiterals
    ( litString, litChar
    ) where

import Data.Bifunctor
import Data.Char
import qualified Data.Text as T

import Agda.Syntax.Common (pattern Ranged)
import Agda.Syntax.Parser.Alex
import Agda.Syntax.Parser.Monad
import Agda.Syntax.Parser.Tokens
import Agda.Syntax.Parser.LookAhead
import Agda.Syntax.Position
import Agda.Syntax.Literal

{--------------------------------------------------------------------------
    Exported actions
 --------------------------------------------------------------------------}

-- | Lex a string literal. Assumes that a double quote has been lexed.
litString :: LexAction Token
litString :: LexAction Token
litString = Char -> (Interval -> String -> Parser Token) -> LexAction Token
forall tok.
Char -> (Interval -> String -> Parser tok) -> LexAction tok
stringToken Char
'"' ((Interval -> String -> Parser Token) -> LexAction Token)
-> (Interval -> String -> Parser Token) -> LexAction Token
forall a b. (a -> b) -> a -> b
$ \ Interval
i String
s ->
  Token -> Parser Token
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> Parser Token) -> Token -> Parser Token
forall a b. (a -> b) -> a -> b
$ RLiteral -> Token
TokLiteral (RLiteral -> Token) -> RLiteral -> Token
forall a b. (a -> b) -> a -> b
$ Range -> Literal -> RLiteral
forall a. Range -> a -> Ranged a
Ranged (Interval -> Range
forall a. HasRange a => a -> Range
getRange Interval
i) (Literal -> RLiteral) -> Literal -> RLiteral
forall a b. (a -> b) -> a -> b
$ Text -> Literal
LitString (Text -> Literal) -> Text -> Literal
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack String
s

{-| Lex a character literal. Assumes that a single quote has been lexed.  A
    character literal is lexed in exactly the same way as a string literal.
    Only before returning the token do we check that the lexed string is of
    length 1. This is maybe not the most efficient way of doing things, but on
    the other hand it will only be inefficient if there is a lexical error.
-}
litChar :: LexAction Token
litChar :: LexAction Token
litChar = Char -> (Interval -> String -> Parser Token) -> LexAction Token
forall tok.
Char -> (Interval -> String -> Parser tok) -> LexAction tok
stringToken Char
'\'' ((Interval -> String -> Parser Token) -> LexAction Token)
-> (Interval -> String -> Parser Token) -> LexAction Token
forall a b. (a -> b) -> a -> b
$ \ Interval
i -> \case
  [Char
c] -> Token -> Parser Token
forall (m :: * -> *) a. Monad m => a -> m a
return (Token -> Parser Token) -> Token -> Parser Token
forall a b. (a -> b) -> a -> b
$ RLiteral -> Token
TokLiteral (RLiteral -> Token) -> RLiteral -> Token
forall a b. (a -> b) -> a -> b
$ Range -> Literal -> RLiteral
forall a. Range -> a -> Ranged a
Ranged (Interval -> Range
forall a. HasRange a => a -> Range
getRange Interval
i) (Literal -> RLiteral) -> Literal -> RLiteral
forall a b. (a -> b) -> a -> b
$ Char -> Literal
LitChar Char
c
  String
_   -> String -> Parser Token
forall a. String -> Parser a
lexError String
"character literal must contain a single character"


{--------------------------------------------------------------------------
    Errors
 --------------------------------------------------------------------------}

-- | Custom error function.
litError :: String -> LookAhead a
litError :: String -> LookAhead a
litError String
msg =
    do  LookAhead ()
sync
        Parser a -> LookAhead a
forall a. Parser a -> LookAhead a
liftP (Parser a -> LookAhead a) -> Parser a -> LookAhead a
forall a b. (a -> b) -> a -> b
$ String -> Parser a
forall a. String -> Parser a
lexError (String -> Parser a) -> String -> Parser a
forall a b. (a -> b) -> a -> b
$
            String
"Lexical error in string or character literal: " String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
msg


{--------------------------------------------------------------------------
    The meat
 --------------------------------------------------------------------------}

-- | The general function to lex a string or character literal token. The
--   character argument is the delimiter (@\"@ for strings and @\'@ for
--   characters).
stringToken :: Char -> (Interval -> String -> Parser tok) -> LexAction tok
stringToken :: Char -> (Interval -> String -> Parser tok) -> LexAction tok
stringToken Char
del Interval -> String -> Parser tok
mkTok = (PreviousInput -> PreviousInput -> TokenLength -> Parser tok)
-> LexAction tok
forall r.
(PreviousInput -> PreviousInput -> TokenLength -> Parser r)
-> LexAction r
LexAction ((PreviousInput -> PreviousInput -> TokenLength -> Parser tok)
 -> LexAction tok)
-> (PreviousInput -> PreviousInput -> TokenLength -> Parser tok)
-> LexAction tok
forall a b. (a -> b) -> a -> b
$ \ PreviousInput
inp PreviousInput
inp' TokenLength
n ->
    do  PositionWithoutFile -> Parser ()
setLastPos (PositionWithoutFile -> PositionWithoutFile
forall a. Position' a -> Position' a
backupPos (PositionWithoutFile -> PositionWithoutFile)
-> PositionWithoutFile -> PositionWithoutFile
forall a b. (a -> b) -> a -> b
$ PreviousInput -> PositionWithoutFile
lexPos PreviousInput
inp')
        PreviousInput -> Parser ()
setLexInput PreviousInput
inp'
        -- TODO: Should setPrevToken be run here? Compare with
        -- Agda.Syntax.Parser.LexActions.token.
        String
tok <- (forall b. String -> LookAhead b)
-> LookAhead String -> Parser String
forall a.
(forall b. String -> LookAhead b) -> LookAhead a -> Parser a
runLookAhead forall b. String -> LookAhead b
litError (LookAhead String -> Parser String)
-> LookAhead String -> Parser String
forall a b. (a -> b) -> a -> b
$ Char -> String -> LookAhead String
lexString Char
del String
""
        Interval
i   <- Parser Interval
getParseInterval
        Interval -> String -> Parser tok
mkTok Interval
i String
tok


-- | This is where the work happens. The string argument is an accumulating
--   parameter for the string being lexed.
lexString :: Char -> String -> LookAhead String
lexString :: Char -> String -> LookAhead String
lexString Char
del String
s =

    do  Char
c <- LookAhead Char
nextChar
        case Char
c of

            Char
c | Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
del  -> LookAhead ()
sync LookAhead () -> LookAhead String -> LookAhead String
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> String -> LookAhead String
forall (m :: * -> *) a. Monad m => a -> m a
return (String -> String
forall a. [a] -> [a]
reverse String
s)

            Char
'\\' ->
                do  Char
c' <- LookAhead Char
nextChar
                    case Char
c' of
                        Char
'&'             -> LookAhead ()
sync LookAhead () -> LookAhead String -> LookAhead String
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Char -> String -> LookAhead String
lexString Char
del String
s
                        Char
c | Char -> Bool
isSpace Char
c   -> LookAhead ()
sync LookAhead () -> LookAhead String -> LookAhead String
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Char -> String -> LookAhead String
lexStringGap Char
del String
s
                        Char
_               -> LookAhead String
normalChar

            Char
_ -> LookAhead String
normalChar
    where
        normalChar :: LookAhead String
normalChar =
            do  LookAhead ()
rollback
                Char
c <- LookAhead Char
lexChar
                Char -> String -> LookAhead String
lexString Char
del (Char
cChar -> String -> String
forall a. a -> [a] -> [a]
:String
s)


-- | A string gap consists of whitespace (possibly including line breaks)
--   enclosed in backslashes. The gap is not part of the resulting string.
lexStringGap :: Char -> String -> LookAhead String
lexStringGap :: Char -> String -> LookAhead String
lexStringGap Char
del String
s =
    do  Char
c <- LookAhead Char
eatNextChar
        case Char
c of
            Char
'\\'            -> Char -> String -> LookAhead String
lexString Char
del String
s
            Char
c | Char -> Bool
isSpace Char
c   -> Char -> String -> LookAhead String
lexStringGap Char
del String
s
            Char
_               -> String -> LookAhead String
forall b. String -> LookAhead b
lookAheadError String
"non-space in string gap"

-- | Lex a single character.
lexChar :: LookAhead Char
lexChar :: LookAhead Char
lexChar =
    do  Char
c <- LookAhead Char
eatNextChar
        case Char
c of
            Char
'\\'    -> LookAhead Char
lexEscape
            Char
_       -> Char -> LookAhead Char
forall (m :: * -> *) a. Monad m => a -> m a
return Char
c

-- | Lex an escaped character. Assumes the backslash has been lexed.
lexEscape :: LookAhead Char
lexEscape :: LookAhead Char
lexEscape =
    do  Char
c <- LookAhead Char
eatNextChar
        case Char
c of
            Char
'^'     -> do Char
c <- LookAhead Char
eatNextChar
                          if Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
>= Char
'@' Bool -> Bool -> Bool
&& Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
<= Char
'_'
                            then Char -> LookAhead Char
forall (m :: * -> *) a. Monad m => a -> m a
return (TokenLength -> Char
chr (Char -> TokenLength
ord Char
c TokenLength -> TokenLength -> TokenLength
forall a. Num a => a -> a -> a
- Char -> TokenLength
ord Char
'@'))
                            else String -> LookAhead Char
forall b. String -> LookAhead b
lookAheadError String
"invalid control character"

            Char
'x'     -> (Char -> Bool)
-> TokenLength -> (Char -> TokenLength) -> LookAhead Char
readNum Char -> Bool
isHexDigit TokenLength
16 Char -> TokenLength
digitToInt
            Char
'o'     -> (Char -> Bool)
-> TokenLength -> (Char -> TokenLength) -> LookAhead Char
readNum Char -> Bool
isOctDigit  TokenLength
8 Char -> TokenLength
digitToInt
            Char
x | Char -> Bool
isDigit Char
x
                    -> (Char -> Bool)
-> TokenLength
-> (Char -> TokenLength)
-> TokenLength
-> LookAhead Char
readNumAcc Char -> Bool
isDigit TokenLength
10 Char -> TokenLength
digitToInt (Char -> TokenLength
digitToInt Char
x)

            Char
c ->
                -- Try to match the input (starting with c) against the
                -- silly escape codes.
                do  Char
esc <- Char
-> [(String, LookAhead Char)] -> LookAhead Char -> LookAhead Char
forall a.
Char -> [(String, LookAhead a)] -> LookAhead a -> LookAhead a
match' Char
c (((String, Char) -> (String, LookAhead Char))
-> [(String, Char)] -> [(String, LookAhead Char)]
forall a b. (a -> b) -> [a] -> [b]
map ((Char -> LookAhead Char)
-> (String, Char) -> (String, LookAhead Char)
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second Char -> LookAhead Char
forall (m :: * -> *) a. Monad m => a -> m a
return) [(String, Char)]
sillyEscapeChars)
                                    (String -> LookAhead Char
forall b. String -> LookAhead b
lookAheadError String
"bad escape code")
                    LookAhead ()
sync
                    Char -> LookAhead Char
forall (m :: * -> *) a. Monad m => a -> m a
return Char
esc

-- | Read a number in the specified base.
readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> LookAhead Char
readNum :: (Char -> Bool)
-> TokenLength -> (Char -> TokenLength) -> LookAhead Char
readNum Char -> Bool
isDigit TokenLength
base Char -> TokenLength
conv =
    do  Char
c <- LookAhead Char
eatNextChar
        if Char -> Bool
isDigit Char
c
            then (Char -> Bool)
-> TokenLength
-> (Char -> TokenLength)
-> TokenLength
-> LookAhead Char
readNumAcc Char -> Bool
isDigit TokenLength
base Char -> TokenLength
conv (Char -> TokenLength
conv Char
c)
            else String -> LookAhead Char
forall b. String -> LookAhead b
lookAheadError String
"non-digit in numeral"

-- | Same as 'readNum' but with an accumulating parameter.
readNumAcc :: (Char -> Bool) -> Int -> (Char -> Int) -> Int -> LookAhead Char
readNumAcc :: (Char -> Bool)
-> TokenLength
-> (Char -> TokenLength)
-> TokenLength
-> LookAhead Char
readNumAcc Char -> Bool
isDigit TokenLength
base Char -> TokenLength
conv TokenLength
i = TokenLength -> LookAhead Char
scan TokenLength
i
    where
        scan :: TokenLength -> LookAhead Char
scan TokenLength
i =
            do  PreviousInput
inp <- LookAhead PreviousInput
getInput
                Char
c   <- LookAhead Char
nextChar
                case Char
c of
                    Char
c | Char -> Bool
isDigit Char
c -> TokenLength -> LookAhead Char
scan (TokenLength
iTokenLength -> TokenLength -> TokenLength
forall a. Num a => a -> a -> a
*TokenLength
base TokenLength -> TokenLength -> TokenLength
forall a. Num a => a -> a -> a
+ Char -> TokenLength
conv Char
c)
                    Char
_             ->
                        do  PreviousInput -> LookAhead ()
setInput PreviousInput
inp
                            LookAhead ()
sync
                            if TokenLength
i TokenLength -> TokenLength -> Bool
forall a. Ord a => a -> a -> Bool
>= Char -> TokenLength
ord Char
forall a. Bounded a => a
minBound Bool -> Bool -> Bool
&& TokenLength
i TokenLength -> TokenLength -> Bool
forall a. Ord a => a -> a -> Bool
<= Char -> TokenLength
ord Char
forall a. Bounded a => a
maxBound
                                then Char -> LookAhead Char
forall (m :: * -> *) a. Monad m => a -> m a
return (TokenLength -> Char
chr TokenLength
i)
                                else String -> LookAhead Char
forall b. String -> LookAhead b
lookAheadError String
"character literal out of bounds"

-- | The escape codes.
sillyEscapeChars :: [(String, Char)]
sillyEscapeChars :: [(String, Char)]
sillyEscapeChars =
    [ (String
"a", Char
'\a')
    , (String
"b", Char
'\b')
    , (String
"f", Char
'\f')
    , (String
"n", Char
'\n')
    , (String
"r", Char
'\r')
    , (String
"t", Char
'\t')
    , (String
"v", Char
'\v')
    , (String
"\\", Char
'\\')
    , (String
"\"", Char
'\"')
    , (String
"'", Char
'\'')
    , (String
"NUL", Char
'\NUL')
    , (String
"SOH", Char
'\SOH')
    , (String
"STX", Char
'\STX')
    , (String
"ETX", Char
'\ETX')
    , (String
"EOT", Char
'\EOT')
    , (String
"ENQ", Char
'\ENQ')
    , (String
"ACK", Char
'\ACK')
    , (String
"BEL", Char
'\BEL')
    , (String
"BS", Char
'\BS')
    , (String
"HT", Char
'\HT')
    , (String
"LF", Char
'\LF')
    , (String
"VT", Char
'\VT')
    , (String
"FF", Char
'\FF')
    , (String
"CR", Char
'\CR')
    , (String
"SO", Char
'\SO')
    , (String
"SI", Char
'\SI')
    , (String
"DLE", Char
'\DLE')
    , (String
"DC1", Char
'\DC1')
    , (String
"DC2", Char
'\DC2')
    , (String
"DC3", Char
'\DC3')
    , (String
"DC4", Char
'\DC4')
    , (String
"NAK", Char
'\NAK')
    , (String
"SYN", Char
'\SYN')
    , (String
"ETB", Char
'\ETB')
    , (String
"CAN", Char
'\CAN')
    , (String
"EM", Char
'\EM')
    , (String
"SUB", Char
'\SUB')
    , (String
"ESC", Char
'\ESC')
    , (String
"FS", Char
'\FS')
    , (String
"GS", Char
'\GS')
    , (String
"RS", Char
'\RS')
    , (String
"US", Char
'\US')
    , (String
"SP", Char
'\SP')
    , (String
"DEL", Char
'\DEL')
    ]