module Agda.Syntax.Parser.StringLiterals
( litString, litChar
) where
import Data.Char
import Agda.Syntax.Parser.Alex
import Agda.Syntax.Parser.Monad
import Agda.Syntax.Parser.Tokens
import Agda.Syntax.Parser.LookAhead
import Agda.Syntax.Position
import Agda.Syntax.Literal
import Agda.Utils.Char ( decDigit, hexDigit, octDigit )
import Agda.Utils.Tuple ( (-*-) )
litString :: LexAction Token
litString = stringToken '"' (\i s ->
return $ TokLiteral $ LitString (getRange i) s)
litChar :: LexAction Token
litChar = stringToken '\'' $ \i s ->
do case s of
[c] -> return $ TokLiteral $ LitChar (getRange i) c
_ -> lexError
"character literal must contain a single character"
litError :: String -> LookAhead a
litError msg =
do sync
liftP $ lexError $
"Lexical error in string or character literal: " ++ msg
stringToken :: Char -> (Interval -> String -> Parser tok) -> LexAction tok
stringToken del mkTok inp inp' n =
do setLastPos (backupPos $ lexPos inp')
setLexInput inp'
tok <- runLookAhead litError $ lexString del ""
i <- getParseInterval
mkTok i tok
lexString :: Char -> String -> LookAhead String
lexString del s =
do c <- nextChar
case c of
c | c == del -> sync >> return (reverse s)
'\\' ->
do c' <- nextChar
case c' of
'&' -> sync >> lexString del s
c | isSpace c -> sync >> lexStringGap del s
_ -> normalChar
_ -> normalChar
where
normalChar =
do rollback
c <- lexChar
lexString del (c:s)
lexStringGap :: Char -> String -> LookAhead String
lexStringGap del s =
do c <- eatNextChar
case c of
'\\' -> lexString del s
c | isSpace c -> lexStringGap del s
_ -> fail "non-space in string gap"
lexChar :: LookAhead Char
lexChar =
do c <- eatNextChar
case c of
'\\' -> lexEscape
_ -> return c
lexEscape :: LookAhead Char
lexEscape =
do c <- eatNextChar
case c of
'^' -> do c <- eatNextChar
if c >= '@' && c <= '_'
then return (chr (ord c - ord '@'))
else fail "invalid control character"
'x' -> readNum isHexDigit 16 hexDigit
'o' -> readNum isOctDigit 8 octDigit
x | isDigit x
-> readNumAcc isDigit 10 decDigit (decDigit x)
c ->
do esc <- match' c (map (id -*- return) sillyEscapeChars)
(fail "bad escape code")
sync
return esc
readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> LookAhead Char
readNum isDigit base conv =
do c <- eatNextChar
if isDigit c
then readNumAcc isDigit base conv (conv c)
else fail "non-digit in numeral"
readNumAcc :: (Char -> Bool) -> Int -> (Char -> Int) -> Int -> LookAhead Char
readNumAcc isDigit base conv i = scan i
where
scan i =
do inp <- getInput
c <- nextChar
case c of
c | isDigit c -> scan (i*base + conv c)
_ ->
do setInput inp
sync
if i >= ord minBound && i <= ord maxBound
then return (chr i)
else fail "character literal out of bounds"
sillyEscapeChars :: [(String, Char)]
sillyEscapeChars =
[ ("a", '\a')
, ("b", '\b')
, ("f", '\f')
, ("n", '\n')
, ("r", '\r')
, ("t", '\t')
, ("v", '\v')
, ("\\", '\\')
, ("\"", '\"')
, ("'", '\'')
, ("NUL", '\NUL')
, ("SOH", '\SOH')
, ("STX", '\STX')
, ("ETX", '\ETX')
, ("EOT", '\EOT')
, ("ENQ", '\ENQ')
, ("ACK", '\ACK')
, ("BEL", '\BEL')
, ("BS", '\BS')
, ("HT", '\HT')
, ("LF", '\LF')
, ("VT", '\VT')
, ("FF", '\FF')
, ("CR", '\CR')
, ("SO", '\SO')
, ("SI", '\SI')
, ("DLE", '\DLE')
, ("DC1", '\DC1')
, ("DC2", '\DC2')
, ("DC3", '\DC3')
, ("DC4", '\DC4')
, ("NAK", '\NAK')
, ("SYN", '\SYN')
, ("ETB", '\ETB')
, ("CAN", '\CAN')
, ("EM", '\EM')
, ("SUB", '\SUB')
, ("ESC", '\ESC')
, ("FS", '\FS')
, ("GS", '\GS')
, ("RS", '\RS')
, ("US", '\US')
, ("SP", '\SP')
, ("DEL", '\DEL')
]