module Agda.Syntax.Parser.StringLiterals
    ( litString, litChar
    ) where
import Data.Char
import Agda.Syntax.Parser.Alex
import Agda.Syntax.Parser.Monad
import Agda.Syntax.Parser.Tokens
import Agda.Syntax.Parser.LookAhead
import Agda.Syntax.Position
import Agda.Syntax.Literal
import Agda.Utils.Tuple  ( (-*-) )
litString :: LexAction Token
litString = stringToken '"' (\i s ->
              return $ TokLiteral $ LitString (getRange i) s)
litChar :: LexAction Token
litChar = stringToken '\'' $ \i s ->
            do  case s of
                    [c] -> return $ TokLiteral $ LitChar (getRange i) c
                    _   -> lexError
                            "character literal must contain a single character"
litError :: String -> LookAhead a
litError msg =
    do  sync
        liftP $ lexError $
            "Lexical error in string or character literal: " ++ msg
stringToken :: Char -> (Interval -> String -> Parser tok) -> LexAction tok
stringToken del mkTok inp inp' n =
    do  setLastPos (backupPos $ lexPos inp')
        setLexInput inp'
        
        
        tok <- runLookAhead litError $ lexString del ""
        i   <- getParseInterval
        mkTok i tok
lexString :: Char -> String -> LookAhead String
lexString del s =
    do  c <- nextChar
        case c of
            c | c == del  -> sync >> return (reverse s)
            '\\' ->
                do  c' <- nextChar
                    case c' of
                        '&'             -> sync >> lexString del s
                        c | isSpace c   -> sync >> lexStringGap del s
                        _               -> normalChar
            _ -> normalChar
    where
        normalChar =
            do  rollback
                c <- lexChar
                lexString del (c:s)
lexStringGap :: Char -> String -> LookAhead String
lexStringGap del s =
    do  c <- eatNextChar
        case c of
            '\\'            -> lexString del s
            c | isSpace c   -> lexStringGap del s
            _               -> lookAheadError "non-space in string gap"
lexChar :: LookAhead Char
lexChar =
    do  c <- eatNextChar
        case c of
            '\\'    -> lexEscape
            _       -> return c
lexEscape :: LookAhead Char
lexEscape =
    do  c <- eatNextChar
        case c of
            '^'     -> do c <- eatNextChar
                          if c >= '@' && c <= '_'
                            then return (chr (ord c - ord '@'))
                            else lookAheadError "invalid control character"
            'x'     -> readNum isHexDigit 16 digitToInt
            'o'     -> readNum isOctDigit  8 digitToInt
            x | isDigit x
                    -> readNumAcc isDigit 10 digitToInt (digitToInt x)
            c ->
                
                
                do  esc <- match' c (map (id -*- return) sillyEscapeChars)
                                    (lookAheadError "bad escape code")
                    sync
                    return esc
readNum :: (Char -> Bool) -> Int -> (Char -> Int) -> LookAhead Char
readNum isDigit base conv =
    do  c <- eatNextChar
        if isDigit c
            then readNumAcc isDigit base conv (conv c)
            else lookAheadError "non-digit in numeral"
readNumAcc :: (Char -> Bool) -> Int -> (Char -> Int) -> Int -> LookAhead Char
readNumAcc isDigit base conv i = scan i
    where
        scan i =
            do  inp <- getInput
                c   <- nextChar
                case c of
                    c | isDigit c -> scan (i*base + conv c)
                    _             ->
                        do  setInput inp
                            sync
                            if i >= ord minBound && i <= ord maxBound
                                then return (chr i)
                                else lookAheadError "character literal out of bounds"
sillyEscapeChars :: [(String, Char)]
sillyEscapeChars =
    [ ("a", '\a')
    , ("b", '\b')
    , ("f", '\f')
    , ("n", '\n')
    , ("r", '\r')
    , ("t", '\t')
    , ("v", '\v')
    , ("\\", '\\')
    , ("\"", '\"')
    , ("'", '\'')
    , ("NUL", '\NUL')
    , ("SOH", '\SOH')
    , ("STX", '\STX')
    , ("ETX", '\ETX')
    , ("EOT", '\EOT')
    , ("ENQ", '\ENQ')
    , ("ACK", '\ACK')
    , ("BEL", '\BEL')
    , ("BS", '\BS')
    , ("HT", '\HT')
    , ("LF", '\LF')
    , ("VT", '\VT')
    , ("FF", '\FF')
    , ("CR", '\CR')
    , ("SO", '\SO')
    , ("SI", '\SI')
    , ("DLE", '\DLE')
    , ("DC1", '\DC1')
    , ("DC2", '\DC2')
    , ("DC3", '\DC3')
    , ("DC4", '\DC4')
    , ("NAK", '\NAK')
    , ("SYN", '\SYN')
    , ("ETB", '\ETB')
    , ("CAN", '\CAN')
    , ("EM", '\EM')
    , ("SUB", '\SUB')
    , ("ESC", '\ESC')
    , ("FS", '\FS')
    , ("GS", '\GS')
    , ("RS", '\RS')
    , ("US", '\US')
    , ("SP", '\SP')
    , ("DEL", '\DEL')
    ]