{-# LANGUAGE CPP #-}

#ifndef MIN_VERSION_base
#define MIN_VERSION_base(x,y,z) 1
#endif

module Language.Lua.StringLiteral
  ( interpretStringLiteral
  , constructStringLiteral
  ) where

import           Data.Char (ord, chr, isNumber, isPrint, isAscii)
import           Data.ByteString.Lazy (ByteString)
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Builder as B
import qualified Data.ByteString.Lazy.Char8 as B8
import           Data.List (foldl')
import           Data.Bits ((.&.),shiftR)
import           Numeric (showHex)

#if !(MIN_VERSION_base(4,8,0))
import           Data.Monoid (mempty, mappend, mconcat)
#endif

skipWS :: String -> String
skipWS (' '  : rest) = skipWS rest
skipWS ('\n' : rest) = skipWS rest
skipWS ('\r' : rest) = skipWS rest
skipWS ('\f' : rest) = skipWS rest
skipWS ('\t' : rest) = skipWS rest
skipWS ('\v' : rest) = skipWS rest
skipWS str           = str

hexToInt :: Char -> Int
hexToInt c =
  case c of
    'A' -> 10
    'a' -> 10
    'B' -> 11
    'b' -> 11
    'C' -> 12
    'c' -> 12
    'D' -> 13
    'd' -> 13
    'E' -> 14
    'e' -> 14
    'F' -> 15
    'f' -> 15
    _   -> decToNum c

{-# INLINE decToNum #-}
decToNum :: Char -> Int
decToNum c = fromEnum c - fromEnum '0'


interpretStringLiteral :: String -> Maybe ByteString
interpretStringLiteral xxs =
  case xxs of
    '\'':xs -> Just (decodeEscapes (dropLast 1 xs))
    '"':xs  -> Just (decodeEscapes (dropLast 1 xs))
    '[':xs  -> removeLongQuotes xs
    _       -> Nothing

-- | Long-quoted string literals have no escapes.
-- A leading newline on a long quoted string literal is ignored.
removeLongQuotes :: String -> Maybe ByteString
removeLongQuotes str =
  case span (=='=') str of
    (eqs,'[':'\n':xs) -> go (dropLast (2+length eqs) xs)
    (eqs,'[':     xs) -> go (dropLast (2+length eqs) xs)
    _                 -> Nothing
  where
  go = Just . B.toLazyByteString . mconcat . map encodeChar

dropLast :: Int -> [a] -> [a]
dropLast n xs = take (length xs - n) xs

decodeEscapes :: String -> ByteString
decodeEscapes = B.toLazyByteString . aux
  where
  aux xxs =
    case xxs of
      [] -> mempty
      '\\' : 'x' : h1 : h2 : rest ->
        B.word8 (fromIntegral (hexToInt h1 * 16 + hexToInt h2)) `mappend` aux rest

      '\\' : 'u' : '{' : rest ->
        case break (=='}') rest of
          (ds,_:rest')
             | code <= 0x10ffff -> encodeChar (chr code) `mappend` aux rest'
             where code = foldl' (\acc d -> acc * 16 + hexToInt d) 0 ds
          _ -> encodeChar '\xFFFD' `mappend` aux (dropWhile (/='}') rest)

      '\\' : c1 : c2 : c3 : rest
        | isNumber c1 && isNumber c2 && isNumber c3 ->
            let code = decToNum c1 * 100 + decToNum c2 * 10 + decToNum c3
            in B.word8 (fromIntegral code) `mappend` aux rest

      '\\' : c1 : c2 : rest
        | isNumber c1 && isNumber c2 ->
            let code = decToNum c1 * 10 + decToNum c2
            in B.word8 (fromIntegral code) `mappend` aux rest

      '\\' : c1 : rest
        | isNumber c1 -> B.word8 (fromIntegral (decToNum c1)) `mappend` aux rest

      '\\' : 'a'  : rest -> B.char8 '\a' `mappend` aux rest
      '\\' : 'b'  : rest -> B.char8 '\b' `mappend` aux rest
      '\\' : 'f'  : rest -> B.char8 '\f' `mappend` aux rest
      '\\' : 'n'  : rest -> B.char8 '\n' `mappend` aux rest
      '\\' : '\n' : rest -> B.char8 '\n' `mappend` aux rest
      '\\' : 'r'  : rest -> B.char8 '\r' `mappend` aux rest
      '\\' : 't'  : rest -> B.char8 '\t' `mappend` aux rest
      '\\' : 'v'  : rest -> B.char8 '\v' `mappend` aux rest
      '\\' : '\\' : rest -> B.char8 '\\' `mappend` aux rest
      '\\' : '"'  : rest -> B.char8 '"' `mappend` aux rest
      '\\' : '\'' : rest -> B.char8 '\'' `mappend` aux rest
      '\\' : 'z'  : rest -> aux (skipWS rest)
      c : rest -> encodeChar c `mappend` aux rest

-- | Convert a string literal body to string literal syntax
constructStringLiteral :: ByteString -> String
constructStringLiteral bs = '"' : aux 0
  where
  aux i
    | i >= B.length bs = "\""
    | otherwise =
    case B8.index bs i of
      '\a' -> '\\' : 'a'  : aux (i+1)
      '\b' -> '\\' : 'b'  : aux (i+1)
      '\f' -> '\\' : 'f'  : aux (i+1)
      '\n' -> '\\' : 'n'  : aux (i+1)
      '\r' -> '\\' : 'r'  : aux (i+1)
      '\t' -> '\\' : 't'  : aux (i+1)
      '\v' -> '\\' : 'v'  : aux (i+1)
      '\\' -> '\\' : '\\' : aux (i+1)
      '\"' -> '\\' : '"'  : aux (i+1)
      x | isPrint x && isAscii x -> x : aux (i+1)
        | x <= '\x0f' -> '\\' : 'x' : '0' : showHex (ord x) (aux (i+1))
        | otherwise   -> '\\' : 'x'       : showHex (ord x) (aux (i+1))

encodeChar :: Char -> B.Builder
encodeChar c
   | oc <= 0x7f       = asByte oc

   | oc <= 0x7ff      = asByte (0xc0 + (oc `shiftR` 6))
              `mappend` asByte (0x80 + oc .&. 0x3f)

   | oc <= 0xffff     = asByte (0xe0 + (oc `shiftR` 12))
              `mappend` asByte (0x80 + ((oc `shiftR` 6) .&. 0x3f))
              `mappend` asByte (0x80 + oc .&. 0x3f)

   | otherwise        = asByte (0xf0 + (oc `shiftR` 18))
              `mappend` asByte (0x80 + ((oc `shiftR` 12) .&. 0x3f))
              `mappend` asByte (0x80 + ((oc `shiftR` 6) .&. 0x3f))
              `mappend` asByte (0x80 + oc .&. 0x3f)
  where
    asByte = B.word8 . fromIntegral
    oc = ord c