{-# LANGUAGE PackageImports #-}

module Data.String.Interpolate.Parse
  ( InterpSegment(..), parseInterpSegments, dosToUnix )
where

import           Data.Char
import qualified "base" Numeric as N

data InterpSegment = Expression String | Verbatim String
  deriving (Eq, Show)

-- |
-- Given the raw input from a quasiquote, parse it into the information
-- we need to output the actual expression.
--
-- Returns an error message if parsing fails.
parseInterpSegments :: String -> Either String [InterpSegment]
parseInterpSegments = go ""
  where go :: String -> String -> Either String [InterpSegment]
        go acc parsee = case parsee of
          "" -> Right [Verbatim $ reverse acc]
          '\\':'#':rest -> go ('#':acc) rest
          '\\':_rest -> case unescapeChar parsee of
            (Nothing, rest) -> go acc rest
            (Just c, rest) -> go (c:acc) rest
          '#':'{':rest -> case span (/= '}') rest of
            (expr, _:rest') ->
              ((Verbatim . reverse) acc :) . (Expression expr :) <$> go "" rest'
            (_, "") -> Left "unterminated #{...} interpolation"
          c:cs -> go (c:acc) cs

dosToUnix :: String -> String
dosToUnix = go
  where go xs = case xs of
          '\r' : '\n' : ys -> '\n' : go ys
          y : ys           -> y : go ys
          []               -> []

-- |
-- Haskell 2010 character unescaping, see:
-- <http://www.haskell.org/onlinereport/haskell2010/haskellch2.html#x7-200002.6>
--
-- Unescape the very first backslashed character of the string, if it results in
-- a character. Note that there is an escape sequence that doesn't result in
-- a character (\&).
unescapeChar :: String -> (Maybe Char, String)
unescapeChar input = case input of
  "" -> (Nothing, input)
  '\\' : 'x' : x : xs | isHexDigit x -> case span isHexDigit xs of
    (ys, zs) -> ((Just . chr . readHex $ x:ys), zs)
  '\\' : 'o' : x : xs | isOctDigit x -> case span isOctDigit xs of
    (ys, zs) -> ((Just . chr . readOct $ x:ys), zs)
  '\\' : x : xs | isDigit x -> case span isDigit xs of
    (ys, zs) -> ((Just . chr . read $ x:ys), zs)
  '\\' : input_ -> case input_ of
    '\\' : xs        -> (Just ('\\'), xs)
    'a' : xs         -> (Just ('\a'), xs)
    'b' : xs         -> (Just ('\b'), xs)
    'f' : xs         -> (Just ('\f'), xs)
    'n' : xs         -> (Just ('\n'), xs)
    'r' : xs         -> (Just ('\r'), xs)
    't' : xs         -> (Just ('\t'), xs)
    'v' : xs         -> (Just ('\v'), xs)
    '&' : xs         -> (Nothing, xs)
    'N':'U':'L' : xs -> (Just ('\NUL'), xs)
    'S':'O':'H' : xs -> (Just ('\SOH'), xs)
    'S':'T':'X' : xs -> (Just ('\STX'), xs)
    'E':'T':'X' : xs -> (Just ('\ETX'), xs)
    'E':'O':'T' : xs -> (Just ('\EOT'), xs)
    'E':'N':'Q' : xs -> (Just ('\ENQ'), xs)
    'A':'C':'K' : xs -> (Just ('\ACK'), xs)
    'B':'E':'L' : xs -> (Just ('\BEL'), xs)
    'B':'S' : xs     -> (Just ('\BS'), xs)
    'H':'T' : xs     -> (Just ('\HT'), xs)
    'L':'F' : xs     -> (Just ('\LF'), xs)
    'V':'T' : xs     -> (Just ('\VT'), xs)
    'F':'F' : xs     -> (Just ('\FF'), xs)
    'C':'R' : xs     -> (Just ('\CR'), xs)
    'S':'O' : xs     -> (Just ('\SO'), xs)
    'S':'I' : xs     -> (Just ('\SI'), xs)
    'D':'L':'E' : xs -> (Just ('\DLE'), xs)
    'D':'C':'1' : xs -> (Just ('\DC1'), xs)
    'D':'C':'2' : xs -> (Just ('\DC2'), xs)
    'D':'C':'3' : xs -> (Just ('\DC3'), xs)
    'D':'C':'4' : xs -> (Just ('\DC4'), xs)
    'N':'A':'K' : xs -> (Just ('\NAK'), xs)
    'S':'Y':'N' : xs -> (Just ('\SYN'), xs)
    'E':'T':'B' : xs -> (Just ('\ETB'), xs)
    'C':'A':'N' : xs -> (Just ('\CAN'), xs)
    'E':'M' : xs     -> (Just ('\EM'), xs)
    'S':'U':'B' : xs -> (Just ('\SUB'), xs)
    'E':'S':'C' : xs -> (Just ('\ESC'), xs)
    'F':'S' : xs     -> (Just ('\FS'), xs)
    'G':'S' : xs     -> (Just ('\GS'), xs)
    'R':'S' : xs     -> (Just ('\RS'), xs)
    'U':'S' : xs     -> (Just ('\US'), xs)
    'S':'P' : xs     -> (Just ('\SP'), xs)
    'D':'E':'L' : xs -> (Just ('\DEL'), xs)
    '^':'@' : xs     -> (Just ('\^@'), xs)
    '^':'A' : xs     -> (Just ('\^A'), xs)
    '^':'B' : xs     -> (Just ('\^B'), xs)
    '^':'C' : xs     -> (Just ('\^C'), xs)
    '^':'D' : xs     -> (Just ('\^D'), xs)
    '^':'E' : xs     -> (Just ('\^E'), xs)
    '^':'F' : xs     -> (Just ('\^F'), xs)
    '^':'G' : xs     -> (Just ('\^G'), xs)
    '^':'H' : xs     -> (Just ('\^H'), xs)
    '^':'I' : xs     -> (Just ('\^I'), xs)
    '^':'J' : xs     -> (Just ('\^J'), xs)
    '^':'K' : xs     -> (Just ('\^K'), xs)
    '^':'L' : xs     -> (Just ('\^L'), xs)
    '^':'M' : xs     -> (Just ('\^M'), xs)
    '^':'N' : xs     -> (Just ('\^N'), xs)
    '^':'O' : xs     -> (Just ('\^O'), xs)
    '^':'P' : xs     -> (Just ('\^P'), xs)
    '^':'Q' : xs     -> (Just ('\^Q'), xs)
    '^':'R' : xs     -> (Just ('\^R'), xs)
    '^':'S' : xs     -> (Just ('\^S'), xs)
    '^':'T' : xs     -> (Just ('\^T'), xs)
    '^':'U' : xs     -> (Just ('\^U'), xs)
    '^':'V' : xs     -> (Just ('\^V'), xs)
    '^':'W' : xs     -> (Just ('\^W'), xs)
    '^':'X' : xs     -> (Just ('\^X'), xs)
    '^':'Y' : xs     -> (Just ('\^Y'), xs)
    '^':'Z' : xs     -> (Just ('\^Z'), xs)
    '^':'[' : xs     -> (Just ('\^['), xs)
    '^':'\\' : xs    -> (Just ('\^\'), xs)
    '^':']' : xs     -> (Just ('\^]'), xs)
    '^':'^' : xs     -> (Just ('\^^'), xs)
    '^':'_' : xs     -> (Just ('\^_'), xs)
    xs               -> (Nothing, xs)
  x:xs -> (Just x, xs)

  where readHex :: String -> Int
        readHex xs = case N.readHex xs of
          [(n, "")] -> n
          _         -> error "Data.String.Interpolate.Util.readHex: no parse"

        readOct :: String -> Int
        readOct xs = case N.readOct xs of
          [(n, "")] -> n
          _         -> error "Data.String.Interpolate.Util.readHex: no parse"