-- |
-- Module      : Data.String.Interpolate.Parse
-- Copyright   : (c) William Yao, 2019-2020
-- License     : BSD-3
-- Maintainer  : williamyaoh@gmail.com
-- Stability   : experimental
-- Portability : POSIX
--
-- YOU SHOULD NOT USE THIS MODULE.
--
-- This is exported mainly so tests can introspect on the implementation.

{-# LANGUAGE PackageImports #-}

module Data.String.Interpolate.Parse
  ( InterpSegment(..), parseInterpSegments, dosToUnix )
where

import           Data.Char
import qualified "base" Numeric as N

data InterpSegment
  = Expression String
  | Verbatim String
  | Newline
  | Spaces Int
  | Tabs Int
  deriving (Eq, Show)

-- |
-- Given the raw input from a quasiquote, parse it into the information
-- we need to output the actual expression.
--
-- Returns an error message if parsing fails.
parseInterpSegments :: String -> Either String [InterpSegment]
parseInterpSegments = switch
  -- Given how complicated this is getting, it might be worth switching
  -- to megaparsec instead of hand-rolling this.
  where switch :: String -> Either String [InterpSegment]
        switch ""             = pure []
        switch ('#':'{':rest) = expr rest
        switch ('#':rest)     = verbatim "#" rest
        switch ('\n':rest)    = newline rest  -- CRLF handled by `dosToUnix'
        switch (' ':rest)     = spaces 1 rest
        switch ('\t':rest)    = tabs 1 rest
        switch other          = verbatim "" other

        verbatim :: String -> String -> Either String [InterpSegment]
        verbatim acc parsee = case parsee of
          "" ->
            ((Verbatim . reverse) acc :) <$> switch parsee
          (c:_) | c `elem` ['#', ' ', '\t', '\n'] ->
            ((Verbatim . reverse) acc :) <$> switch parsee
          ('\\':'#':rest) ->
            verbatim ('#':acc) rest
          ('\\':_) -> case unescapeChar parsee of
            (Nothing, rest) -> verbatim acc rest
            (Just c, rest)  -> verbatim (c:acc) rest
          c:cs ->
            verbatim (c:acc) cs

        expr :: String -> Either String [InterpSegment]
        expr parsee = case span (/= '}') parsee of
          (_, "")        -> Left "unterminated #{...} interpolation"
          (expr, _:rest) -> (Expression expr :) <$> switch rest

        newline :: String -> Either String [InterpSegment]
        newline parsee = (Newline :) <$> switch parsee

        spaces :: Int -> String -> Either String [InterpSegment]
        spaces n (' ':rest) = spaces (n+1) rest
        spaces n other      = (Spaces n :) <$> switch other

        tabs :: Int -> String -> Either String [InterpSegment]
        tabs n ('\t':rest) = tabs (n+1) rest
        tabs n other       = (Tabs n :) <$> switch other

dosToUnix :: String -> String
dosToUnix = go
  where go xs = case xs of
          '\r' : '\n' : ys -> '\n' : go ys
          y : ys           -> y : go ys
          []               -> []

-- |
-- Haskell 2010 character unescaping, see:
-- <http://www.haskell.org/onlinereport/haskell2010/haskellch2.html#x7-200002.6>
--
-- Unescape the very first backslashed character of the string, if it results in
-- a character. Note that there is an escape sequence that doesn't result in
-- a character (\&).
unescapeChar :: String -> (Maybe Char, String)
unescapeChar input = case input of
  "" -> (Nothing, input)
  '\\' : 'x' : x : xs | isHexDigit x -> case span isHexDigit xs of
    (ys, zs) -> ((Just . chr . readHex $ x:ys), zs)
  '\\' : 'o' : x : xs | isOctDigit x -> case span isOctDigit xs of
    (ys, zs) -> ((Just . chr . readOct $ x:ys), zs)
  '\\' : x : xs | isDigit x -> case span isDigit xs of
    (ys, zs) -> ((Just . chr . read $ x:ys), zs)
  '\\' : input_ -> case input_ of
    '\\' : xs        -> (Just ('\\'), xs)
    'a' : xs         -> (Just ('\a'), xs)
    'b' : xs         -> (Just ('\b'), xs)
    'f' : xs         -> (Just ('\f'), xs)
    'n' : xs         -> (Just ('\n'), xs)
    'r' : xs         -> (Just ('\r'), xs)
    't' : xs         -> (Just ('\t'), xs)
    'v' : xs         -> (Just ('\v'), xs)
    '&' : xs         -> (Nothing, xs)
    'N':'U':'L' : xs -> (Just ('\NUL'), xs)
    'S':'O':'H' : xs -> (Just ('\SOH'), xs)
    'S':'T':'X' : xs -> (Just ('\STX'), xs)
    'E':'T':'X' : xs -> (Just ('\ETX'), xs)
    'E':'O':'T' : xs -> (Just ('\EOT'), xs)
    'E':'N':'Q' : xs -> (Just ('\ENQ'), xs)
    'A':'C':'K' : xs -> (Just ('\ACK'), xs)
    'B':'E':'L' : xs -> (Just ('\BEL'), xs)
    'B':'S' : xs     -> (Just ('\BS'), xs)
    'H':'T' : xs     -> (Just ('\HT'), xs)
    'L':'F' : xs     -> (Just ('\LF'), xs)
    'V':'T' : xs     -> (Just ('\VT'), xs)
    'F':'F' : xs     -> (Just ('\FF'), xs)
    'C':'R' : xs     -> (Just ('\CR'), xs)
    'S':'O' : xs     -> (Just ('\SO'), xs)
    'S':'I' : xs     -> (Just ('\SI'), xs)
    'D':'L':'E' : xs -> (Just ('\DLE'), xs)
    'D':'C':'1' : xs -> (Just ('\DC1'), xs)
    'D':'C':'2' : xs -> (Just ('\DC2'), xs)
    'D':'C':'3' : xs -> (Just ('\DC3'), xs)
    'D':'C':'4' : xs -> (Just ('\DC4'), xs)
    'N':'A':'K' : xs -> (Just ('\NAK'), xs)
    'S':'Y':'N' : xs -> (Just ('\SYN'), xs)
    'E':'T':'B' : xs -> (Just ('\ETB'), xs)
    'C':'A':'N' : xs -> (Just ('\CAN'), xs)
    'E':'M' : xs     -> (Just ('\EM'), xs)
    'S':'U':'B' : xs -> (Just ('\SUB'), xs)
    'E':'S':'C' : xs -> (Just ('\ESC'), xs)
    'F':'S' : xs     -> (Just ('\FS'), xs)
    'G':'S' : xs     -> (Just ('\GS'), xs)
    'R':'S' : xs     -> (Just ('\RS'), xs)
    'U':'S' : xs     -> (Just ('\US'), xs)
    'S':'P' : xs     -> (Just ('\SP'), xs)
    'D':'E':'L' : xs -> (Just ('\DEL'), xs)
    '^':'@' : xs     -> (Just ('\^@'), xs)
    '^':'A' : xs     -> (Just ('\^A'), xs)
    '^':'B' : xs     -> (Just ('\^B'), xs)
    '^':'C' : xs     -> (Just ('\^C'), xs)
    '^':'D' : xs     -> (Just ('\^D'), xs)
    '^':'E' : xs     -> (Just ('\^E'), xs)
    '^':'F' : xs     -> (Just ('\^F'), xs)
    '^':'G' : xs     -> (Just ('\^G'), xs)
    '^':'H' : xs     -> (Just ('\^H'), xs)
    '^':'I' : xs     -> (Just ('\^I'), xs)
    '^':'J' : xs     -> (Just ('\^J'), xs)
    '^':'K' : xs     -> (Just ('\^K'), xs)
    '^':'L' : xs     -> (Just ('\^L'), xs)
    '^':'M' : xs     -> (Just ('\^M'), xs)
    '^':'N' : xs     -> (Just ('\^N'), xs)
    '^':'O' : xs     -> (Just ('\^O'), xs)
    '^':'P' : xs     -> (Just ('\^P'), xs)
    '^':'Q' : xs     -> (Just ('\^Q'), xs)
    '^':'R' : xs     -> (Just ('\^R'), xs)
    '^':'S' : xs     -> (Just ('\^S'), xs)
    '^':'T' : xs     -> (Just ('\^T'), xs)
    '^':'U' : xs     -> (Just ('\^U'), xs)
    '^':'V' : xs     -> (Just ('\^V'), xs)
    '^':'W' : xs     -> (Just ('\^W'), xs)
    '^':'X' : xs     -> (Just ('\^X'), xs)
    '^':'Y' : xs     -> (Just ('\^Y'), xs)
    '^':'Z' : xs     -> (Just ('\^Z'), xs)
    '^':'[' : xs     -> (Just ('\^['), xs)
    '^':'\\' : xs    -> (Just ('\^\'), xs)
    '^':']' : xs     -> (Just ('\^]'), xs)
    '^':'^' : xs     -> (Just ('\^^'), xs)
    '^':'_' : xs     -> (Just ('\^_'), xs)
    xs               -> (Nothing, xs)
  x:xs -> (Just x, xs)

  where readHex :: String -> Int
        readHex xs = case N.readHex xs of
          [(n, "")] -> n
          _         -> error "Data.String.Interpolate.Util.readHex: no parse"

        readOct :: String -> Int
        readOct xs = case N.readOct xs of
          [(n, "")] -> n
          _         -> error "Data.String.Interpolate.Util.readHex: no parse"