-- |
-- Module      : Data.String.Interpolate.Parse
-- Copyright   : (c) William Yao, 2019-2022
-- License     : BSD-3
-- Maintainer  : williamyaoh@gmail.com
-- Stability   : experimental
-- Portability : POSIX
--
-- YOU SHOULD NOT USE THIS MODULE.
--
-- This is exported mainly so tests can introspect on the implementation.

{-# LANGUAGE PackageImports #-}

module Data.String.Interpolate.Parse
  ( ParseOutput(..)
  , parseInput, parseInterpSegments
  , dosToUnix
  )
where

import           "base" Data.Bifunctor
import           Data.Char
import qualified "base" Numeric        as N

import Data.String.Interpolate.Lines ( isBlankLine )
import Data.String.Interpolate.Types

-- |
-- Each section here is a list of lines.
--
-- "Content" here is defined by the contiguous sequence of lines begining
-- with the first non-blank line and ending with the last non-blank line
data ParseOutput = ParseOutput
  { ParseOutput -> Lines
poHeaderWS :: Lines
  , ParseOutput -> Lines
poContent  :: Lines
  , ParseOutput -> Lines
poFooterWS :: Lines
  }
  deriving (ParseOutput -> ParseOutput -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: ParseOutput -> ParseOutput -> Bool
$c/= :: ParseOutput -> ParseOutput -> Bool
== :: ParseOutput -> ParseOutput -> Bool
$c== :: ParseOutput -> ParseOutput -> Bool
Eq, Int -> ParseOutput -> ShowS
[ParseOutput] -> ShowS
ParseOutput -> String
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [ParseOutput] -> ShowS
$cshowList :: [ParseOutput] -> ShowS
show :: ParseOutput -> String
$cshow :: ParseOutput -> String
showsPrec :: Int -> ParseOutput -> ShowS
$cshowsPrec :: Int -> ParseOutput -> ShowS
Show)

-- |
-- Given the raw input from a quasiquote, parse it into the information
-- we need to output the actual expression.
--
-- Returns an error message if parsing fails.
parseInterpSegments :: String -> Either String Lines
parseInterpSegments :: String -> Either String Lines
parseInterpSegments = [InterpSegment] -> String -> Either String Lines
switch []
  -- Given how complicated this is getting, it might be worth switching
  -- to megaparsec instead of hand-rolling this.
  where
    switch :: Line -> String -> Either String Lines
    switch :: [InterpSegment] -> String -> Either String Lines
switch [InterpSegment]
line String
""             = forall (f :: * -> *) a. Applicative f => a -> f a
pure [forall a. [a] -> [a]
reverse [InterpSegment]
line]
    switch [InterpSegment]
line (Char
'#':Char
'{':String
rest) = [InterpSegment] -> String -> Either String Lines
expr [InterpSegment]
line String
rest
    switch [InterpSegment]
_ (Char
'#':String
_)           = forall a b. a -> Either a b
Left String
"unescaped # symbol without interpolation brackets"
    switch [InterpSegment]
line (Char
'\n':String
rest)    = [InterpSegment] -> String -> Either String Lines
newline [InterpSegment]
line String
rest  -- CRLF handled by `dosToUnix'
    switch [InterpSegment]
line (Char
' ':String
rest)     = [InterpSegment] -> Int -> String -> Either String Lines
spaces [InterpSegment]
line Int
1 String
rest
    switch [InterpSegment]
line (Char
'\t':String
rest)    = [InterpSegment] -> Int -> String -> Either String Lines
tabs [InterpSegment]
line Int
1 String
rest
    switch [InterpSegment]
line String
other          = [InterpSegment] -> String -> String -> Either String Lines
verbatim [InterpSegment]
line String
"" String
other

    verbatim :: Line -> String -> String -> Either String Lines
    verbatim :: [InterpSegment] -> String -> String -> Either String Lines
verbatim [InterpSegment]
line String
acc String
parsee = case String
parsee of
      String
"" ->
        [InterpSegment] -> String -> Either String Lines
switch ((String -> InterpSegment
Verbatim forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> [a]
reverse) String
acc forall a. a -> [a] -> [a]
: [InterpSegment]
line) String
parsee
      (Char
c:String
_) | Char
c forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char
'#', Char
' ', Char
'\t', Char
'\n'] ->
        [InterpSegment] -> String -> Either String Lines
switch ((String -> InterpSegment
Verbatim forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> [a]
reverse) String
acc forall a. a -> [a] -> [a]
: [InterpSegment]
line) String
parsee
      (Char
'\\':Char
'#':String
rest) ->
        [InterpSegment] -> String -> String -> Either String Lines
verbatim [InterpSegment]
line (Char
'#'forall a. a -> [a] -> [a]
:String
acc) String
rest
      (Char
'\\':String
_) -> case String -> (EscapeResult, String)
unescapeChar String
parsee of
        (FoundChar Char
c, String
rest)     -> [InterpSegment] -> String -> String -> Either String Lines
verbatim [InterpSegment]
line (Char
cforall a. a -> [a] -> [a]
:String
acc) String
rest
        (EscapeResult
EscapeEmpty, String
rest)     -> [InterpSegment] -> String -> String -> Either String Lines
verbatim [InterpSegment]
line String
acc String
rest
        (EscapeResult
EscapeUnterminated, String
_) -> forall a b. a -> Either a b
Left String
"unterminated backslash escape at end of string"
        (UnknownEscape Char
esc, String
_)  -> forall a b. a -> Either a b
Left (String
"unknown escape character: " forall a. [a] -> [a] -> [a]
++ [Char
esc])
      Char
c:String
cs ->
        [InterpSegment] -> String -> String -> Either String Lines
verbatim [InterpSegment]
line (Char
cforall a. a -> [a] -> [a]
:String
acc) String
cs

    expr :: Line -> String -> Either String Lines
    expr :: [InterpSegment] -> String -> Either String Lines
expr [InterpSegment]
line String
parsee = case forall a. (a -> Bool) -> [a] -> ([a], [a])
span (forall a. Eq a => a -> a -> Bool
/= Char
'}') String
parsee of
      (String
_, String
"")        -> forall a b. a -> Either a b
Left String
"unterminated #{...} interpolation"
      (String
expr, Char
_:String
rest) -> [InterpSegment] -> String -> Either String Lines
switch (String -> InterpSegment
Expression String
expr forall a. a -> [a] -> [a]
: [InterpSegment]
line) String
rest

    newline :: Line -> String -> Either String Lines
    newline :: [InterpSegment] -> String -> Either String Lines
newline [InterpSegment]
line String
parsee = (forall a. [a] -> [a]
reverse [InterpSegment]
line forall a. a -> [a] -> [a]
:) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [InterpSegment] -> String -> Either String Lines
switch [] String
parsee

    spaces :: Line -> Int -> String -> Either String Lines
    spaces :: [InterpSegment] -> Int -> String -> Either String Lines
spaces [InterpSegment]
line Int
n (Char
' ':String
rest) = [InterpSegment] -> Int -> String -> Either String Lines
spaces [InterpSegment]
line (Int
nforall a. Num a => a -> a -> a
+Int
1) String
rest
    spaces [InterpSegment]
line Int
n String
other      = [InterpSegment] -> String -> Either String Lines
switch (Int -> InterpSegment
Spaces Int
n forall a. a -> [a] -> [a]
: [InterpSegment]
line) String
other

    tabs :: Line -> Int -> String -> Either String Lines
    tabs :: [InterpSegment] -> Int -> String -> Either String Lines
tabs [InterpSegment]
line Int
n (Char
'\t':String
rest) = [InterpSegment] -> Int -> String -> Either String Lines
tabs [InterpSegment]
line (Int
nforall a. Num a => a -> a -> a
+Int
1) String
rest
    tabs [InterpSegment]
line Int
n String
other       = [InterpSegment] -> String -> Either String Lines
switch (Int -> InterpSegment
Tabs Int
n forall a. a -> [a] -> [a]
: [InterpSegment]
line) String
other

-- |
-- Like `parseInterpSegments', but for cases where we need to do
-- more complicated transformations on the input. Separates the
-- interpolation input into its content, whitespace header, and
-- whitespace footer.
parseInput :: String -> Either String ParseOutput
parseInput :: String -> Either String ParseOutput
parseInput String
parsee = do
  Lines
lines <- String -> Either String Lines
parseInterpSegments String
parsee
  let (Lines
headerWS, Lines
tail) = forall a. (a -> Bool) -> [a] -> ([a], [a])
break (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. [InterpSegment] -> Bool
isBlankLine) Lines
lines
      (Lines
footerWS, Lines
init) = forall (p :: * -> * -> *) a b c d.
Bifunctor p =>
(a -> b) -> (c -> d) -> p a c -> p b d
bimap forall a. [a] -> [a]
reverse forall a. [a] -> [a]
reverse forall a b. (a -> b) -> a -> b
$
        forall a. (a -> Bool) -> [a] -> ([a], [a])
break (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. [InterpSegment] -> Bool
isBlankLine) (forall a. [a] -> [a]
reverse Lines
tail)
  forall (f :: * -> *) a. Applicative f => a -> f a
pure forall a b. (a -> b) -> a -> b
$! ParseOutput
    { poHeaderWS :: Lines
poHeaderWS = Lines
headerWS
    , poContent :: Lines
poContent = Lines
init
    , poFooterWS :: Lines
poFooterWS = Lines
footerWS
    }

dosToUnix :: String -> String
dosToUnix :: ShowS
dosToUnix = ShowS
go
  where
    go :: ShowS
go String
xs = case String
xs of
      Char
'\r' : Char
'\n' : String
ys -> Char
'\n' forall a. a -> [a] -> [a]
: ShowS
go String
ys
      Char
y : String
ys           -> Char
y forall a. a -> [a] -> [a]
: ShowS
go String
ys
      []               -> []

data EscapeResult
  = FoundChar Char
  | EscapeEmpty         -- ^ Haskell's lexical syntax has \& as an escape that produces an empty string
  | EscapeUnterminated
  | UnknownEscape Char

-- |
-- Haskell 2010 character unescaping, see:
-- <http://www.haskell.org/onlinereport/haskell2010/haskellch2.html#x7-200002.6>
--
-- Unescape the very first backslashed character of the string, if it's a known
-- escape.
unescapeChar :: String -> (EscapeResult, String)
unescapeChar :: String -> (EscapeResult, String)
unescapeChar String
input = case String
input of
  String
"" -> (EscapeResult
EscapeEmpty, String
input)
  Char
'\\' : Char
'x' : Char
x : String
xs | Char -> Bool
isHexDigit Char
x -> case forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isHexDigit String
xs of
    (String
ys, String
zs) -> ((Char -> EscapeResult
FoundChar forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Char
chr forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Int
readHex forall a b. (a -> b) -> a -> b
$ Char
xforall a. a -> [a] -> [a]
:String
ys), String
zs)
  Char
'\\' : Char
'o' : Char
x : String
xs | Char -> Bool
isOctDigit Char
x -> case forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isOctDigit String
xs of
    (String
ys, String
zs) -> ((Char -> EscapeResult
FoundChar forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Char
chr forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> Int
readOct forall a b. (a -> b) -> a -> b
$ Char
xforall a. a -> [a] -> [a]
:String
ys), String
zs)
  Char
'\\' : Char
x : String
xs | Char -> Bool
isDigit Char
x -> case forall a. (a -> Bool) -> [a] -> ([a], [a])
span Char -> Bool
isDigit String
xs of
    (String
ys, String
zs) -> ((Char -> EscapeResult
FoundChar forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Char
chr forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Read a => String -> a
read forall a b. (a -> b) -> a -> b
$ Char
xforall a. a -> [a] -> [a]
:String
ys), String
zs)
  Char
'\\' : String
input_ -> case String
input_ of
    Char
'\\' : String
xs        -> (Char -> EscapeResult
FoundChar (Char
'\\'), String
xs)
    Char
'a' : String
xs         -> (Char -> EscapeResult
FoundChar (Char
'\a'), String
xs)
    Char
'b' : String
xs         -> (Char -> EscapeResult
FoundChar (Char
'\b'), String
xs)
    Char
'f' : String
xs         -> (Char -> EscapeResult
FoundChar (Char
'\f'), String
xs)
    Char
'n' : String
xs         -> (Char -> EscapeResult
FoundChar (Char
'\n'), String
xs)
    Char
'r' : String
xs         -> (Char -> EscapeResult
FoundChar (Char
'\r'), String
xs)
    Char
't' : String
xs         -> (Char -> EscapeResult
FoundChar (Char
'\t'), String
xs)
    Char
'v' : String
xs         -> (Char -> EscapeResult
FoundChar (Char
'\v'), String
xs)
    Char
'&' : String
xs         -> (EscapeResult
EscapeEmpty, String
xs)
    Char
'N':Char
'U':Char
'L' : String
xs -> (Char -> EscapeResult
FoundChar (Char
'\NUL'), String
xs)
    Char
'S':Char
'O':Char
'H' : String
xs -> (Char -> EscapeResult
FoundChar (Char
'\SOH'), String
xs)
    Char
'S':Char
'T':Char
'X' : String
xs -> (Char -> EscapeResult
FoundChar (Char
'\STX'), String
xs)
    Char
'E':Char
'T':Char
'X' : String
xs -> (Char -> EscapeResult
FoundChar (Char
'\ETX'), String
xs)
    Char
'E':Char
'O':Char
'T' : String
xs -> (Char -> EscapeResult
FoundChar (Char
'\EOT'), String
xs)
    Char
'E':Char
'N':Char
'Q' : String
xs -> (Char -> EscapeResult
FoundChar (Char
'\ENQ'), String
xs)
    Char
'A':Char
'C':Char
'K' : String
xs -> (Char -> EscapeResult
FoundChar (Char
'\ACK'), String
xs)
    Char
'B':Char
'E':Char
'L' : String
xs -> (Char -> EscapeResult
FoundChar (Char
'\BEL'), String
xs)
    Char
'B':Char
'S' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\BS'), String
xs)
    Char
'H':Char
'T' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\HT'), String
xs)
    Char
'L':Char
'F' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\LF'), String
xs)
    Char
'V':Char
'T' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\VT'), String
xs)
    Char
'F':Char
'F' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\FF'), String
xs)
    Char
'C':Char
'R' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\CR'), String
xs)
    Char
'S':Char
'O' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\SO'), String
xs)
    Char
'S':Char
'I' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\SI'), String
xs)
    Char
'D':Char
'L':Char
'E' : String
xs -> (Char -> EscapeResult
FoundChar (Char
'\DLE'), String
xs)
    Char
'D':Char
'C':Char
'1' : String
xs -> (Char -> EscapeResult
FoundChar (Char
'\DC1'), String
xs)
    Char
'D':Char
'C':Char
'2' : String
xs -> (Char -> EscapeResult
FoundChar (Char
'\DC2'), String
xs)
    Char
'D':Char
'C':Char
'3' : String
xs -> (Char -> EscapeResult
FoundChar (Char
'\DC3'), String
xs)
    Char
'D':Char
'C':Char
'4' : String
xs -> (Char -> EscapeResult
FoundChar (Char
'\DC4'), String
xs)
    Char
'N':Char
'A':Char
'K' : String
xs -> (Char -> EscapeResult
FoundChar (Char
'\NAK'), String
xs)
    Char
'S':Char
'Y':Char
'N' : String
xs -> (Char -> EscapeResult
FoundChar (Char
'\SYN'), String
xs)
    Char
'E':Char
'T':Char
'B' : String
xs -> (Char -> EscapeResult
FoundChar (Char
'\ETB'), String
xs)
    Char
'C':Char
'A':Char
'N' : String
xs -> (Char -> EscapeResult
FoundChar (Char
'\CAN'), String
xs)
    Char
'E':Char
'M' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\EM'), String
xs)
    Char
'S':Char
'U':Char
'B' : String
xs -> (Char -> EscapeResult
FoundChar (Char
'\SUB'), String
xs)
    Char
'E':Char
'S':Char
'C' : String
xs -> (Char -> EscapeResult
FoundChar (Char
'\ESC'), String
xs)
    Char
'F':Char
'S' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\FS'), String
xs)
    Char
'G':Char
'S' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\GS'), String
xs)
    Char
'R':Char
'S' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\RS'), String
xs)
    Char
'U':Char
'S' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\US'), String
xs)
    Char
'S':Char
'P' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\SP'), String
xs)
    Char
'D':Char
'E':Char
'L' : String
xs -> (Char -> EscapeResult
FoundChar (Char
'\DEL'), String
xs)
    Char
'^':Char
'@' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^@'), String
xs)
    Char
'^':Char
'A' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^A'), String
xs)
    Char
'^':Char
'B' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^B'), String
xs)
    Char
'^':Char
'C' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^C'), String
xs)
    Char
'^':Char
'D' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^D'), String
xs)
    Char
'^':Char
'E' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^E'), String
xs)
    Char
'^':Char
'F' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^F'), String
xs)
    Char
'^':Char
'G' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^G'), String
xs)
    Char
'^':Char
'H' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^H'), String
xs)
    Char
'^':Char
'I' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^I'), String
xs)
    Char
'^':Char
'J' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^J'), String
xs)
    Char
'^':Char
'K' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^K'), String
xs)
    Char
'^':Char
'L' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^L'), String
xs)
    Char
'^':Char
'M' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^M'), String
xs)
    Char
'^':Char
'N' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^N'), String
xs)
    Char
'^':Char
'O' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^O'), String
xs)
    Char
'^':Char
'P' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^P'), String
xs)
    Char
'^':Char
'Q' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^Q'), String
xs)
    Char
'^':Char
'R' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^R'), String
xs)
    Char
'^':Char
'S' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^S'), String
xs)
    Char
'^':Char
'T' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^T'), String
xs)
    Char
'^':Char
'U' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^U'), String
xs)
    Char
'^':Char
'V' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^V'), String
xs)
    Char
'^':Char
'W' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^W'), String
xs)
    Char
'^':Char
'X' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^X'), String
xs)
    Char
'^':Char
'Y' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^Y'), String
xs)
    Char
'^':Char
'Z' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^Z'), String
xs)
    Char
'^':Char
'[' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^['), String
xs)
    Char
'^':Char
'\\' : String
xs    -> (Char -> EscapeResult
FoundChar (Char
'\^\'), String
xs)
    Char
'^':Char
']' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^]'), String
xs)
    Char
'^':Char
'^' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^^'), String
xs)
    Char
'^':Char
'_' : String
xs     -> (Char -> EscapeResult
FoundChar (Char
'\^_'), String
xs)
    Char
x:String
xs             -> (Char -> EscapeResult
UnknownEscape Char
x, String
xs)
    String
""               -> (EscapeResult
EscapeUnterminated, String
"")
  Char
x:String
xs -> (Char -> EscapeResult
FoundChar Char
x, String
xs)

  where
    readHex :: String -> Int
    readHex :: String -> Int
readHex String
xs = case forall a. (Eq a, Num a) => ReadS a
N.readHex String
xs of
      [(Int
n, String
"")] -> Int
n
      [(Int, String)]
_         -> forall a. HasCallStack => String -> a
error String
"Data.String.Interpolate.Util.readHex: no parse"

    readOct :: String -> Int
    readOct :: String -> Int
readOct String
xs = case forall a. (Eq a, Num a) => ReadS a
N.readOct String
xs of
      [(Int
n, String
"")] -> Int
n
      [(Int, String)]
_         -> forall a. HasCallStack => String -> a
error String
"Data.String.Interpolate.Util.readHex: no parse"