-- | -- Module : Data.String.Interpolate.Parse -- Copyright : (c) William Yao, 2019-2020 -- License : BSD-3 -- Maintainer : williamyaoh@gmail.com -- Stability : experimental -- Portability : POSIX -- -- YOU SHOULD NOT USE THIS MODULE. -- -- This is exported mainly so tests can introspect on the implementation. {-# LANGUAGE PackageImports #-} module Data.String.Interpolate.Parse ( InterpSegment(..), parseInterpSegments, dosToUnix ) where import Data.Char import qualified "base" Numeric as N data InterpSegment = Expression String | Verbatim String | Newline | Spaces Int | Tabs Int deriving (Eq, Show) -- | -- Given the raw input from a quasiquote, parse it into the information -- we need to output the actual expression. -- -- Returns an error message if parsing fails. parseInterpSegments :: String -> Either String [InterpSegment] parseInterpSegments = switch -- Given how complicated this is getting, it might be worth switching -- to megaparsec instead of hand-rolling this. where switch :: String -> Either String [InterpSegment] switch "" = pure [] switch ('#':'{':rest) = expr rest switch ('#':rest) = verbatim "#" rest switch ('\n':rest) = newline rest -- CRLF handled by `dosToUnix' switch (' ':rest) = spaces 1 rest switch ('\t':rest) = tabs 1 rest switch other = verbatim "" other verbatim :: String -> String -> Either String [InterpSegment] verbatim acc parsee = case parsee of "" -> ((Verbatim . reverse) acc :) <$> switch parsee (c:_) | c `elem` ['#', ' ', '\t', '\n'] -> ((Verbatim . reverse) acc :) <$> switch parsee ('\\':'#':rest) -> verbatim ('#':acc) rest ('\\':_) -> case unescapeChar parsee of (Nothing, rest) -> verbatim acc rest (Just c, rest) -> verbatim (c:acc) rest c:cs -> verbatim (c:acc) cs expr :: String -> Either String [InterpSegment] expr parsee = case span (/= '}') parsee of (_, "") -> Left "unterminated #{...} interpolation" (expr, _:rest) -> (Expression expr :) <$> switch rest newline :: String -> Either String [InterpSegment] newline parsee = (Newline :) <$> switch parsee spaces :: Int -> String -> Either String [InterpSegment] spaces n (' ':rest) = spaces (n+1) rest spaces n other = (Spaces n :) <$> switch other tabs :: Int -> String -> Either String [InterpSegment] tabs n ('\t':rest) = tabs (n+1) rest tabs n other = (Tabs n :) <$> switch other dosToUnix :: String -> String dosToUnix = go where go xs = case xs of '\r' : '\n' : ys -> '\n' : go ys y : ys -> y : go ys [] -> [] -- | -- Haskell 2010 character unescaping, see: -- -- -- Unescape the very first backslashed character of the string, if it results in -- a character. Note that there is an escape sequence that doesn't result in -- a character (\&). unescapeChar :: String -> (Maybe Char, String) unescapeChar input = case input of "" -> (Nothing, input) '\\' : 'x' : x : xs | isHexDigit x -> case span isHexDigit xs of (ys, zs) -> ((Just . chr . readHex $ x:ys), zs) '\\' : 'o' : x : xs | isOctDigit x -> case span isOctDigit xs of (ys, zs) -> ((Just . chr . readOct $ x:ys), zs) '\\' : x : xs | isDigit x -> case span isDigit xs of (ys, zs) -> ((Just . chr . read $ x:ys), zs) '\\' : input_ -> case input_ of '\\' : xs -> (Just ('\\'), xs) 'a' : xs -> (Just ('\a'), xs) 'b' : xs -> (Just ('\b'), xs) 'f' : xs -> (Just ('\f'), xs) 'n' : xs -> (Just ('\n'), xs) 'r' : xs -> (Just ('\r'), xs) 't' : xs -> (Just ('\t'), xs) 'v' : xs -> (Just ('\v'), xs) '&' : xs -> (Nothing, xs) 'N':'U':'L' : xs -> (Just ('\NUL'), xs) 'S':'O':'H' : xs -> (Just ('\SOH'), xs) 'S':'T':'X' : xs -> (Just ('\STX'), xs) 'E':'T':'X' : xs -> (Just ('\ETX'), xs) 'E':'O':'T' : xs -> (Just ('\EOT'), xs) 'E':'N':'Q' : xs -> (Just ('\ENQ'), xs) 'A':'C':'K' : xs -> (Just ('\ACK'), xs) 'B':'E':'L' : xs -> (Just ('\BEL'), xs) 'B':'S' : xs -> (Just ('\BS'), xs) 'H':'T' : xs -> (Just ('\HT'), xs) 'L':'F' : xs -> (Just ('\LF'), xs) 'V':'T' : xs -> (Just ('\VT'), xs) 'F':'F' : xs -> (Just ('\FF'), xs) 'C':'R' : xs -> (Just ('\CR'), xs) 'S':'O' : xs -> (Just ('\SO'), xs) 'S':'I' : xs -> (Just ('\SI'), xs) 'D':'L':'E' : xs -> (Just ('\DLE'), xs) 'D':'C':'1' : xs -> (Just ('\DC1'), xs) 'D':'C':'2' : xs -> (Just ('\DC2'), xs) 'D':'C':'3' : xs -> (Just ('\DC3'), xs) 'D':'C':'4' : xs -> (Just ('\DC4'), xs) 'N':'A':'K' : xs -> (Just ('\NAK'), xs) 'S':'Y':'N' : xs -> (Just ('\SYN'), xs) 'E':'T':'B' : xs -> (Just ('\ETB'), xs) 'C':'A':'N' : xs -> (Just ('\CAN'), xs) 'E':'M' : xs -> (Just ('\EM'), xs) 'S':'U':'B' : xs -> (Just ('\SUB'), xs) 'E':'S':'C' : xs -> (Just ('\ESC'), xs) 'F':'S' : xs -> (Just ('\FS'), xs) 'G':'S' : xs -> (Just ('\GS'), xs) 'R':'S' : xs -> (Just ('\RS'), xs) 'U':'S' : xs -> (Just ('\US'), xs) 'S':'P' : xs -> (Just ('\SP'), xs) 'D':'E':'L' : xs -> (Just ('\DEL'), xs) '^':'@' : xs -> (Just ('\^@'), xs) '^':'A' : xs -> (Just ('\^A'), xs) '^':'B' : xs -> (Just ('\^B'), xs) '^':'C' : xs -> (Just ('\^C'), xs) '^':'D' : xs -> (Just ('\^D'), xs) '^':'E' : xs -> (Just ('\^E'), xs) '^':'F' : xs -> (Just ('\^F'), xs) '^':'G' : xs -> (Just ('\^G'), xs) '^':'H' : xs -> (Just ('\^H'), xs) '^':'I' : xs -> (Just ('\^I'), xs) '^':'J' : xs -> (Just ('\^J'), xs) '^':'K' : xs -> (Just ('\^K'), xs) '^':'L' : xs -> (Just ('\^L'), xs) '^':'M' : xs -> (Just ('\^M'), xs) '^':'N' : xs -> (Just ('\^N'), xs) '^':'O' : xs -> (Just ('\^O'), xs) '^':'P' : xs -> (Just ('\^P'), xs) '^':'Q' : xs -> (Just ('\^Q'), xs) '^':'R' : xs -> (Just ('\^R'), xs) '^':'S' : xs -> (Just ('\^S'), xs) '^':'T' : xs -> (Just ('\^T'), xs) '^':'U' : xs -> (Just ('\^U'), xs) '^':'V' : xs -> (Just ('\^V'), xs) '^':'W' : xs -> (Just ('\^W'), xs) '^':'X' : xs -> (Just ('\^X'), xs) '^':'Y' : xs -> (Just ('\^Y'), xs) '^':'Z' : xs -> (Just ('\^Z'), xs) '^':'[' : xs -> (Just ('\^['), xs) '^':'\\' : xs -> (Just ('\^\'), xs) '^':']' : xs -> (Just ('\^]'), xs) '^':'^' : xs -> (Just ('\^^'), xs) '^':'_' : xs -> (Just ('\^_'), xs) xs -> (Nothing, xs) x:xs -> (Just x, xs) where readHex :: String -> Int readHex xs = case N.readHex xs of [(n, "")] -> n _ -> error "Data.String.Interpolate.Util.readHex: no parse" readOct :: String -> Int readOct xs = case N.readOct xs of [(n, "")] -> n _ -> error "Data.String.Interpolate.Util.readHex: no parse"