{-# LANGUAGE BangPatterns, OverloadedStrings, ViewPatterns #-}
module Hpp.Tokens (Token(..), detok, isImportant, notImportant, importants,
trimUnimportant, detokenize, tokenize, newLine,
skipLiteral) where
import Control.Arrow (first, second)
import Data.Char (isAlphaNum, isDigit, isSpace, isOctDigit, isHexDigit, digitToInt)
import Data.Foldable (foldl')
import Data.Monoid ((<>))
import Data.String (IsString, fromString)
import Hpp.StringSig
data Token s = Important s
| Other s
deriving (Eq,Ord,Show)
instance Functor Token where
fmap f (Important s) = Important (f s)
fmap f (Other s) = Other (f s)
{-# INLINE fmap #-}
detok :: Token s -> s
detok (Important s) = s
detok (Other s) = s
{-# INLINE detok #-}
isImportant :: Token s -> Bool
isImportant (Important _) = True
isImportant _ = False
notImportant :: Token s -> Bool
notImportant (Other _) = True
notImportant _ = False
importants :: [Token s] -> [s]
importants = map detok . filter isImportant
trimUnimportant :: [Token s] -> [Token s]
trimUnimportant = aux id . dropWhile (not . isImportant)
where aux _ [] = []
aux acc (t@(Important _) : ts) = acc (t : aux id ts)
aux acc (t@(Other _) : ts) = aux (acc . (t:)) ts
newLine :: (Eq s, IsString s) => Token s -> Bool
newLine (Other s) = s == "\n"
newLine _ = False
maybeImp :: Stringy s => s -> [Token s]
maybeImp s = if isEmpty s then [] else [Important s]
digitsFromBase :: Stringy s => Int -> s -> s
digitsFromBase base = fromString . show . foldl' aux 0 . map digitToInt . toChars
where aux acc d = base * acc + d
escapeChar :: Stringy s => Char -> Maybe s
escapeChar = fmap fromString . flip lookup lut
where lut = map (second (show :: Int -> String))
[ ('a', 0x07), ('b', 0x08), ('f', 0x0C), ('n', 0x0A)
, ('r', 0x0D), ('t', 0x09), ('v', 0x0B), ('\\', 0x5C)
, ('\'', 0x27), ('"', 0x22), ('?', 0x3F) ]
data TokChar = TokSpace Char | TokQuote | TokDQuote
tokWords :: Stringy s => s -> [Token s]
tokWords s =
case sbreak aux s of
Nothing -> [Important s]
Just (TokSpace c, pre, pos) ->
case sbreak (predicateJust (not . isSpace)) pos of
Nothing -> maybeImp pre ++ [Other (cons c pos)]
Just (c', spaces, pos') ->
maybeImp pre ++
Other (cons c spaces) : tokWords (cons c' pos')
Just (TokQuote, pre, pos) ->
let pre' = snoc pre '\''
in case pos of
'\\' :. cs ->
case sbreak (boolJust . (== '\'')) cs of
Nothing -> [Important (pre' <> pos)]
Just (_,esc,pos') ->
let esc' = if sall isOctDigit esc
then Important (digitsFromBase 8 esc)
else case esc of
'x' :. hs
| sall isHexDigit hs ->
Important (digitsFromBase 16 hs)
(escapeChar -> Just e) :. Nil -> Important e
_ -> Important ("'\\" <> snoc esc '\'')
in maybeImp pre ++ esc' : tokWords pos'
c:.('\'':.cs) -> maybeImp pre
++ Important (fromString ['\'', c, '\''])
: tokWords cs
_:._ -> let oops = snoc pre '\''
in case tokWords pos of
(Important t:ts) -> Important (oops<>t) : ts
ts -> Important oops : ts
_ -> [Important (snoc pre '\'')]
Just (TokDQuote, pre, pos) ->
let (lit,pos') = skipLiteral pos
in (if isEmpty pre then [] else [Important pre])
++ Important (cons '"' lit) : tokWords pos'
where aux c | isSpace c = Just (TokSpace c)
| c == '\'' = Just TokQuote
| c == '"' = Just TokDQuote
| otherwise = Nothing
{-# INLINE aux #-}
{-# INLINABLE tokWords #-}
data LitStringChar = DBackSlash | EscapedDQuote | DQuote
skipLiteral :: Stringy s => s -> (s,s)
skipLiteral s =
case breakOn [("\\\\", DBackSlash), ("\\\"", EscapedDQuote), ("\"", DQuote)] s of
Nothing -> (s, mempty)
Just (DBackSlash, pre, pos) -> first ((pre <> "\\\\") <>) (skipLiteral pos)
Just (EscapedDQuote, pre, pos) -> first ((pre <> "\\\"") <>) (skipLiteral pos)
Just (DQuote, pre, pos) -> (snoc pre '"', pos)
{-# INLINABLE skipLiteral #-}
splits :: Stringy s => (Char -> Bool) -> s -> [s]
splits isDelim = filter (not . isEmpty) . go . sdropWhile isSpace
where go s = case sbreak (\c -> if isDelim c then Just c else Nothing) s of
Nothing -> [s]
Just (d, pre, pos) ->
pre : fromString [d] : go (sdropWhile isSpace pos)
{-# INLINE splits #-}
validIdentifierChar :: Char -> Bool
validIdentifierChar c = isAlphaNum c || c == '_' || c == '\''
fixExponents :: Stringy s => [Token s] -> [Token s]
fixExponents [] = []
fixExponents (t1'@(Important t1) : ts@(Important t2 : Important t3 : ts')) =
case (,,,) <$> uncons t1 <*> unsnoc t1 <*> uncons t2 <*> uncons t3 of
Just !(!(!d1,_), !(_,!e), !(!c,!cs), !(!d2,_))
| elem c ("-+" :: [Char]) &&
isEmpty cs && isDigit d1 && isAlphaNum d2 &&
elem e ("eE" :: [Char]) -> let t = t1 <> t2 <> t3
in t `seq` Important t : fixExponents ts'
_ -> t1' : fixExponents ts
fixExponents (t:ts) = t : fixExponents ts
{-# INLINABLE fixExponents #-}
tokenize :: Stringy s => s -> [Token s]
tokenize = fixExponents . foldMap seps . tokWords
where seps t@(Other _) = [t]
seps t@(Important s) =
case uncons s of
Nothing -> []
Just (c,_)
| c == '"' -> [t]
| c == '\'' -> [t]
| otherwise -> map Important (splits (not . validIdentifierChar) s)
{-# INLINABLE tokenize #-}
detokenize :: Monoid s => [Token s] -> s
detokenize = foldMap detok
{-# INLINE detokenize #-}