module Text.Parser.Token
(
whiteSpace
, token
, charLiteral
, stringLiteral
, natural
, integer
, double
, naturalOrDouble
, symbol
, symbolic
, parens
, braces
, angles
, brackets
, comma
, colon
, dot
, semiSep
, semiSep1
, commaSep
, commaSep1
, TokenParsing(..)
, Unspaced(..)
, Unhighlighted(..)
, decimal
, hexadecimal
, octal
, characterChar
, integer'
, IdentifierStyle(..)
, liftIdentifierStyle
, ident
, reserve
) where
import Control.Applicative
import Control.Monad (MonadPlus(..), when)
import Control.Monad.Trans.Class
import Control.Monad.Trans.State.Lazy as Lazy
import Control.Monad.Trans.State.Strict as Strict
import Control.Monad.Trans.Writer.Lazy as Lazy
import Control.Monad.Trans.Writer.Strict as Strict
import Control.Monad.Trans.RWS.Lazy as Lazy
import Control.Monad.Trans.RWS.Strict as Strict
import Control.Monad.Trans.Reader
import Control.Monad.Trans.Identity
import Data.Char
import qualified Data.HashSet as HashSet
import Data.HashSet (HashSet)
import Data.List (foldl')
import Data.Monoid
import Text.Parser.Char
import Text.Parser.Combinators
import Text.Parser.Token.Highlight
whiteSpace :: TokenParsing m => m ()
whiteSpace = someSpace <|> return ()
token :: TokenParsing m => m a -> m a
token p = p <* whiteSpace
charLiteral :: TokenParsing m => m Char
charLiteral = token (highlight CharLiteral lit) where
lit = between (char '\'') (char '\'' <?> "end of character") characterChar
<?> "character"
stringLiteral :: TokenParsing m => m String
stringLiteral = token (highlight StringLiteral lit) where
lit = Prelude.foldr (maybe id (:)) ""
<$> between (char '"') (char '"' <?> "end of string") (many stringChar)
<?> "string"
stringChar = Just <$> stringLetter
<|> stringEscape
<?> "string character"
stringLetter = satisfy (\c -> (c /= '"') && (c /= '\\') && (c > '\026'))
stringEscape = highlight EscapeCode $ char '\\' *> esc where
esc = Nothing <$ escapeGap
<|> Nothing <$ escapeEmpty
<|> Just <$> escapeCode
escapeEmpty = char '&'
escapeGap = do skipSome space
char '\\' <?> "end of string gap"
natural :: TokenParsing m => m Integer
natural = token natural'
integer :: TokenParsing m => m Integer
integer = token (token (highlight Operator sgn <*> natural')) <?> "integer"
where
sgn = negate <$ char '-'
<|> id <$ char '+'
<|> pure id
double :: TokenParsing m => m Double
double = token (highlight Number floating <?> "double")
naturalOrDouble :: TokenParsing m => m (Either Integer Double)
naturalOrDouble = token (highlight Number natDouble <?> "number")
symbol :: TokenParsing m => String -> m String
symbol name = token (highlight Symbol (string name))
symbolic :: TokenParsing m => Char -> m Char
symbolic name = token (highlight Symbol (char name))
parens :: TokenParsing m => m a -> m a
parens = nesting . between (symbolic '(') (symbolic ')')
braces :: TokenParsing m => m a -> m a
braces = nesting . between (symbolic '{') (symbolic '}')
angles :: TokenParsing m => m a -> m a
angles = nesting . between (symbolic '<') (symbolic '>')
brackets :: TokenParsing m => m a -> m a
brackets = nesting . between (symbolic '[') (symbolic ']')
comma :: TokenParsing m => m Char
comma = symbolic ','
colon :: TokenParsing m => m Char
colon = symbolic ':'
dot :: TokenParsing m => m Char
dot = symbolic '.'
semiSep :: TokenParsing m => m a -> m [a]
semiSep p = sepBy p semi
semiSep1 :: TokenParsing m => m a -> m [a]
semiSep1 p = sepBy1 p semi
commaSep :: TokenParsing m => m a -> m [a]
commaSep p = sepBy p comma
commaSep1 :: TokenParsing m => m a -> m [a]
commaSep1 p = sepBy p comma
class CharParsing m => TokenParsing m where
someSpace :: m ()
someSpace = skipSome (satisfy isSpace)
nesting :: m a -> m a
nesting = id
semi :: m Char
semi = (satisfy (';'==) <?> ";") <* (someSpace <|> pure ())
highlight :: Highlight -> m a -> m a
highlight _ a = a
instance TokenParsing m => TokenParsing (Lazy.StateT s m) where
nesting (Lazy.StateT m) = Lazy.StateT $ nesting . m
someSpace = lift someSpace
semi = lift semi
highlight h (Lazy.StateT m) = Lazy.StateT $ highlight h . m
instance TokenParsing m => TokenParsing (Strict.StateT s m) where
nesting (Strict.StateT m) = Strict.StateT $ nesting . m
someSpace = lift someSpace
semi = lift semi
highlight h (Strict.StateT m) = Strict.StateT $ highlight h . m
instance TokenParsing m => TokenParsing (ReaderT e m) where
nesting (ReaderT m) = ReaderT $ nesting . m
someSpace = lift someSpace
semi = lift semi
highlight h (ReaderT m) = ReaderT $ highlight h . m
instance (TokenParsing m, Monoid w) => TokenParsing (Strict.WriterT w m) where
nesting (Strict.WriterT m) = Strict.WriterT $ nesting m
someSpace = lift someSpace
semi = lift semi
highlight h (Strict.WriterT m) = Strict.WriterT $ highlight h m
instance (TokenParsing m, Monoid w) => TokenParsing (Lazy.WriterT w m) where
nesting (Lazy.WriterT m) = Lazy.WriterT $ nesting m
someSpace = lift someSpace
semi = lift semi
highlight h (Lazy.WriterT m) = Lazy.WriterT $ highlight h m
instance (TokenParsing m, Monoid w) => TokenParsing (Lazy.RWST r w s m) where
nesting (Lazy.RWST m) = Lazy.RWST $ \r s -> nesting (m r s)
someSpace = lift someSpace
semi = lift semi
highlight h (Lazy.RWST m) = Lazy.RWST $ \r s -> highlight h (m r s)
instance (TokenParsing m, Monoid w) => TokenParsing (Strict.RWST r w s m) where
nesting (Strict.RWST m) = Strict.RWST $ \r s -> nesting (m r s)
someSpace = lift someSpace
semi = lift semi
highlight h (Strict.RWST m) = Strict.RWST $ \r s -> highlight h (m r s)
instance TokenParsing m => TokenParsing (IdentityT m) where
nesting = IdentityT . nesting . runIdentityT
someSpace = lift someSpace
semi = lift semi
highlight h = IdentityT . highlight h . runIdentityT
data IdentifierStyle m = IdentifierStyle
{ styleName :: String
, styleStart :: m Char
, styleLetter :: m Char
, styleReserved :: HashSet String
, styleHighlight :: Highlight
, styleReservedHighlight :: Highlight
}
liftIdentifierStyle :: (MonadTrans t, Monad m) => IdentifierStyle m -> IdentifierStyle (t m)
liftIdentifierStyle s =
s { styleStart = lift (styleStart s)
, styleLetter = lift (styleLetter s)
}
reserve :: TokenParsing m => IdentifierStyle m -> String -> m ()
reserve s name = token $ try $ do
_ <- highlight (styleReservedHighlight s) $ string name
notFollowedBy (styleLetter s) <?> "end of " ++ show name
ident :: TokenParsing m => IdentifierStyle m -> m String
ident s = token $ try $ do
name <- highlight (styleHighlight s)
((:) <$> styleStart s <*> many (styleLetter s) <?> styleName s)
when (HashSet.member name (styleReserved s)) $ unexpected $ "reserved " ++ styleName s ++ " " ++ show name
return name
characterChar :: TokenParsing m => m Char
charEscape, charLetter :: TokenParsing m => m Char
characterChar = charLetter <|> charEscape <?> "literal character"
charEscape = highlight EscapeCode $ char '\\' *> escapeCode
charLetter = satisfy (\c -> (c /= '\'') && (c /= '\\') && (c > '\026'))
escapeCode :: TokenParsing m => m Char
escapeCode = (charEsc <|> charNum <|> charAscii <|> charControl) <?> "escape code"
where
charControl = (\c -> toEnum (fromEnum c fromEnum 'A')) <$> (char '^' *> upper)
charNum = toEnum . fromInteger <$> num where
num = decimal
<|> (char 'o' *> number 8 octDigit)
<|> (char 'x' *> number 16 hexDigit)
charEsc = choice $ parseEsc <$> escMap
parseEsc (c,code) = code <$ char c
escMap = zip ("abfnrtv\\\"\'") ("\a\b\f\n\r\t\v\\\"\'")
charAscii = choice $ parseAscii <$> asciiMap
parseAscii (asc,code) = try $ code <$ string asc
asciiMap = zip (ascii3codes ++ ascii2codes) (ascii3 ++ ascii2)
ascii2codes, ascii3codes :: [String]
ascii2codes = [ "BS","HT","LF","VT","FF","CR","SO"
, "SI","EM","FS","GS","RS","US","SP"]
ascii3codes = ["NUL","SOH","STX","ETX","EOT","ENQ","ACK"
,"BEL","DLE","DC1","DC2","DC3","DC4","NAK"
,"SYN","ETB","CAN","SUB","ESC","DEL"]
ascii2, ascii3 :: [Char]
ascii2 = ['\BS','\HT','\LF','\VT','\FF','\CR','\SO','\SI'
,'\EM','\FS','\GS','\RS','\US','\SP']
ascii3 = ['\NUL','\SOH','\STX','\ETX','\EOT','\ENQ','\ACK'
,'\BEL','\DLE','\DC1','\DC2','\DC3','\DC4','\NAK'
,'\SYN','\ETB','\CAN','\SUB','\ESC','\DEL']
natural' :: TokenParsing m => m Integer
natural' = highlight Number nat <?> "natural"
number :: TokenParsing m => Integer -> m Char -> m Integer
number base baseDigit =
foldl' (\x d -> base*x + toInteger (digitToInt d)) 0 <$> some baseDigit
integer' :: TokenParsing m => m Integer
integer' = int <?> "integer"
sign :: TokenParsing m => m (Integer -> Integer)
sign = highlight Operator
$ negate <$ char '-'
<|> id <$ char '+'
<|> pure id
int :: TokenParsing m => m Integer
int = sign <*> highlight Number nat
nat, zeroNumber :: TokenParsing m => m Integer
nat = zeroNumber <|> decimal
zeroNumber = char '0' *> (hexadecimal <|> octal <|> decimal <|> return 0) <?> ""
floating :: TokenParsing m => m Double
floating = decimal >>= fractExponent
fractExponent :: TokenParsing m => Integer -> m Double
fractExponent n = (\fract expo -> (fromInteger n + fract) * expo) <$> fraction <*> option 1.0 exponent'
<|> (fromInteger n *) <$> exponent' where
fraction = Prelude.foldr op 0.0 <$> (char '.' *> (some digit <?> "fraction"))
op d f = (f + fromIntegral (digitToInt d))/10.0
exponent' = do
_ <- oneOf "eE"
f <- sign
e <- decimal <?> "exponent"
return (power (f e))
<?> "exponent"
power e
| e < 0 = 1.0/power(e)
| otherwise = fromInteger (10^e)
natDouble, zeroNumFloat, decimalFloat :: TokenParsing m => m (Either Integer Double)
natDouble
= char '0' *> zeroNumFloat
<|> decimalFloat
zeroNumFloat
= Left <$> (hexadecimal <|> octal)
<|> decimalFloat
<|> fractFloat 0
<|> return (Left 0)
decimalFloat = do
n <- decimal
option (Left n) (fractFloat n)
fractFloat :: TokenParsing m => Integer -> m (Either Integer Double)
fractFloat n = Right <$> fractExponent n
decimal :: TokenParsing m => m Integer
decimal = number 10 digit
hexadecimal :: TokenParsing m => m Integer
hexadecimal = oneOf "xX" *> number 16 hexDigit
octal :: TokenParsing m => m Integer
octal = oneOf "oO" *> number 8 octDigit
newtype Unhighlighted m a = Unhighlighted { runUnhighlighted :: m a }
deriving (Functor,Applicative,Alternative,Monad,MonadPlus,Parsing,CharParsing)
instance MonadTrans Unhighlighted where
lift = Unhighlighted
instance TokenParsing m => TokenParsing (Unhighlighted m) where
nesting (Unhighlighted m) = Unhighlighted (nesting m)
someSpace = lift someSpace
semi = lift semi
highlight _ m = m
newtype Unspaced m a = Unspaced { runUnspaced :: m a }
deriving (Functor,Applicative,Alternative,Monad,MonadPlus,Parsing,CharParsing)
instance MonadTrans Unspaced where
lift = Unspaced
instance TokenParsing m => TokenParsing (Unspaced m) where
nesting (Unspaced m) = Unspaced (nesting m)
someSpace = empty
semi = lift semi
highlight h (Unspaced m) = Unspaced (highlight h m)