{-# LANGUAGE TemplateHaskell #-} {-# LANGUAGE DeriveDataTypeable #-} {-# LANGUAGE GeneralizedNewtypeDeriving #-} {-# LANGUAGE CPP #-} -- | General parsers, functions and datatypes for all Shakespeare languages. module Text.Shakespeare.Base ( Deref (..) , Ident (..) , Scope , parseDeref , parseHash , parseVar , parseVarString , parseAt , parseUrl , parseUrlString , parseCaret , parseUnder , parseInt , parseIntString , derefToExp , flattenDeref , readUtf8File ) where import Language.Haskell.TH.Syntax import Language.Haskell.TH (appE) import Data.Char (isUpper, isSymbol, isPunctuation, isAscii) import Text.ParserCombinators.Parsec import Text.Parsec.Prim (Parsec) import Data.List (intercalate) import Data.Ratio (Ratio, numerator, denominator, (%)) import Data.Data (Data) import Data.Typeable (Typeable) import qualified Data.Text.Lazy as TL import qualified System.IO as SIO import qualified Data.Text.Lazy.IO as TIO import Control.Monad (when) newtype Ident = Ident String deriving (Show, Eq, Read, Data, Typeable, Ord) type Scope = [(Ident, Exp)] data Deref = DerefModulesIdent [String] Ident | DerefIdent Ident | DerefIntegral Integer | DerefRational Rational | DerefString String | DerefBranch Deref Deref | DerefList [Deref] | DerefTuple [Deref] deriving (Show, Eq, Read, Data, Typeable, Ord) instance Lift Ident where lift (Ident s) = [|Ident|] `appE` lift s instance Lift Deref where lift (DerefModulesIdent v s) = do dl <- [|DerefModulesIdent|] v' <- lift v s' <- lift s return $ dl `AppE` v' `AppE` s' lift (DerefIdent s) = do dl <- [|DerefIdent|] s' <- lift s return $ dl `AppE` s' lift (DerefBranch x y) = do x' <- lift x y' <- lift y db <- [|DerefBranch|] return $ db `AppE` x' `AppE` y' lift (DerefIntegral i) = [|DerefIntegral|] `appE` lift i lift (DerefRational r) = do n <- lift $ numerator r d <- lift $ denominator r per <- [|(%) :: Int -> Int -> Ratio Int|] dr <- [|DerefRational|] return $ dr `AppE` InfixE (Just n) per (Just d) lift (DerefString s) = [|DerefString|] `appE` lift s lift (DerefList x) = [|DerefList $(lift x)|] lift (DerefTuple x) = [|DerefTuple $(lift x)|] derefParens, derefCurlyBrackets :: UserParser a Deref derefParens = between (char '(') (char ')') parseDeref derefCurlyBrackets = between (char '{') (char '}') parseDeref derefList, derefTuple :: UserParser a Deref derefList = between (char '[') (char ']') (fmap DerefList $ sepBy parseDeref (char ',')) derefTuple = try $ do _ <- char '(' x <- sepBy1 parseDeref (char ',') when (length x < 2) $ pzero _ <- char ')' return $ DerefTuple x parseDeref :: UserParser a Deref parseDeref = skipMany (oneOf " \t") >> (derefList <|> derefTuple <|> (do x <- derefSingle (derefInfix x) <|> (do res <- deref' $ (:) x skipMany $ oneOf " \t" return res))) where delim = (many1 (char ' ') >> return()) <|> lookAhead (oneOf "(\"" >> return ()) derefOp = try $ do _ <- char '(' x <- many1 $ noneOf " \t\n\r()" _ <- char ')' return $ DerefIdent $ Ident x -- See: http://www.haskell.org/onlinereport/haskell2010/haskellch2.html#x7-160002.2 isOperatorChar c | isAscii c = c `elem` "!#$%&*+./<=>?@\\^|-~:" | otherwise = isSymbol c || isPunctuation c derefInfix x = try $ do _ <- delim xs <- many $ try $ derefSingle >>= \x' -> delim >> return x' op <- many1 (satisfy isOperatorChar) "operator" -- special handling for $, which we don't deal with when (op == "$") $ fail "don't handle $" let op' = DerefIdent $ Ident op ys <- many1 $ delim >> derefSingle return $ DerefBranch (DerefBranch op' $ foldl1 DerefBranch $ x : xs) (foldl1 DerefBranch ys) derefSingle = derefTuple <|> derefList <|> derefOp <|> derefParens <|> numeric <|> strLit<|> ident deref' lhs = dollar <|> derefSingle' <|> return (foldl1 DerefBranch $ lhs []) where dollar = do _ <- try $ delim >> char '$' rhs <- parseDeref let lhs' = foldl1 DerefBranch $ lhs [] return $ DerefBranch lhs' rhs derefSingle' = do x <- try $ delim >> derefSingle deref' $ lhs . (:) x numeric = do n <- (char '-' >> return "-") <|> return "" x <- many1 digit y <- (char '.' >> fmap Just (many1 digit)) <|> return Nothing return $ case y of Nothing -> DerefIntegral $ read' "Integral" $ n ++ x Just z -> DerefRational $ toRational (read' "Rational" $ n ++ x ++ '.' : z :: Double) strLit = do _ <- char '"' chars <- many quotedChar _ <- char '"' return $ DerefString chars quotedChar = (char '\\' >> escapedChar) <|> noneOf "\"" escapedChar = let cecs = [('n', '\n'), ('r', '\r'), ('b', '\b'), ('t', '\t') ,('\\', '\\'), ('"', '"'), ('\'', '\'')] in choice [ char c >> return ec | (c, ec) <- cecs] ident = do mods <- many modul func <- many1 (alphaNum <|> char '_' <|> char '\'') let func' = Ident func return $ if null mods then DerefIdent func' else DerefModulesIdent mods func' modul = try $ do c <- upper cs <- many (alphaNum <|> char '_') _ <- char '.' return $ c : cs read' :: Read a => String -> String -> a read' t s = case reads s of (x, _):_ -> x [] -> error $ t ++ " read failed: " ++ s expType :: Ident -> Name -> Exp expType (Ident (c:_)) = if isUpper c || c == ':' then ConE else VarE expType (Ident "") = error "Bad Ident" derefToExp :: Scope -> Deref -> Exp derefToExp s (DerefBranch x y) = derefToExp s x `AppE` derefToExp s y derefToExp _ (DerefModulesIdent mods i@(Ident s)) = expType i $ Name (mkOccName s) (NameQ $ mkModName $ intercalate "." mods) derefToExp scope (DerefIdent i@(Ident s)) = case lookup i scope of Just e -> e Nothing -> expType i $ mkName s derefToExp _ (DerefIntegral i) = LitE $ IntegerL i derefToExp _ (DerefRational r) = LitE $ RationalL r derefToExp _ (DerefString s) = LitE $ StringL s derefToExp s (DerefList ds) = ListE $ map (derefToExp s) ds derefToExp s (DerefTuple ds) = TupE $ map (derefToExp s) ds -- FIXME shouldn't we use something besides a list here? flattenDeref :: Deref -> Maybe [String] flattenDeref (DerefIdent (Ident x)) = Just [x] flattenDeref (DerefBranch (DerefIdent (Ident x)) y) = do y' <- flattenDeref y Just $ y' ++ [x] flattenDeref _ = Nothing parseHash :: UserParser a (Either String Deref) parseHash = parseVar '#' curlyBrackets :: UserParser a String curlyBrackets = do _<- char '{' var <- many1 $ noneOf "}" _<- char '}' return $ ('{':var) ++ "}" type UserParser a = Parsec String a parseVar :: Char -> UserParser a (Either String Deref) parseVar c = do _ <- char c (char '\\' >> return (Left [c])) <|> (do deref <- derefCurlyBrackets return $ Right deref) <|> (do -- Check for hash just before newline _ <- lookAhead (oneOf "\r\n" >> return ()) <|> eof return $ Left "" ) <|> return (Left [c]) parseAt :: UserParser a (Either String (Deref, Bool)) parseAt = parseUrl '@' '?' parseUrl :: Char -> Char -> UserParser a (Either String (Deref, Bool)) parseUrl c d = do _ <- char c (char '\\' >> return (Left [c])) <|> (do x <- (char d >> return True) <|> return False (do deref <- derefCurlyBrackets return $ Right (deref, x)) <|> return (Left $ if x then [c, d] else [c])) parseInterpolatedString :: Char -> UserParser a (Either String String) parseInterpolatedString c = do _ <- char c (char '\\' >> return (Left ['\\', c])) <|> (do bracketed <- curlyBrackets return $ Right (c:bracketed)) <|> return (Left [c]) parseVarString :: Char -> UserParser a (Either String String) parseVarString = parseInterpolatedString parseUrlString :: Char -> Char -> UserParser a (Either String String) parseUrlString c d = do _ <- char c (char '\\' >> return (Left [c, '\\'])) <|> (do ds <- (char d >> return [d]) <|> return [] (do bracketed <- curlyBrackets return $ Right (c:ds ++ bracketed)) <|> return (Left (c:ds))) parseIntString :: Char -> UserParser a (Either String String) parseIntString = parseInterpolatedString parseCaret :: UserParser a (Either String Deref) parseCaret = parseInt '^' parseInt :: Char -> UserParser a (Either String Deref) parseInt c = do _ <- char c (try $ char '\\' >> char '{' >> return (Left [c, '{'])) <|> (do deref <- derefCurlyBrackets return $ Right deref) <|> return (Left [c]) parseUnder :: UserParser a (Either String Deref) parseUnder = do _ <- char '_' (char '\\' >> return (Left "_")) <|> (do deref <- derefCurlyBrackets return $ Right deref) <|> return (Left "_") readUtf8File :: FilePath -> IO TL.Text readUtf8File fp = do h <- SIO.openFile fp SIO.ReadMode SIO.hSetEncoding h SIO.utf8_bom ret <- TIO.hGetContents h return $ #ifdef WINDOWS TL.filter ('\r'/=) ret #else ret #endif