----------------------------------------------------------------------------- -- | -- Module : Lentil.Parse.Source -- Copyright : © 2015 Francesco Ariis -- License : GPLv3 (see the LICENSE file) -- -- Comments from source files ----------------------------------------------------------------------------- module Lentil.Parse.Source where import Text.Parsec hiding (Line) import qualified Data.Char as C import qualified System.Directory as D import qualified System.IO as I import qualified System.FilePath as SF import Control.Applicative hiding (many, (<|>)) ----------- -- TYPES -- ----------- -- our parsers goal is simple: from a source file, only the comments and -- newlines type ParSource a = Parsec String () a data ParSyntax = ParSyntax { psLineComms :: [String], psBlockComms :: [(String, String)], psStringLits :: [Char], psCharLits :: [Char] } deriving (Show) ------------ -- BLOCKS -- ------------ -- TODO: ++ "\n" is inefficient [lint] [refactor] lineComment :: String -> ParSource String lineComment s = string s *> fmap (++ "\n") (manyTill anyChar newline) "line comment" blockComment :: (String, String) -> ParSource String blockComment (i, e) = string i *> manyTill anyChar (try $ string e) "block comment" -- quoted strings, escaped by \, by mauke^ litString :: Char -> ParSource String litString ic = q *> many ((char '\\' *> anyChar) <|> noneOf no) <* q "codestring" where q = char ic no = C.showLitChar ic "\\" -- quoted single character litChar :: Char -> ParSource Char litChar ic = q *> ((char '\\' *> anyChar) <|> anyChar) <* q "char string sign" where q = char ic -- TODO: fallo diventare string così non impazzisci nelle altre parti -- [refactor] -- a program is instructions to the computer. Ends when you meet -- a well formed element from above (linecomm, blockcom, stringlit, -- charLit) program :: ParSyntax -> ParSource String program ps = manyTill1 anyChar (endp <|> ("" <$ eof)) "program" where -- endp :: [ParSource String] endp = lookAhead $ choice $ map try $ map string posts ++ [lchars] -- every unambiguous symbol (init comments, listring char) posts = psLineComms ps ++ map fst (psBlockComms ps) ++ (map (:[]) $ psStringLits ps) -- ambiguous symbols (pall'aaa'foo is valid haskell identifier, -- so it should stay here lchars = "" <$ choice (map litChar (psCharLits ps)) -- TODO: lint program parser [lint] -- occhio che il primo carattere non è controllato da ED -- TODO: manyTill1 broken? occhio che al primon on chiama ed! [test] manyTill1 :: ParSource a -> ParSource b -> ParSource [a] manyTill1 p ed = (:) <$> p <*> manyTill p ed ------------ -- SOURCE -- ------------ -- given a set of lineparsers / blockparsers source :: ParSyntax -> ParSource String source ps@(ParSyntax lc bc sl cl) = choice (map (try . lineComment) lc) -- line comment <|> choice (map (try . blockComment) bc) -- block comment <|> (choice (map (onlynl . try . litString) sl)) <|> ("" <$ (choice (map (try . litChar) cl))) -- todo ugly parens [lint] [refactor] <|> (onlynl $ program ps) "source file" where onlynl a = fmap (filter (== '\n')) a sources :: ParSyntax -> ParSource String sources ps = fmap concat $ many1 (source ps) -------------- -- SYNTAXES -- -------------- -- TODO: add more langparsers [feature:intermediate] -- TODO: fallo che sia estensibile e leggibile -- a compilazione [design] [u:3] langParser :: String -> Maybe (ParSource String) langParser fp | ext `elem` [".hs", ".lhs"] = Just haskell | ext `elem` [".c", ".h"] = Just c | ext `elem` [".cpp", ".hpp"] = Just c -- C++ | ext `elem` [".java"] = Just c -- Java | ext `elem` [".js"] = Just javascript | ext `elem` [".py"] = Just python | ext `elem` [".rb"] = Just ruby | ext `elem` [".pas", ".pp", ".inc"] = Just pascal | ext `elem` [".txt"] = Just text | otherwise = Nothing where ext = SF.takeExtension fp haskell, c, pascal, text :: ParSource String haskell = sources $ ParSyntax ["--"] [("{-", "-}")] ['"'] ['\''] c = sources $ ParSyntax ["//"] [("/*", "*/")] ['"'] ['\''] javascript = sources $ ParSyntax ["//"] [("/*", "*/")] ['"', '\''] [] pascal = sources $ ParSyntax ["//"] [("{", "}" ), ("(*", "*)")] ['\''] [] python = sources $ ParSyntax ["#"] [("\"\"\"", "\"\"\"")] ['"', '\''] [] ruby = sources $ ParSyntax ["#"] [("=begin", "=end")] ['"', '\''] [] text = many anyChar ------------- -- PARSERS -- ------------- commentParser :: FilePath -> IO (Maybe String) commentParser fp = case langParser fp of Nothing -> return Nothing Just p -> fileParser fp p >>= return . Just . ('\n':) -- TODO: ugly hack to allow tODOs to be -- parseable even in first line. This is -- coupled with another hack in parser -- `issue` (another module). [refactor] -- [lint] [duct] -- errors to stderr fileParser :: FilePath -> ParSource String -> IO String fileParser fp p = rfe fp >>= \cs -> case runParSource p fp cs of Left l -> perr (fp ++ " : parse error " ++ show l) >> return [] Right r -> return r where rfe :: FilePath -> IO String rfe _ = D.doesFileExist fp >>= \fb -> if fb then readFile fp else perr (fp ++ " : no such file") >> return "" perr cs = I.hPutStrLn I.stderr cs runParSource :: ParSource a -> FilePath -> String -> Either ParseError a runParSource p fp cs = runParser p () fp cs