----------------------------------------------------------------------------- -- | -- Module : Lentil.Parse.Source -- Copyright : © 2015 Francesco Ariis -- License : GPLv3 (see the LICENSE file) -- -- Comments from source files ----------------------------------------------------------------------------- module Lentil.Parse.Source where import Lentil.Types import Lentil.Helpers import Text.Parsec hiding (Line) import qualified Data.Char as C import qualified Data.Maybe as M import qualified Data.List as L import Control.Applicative hiding (many, (<|>)) import Prelude -- 7.8 hack ----------- -- TYPES -- ----------- -- from source to comment strings type ParSource a = Parsec String () a data ParSyntax = ParSyntax { psLineComms :: [String], psBlockComms :: [(String, String)], psStringStyle :: EscapeStyle, psStringLits :: [Char], psCharStyle :: CharStyle, psCharLits :: [Char] } deriving (Show) data EscapeStyle = ClangLike | SQLLike deriving (Show, Eq) data CharStyle = CommonChr | ErlangChr deriving (Show, Eq) -------------- -- COMMENTS -- -------------- data CommentString = SingleLine Row String -- sans newline | MultiLine Row String deriving (Show, Eq) -- parses stuff like "-- comment" lineComment :: String -> ParSource CommentString lineComment s = SingleLine <$> (initId s *> getRow) <*> manyTill anyChar newline "line comment" where -- for 1-char line comments (#, ;, etc.), erases repeating initId :: String -> ParSource () initId [a] = () <$ many1 (char a) initId as = () <$ string as blockComment :: (String, String) -> ParSource CommentString blockComment (i, e) = MultiLine <$> (string i *> getRow) <*> manyTill anyChar (try $ string e) "block comment" getRow :: ParSource Row getRow = fmap sourceLine getPosition ------------- -- STRINGS -- ------------- litString :: EscapeStyle -> Char -> ParSource String litString ClangLike ic = cString ic litString SQLLike ic = sqlString ic -- string ancillaries -- -- quoted strings, escaped by \, by mauke^ cString :: Char -> ParSource String cString ic = q *> many ((char '\\' *> anyChar) <|> noneOf no) <* q "codestring" where q = char ic no = C.showLitChar ic "\\" -- sqllike string (no escape with \, '' to escape ') sqlString :: Char -> ParSource String sqlString ic = char ic *> manyTill anyChar (char ic) -- we treat 'cdscsad cdscdsa '' csdcs' as two strings -- because the content is meaningless to our program ----------- -- CHARS -- ----------- litChar :: CharStyle -> Char -> ParSource Char litChar CommonChr ic = commonChar ic litChar ErlangChr ic = erlangChar ic -- quoted single character commonChar :: Char -> ParSource Char commonChar ic = q *> ((char '\\' *> anyChar) <|> anyChar) <* q "char string sign" where q = char ic -- $a for 'a' (where ic = '$') erlangChar :: Char -> ParSource Char erlangChar ic = char ic *> anyChar ------------------ -- OTHER BLOCKS -- ------------------ -- a program is instructions to the computer. Ends when you meet -- a well formed element from above (linecomm, blockcom, stringlit, -- charLit) program :: ParSyntax -> ParSource String program ps = manyTill1 anyChar (endp <|> ("" <$ eof)) "program" where -- endp :: [ParSource String] endp = lookAhead . choice . map try $ map string posts ++ [lchars] -- every "post" symbol (init comments, listring char) posts = psLineComms ps ++ -- line map fst (psBlockComms ps) ++ -- block map (:[]) (psStringLits ps) -- lit string -- ambiguous symbols (pall'aaa'foo is valid haskell identifier, -- so it should stay here lchars = "" <$ choice (map (litChar (psCharStyle ps)) (psCharLits ps)) ------------ -- SOURCE -- ------------ -- given a set of lineparsers / blockparsers sourcePart :: ParSyntax -> ParSource (Maybe CommentString) sourcePart ps@(ParSyntax lc bc es sl cs cl) = choice [plc, pbc, psl, pcl, ppr] "source file part" where ct = choice . map try plc = Just <$> (ct . map lineComment $ lc) -- line comlm pbc = Just <$> (ct . map blockComment $ bc) -- block comm psl = Nothing <$ (choice . map (try . litString es) $ sl) -- str lit pcl = Nothing <$ (ct . map (litChar cs) $ cl) -- char lit ppr = Nothing <$ program ps -- program -- ps: syntax to sever comment from the rest source :: ParSyntax -> ParSource [CommentString] source ps = fmap M.catMaybes (many1 (sourcePart ps)) <|> ([] <$ eof) "source file" ------------- -- CONVERT -- ------------- type Comment = (Row, String) comms2Tuple :: [CommentString] -> [Comment] comms2Tuple [] = [] comms2Tuple (c:cs) | isSl c = let (a,z) = span isSl (c:cs) in groupLineComms a ++ comms2Tuple z | otherwise = comment2Tuple c : comms2Tuple cs where isSl (SingleLine _ _) = True isSl _ = False -- use it on Line comments **only**! groupLineComms :: [CommentString] -> [Comment] groupLineComms cs = let cs' = map comment2Tuple cs f (r, s) i = (r, s, r-i) zipped = zipWith f cs' (enumFrom 1) grouped = L.groupBy threeEq zipped in map (flatComm . map back) grouped where threeEq (_, _, x) (_, _, y) = x == y back (a, b, _) = (a, b) flatComm cs' = (fst . head $ cs', unlines . map snd $ cs') comment2Tuple :: CommentString -> Comment comment2Tuple (SingleLine r t) = (r, t) comment2Tuple (MultiLine r t) = (r, t)