----------------------------------------------------------------------------- -- | -- Module : Lentil.Parse.Syntaxes -- Copyright : © 2015 Francesco Ariis, Tomislav -- License : GPLv3 (see the LICENSE file) -- -- Languages descriptors ----------------------------------------------------------------------------- module Lentil.Parse.Syntaxes where import Lentil.Parse.Source import Lentil.Types import Text.Parsec import Control.Applicative hiding (many) import Prelude import qualified System.FilePath as SF import qualified Data.Char as C type MaybePar = Maybe (ParSource [CommentString]) -- todo [u:1] [request] qptain_nemo fake multiline comments in C -- (i.e. // and \ at the bottom of the line, continued into -- next line, are valid C comments but not recognised by lentil -- as langParser, with alias added langParserAlias :: [Alias] -> String -> MaybePar langParserAlias as fp = maybe (langParser fp) langParser (lookup ext as) where ext = map C.toLower (SF.takeExtension fp) extensionList :: [Alias] -> [String] extensionList as = concatMap fst languages ++ map fst as langParser :: String -> MaybePar langParser fp = lookupExt languages ext where ext = map C.toLower (SF.takeExtension fp) languages :: [([String], MaybePar)] languages = [ ([".hs", ".lhs", ".hsc", ".chs" ], Just haskell), ([".cabal"], Just haskell), -- cabal ([".elm"], Just haskell), -- Elm ([".c", ".h"], Just c), ([".cpp", ".hpp"], Just c), -- C++ ([".scala"], Just c), -- Scala ([".java"], Just c), -- Java ([".glsl"], Just c), -- GL Shader ([".xrl"], Just c), -- FLEX ([".js"], Just javascript), ([".ts"], Just javascript), -- TypeScript ([".pas", ".pp", ".inc"], Just pascal), ([".py"], Just python), ([".coffee"], Just python), -- CoffeeScript ([".rb"], Just ruby), ([".pl", ".pm", ".t"], Just perl), ([".sh"], Just perl), -- shell ([".nix"], Just nix), ([".xml", ".html"], Just xml), ([".erl", ".hrl", ".escript", ".yrl"], Just erlang), ([".ml", ".mli"], Just ocaml), ([".rs", ".rlib"], Just rust), ([".sml"], Just sml), ([".rst"], Just rst), -- reStructuredText ([".txt"], Just text) ] haskell, c, javascript, pascal, python, ruby, perl, nix, xml, erlang, ocaml, rust, sml, rst, text :: ParSource [CommentString] haskell = source $ StdSyntax ["--"] [("{-", "-}")] ClangLike ['"'] CommonChr ['\''] c = source $ StdSyntax ["//"] [("/*", "*/")] ClangLike ['"'] CommonChr ['\''] javascript = source $ StdSyntax ["//"] [("/*", "*/")] ClangLike ['"', '\''] CommonChr [] pascal = source $ StdSyntax ["//"] [("{", "}" ), ("(*", "*)")] SQLLike ['\''] CommonChr [] python = source $ StdSyntax ["#"] [("\"\"\"", "\"\"\"")] ClangLike ['"', '\''] CommonChr [] ruby = source $ StdSyntax ["#"] [("=begin", "=end")] ClangLike ['"', '\''] CommonChr [] perl = source $ StdSyntax ["#"] [] ClangLike ['"', '\''] CommonChr [] nix = source $ StdSyntax ["#"] [("/*", "*/")] ClangLike ['"'] CommonChr ['\''] xml = source $ StdSyntax [] [("")] ClangLike ['"', '\''] CommonChr [] erlang = source $ StdSyntax ["%"] [] ClangLike ['"'] ErlangChr ['$'] ocaml = source $ StdSyntax [] [("(*", "*)")] ClangLike ['"', '\''] CommonChr [] rust = source $ StdSyntax ["//!", "///", "//"] [] ClangLike ['"', '\''] CommonChr [] sml = source $ StdSyntax [] [("(*", "*)")] ClangLike ['"'] CommonChr [] rst = source RstSyntax -- todo [bug] [design] rst parser doesn't respect whitespace or -- paragraphs. How to implement this without breaking other -- parsers? text = (:[]) . MultiLine 1 <$> many anyChar -- ANCILLARIES -- lookupExt :: [([String], MaybePar)] -> String -> MaybePar lookupExt [] _ = Nothing lookupExt ((ss,p):ds) e | elem e ss = p | otherwise = lookupExt ds e