{-# LANGUAGE CPP #-}
{-# LANGUAGE RecordWildCards #-}
module Hasktags (
FileData,
generate,
findThings,
findThingsInBS,
Mode(..),
TagsFile(..),
Tags(..),
getOutFile,
dirToFiles
) where
import Control.Monad (when)
import Control.Arrow ((***))
import qualified Data.ByteString.Char8 as BS (ByteString, readFile, unpack)
import qualified Data.ByteString.UTF8 as BS8 (fromString)
import Data.Char (isSpace)
import Data.String (IsString(..))
import Data.List (isPrefixOf, isSuffixOf, groupBy,
tails, nub)
import Data.Maybe (maybeToList)
import DebugShow (trace_)
import System.Directory (doesDirectoryExist, doesFileExist,
getDirectoryContents,
getModificationTime,
canonicalizePath,
#if MIN_VERSION_directory(1,3,0)
pathIsSymbolicLink)
#else
isSymbolicLink)
#endif
import System.FilePath ((</>))
import System.IO (Handle, IOMode, hClose, openFile, stdout)
import Tags (FileData (..), FileName,
FoundThing (..),
FoundThingType (FTClass, FTCons, FTConsAccessor, FTConsGADT, FTData, FTDataGADT, FTFuncImpl, FTFuncTypeDef, FTInstance, FTModule, FTNewtype, FTPattern, FTPatternTypeDef, FTType),
Pos (..), Scope, mywords,
writectagsfile, writeetagsfile)
import Text.JSON.Generic (decodeJSON, encodeJSON)
getOutFile :: String -> IOMode -> IO Handle
getOutFile filepath openMode
| "-" == filepath = return stdout
| otherwise = openFile filepath openMode
data TagsFile = TagsFile
{ _ctagsFile :: FilePath
, _etagsFile :: FilePath
}
instance Show TagsFile where
show TagsFile{..} = "ctags: " ++ _ctagsFile ++ ", etags: " ++ _etagsFile
instance IsString TagsFile where
fromString s = TagsFile s s
data Tags =
Ctags
| Etags
| Both
deriving Show
data Mode = Mode
{ _tags :: Tags
, _extendedCtag :: Bool
, _appendTags :: IOMode
, _outputFile :: TagsFile
, _cacheData :: Bool
, _followSymlinks :: Bool
, _suffixes :: [String]
, _absoluteTagPaths :: Bool
} deriving Show
data Token = Token String Pos
| NewLine Int
deriving (Eq)
instance Show Token where
show (Token t (Pos _ _l _ _) ) = " " ++ t ++ " "
show (NewLine i) = "NewLine " ++ show i
tokenString :: Token -> String
tokenString (Token s _) = s
tokenString (NewLine _) = "\n"
isNewLine :: Maybe Int -> Token -> Bool
isNewLine Nothing (NewLine _) = True
isNewLine (Just c) (NewLine c') = c == c'
isNewLine _ _ = False
trimNewlines :: [Token] -> [Token]
trimNewlines = filter (not . isNewLine Nothing)
generate :: Mode -> [FilePath] -> IO ()
generate Mode{..} files = do
files_or_dirs <- if _absoluteTagPaths
then mapM canonicalizePath files
else return files
filenames <- (nub . concat) <$> mapM (dirToFiles _followSymlinks _suffixes) files_or_dirs
filedata <- mapM (findWithCache _cacheData) filenames
writeTags _tags filedata
where
writeTags Ctags filedata = writeFile' _ctagsFile (writectagsfile _extendedCtag filedata)
writeTags Etags filedata = writeFile' _etagsFile (writeetagsfile filedata)
writeTags Both filedata = writeTags Ctags filedata >> writeTags Etags filedata
writeFile' :: FilePath -> (Handle -> IO ()) -> IO ()
writeFile' name f = do
file <- getOutFile name _appendTags
f file
hClose file
TagsFile{..} = _outputFile
findWithCache :: Bool -> FileName -> IO FileData
findWithCache cache filename = do
cacheExists <- if cache then doesFileExist cacheFilename else return False
if cacheExists
then do fileModified <- getModificationTime filename
cacheModified <- getModificationTime cacheFilename
if cacheModified > fileModified
then do bytes <- BS.readFile cacheFilename
return (decodeJSON (BS.unpack bytes))
else findAndCache
else findAndCache
where cacheFilename = filenameToTagsName filename
filenameToTagsName = (++"tags") . reverse . dropWhile (/='.') . reverse
findAndCache = do
filedata <- findThings filename
when cache (writeFile cacheFilename (encodeJSON filedata))
return filedata
utf8_to_char8_hack :: String -> String
utf8_to_char8_hack = BS.unpack . BS8.fromString
findThings :: FileName -> IO FileData
findThings filename =
findThingsInBS filename <$> BS.readFile filename
findThingsInBS :: String -> BS.ByteString -> FileData
findThingsInBS filename bs = do
let aslines = lines $ BS.unpack bs
let stripNonHaskellLines = let
emptyLine = all (all isSpace . tokenString)
. filter (not . isNewLine Nothing)
cppLine (_nl:t:_) = ("#" `isPrefixOf`) $ tokenString t
cppLine _ = False
in filter (not . emptyLine) . filter (not . cppLine)
let debugStep m s = trace_ (m ++ " result") s s
let (isLiterate, slines) =
debugStep "fromLiterate"
$ fromLiterate filename
$ zip aslines [0..]
let
(fileLines, numbers)
= unzip slines
let tokenLines =
debugStep "stripNonHaskellLines" $ stripNonHaskellLines
$ debugStep "stripslcomments" $ stripslcomments
$ debugStep "splitByNL" $ splitByNL Nothing
$ debugStep "stripblockcomments pipe" $ stripblockcomments
$ concat
$ zipWith3 (withline filename)
(map
(filter (not . all isSpace) . mywords False)
fileLines)
fileLines
numbers
let topLevelIndent = debugStep "top level indent" $ getTopLevelIndent isLiterate tokenLines
let sections = map tail
$ filter (not . null)
$ splitByNL (Just topLevelIndent )
$ concat (trace_ "tokenLines" tokenLines tokenLines)
let filterAdjacentFuncImpl = map head . groupBy (\(FoundThing t1 n1 (Pos f1 _ _ _))
(FoundThing t2 n2 (Pos f2 _ _ _))
-> f1 == f2
&& n1 == n2
&& areFuncImpls t1 t2)
areFuncImpls (FTFuncImpl _) (FTFuncImpl _) = True
areFuncImpls _ _ = False
let iCI = map head . groupBy (\(FoundThing t1 n1 (Pos f1 l1 _ _))
(FoundThing t2 n2 (Pos f2 l2 _ _))
-> f1 == f2
&& n1 == n2
&& skipCons t1 t2
&& ((<= 7) $ abs $ l2 - l1))
skipCons FTData (FTCons _ _) = False
skipCons FTDataGADT (FTConsGADT _) = False
skipCons _ _ = True
let things = iCI $ filterAdjacentFuncImpl $ concatMap (flip findstuff Nothing .
(\s -> trace_ "section in findThingsInBS" s s)) sections
let
uniqueModuleName (FoundThing FTModule moduleName _)
= not
$ any (\(FoundThing thingType thingName _)
-> thingType /= FTModule && thingName == moduleName) things
uniqueModuleName _ = True
FileData filename $ filter uniqueModuleName things
withline :: FileName -> [String] -> String -> Int -> [Token]
withline filename sourceWords fullline i =
let countSpaces (' ':xs) = 1 + countSpaces xs
countSpaces ('\t':xs) = 8 + countSpaces xs
countSpaces _ = 0
in NewLine (countSpaces fullline)
: zipWith (\w t -> Token w (Pos filename i t fullline)) sourceWords [1 ..]
stripslcomments :: [[Token]] -> [[Token]]
stripslcomments = let f (NewLine _ : Token ('-':'-':_) _ : _) = False
f _ = True
isCmt (Token ('-':'-':_) _) = True
isCmt _ = False
in map (takeWhile (not . isCmt)) . filter f
stripblockcomments :: [Token] -> [Token]
stripblockcomments (Token "{-" pos : xs) =
trace_ "{- found at " (show pos) $
afterblockcomend xs
stripblockcomments (x:xs) = x:stripblockcomments xs
stripblockcomments [] = []
afterblockcomend :: [Token] -> [Token]
afterblockcomend (t@(Token _ pos):xs)
| contains "-}" (tokenString t) =
trace_ "-} found at " (show pos) $
stripblockcomments xs
| otherwise = afterblockcomend xs
afterblockcomend [] = []
afterblockcomend (_:xs) = afterblockcomend xs
contains :: Eq a => [a] -> [a] -> Bool
contains sub = any (isPrefixOf sub) . tails
findstuff :: [Token] -> Scope -> [FoundThing]
findstuff (Token "module" _ : Token name pos : _) _ =
trace_ "module" pos [FoundThing FTModule name pos]
findstuff tokens@(Token "data" _ : Token name pos : xs) _
| any ( (== "where"). tokenString ) xs
=
trace_ "findstuff data b1" tokens $
FoundThing FTDataGADT name pos
: getcons2 (FTConsGADT name) "" xs ++ fromWhereOn xs Nothing
| otherwise
=
trace_ "findstuff data otherwise" tokens $
FoundThing FTData name pos
: getcons (FTCons FTData name) (trimNewlines xs)
findstuff tokens@(Token "newtype" _ : ts@(Token name pos : _))_ =
trace_ "findstuff newtype" tokens $
FoundThing FTNewtype name pos
: getcons (FTCons FTNewtype name) (trimNewlines ts)
findstuff tokens@(Token "type" _ : Token name pos : xs) _ =
trace_ "findstuff type" tokens $
case break ((== "where").tokenString) xs of
(ys, []) ->
trace_ "findstuff type b1 " ys [FoundThing FTType name pos]
(ys, r) ->
trace_ "findstuff type b2 " (ys, r) $
FoundThing FTType name pos : fromWhereOn r Nothing
findstuff tokens@(Token "class" _ : xs) _ =
trace_ "findstuff class" tokens $
case break ((== "where").tokenString) xs of
(ys, []) ->
trace_ "findstuff class b1 " ys $
maybeToList $ className ys
(ys, r) ->
trace_ "findstuff class b2 " (ys, r) $
maybe [] (\n@(FoundThing _ name _) -> n : fromWhereOn r (Just (FTClass, name))) $
className ys
where isParenOpen (Token "(" _) = True
isParenOpen _ = False
className lst
= case (head'
. dropWhile isParenOpen
. reverse
. takeWhile ((not . (`elem` ["=>", utf8_to_char8_hack "⇒"])) . tokenString)
. reverse) lst of
(Just (Token name p)) -> Just $ FoundThing FTClass name p
_ -> Nothing
findstuff tokens@(Token "instance" _ : xs) _ =
trace_ "findstuff instance" tokens $
case break ((== "where").tokenString) xs of
(ys, []) ->
trace_ "findstuff instance b1 " ys $
maybeToList $ instanceName ys
(ys, r) ->
trace_ "findstuff instance b2 " (ys, r) $
maybe [] (\n@(FoundThing _ name _) -> n : fromWhereOn r (Just (FTInstance, name))) $
instanceName ys
where instanceName lst@(Token _ p :_) = Just $ FoundThing FTInstance
(map (\a -> if a == '.' then '-' else a) $ concatTokens lst) p
instanceName _ = Nothing
findstuff tokens@(Token "pattern" _ : Token name pos : Token "::" _ : sig) _ =
trace_ "findstuff pattern type annotation" tokens [FoundThing (FTPatternTypeDef (concatTokens sig)) name pos]
findstuff tokens@(Token "pattern" _ : Token name pos : xs) scope =
trace_ "findstuff pattern" tokens $
FoundThing FTPattern name pos : findstuff xs scope
findstuff xs scope =
trace_ "findstuff rest " xs $
findFunc xs scope ++ findFuncTypeDefs [] xs scope
findFuncTypeDefs :: [Token] -> [Token] -> Scope -> [FoundThing]
findFuncTypeDefs found (t@(Token _ _): Token "," _ :xs) scope =
findFuncTypeDefs (t : found) xs scope
findFuncTypeDefs found (t@(Token _ _): Token "::" _ : sig) scope =
map (\(Token name p) -> FoundThing (FTFuncTypeDef (concatTokens sig) scope) name p) (t:found)
findFuncTypeDefs found xs@(Token "(" _ :_) scope =
case break myBreakF xs of
(inner@(Token _ p : _), rp : xs') ->
let merged = Token ( concatMap (\(Token x _) -> x) $ inner ++ [rp] ) p
in if any (isNewLine Nothing) inner
then []
else findFuncTypeDefs found (merged : xs') scope
_ -> []
where myBreakF (Token ")" _) = True
myBreakF _ = False
findFuncTypeDefs _ _ _ = []
fromWhereOn :: [Token] -> Scope -> [FoundThing]
fromWhereOn [] _ = []
fromWhereOn [_] _ = []
fromWhereOn (_: xs@(NewLine _ : _)) scope =
concatMap (flip findstuff scope . tail')
$ splitByNL (Just ( minimum
. (10000:)
. map (\(NewLine i) -> i)
. filter (isNewLine Nothing) $ xs)) xs
fromWhereOn (_:xw) scope = findstuff xw scope
findFunc :: [Token] -> Scope -> [FoundThing]
findFunc x scope = case findInfix x scope of
a@(_:_) -> a
_ -> findF x scope
findInfix :: [Token] -> Scope -> [FoundThing]
findInfix x scope
= case dropWhile
((/= "`"). tokenString)
(takeWhile ( (/= "=") . tokenString) x) of
_ : Token name p : _ -> [FoundThing (FTFuncImpl scope) name p]
_ -> []
findF :: [Token] -> Scope -> [FoundThing]
findF ts@(Token "(" p : _) scope =
let (name, xs) = extractOperator ts in
[FoundThing (FTFuncImpl scope) name p | any (("=" ==) . tokenString) xs]
findF (Token name p : xs) scope =
[FoundThing (FTFuncImpl scope) name p | any (("=" ==) . tokenString) xs]
findF _ _ = []
head' :: [a] -> Maybe a
head' (x:_) = Just x
head' [] = Nothing
tail' :: [a] -> [a]
tail' (_:xs) = xs
tail' [] = []
getcons :: FoundThingType -> [Token] -> [FoundThing]
getcons ftt (Token "=" _: Token name pos : xs) =
FoundThing ftt name pos : getcons2 ftt name xs
getcons ftt (_:xs) = getcons ftt xs
getcons _ [] = []
getcons2 :: FoundThingType -> String -> [Token] -> [FoundThing]
getcons2 ftt@(FTCons pt p) c (Token name pos : Token "::" _ : xs) =
FoundThing (FTConsAccessor pt p c) name pos : getcons2 ftt c xs
getcons2 ftt@(FTConsGADT p) _ (Token name pos : Token "::" _ : xs) =
FoundThing ftt name pos : getcons2 ftt p xs
getcons2 ftt _ (Token "|" _ : Token name pos : xs) =
FoundThing ftt name pos : getcons2 ftt name xs
getcons2 ftt c (_:xs) = getcons2 ftt c xs
getcons2 _ _ [] = []
splitByNL :: Maybe Int -> [Token] -> [[Token]]
splitByNL maybeIndent (nl@(NewLine _):ts) =
let (a,b) = break (isNewLine maybeIndent) ts
in (nl : a) : splitByNL maybeIndent b
splitByNL _ _ = []
getTopLevelIndent :: Bool -> [[Token]] -> Int
getTopLevelIndent _ [] = 0
getTopLevelIndent isLiterate ((nl:next:_):xs) = if "import" == tokenString next
then let (NewLine i) = nl in i
else getTopLevelIndent isLiterate xs
getTopLevelIndent isLiterate (_:xs) = getTopLevelIndent isLiterate xs
fromLiterate :: FilePath -> [(String, Int)]
-> (Bool
, [(String, Int)])
fromLiterate file lns =
if ".lhs" `isSuffixOf` file
then (True, unlit lns)
else (False, lns)
where unlit, returnCode :: [(String, Int)] -> [(String, Int)]
unlit (('>':' ':xs,n):ns) = (' ':xs,n):unlit ns
unlit ((line,_):ns) = if "\\begin{code}" `isPrefixOf` line then returnCode ns else unlit ns
unlit [] = []
returnCode (t@(line,_):ns) = if "\\end{code}" `isPrefixOf` line then unlit ns else t:returnCode ns
returnCode [] = []
dirToFiles :: Bool -> [String] -> FilePath -> IO [ FilePath ]
dirToFiles _ _ "STDIN" = lines <$> getContents
dirToFiles followSyms suffixes p = do
isD <- doesDirectoryExist p
#if MIN_VERSION_directory(1,3,0)
isSymLink <- pathIsSymbolicLink p
#else
isSymLink <- isSymbolicLink p
#endif
if isD
then if isSymLink && not followSyms
then return []
else do
contents <- filter ((/=) '.' . head) `fmap` getDirectoryContents p
concat `fmap` mapM (dirToFiles followSyms suffixes . (</>) p) contents
else return [p | matchingSuffix ]
where matchingSuffix = any (`isSuffixOf` p) suffixes
concatTokens :: [Token] -> String
concatTokens = smartUnwords . map (\(Token name _) -> name) . filter (not . isNewLine Nothing)
where smartUnwords [] = []
smartUnwords a = foldr (\v -> (glueNext v ++)) "" $ a `zip` tail (a ++ [""])
glueNext (a@("("), _) = a
glueNext (a, ")") = a
glueNext (a@("["), _) = a
glueNext (a, "]") = a
glueNext (a, ",") = a
glueNext (a, "") = a
glueNext (a, _) = a ++ " "
extractOperator :: [Token] -> (String, [Token])
extractOperator ts@(Token "(" _ : _) =
foldr ((++) . tokenString) ")" *** tail' $ break ((== ")") . tokenString) ts
extractOperator _ = ("", [])