{-# LANGUAGE CPP #-}
module Hasktags (
FileData,
generate,
findWithCache,
findThings,
findThingsInBS,
Mode(..),
getMode,
getOutFile,
dirToFiles
) where
import Control.Monad (when)
import qualified Data.ByteString.Lazy.Char8 as BS (ByteString, readFile, unpack)
import qualified Data.ByteString.Lazy.UTF8 as BS8 (fromString)
import Data.Char (isSpace)
import Data.List (isPrefixOf, isSuffixOf, groupBy,
tails)
import Data.Maybe (maybeToList)
import DebugShow (trace_)
import System.Directory (doesDirectoryExist, doesFileExist,
getDirectoryContents,
getModificationTime,
#if MIN_VERSION_directory(1,3,0)
pathIsSymbolicLink)
#else
isSymbolicLink)
#endif
import System.FilePath ((</>))
import System.IO (Handle,
IOMode (AppendMode, WriteMode),
hClose, hGetContents, openFile,
stdin, stdout)
import Tags (FileData (..), FileName,
FoundThing (..),
FoundThingType (FTClass, FTCons, FTConsAccessor, FTConsGADT, FTData, FTDataGADT, FTFuncImpl, FTFuncTypeDef, FTInstance, FTModule, FTNewtype, FTPattern, FTPatternTypeDef, FTType),
Pos (..), Scope, mywords,
writectagsfile, writeetagsfile)
import Text.JSON.Generic (decodeJSON, encodeJSON)
getMode :: [Mode] -> Mode
getMode [] = BothTags
getMode xs = maximum xs
getOutFile :: String -> IOMode -> [Mode] -> IO Handle
getOutFile _ _ (OutRedir "-" : _) = return stdout
getOutFile _ openMode (OutRedir f : _) = openFile f openMode
getOutFile name openMode (_:xs) = getOutFile name openMode xs
getOutFile defaultName openMode [] = openFile
defaultName
openMode
data Mode = ExtendedCtag
| ETags
| CTags
| BothTags
| Append
| OutRedir String
| CacheFiles
| FollowDirectorySymLinks
| Help
| HsSuffixes [String]
| AbsolutePath
deriving (Ord, Eq, Show)
data Token = Token String Pos
| NewLine Int
deriving (Eq)
instance Show Token where
show (Token t (Pos _ _l _ _) ) = " " ++ t ++ " "
show (NewLine i) = "NewLine " ++ show i
tokenString :: Token -> String
tokenString (Token s _) = s
tokenString (NewLine _) = "\n"
isNewLine :: Maybe Int -> Token -> Bool
isNewLine Nothing (NewLine _) = True
isNewLine (Just c) (NewLine c') = c == c'
isNewLine _ _ = False
trimNewlines :: [Token] -> [Token]
trimNewlines = filter (not . isNewLine Nothing)
generate :: [Mode] -> [FileName] -> IO ()
generate modes filenames = do
let mode = getMode (filter ( `elem` [BothTags, CTags, ETags] ) modes)
openFileMode = if Append `elem` modes
then AppendMode
else WriteMode
filedata <- mapM (findWithCache (CacheFiles `elem` modes)) filenames
when (mode == CTags)
(do ctagsfile <- getOutFile "tags" openFileMode modes
writectagsfile ctagsfile (ExtendedCtag `elem` modes) filedata
hClose ctagsfile)
when (mode == ETags)
(do etagsfile <- getOutFile "TAGS" openFileMode modes
writeetagsfile etagsfile filedata
hClose etagsfile)
when (mode == BothTags)
(do etagsfile <- getOutFile "TAGS" openFileMode modes
writeetagsfile etagsfile filedata
ctagsfile <- getOutFile "ctags" openFileMode modes
writectagsfile ctagsfile (ExtendedCtag `elem` modes) filedata
hClose etagsfile
hClose ctagsfile)
findWithCache :: Bool -> FileName -> IO FileData
findWithCache cache filename = do
cacheExists <- if cache then doesFileExist cacheFilename else return False
if cacheExists
then do fileModified <- getModificationTime filename
cacheModified <- getModificationTime cacheFilename
if cacheModified > fileModified
then do bytes <- BS.readFile cacheFilename
return (decodeJSON (BS.unpack bytes))
else findAndCache
else findAndCache
where cacheFilename = filenameToTagsName filename
filenameToTagsName = (++"tags") . reverse . dropWhile (/='.') . reverse
findAndCache = do
filedata <- findThings filename
when cache (writeFile cacheFilename (encodeJSON filedata))
return filedata
utf8_to_char8_hack :: String -> String
utf8_to_char8_hack = BS.unpack . BS8.fromString
findThings :: FileName -> IO FileData
findThings filename =
fmap (findThingsInBS filename) $ BS.readFile filename
findThingsInBS :: String -> BS.ByteString -> FileData
findThingsInBS filename bs = do
let aslines = lines $ BS.unpack bs
let stripNonHaskellLines = let
emptyLine = all (all isSpace . tokenString)
. filter (not . isNewLine Nothing)
cppLine (_nl:t:_) = ("#" `isPrefixOf`) $ tokenString t
cppLine _ = False
in filter (not . emptyLine) . filter (not . cppLine)
let debugStep m = (\s -> trace_ (m ++ " result") s s)
let (isLiterate, slines) =
debugStep "fromLiterate"
$ fromLiterate filename
$ zip aslines [0..]
let
(fileLines, numbers)
= unzip slines
let tokenLines =
debugStep "stripNonHaskellLines" $ stripNonHaskellLines
$ debugStep "stripslcomments" $ stripslcomments
$ debugStep "splitByNL" $ splitByNL Nothing
$ debugStep "stripblockcomments pipe" $ stripblockcomments
$ concat
$ zipWith3 (withline filename)
(map
(filter (not . all isSpace) . mywords False)
fileLines)
fileLines
numbers
let topLevelIndent = debugStep "top level indent" $ getTopLevelIndent isLiterate tokenLines
let sections = map tail
$ filter (not . null)
$ splitByNL (Just (topLevelIndent) )
$ concat (trace_ "tokenLines" tokenLines tokenLines)
let filterAdjacentFuncImpl = map head . groupBy (\(FoundThing t1 n1 (Pos f1 _ _ _))
(FoundThing t2 n2 (Pos f2 _ _ _))
-> f1 == f2
&& n1 == n2
&& areFuncImpls t1 t2)
areFuncImpls (FTFuncImpl _) (FTFuncImpl _) = True
areFuncImpls _ _ = False
let iCI = map head . groupBy (\(FoundThing t1 n1 (Pos f1 l1 _ _))
(FoundThing t2 n2 (Pos f2 l2 _ _))
-> f1 == f2
&& n1 == n2
&& skipCons t1 t2
&& ((<= 7) $ abs $ l2 - l1))
skipCons FTData (FTCons _ _) = False
skipCons FTDataGADT (FTConsGADT _) = False
skipCons _ _ = True
let things = iCI $ filterAdjacentFuncImpl $ concatMap (flip findstuff Nothing) $
map (\s -> trace_ "section in findThingsInBS" s s) sections
let
uniqueModuleName (FoundThing FTModule moduleName _)
= not
$ any (\(FoundThing thingType thingName _)
-> thingType /= FTModule && thingName == moduleName) things
uniqueModuleName _ = True
FileData filename $ filter uniqueModuleName things
withline :: FileName -> [String] -> String -> Int -> [Token]
withline filename sourceWords fullline i =
let countSpaces (' ':xs) = 1 + countSpaces xs
countSpaces ('\t':xs) = 8 + countSpaces xs
countSpaces _ = 0
in NewLine (countSpaces fullline)
: zipWith (\w t -> Token w (Pos filename i t fullline)) sourceWords [1 ..]
stripslcomments :: [[Token]] -> [[Token]]
stripslcomments = let f (NewLine _ : Token ('-':'-':_) _ : _) = False
f _ = True
isCmt (Token ('-':'-':_) _) = True
isCmt _ = False
in map (takeWhile (not . isCmt)) . filter f
stripblockcomments :: [Token] -> [Token]
stripblockcomments (Token "{-" pos : xs) =
trace_ "{- found at " (show pos) $
afterblockcomend xs
stripblockcomments (x:xs) = x:stripblockcomments xs
stripblockcomments [] = []
afterblockcomend :: [Token] -> [Token]
afterblockcomend (t@(Token _ pos):xs)
| contains "-}" (tokenString t) =
trace_ "-} found at " (show pos) $
stripblockcomments xs
| otherwise = afterblockcomend xs
afterblockcomend [] = []
afterblockcomend (_:xs) = afterblockcomend xs
contains :: Eq a => [a] -> [a] -> Bool
contains sub = any (isPrefixOf sub) . tails
findstuff :: [Token] -> Scope -> [FoundThing]
findstuff (Token "module" _ : Token name pos : _) _ =
trace_ "module" pos $
[FoundThing FTModule name pos]
findstuff tokens@(Token "data" _ : Token name pos : xs) _
| any ( (== "where"). tokenString ) xs
=
trace_ "findstuff data b1" tokens $
FoundThing FTDataGADT name pos
: getcons2 (FTConsGADT name) "" xs ++ fromWhereOn xs Nothing
| otherwise
=
trace_ "findstuff data otherwise" tokens $
FoundThing FTData name pos
: getcons (FTCons FTData name) (trimNewlines xs)
findstuff tokens@(Token "newtype" _ : ts@(Token name pos : _))_ =
trace_ "findstuff newtype" tokens $
FoundThing FTNewtype name pos
: getcons (FTCons FTNewtype name) (trimNewlines ts)
findstuff tokens@(Token "type" _ : Token name pos : xs) _ =
trace_ "findstuff type" tokens $
case (break ((== "where").tokenString) xs) of
(ys, []) ->
trace_ "findstuff type b1 " ys $ [FoundThing FTType name pos]
(ys, r) ->
trace_ "findstuff type b2 " (ys, r) $
FoundThing FTType name pos : fromWhereOn r Nothing
findstuff tokens@(Token "class" _ : xs) _ =
trace_ "findstuff class" tokens $
case (break ((== "where").tokenString) xs) of
(ys, []) ->
trace_ "findstuff class b1 " ys $
maybeToList $ className ys
(ys, r) ->
trace_ "findstuff class b2 " (ys, r) $
maybe [] (\n@(FoundThing _ name _) -> n : fromWhereOn r (Just (FTClass, name))) $
className ys
where isParenOpen (Token "(" _) = True
isParenOpen _ = False
className lst
= case (head'
. dropWhile isParenOpen
. reverse
. takeWhile ((not . (`elem` ["=>", utf8_to_char8_hack "⇒"])) . tokenString)
. reverse) lst of
(Just (Token name p)) -> Just $ FoundThing FTClass name p
_ -> Nothing
findstuff tokens@(Token "instance" _ : xs) _ =
trace_ "findstuff instance" tokens $
case (break ((== "where").tokenString) xs) of
(ys, []) ->
trace_ "findstuff instance b1 " ys $
maybeToList $ instanceName ys
(ys, r) ->
trace_ "findstuff instance b2 " (ys, r) $
maybe [] (\n@(FoundThing _ name _) -> n : fromWhereOn r (Just (FTInstance, name))) $
instanceName ys
where instanceName lst@(Token _ p :_) = Just $ FoundThing FTInstance
(map (\a -> if a == '.' then '-' else a) $ concatTokens lst) p
instanceName _ = Nothing
findstuff tokens@(Token "pattern" _ : Token name pos : Token "::" _ : sig) _ =
trace_ "findstuff pattern type annotation" tokens $
[FoundThing (FTPatternTypeDef (concatTokens sig)) name pos]
findstuff tokens@(Token "pattern" _ : Token name pos : xs) scope =
trace_ "findstuff pattern" tokens $
FoundThing FTPattern name pos : findstuff xs scope
findstuff xs scope =
trace_ "findstuff rest " xs $
findFunc xs scope ++ findFuncTypeDefs [] xs scope
findFuncTypeDefs :: [Token] -> [Token] -> Scope -> [FoundThing]
findFuncTypeDefs found (t@(Token _ _): Token "," _ :xs) scope =
findFuncTypeDefs (t : found) xs scope
findFuncTypeDefs found (t@(Token _ _): Token "::" _ : sig) scope =
map (\(Token name p) -> FoundThing (FTFuncTypeDef (concatTokens sig) scope) name p) (t:found)
findFuncTypeDefs found xs@(Token "(" _ :_) scope =
case break myBreakF xs of
(inner@(Token _ p : _), rp : xs') ->
let merged = Token ( concatMap (\(Token x _) -> x) $ inner ++ [rp] ) p
in findFuncTypeDefs found (merged : xs') scope
_ -> []
where myBreakF (Token ")" _) = True
myBreakF _ = False
findFuncTypeDefs _ _ _ = []
fromWhereOn :: [Token] -> Scope -> [FoundThing]
fromWhereOn [] _ = []
fromWhereOn [_] _ = []
fromWhereOn (_: xs@(NewLine _ : _)) scope =
concatMap (flip findstuff scope . tail')
$ splitByNL (Just ( minimum
. (10000:)
. map (\(NewLine i) -> i)
. filter (isNewLine Nothing) $ xs)) xs
fromWhereOn (_:xw) scope = findstuff xw scope
findFunc :: [Token] -> Scope -> [FoundThing]
findFunc x scope = case findInfix x scope of
a@(_:_) -> a
_ -> findF x scope
findInfix :: [Token] -> Scope -> [FoundThing]
findInfix x scope
= case dropWhile
((/= "`"). tokenString)
(takeWhile ( (/= "=") . tokenString) x) of
_ : Token name p : _ -> [FoundThing (FTFuncImpl scope) name p]
_ -> []
findF :: [Token] -> Scope -> [FoundThing]
findF ts@(Token "(" p : _) scope =
let (name, xs) = extractOperator ts in
[FoundThing (FTFuncImpl scope) name p | any (("=" ==) . tokenString) xs]
findF (Token name p : xs) scope =
[FoundThing (FTFuncImpl scope) name p | any (("=" ==) . tokenString) xs]
findF _ _ = []
head' :: [a] -> Maybe a
head' (x:_) = Just x
head' [] = Nothing
tail' :: [a] -> [a]
tail' (_:xs) = xs
tail' [] = []
getcons :: FoundThingType -> [Token] -> [FoundThing]
getcons ftt (Token "=" _: Token name pos : xs) =
FoundThing ftt name pos : getcons2 ftt name xs
getcons ftt (_:xs) = getcons ftt xs
getcons _ [] = []
getcons2 :: FoundThingType -> String -> [Token] -> [FoundThing]
getcons2 ftt@(FTCons pt p) c (Token name pos : Token "::" _ : xs) =
FoundThing (FTConsAccessor pt p c) name pos : getcons2 ftt c xs
getcons2 ftt@(FTConsGADT p) _ (Token name pos : Token "::" _ : xs) =
FoundThing ftt name pos : getcons2 ftt p xs
getcons2 ftt _ (Token "|" _ : Token name pos : xs) =
FoundThing ftt name pos : getcons2 ftt name xs
getcons2 ftt c (_:xs) = getcons2 ftt c xs
getcons2 _ _ [] = []
splitByNL :: Maybe Int -> [Token] -> [[Token]]
splitByNL maybeIndent (nl@(NewLine _):ts) =
let (a,b) = break (isNewLine maybeIndent) ts
in (nl : a) : splitByNL maybeIndent b
splitByNL _ _ = []
getTopLevelIndent :: Bool -> [[Token]] -> Int
getTopLevelIndent _ [] = 0
getTopLevelIndent isLiterate ((nl:next:_):xs) = if "import" == (tokenString next)
then let (NewLine i) = nl in i
else getTopLevelIndent isLiterate xs
getTopLevelIndent isLiterate (_:xs) = getTopLevelIndent isLiterate xs
fromLiterate :: FilePath -> [(String, Int)]
-> (Bool
, [(String, Int)])
fromLiterate file lns =
if ".lhs" `isSuffixOf` file
then (True, unlit lns)
else (False, lns)
where unlit, returnCode :: [(String, Int)] -> [(String, Int)]
unlit ((('>':' ':xs),n):ns) = ((' ':xs),n):unlit(ns)
unlit ((line,_):ns) = if "\\begin{code}" `isPrefixOf` line then returnCode ns else unlit ns
unlit [] = []
returnCode (t@(line,_):ns) = if "\\end{code}" `isPrefixOf` line then unlit ns else t:(returnCode ns)
returnCode [] = []
dirToFiles :: Bool -> [String] -> FilePath -> IO [ FilePath ]
dirToFiles _ _ "STDIN" = fmap lines $ hGetContents stdin
dirToFiles followSyms suffixes p = do
isD <- doesDirectoryExist p
#if MIN_VERSION_directory(1,3,0)
isSymLink <- pathIsSymbolicLink p
#else
isSymLink <- isSymbolicLink p
#endif
case isD of
False -> return $ if matchingSuffix then [p] else []
True ->
if isSymLink && not followSyms
then return []
else do
contents <- filter ((/=) '.' . head) `fmap` getDirectoryContents p
concat `fmap` (mapM (dirToFiles followSyms suffixes . (</>) p) contents)
where matchingSuffix = any (`isSuffixOf` p) suffixes
concatTokens :: [Token] -> String
concatTokens = smartUnwords . map (\(Token name _) -> name) . filter (not . isNewLine Nothing)
where smartUnwords [] = []
smartUnwords a = foldr (\v -> (glueNext v ++)) "" $ a `zip` tail (a ++ [""])
glueNext (a@("("), _) = a
glueNext (a, ")") = a
glueNext (a@("["), _) = a
glueNext (a, "]") = a
glueNext (a, ",") = a
glueNext (a, "") = a
glueNext (a, _) = a ++ " "
extractOperator :: [Token] -> (String, [Token])
extractOperator ts@(Token "(" _ : _) =
(\(a, b) -> (foldr ((++) . tokenString) ")" a, tail' b)) $
break ((== ")") . tokenString) ts
extractOperator _ = ("", [])