module Main (main) where import Char import Data.List import IO import System.Environment import System.Console.GetOpt import System.Exit import Debug.Trace -- search for definitions of things -- we do this by looking for the following patterns: -- data XXX = ... giving a datatype location -- newtype XXX = ... giving a newtype location -- bla :: ... giving a function location -- -- by doing it this way, we avoid picking up local definitions -- (whether this is good or not is a matter for debate) -- -- We generate both CTAGS and ETAGS format tags files -- The former is for use in most sensible editors, while EMACS uses ETAGS -- alternatives: http://haskell.org/haskellwiki/Tags {- .hs or literate .lhs haskell file? Really not a easy question - maybe there is an answer - I don't know .hs -> non literate haskel file .lhs -> literate haskell file .chs -> is this always plain? .whatsoever -> try to get to know the answer (*) contains any '> ... ' line -> interpreted as literate else non literate (*) This is difficult because System.Log.Logger is using {- [...] > module Example where > [...] -} module System.Log.Logger( so it might looks like beeing a .lhs file My first fix was checking for \\begin occurence (doesn't work because HUnit is using > but no \\begin) Further ideas: * use unlit executable distributed with ghc or the like and check for errors? (Will this work if cpp is used as well ?) * Remove comments before checking for '> ..' does'nt work because {- -} may be unbalanced in literate comments So my solution is : take file extension and keep guessing code for all unkown files -} -- Reference: http://ctags.sourceforge.net/FORMAT main :: IO () main = do progName <- getProgName args <- getArgs let usageString = "Usage: " ++ progName ++ " [OPTION...] [files...]" let (modes, filenames, errs) = getOpt Permute options args if errs /= [] || elem Help modes || filenames == [] then do putStr $ unlines errs putStr $ usageInfo usageString options exitWith (ExitFailure 1) else return () let mode = getMode (filter ( `elem` [BothTags, CTags, ETags, Append] ) modes) let openFileMode = if elem Append modes then AppendMode else WriteMode filedata <- mapM (findthings (IgnoreCloseImpl `elem` modes)) filenames if mode == BothTags || mode == CTags then do ctagsfile <- openFile "tags" openFileMode writectagsfile ctagsfile filedata hClose ctagsfile else return () if mode == BothTags || mode == ETags then do etagsfile <- openFile "TAGS" openFileMode writeetagsfile etagsfile filedata hClose etagsfile else return () -- | getMode takes a list of modes and extract the mode with the -- highest precedence. These are as follows: Both, CTags, ETags -- The default case is Both. getMode :: [Mode] -> Mode getMode [] = BothTags getMode xs = maximum xs data Mode = ETags | CTags | BothTags | Append | IgnoreCloseImpl | Help deriving (Ord, Eq, Show) options :: [OptDescr Mode] options = [ Option "c" ["ctags"] (NoArg CTags) "generate CTAGS file (ctags)" , Option "e" ["etags"] (NoArg ETags) "generate ETAGS file (etags)" , Option "b" ["both"] (NoArg BothTags) ("generate both CTAGS and ETAGS") , Option "a" ["append"] (NoArg Append) ("append to existing CTAGS and/or ETAGS file(s)") , Option "" ["ignore-close-implementation"] (NoArg IgnoreCloseImpl) ("ignores found implementation if its closer than 7 lines - so you can jump to definition in one shot") , Option "h" ["help"] (NoArg Help) "This help" ] type FileName = String type ThingName = String -- The position of a token or definition data Pos = Pos FileName -- file name Int -- line number Int -- token number String -- string that makes up that line deriving (Show, Eq) -- A definition we have found -- I'm not sure wether I've used the right names.. but I hope you fix it / get what I mean data FoundThingType = FTFuncTypeDef | FTFuncImpl | FTType | FTData | FTDataGADT | FTNewtype | FTClass | FTModule | FTCons | FTOther | FTConsAccessor | FTConsGADT deriving Eq instance Show FoundThingType where show FTFuncTypeDef = "ft" show FTFuncImpl = "fi" show FTType = "t" show FTData = "d" show FTDataGADT = "d-gadt" show FTNewtype = "nt" show FTClass = "c" show FTModule = "m" show FTCons = "cons" show FTConsGADT = "c-gadt" show FTConsAccessor = "c_a" show FTOther = "o" data FoundThing = FoundThing FoundThingType ThingName Pos deriving (Show, Eq) -- Data we have obtained from a file data FileData = FileData FileName [FoundThing] data Token = Token String Pos | NewLine Int -- space 8*" " = "\t" deriving (Eq) instance Show Token where -- show (Token t (Pos _ l _ _) ) = "Token " ++ t ++ " " ++ (show l) show (Token t (Pos _ l _ _) ) = " " ++ t ++ " " show (NewLine i) = "NewLine " ++ (show i) tokenString (Token s _) = s tokenString (NewLine _) = "\n" isNewLine Nothing (NewLine _) = True isNewLine (Just c) (NewLine c') = c == c' isNewLine _ _ = False trimNewlines = filter (not . isNewLine Nothing) -- stuff for dealing with ctags output format writectagsfile :: Handle -> [FileData] -> IO () writectagsfile ctagsfile filedata = do let things = concat $ map getfoundthings filedata mapM_ (\x -> hPutStrLn ctagsfile $ dumpthing x) (sortThings things) sortThings = sortBy (\(FoundThing _ a _) (FoundThing _ b _) -> compare a b) getfoundthings :: FileData -> [FoundThing] getfoundthings (FileData _ things) = things dumpthing :: FoundThing -> String dumpthing (FoundThing ftt name (Pos filename line _ _)) = name ++ "\t" ++ filename ++ "\t" ++ (show $ line + 1) ++ ";\"" ++ "\t" ++ (show ftt) -- Using backward compatible tag kind extension here -- stuff for dealing with etags output format writeetagsfile :: Handle -> [FileData] -> IO () writeetagsfile etagsfile filedata = do mapM_ (\x -> hPutStr etagsfile $ e_dumpfiledata x) filedata e_dumpfiledata :: FileData -> String e_dumpfiledata (FileData filename things) = "\x0c\n" ++ filename ++ "," ++ (show thingslength) ++ "\n" ++ thingsdump where thingsdump = concat $ map e_dumpthing things thingslength = length thingsdump e_dumpthing :: FoundThing -> String e_dumpthing (FoundThing _ name (Pos filename line token fullline)) = (concat $ take (token + 1) $ spacedwords fullline) ++ "\x7f" ++ (show line) ++ "," ++ (show $ line+1) ++ "\n" -- like "words", but keeping the whitespace, and so letting us build -- accurate prefixes spacedwords :: String -> [String] spacedwords [] = [] spacedwords xs = (blanks ++ wordchars):(spacedwords rest2) where (blanks,rest) = span Char.isSpace xs (wordchars,rest2) = span (\x -> not $ Char.isSpace x) rest -- makes sure file is fully closed after reading (taken from haskell-pandoc) readFile' :: FilePath -> IO String readFile' f = do s <- readFile f return $! (length s `seq` s) -- Find the definitions in a file findthings :: Bool -> FileName -> IO FileData findthings ignoreCloseImpl filename = do aslines <- fmap ( lines . evaluate) $ readFile' filename let stripNonHaskellLines = let emptyLine l = ( all ((all isSpace) . tokenString) ) $ filter (not . (isNewLine Nothing)) l cppLine (nl:t:_) = ("#" `isPrefixOf`) $ tokenString t cppLine _ = False in filter (not . emptyLine) . filter (not . cppLine) -- remove -- comments, then break each line into tokens (adding line numbers) -- then remove {- -} comments -- split by lines again ( to get indent let (lines , numbers) = unzip . fromLiterate filename $ zip aslines [0..] let tokenLines = stripNonHaskellLines $ stripslcomments $ (splitByNL Nothing) $ stripblockcomments $ concat $ zipWith3 (withline filename) (map ( filter (not . (all isSpace)). mywords) $ lines) lines numbers -- TODO ($defines / empty lines etc) -- separate by top level declarations (everything starting with the -- same topmost indentation is what I call section here) -- so that z in -- let x = 7 -- z = 20 -- won't be found as function let sections = map tail -- strip leading NL (no longer needed $ filter (not . null) $ splitByNL (Just (getTopLevelIndent tokenLines) ) $ concat $ tokenLines -- only take one of -- a 'x' = 7 -- a _ = 0 let filterAdjacentFuncImpl = nubBy (\(FoundThing t1 n1 (Pos f1 _ _ _)) (FoundThing t2 n2 (Pos f2 _ _ _)) -> f1 == f2 && n1 == n2 && t1 == FTFuncImpl && t2 == FTFuncImpl ) let iCI = if ignoreCloseImpl then nubBy (\(FoundThing t1 n1 (Pos f1 l1 _ _)) (FoundThing t2 n2 (Pos f2 l2 _ _)) -> f1 == f2 && n1 == n2 && ( ( <= 7 ) $ abs $ l2 - l1)) else id return $ FileData filename $ iCI $ filterAdjacentFuncImpl $ concatMap findstuff sections where evaluate :: String -> String evaluate [] = [] evaluate (c:cs) = c `seq` c:evaluate cs -- my words is mainly copied from Data.List. -- difference abc::def is recognized as three words -- `abc` is recognized as "`" "abc" "`" mywords :: String -> [String] mywords ('(':xs) = "(" : mywords xs mywords ('`':xs) = "`" : mywords xs mywords ('=':'>':xs) = "=>" : mywords xs mywords ('=':xs) = "=" : mywords xs mywords (',':xs) = "," : mywords xs mywords (':':':':xs) = "::" : mywords xs mywords s = case dropWhile {-partain:Char.-}isSpace s of ')':xs -> ")" : mywords xs "" -> [] s' -> w : mywords s'' where (w, s'') = myBreak s' myBreak [] = ([],[]) myBreak (':':':':xs) = ([], "::"++xs) myBreak (')':xs) = ([],')':xs) myBreak ('(':xs) = ([],'(':xs) myBreak ('`':xs) = ([],'`':xs) myBreak ('=':xs) = ([],'=':xs) myBreak (',':xs) = ([],',':xs) myBreak (' ':xs) = ([],xs); myBreak (x:xs) = let (a,b) = myBreak xs in (x:a,b) -- Create tokens from words, by recording their line number -- and which token they are through that line withline :: FileName -> [String] -> String -> Int -> [Token] withline filename words fullline i = let countSpaces (' ':xs) = 1 + countSpaces xs countSpaces ('\t':xs) = 8 + countSpaces xs countSpaces _ = 0 in NewLine (countSpaces fullline) : zipWith (\w t -> Token w (Pos filename i t fullline)) words [1 ..] -- comments stripping stripslcomments :: [[Token]] -> [[Token]] stripslcomments = let f ((NewLine _):(Token "--" _):_) = False f _ = True in filter f stripblockcomments :: [Token] -> [Token] stripblockcomments ((Token "\\end{code}" _):xs) = afterlitend xs stripblockcomments ((Token "{-" _):xs) = afterblockcomend xs stripblockcomments (x:xs) = x:stripblockcomments xs stripblockcomments [] = [] afterlitend :: [Token] -> [Token] afterlitend (Token "\\begin{code}" _ : xs) = xs afterlitend (_ : xs) = afterlitend xs afterlitend [] = [] afterblockcomend :: [Token] -> [Token] afterblockcomend (t:xs) | contains "-}" (tokenString t) = xs | otherwise = afterblockcomend xs afterblockcomend [] = [] -- does one string contain another string contains :: Eq a => [a] -> [a] -> Bool contains sub full = any (isPrefixOf sub) $ tails full -- actually pick up definitions findstuff :: [Token] -> [FoundThing] findstuff ((Token "module" _):(Token name pos):xs) = FoundThing FTModule name pos : [] -- nothing will follow this section findstuff ((Token "data" _):(Token name pos):xs) | any ( (== "where"). tokenString ) xs -- GADT -- TODO will be found as FTCons (not FTConsGADT), the same for functions - but they are found :) = FoundThing FTDataGADT name pos : (getcons2 xs) ++ (fromWhereOn xs) -- ++ (findstuff xs) | otherwise = FoundThing FTData name pos : (getcons FTData (trimNewlines xs))-- ++ (findstuff xs) findstuff ((Token "newtype" _):ts@((t@(Token name pos)):_)) = FoundThing FTNewtype name pos : (getcons FTCons (trimNewlines ts))-- ++ (findstuff xs) -- FoundThing FTNewtype name pos : findstuff xs findstuff ((Token "type" _):(Token name pos):xs) = FoundThing FTType name pos : findstuff xs findstuff ((Token "class" _):xs) = case break ((== "where").tokenString) xs of (_,[]) -> [] (t,r) -> case (head . dropWhile isParenOpen . reverse . takeWhile ((/= "=>").tokenString) . reverse) t of (Token name p) -> FoundThing FTClass name p : fromWhereOn r _ -> [] where isParenOpen (Token "(" _) = True isParenOpen _ = False findstuff xs = findFunc xs ++ findFuncTypeDefs [] xs findFuncTypeDefs found (t@(Token name p): Token "," _ :xs) = findFuncTypeDefs (t : found) xs findFuncTypeDefs found (t@(Token name p): Token "::" _ :xs) = map (\(Token name p) -> FoundThing FTFuncTypeDef name p) (t:found) findFuncTypeDefs found (Token "(" _ :xs) = case break myBreakF xs of (inner@((Token _ p):_), _:xs') -> let merged = Token ( ( concat . map (\(Token x _) -> x) ) inner ) p in findFuncTypeDefs found $ merged : xs' _ -> [] where myBreakF (Token ")" _) = True myBreakF _ = False findFuncTypeDefs _ _ = [] fromWhereOn (w:[]) = [] fromWhereOn (w: xs@((NewLine _):_)) = concatMap (findstuff . tail') $ splitByNL (Just ( minimum . (10000:) . map (\(NewLine i) -> i) . filter (isNewLine Nothing) $ xs)) xs fromWhereOn (w:xw) = findstuff xw findFunc x = case findInfix x of x:xs -> x:xs _ -> findF x findInfix x = case dropWhile ((/= "`"). tokenString) (takeWhile ( (/= "=") . tokenString) x) of x:(Token name p):_ -> FoundThing FTFuncImpl name p : [] _ -> [] findF ((Token name p):xs) = if (any (("=" ==).tokenString) xs) then FoundThing FTFuncImpl name p : [] else [] findF _ = [] tail' (x:xs) = xs tail' [] = [] -- get the constructor definitions, knowing that a datatype has just started getcons :: FoundThingType -> [Token] -> [FoundThing] getcons ftt ((Token "=" _):(Token name pos):xs) = FoundThing ftt name pos : getcons2 xs getcons ftt (x:xs) = getcons ftt xs getcons _ [] = [] getcons2 ((Token name pos):(Token "::" _):xs) = FoundThing FTConsAccessor name pos : getcons2 xs getcons2 ((Token "=" _):xs) = [] getcons2 ((Token "|" _):(Token name pos):xs) = FoundThing FTCons name pos : getcons2 xs getcons2 (x:xs) = getcons2 xs getcons2 [] = [] splitByNL :: (Maybe Int) -> [Token] -> [[Token]] splitByNL maybeIndent (nl@(NewLine _):ts) = let (a,b) = break (isNewLine maybeIndent) ts in (nl : a) : splitByNL maybeIndent b splitByNL _ _ = [] getTopLevelIndent [] = 0 -- (no import found , assuming indent 0 : this can be -- done better but should suffice for most needs getTopLevelIndent (x:xs) = if (any ((=="import") . tokenString) x) then let ((NewLine i):_) = x in i else getTopLevelIndent xs -- removes literate stuff if any line '> ... ' is found and any word is \begin (hglogger has ^> in it's commetns) fromLiterate :: FilePath -> [(String, Int)] -> [(String, Int)] fromLiterate file lines = let literate = [ (ls, n) | ('>':ls, n) <- lines ] in if ".lhs" `isSuffixOf` file && (not . null $ literate) then literate -- not . null literate because of Repair.lhs of darcs else if ".hs" `isSuffixOf` file then lines else if (null literate || not ( any ( (any ("\\begin" `isPrefixOf`)). words . fst) lines) ) then lines else literate {- testcase: checkToBeFound(){ toBeFound=$(sed -n 's/-- to be found\s*//p' testcase.hs) for i in $toBeFound; do grep -l $i tags 2>&1 > /dev/null || echo "tag $i was not found" done echo -n "to be found ocunt: " echo "$toBeFound" | wc -l } -- to be found A.B.testcase module A.B.testcase(module System.FilePath.Windows) where import asdf -- to be found Request -- to be found Request2 -- to be found rqBody -- to be found rqMethod -- to be found rqPeer -- to be found Request3 data Request = Request2 { rqMethod::Method, rqBody :: RqBody, rqPeer :: Host } | Request3 deriving(Show,Read,Typeable) -- http://hackage.haskell.org/trac/ghc/ticket/1184 -- ! Convert Bool into another monad -- to be found boolM boolM False = mzero -- to be found sadlkfj sadlkfj = 7 -- to be found onlyTheFirstOne onlyTheFirstOne (x:xs) = 8 onlyTheFirstOne [] = 8 -- to be found AC AC a b c d e f g = 7 -- to be found abc abc = let a = 7 b = 8 in a + b where x = 34 o = 423 -- to be found BB -- to be found AA AA, BB :: Int -- to be found foo ad `foo` oh = 90 -- to be found X -- to be found xyz class (A a) => X a where xyz :: dummy -- to be found Z -- to be found o class (A a) => Z a where o :: Int -- to be found ABC newtype ABC = Int -- to be found DBM newtype IE.ISession sess => DBM mark sess a = DBM (ReaderT sess IO a) -- to be found SAA newtype Symbol = SAA String -- TODO -- to be found =~ (=~) :: (Regex rho) => String -> rho -> Bool -}