
module Main (main) where

import Char
import List
import IO
import System.Environment
import System.Console.GetOpt
import System.Exit


-- search for definitions of things
-- we do this by looking for the following patterns:
-- data XXX = ...      giving a datatype location
-- newtype XXX = ...   giving a newtype location
-- bla :: ...          giving a function location
--
-- by doing it this way, we avoid picking up local definitions
--              (whether this is good or not is a matter for debate)
--

-- We generate both CTAGS and ETAGS format tags files
-- The former is for use in most sensible editors, while EMACS uses ETAGS

-- alternatives: http://haskell.org/haskellwiki/Tags

main :: IO ()
main = do
        progName <- getProgName
        args <- getArgs
        let usageString = "Usage: " ++ progName ++ " [OPTION...] [files...]"
        let (modes, filenames, errs) = getOpt Permute options args
        if errs /= [] || elem Help modes || filenames == []
         then do
           putStr $ unlines errs
           putStr $ usageInfo usageString options
           exitWith (ExitFailure 1)
         else return ()
        let mode = getMode $ filter pureModeFilter modes
            extendedCtag = ExtendedCtag `elem` modes
        let openFileMode = if elem Append modes
                           then AppendMode
                           else WriteMode
        filedata <- mapM findthings filenames
        if mode == CTags
         then do
           ctagsfile <- getOutFile "tags" openFileMode modes
           writectagsfile ctagsfile extendedCtag filedata
           hClose ctagsfile
         else return ()
        if mode == ETags
         then do
           etagsfile <- getOutFile "TAGS" openFileMode modes
           writeetagsfile etagsfile filedata
           hClose etagsfile
         else return ()
        -- avoid problem when both is used in combination
        -- with redirection on stdout
        if mode == BothTags
           then do
            ctagsfile <- getOutFile "tags" openFileMode modes
            writectagsfile ctagsfile extendedCtag filedata
            etagsfile <- getOutFile "TAGS" openFileMode modes
            writeetagsfile etagsfile filedata
            hClose etagsfile
            hClose ctagsfile
           else return ()

-- | Used to filter mode list to avoid problem using the getMode
-- function, the OutRedir was messing with it, and append was already
-- filtered-out.
pureModeFilter :: Mode -> Bool
pureModeFilter Append       = False
pureModeFilter ExtendedCtag = False
pureModeFilter (OutRedir _) = False
pureModeFilter _            = True

-- | getMode takes a list of modes and extract the mode with the
--   highest precedence.  These are as follows: Both, CTags, ETags
--   The default case is Both.
getMode :: [Mode] -> Mode
getMode [] = BothTags
getMode [x] = x
getMode (x:xs) = max x (getMode xs)

-- | getOutFile scan the modes searching for output redirection
--   if not found, open the file with name passed as parameter.
--   Handle special file -, which is stdout
getOutFile :: String -> IOMode -> [Mode] -> IO Handle
getOutFile _           _        ((OutRedir "-"):_) = return stdout
getOutFile _           openMode ((OutRedir f):_)   = openFile f openMode
getOutFile name        openMode (x:xs)             = getOutFile name openMode xs
getOutFile defaultName openMode []                 = openFile defaultName openMode

data Mode = ExtendedCtag
          | OutRedir String 
          | ETags 
          | CTags 
          | BothTags 
          | Append 
          | Help
          deriving (Ord, Eq, Show)

options :: [OptDescr Mode]
options = [ Option "c" ["ctags"]
            (NoArg CTags) "generate CTAGS file (ctags)"
          , Option "e" ["etags"]
            (NoArg ETags) "generate ETAGS file (etags)"
          , Option "b" ["both"]
            (NoArg BothTags) ("generate both CTAGS and ETAGS")
          , Option "a" ["append"]
            (NoArg Append) ("append to existing CTAGS and/or ETAGS file(s)")
          , Option "o" ["output"]
            (ReqArg (OutRedir) "") ("output to given file, instead of 'tags', '-' file is stdout")
          , Option "f" ["file"]
            (ReqArg (OutRedir) "") ("same as -o, but used as compatibility with ctags")
          , Option "x" ["extendedctag"]
            (NoArg ExtendedCtag) ("Generate additional information in ctag file.")
          , Option "h" ["help"] (NoArg Help) "This help"
          ]

type FileName = String

type ThingName = String

-- The position of a token or definition
data Pos = Pos
                FileName -- file name
                Int      -- line number
                Int      -- token number
                String   -- string that makes up that line
    deriving (Show, Eq)

data ThingKind =
      KindClass
    | KindModule
    | KindData
    | KindType
    | KindNewtype
    | KindVal
    | KindConstructor
    deriving (Show, Eq)

-- A definition we have found
data FoundThing = FoundThing ThingName ThingKind Pos
    deriving (Show, Eq)

-- Data we have obtained from a file
data FileData = FileData FileName [FoundThing]

data Token = Token String Pos
    deriving Show


-- stuff for dealing with ctags output format

writectagsfile :: Handle -> Bool -> [FileData] -> IO ()
writectagsfile ctagsfile extended filedata = do
    let things = concat $ map getfoundthings filedata
    if extended
       then do
        hPutStrLn ctagsfile "!_TAG_FILE_FORMAT\t2\t/extended format; --format=1 will not append ;\" to lines/"
        hPutStrLn ctagsfile "!_TAG_FILE_SORTED\t1\t/0=unsorted, 1=sorted, 2=foldcase/"
        hPutStrLn ctagsfile "!_TAG_PROGRAM_NAME\thasktags //"
       else return ()
    mapM_ (\x -> hPutStrLn ctagsfile $ dumpthing extended x) (sortThings things)

sortThings :: [FoundThing] -> [FoundThing]
sortThings = sortBy (\(FoundThing a _ _) (FoundThing b _ _) -> compare a b)

getfoundthings :: FileData -> [FoundThing]
getfoundthings (FileData _ things) = things

-- | Associate kind with a letter to be outputed
-- in an extended ctags file
kindLetter :: ThingKind -> String
kindLetter KindClass = "C"
kindLetter KindModule = "m"
kindLetter KindData = "d"
kindLetter KindType = "t"
kindLetter KindNewtype = "n"
kindLetter KindVal = "v"
kindLetter KindConstructor = "c"

-- | Dump found tag in normal or extended (read : vim like) ctag
-- line
dumpthing :: Bool -> FoundThing -> String
dumpthing False (FoundThing name kind (Pos filename line _ _)) =
    name ++ "\t" ++ filename ++ "\t" ++ (show $ line + 1)
dumpthing True (FoundThing name kind (Pos filename line _ lineText)) =
    name ++ "\t" ++ filename
         ++ "\t/^" ++ (concat $ map ctagEncode lineText)
         ++ "$/;\"\t" ++ (kindLetter kind)
         ++ "\tline:" ++ (show $ line + 1)

ctagEncode :: Char -> String
ctagEncode '/' = '\\' : '/' : []
ctagEncode '\\' = '\\' : '\\' : []
ctagEncode a = [a]

-- stuff for dealing with etags output format

writeetagsfile :: Handle -> [FileData] -> IO ()
writeetagsfile etagsfile filedata = do
    mapM_ (\x -> hPutStr etagsfile $ e_dumpfiledata x) filedata

e_dumpfiledata :: FileData -> String
e_dumpfiledata (FileData filename things) =
    "\x0c\n" ++ filename ++ "," ++ (show thingslength) ++ "\n" ++ thingsdump
    where thingsdump = concat $ map e_dumpthing things
          thingslength = length thingsdump

e_dumpthing :: FoundThing -> String
e_dumpthing (FoundThing _ _ (Pos _ line token fullline)) =
    (concat $ take (token + 1) $ spacedwords fullline)
 ++ "\x7f" ++ (show line) ++ "," ++ (show $ line+1) ++ "\n"


-- like "words", but keeping the whitespace, and so letting us build
-- accurate prefixes

spacedwords :: String -> [String]
spacedwords [] = []
spacedwords xs = (blanks ++ wordchars):(spacedwords rest2)
    where (blanks,rest) = span Char.isSpace xs
          (wordchars,rest2) = span (\x -> not $ Char.isSpace x) rest


-- Find the definitions in a file

findthings :: FileName -> IO FileData
findthings filename = do
    text <- readFile filename
    evaluate text -- forces evaluation of text
                  -- too many files were being opened otherwise since
                  -- readFile is lazy
    let aslines = lines text
    let wordlines = map mywords aslines
    let noslcoms = map stripslcomments wordlines
    let tokens = concat $ zipWith3 (withline filename) noslcoms aslines [0 ..]
    -- there are some tokens with "" (don't know why yet) this filter fixes it
    let tokens' = filter (\(Token s _ ) -> (not .  null) s ) tokens
    let nocoms = stripblockcomments tokens'
    -- using nub because getcons and findstuff are parsing parts of the file twice
    return $ FileData filename $ nub $ findstuff nocoms
  where evaluate [] = return ()
        evaluate (c:cs) = c `seq` evaluate cs
        -- my words is mainly copied from Data.List.
        -- difference abc::def is split into three words instead of one.
        -- We should really be lexing Haskell properly here rather
        -- than using hacks like this. In the future we expect hasktags
        -- to be replaced by something using the GHC API.
        mywords :: String -> [String]
        mywords (':':':':xs) = "::" : mywords xs
        mywords s =  case dropWhile isSpace s of
                         "" -> []
                         s' -> w : mywords s''
                             where (w, s'') = myBreak s'
                                   myBreak [] = ([],[])
                                   myBreak (':':':':xs) = ([], "::"++xs)
                                   myBreak (' ':xs) = ([],xs);
                                   myBreak (x:xs) = let (a,b) = myBreak xs
                                                    in  (x:a,b)

-- Create tokens from words, by recording their line number
-- and which token they are through that line

withline :: FileName -> [String] -> String -> Int -> [Token]
withline filename theWords fullline i =
    zipWith (\w t -> Token w (Pos filename i t fullline)) theWords $ [0 ..]

-- comments stripping

stripslcomments :: [String] -> [String]
stripslcomments ("--" : _) = []
stripslcomments (x : xs) = x : stripslcomments xs
stripslcomments [] = []

stripblockcomments :: [Token] -> [Token]
stripblockcomments ((Token "\\end{code}" _):xs) = afterlitend xs
stripblockcomments ((Token "{-" _):xs) = afterblockcomend xs
stripblockcomments (x:xs) = x:stripblockcomments xs
stripblockcomments [] = []

afterlitend :: [Token] -> [Token]
afterlitend (Token "\\begin{code}" _ : xs) = xs
afterlitend (_ : xs) = afterlitend xs
afterlitend [] = []

afterblockcomend :: [Token] -> [Token]
afterblockcomend ((Token token _):xs)
 | contains "-}" token = xs
 | otherwise           = afterblockcomend xs
afterblockcomend [] = []


-- does one string contain another string

contains :: Eq a => [a] -> [a] -> Bool
contains sub full = any (isPrefixOf sub) $ tails full

-- actually pick up definitions

findstuff :: [Token] -> [FoundThing]
findstuff ((Token "module" _):(Token name pos):xs) =
    FoundThing name KindModule pos : (getcons xs) ++ (findstuff xs)
findstuff ((Token "data" _):(Token name pos):xs) =
    FoundThing name KindData pos : (getcons xs) ++ (findstuff xs)
findstuff ((Token "newtype" _):(Token name pos):xs) =
    FoundThing name KindNewtype pos : findstuff xs
findstuff ((Token "type" _):(Token name pos):xs) =
    FoundThing name KindType pos : findstuff xs
findstuff ((Token "class" _):xs) = findClassName xs
findstuff ((Token name pos):(Token "::" _):xs) =
    FoundThing name KindVal pos : findstuff xs
findstuff (_ : xs) = findstuff xs
findstuff [] = []

findClassName :: [Token] -> [FoundThing]
findClassName []  = []
findClassName [Token n p]  = [FoundThing n KindClass p]
findClassName xs = (\(Token n pos : xs') -> FoundThing n KindClass pos : findstuff xs') . drop2 . dropParens 0 $ xs

dropParens :: Integer -> [Token] -> [Token]
dropParens n (Token "(" _ : xs) = dropParens (n + 1) xs
dropParens 0 (x           : xs) = x : xs
dropParens 1 (Token ")" _ : xs) = xs
dropParens n (Token ")" _ : xs) = dropParens (n - 1) xs
dropParens n (_           : xs) = dropParens n xs
dropParens _ []                 = [] -- Shouldn't happen on correct source

-- dropsEverything till token "=>" (if it is on the same line as the
-- first token. if not return tokens)
drop2 :: [Token] -> [Token]
drop2 tokens@(Token _ (Pos _ line_nr _ _ ) : _) =
  let (line, following) = span (\(Token _ (Pos _ l _ _)) -> l == line_nr) tokens
      (_, following_in_line) = span (\(Token n _) -> n /= "=>") line
  in case following_in_line of
          (Token "=>" _ : xs) -> xs ++ following
          _ -> tokens
drop2 xs = xs

-- get the constructor definitions, knowing that a datatype has just started

getcons :: [Token] -> [FoundThing]
getcons (Token "=" _ : Token name pos : xs) =
    FoundThing name KindConstructor pos : getcons2 xs
getcons (_ : xs) = getcons xs
getcons [] = []

getcons2 :: [Token] -> [FoundThing]
getcons2 (Token "=" _ : _) = []
getcons2 (Token "|" _ : Token name pos : xs) =
    FoundThing name KindConstructor pos : getcons2 xs
getcons2 (_:xs) = getcons2 xs
getcons2 [] = []

