module Main (main) where
import qualified Data.ByteString.Char8 as BS
import Data.Char
import Data.List
import Data.Maybe
import Control.Monad( when )

import IO
import System.Environment
import System.Console.GetOpt
import System.Exit
--import Debug.Trace


-- search for definitions of things
-- we do this by looking for the following patterns:
-- data XXX = ...      giving a datatype location
-- newtype XXX = ...   giving a newtype location
-- bla :: ...          giving a function location
--
-- by doing it this way, we avoid picking up local definitions
--              (whether this is good or not is a matter for debate)
--

-- We generate both CTAGS and ETAGS format tags files
-- The former is for use in most sensible editors, while EMACS uses ETAGS

-- alternatives: http://haskell.org/haskellwiki/Tags

{- .hs or literate .lhs haskell file?
Really not a easy question - maybe there is an answer - I don't know

.hs -> non literate haskel file
.lhs -> literate haskell file
.chs -> is this always plain?
.whatsoever -> try to get to know the answer (*)
  contains any '> ... ' line -> interpreted as literate
  else non literate

(*)  This is difficult because
 System.Log.Logger is using 
  {-
  [...]
  > module Example where
  > [...]
  -}
  module System.Log.Logger(
  so it might looks like beeing a .lhs file
  My first fix was checking for \\begin occurence (doesn't work because HUnit is using > but no \\begin)
  Further ideas: 
    * use unlit executable distributed with ghc or the like and check for errors?
      (Will this work if cpp is used as well ?)
    * Remove comments before checking for '> ..'
      does'nt work because {- -} may be unbalanced in literate comments
  So my solution is : take file extension and keep guessing code for all unkown files
-}
 

-- Reference: http://ctags.sourceforge.net/FORMAT
main :: IO ()
main = do
        progName <- getProgName
        args <- getArgs
        let usageString = "Usage: " ++ progName ++ " [OPTION...] [files...]"
        let (modes, filenames, errs) = getOpt Permute options args

        when (errs /= [] || elem Help modes || filenames == [])
             (do putStr $ unlines errs
                 putStr $ usageInfo usageString options
                 exitWith (ExitFailure 1))

        let mode = getMode (filter ( `elem` [BothTags, CTags, ETags, Append] ) modes)
            openFileMode = if elem Append modes
                           then AppendMode
                           else WriteMode
        filedata <- mapM (findthings (IgnoreCloseImpl `elem` modes)) filenames

        when (mode == CTags)
             (do ctagsfile <- getOutFile "tags" openFileMode modes
                 writectagsfile ctagsfile (ExtendedCtag `elem` modes) filedata
                 hClose ctagsfile)

        when (mode == ETags)
             (do etagsfile <- openFile "TAGS" openFileMode
                 writeetagsfile etagsfile filedata
                 hClose etagsfile)

        -- avoid problem when both is used in combination
        -- with redirection on stdout
        when (mode == BothTags)
             (do etagsfile <- getOutFile "TAGS" openFileMode modes
                 writeetagsfile etagsfile filedata
                 ctagsfile <- getOutFile "tags" openFileMode modes
                 writectagsfile ctagsfile (ExtendedCtag `elem` modes) filedata
                 hClose etagsfile
                 hClose ctagsfile)

-- | getMode takes a list of modes and extract the mode with the
--   highest precedence.  These are as follows: Both, CTags, ETags
--   The default case is Both.
getMode :: [Mode] -> Mode
getMode [] = BothTags
getMode xs = maximum xs

-- | getOutFile scan the modes searching for output redirection
--   if not found, open the file with name passed as parameter.
--   Handle special file -, which is stdout
getOutFile :: String -> IOMode -> [Mode] -> IO Handle
getOutFile _           _        ((OutRedir "-"):_) = return stdout
getOutFile _           openMode ((OutRedir f):_)   = openFile f openMode
getOutFile name        openMode (_:xs)             = getOutFile name openMode xs
getOutFile defaultName openMode []                 = openFile defaultName openMode

data Mode = ExtendedCtag
		  | IgnoreCloseImpl
          | ETags 
          | CTags 
          | BothTags 
          | Append 
          | OutRedir String
          | Help
          deriving (Ord, Eq, Show)

options :: [OptDescr Mode]
options = [ Option "c" ["ctags"]
            (NoArg CTags) "generate CTAGS file (ctags)"
          , Option "e" ["etags"]
            (NoArg ETags) "generate ETAGS file (etags)"
          , Option "b" ["both"]
            (NoArg BothTags) "generate both CTAGS and ETAGS"
          , Option "a" ["append"]
            (NoArg Append) "append to existing CTAGS and/or ETAGS file(s)"
          , Option "" ["ignore-close-implementation"]
            (NoArg IgnoreCloseImpl) "ignores found implementation if its closer than 7 lines  - so you can jump to definition in one shot"
          , Option "o" ["output"]
            (ReqArg OutRedir "") "output to given file, instead of 'tags', '-' file is stdout"
          , Option "f" ["file"]
            (ReqArg OutRedir "") "same as -o, but used as compatibility with ctags"
          , Option "x" ["extendedctag"]
            (NoArg ExtendedCtag) "Generate additional information in ctag file."
          , Option "h" ["help"] (NoArg Help) "This help"
          ]

type FileName = String

type ThingName = String

-- The position of a token or definition
data Pos = Pos
                FileName -- file name
                Int      -- line number
                Int      -- token number
                String   -- string that makes up that line
    deriving (Show, Eq)

-- A definition we have found
-- I'm not sure wether I've used the right names.. but I hope you fix it / get what I mean
data FoundThingType = FTFuncTypeDef | FTFuncImpl | FTType | FTData | FTDataGADT | FTNewtype | FTClass | FTModule | FTCons | FTOther | FTConsAccessor | FTConsGADT
  deriving Eq

instance Show FoundThingType where
  show FTFuncTypeDef = "ft"
  show FTFuncImpl = "fi"
  show FTType = "t"
  show FTData = "d"
  show FTDataGADT = "d_gadt"
  show FTNewtype = "nt"
  show FTClass = "c"
  show FTModule = "m"
  show FTCons = "cons"
  show FTConsGADT = "c_gadt"
  show FTConsAccessor = "c_a"
  show FTOther = "o"

data FoundThing = FoundThing FoundThingType ThingName Pos
        deriving (Show, Eq)

-- Data we have obtained from a file
data FileData = FileData FileName [FoundThing]

data Token = Token String Pos
            | NewLine Int -- space 8*" " = "\t"
  deriving (Eq)
instance Show Token where
  -- show (Token t (Pos _ l _ _) ) = "Token " ++ t ++ " " ++ (show l)
  show (Token t (Pos _ _l _ _) ) = " " ++ t ++ " "
  show (NewLine i) = "NewLine " ++ show i

tokenString :: Token -> String
tokenString (Token s _) = s
tokenString (NewLine _) = "\n"

isNewLine :: Maybe Int -> Token -> Bool
isNewLine Nothing (NewLine _) = True
isNewLine (Just c) (NewLine c') = c == c'
isNewLine _ _ = False

trimNewlines :: [Token] -> [Token]
trimNewlines = filter (not . isNewLine Nothing)


-- stuff for dealing with ctags output format

writectagsfile :: Handle -> Bool -> [FileData] -> IO ()
writectagsfile ctagsfile extended filedata = do
    let things = concatMap getfoundthings filedata
    when extended
         (do hPutStrLn ctagsfile "!_TAG_FILE_FORMAT\t2\t/extended format; --format=1 will not append ;\" to lines/"
             hPutStrLn ctagsfile "!_TAG_FILE_SORTED\t1\t/0=unsorted, 1=sorted, 2=foldcase/"
             hPutStrLn ctagsfile "!_TAG_PROGRAM_NAME\thasktags")
    mapM_ (hPutStrLn ctagsfile . dumpthing extended) (sortThings things)

sortThings :: [FoundThing] -> [FoundThing]
sortThings = sortBy (\(FoundThing _ a _) (FoundThing _ b _) -> compare a b)

getfoundthings :: FileData -> [FoundThing]
getfoundthings (FileData _ things) = things

-- | Dump found tag in normal or extended (read : vim like) ctag
-- line
dumpthing :: Bool -> FoundThing -> String
dumpthing False (FoundThing _ name (Pos filename line _ _)) =
    name ++ "\t" ++ filename ++ "\t" ++ show (line + 1)
dumpthing True (FoundThing kind name (Pos filename line _ lineText)) =
    name ++ "\t" ++ filename
         ++ "\t/^" ++ concatMap ctagEncode lineText
         ++ "$/;\"\t" ++ show kind
         ++ "\tline:" ++ show (line + 1)

ctagEncode :: Char -> String
ctagEncode '/' = "\\/"
ctagEncode '\\' = "\\\\"
ctagEncode a = [a]

-- stuff for dealing with etags output format

writeetagsfile :: Handle -> [FileData] -> IO ()
writeetagsfile etagsfile = mapM_ (hPutStr etagsfile . etagsDumpFileData)

etagsDumpFileData :: FileData -> String
etagsDumpFileData (FileData filename things) =
    "\x0c\n" ++ filename ++ "," ++ show thingslength ++ "\n" ++ thingsdump
    where thingsdump = concatMap etagsDumpThing things
          thingslength = length thingsdump

etagsDumpThing :: FoundThing -> String
etagsDumpThing (FoundThing _ _name (Pos _filename line token fullline)) =
        concat (take (token + 1) $ spacedwords fullline)
        ++ "\x7f" ++ show line ++ "," ++ show (line + 1) ++ "\n"


-- like "words", but keeping the whitespace, and so letting us build
-- accurate prefixes

spacedwords :: String -> [String]
spacedwords [] = []
spacedwords xs = (blanks ++ wordchars) : spacedwords rest2
    where (blanks,rest) = span isSpace xs
          (wordchars,rest2) = break isSpace rest

-- Find the definitions in a file
findthings :: Bool -> FileName -> IO FileData
findthings ignoreCloseImpl filename = do
        aslines <- fmap ( lines . evaluate . BS.unpack) $ BS.readFile filename

        let stripNonHaskellLines = let
                  emptyLine = all (all isSpace . tokenString)
                            . filter (not . isNewLine Nothing)
                  cppLine (_nl:t:_) = ("#" `isPrefixOf`) $ tokenString t
                  cppLine _ = False
                in filter (not . emptyLine) . filter (not . cppLine)

        --  remove -- comments, then break each line into tokens (adding line numbers)
        --  then remove {- -} comments
        --  split by lines again ( to get indent
        let (fileLines, numbers) = unzip . fromLiterate filename $ zip aslines [0..]
        let tokenLines =
                      stripNonHaskellLines
                      $ stripslcomments
                      $ splitByNL Nothing
                      $ stripblockcomments
                      $ concat
                      $ zipWith3 (withline filename)
                                 (map ( filter (not . all isSpace) . mywords) fileLines)
                                 fileLines
                                 numbers


        -- TODO  ($defines / empty lines etc)
        -- separate by top level declarations (everything starting with the
        -- same topmost indentation is what I call section here)
        -- so that z in
        -- let x = 7
        --     z = 20
        -- won't be found as function 
        let sections = map tail -- strip leading NL (no longer needed 
                       $ filter (not . null)
                       $ splitByNL (Just (getTopLevelIndent tokenLines) )
                       $ concat tokenLines
        -- only take one of
        -- a 'x' = 7
        -- a _ = 0
        let filterAdjacentFuncImpl = nubBy (\(FoundThing t1 n1 (Pos f1 _ _ _)) 
                                             (FoundThing t2 n2 (Pos f2 _ _ _))
                                             -> f1 == f2 && n1 == n2 && t1 == FTFuncImpl && t2 == FTFuncImpl )

        let iCI = if ignoreCloseImpl 
              then nubBy (\(FoundThing _ n1 (Pos f1 l1 _ _)) 
                         (FoundThing _ n2 (Pos f2 l2 _ _))
                         -> f1 == f2 && n1 == n2  && ( ( <= 7 ) $ abs $ l2 - l1))
              else id
        return $ FileData filename $ iCI $ filterAdjacentFuncImpl $ concatMap findstuff sections

  where 
        evaluate :: String -> String        
        evaluate [] = []
        evaluate (c:cs) = c `seq` c:evaluate cs
	-- my words is mainly copied from Data.List.
	-- difference abc::def is recognized as three words
        -- `abc` is recognized as "`" "abc" "`"
	mywords :: String -> [String]
	mywords ('{':xs) = "{" : mywords xs
	mywords ('(':xs) = "(" : mywords xs
	mywords ('`':xs) = "`" : mywords xs
	mywords ('=':'>':xs) = "=>" : mywords xs
	mywords ('=':xs) = "=" : mywords xs
	mywords (',':xs) = "," : mywords xs
	mywords (':':':':xs) = "::" : mywords xs
	mywords s	=  case dropWhile {-partain:Char.-}isSpace s of
                                ')':xs -> ")" : mywords xs
				"" -> []
				s' -> w : mywords s''
				      where (w, s'') = myBreak s'
					    myBreak [] = ([],[])
					    myBreak (':':':':xs) = ([], "::"++xs)
					    myBreak (')':xs) = ([],')':xs)
                                            myBreak ('(':xs) = ([],'(':xs)
					    myBreak ('`':xs) = ([],'`':xs)
					    myBreak ('=':xs) = ([],'=':xs)
					    myBreak (',':xs) = ([],',':xs)
					    myBreak (' ':xs) = ([],xs);
					    myBreak (x:xs) = let (a,b) = myBreak xs 
							     in  (x:a,b)
	
-- Create tokens from words, by recording their line number
-- and which token they are through that line

withline :: FileName -> [String] -> String -> Int -> [Token]
withline filename sourceWords fullline i =
  let countSpaces (' ':xs) = 1 + countSpaces xs
      countSpaces ('\t':xs) = 8 + countSpaces xs
      countSpaces _ = 0
  in NewLine (countSpaces fullline)
      : zipWith (\w t -> Token w (Pos filename i t fullline)) sourceWords [1 ..]

-- comments stripping

stripslcomments :: [[Token]] -> [[Token]]
stripslcomments = let f ((NewLine _):(Token "--" _):_) = False
                      f _ = True
                  in filter f

stripblockcomments :: [Token] -> [Token]
stripblockcomments ((Token "\\end{code}" _):xs) = afterlitend xs
stripblockcomments ((Token "{-" _):xs) = afterblockcomend xs
stripblockcomments (x:xs) = x:stripblockcomments xs
stripblockcomments [] = []

afterlitend :: [Token] -> [Token]
afterlitend (Token "\\begin{code}" _ : xs) = xs
afterlitend (_ : xs) = afterlitend xs
afterlitend [] = []

afterblockcomend :: [Token] -> [Token]
afterblockcomend (t:xs)
 | contains "-}" (tokenString t) = xs
 | otherwise           = afterblockcomend xs
afterblockcomend [] = []


-- does one string contain another string

contains :: Eq a => [a] -> [a] -> Bool
contains sub = any (isPrefixOf sub) . tails

-- actually pick up definitions

findstuff :: [Token] -> [FoundThing]
findstuff ((Token "module" _):(Token name pos):_) =
        [FoundThing FTModule name pos] -- nothing will follow this section
findstuff ((Token "data" _):(Token name pos):xs)
        | any ( (== "where"). tokenString ) xs -- GADT 
            -- TODO will be found as FTCons (not FTConsGADT), the same for functions - but they are found :) 
            = FoundThing FTDataGADT name pos : getcons2 xs ++ fromWhereOn xs -- ++ (findstuff xs)
        | otherwise = FoundThing FTData name pos : getcons FTData (trimNewlines xs)-- ++ (findstuff xs)
findstuff ((Token "newtype" _):ts@(((Token name pos)):_)) =
        FoundThing FTNewtype name pos : getcons FTCons (trimNewlines ts)-- ++ (findstuff xs)
        -- FoundThing FTNewtype name pos : findstuff xs
findstuff ((Token "type" _):(Token name pos):xs) =
        FoundThing FTType name pos : findstuff xs
findstuff ((Token "class" _):xs) = case break ((== "where").tokenString) xs of
        (ys,[]) -> maybeToList $ className ys
        (_,r) -> maybe [] (:fromWhereOn r) $ className xs
    where isParenOpen (Token "(" _) = True
          isParenOpen _ = False
          className lst = case (head . dropWhile isParenOpen . reverse . takeWhile ((/= "=>").tokenString) . reverse) lst of
            (Token name p) -> Just $ FoundThing FTClass name p
            _ -> Nothing
findstuff xs = findFunc xs ++ findFuncTypeDefs [] xs

findFuncTypeDefs :: [Token] -> [Token] -> [FoundThing]
findFuncTypeDefs found (t@(Token _ _): Token "," _ :xs) =
          findFuncTypeDefs (t : found) xs
findFuncTypeDefs found (t@(Token _ _): Token "::" _ :_) =
          map (\(Token name p) -> FoundThing FTFuncTypeDef name p) (t:found)
findFuncTypeDefs found (Token "(" _ :xs) =
          case break myBreakF xs of
            (inner@((Token _ p):_), _:xs') ->
              let merged = Token ( concatMap (\(Token x _) -> x) inner ) p
              in findFuncTypeDefs found $ merged : xs'
            _ -> []
    where myBreakF (Token ")" _) = True
          myBreakF _ = False          
findFuncTypeDefs _ _ = []

fromWhereOn :: [Token] -> [FoundThing]
fromWhereOn [] = []
fromWhereOn [_] = []
fromWhereOn (_: xs@((NewLine _):_)) =
             concatMap (findstuff . tail')
             $ splitByNL (Just ( minimum
                                . (10000:)
                                . map (\(NewLine i) -> i)
                                . filter (isNewLine Nothing) $ xs)) xs
fromWhereOn (_:xw) = findstuff xw

findFunc :: [Token] -> [FoundThing]
findFunc x = case findInfix x of
    a@(_:_) -> a
    _ -> findF x

findInfix :: [Token] -> [FoundThing]
findInfix x = case dropWhile ((/= "`"). tokenString) (takeWhile ( (/= "=") . tokenString) x) of
          _:(Token name p):_ -> [FoundThing FTFuncImpl name p]
          _ -> []


findF :: [Token] -> [FoundThing]
findF ((Token name p):xs) =
    [FoundThing FTFuncImpl name p | any (("=" ==) . tokenString) xs]
findF _ = []

tail' :: [a] -> [a]
tail' (_:xs) = xs
tail' [] = []

-- get the constructor definitions, knowing that a datatype has just started

getcons :: FoundThingType -> [Token] -> [FoundThing]
getcons ftt ((Token "=" _):(Token name pos):xs) =
        FoundThing ftt name pos : getcons2 xs
getcons ftt (_:xs) = getcons ftt xs
getcons _ [] = []


getcons2 :: [Token] -> [FoundThing]
getcons2 ((Token name pos):(Token "::" _):xs) =
        FoundThing FTConsAccessor name pos : getcons2 xs
getcons2 ((Token "=" _):_) = []
getcons2 ((Token "|" _):(Token name pos):xs) =
        FoundThing FTCons name pos : getcons2 xs
getcons2 (_:xs) = getcons2 xs
getcons2 [] = []


splitByNL :: (Maybe Int) -> [Token] -> [[Token]]
splitByNL maybeIndent (nl@(NewLine _):ts) =
  let (a,b) = break (isNewLine maybeIndent) ts
  in (nl : a) : splitByNL maybeIndent b
splitByNL _ _ = []

getTopLevelIndent :: [[Token]] -> Int
getTopLevelIndent [] = 0 -- (no import found , assuming indent 0 : this can be
                         -- done better but should suffice for most needs
getTopLevelIndent (x:xs) = if any ((=="import") . tokenString) x
                          then let ((NewLine i):_) = x in i
                          else getTopLevelIndent xs

-- removes literate stuff if any line '> ... ' is found and any word is \begin (hglogger has ^> in it's commetns)
fromLiterate :: FilePath -> [(String, Int)] -> [(String, Int)]
fromLiterate file lines = 
  let literate = [ (ls, n) |  ('>':ls, n) <- lines ]
  in if ".lhs" `isSuffixOf` file && (not . null $ literate) then literate -- not . null literate because of Repair.lhs of darcs 
      else if (".hs" `isSuffixOf` file)
            || (null literate || not ( any ( any ("\\begin" `isPrefixOf`). words . fst) lines))
        then lines
        else literate

{- testcase:

checkToBeFound(){
  toBeFound=$(sed -n 's/-- to be found\s*//p' testcase.hs)
  for i in $toBeFound; do
    grep -l $i tags 2>&1 > /dev/null || echo "tag $i was not found"
  done
  echo -n "to be found ocunt: "
  echo "$toBeFound" | wc -l
}

-- to be found A.B.testcase
module A.B.testcase(module System.FilePath.Windows) where
    import asdf

-- to be found Request
-- to be found Request2
-- to be found rqBody
-- to be found rqMethod
-- to be found rqPeer
-- to be found Request3
    data Request = Request2 { rqMethod::Method,
                             rqBody    :: RqBody,
                             rqPeer    :: Host
                           }
                  | Request3
deriving(Show,Read,Typeable)
    --  http://hackage.haskell.org/trac/ghc/ticket/1184
    -- ! Convert Bool into another monad
-- to be found boolM
    boolM False = mzero

-- to be found sadlkfj
    sadlkfj
     = 7

-- to be found onlyTheFirstOne
    onlyTheFirstOne (x:xs) = 8
    onlyTheFirstOne [] = 8
-- to be found AC
    AC a b c d e f g = 7
-- to be found abc
    abc = let a = 7
              b = 8
              in a + b
            where x = 34
                  o = 423
-- to be found BB
-- to be found AA
    AA, BB :: Int


-- to be found foo
    ad `foo` oh = 90

-- to be found X
-- to be found xyz
    class (A a) => X a where
      xyz :: dummy
-- to be found Z
-- to be found o
    class (A a) => Z a where o :: Int

-- to be found ABC
    newtype ABC = Int
-- to be found DBM
    newtype IE.ISession sess => DBM mark sess a = DBM (ReaderT sess IO a)
-- to be found SAA
newtype Symbol = SAA String

-- TODO 

-- to be found =~
(=~)   :: (Regex rho) => String -> rho -> Bool
-}