{-# LANGUAGE ScopedTypeVariables #-}
-- RecordWildCards, TypeSynonymInstances, CPP
-- {-# LANGUAGE NamedFieldPuns #-}
-- {-# LANGUAGE OverloadedStrings #-}
-- {-# OPTIONS_GHC -fwarn-incomplete-patterns #-}
-- {-# OPTIONS_GHC -fwarn-unused-imports #-}

-- | This module contains misc bits used by (multiple) other modules.

module Bio.Phylogeny.PhyBin.Util
       ( 
         is_regular_file, acquireTreeFiles,
         safePrintDendro, sanityCheck
       )
       where

import qualified Data.Foldable as F
import           Data.Function       (on)
import           Data.List           (delete, minimumBy, sortBy, insertBy, intersperse, sort)
import           Data.Maybe          (fromMaybe, catMaybes)
import qualified Data.Map                   as M
import qualified Data.Set                   as S
import qualified Data.Text.Lazy             as T
import           Control.Monad       (forM, forM_, filterM, when, unless)
import           Control.Exception   (evaluate)
import           Control.Applicative ((<$>),(<*>))
import           Control.Concurrent  (Chan)
import           System.FilePath     (combine)
import           System.Directory    (doesFileExist, doesDirectoryExist,
                                      getDirectoryContents, getCurrentDirectory)
import           System.IO           (openFile, hClose, IOMode(ReadMode), stderr,
                                      hPutStr, hPutStrLn)
import           System.Exit         (ExitCode(..))
import           System.Timeout      (timeout)
import           Test.HUnit          ((~:),(~=?),Test,test)

-- For vizualization:
import           Text.PrettyPrint.HughesPJClass hiding (char, Style)
import           Bio.Phylogeny.PhyBin.CoreTypes
-- import           Bio.Phylogeny.PhyBin.Parser (parseNewick)
-- import           Bio.Phylogeny.PhyBin.Visualize (dotToPDF, dotNewickTree, viewNewickTree)

import qualified Data.Clustering.Hierarchical as C
import qualified Data.Graph.Inductive as G
import qualified Data.GraphViz        as Gv
import           Data.GraphViz.Printing (renderDot)
import  Data.GraphViz.Types.Canonical (nodeStmts, graphStatements)

----------------------------------------------------------------------------------------------------
-- OS specific bits:
----------------------------------------------------------------------------------------------------
-- #ifdef WIN32
-- is_regular_file = undefined
-- is_directory path = 
--   getFileAttributes
-- --getFileInformationByHandle
-- --    bhfiFileAttributes
-- file_exists = undefined
-- #else
-- is_regular_file :: FilePath -> IO Bool
-- is_regular_file file = 
--   do stat <- getFileStatus file; 
--      -- Hmm, this is probably bad practice... hard to know its exhaustive:
--      return$ isRegularFile stat || isNamedPipe stat || isSymbolicLink stat
-- is_directory :: FilePath -> IO Bool
-- is_directory path = 
--   do stat <- getFileStatus path
--      return (isDirectory stat)
-- file_exists = fileExist
-- #endif

-- Here we ASSUME it exists, then these functions are good enough:
is_regular_file :: FilePath -> IO Bool
is_regular_file = doesFileExist

is_directory :: FilePath -> IO Bool
is_directory = doesDirectoryExist 

file_exists :: FilePath -> IO Bool
file_exists path = 
  do f <- doesFileExist path
     d <- doesDirectoryExist path
     return (f || d)

--------------------------------------------------------------------------------

-- | Expand out directories to find all the tree files.
acquireTreeFiles :: [String] -> IO [String]
acquireTreeFiles inputs = do 
    all :: [[String]] <- forM inputs $ \ path -> do
      exists <- file_exists path 

      --stat   <- if exists then getFileStatus path else return (error "internal invariant")
      -- [2010.09.23] This is no longer really necessary:
      if not exists then do
        error$ "No file or directory found at this path!: "++path
	 -- hPutStr stderr$ "Input not a file/directory, assuming wildcard, using 'find' for expansion"
	 -- entries <- HSH.run$ "find " ++ path	 
	 -- hPutStrLn stderr$ "("++show (length entries)++" files found):  "++ show path
	 -- return entries
       else do
	 isdir <- is_directory path
	 reg  <- is_regular_file path
	 if isdir then do 
	    hPutStr stderr$ "Input is a directory, reading all regular files contained "
	    children <- getDirectoryContents path
	    filtered <- filterM is_regular_file $ map (combine path) children
	    hPutStrLn stderr$ "("++show (length filtered)++" regular files found):  "++ show path
	    return$ filtered
          else if reg then do 
	    return [path]
	  else error$ "phybin: Unhandled input path: " ++ path

    return (concat all)

--------------------------------------------------------------------------------

-- | Step carefully in case of cycles (argh).
safePrintDendro :: Gv.DotGraph G.Node -> IO (Maybe T.Text)
safePrintDendro dotg= do 
--  putStrLn$ "Dendrogram graph size: "++ show (F.foldl' (\a _ -> a+1) 0 dotg)
  mx <- timeout (2 * 1000 * 1000) $ do
--        putStrLn$ "Dendrogram graph, is directed?: "++ show (Gv.directedGraph dotg)
        putStrLn$ "Dendrogram graph size: "++ show (length $ nodeStmts $ graphStatements dotg)
        let str = renderDot $ Gv.toDot dotg
        evaluate (T.length str)
        return str
  case mx of
    Nothing -> do putStrLn "WARNING: DotGraph appears to be a cyclic structure.  This is probably a bug."
                  return Nothing
    _ -> return mx

sanityCheck :: C.Dendrogram (FullTree DefDecor) -> IO ()
sanityCheck dendro = do 
  let fn seen elm | S.member (treename elm) seen =
                       error$"Dendrogram failed sanity check!  Tree name occurs multiple times: "++(treename elm)
                  | otherwise = S.insert (treename elm) seen
      sz = S.size $ F.foldl' fn S.empty dendro
  putStrLn$ "Sanity checked dendrogram of size: "++show sz