module Bio.Phylogeny.PhyBin.Util
(
is_regular_file, acquireTreeFiles,
safePrintDendro, sanityCheck
)
where
import qualified Data.Foldable as F
import Data.Function (on)
import Data.List (delete, minimumBy, sortBy, insertBy, intersperse, sort)
import Data.Maybe (fromMaybe, catMaybes)
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.Text.Lazy as T
import Control.Monad (forM, forM_, filterM, when, unless)
import Control.Exception (evaluate)
import Control.Applicative ((<$>),(<*>))
import Control.Concurrent (Chan)
import System.FilePath (combine)
import System.Directory (doesFileExist, doesDirectoryExist,
getDirectoryContents, getCurrentDirectory)
import System.IO (openFile, hClose, IOMode(ReadMode), stderr,
hPutStr, hPutStrLn)
import System.Exit (ExitCode(..))
import System.Timeout (timeout)
import Test.HUnit ((~:),(~=?),Test,test)
import Text.PrettyPrint.HughesPJClass hiding (char, Style)
import Bio.Phylogeny.PhyBin.CoreTypes
import qualified Data.Clustering.Hierarchical as C
import qualified Data.Graph.Inductive as G
import qualified Data.GraphViz as Gv
import Data.GraphViz.Printing (renderDot)
import Data.GraphViz.Types.Canonical (nodeStmts, graphStatements)
is_regular_file :: FilePath -> IO Bool
is_regular_file = doesFileExist
is_directory :: FilePath -> IO Bool
is_directory = doesDirectoryExist
file_exists :: FilePath -> IO Bool
file_exists path =
do f <- doesFileExist path
d <- doesDirectoryExist path
return (f || d)
acquireTreeFiles :: [String] -> IO [String]
acquireTreeFiles inputs = do
all :: [[String]] <- forM inputs $ \ path -> do
exists <- file_exists path
if not exists then do
error$ "No file or directory found at this path!: "++path
else do
isdir <- is_directory path
reg <- is_regular_file path
if isdir then do
hPutStr stderr$ "Input is a directory, reading all regular files contained "
children <- getDirectoryContents path
filtered <- filterM is_regular_file $ map (combine path) children
hPutStrLn stderr$ "("++show (length filtered)++" regular files found): "++ show path
return$ filtered
else if reg then do
return [path]
else error$ "phybin: Unhandled input path: " ++ path
return (concat all)
safePrintDendro :: Gv.DotGraph G.Node -> IO (Maybe T.Text)
safePrintDendro dotg= do
mx <- timeout (2 * 1000 * 1000) $ do
putStrLn$ "Dendrogram graph size: "++ show (length $ nodeStmts $ graphStatements dotg)
let str = renderDot $ Gv.toDot dotg
evaluate (T.length str)
return str
case mx of
Nothing -> do putStrLn "WARNING: DotGraph appears to be a cyclic structure. This is probably a bug."
return Nothing
_ -> return mx
sanityCheck :: C.Dendrogram (FullTree DefDecor) -> IO ()
sanityCheck dendro = do
let fn seen elm | S.member (treename elm) seen =
error$"Dendrogram failed sanity check! Tree name occurs multiple times: "++(treename elm)
| otherwise = S.insert (treename elm) seen
sz = S.size $ F.foldl' fn S.empty dendro
putStrLn$ "Sanity checked dendrogram of size: "++show sz