module Bio.Phylogeny.PhyBin.Visualize
(dotNewickTree, dotToPDF, viewNewickTree,
dotNewickTree_debug,
dendrogramToGraph, dotDendrogram
)
where
import Text.Printf (printf)
import Data.List (elemIndex, isPrefixOf)
import Data.List.Split (chunksOf)
import Data.Maybe (fromJust, isJust)
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.Vector as V
import Data.Text.Lazy (pack)
import Control.Monad (void)
import Control.Concurrent (Chan, newChan, writeChan, forkIO)
import qualified Data.Graph.Inductive as G hiding (run)
import qualified Data.GraphViz as Gv hiding (parse, toLabel)
import qualified Data.GraphViz.Attributes.Complete as GA
import qualified Data.GraphViz.Attributes.Colors as GC
import Data.GraphViz.Attributes.Colors (Color(RGB))
import System.Timeout (timeout)
import qualified Data.Clustering.Hierarchical as C
import Bio.Phylogeny.PhyBin.CoreTypes
import Bio.Phylogeny.PhyBin.RFDistance (filterCompatible, compatibleWith, consensusTree)
import Debug.Trace
toGraph :: FullTree StandardDecor -> G.Gr String Double
toGraph (FullTree _ tbl tree) = G.run_ G.empty $ loop tree
where
fromLabel ix = tbl M.! ix
loop (NTLeaf _ name) =
do let str = fromLabel name
_ <- G.insMapNodeM str
return str
loop (NTInterior (StandardDecor{sortedLabels}) ls) =
do let bigname = concatMap fromLabel sortedLabels
names <- mapM loop ls
_ <- G.insMapNodeM bigname
mapM_ (\x -> G.insMapEdgeM (bigname, x, 0.0)) names
return bigname
toGraph2 :: FullTree StandardDecor -> G.Gr (NewickTree StandardDecor) Double
toGraph2 (FullTree _ tbl tree) = G.run_ G.empty $ loop tree
where
loop node@(NTLeaf _ _) =
do _ <- G.insMapNodeM node
return ()
loop node@(NTInterior _ ls) =
do mapM_ loop ls
_ <- G.insMapNodeM node
mapM_ (\x -> G.insMapEdgeM (node, x, branchLen$ get_dec x)) ls
return ()
dotDendrogram :: PhyBinConfig -> String -> Double -> C.Dendrogram (FullTree a) ->
Maybe (M.Map TreeName Int) -> [[NewickTree ()]] -> Gv.DotGraph G.Node
dotDendrogram PBC{show_trees_in_dendro, show_interior_consensus}
title edge_scale origDendro mNameMap highlightTrs =
Gv.graphToDot myparams (G.nmap uid graph)
where
(charsDropped, dendro) = truncateNames origDendro
nameMap' = fmap (M.mapKeys (drop charsDropped)) mNameMap
graph :: DendroGraph
graph = dendrogramToGraph num_taxa dendro
num_taxa = numLeaves $ nwtree fstLeaf
FullTree{labelTable} = fstLeaf
fstLeaf = getFstLeaf dendro
getFstLeaf (C.Leaf ft) = ft
getFstLeaf (C.Branch _ l _) = getFstLeaf l
uidsToNames = M.fromList $
map (\nd@NdLabel{uid} -> (uid,nd)) $
map (fromJust . G.lab graph) $
G.nodes graph
matchers = map mkMatcher highlightTrs
mkMatcher ls = let fns = map compatibleWith ls
in \ tr ->
or$ map (\f -> f tr) fns
wcolors = zip matchers defaultPalette
findColor tr = loop wcolors
where loop [] = Nothing
loop ((f,col):rst) | f tr = Just col
| otherwise = loop rst
highlightMap = M.map fromJust $
M.filter isJust $
M.map (\ (NdLabel _ _ _ ct) -> findColor ct)
uidsToNames
myparams :: Gv.GraphvizParams G.Node String Double () String
myparams = Gv.defaultParams { Gv.globalAttributes= [Gv.GraphAttrs [GA.Label$ GA.StrLabel$ pack title]],
Gv.fmtNode= nodeAttrs,
Gv.fmtEdge= edgeAttrs
}
nodeAttrs :: (Int, UniqueNodeName) -> [GA.Attribute]
nodeAttrs (_num, uid) =
let uid' = if show_trees_in_dendro
then printed_tree++"\n"++uid
else uid
printed_tree =
case M.lookup uid uidsToNames of
Nothing -> ""
Just NdLabel{clumpSize,consensus} ->
(if clumpSize>1 then "size "++show clumpSize++"\n" else "")
++ show (displayStrippedTree (FullTree "" labelTable consensus))
(tag,shp,styl) =
if isPrefixOf "DUMMY_" uid
then (if show_trees_in_dendro && show_interior_consensus
then printed_tree else "",
if show_interior_consensus
then GA.BoxShape
else GA.PointShape,
[ GA.Color [weighted$ GA.X11Color Gv.Transparent] ]
)
else (uid', GA.Ellipse, [ GA.Style [GA.SItem GA.Filled []]])
highlightColor =
case M.lookup uid highlightMap of
Nothing -> []
Just col -> [ GA.Color [weighted col ] ]
clustColor | not (null highlightTrs) = []
| otherwise =
case (nameMap', M.lookup uid uidsToNames) of
(Just nm, Just NdLabel{tre=Just FullTree{treename}}) ->
case M.lookup treename nm of
Nothing -> []
Just ind | ind <= 10 -> [ GA.Color [weighted$ defaultPaletteV V.! (ind1) ] ]
| otherwise -> []
_ -> []
in
[ GA.Label$ GA.StrLabel$ pack tag
, GA.Shape shp
] ++ styl ++ highlightColor ++ clustColor
edgeAttrs = getEdgeAttrs edge_scale
type UniqueNodeName = String
type DendroGraph = G.Gr NdLabel Double
data NdLabel =
NdLabel
{ uid :: UniqueNodeName
, tre :: Maybe (FullTree ())
, clumpSize :: !Int
, consensus :: NewickTree ()
}
deriving (Show, Ord, Eq)
dendrogramToGraph :: Int -> C.Dendrogram (FullTree a) -> DendroGraph
dendrogramToGraph num_taxa orig =
G.run_ G.empty $ void$
loop (fmap (fmap (const())) orig)
where
loop node@(C.Leaf ft) =
let stripped = fmap (const()) ft in
G.insMapNodeM (NdLabel (treename ft) (Just stripped) 1 (nwtree stripped))
loop node@(C.Branch 0 left right) = do
let lvs = collapseZeroes left ++ collapseZeroes right
nms = map (treename) lvs
lens = map length nms
total = sum lens
avg = total `quot` length nms
perline = ceiling$ sqrt (fromIntegral total / ((fromIntegral avg)^2))
chunked = chunksOf perline nms
fatname = unlines (map unwords chunked)
G.insMapNodeM (NdLabel fatname (Just (head lvs))
(length lvs) (consensusTree num_taxa (map nwtree lvs)))
loop node@(C.Branch dist left right) =
do (_,ll@(NdLabel lid _ s1 c1)) <- loop left
(_,rr@(NdLabel rid _ s2 c2)) <- loop right
let ndname = "DUMMY_"++(lid++"_"++rid)
(midN,mid) <- G.insMapNodeM (NdLabel ndname Nothing (s1+s2) (consensusTree num_taxa [c1,c2]))
G.insMapEdgeM (ll, mid, dist)
G.insMapEdgeM (rr, mid, dist)
return (midN,mid)
collapseZeroes (C.Leaf tr) = [tr]
collapseZeroes (C.Branch 0 l r) = collapseZeroes l ++ collapseZeroes r
collapseZeroes oth = error "dendrogramToGraph: internal error. Not expecting non-zero branch length here."
truncateNames :: C.Dendrogram (FullTree a) -> (Int, C.Dendrogram (FullTree a))
truncateNames dendro = (prefChars, fmap chopName dendro)
where
chopName ft = ft{ treename= drop prefChars (treename ft) }
prefChars = length$ commonPrefix$ S.toList$ allNames dendro
allNames (C.Leaf tr) = S.singleton (treename tr)
allNames (C.Branch _ l r) = S.union (allNames l) (allNames r)
viewNewickTree :: String -> FullTree StandardDecor -> IO (Chan (), FullTree StandardDecor)
viewNewickTree title tree@(FullTree{nwtree}) =
do chan <- newChan
let dot = dotNewickTree title (1.0 / avg_branchlen [nwtree])
tree
runit = do mx <-
Gv.runGraphvizCanvas default_cmd dot Gv.Xlib
writeChan chan ()
_ <- forkIO runit
return (chan, tree)
default_cmd :: Gv.GraphvizCommand
default_cmd = Gv.Neato
myShowFloat :: Double -> String
myShowFloat fl =
let rnd = round fl in
if fl == fromIntegral rnd
then show rnd
else printf "%.4f" fl
dotToPDF :: Gv.DotGraph G.Node -> FilePath -> IO (Maybe FilePath)
dotToPDF dot file = do
x <- timeout defaultTimeout $
Gv.runGraphvizCommand default_cmd dot Gv.Pdf file
case x of
Nothing -> do putStrLn$ "WARNING: call to graphviz TIMED OUT. File not plotted: "++file
return Nothing
_ -> return x
defaultTimeout :: Int
defaultTimeout = (15 * 1000 * 1000)
dotNewickTree :: String -> Double -> FullTree StandardDecor -> Gv.DotGraph G.Node
dotNewickTree title edge_scale atree@(FullTree _ tbl tree) =
Gv.graphToDot myparams graph
where
graph = toGraph2 atree
fromLabel ix = tbl M.! ix
myparams :: Gv.GraphvizParams G.Node (NewickTree StandardDecor) Double () (NewickTree StandardDecor)
myparams = Gv.defaultParams { Gv.globalAttributes= [Gv.GraphAttrs [GA.Label$ GA.StrLabel$ pack title]],
Gv.fmtNode= nodeAttrs, Gv.fmtEdge= edgeAttrs }
nodeAttrs :: (Int,NewickTree StandardDecor) -> [GA.Attribute]
nodeAttrs (_num, node) =
let children = get_children node in
[ GA.Label$ GA.StrLabel$ pack$
concatMap fromLabel $
sortedLabels $ get_dec node
, GA.Shape (if null children then GA.Ellipse else GA.PointShape)
, GA.Style [GA.SItem GA.Filled []]
]
edgeAttrs = getEdgeAttrs edge_scale
defaultPaletteV :: V.Vector GA.Color
defaultPaletteV = V.fromList defaultPalette
defaultPalette :: [GA.Color]
defaultPalette = concat$ replicate 4 $ map GA.X11Color
[ Gv.Aquamarine
, Gv.PaleVioletRed
, Gv.MediumPurple
, Gv.PaleGreen
, Gv.PapayaWhip
, Gv.SkyBlue
, Gv.Yellow
, Gv.Crimson
, Gv.Gray
, Gv.PaleGoldenrod
]
altPalette :: V.Vector GA.Color
altPalette = V.fromList $ concat $ replicate 3 $
[ RGB 159 74 81, RGB 217 183 173, RGB 149 91 116, RGB 185 138 148
] ++
[ RGB 108 74 39, RGB 207 179 83, RGB 180 149 60, RGB 244 242 185
]
getEdgeAttrs :: Double -> (t, t1, Double) -> [GA.Attribute]
getEdgeAttrs edge_scale = edgeAttrs
where
edgeAttrs (_,_,weight) =
let draw_weight = compute_draw_weight weight edge_scale in
[GA.ArrowHead Gv.noArrow,
GA.Label$ GA.StrLabel$ pack$ myShowFloat weight] ++
if weight == 0.0
then [GA.Color [weighted$ GA.X11Color Gv.Red],
GA.LWidth 3.0, GA.Len minlen]
else [GA.Len draw_weight]
minlen = 0.7
maxlen = 3.0
compute_draw_weight w scale =
let scaled = (abs w) * scale + minlen in
(min scaled maxlen)
weighted c = GC.WC {GC.wColor=c, GC.weighting=Nothing}
dotNewickTree_debug :: String -> FullTree StandardDecor -> Gv.DotGraph G.Node
dotNewickTree_debug title atree@(FullTree _ tbl tree) = Gv.graphToDot myparams graph
where
graph = toGraph2 atree
fromLabel ix = tbl M.! ix
myparams :: Gv.GraphvizParams G.Node (NewickTree StandardDecor) Double () (NewickTree StandardDecor)
myparams = Gv.defaultParams { Gv.globalAttributes= [Gv.GraphAttrs [GA.Label$ GA.StrLabel$ pack title]],
Gv.fmtNode= nodeAttrs, Gv.fmtEdge= edgeAttrs }
nodeAttrs :: (Int,(NewickTree StandardDecor)) -> [GA.Attribute]
nodeAttrs (num,node) =
let children = get_children node in
[ GA.Label (if null children
then GA.StrLabel$ pack$ concatMap fromLabel $ sortedLabels $ get_dec node
else GA.RecordLabel$ take (length children) $
map (GA.PortName . GA.PN . pack) $ map show [1..]
)
, GA.Shape GA.Record
, GA.Style [GA.SItem GA.Filled []]
]
edgeAttrs (num1, num2, _weight) =
let node1 = fromJust$ G.lab graph num1
node2 = fromJust$ G.lab graph num2
ind = fromJust$ elemIndex node2 (get_children node1)
in [GA.TailPort$ GA.LabelledPort (GA.PN$ pack$ show$ 1+ind) (Just GA.South)]
prettyPrint' :: Show a => a -> String
prettyPrint' = show
commonPrefix :: Eq a => [[a]] -> [a]
commonPrefix [] = []
commonPrefix ls@(hd:tl)
| any null ls = []
| otherwise =
if all ((== (head hd)) . head) tl
then head hd : commonPrefix (map tail ls)
else commonPrefix (map tail ls)