module Bio.Phylogeny.PhyBin.CoreTypes
(
NewickTree(..),
DefDecor, StandardDecor(..), AnnotatedTree, FullTree(..),
ClustMode(..), TreeName, NumTaxa(..),
displayDefaultTree, displayStrippedTree,
treeSize, numLeaves, liftFT,
get_dec, set_dec, get_children,
map_labels, all_labels, foldIsomorphicTrees,
avg_branchlen, get_bootstraps,
PhyBinConfig(..), default_phybin_config,
WhichRFMode(..),
Label, LabelTable,
HasBranchLen(..)
)
where
import qualified Data.Map as M
import qualified Data.Set as S
import Data.Foldable (Foldable(..))
import Data.Maybe (maybeToList)
import Data.Monoid (mappend, mconcat)
import Text.PrettyPrint.HughesPJClass hiding (char, Style)
import qualified Data.Clustering.Hierarchical as C
#define NO_ATOMS
#ifndef NO_ATOMS
import StringTable.Atom
#endif
type BranchLen = Double
data NewickTree a =
NTLeaf a !Label
| NTInterior a [NewickTree a]
deriving (Show, Eq, Ord)
instance Functor NewickTree where
fmap fn (NTLeaf dec x) = NTLeaf (fn dec) x
fmap fn (NTInterior dec ls) = NTInterior (fn dec) (map (fmap fn) ls)
instance Foldable NewickTree where
foldMap f (NTLeaf dec x) = f dec
foldMap f (NTInterior dec ls) = mappend (f dec) $
mconcat (map (foldMap f) ls)
instance Foldable FullTree where
foldMap f (FullTree _ _ tr) = foldMap f tr
instance Functor FullTree where
fmap f (FullTree n l tr) = FullTree n l $ fmap f tr
instance Pretty dec => Pretty (NewickTree dec) where
pPrint (NTLeaf dec name) = "NTLeaf" <+> pPrint dec <+> text (show name)
pPrint (NTInterior dec ls) = "NTInterior" <+> pPrint dec <+> pPrint ls
instance Pretty a => Pretty (FullTree a) where
pPrint (FullTree name mp tr) =
"FullTree " <+> text name <+> loop tr
where
loop (NTLeaf dec ind) = "NTLeaf" <+> pPrint dec <+> text (mp M.! ind)
loop (NTInterior dec ls) = "NTInterior" <+> pPrint dec <+> pPrint ls
instance (Pretty k, Pretty v) => Pretty (M.Map k v) where
pPrint mp = pPrint (M.toList mp)
displayDefaultTree :: FullTree DefDecor -> Doc
displayDefaultTree orig = loop tr <> ";"
where
(FullTree _ mp tr) = orig
loop (NTLeaf (Nothing,_) name) = text (mp M.! name)
loop (NTLeaf _ _) = error "WEIRD -- why did a leaf node have a bootstrap value?"
loop (NTInterior (bootstrap,_) ls) =
case bootstrap of
Nothing -> base
Just val -> base <> text ":[" <> text (show val) <> text "]"
where base = parens$ sep$ map_but_last (<>text",") $ map loop ls
displayStrippedTree :: FullTree a -> Doc
displayStrippedTree orig = loop tr <> ";"
where
(FullTree _ mp tr) = orig
loop (NTLeaf _ name) = text (mp M.! name)
loop (NTInterior _ ls) = parens$ sep$ map_but_last (<>text",") $ map loop ls
type Label = Int
type LabelTable = M.Map Label String
type DefDecor = (Maybe Int, BranchLen)
type AnnotatedTree = NewickTree StandardDecor
data StandardDecor = StandardDecor {
branchLen :: BranchLen,
bootStrap :: Maybe Int,
subtreeWeight :: Int,
sortedLabels :: [Label]
}
deriving (Show,Read,Eq,Ord)
class HasBranchLen a where
getBranchLen :: a -> BranchLen
instance HasBranchLen StandardDecor where
getBranchLen = branchLen
instance HasBranchLen DefDecor where
getBranchLen = snd
data FullTree a =
FullTree { treename :: TreeName
, labelTable :: LabelTable
, nwtree :: NewickTree a
}
deriving (Show, Ord, Eq)
liftFT :: (NewickTree t -> NewickTree a) -> FullTree t -> FullTree a
liftFT fn (FullTree nm labs x) = FullTree nm labs (fn x)
type TreeName = String
instance Pretty StandardDecor where
pPrint (StandardDecor bl bs wt ls) = parens$
"StandardDecor" <+> hsep [pPrint bl, pPrint bs
]
data PhyBinConfig =
PBC { verbose :: Bool
, num_taxa :: NumTaxa
, name_hack :: String -> String
, output_dir :: String
, inputs :: [String]
, do_graph :: Bool
, do_draw :: Bool
, clust_mode :: ClustMode
, highlights :: [FilePath]
, show_trees_in_dendro :: Bool
, show_interior_consensus :: Bool
, rfmode :: WhichRFMode
, preprune_labels :: Maybe [String]
, print_rfmatrix :: Bool
, dist_thresh :: Maybe Int
, branch_collapse_thresh :: Maybe Double
, bootstrap_collapse_thresh :: Maybe Int
}
data WhichRFMode = HashRF | TolerantNaive
deriving (Show, Eq, Ord)
data NumTaxa = Expected Int
| Unknown
| Variable
deriving (Show, Read, Eq)
default_phybin_config :: PhyBinConfig
default_phybin_config =
PBC { verbose = False
, num_taxa = Unknown
, name_hack = id
, output_dir = "./phybin_out/"
, inputs = []
, do_graph = False
, do_draw = False
, clust_mode = ClusterThem C.UPGMA
, rfmode = HashRF
, preprune_labels = Nothing
, highlights = []
, show_trees_in_dendro = False
, show_interior_consensus = False
, print_rfmatrix = False
, dist_thresh = Nothing
, branch_collapse_thresh = Nothing
, bootstrap_collapse_thresh = Nothing
}
data ClustMode = BinThem | ClusterThem { linkage :: C.Linkage }
treeSize :: NewickTree a -> Int
treeSize (NTLeaf _ _) = 1
treeSize (NTInterior _ ls) = 1 + sum (map treeSize ls)
numLeaves :: NewickTree a -> Int
numLeaves (NTLeaf _ _) = 1
numLeaves (NTInterior _ ls) = sum (map numLeaves ls)
map_but_last :: (a -> a) -> [a] -> [a]
map_but_last _ [] = []
map_but_last _ [h] = [h]
map_but_last fn (h:t) = fn h : map_but_last fn t
get_dec :: NewickTree t -> t
get_dec (NTLeaf dec _) = dec
get_dec (NTInterior dec _) = dec
set_dec :: b -> NewickTree a -> NewickTree b
set_dec d = fmap (const d)
get_children :: NewickTree t -> [NewickTree t]
get_children (NTLeaf _ _) = []
get_children (NTInterior _ ls) = ls
avg_branchlen :: HasBranchLen a => [NewickTree a] -> Double
avg_branchlen origls = fst total / snd total
where
total = sum_ls $ map sum_tree origls
sum_ls ls = (sum$ map fst ls, sum$ map snd ls)
sum_tree (NTLeaf dec _) | getBranchLen dec == 0 = (0,0)
| otherwise = (abs (getBranchLen dec),1)
sum_tree (NTInterior dec ls) =
let branchLen = getBranchLen dec
(x,y) = sum_ls$ map sum_tree ls in
if branchLen == 0 then (x, y) else ((abs branchLen) + x, 1+y)
get_bootstraps :: NewickTree StandardDecor -> [Int]
get_bootstraps (NTLeaf (StandardDecor{bootStrap}) _) = maybeToList bootStrap
get_bootstraps (NTInterior (StandardDecor{bootStrap}) ls) =
maybeToList bootStrap ++ concatMap get_bootstraps ls
map_labels :: (Label -> Label) -> NewickTree a -> NewickTree a
map_labels fn (NTLeaf dec lbl) = NTLeaf dec $ fn lbl
map_labels fn (NTInterior dec ls) = NTInterior dec$ map (map_labels fn) ls
all_labels :: NewickTree t -> [Label]
all_labels (NTLeaf _ lbl) = [lbl]
all_labels (NTInterior _ ls) = concat$ map all_labels ls
foldIsomorphicTrees :: ([a] -> b) -> [NewickTree a] -> NewickTree b
foldIsomorphicTrees _ [] = error "foldIsomorphicTrees: empty list of input trees"
foldIsomorphicTrees fn ls@(hd:_) = fmap fn horiztrees
where
horiztrees = Prelude.foldr consTrees (fmap (const []) hd) ls
consTrees a b = case (a,b) of
(NTLeaf dec nm1, NTLeaf decls nm2) | nm1 /= nm2 -> error$"foldIsomorphicTrees: mismatched names: "++show (nm1,nm2)
| otherwise ->
NTLeaf (dec : decls) nm1
(NTInterior dec ls1, NTInterior decls ls2) ->
NTInterior (dec:decls) $ zipWith consTrees ls1 ls2
_ -> error "foldIsomorphicTrees: difference in tree shapes"