-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Utility for clustering phylogenetic trees in Newick format based on Robinson-Foulds distance.
--
-- This package provides a libary and executable for dealing with Newick
-- tree files.
--
-- It can do simple binning of identical trees or more complex clustering
-- based on an all-to-all Robinson-Foulds distance matrix.
--
-- phybin produces output files that characterize the size and contents
-- of each bin or cluster (including generating GraphViz-based visual
-- representations of the tree topologies).
@package phybin
@version 0.3
module Bio.Phylogeny.PhyBin.CoreTypes
-- | Even though the Newick format allows it, here we ignore interior node
-- labels. (They are not commonly used.)
--
-- Note that these trees are rooted. The normalize function
-- ensures that a single, canonical rooted representation is chosen.
data NewickTree a
NTLeaf :: a -> {-# UNPACK #-} !Label -> NewickTree a
NTInterior :: a -> [NewickTree a] -> NewickTree a
-- | The barebones default decorator for NewickTrees contains BOOTSTRAP and
-- BRANCHLENGTH. The bootstrap values, if present, will range in [0..100]
type DefDecor = (Maybe Int, BranchLen)
-- | The standard decoration includes everything in DefDecor plus
-- some extra cached data:
--
--
-- - branch length from parent to this node (2) bootstrap values
-- for the node
-- - subtree weights for future use (defined as number of LEAVES, not
-- counting intermediate nodes) (4) sorted lists of labels for symmetry
-- breaking
--
data StandardDecor
StandardDecor :: BranchLen -> Maybe Int -> Int -> [Label] -> StandardDecor
branchLen :: StandardDecor -> BranchLen
bootStrap :: StandardDecor -> Maybe Int
subtreeWeight :: StandardDecor -> Int
sortedLabels :: StandardDecor -> [Label]
-- | Additionally includes some scratch data that is used by the binning
-- algorithm.
type AnnotatedTree = NewickTree StandardDecor
-- | A common type of tree contains the standard decorator and also a table
-- for restoring the human-readable node names.
data FullTree a
FullTree :: TreeName -> LabelTable -> NewickTree a -> FullTree a
treename :: FullTree a -> TreeName
labelTable :: FullTree a -> LabelTable
nwtree :: FullTree a -> NewickTree a
data ClustMode
BinThem :: ClustMode
ClusterThem :: Linkage -> ClustMode
linkage :: ClustMode -> Linkage
type TreeName = String
-- | How many taxa should we expect in the incoming dataset?
data NumTaxa
-- | Supplied by the user. Committed.
Expected :: Int -> NumTaxa
-- | In the future we may automatically pick a behavior. Now this one is
-- usually an error.
Unknown :: NumTaxa
-- | Explicitly ignore this setting in favor of comparing all trees (even
-- if some are missing taxa). This only works with certain modes.
Variable :: NumTaxa
-- | Display a tree WITH the bootstrap and branch lengths. This prints in
-- NEWICK format.
displayDefaultTree :: FullTree DefDecor -> Doc
-- | The same, except with no bootstrap or branch lengths. Any tree
-- annotations ignored.
displayStrippedTree :: FullTree a -> Doc
-- | How many nodes (leaves and interior) are contained in a NewickTree?
treeSize :: NewickTree a -> Int
-- | This counts only leaf nodes, which should include all taxa.
numLeaves :: NewickTree a -> Int
liftFT :: (NewickTree t -> NewickTree a) -> FullTree t -> FullTree a
get_dec :: NewickTree t -> t
set_dec :: b -> NewickTree a -> NewickTree b
get_children :: NewickTree t -> [NewickTree t]
-- | Apply a function to all the *labels* (leaf names) in a tree.
map_labels :: (Label -> Label) -> NewickTree a -> NewickTree a
-- | Return all the labels contained in the tree.
all_labels :: NewickTree t -> [Label]
-- | This function allows one to collapse multiple trees while looking only
-- at the horizontal slice of all the annotations *at a given
-- position* in the tree.
--
-- Isomorphic must apply both to the shape and the name labels or
-- it is an error to apply this function.
foldIsomorphicTrees :: ([a] -> b) -> [NewickTree a] -> NewickTree b
-- | Average branch length across all branches in all all trees.
avg_branchlen :: HasBranchLen a => [NewickTree a] -> Double
-- | Retrieve all the bootstraps values actually present in a tree.
get_bootstraps :: NewickTree StandardDecor -> [Int]
-- | Due to the number of configuration options for the driver, we pack
-- them into a record.
data PhyBinConfig
PBC :: Bool -> NumTaxa -> (String -> String) -> String -> [String] -> Bool -> Bool -> ClustMode -> [FilePath] -> Bool -> Bool -> WhichRFMode -> Maybe [String] -> Bool -> Maybe Int -> Maybe Double -> Maybe Int -> PhyBinConfig
verbose :: PhyBinConfig -> Bool
num_taxa :: PhyBinConfig -> NumTaxa
name_hack :: PhyBinConfig -> String -> String
output_dir :: PhyBinConfig -> String
inputs :: PhyBinConfig -> [String]
do_graph :: PhyBinConfig -> Bool
do_draw :: PhyBinConfig -> Bool
clust_mode :: PhyBinConfig -> ClustMode
highlights :: PhyBinConfig -> [FilePath]
show_trees_in_dendro :: PhyBinConfig -> Bool
show_interior_consensus :: PhyBinConfig -> Bool
rfmode :: PhyBinConfig -> WhichRFMode
preprune_labels :: PhyBinConfig -> Maybe [String]
print_rfmatrix :: PhyBinConfig -> Bool
dist_thresh :: PhyBinConfig -> Maybe Int
-- | Branches less than this length are collapsed.
branch_collapse_thresh :: PhyBinConfig -> Maybe Double
-- | BootStrap values less than this result in the intermediate node being
-- collapsed.
bootstrap_collapse_thresh :: PhyBinConfig -> Maybe Int
-- | The default phybin configuration.
default_phybin_config :: PhyBinConfig
-- | Supported modes for computing RFDistance.
data WhichRFMode
HashRF :: WhichRFMode
TolerantNaive :: WhichRFMode
-- | Labels are inexpensive unique integers. The table is necessary for
-- converting them back.
type Label = Int
-- | Map labels back onto meaningful names.
type LabelTable = Map Label String
class HasBranchLen a
getBranchLen :: HasBranchLen a => a -> BranchLen
instance Show a => Show (NewickTree a)
instance Eq a => Eq (NewickTree a)
instance Ord a => Ord (NewickTree a)
instance Show StandardDecor
instance Read StandardDecor
instance Eq StandardDecor
instance Ord StandardDecor
instance Show a => Show (FullTree a)
instance Ord a => Ord (FullTree a)
instance Eq a => Eq (FullTree a)
instance Show WhichRFMode
instance Eq WhichRFMode
instance Ord WhichRFMode
instance Show NumTaxa
instance Read NumTaxa
instance Eq NumTaxa
instance Pretty StandardDecor
instance HasBranchLen DefDecor
instance HasBranchLen StandardDecor
instance (Pretty k, Pretty v) => Pretty (Map k v)
instance Pretty a => Pretty (FullTree a)
instance Pretty dec => Pretty (NewickTree dec)
instance Functor FullTree
instance Foldable FullTree
instance Foldable NewickTree
instance Functor NewickTree
module Bio.Phylogeny.PhyBin.Parser
-- | This parser ASSUMES that whitespace has been prefiltered from the
-- input.
newick_parser :: NameHack -> Parser TempTree
-- | Parse a bytestring into a NewickTree with branch lengths. The first
-- argument is file from which the data came and is just for better error
-- messages.
--
-- If the single bytestring contains more than one tree, then a number is
-- appended to the tree names.
parseNewick :: LabelTable -> NameHack -> String -> ByteString -> (LabelTable, [NewickTree DefDecor])
-- | A version which takes in-memory trees as ByteStrings.
parseNewicks :: NameHack -> [(String, ByteString)] -> (LabelTable, [FullTree DefDecor])
-- | Parse a list of trees, starting with an empty map of labels and
-- accumulating a final map.
parseNewickFiles :: NameHack -> [String] -> IO (LabelTable, [FullTree DefDecor])
unitTests :: Test
-- | This is a preprocessor which can (optionally) be invoked to collapse
-- short branch lengths.
module Bio.Phylogeny.PhyBin.PreProcessor
-- | Removes branches that do not meet a predicate, leaving a shallower,
-- bushier tree. This does NOT change the set of leaves (taxa), it
-- only removes interior nodes.
--
-- `collapseBranches pred collapser tree` uses pred to test the
-- meta-data to see if collapsing the intermediate node below the branch
-- is necessary, and if it is, it uses collapser to reduce all
-- the metadata for the collapsed branches into a single piece of
-- metadata.
collapseBranches :: (a -> Bool) -> (a -> a -> a) -> NewickTree a -> NewickTree a
-- | A common configuration. Collapse branches based on a length threshold.
collapseBranchLenThresh :: Double -> NewickTree DefDecor -> NewickTree DefDecor
-- | A common configuration. Collapse branches based on bootstrap values.
collapseBranchBootStrapThresh :: Int -> NewickTree DefDecor -> NewickTree DefDecor
-- | Prune the leaves of the tree to only those leaves in the provided set.
--
-- If ALL leaves are pruned from the set, this function returns nothing.
pruneTreeLeaves :: Set Label -> NewickTree DefDecor -> Maybe (NewickTree DefDecor)
module Bio.Phylogeny.PhyBin.RFDistance
-- | Dense sets of taxa, aka Bipartitions or BiPs We assume that taxa
-- labels have been mapped onto a dense, contiguous range of integers
-- [0,N).
--
-- Rule: Bipartitions are really two disjoint sets. But as long as the
-- parent set (the union of the partitions, aka all taxa) then a
-- bipartition can be represented just by *one* subset. Yet we must
-- choose WHICH subset for consistency. We use the rule that we always
-- choose the SMALLER. Thus the DenseLabelSet should always be half the
-- size or less, compared to the total number of taxa.
--
-- A set that is more than a majority of the taxa can be normalized by
-- flipping, i.e. taking the taxa that are NOT in that set.
type DenseLabelSet = IntSet
type DistanceMatrix = Vector (Vector Int)
-- | Get all non-singleton BiPs implied by a tree.
allBips :: NewickTree a -> Set DenseLabelSet
foldBips :: Monoid m => (DenseLabelSet -> m) -> NewickTree a -> m
-- | Print a BiPartition in a pretty form
dispBip :: LabelTable -> DenseLabelSet -> String
-- | Take only the bipartitions that are agreed on by all trees.
consensusTree :: Int -> [NewickTree a] -> NewickTree ()
-- | Convert from bipartitions BACK to a single tree.
bipsToTree :: Int -> Set DenseLabelSet -> NewickTree ()
-- | Which of a set of trees are compatible with a consensus?
filterCompatible :: NewickTree a -> [NewickTree b] -> [NewickTree b]
-- | `compatibleWith consensus tree` -- Is a tree compatible with a
-- consensus? This is more efficient if partially applied then used
-- repeatedly.
--
-- Note, tree compatibility is not the same as an exact match. It's like
-- (<=) rather than (==). The star topology is consistent with
-- the all trees, because it induces the empty set of bipartitions.
compatibleWith :: NewickTree a -> NewickTree b -> Bool
mkSingleDense :: Int -> Label -> DenseLabelSet
mkEmptyDense :: Int -> DenseLabelSet
bipSize :: DenseLabelSet -> Int
denseUnions :: Int -> [DenseLabelSet] -> DenseLabelSet
denseDiff :: IntSet -> IntSet -> IntSet
-- | Assume that total taxa are 0..N-1, invert membership:
invertDense :: Int -> DenseLabelSet -> DenseLabelSet
markLabel :: Label -> DenseLabelSet -> DenseLabelSet
-- | Returns a triangular distance matrix encoded as a vector. Also return
-- the set-of-BIPs representation for each tree.
--
-- This uses a naive method, directly computing the pairwise distance
-- between each pair of trees.
--
-- This method is TOLERANT of differences in the laba/taxa sets between
-- two trees. It simply prunes to the intersection before doing the
-- distance comparison. Other scoring methods may be added in the future.
-- (For example, penalizing for missing taxa.)
naiveDistMatrix :: [NewickTree DefDecor] -> (DistanceMatrix, Vector (Set DenseLabelSet))
-- | This version slices the problem a different way. A single pass over
-- the trees populates the table of bipartitions. Then the table can be
-- processed (locally) to produce (non-localized) increments to a
-- distance matrix.
hashRF :: Int -> [NewickTree a] -> DistanceMatrix
printDistMat :: Handle -> Vector (Vector Int) -> IO ()
instance Pretty a => Pretty (Set a)
module Bio.Phylogeny.PhyBin.Visualize
-- | Convert a NewickTree to a graphviz Dot graph representation.
dotNewickTree :: String -> Double -> FullTree StandardDecor -> DotGraph Node
-- | Convert a .dot file to .pdf.
dotToPDF :: DotGraph Node -> FilePath -> IO (Maybe FilePath)
-- | Open a GUI window to displaya tree.
--
-- Fork a thread that then runs graphviz. The channel retuned will carry
-- a single message to signal completion of the subprocess.
viewNewickTree :: String -> FullTree StandardDecor -> IO (Chan (), FullTree StandardDecor)
-- | This version shows the ordered/rooted structure of the normalized
-- tree.
dotNewickTree_debug :: String -> FullTree StandardDecor -> DotGraph Node
-- | Create a graph using TreeNames for node labels and edit-distance for
-- edge labels.
dendrogramToGraph :: Int -> Dendrogram (FullTree a) -> DendroGraph
-- | Convert to a dotGraph. Some duplicated code with dotNewickTree.
dotDendrogram :: PhyBinConfig -> String -> Double -> Dendrogram (FullTree a) -> Maybe (Map TreeName Int) -> [[NewickTree ()]] -> DotGraph Node
instance Show NdLabel
instance Ord NdLabel
instance Eq NdLabel
-- | This module contains the code that does the tree normalization and
-- binning.
module Bio.Phylogeny.PhyBin.Binning
-- | The binning function. Takes labeled trees, classifies labels into
-- equivalence classes.
binthem :: [FullTree DefDecor] -> BinResults StandardDecor
-- | This is it, here's the routine that transforms a tree into normal
-- form. This relies HEAVILY on lazy evaluation.
normalize :: AnnotatedTree -> AnnotatedTree
-- | A version lifted to operate over full trees.
normalizeFT :: FullTree StandardDecor -> FullTree StandardDecor
-- | Add the metadata that is used for binning
annotateWLabLists :: NewickTree DefDecor -> AnnotatedTree
-- | Take the extra annotations away. Inverse of annotateWLabLists.
deAnnotate :: FullTree StandardDecor -> FullTree DefDecor
-- | When binning, the members of a OneCluster are isomorphic trees. When
-- clustering based on robinson-foulds distance they are merely similar
-- trees.
newtype OneCluster a
OneCluster :: [FullTree a] -> OneCluster a
clustMembers :: OneCluster a -> [FullTree a]
-- | Index the results of binning by topology-only stripped trees that have
-- their decorations removed.
type BinResults a = Map StrippedTree (OneCluster a)
-- | Ignore metadata (but keep weights) for the purpose of binning
type StrippedTree = NewickTree Int
get_weight :: AnnotatedTree -> Int
unitTests :: Test
-- | For binning. Remove branch lengths and labels but leave weights.
anonymize_annotated :: AnnotatedTree -> StrippedTree
instance Show a => Show (OneCluster a)
-- | This module contains misc bits used by (multiple) other modules.
module Bio.Phylogeny.PhyBin.Util
is_regular_file :: FilePath -> IO Bool
-- | Expand out directories to find all the tree files.
acquireTreeFiles :: [String] -> IO [String]
-- | Step carefully in case of cycles (argh).
safePrintDendro :: DotGraph Node -> IO (Maybe Text)
sanityCheck :: Dendrogram (FullTree DefDecor) -> IO ()
-- | This module contains the code that does the tree normalization and
-- binning. It's the heart of the program.
module Bio.Phylogeny.PhyBin
-- | Driver to put all the pieces together (parse, normalize, bin)
driver :: PhyBinConfig -> IO ()
-- | The binning function. Takes labeled trees, classifies labels into
-- equivalence classes.
binthem :: [FullTree DefDecor] -> BinResults StandardDecor
-- | This is it, here's the routine that transforms a tree into normal
-- form. This relies HEAVILY on lazy evaluation.
normalize :: AnnotatedTree -> AnnotatedTree
-- | Add the metadata that is used for binning
annotateWLabLists :: NewickTree DefDecor -> AnnotatedTree
unitTests :: Test
-- | Expand out directories to find all the tree files.
acquireTreeFiles :: [String] -> IO [String]
-- | Take the extra annotations away. Inverse of annotateWLabLists.
deAnnotate :: FullTree StandardDecor -> FullTree DefDecor
-- | Parse extra trees in addition to the main inputs (for --highlight).
retrieveHighlights :: (String -> String) -> LabelTable -> [FilePath] -> IO [[NewickTree ()]]
-- | Create a predicate that tests trees for consistency with the set of
-- --highlight (consensus) trees.
--
-- Note, tree consistency is not the same as an exact match. It's like
-- (<=) rather than (==). All trees are consistent with the star
-- topology.
matchAnyHighlight :: [[NewickTree ()]] -> NewickTree () -> Bool