{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskellQuotes #-}

-- |
-- Description :  Compute distances between trees
-- Copyright   :  2021 Dominik Schrempf
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Wed May 29 18:09:39 2019.
--
-- - Symmetric (Robinson-Foulds) distance.
-- - Incompatible splits distance.
module TLynx.Distance.Distance
  ( distance,
  )
where

import Control.Monad
  ( unless,
    when,
  )
import Control.Monad.IO.Class
import Control.Monad.Trans.Class
import Control.Monad.Trans.Reader hiding (local)
import Data.Bifunctor
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.List hiding (intersect)
import Data.Maybe
import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Data.Vector.Unboxed as V
import ELynx.Tools.ByteString
import ELynx.Tools.ELynx
import ELynx.Tools.Environment
import ELynx.Tools.Logger
import ELynx.Tree
import Statistics.Sample
import System.IO
import TLynx.Distance.Options
import TLynx.Parsers
import Text.Printf

median :: Ord a => [a] -> a
median :: [a] -> a
median [a]
xs = [a] -> [a]
forall a. Ord a => [a] -> [a]
sort [a]
xs [a] -> Int -> a
forall a. [a] -> Int -> a
!! Int
l2 where l2 :: Int
l2 = [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
xs Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
2

pf :: String
pf :: String
pf = String
"%.3f"

header :: Int -> Int -> DistanceMeasure -> BL.ByteString
header :: Int -> Int -> DistanceMeasure -> ByteString
header Int
n Int
m DistanceMeasure
d =
  Int -> ByteString -> ByteString
alignLeft (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2) ByteString
"Tree 1"
    ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> Int -> ByteString -> ByteString
alignLeft (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2) ByteString
"Tree 2"
    ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> Int -> ByteString -> ByteString
alignRight
      (Int
m Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2)
      (String -> ByteString
BL.pack (String -> ByteString) -> String -> ByteString
forall a b. (a -> b) -> a -> b
$ DistanceMeasure -> String
forall a. Show a => a -> String
show DistanceMeasure
d)

showTriplet ::
  (PrintfArg a) => Int -> Int -> [String] -> (Int, Int, a) -> BL.ByteString
showTriplet :: Int -> Int -> [String] -> (Int, Int, a) -> ByteString
showTriplet Int
n Int
m [String]
args (Int
i, Int
j, a
d) = ByteString
i' ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
j' ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
d'
  where
    i' :: ByteString
i' = Int -> ByteString -> ByteString
alignLeft (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2) (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ String -> ByteString
BL.pack ([String]
args [String] -> Int -> String
forall a. [a] -> Int -> a
!! Int
i)
    j' :: ByteString
j' = Int -> ByteString -> ByteString
alignLeft (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2) (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ String -> ByteString
BL.pack ([String]
args [String] -> Int -> String
forall a. [a] -> Int -> a
!! Int
j)
    d' :: ByteString
d' = Int -> ByteString -> ByteString
alignRight (Int
m Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2) (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ String -> ByteString
BL.pack (String -> a -> String
forall r. PrintfType r => String -> r
printf String
pf a
d)

-- Compute pairwise distances of a list of input trees. Use given distance
-- measure. Returns a triple, the first two elements are the indices of the
-- compared trees, the third is the distance.
pairwise ::
  -- | Distance function
  (a -> a -> b) ->
  -- | Input trees
  [a] ->
  -- | (index i, index j, distance i j)
  [(Int, Int, b)]
pairwise :: (a -> a -> b) -> [a] -> [(Int, Int, b)]
pairwise a -> a -> b
dist [a]
trs =
  [ (Int
i, Int
j, a -> a -> b
dist a
x a
y)
    | (Int
i : [Int]
is, a
x : [a]
xs) <- [[Int]] -> [[a]] -> [([Int], [a])]
forall a b. [a] -> [b] -> [(a, b)]
zip ([Int] -> [[Int]]
forall a. [a] -> [[a]]
tails [Int
0 ..]) ([a] -> [[a]]
forall a. [a] -> [[a]]
tails [a]
trs),
      (Int
j, a
y) <- [Int] -> [a] -> [(Int, a)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
is [a]
xs
  ]

-- | Compute distance functions between phylogenetic trees.
distance :: ELynx DistanceArguments ()
distance :: ELynx DistanceArguments ()
distance = do
  DistanceArguments
l <- Environment DistanceArguments -> DistanceArguments
forall a. Environment a -> a
localArguments (Environment DistanceArguments -> DistanceArguments)
-> ReaderT
     (Environment DistanceArguments) IO (Environment DistanceArguments)
-> ReaderT (Environment DistanceArguments) IO DistanceArguments
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ReaderT
  (Environment DistanceArguments) IO (Environment DistanceArguments)
forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
  let nwFormat :: NewickFormat
nwFormat = DistanceArguments -> NewickFormat
argsNewickFormat DistanceArguments
l
  -- Determine output handle (stdout or file).
  Handle
outH <- String -> String -> ELynx DistanceArguments Handle
forall a. Reproducible a => String -> String -> ELynx a Handle
outHandle String
"results" String
".out"
  -- Master tree (in case it is given).
  let mname :: Maybe String
mname = DistanceArguments -> Maybe String
argsMasterTreeFile DistanceArguments
l
  Maybe (Tree Phylo Name)
mtree <- case Maybe String
mname of
    Maybe String
Nothing -> Maybe (Tree Phylo Name)
-> ReaderT
     (Environment DistanceArguments) IO (Maybe (Tree Phylo Name))
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe (Tree Phylo Name)
forall a. Maybe a
Nothing
    Just String
f -> do
      String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS (String -> ELynx DistanceArguments ())
-> String -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$ String
"Read master tree from file: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
f String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"."
      Tree Phylo Name
t <- IO (Tree Phylo Name)
-> ReaderT (Environment DistanceArguments) IO (Tree Phylo Name)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Tree Phylo Name)
 -> ReaderT (Environment DistanceArguments) IO (Tree Phylo Name))
-> IO (Tree Phylo Name)
-> ReaderT (Environment DistanceArguments) IO (Tree Phylo Name)
forall a b. (a -> b) -> a -> b
$ NewickFormat -> String -> IO (Tree Phylo Name)
parseTree NewickFormat
nwFormat String
f
      String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Compute distances between all trees and master tree."
      Maybe (Tree Phylo Name)
-> ReaderT
     (Environment DistanceArguments) IO (Maybe (Tree Phylo Name))
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe (Tree Phylo Name)
 -> ReaderT
      (Environment DistanceArguments) IO (Maybe (Tree Phylo Name)))
-> Maybe (Tree Phylo Name)
-> ReaderT
     (Environment DistanceArguments) IO (Maybe (Tree Phylo Name))
forall a b. (a -> b) -> a -> b
$ Tree Phylo Name -> Maybe (Tree Phylo Name)
forall a. a -> Maybe a
Just Tree Phylo Name
t
  let tfps :: [String]
tfps = DistanceArguments -> [String]
argsInFiles DistanceArguments
l
  (Forest Phylo Name
trees, [String]
names) <- case [String]
tfps of
    [] -> String
-> ReaderT
     (Environment DistanceArguments) IO (Forest Phylo Name, [String])
forall a. HasCallStack => String -> a
error String
"No tree input files given."
    [String
tf] -> do
      String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Read trees from single file."
      Forest Phylo Name
ts <- IO (Forest Phylo Name)
-> ReaderT (Environment DistanceArguments) IO (Forest Phylo Name)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Forest Phylo Name)
 -> ReaderT (Environment DistanceArguments) IO (Forest Phylo Name))
-> IO (Forest Phylo Name)
-> ReaderT (Environment DistanceArguments) IO (Forest Phylo Name)
forall a b. (a -> b) -> a -> b
$ NewickFormat -> String -> IO (Forest Phylo Name)
parseTrees NewickFormat
nwFormat String
tf
      String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS (String -> ELynx DistanceArguments ())
-> String -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$ Int -> String
forall a. Show a => a -> String
show (Forest Phylo Name -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Forest Phylo Name
ts) String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
" trees found in file."
      String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Trees are indexed with integers."
      (Forest Phylo Name, [String])
-> ReaderT
     (Environment DistanceArguments) IO (Forest Phylo Name, [String])
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Phylo Name
ts, (Int -> String) -> [Int] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map Int -> String
forall a. Show a => a -> String
show [Int
0 .. Forest Phylo Name -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Forest Phylo Name
ts Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1])
    [String]
_ -> do
      String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Read trees from files."
      Forest Phylo Name
ts <- IO (Forest Phylo Name)
-> ReaderT (Environment DistanceArguments) IO (Forest Phylo Name)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Forest Phylo Name)
 -> ReaderT (Environment DistanceArguments) IO (Forest Phylo Name))
-> IO (Forest Phylo Name)
-> ReaderT (Environment DistanceArguments) IO (Forest Phylo Name)
forall a b. (a -> b) -> a -> b
$ (String -> IO (Tree Phylo Name))
-> [String] -> IO (Forest Phylo Name)
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (NewickFormat -> String -> IO (Tree Phylo Name)
parseTree NewickFormat
nwFormat) [String]
tfps
      String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Trees are named according to their file names."
      (Forest Phylo Name, [String])
-> ReaderT
     (Environment DistanceArguments) IO (Forest Phylo Name, [String])
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Phylo Name
ts, [String]
tfps)
  Bool -> ELynx DistanceArguments () -> ELynx DistanceArguments ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Forest Phylo Name -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null Forest Phylo Name
trees) (String -> ELynx DistanceArguments ()
forall a. HasCallStack => String -> a
error String
"Not enough trees found in files.")
  Bool -> ELynx DistanceArguments () -> ELynx DistanceArguments ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when
    (Maybe (Tree Phylo Name) -> Bool
forall a. Maybe a -> Bool
isNothing Maybe (Tree Phylo Name)
mtree Bool -> Bool -> Bool
&& Forest Phylo Name -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Forest Phylo Name
trees Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1)
    (String -> ELynx DistanceArguments ()
forall a. HasCallStack => String -> a
error String
"Not enough trees found in files.")
  -- when (isNothing mtree) $ logInfoS
  --   "Compute pairwise distances between trees from different files."
  String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logDebugS String
"The trees are:"
  ByteString -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
ByteString -> Logger e ()
logDebugB (ByteString -> ELynx DistanceArguments ())
-> ByteString -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ByteString
BL.unlines ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ (Tree Phylo Name -> ByteString)
-> Forest Phylo Name -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map Tree Phylo Name -> ByteString
forall e a.
(HasMaybeLength e, HasMaybeSupport e, HasName a) =>
Tree e a -> ByteString
toNewick Forest Phylo Name
trees
  -- Set the distance measure.
  let dist :: DistanceMeasure
dist = DistanceArguments -> DistanceMeasure
argsDistance DistanceArguments
l
  case DistanceArguments -> DistanceMeasure
argsDistance DistanceArguments
l of
    DistanceMeasure
Symmetric -> String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Use symmetric (Robinson-Foulds) distance."
    IncompatibleSplit Support
val -> do
      String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Use incompatible split distance."
      String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS (String -> ELynx DistanceArguments ())
-> String -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
        String
"Collapse nodes with support less than "
          String -> String -> String
forall a. [a] -> [a] -> [a]
++ Support -> String
forall a. Show a => a -> String
show Support
val
          String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"."
    DistanceMeasure
BranchScore -> String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Use branch score distance."
  let distanceMeasure' ::
        Tree Phylo Name ->
        Tree Phylo Name ->
        Double
      distanceMeasure' :: Tree Phylo Name -> Tree Phylo Name -> Double
distanceMeasure' Tree Phylo Name
t1 Tree Phylo Name
t2 = (String -> Double)
-> (Double -> Double) -> Either String Double -> Double
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Double
forall a. HasCallStack => String -> a
error Double -> Double
forall a. a -> a
id (Either String Double -> Double) -> Either String Double -> Double
forall a b. (a -> b) -> a -> b
$ case DistanceMeasure
dist of
        DistanceMeasure
Symmetric -> (Int -> Double) -> Either String Int -> Either String Double
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Either String Int -> Either String Double)
-> Either String Int -> Either String Double
forall a b. (a -> b) -> a -> b
$ Tree Phylo Name -> Tree Phylo Name -> Either String Int
forall a e1 e2.
Ord a =>
Tree e1 a -> Tree e2 a -> Either String Int
symmetric Tree Phylo Name
t1 Tree Phylo Name
t2
        IncompatibleSplit Support
val ->
          (Int -> Double) -> Either String Int -> Either String Double
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Either String Int -> Either String Double)
-> Either String Int -> Either String Double
forall a b. (a -> b) -> a -> b
$
            Tree Support Name -> Tree Support Name -> Either String Int
forall a e1 e2.
(Show a, Ord a) =>
Tree e1 a -> Tree e2 a -> Either String Int
incompatibleSplits
              (Support -> Tree Support Name -> Tree Support Name
forall e a.
(Eq e, Eq a, HasSupport e) =>
Support -> Tree e a -> Tree e a
collapse Support
val (Tree Support Name -> Tree Support Name)
-> Tree Support Name -> Tree Support Name
forall a b. (a -> b) -> a -> b
$ Tree Support Name -> Tree Support Name
forall e a. HasSupport e => Tree e a -> Tree e a
normalizeBranchSupport (Tree Support Name -> Tree Support Name)
-> Tree Support Name -> Tree Support Name
forall a b. (a -> b) -> a -> b
$ (String -> Tree Support Name)
-> (Tree Support Name -> Tree Support Name)
-> Either String (Tree Support Name)
-> Tree Support Name
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Tree Support Name
forall a. HasCallStack => String -> a
error Tree Support Name -> Tree Support Name
forall a. a -> a
id (Either String (Tree Support Name) -> Tree Support Name)
-> Either String (Tree Support Name) -> Tree Support Name
forall a b. (a -> b) -> a -> b
$ Tree Phylo Name -> Either String (Tree Support Name)
forall e a.
HasMaybeSupport e =>
Tree e a -> Either String (Tree Support a)
toSupportTree Tree Phylo Name
t1)
              (Support -> Tree Support Name -> Tree Support Name
forall e a.
(Eq e, Eq a, HasSupport e) =>
Support -> Tree e a -> Tree e a
collapse Support
val (Tree Support Name -> Tree Support Name)
-> Tree Support Name -> Tree Support Name
forall a b. (a -> b) -> a -> b
$ Tree Support Name -> Tree Support Name
forall e a. HasSupport e => Tree e a -> Tree e a
normalizeBranchSupport (Tree Support Name -> Tree Support Name)
-> Tree Support Name -> Tree Support Name
forall a b. (a -> b) -> a -> b
$ (String -> Tree Support Name)
-> (Tree Support Name -> Tree Support Name)
-> Either String (Tree Support Name)
-> Tree Support Name
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Tree Support Name
forall a. HasCallStack => String -> a
error Tree Support Name -> Tree Support Name
forall a. a -> a
id (Either String (Tree Support Name) -> Tree Support Name)
-> Either String (Tree Support Name) -> Tree Support Name
forall a b. (a -> b) -> a -> b
$ Tree Phylo Name -> Either String (Tree Support Name)
forall e a.
HasMaybeSupport e =>
Tree e a -> Either String (Tree Support a)
toSupportTree Tree Phylo Name
t2)
        DistanceMeasure
BranchScore ->
          Tree Length Name -> Tree Length Name -> Either String Double
forall e1 e2 a.
(HasLength e1, HasLength e2, Ord a) =>
Tree e1 a -> Tree e2 a -> Either String Double
branchScore
            (Tree Length Name -> Tree Length Name
forall a. Tree Length a -> Tree Length a
normalizeF (Tree Length Name -> Tree Length Name)
-> Tree Length Name -> Tree Length Name
forall a b. (a -> b) -> a -> b
$ (String -> Tree Length Name)
-> (Tree Length Name -> Tree Length Name)
-> Either String (Tree Length Name)
-> Tree Length Name
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Tree Length Name
forall a. HasCallStack => String -> a
error Tree Length Name -> Tree Length Name
forall a. a -> a
id (Either String (Tree Length Name) -> Tree Length Name)
-> Either String (Tree Length Name) -> Tree Length Name
forall a b. (a -> b) -> a -> b
$ Tree Phylo Name -> Either String (Tree Length Name)
forall e a.
HasMaybeLength e =>
Tree e a -> Either String (Tree Length a)
toLengthTree Tree Phylo Name
t1)
            (Tree Length Name -> Tree Length Name
forall a. Tree Length a -> Tree Length a
normalizeF (Tree Length Name -> Tree Length Name)
-> Tree Length Name -> Tree Length Name
forall a b. (a -> b) -> a -> b
$ (String -> Tree Length Name)
-> (Tree Length Name -> Tree Length Name)
-> Either String (Tree Length Name)
-> Tree Length Name
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Tree Length Name
forall a. HasCallStack => String -> a
error Tree Length Name -> Tree Length Name
forall a. a -> a
id (Either String (Tree Length Name) -> Tree Length Name)
-> Either String (Tree Length Name) -> Tree Length Name
forall a b. (a -> b) -> a -> b
$ Tree Phylo Name -> Either String (Tree Length Name)
forall e a.
HasMaybeLength e =>
Tree e a -> Either String (Tree Length a)
toLengthTree Tree Phylo Name
t2)
        where
          normalizeF :: Tree Length a -> Tree Length a
normalizeF = if DistanceArguments -> Bool
argsNormalize DistanceArguments
l then Tree Length a -> Tree Length a
forall e a. HasLength e => Tree e a -> Tree e a
normalizeBranchLengths else Tree Length a -> Tree Length a
forall a. a -> a
id
  -- Possibly intersect trees before distance calculation.
  Bool -> ELynx DistanceArguments () -> ELynx DistanceArguments ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (DistanceArguments -> Bool
argsIntersect DistanceArguments
l) (ELynx DistanceArguments () -> ELynx DistanceArguments ())
-> ELynx DistanceArguments () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
    String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Intersect trees before calculation of distances."
  let distanceMeasure :: Tree Phylo Name -> Tree Phylo Name -> Double
distanceMeasure =
        if DistanceArguments -> Bool
argsIntersect DistanceArguments
l
          then
            ( \Tree Phylo Name
t1 Tree Phylo Name
t2 -> case (String -> Forest Phylo Name)
-> (Forest Phylo Name -> Forest Phylo Name)
-> Either String (Forest Phylo Name)
-> Forest Phylo Name
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Forest Phylo Name
forall a. HasCallStack => String -> a
error Forest Phylo Name -> Forest Phylo Name
forall a. a -> a
id (Either String (Forest Phylo Name) -> Forest Phylo Name)
-> Either String (Forest Phylo Name) -> Forest Phylo Name
forall a b. (a -> b) -> a -> b
$ Forest Phylo Name -> Either String (Forest Phylo Name)
forall e a.
(Semigroup e, Eq e, Ord a) =>
Forest e a -> Either String (Forest e a)
intersect [Tree Phylo Name
t1, Tree Phylo Name
t2] of
                [Tree Phylo Name
t1', Tree Phylo Name
t2'] -> Tree Phylo Name -> Tree Phylo Name -> Double
distanceMeasure' Tree Phylo Name
t1' Tree Phylo Name
t2'
                Forest Phylo Name
_ -> String -> Double
forall a. HasCallStack => String -> a
error String
"distance: Could not intersect trees."
            )
          else Tree Phylo Name -> Tree Phylo Name -> Double
distanceMeasure'
  -- Possibly normalize trees.
  Bool -> ELynx DistanceArguments () -> ELynx DistanceArguments ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (DistanceArguments -> Bool
argsNormalize DistanceArguments
l) (ELynx DistanceArguments () -> ELynx DistanceArguments ())
-> ELynx DistanceArguments () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
    String -> ELynx DistanceArguments ()
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Normalize trees before calculation of distances."
  let dsTriplets :: [(Int, Int, Double)]
dsTriplets = case Maybe (Tree Phylo Name)
mtree of
        Maybe (Tree Phylo Name)
Nothing -> (Tree Phylo Name -> Tree Phylo Name -> Double)
-> Forest Phylo Name -> [(Int, Int, Double)]
forall a b. (a -> a -> b) -> [a] -> [(Int, Int, b)]
pairwise Tree Phylo Name -> Tree Phylo Name -> Double
distanceMeasure Forest Phylo Name
trees
        Just Tree Phylo Name
masterTree -> [(Int
0, Int
i, Tree Phylo Name -> Tree Phylo Name -> Double
distanceMeasure Tree Phylo Name
masterTree Tree Phylo Name
t') | (Int
i, Tree Phylo Name
t') <- [Int] -> Forest Phylo Name -> [(Int, Tree Phylo Name)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
1 ..] Forest Phylo Name
trees]
      ds :: [Double]
ds = ((Int, Int, Double) -> Double) -> [(Int, Int, Double)] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (\(Int
_, Int
_, Double
x) -> Double
x) [(Int, Int, Double)]
dsTriplets
      dsVec :: Vector Double
dsVec = [Double] -> Vector Double
forall a. Unbox a => [a] -> Vector a
V.fromList [Double]
ds
  IO () -> ELynx DistanceArguments ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ELynx DistanceArguments ())
-> IO () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
    Handle -> String -> IO ()
hPutStrLn Handle
outH (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$
      String
"Summary statistics of "
        String -> String -> String
forall a. [a] -> [a] -> [a]
++ DistanceMeasure -> String
forall a. Show a => a -> String
show DistanceMeasure
dist
        String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" Distance:"
  IO () -> ELynx DistanceArguments ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ELynx DistanceArguments ())
-> IO () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
    Handle -> Text -> IO ()
T.hPutStrLn Handle
outH (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$
      Int -> Char -> Text -> Text
T.justifyLeft Int
10 Char
' ' Text
"Mean: "
        Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack
          (String -> Double -> String
forall r. PrintfType r => String -> r
printf String
pf (Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
mean Vector Double
dsVec))
  IO () -> ELynx DistanceArguments ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ELynx DistanceArguments ())
-> IO () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
    Handle -> Text -> IO ()
T.hPutStrLn Handle
outH (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$
      Int -> Char -> Text -> Text
T.justifyLeft Int
10 Char
' ' Text
"Median: "
        Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack
          (String -> Double -> String
forall r. PrintfType r => String -> r
printf String
pf ([Double] -> Double
forall a. Ord a => [a] -> a
median [Double]
ds))
  IO () -> ELynx DistanceArguments ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ELynx DistanceArguments ())
-> IO () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
    Handle -> Text -> IO ()
T.hPutStrLn Handle
outH (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$
      Int -> Char -> Text -> Text
T.justifyLeft Int
10 Char
' ' Text
"Variance: "
        Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack
          (String -> Double -> String
forall r. PrintfType r => String -> r
printf String
pf (Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
variance Vector Double
dsVec))

  Bool -> ELynx DistanceArguments () -> ELynx DistanceArguments ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless
    (DistanceArguments -> Bool
argsSummaryStatistics DistanceArguments
l)
    ( do
        let n :: Int
n = [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ Int
6 Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: (String -> Int) -> [String] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [String]
names
            m :: Int
m = String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length (String -> Int) -> String -> Int
forall a b. (a -> b) -> a -> b
$ DistanceMeasure -> String
forall a. Show a => a -> String
show DistanceMeasure
dist
        IO () -> ELynx DistanceArguments ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (IO () -> ELynx DistanceArguments ())
-> IO () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$ Handle -> String -> IO ()
hPutStrLn Handle
outH String
""
        IO () -> ELynx DistanceArguments ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (IO () -> ELynx DistanceArguments ())
-> IO () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$ Handle -> ByteString -> IO ()
BL.hPutStrLn Handle
outH (ByteString -> IO ()) -> ByteString -> IO ()
forall a b. (a -> b) -> a -> b
$ Int -> Int -> DistanceMeasure -> ByteString
header Int
n Int
m DistanceMeasure
dist
        case Maybe String
mname of
          Maybe String
Nothing ->
            IO () -> ELynx DistanceArguments ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (IO () -> ELynx DistanceArguments ())
-> IO () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
              Handle -> ByteString -> IO ()
BL.hPutStr Handle
outH (ByteString -> IO ()) -> ByteString -> IO ()
forall a b. (a -> b) -> a -> b
$
                [ByteString] -> ByteString
BL.unlines
                  (((Int, Int, Double) -> ByteString)
-> [(Int, Int, Double)] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map (Int -> Int -> [String] -> (Int, Int, Double) -> ByteString
forall a.
PrintfArg a =>
Int -> Int -> [String] -> (Int, Int, a) -> ByteString
showTriplet Int
n Int
m [String]
names) [(Int, Int, Double)]
dsTriplets)
          Just String
mn ->
            IO () -> ELynx DistanceArguments ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (IO () -> ELynx DistanceArguments ())
-> IO () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
              Handle -> ByteString -> IO ()
BL.hPutStr Handle
outH (ByteString -> IO ()) -> ByteString -> IO ()
forall a b. (a -> b) -> a -> b
$
                [ByteString] -> ByteString
BL.unlines
                  (((Int, Int, Double) -> ByteString)
-> [(Int, Int, Double)] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map (Int -> Int -> [String] -> (Int, Int, Double) -> ByteString
forall a.
PrintfArg a =>
Int -> Int -> [String] -> (Int, Int, a) -> ByteString
showTriplet Int
n Int
m (String
mn String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String]
names)) [(Int, Int, Double)]
dsTriplets)
    )
  IO () -> ELynx DistanceArguments ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ELynx DistanceArguments ())
-> IO () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$ Handle -> IO ()
hClose Handle
outH