{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TemplateHaskell #-}

-- |
-- Description :  Compute distances between trees
-- Copyright   :  (c) Dominik Schrempf 2020
-- License     :  GPL-3.0-or-later
--
-- Maintainer  :  dominik.schrempf@gmail.com
-- Stability   :  unstable
-- Portability :  portable
--
-- Creation date: Wed May 29 18:09:39 2019.
--
-- - Symmetric (Robinson-Foulds) distance.
-- - Incompatible splits distance.
module TLynx.Distance.Distance
  ( distance,
  )
where

import Control.Monad
  ( unless,
    when,
  )
import Control.Monad.IO.Class
import Control.Monad.Logger
  ( logDebug,
    logInfo,
  )
import Control.Monad.Trans.Class
import Control.Monad.Trans.Reader hiding (local)
import Data.Bifunctor
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.List hiding (intersect)
import Data.Maybe
import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Data.Text.Lazy as LT
import qualified Data.Text.Lazy.Encoding as LT
import qualified Data.Vector.Unboxed as V
import ELynx.Tools
import ELynx.Tree
import Statistics.Sample
import System.IO
import TLynx.Distance.Options
import TLynx.Parsers
import Text.Printf

median :: Ord a => [a] -> a
median :: [a] -> a
median [a]
xs = [a] -> [a]
forall a. Ord a => [a] -> [a]
sort [a]
xs [a] -> Int -> a
forall a. [a] -> Int -> a
!! Int
l2 where l2 :: Int
l2 = [a] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
xs Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
2

pf :: String
pf :: String
pf = String
"%.3f"

header :: Int -> Int -> DistanceMeasure -> BL.ByteString
header :: Int -> Int -> DistanceMeasure -> ByteString
header Int
n Int
m DistanceMeasure
d =
  Int -> ByteString -> ByteString
alignLeft (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2) ByteString
"Tree 1" ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> Int -> ByteString -> ByteString
alignLeft (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2) ByteString
"Tree 2"
    ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> Int -> ByteString -> ByteString
alignRight
      (Int
m Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2)
      (String -> ByteString
BL.pack (String -> ByteString) -> String -> ByteString
forall a b. (a -> b) -> a -> b
$ DistanceMeasure -> String
forall a. Show a => a -> String
show DistanceMeasure
d)

showTriplet ::
  (PrintfArg a) => Int -> Int -> [String] -> (Int, Int, a) -> BL.ByteString
showTriplet :: Int -> Int -> [String] -> (Int, Int, a) -> ByteString
showTriplet Int
n Int
m [String]
args (Int
i, Int
j, a
d) = ByteString
i' ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
j' ByteString -> ByteString -> ByteString
forall a. Semigroup a => a -> a -> a
<> ByteString
d'
  where
    i' :: ByteString
i' = Int -> ByteString -> ByteString
alignLeft (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2) (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ String -> ByteString
BL.pack ([String]
args [String] -> Int -> String
forall a. [a] -> Int -> a
!! Int
i)
    j' :: ByteString
j' = Int -> ByteString -> ByteString
alignLeft (Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2) (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ String -> ByteString
BL.pack ([String]
args [String] -> Int -> String
forall a. [a] -> Int -> a
!! Int
j)
    d' :: ByteString
d' = Int -> ByteString -> ByteString
alignRight (Int
m Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
2) (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ String -> ByteString
BL.pack (String -> a -> String
forall r. PrintfType r => String -> r
printf String
pf a
d)

-- Compute pairwise distances of a list of input trees. Use given distance
-- measure. Returns a triple, the first two elements are the indices of the
-- compared trees, the third is the distance.
pairwise ::
  -- | Distance function
  (a -> a -> b) ->
  -- | Input trees
  [a] ->
  -- | (index i, index j, distance i j)
  [(Int, Int, b)]
pairwise :: (a -> a -> b) -> [a] -> [(Int, Int, b)]
pairwise a -> a -> b
dist [a]
trs =
  [ (Int
i, Int
j, a -> a -> b
dist a
x a
y)
    | (Int
i : [Int]
is, a
x : [a]
xs) <- [[Int]] -> [[a]] -> [([Int], [a])]
forall a b. [a] -> [b] -> [(a, b)]
zip ([Int] -> [[Int]]
forall a. [a] -> [[a]]
tails [Int
0 ..]) ([a] -> [[a]]
forall a. [a] -> [[a]]
tails [a]
trs),
      (Int
j, a
y) <- [Int] -> [a] -> [(Int, a)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
is [a]
xs
  ]

-- | Compute distance functions between phylogenetic trees.
distance :: ELynx DistanceArguments ()
distance :: ELynx DistanceArguments ()
distance = do
  DistanceArguments
l <- Arguments DistanceArguments -> DistanceArguments
forall a. Arguments a -> a
local (Arguments DistanceArguments -> DistanceArguments)
-> ReaderT
     (Arguments DistanceArguments)
     (LoggingT IO)
     (Arguments DistanceArguments)
-> ReaderT
     (Arguments DistanceArguments) (LoggingT IO) DistanceArguments
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ReaderT
  (Arguments DistanceArguments)
  (LoggingT IO)
  (Arguments DistanceArguments)
forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
  let nwFormat :: NewickFormat
nwFormat = DistanceArguments -> NewickFormat
argsNewickFormat DistanceArguments
l
  -- Determine output handle (stdout or file).
  Handle
outH <- String -> String -> ELynx DistanceArguments Handle
forall a. Reproducible a => String -> String -> ELynx a Handle
outHandle String
"results" String
".out"
  -- Master tree (in case it is given).
  let mname :: Maybe String
mname = DistanceArguments -> Maybe String
argsMasterTreeFile DistanceArguments
l
  Maybe (Tree Phylo Name)
mtree <- case Maybe String
mname of
    Maybe String
Nothing -> Maybe (Tree Phylo Name)
-> ReaderT
     (Arguments DistanceArguments)
     (LoggingT IO)
     (Maybe (Tree Phylo Name))
forall (m :: * -> *) a. Monad m => a -> m a
return Maybe (Tree Phylo Name)
forall a. Maybe a
Nothing
    Just String
f -> do
      $(Int
String
LogLevel
String -> Text
String -> String -> String -> CharPos -> CharPos -> Loc
Text -> Text
Loc -> Text -> LogLevel -> Text -> ELynx DistanceArguments ()
(Text -> ELynx DistanceArguments ())
-> (Text -> Text) -> Text -> ELynx DistanceArguments ()
forall a. a -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
pack :: String -> Text
monadLoggerLog :: forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
id :: forall a. a -> a
. :: forall b c a. (b -> c) -> (a -> b) -> a -> c
logInfo) (Text -> ELynx DistanceArguments ())
-> Text -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$ String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$ String
"Read master tree from file: " String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
f String -> String -> String
forall a. Semigroup a => a -> a -> a
<> String
"."
      Tree Phylo Name
t <- IO (Tree Phylo Name)
-> ReaderT
     (Arguments DistanceArguments) (LoggingT IO) (Tree Phylo Name)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Tree Phylo Name)
 -> ReaderT
      (Arguments DistanceArguments) (LoggingT IO) (Tree Phylo Name))
-> IO (Tree Phylo Name)
-> ReaderT
     (Arguments DistanceArguments) (LoggingT IO) (Tree Phylo Name)
forall a b. (a -> b) -> a -> b
$ NewickFormat -> String -> IO (Tree Phylo Name)
parseTree NewickFormat
nwFormat String
f
      $(Int
String
LogLevel
String -> Text
String -> String -> String -> CharPos -> CharPos -> Loc
Text -> Text
Loc -> Text -> LogLevel -> Text -> ELynx DistanceArguments ()
(Text -> ELynx DistanceArguments ())
-> (Text -> Text) -> Text -> ELynx DistanceArguments ()
forall a. a -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
pack :: String -> Text
monadLoggerLog :: forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
id :: forall a. a -> a
. :: forall b c a. (b -> c) -> (a -> b) -> a -> c
logInfo) Text
"Compute distances between all trees and master tree."
      Maybe (Tree Phylo Name)
-> ReaderT
     (Arguments DistanceArguments)
     (LoggingT IO)
     (Maybe (Tree Phylo Name))
forall (m :: * -> *) a. Monad m => a -> m a
return (Maybe (Tree Phylo Name)
 -> ReaderT
      (Arguments DistanceArguments)
      (LoggingT IO)
      (Maybe (Tree Phylo Name)))
-> Maybe (Tree Phylo Name)
-> ReaderT
     (Arguments DistanceArguments)
     (LoggingT IO)
     (Maybe (Tree Phylo Name))
forall a b. (a -> b) -> a -> b
$ Tree Phylo Name -> Maybe (Tree Phylo Name)
forall a. a -> Maybe a
Just Tree Phylo Name
t
  let tfps :: [String]
tfps = DistanceArguments -> [String]
argsInFiles DistanceArguments
l
  (Forest Phylo Name
trees, [String]
names) <- case [String]
tfps of
    [] -> String
-> ReaderT
     (Arguments DistanceArguments)
     (LoggingT IO)
     (Forest Phylo Name, [String])
forall a. HasCallStack => String -> a
error String
"No tree input files given."
    [String
tf] -> do
      $(Int
String
LogLevel
String -> Text
String -> String -> String -> CharPos -> CharPos -> Loc
Text -> Text
Loc -> Text -> LogLevel -> Text -> ELynx DistanceArguments ()
(Text -> ELynx DistanceArguments ())
-> (Text -> Text) -> Text -> ELynx DistanceArguments ()
forall a. a -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
pack :: String -> Text
monadLoggerLog :: forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
id :: forall a. a -> a
. :: forall b c a. (b -> c) -> (a -> b) -> a -> c
logInfo) Text
"Read trees from single file."
      Forest Phylo Name
ts <- IO (Forest Phylo Name)
-> ReaderT
     (Arguments DistanceArguments) (LoggingT IO) (Forest Phylo Name)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Forest Phylo Name)
 -> ReaderT
      (Arguments DistanceArguments) (LoggingT IO) (Forest Phylo Name))
-> IO (Forest Phylo Name)
-> ReaderT
     (Arguments DistanceArguments) (LoggingT IO) (Forest Phylo Name)
forall a b. (a -> b) -> a -> b
$ NewickFormat -> String -> IO (Forest Phylo Name)
parseTrees NewickFormat
nwFormat String
tf
      $(Int
String
LogLevel
String -> Text
String -> String -> String -> CharPos -> CharPos -> Loc
Text -> Text
Loc -> Text -> LogLevel -> Text -> ELynx DistanceArguments ()
(Text -> ELynx DistanceArguments ())
-> (Text -> Text) -> Text -> ELynx DistanceArguments ()
forall a. a -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
pack :: String -> Text
monadLoggerLog :: forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
id :: forall a. a -> a
. :: forall b c a. (b -> c) -> (a -> b) -> a -> c
logInfo) (Text -> ELynx DistanceArguments ())
-> Text -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$ Int -> Text
forall a. Show a => a -> Text
tShow (Forest Phylo Name -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Forest Phylo Name
ts) Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" trees found in file."
      $(Int
String
LogLevel
String -> Text
String -> String -> String -> CharPos -> CharPos -> Loc
Text -> Text
Loc -> Text -> LogLevel -> Text -> ELynx DistanceArguments ()
(Text -> ELynx DistanceArguments ())
-> (Text -> Text) -> Text -> ELynx DistanceArguments ()
forall a. a -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
pack :: String -> Text
monadLoggerLog :: forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
id :: forall a. a -> a
. :: forall b c a. (b -> c) -> (a -> b) -> a -> c
logInfo) Text
"Trees are indexed with integers."
      (Forest Phylo Name, [String])
-> ReaderT
     (Arguments DistanceArguments)
     (LoggingT IO)
     (Forest Phylo Name, [String])
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Phylo Name
ts, (Int -> String) -> [Int] -> [String]
forall a b. (a -> b) -> [a] -> [b]
map Int -> String
forall a. Show a => a -> String
show [Int
0 .. Forest Phylo Name -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Forest Phylo Name
ts Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1])
    [String]
_ -> do
      $(Int
String
LogLevel
String -> Text
String -> String -> String -> CharPos -> CharPos -> Loc
Text -> Text
Loc -> Text -> LogLevel -> Text -> ELynx DistanceArguments ()
(Text -> ELynx DistanceArguments ())
-> (Text -> Text) -> Text -> ELynx DistanceArguments ()
forall a. a -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
pack :: String -> Text
monadLoggerLog :: forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
id :: forall a. a -> a
. :: forall b c a. (b -> c) -> (a -> b) -> a -> c
logInfo) Text
"Read trees from files."
      Forest Phylo Name
ts <- IO (Forest Phylo Name)
-> ReaderT
     (Arguments DistanceArguments) (LoggingT IO) (Forest Phylo Name)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Forest Phylo Name)
 -> ReaderT
      (Arguments DistanceArguments) (LoggingT IO) (Forest Phylo Name))
-> IO (Forest Phylo Name)
-> ReaderT
     (Arguments DistanceArguments) (LoggingT IO) (Forest Phylo Name)
forall a b. (a -> b) -> a -> b
$ (String -> IO (Tree Phylo Name))
-> [String] -> IO (Forest Phylo Name)
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (NewickFormat -> String -> IO (Tree Phylo Name)
parseTree NewickFormat
nwFormat) [String]
tfps
      $(Int
String
LogLevel
String -> Text
String -> String -> String -> CharPos -> CharPos -> Loc
Text -> Text
Loc -> Text -> LogLevel -> Text -> ELynx DistanceArguments ()
(Text -> ELynx DistanceArguments ())
-> (Text -> Text) -> Text -> ELynx DistanceArguments ()
forall a. a -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
pack :: String -> Text
monadLoggerLog :: forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
id :: forall a. a -> a
. :: forall b c a. (b -> c) -> (a -> b) -> a -> c
logInfo) Text
"Trees are named according to their file names."
      (Forest Phylo Name, [String])
-> ReaderT
     (Arguments DistanceArguments)
     (LoggingT IO)
     (Forest Phylo Name, [String])
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Phylo Name
ts, [String]
tfps)
  Bool -> ELynx DistanceArguments () -> ELynx DistanceArguments ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Forest Phylo Name -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null Forest Phylo Name
trees) (String -> ELynx DistanceArguments ()
forall a. HasCallStack => String -> a
error String
"Not enough trees found in files.")
  Bool -> ELynx DistanceArguments () -> ELynx DistanceArguments ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when
    (Maybe (Tree Phylo Name) -> Bool
forall a. Maybe a -> Bool
isNothing Maybe (Tree Phylo Name)
mtree Bool -> Bool -> Bool
&& Forest Phylo Name -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Forest Phylo Name
trees Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
1)
    (String -> ELynx DistanceArguments ()
forall a. HasCallStack => String -> a
error String
"Not enough trees found in files.")
  -- when (isNothing mtree) $ $(logInfo)
  --   "Compute pairwise distances between trees from different files."
  $(Int
String
LogLevel
String -> Text
String -> String -> String -> CharPos -> CharPos -> Loc
Text -> Text
Loc -> Text -> LogLevel -> Text -> ELynx DistanceArguments ()
(Text -> ELynx DistanceArguments ())
-> (Text -> Text) -> Text -> ELynx DistanceArguments ()
forall a. a -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
pack :: String -> Text
monadLoggerLog :: forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
id :: forall a. a -> a
. :: forall b c a. (b -> c) -> (a -> b) -> a -> c
logDebug) Text
"The trees are:"
  $(Int
String
LogLevel
String -> Text
String -> String -> String -> CharPos -> CharPos -> Loc
Text -> Text
Loc -> Text -> LogLevel -> Text -> ELynx DistanceArguments ()
(Text -> ELynx DistanceArguments ())
-> (Text -> Text) -> Text -> ELynx DistanceArguments ()
forall a. a -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
pack :: String -> Text
monadLoggerLog :: forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
id :: forall a. a -> a
. :: forall b c a. (b -> c) -> (a -> b) -> a -> c
logDebug) (Text -> ELynx DistanceArguments ())
-> Text -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$ Text -> Text
LT.toStrict (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ ByteString -> Text
LT.decodeUtf8 (ByteString -> Text) -> ByteString -> Text
forall a b. (a -> b) -> a -> b
$ [ByteString] -> ByteString
BL.unlines ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ (Tree Phylo Name -> ByteString)
-> Forest Phylo Name -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map Tree Phylo Name -> ByteString
forall a. HasName a => Tree Phylo a -> ByteString
toNewick Forest Phylo Name
trees
  -- Set the distance measure.
  let dist :: DistanceMeasure
dist = DistanceArguments -> DistanceMeasure
argsDistance DistanceArguments
l
  case DistanceArguments -> DistanceMeasure
argsDistance DistanceArguments
l of
    DistanceMeasure
Symmetric -> $(Int
String
LogLevel
String -> Text
String -> String -> String -> CharPos -> CharPos -> Loc
Text -> Text
Loc -> Text -> LogLevel -> Text -> ELynx DistanceArguments ()
(Text -> ELynx DistanceArguments ())
-> (Text -> Text) -> Text -> ELynx DistanceArguments ()
forall a. a -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
pack :: String -> Text
monadLoggerLog :: forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
id :: forall a. a -> a
. :: forall b c a. (b -> c) -> (a -> b) -> a -> c
logInfo) Text
"Use symmetric (Robinson-Foulds) distance."
    IncompatibleSplit Support
val -> do
      $(Int
String
LogLevel
String -> Text
String -> String -> String -> CharPos -> CharPos -> Loc
Text -> Text
Loc -> Text -> LogLevel -> Text -> ELynx DistanceArguments ()
(Text -> ELynx DistanceArguments ())
-> (Text -> Text) -> Text -> ELynx DistanceArguments ()
forall a. a -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
pack :: String -> Text
monadLoggerLog :: forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
id :: forall a. a -> a
. :: forall b c a. (b -> c) -> (a -> b) -> a -> c
logInfo) Text
"Use incompatible split distance."
      $(Int
String
LogLevel
String -> Text
String -> String -> String -> CharPos -> CharPos -> Loc
Text -> Text
Loc -> Text -> LogLevel -> Text -> ELynx DistanceArguments ()
(Text -> ELynx DistanceArguments ())
-> (Text -> Text) -> Text -> ELynx DistanceArguments ()
forall a. a -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
pack :: String -> Text
monadLoggerLog :: forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
id :: forall a. a -> a
. :: forall b c a. (b -> c) -> (a -> b) -> a -> c
logInfo) (Text -> ELynx DistanceArguments ())
-> Text -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
        String -> Text
T.pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$
          String
"Collapse nodes with support less than "
            String -> String -> String
forall a. [a] -> [a] -> [a]
++ Support -> String
forall a. Show a => a -> String
show Support
val
            String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
"."
    DistanceMeasure
BranchScore -> $(Int
String
LogLevel
String -> Text
String -> String -> String -> CharPos -> CharPos -> Loc
Text -> Text
Loc -> Text -> LogLevel -> Text -> ELynx DistanceArguments ()
(Text -> ELynx DistanceArguments ())
-> (Text -> Text) -> Text -> ELynx DistanceArguments ()
forall a. a -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
pack :: String -> Text
monadLoggerLog :: forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
id :: forall a. a -> a
. :: forall b c a. (b -> c) -> (a -> b) -> a -> c
logInfo) Text
"Use branch score distance."
  let distanceMeasure' ::
        Tree Phylo Name ->
        Tree Phylo Name ->
        Double
      distanceMeasure' :: Tree Phylo Name -> Tree Phylo Name -> Double
distanceMeasure' Tree Phylo Name
t1 Tree Phylo Name
t2 = (String -> Double)
-> (Double -> Double) -> Either String Double -> Double
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Double
forall a. HasCallStack => String -> a
error Double -> Double
forall a. a -> a
id (Either String Double -> Double) -> Either String Double -> Double
forall a b. (a -> b) -> a -> b
$ case DistanceMeasure
dist of
        DistanceMeasure
Symmetric -> (Int -> Double) -> Either String Int -> Either String Double
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Either String Int -> Either String Double)
-> Either String Int -> Either String Double
forall a b. (a -> b) -> a -> b
$ Tree Phylo Name -> Tree Phylo Name -> Either String Int
forall a e1 e2.
Ord a =>
Tree e1 a -> Tree e2 a -> Either String Int
symmetric Tree Phylo Name
t1 Tree Phylo Name
t2
        IncompatibleSplit Support
val ->
          (Int -> Double) -> Either String Int -> Either String Double
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Either String Int -> Either String Double)
-> Either String Int -> Either String Double
forall a b. (a -> b) -> a -> b
$
            Tree Support Name -> Tree Support Name -> Either String Int
forall a e1 e2.
(Show a, Ord a) =>
Tree e1 a -> Tree e2 a -> Either String Int
incompatibleSplits
              (Support -> Tree Support Name -> Tree Support Name
forall e a.
(Eq e, Eq a, HasSupport e) =>
Support -> Tree e a -> Tree e a
collapse Support
val (Tree Support Name -> Tree Support Name)
-> Tree Support Name -> Tree Support Name
forall a b. (a -> b) -> a -> b
$ Tree Support Name -> Tree Support Name
forall e a. HasSupport e => Tree e a -> Tree e a
normalizeBranchSupport (Tree Support Name -> Tree Support Name)
-> Tree Support Name -> Tree Support Name
forall a b. (a -> b) -> a -> b
$ (String -> Tree Support Name)
-> (Tree Support Name -> Tree Support Name)
-> Either String (Tree Support Name)
-> Tree Support Name
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Tree Support Name
forall a. HasCallStack => String -> a
error Tree Support Name -> Tree Support Name
forall a. a -> a
id (Either String (Tree Support Name) -> Tree Support Name)
-> Either String (Tree Support Name) -> Tree Support Name
forall a b. (a -> b) -> a -> b
$ Tree Phylo Name -> Either String (Tree Support Name)
forall a. Tree Phylo a -> Either String (Tree Support a)
phyloToSupportTree Tree Phylo Name
t1)
              (Support -> Tree Support Name -> Tree Support Name
forall e a.
(Eq e, Eq a, HasSupport e) =>
Support -> Tree e a -> Tree e a
collapse Support
val (Tree Support Name -> Tree Support Name)
-> Tree Support Name -> Tree Support Name
forall a b. (a -> b) -> a -> b
$ Tree Support Name -> Tree Support Name
forall e a. HasSupport e => Tree e a -> Tree e a
normalizeBranchSupport (Tree Support Name -> Tree Support Name)
-> Tree Support Name -> Tree Support Name
forall a b. (a -> b) -> a -> b
$ (String -> Tree Support Name)
-> (Tree Support Name -> Tree Support Name)
-> Either String (Tree Support Name)
-> Tree Support Name
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Tree Support Name
forall a. HasCallStack => String -> a
error Tree Support Name -> Tree Support Name
forall a. a -> a
id (Either String (Tree Support Name) -> Tree Support Name)
-> Either String (Tree Support Name) -> Tree Support Name
forall a b. (a -> b) -> a -> b
$ Tree Phylo Name -> Either String (Tree Support Name)
forall a. Tree Phylo a -> Either String (Tree Support a)
phyloToSupportTree Tree Phylo Name
t2)
        DistanceMeasure
BranchScore ->
          Tree Length Name -> Tree Length Name -> Either String Double
forall e1 e2 a.
(HasLength e1, HasLength e2, Ord a) =>
Tree e1 a -> Tree e2 a -> Either String Double
branchScore (Tree Length Name -> Tree Length Name
forall a. Tree Length a -> Tree Length a
normalizeF (Tree Length Name -> Tree Length Name)
-> Tree Length Name -> Tree Length Name
forall a b. (a -> b) -> a -> b
$ (String -> Tree Length Name)
-> (Tree Length Name -> Tree Length Name)
-> Either String (Tree Length Name)
-> Tree Length Name
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Tree Length Name
forall a. HasCallStack => String -> a
error Tree Length Name -> Tree Length Name
forall a. a -> a
id (Either String (Tree Length Name) -> Tree Length Name)
-> Either String (Tree Length Name) -> Tree Length Name
forall a b. (a -> b) -> a -> b
$ Tree Phylo Name -> Either String (Tree Length Name)
forall a. Tree Phylo a -> Either String (Tree Length a)
phyloToLengthTree Tree Phylo Name
t1) (Tree Length Name -> Tree Length Name
forall a. Tree Length a -> Tree Length a
normalizeF (Tree Length Name -> Tree Length Name)
-> Tree Length Name -> Tree Length Name
forall a b. (a -> b) -> a -> b
$ (String -> Tree Length Name)
-> (Tree Length Name -> Tree Length Name)
-> Either String (Tree Length Name)
-> Tree Length Name
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Tree Length Name
forall a. HasCallStack => String -> a
error Tree Length Name -> Tree Length Name
forall a. a -> a
id (Either String (Tree Length Name) -> Tree Length Name)
-> Either String (Tree Length Name) -> Tree Length Name
forall a b. (a -> b) -> a -> b
$ Tree Phylo Name -> Either String (Tree Length Name)
forall a. Tree Phylo a -> Either String (Tree Length a)
phyloToLengthTree Tree Phylo Name
t2)
        where
          normalizeF :: Tree Length a -> Tree Length a
normalizeF = if DistanceArguments -> Bool
argsNormalize DistanceArguments
l then Tree Length a -> Tree Length a
forall e a. HasLength e => Tree e a -> Tree e a
normalizeBranchLengths else Tree Length a -> Tree Length a
forall a. a -> a
id
  -- Possibly intersect trees before distance calculation.
  Bool -> ELynx DistanceArguments () -> ELynx DistanceArguments ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (DistanceArguments -> Bool
argsIntersect DistanceArguments
l) (ELynx DistanceArguments () -> ELynx DistanceArguments ())
-> ELynx DistanceArguments () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
    $(Int
String
LogLevel
String -> Text
String -> String -> String -> CharPos -> CharPos -> Loc
Text -> Text
Loc -> Text -> LogLevel -> Text -> ELynx DistanceArguments ()
(Text -> ELynx DistanceArguments ())
-> (Text -> Text) -> Text -> ELynx DistanceArguments ()
forall a. a -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
pack :: String -> Text
monadLoggerLog :: forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
id :: forall a. a -> a
. :: forall b c a. (b -> c) -> (a -> b) -> a -> c
logInfo) Text
"Intersect trees before calculation of distances."
  let distanceMeasure :: Tree Phylo Name -> Tree Phylo Name -> Double
distanceMeasure =
        if DistanceArguments -> Bool
argsIntersect DistanceArguments
l
          then
            ( \Tree Phylo Name
t1 Tree Phylo Name
t2 ->
                let [Tree Phylo Name
t1', Tree Phylo Name
t2'] = (String -> Forest Phylo Name)
-> (Forest Phylo Name -> Forest Phylo Name)
-> Either String (Forest Phylo Name)
-> Forest Phylo Name
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either String -> Forest Phylo Name
forall a. HasCallStack => String -> a
error Forest Phylo Name -> Forest Phylo Name
forall a. a -> a
id (Either String (Forest Phylo Name) -> Forest Phylo Name)
-> Either String (Forest Phylo Name) -> Forest Phylo Name
forall a b. (a -> b) -> a -> b
$ Forest Phylo Name -> Either String (Forest Phylo Name)
forall e a.
(Semigroup e, Eq e, Ord a) =>
Forest e a -> Either String (Forest e a)
intersect [Tree Phylo Name
t1, Tree Phylo Name
t2]
                 in Tree Phylo Name -> Tree Phylo Name -> Double
distanceMeasure' Tree Phylo Name
t1' Tree Phylo Name
t2'
            )
          else Tree Phylo Name -> Tree Phylo Name -> Double
distanceMeasure'
  -- Possibly normalize trees.
  Bool -> ELynx DistanceArguments () -> ELynx DistanceArguments ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (DistanceArguments -> Bool
argsNormalize DistanceArguments
l) (ELynx DistanceArguments () -> ELynx DistanceArguments ())
-> ELynx DistanceArguments () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
    $(Int
String
LogLevel
String -> Text
String -> String -> String -> CharPos -> CharPos -> Loc
Text -> Text
Loc -> Text -> LogLevel -> Text -> ELynx DistanceArguments ()
(Text -> ELynx DistanceArguments ())
-> (Text -> Text) -> Text -> ELynx DistanceArguments ()
forall a. a -> a
forall b c a. (b -> c) -> (a -> b) -> a -> c
forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
pack :: String -> Text
monadLoggerLog :: forall (m :: * -> *) msg.
(MonadLogger m, ToLogStr msg) =>
Loc -> Text -> LogLevel -> msg -> m ()
id :: forall a. a -> a
. :: forall b c a. (b -> c) -> (a -> b) -> a -> c
logInfo) Text
"Normalize trees before calculation of distances."
  let dsTriplets :: [(Int, Int, Double)]
dsTriplets = case Maybe (Tree Phylo Name)
mtree of
        Maybe (Tree Phylo Name)
Nothing -> (Tree Phylo Name -> Tree Phylo Name -> Double)
-> Forest Phylo Name -> [(Int, Int, Double)]
forall a b. (a -> a -> b) -> [a] -> [(Int, Int, b)]
pairwise Tree Phylo Name -> Tree Phylo Name -> Double
distanceMeasure Forest Phylo Name
trees
        Just Tree Phylo Name
masterTree -> [(Int
0, Int
i, Tree Phylo Name -> Tree Phylo Name -> Double
distanceMeasure Tree Phylo Name
masterTree Tree Phylo Name
t') | (Int
i, Tree Phylo Name
t') <- [Int] -> Forest Phylo Name -> [(Int, Tree Phylo Name)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
1 ..] Forest Phylo Name
trees]
      ds :: [Double]
ds = ((Int, Int, Double) -> Double) -> [(Int, Int, Double)] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map (\(Int
_, Int
_, Double
x) -> Double
x) [(Int, Int, Double)]
dsTriplets
      dsVec :: Vector Double
dsVec = [Double] -> Vector Double
forall a. Unbox a => [a] -> Vector a
V.fromList [Double]
ds
  IO () -> ELynx DistanceArguments ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ELynx DistanceArguments ())
-> IO () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
    Handle -> String -> IO ()
hPutStrLn Handle
outH (String -> IO ()) -> String -> IO ()
forall a b. (a -> b) -> a -> b
$
      String
"Summary statistics of "
        String -> String -> String
forall a. [a] -> [a] -> [a]
++ DistanceMeasure -> String
forall a. Show a => a -> String
show DistanceMeasure
dist
        String -> String -> String
forall a. [a] -> [a] -> [a]
++ String
" Distance:"
  IO () -> ELynx DistanceArguments ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ELynx DistanceArguments ())
-> IO () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
    Handle -> Text -> IO ()
T.hPutStrLn Handle
outH (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$
      Int -> Char -> Text -> Text
T.justifyLeft Int
10 Char
' ' Text
"Mean: "
        Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack
          (String -> Double -> String
forall r. PrintfType r => String -> r
printf String
pf (Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
mean Vector Double
dsVec))
  IO () -> ELynx DistanceArguments ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ELynx DistanceArguments ())
-> IO () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
    Handle -> Text -> IO ()
T.hPutStrLn Handle
outH (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$
      Int -> Char -> Text -> Text
T.justifyLeft Int
10 Char
' ' Text
"Median: "
        Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack
          (String -> Double -> String
forall r. PrintfType r => String -> r
printf String
pf ([Double] -> Double
forall a. Ord a => [a] -> a
median [Double]
ds))
  IO () -> ELynx DistanceArguments ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ELynx DistanceArguments ())
-> IO () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
    Handle -> Text -> IO ()
T.hPutStrLn Handle
outH (Text -> IO ()) -> Text -> IO ()
forall a b. (a -> b) -> a -> b
$
      Int -> Char -> Text -> Text
T.justifyLeft Int
10 Char
' ' Text
"Variance: "
        Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack
          (String -> Double -> String
forall r. PrintfType r => String -> r
printf String
pf (Vector Double -> Double
forall (v :: * -> *). Vector v Double => v Double -> Double
variance Vector Double
dsVec))
  -- BS.putStrLn $ BS.unlines $ map toNewick ts
  -- BS.putStrLn $ BS.unlines $ map toNewick tsN
  -- BS.putStrLn $ BS.unlines $ map toNewick tsC

  LoggingT IO () -> ELynx DistanceArguments ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (LoggingT IO () -> ELynx DistanceArguments ())
-> LoggingT IO () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$
    Bool -> LoggingT IO () -> LoggingT IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless
      (DistanceArguments -> Bool
argsSummaryStatistics DistanceArguments
l)
      ( do
          let n :: Int
n = [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ Int
6 Int -> [Int] -> [Int]
forall a. a -> [a] -> [a]
: (String -> Int) -> [String] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [String]
names
              m :: Int
m = String -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length (String -> Int) -> String -> Int
forall a b. (a -> b) -> a -> b
$ DistanceMeasure -> String
forall a. Show a => a -> String
show DistanceMeasure
dist
          IO () -> LoggingT IO ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (IO () -> LoggingT IO ()) -> IO () -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ Handle -> String -> IO ()
hPutStrLn Handle
outH String
""
          IO () -> LoggingT IO ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (IO () -> LoggingT IO ()) -> IO () -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ Handle -> ByteString -> IO ()
BL.hPutStrLn Handle
outH (ByteString -> IO ()) -> ByteString -> IO ()
forall a b. (a -> b) -> a -> b
$ Int -> Int -> DistanceMeasure -> ByteString
header Int
n Int
m DistanceMeasure
dist
          case Maybe String
mname of
            Maybe String
Nothing ->
              IO () -> LoggingT IO ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (IO () -> LoggingT IO ()) -> IO () -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$
                Handle -> ByteString -> IO ()
BL.hPutStr Handle
outH (ByteString -> IO ()) -> ByteString -> IO ()
forall a b. (a -> b) -> a -> b
$
                  [ByteString] -> ByteString
BL.unlines
                    (((Int, Int, Double) -> ByteString)
-> [(Int, Int, Double)] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map (Int -> Int -> [String] -> (Int, Int, Double) -> ByteString
forall a.
PrintfArg a =>
Int -> Int -> [String] -> (Int, Int, a) -> ByteString
showTriplet Int
n Int
m [String]
names) [(Int, Int, Double)]
dsTriplets)
            Just String
mn ->
              IO () -> LoggingT IO ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (IO () -> LoggingT IO ()) -> IO () -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$
                Handle -> ByteString -> IO ()
BL.hPutStr Handle
outH (ByteString -> IO ()) -> ByteString -> IO ()
forall a b. (a -> b) -> a -> b
$
                  [ByteString] -> ByteString
BL.unlines
                    (((Int, Int, Double) -> ByteString)
-> [(Int, Int, Double)] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map (Int -> Int -> [String] -> (Int, Int, Double) -> ByteString
forall a.
PrintfArg a =>
Int -> Int -> [String] -> (Int, Int, a) -> ByteString
showTriplet Int
n Int
m (String
mn String -> [String] -> [String]
forall a. a -> [a] -> [a]
: [String]
names)) [(Int, Int, Double)]
dsTriplets)
      )
  IO () -> ELynx DistanceArguments ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ELynx DistanceArguments ())
-> IO () -> ELynx DistanceArguments ()
forall a b. (a -> b) -> a -> b
$ Handle -> IO ()
hClose Handle
outH