{-# LANGUAGE OverloadedStrings #-}
module TLynx.Distance.Distance
( distance,
)
where
import Control.Monad
( unless,
when,
)
import Control.Monad.IO.Class
import Control.Monad.Trans.Class
import Control.Monad.Trans.Reader hiding (local)
import Data.Bifunctor
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.List hiding (intersect)
import Data.Maybe
import qualified Data.Text as T
import qualified Data.Text.IO as T
import qualified Data.Vector.Unboxed as V
import ELynx.Tools.ByteString
import ELynx.Tools.ELynx
import ELynx.Tools.Environment
import ELynx.Tools.Logger
import ELynx.Tree
import Statistics.Sample
import System.IO
import TLynx.Distance.Options
import TLynx.Parsers
import Text.Printf
median :: Ord a => [a] -> a
median :: forall a. Ord a => [a] -> a
median [a]
xs = forall a. Ord a => [a] -> [a]
sort [a]
xs forall a. [a] -> Int -> a
!! Int
l2 where l2 :: Int
l2 = forall (t :: * -> *) a. Foldable t => t a -> Int
length [a]
xs forall a. Integral a => a -> a -> a
`div` Int
2
pf :: String
pf :: String
pf = String
"%.3f"
header :: Int -> Int -> DistanceMeasure -> BL.ByteString
Int
n Int
m DistanceMeasure
d =
Int -> ByteString -> ByteString
alignLeft (Int
n forall a. Num a => a -> a -> a
+ Int
2) ByteString
"Tree 1"
forall a. Semigroup a => a -> a -> a
<> Int -> ByteString -> ByteString
alignLeft (Int
n forall a. Num a => a -> a -> a
+ Int
2) ByteString
"Tree 2"
forall a. Semigroup a => a -> a -> a
<> Int -> ByteString -> ByteString
alignRight
(Int
m forall a. Num a => a -> a -> a
+ Int
2)
(String -> ByteString
BL.pack forall a b. (a -> b) -> a -> b
$ forall a. Show a => a -> String
show DistanceMeasure
d)
showTriplet ::
(PrintfArg a) => Int -> Int -> [String] -> (Int, Int, a) -> BL.ByteString
showTriplet :: forall a.
PrintfArg a =>
Int -> Int -> [String] -> (Int, Int, a) -> ByteString
showTriplet Int
n Int
m [String]
args (Int
i, Int
j, a
d) = ByteString
i' forall a. Semigroup a => a -> a -> a
<> ByteString
j' forall a. Semigroup a => a -> a -> a
<> ByteString
d'
where
i' :: ByteString
i' = Int -> ByteString -> ByteString
alignLeft (Int
n forall a. Num a => a -> a -> a
+ Int
2) forall a b. (a -> b) -> a -> b
$ String -> ByteString
BL.pack ([String]
args forall a. [a] -> Int -> a
!! Int
i)
j' :: ByteString
j' = Int -> ByteString -> ByteString
alignLeft (Int
n forall a. Num a => a -> a -> a
+ Int
2) forall a b. (a -> b) -> a -> b
$ String -> ByteString
BL.pack ([String]
args forall a. [a] -> Int -> a
!! Int
j)
d' :: ByteString
d' = Int -> ByteString -> ByteString
alignRight (Int
m forall a. Num a => a -> a -> a
+ Int
2) forall a b. (a -> b) -> a -> b
$ String -> ByteString
BL.pack (forall r. PrintfType r => String -> r
printf String
pf a
d)
pairwise ::
(a -> a -> b) ->
[a] ->
[(Int, Int, b)]
pairwise :: forall a b. (a -> a -> b) -> [a] -> [(Int, Int, b)]
pairwise a -> a -> b
dist [a]
trs =
[ (Int
i, Int
j, a -> a -> b
dist a
x a
y)
| (Int
i : [Int]
is, a
x : [a]
xs) <- forall a b. [a] -> [b] -> [(a, b)]
zip (forall a. [a] -> [[a]]
tails [Int
0 ..]) (forall a. [a] -> [[a]]
tails [a]
trs),
(Int
j, a
y) <- forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
is [a]
xs
]
distance :: ELynx DistanceArguments ()
distance :: ELynx DistanceArguments ()
distance = do
DistanceArguments
l <- forall a. Environment a -> a
localArguments forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
let nwFormat :: NewickFormat
nwFormat = DistanceArguments -> NewickFormat
argsNewickFormat DistanceArguments
l
Handle
outH <- forall a. Reproducible a => String -> String -> ELynx a Handle
outHandle String
"results" String
".out"
let mname :: Maybe String
mname = DistanceArguments -> Maybe String
argsMasterTreeFile DistanceArguments
l
Maybe (Tree Phylo Name)
mtree <- case Maybe String
mname of
Maybe String
Nothing -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a. Maybe a
Nothing
Just String
f -> do
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS forall a b. (a -> b) -> a -> b
$ String
"Read master tree from file: " forall a. Semigroup a => a -> a -> a
<> String
f forall a. Semigroup a => a -> a -> a
<> String
"."
Tree Phylo Name
t <- forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ NewickFormat -> String -> IO (Tree Phylo Name)
parseTree NewickFormat
nwFormat String
f
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Compute distances between all trees and master tree."
forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ forall a. a -> Maybe a
Just Tree Phylo Name
t
let tfps :: [String]
tfps = DistanceArguments -> [String]
argsInFiles DistanceArguments
l
(Forest Phylo Name
trees, [String]
names) <- case [String]
tfps of
[] -> forall a. HasCallStack => String -> a
error String
"No tree input files given."
[String
tf] -> do
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Read trees from single file."
Forest Phylo Name
ts <- forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ NewickFormat -> String -> IO (Forest Phylo Name)
parseTrees NewickFormat
nwFormat String
tf
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS forall a b. (a -> b) -> a -> b
$ forall a. Show a => a -> String
show (forall (t :: * -> *) a. Foldable t => t a -> Int
length Forest Phylo Name
ts) forall a. Semigroup a => a -> a -> a
<> String
" trees found in file."
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Trees are indexed with integers."
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Phylo Name
ts, forall a b. (a -> b) -> [a] -> [b]
map forall a. Show a => a -> String
show [Int
0 .. forall (t :: * -> *) a. Foldable t => t a -> Int
length Forest Phylo Name
ts forall a. Num a => a -> a -> a
- Int
1])
[String]
_ -> do
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Read trees from files."
Forest Phylo Name
ts <- forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (NewickFormat -> String -> IO (Tree Phylo Name)
parseTree NewickFormat
nwFormat) [String]
tfps
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Trees are named according to their file names."
forall (m :: * -> *) a. Monad m => a -> m a
return (Forest Phylo Name
ts, [String]
tfps)
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (forall (t :: * -> *) a. Foldable t => t a -> Bool
null Forest Phylo Name
trees) (forall a. HasCallStack => String -> a
error String
"Not enough trees found in files.")
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when
(forall a. Maybe a -> Bool
isNothing Maybe (Tree Phylo Name)
mtree Bool -> Bool -> Bool
&& forall (t :: * -> *) a. Foldable t => t a -> Int
length Forest Phylo Name
trees forall a. Eq a => a -> a -> Bool
== Int
1)
(forall a. HasCallStack => String -> a
error String
"Not enough trees found in files.")
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logDebugS String
"The trees are:"
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
ByteString -> Logger e ()
logDebugB forall a b. (a -> b) -> a -> b
$ [ByteString] -> ByteString
BL.unlines forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map forall e a.
(HasMaybeLength e, HasMaybeSupport e, HasName a) =>
Tree e a -> ByteString
toNewick Forest Phylo Name
trees
let dist :: DistanceMeasure
dist = DistanceArguments -> DistanceMeasure
argsDistance DistanceArguments
l
case DistanceArguments -> DistanceMeasure
argsDistance DistanceArguments
l of
DistanceMeasure
Symmetric -> forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Use symmetric (Robinson-Foulds) distance."
IncompatibleSplit Support
val -> do
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Use incompatible split distance."
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS forall a b. (a -> b) -> a -> b
$
String
"Collapse nodes with support less than "
forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show Support
val
forall a. [a] -> [a] -> [a]
++ String
"."
DistanceMeasure
BranchScore -> forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Use branch score distance."
let distanceMeasure' ::
Tree Phylo Name ->
Tree Phylo Name ->
Double
distanceMeasure' :: Tree Phylo Name -> Tree Phylo Name -> Double
distanceMeasure' Tree Phylo Name
t1 Tree Phylo Name
t2 = forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either forall a. HasCallStack => String -> a
error forall a. a -> a
id forall a b. (a -> b) -> a -> b
$ case DistanceMeasure
dist of
DistanceMeasure
Symmetric -> forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ forall a e1 e2.
Ord a =>
Tree e1 a -> Tree e2 a -> Either String Int
symmetric Tree Phylo Name
t1 Tree Phylo Name
t2
IncompatibleSplit Support
val ->
forall (p :: * -> * -> *) b c a.
Bifunctor p =>
(b -> c) -> p a b -> p a c
second forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$
forall a e1 e2.
(Show a, Ord a) =>
Tree e1 a -> Tree e2 a -> Either String Int
incompatibleSplits
(forall e a.
(Eq e, Eq a, HasSupport e) =>
Support -> Tree e a -> Tree e a
collapse Support
val forall a b. (a -> b) -> a -> b
$ forall e a. HasSupport e => Tree e a -> Tree e a
normalizeBranchSupport forall a b. (a -> b) -> a -> b
$ forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either forall a. HasCallStack => String -> a
error forall a. a -> a
id forall a b. (a -> b) -> a -> b
$ forall e a.
HasMaybeSupport e =>
Tree e a -> Either String (Tree Support a)
toSupportTree Tree Phylo Name
t1)
(forall e a.
(Eq e, Eq a, HasSupport e) =>
Support -> Tree e a -> Tree e a
collapse Support
val forall a b. (a -> b) -> a -> b
$ forall e a. HasSupport e => Tree e a -> Tree e a
normalizeBranchSupport forall a b. (a -> b) -> a -> b
$ forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either forall a. HasCallStack => String -> a
error forall a. a -> a
id forall a b. (a -> b) -> a -> b
$ forall e a.
HasMaybeSupport e =>
Tree e a -> Either String (Tree Support a)
toSupportTree Tree Phylo Name
t2)
DistanceMeasure
BranchScore ->
forall e1 e2 a.
(HasLength e1, HasLength e2, Ord a) =>
Tree e1 a -> Tree e2 a -> Either String Double
branchScore
(forall {a}. Tree Length a -> Tree Length a
normalizeF forall a b. (a -> b) -> a -> b
$ forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either forall a. HasCallStack => String -> a
error forall a. a -> a
id forall a b. (a -> b) -> a -> b
$ forall e a.
HasMaybeLength e =>
Tree e a -> Either String (Tree Length a)
toLengthTree Tree Phylo Name
t1)
(forall {a}. Tree Length a -> Tree Length a
normalizeF forall a b. (a -> b) -> a -> b
$ forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either forall a. HasCallStack => String -> a
error forall a. a -> a
id forall a b. (a -> b) -> a -> b
$ forall e a.
HasMaybeLength e =>
Tree e a -> Either String (Tree Length a)
toLengthTree Tree Phylo Name
t2)
where
normalizeF :: Tree Length a -> Tree Length a
normalizeF = if DistanceArguments -> Bool
argsNormalize DistanceArguments
l then forall e a. HasLength e => Tree e a -> Tree e a
normalizeBranchLengths else forall a. a -> a
id
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (DistanceArguments -> Bool
argsIntersect DistanceArguments
l) forall a b. (a -> b) -> a -> b
$
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Intersect trees before calculation of distances."
let distanceMeasure :: Tree Phylo Name -> Tree Phylo Name -> Double
distanceMeasure =
if DistanceArguments -> Bool
argsIntersect DistanceArguments
l
then
( \Tree Phylo Name
t1 Tree Phylo Name
t2 -> case forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either forall a. HasCallStack => String -> a
error forall a. a -> a
id forall a b. (a -> b) -> a -> b
$ forall e a.
(Semigroup e, Eq e, Ord a) =>
Forest e a -> Either String (Forest e a)
intersect [Tree Phylo Name
t1, Tree Phylo Name
t2] of
[Tree Phylo Name
t1', Tree Phylo Name
t2'] -> Tree Phylo Name -> Tree Phylo Name -> Double
distanceMeasure' Tree Phylo Name
t1' Tree Phylo Name
t2'
Forest Phylo Name
_ -> forall a. HasCallStack => String -> a
error String
"distance: Could not intersect trees."
)
else Tree Phylo Name -> Tree Phylo Name -> Double
distanceMeasure'
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (DistanceArguments -> Bool
argsNormalize DistanceArguments
l) forall a b. (a -> b) -> a -> b
$
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Normalize trees before calculation of distances."
let dsTriplets :: [(Int, Int, Double)]
dsTriplets = case Maybe (Tree Phylo Name)
mtree of
Maybe (Tree Phylo Name)
Nothing -> forall a b. (a -> a -> b) -> [a] -> [(Int, Int, b)]
pairwise Tree Phylo Name -> Tree Phylo Name -> Double
distanceMeasure Forest Phylo Name
trees
Just Tree Phylo Name
masterTree -> [(Int
0, Int
i, Tree Phylo Name -> Tree Phylo Name -> Double
distanceMeasure Tree Phylo Name
masterTree Tree Phylo Name
t') | (Int
i, Tree Phylo Name
t') <- forall a b. [a] -> [b] -> [(a, b)]
zip [Int
1 ..] Forest Phylo Name
trees]
ds :: [Double]
ds = forall a b. (a -> b) -> [a] -> [b]
map (\(Int
_, Int
_, Double
x) -> Double
x) [(Int, Int, Double)]
dsTriplets
dsVec :: Vector Double
dsVec = forall a. Unbox a => [a] -> Vector a
V.fromList [Double]
ds
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$
Handle -> String -> IO ()
hPutStrLn Handle
outH forall a b. (a -> b) -> a -> b
$
String
"Summary statistics of "
forall a. [a] -> [a] -> [a]
++ forall a. Show a => a -> String
show DistanceMeasure
dist
forall a. [a] -> [a] -> [a]
++ String
" Distance:"
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$
Handle -> Text -> IO ()
T.hPutStrLn Handle
outH forall a b. (a -> b) -> a -> b
$
Int -> Char -> Text -> Text
T.justifyLeft Int
10 Char
' ' Text
"Mean: "
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack
(forall r. PrintfType r => String -> r
printf String
pf (forall (v :: * -> *). Vector v Double => v Double -> Double
mean Vector Double
dsVec))
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$
Handle -> Text -> IO ()
T.hPutStrLn Handle
outH forall a b. (a -> b) -> a -> b
$
Int -> Char -> Text -> Text
T.justifyLeft Int
10 Char
' ' Text
"Median: "
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack
(forall r. PrintfType r => String -> r
printf String
pf (forall a. Ord a => [a] -> a
median [Double]
ds))
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$
Handle -> Text -> IO ()
T.hPutStrLn Handle
outH forall a b. (a -> b) -> a -> b
$
Int -> Char -> Text -> Text
T.justifyLeft Int
10 Char
' ' Text
"Variance: "
forall a. Semigroup a => a -> a -> a
<> String -> Text
T.pack
(forall r. PrintfType r => String -> r
printf String
pf (forall (v :: * -> *). Vector v Double => v Double -> Double
variance Vector Double
dsVec))
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless
(DistanceArguments -> Bool
argsSummaryStatistics DistanceArguments
l)
( do
let n :: Int
n = forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum forall a b. (a -> b) -> a -> b
$ Int
6 forall a. a -> [a] -> [a]
: forall a b. (a -> b) -> [a] -> [b]
map forall (t :: * -> *) a. Foldable t => t a -> Int
length [String]
names
m :: Int
m = forall (t :: * -> *) a. Foldable t => t a -> Int
length forall a b. (a -> b) -> a -> b
$ forall a. Show a => a -> String
show DistanceMeasure
dist
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift forall a b. (a -> b) -> a -> b
$ Handle -> String -> IO ()
hPutStrLn Handle
outH String
""
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift forall a b. (a -> b) -> a -> b
$ Handle -> ByteString -> IO ()
BL.hPutStrLn Handle
outH forall a b. (a -> b) -> a -> b
$ Int -> Int -> DistanceMeasure -> ByteString
header Int
n Int
m DistanceMeasure
dist
case Maybe String
mname of
Maybe String
Nothing ->
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift forall a b. (a -> b) -> a -> b
$
Handle -> ByteString -> IO ()
BL.hPutStr Handle
outH forall a b. (a -> b) -> a -> b
$
[ByteString] -> ByteString
BL.unlines
(forall a b. (a -> b) -> [a] -> [b]
map (forall a.
PrintfArg a =>
Int -> Int -> [String] -> (Int, Int, a) -> ByteString
showTriplet Int
n Int
m [String]
names) [(Int, Int, Double)]
dsTriplets)
Just String
mn ->
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift forall a b. (a -> b) -> a -> b
$
Handle -> ByteString -> IO ()
BL.hPutStr Handle
outH forall a b. (a -> b) -> a -> b
$
[ByteString] -> ByteString
BL.unlines
(forall a b. (a -> b) -> [a] -> [b]
map (forall a.
PrintfArg a =>
Int -> Int -> [String] -> (Int, Int, a) -> ByteString
showTriplet Int
n Int
m (String
mn forall a. a -> [a] -> [a]
: [String]
names)) [(Int, Int, Double)]
dsTriplets)
)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ Handle -> IO ()
hClose Handle
outH