{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
module SLynx.Examine.Examine
( examineCmd,
)
where
import Control.Monad
import Control.Monad.IO.Class
import Control.Monad.Trans.Reader
import qualified Data.ByteString.Lazy.Char8 as BL
import qualified Data.Matrix.Unboxed as MU
import qualified Data.Set as S
import qualified Data.Vector.Unboxed as V
import qualified ELynx.Alphabet.Alphabet as A
import qualified ELynx.Alphabet.Character as C
import qualified ELynx.Sequence.Alignment as M
import qualified ELynx.Sequence.Distance as D
import ELynx.Sequence.Divergence
import qualified ELynx.Sequence.Sequence as Seq
import ELynx.Tools.ByteString
import ELynx.Tools.ELynx
import ELynx.Tools.Environment
import ELynx.Tools.Logger
import qualified Numeric.LinearAlgebra as L
import SLynx.Examine.Options
import SLynx.Tools
import qualified Statistics.Sample as Sm
import System.IO
import Text.Printf
pRow :: String -> String -> BL.ByteString
pRow :: String -> String -> ByteString
pRow String
name String
val = Int -> ByteString -> ByteString
alignLeft Int
50 ByteString
n forall a. Semigroup a => a -> a -> a
<> Int -> ByteString -> ByteString
alignRight Int
10 ByteString
v
where
n :: ByteString
n = String -> ByteString
BL.pack String
name
v :: ByteString
v = String -> ByteString
BL.pack String
val
examineAlignment :: Bool -> M.Alignment -> BL.ByteString
examineAlignment :: Bool -> Alignment -> ByteString
examineAlignment Bool
perSiteFlag Alignment
a =
[ByteString] -> ByteString
BL.unlines
[ ByteString
"Sequences have equal length (multi sequence alignment, or single sequence).",
ByteString
"Number of columns in alignment:",
String -> String -> ByteString
pRow String
" Total:" forall a b. (a -> b) -> a -> b
$ forall a. Show a => a -> String
show Int
aL,
String -> String -> ByteString
pRow String
" Constant:" forall a b. (a -> b) -> a -> b
$ forall a. Show a => a -> String
show Int
nConstant,
String -> String -> ByteString
pRow String
" Constant (including gaps or unknowns):" forall a b. (a -> b) -> a -> b
$ forall a. Show a => a -> String
show Int
nConstantSoft,
String -> String -> ByteString
pRow String
" Without gaps:" forall a b. (a -> b) -> a -> b
$ forall a. Show a => a -> String
show (Alignment -> Int
M.length Alignment
aNoGaps),
String -> String -> ByteString
pRow String
" With standard characters only:" forall a b. (a -> b) -> a -> b
$
forall a. Show a => a -> String
show (Alignment -> Int
M.length Alignment
aOnlyStd),
ByteString
"",
String -> String -> ByteString
pRow String
"Total number of characters:" forall a b. (a -> b) -> a -> b
$ forall a. Show a => a -> String
show Int
nTot,
String -> String -> ByteString
pRow String
"Standard (i.e., not extended IUPAC) characters:" forall a b. (a -> b) -> a -> b
$
forall a. Show a => a -> String
show (Int
nTot forall a. Num a => a -> a -> a
- Int
nIUPAC forall a. Num a => a -> a -> a
- Int
nGaps forall a. Num a => a -> a -> a
- Int
nUnknowns),
String -> String -> ByteString
pRow String
"Extended IUPAC characters:" forall a b. (a -> b) -> a -> b
$ forall a. Show a => a -> String
show Int
nIUPAC,
String -> String -> ByteString
pRow String
"Gaps:" forall a b. (a -> b) -> a -> b
$ forall a. Show a => a -> String
show Int
nGaps,
String -> String -> ByteString
pRow String
"Unknowns:" forall a b. (a -> b) -> a -> b
$ forall a. Show a => a -> String
show Int
nUnknowns,
ByteString
"",
String -> String -> ByteString
pRow String
"Percentage of standard characters:" forall a b. (a -> b) -> a -> b
$
forall r. PrintfType r => String -> r
printf String
"%2.2f" (Double
100.0 forall a. Num a => a -> a -> a
- Double
percentIUPAC forall a. Num a => a -> a -> a
- Double
percentGaps forall a. Num a => a -> a -> a
- Double
percentUnknowns),
String -> String -> ByteString
pRow String
"Percentage of extended IUPAC characters:" forall a b. (a -> b) -> a -> b
$
forall r. PrintfType r => String -> r
printf String
"%2.2f" Double
percentIUPAC,
String -> String -> ByteString
pRow String
"Percentage of gaps:" forall a b. (a -> b) -> a -> b
$ forall r. PrintfType r => String -> r
printf String
"%2.2f" Double
percentGaps,
String -> String -> ByteString
pRow String
"Percentage of unknowns:" forall a b. (a -> b) -> a -> b
$ forall r. PrintfType r => String -> r
printf String
"%2.2f" Double
percentUnknowns,
ByteString
"",
String -> ByteString
BL.pack String
"Distribution of characters:",
String -> ByteString
BL.pack forall a b. (a -> b) -> a -> b
$
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap ((forall a. a -> [a] -> [a]
: String
" ") forall b c a. (b -> c) -> (a -> b) -> a -> c
. Character -> Char
C.toChar) forall a b. (a -> b) -> a -> b
$
forall a. Set a -> [a]
S.toList forall a b. (a -> b) -> a -> b
$
AlphabetSpec -> Set Character
A.std forall a b. (a -> b) -> a -> b
$
Alphabet -> AlphabetSpec
A.alphabetSpec forall a b. (a -> b) -> a -> b
$
Alignment -> Alphabet
M.alphabet Alignment
a,
String -> ByteString
BL.pack forall a b. (a -> b) -> a -> b
$ [String] -> String
unwords forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map (forall r. PrintfType r => String -> r
printf String
"%.3f") [Double]
charFreqs,
ByteString
"",
String -> ByteString
BL.pack String
"Pairwise hamming distances (per site):",
String -> String -> ByteString
pRow String
" Mean:" forall a b. (a -> b) -> a -> b
$ forall r. PrintfType r => String -> r
printf String
"%.3f" Double
hMean,
String -> String -> ByteString
pRow String
" Standard deviation:" forall a b. (a -> b) -> a -> b
$ forall r. PrintfType r => String -> r
printf String
"%.3f" forall a b. (a -> b) -> a -> b
$ forall a. Floating a => a -> a
sqrt Double
hVar,
String -> String -> ByteString
pRow String
" Minimum:" forall a b. (a -> b) -> a -> b
$ forall r. PrintfType r => String -> r
printf String
"%.3f" Double
hMin,
String -> String -> ByteString
pRow String
" Maximum:" forall a b. (a -> b) -> a -> b
$ forall r. PrintfType r => String -> r
printf String
"%.3f" Double
hMax,
ByteString
"",
ByteString
"Mean effective number of states (measured using entropy):",
String -> String -> ByteString
pRow String
"Across whole alignment:" forall a b. (a -> b) -> a -> b
$ forall r. PrintfType r => String -> r
printf String
"%.3f" Double
kEffMean,
String -> String -> ByteString
pRow String
"Across columns without gaps:" forall a b. (a -> b) -> a -> b
$ forall r. PrintfType r => String -> r
printf String
"%.3f" Double
kEffMeanNoGaps,
String -> String -> ByteString
pRow String
"Across columns without extended IUPAC characters:" forall a b. (a -> b) -> a -> b
$
forall r. PrintfType r => String -> r
printf String
"%.3f" Double
kEffMeanOnlyStd,
ByteString
"",
String -> ByteString
BL.pack String
"Mean effective number of states (measured using homoplasy):",
String -> String -> ByteString
pRow String
"Across whole alignment:" forall a b. (a -> b) -> a -> b
$ forall r. PrintfType r => String -> r
printf String
"%.3f" Double
kEffMeanHomo,
String -> String -> ByteString
pRow String
"Across columns without gaps:" forall a b. (a -> b) -> a -> b
$ forall r. PrintfType r => String -> r
printf String
"%.3f" Double
kEffMeanNoGapsHomo,
String -> String -> ByteString
pRow String
"Across columns without extended IUPAC characters:" forall a b. (a -> b) -> a -> b
$
forall r. PrintfType r => String -> r
printf String
"%.3f" Double
kEffMeanOnlyStdHomo,
ByteString
"",
ByteString
"Divergence matrix:"
]
forall a. Semigroup a => a -> a -> a
<> ByteString
perSiteBS
where
aL :: Int
aL = Alignment -> Int
M.length Alignment
a
nConstant :: Int
nConstant = Alignment -> Int
M.length forall a b. (a -> b) -> a -> b
$ Alignment -> Alignment
M.filterColsConstant Alignment
a
nConstantSoft :: Int
nConstantSoft = Alignment -> Int
M.length forall a b. (a -> b) -> a -> b
$ Alignment -> Alignment
M.filterColsConstantSoft Alignment
a
nTot :: Int
nTot = Alignment -> Int
M.length Alignment
a forall a. Num a => a -> a -> a
* Alignment -> Int
M.nSequences Alignment
a
nIUPAC :: Int
nIUPAC = Alignment -> Int
M.countIUPACChars Alignment
a
nGaps :: Int
nGaps = Alignment -> Int
M.countGaps Alignment
a
nUnknowns :: Int
nUnknowns = Alignment -> Int
M.countUnknowns Alignment
a
percentIUPAC :: Double
percentIUPAC = Double
100 forall a. Num a => a -> a -> a
* forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
nIUPAC forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
nTot :: Double
percentGaps :: Double
percentGaps = Double
100 forall a. Num a => a -> a -> a
* forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
nGaps forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
nTot :: Double
percentUnknowns :: Double
percentUnknowns = Double
100 forall a. Num a => a -> a -> a
* forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
nUnknowns forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
nTot :: Double
aNoGaps :: Alignment
aNoGaps = Alignment -> Alignment
M.filterColsNoGaps Alignment
a
aNoGapsFreq :: FrequencyData
aNoGapsFreq = Alignment -> FrequencyData
M.toFrequencyData Alignment
aNoGaps
aOnlyStd :: Alignment
aOnlyStd = Alignment -> Alignment
M.filterColsOnlyStd Alignment
aNoGaps
aOnlyStdFreq :: FrequencyData
aOnlyStdFreq = Alignment -> FrequencyData
M.toFrequencyData Alignment
aOnlyStd
charFreqsPerSite :: FrequencyData
charFreqsPerSite = Alignment -> FrequencyData
M.toFrequencyData Alignment
a
charFreqs :: [Double]
charFreqs = FrequencyData -> [Double]
M.distribution FrequencyData
charFreqsPerSite
seqs :: [Sequence]
seqs = Alignment -> [Sequence]
M.toSequences Alignment
a
normlz :: a -> a
normlz a
x = forall a b. (Integral a, Num b) => a -> b
fromIntegral a
x forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
aL
pairwiseHamming :: Vector Double
pairwiseHamming =
forall a. Unbox a => [a] -> Vector a
V.fromList
[ forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either forall a. HasCallStack => String -> a
error forall {a} {a}. (Fractional a, Integral a) => a -> a
normlz forall a b. (a -> b) -> a -> b
$ Sequence -> Sequence -> Either String Int
D.hamming Sequence
x Sequence
y
| Sequence
x <- [Sequence]
seqs,
Sequence
y <- [Sequence]
seqs,
Sequence
x forall a. Eq a => a -> a -> Bool
/= Sequence
y
]
(Double
hMean, Double
hVar) = forall (v :: * -> *).
Vector v Double =>
v Double -> (Double, Double)
Sm.meanVariance Vector Double
pairwiseHamming
hMin :: Double
hMin = forall a. (Unbox a, Ord a) => Vector a -> a
V.minimum Vector Double
pairwiseHamming
hMax :: Double
hMax = forall a. (Unbox a, Ord a) => Vector a -> a
V.maximum Vector Double
pairwiseHamming
kEffs :: [Double]
kEffs = FrequencyData -> [Double]
M.kEffEntropy FrequencyData
charFreqsPerSite
kEffsNoGaps :: [Double]
kEffsNoGaps = FrequencyData -> [Double]
M.kEffEntropy FrequencyData
aNoGapsFreq
kEffsOnlyStd :: [Double]
kEffsOnlyStd = FrequencyData -> [Double]
M.kEffEntropy FrequencyData
aOnlyStdFreq
kEffMean :: Double
kEffMean = forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Double]
kEffs forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
kEffs)
kEffMeanNoGaps :: Double
kEffMeanNoGaps = forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Double]
kEffsNoGaps forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
kEffsNoGaps)
kEffMeanOnlyStd :: Double
kEffMeanOnlyStd = forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Double]
kEffsOnlyStd forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
kEffsOnlyStd)
kEffsHomo :: [Double]
kEffsHomo = FrequencyData -> [Double]
M.kEffHomoplasy FrequencyData
charFreqsPerSite
kEffsNoGapsHomo :: [Double]
kEffsNoGapsHomo = FrequencyData -> [Double]
M.kEffHomoplasy FrequencyData
aNoGapsFreq
kEffsOnlyStdHomo :: [Double]
kEffsOnlyStdHomo = FrequencyData -> [Double]
M.kEffHomoplasy FrequencyData
aOnlyStdFreq
kEffMeanHomo :: Double
kEffMeanHomo = forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Double]
kEffsHomo forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
kEffsHomo)
kEffMeanNoGapsHomo :: Double
kEffMeanNoGapsHomo =
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Double]
kEffsNoGapsHomo forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
kEffsNoGapsHomo)
kEffMeanOnlyStdHomo :: Double
kEffMeanOnlyStdHomo =
forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum [Double]
kEffsOnlyStdHomo forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral (forall (t :: * -> *) a. Foldable t => t a -> Int
length [Double]
kEffsOnlyStdHomo)
perSiteBS :: ByteString
perSiteBS =
if Bool
perSiteFlag
then
[ByteString] -> ByteString
BL.unlines
[ String -> ByteString
BL.pack String
"Effective number of used states per site (measured using entropy):",
String -> ByteString
BL.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> String
show forall a b. (a -> b) -> a -> b
$ [Double]
kEffs
]
else ByteString
BL.empty
examine :: Bool -> [Seq.Sequence] -> BL.ByteString
examine :: Bool -> [Sequence] -> ByteString
examine Bool
perSiteFlag [Sequence]
ss =
[Sequence] -> ByteString
Seq.summarizeSequences [Sequence]
ss forall a. Semigroup a => a -> a -> a
<> case [Sequence] -> Either String Alignment
M.fromSequences [Sequence]
ss of
Left String
_ -> ByteString
BL.empty
Right Alignment
a -> String -> ByteString
BL.pack String
"\n" forall a. Semigroup a => a -> a -> a
<> Bool -> Alignment -> ByteString
examineAlignment Bool
perSiteFlag Alignment
a
tuples :: [a] -> [(a, a)]
tuples :: forall a. [a] -> [(a, a)]
tuples [] = []
tuples (a
x : [a]
xs) = forall a b. (a -> b) -> [a] -> [b]
map (a
x,) [a]
xs forall a. [a] -> [a] -> [a]
++ forall a. [a] -> [(a, a)]
tuples [a]
xs
writeDivergenceMatrix :: Handle -> (Seq.Sequence, Seq.Sequence, MU.Matrix Int) -> IO ()
writeDivergenceMatrix :: Handle -> (Sequence, Sequence, Matrix Int) -> IO ()
writeDivergenceMatrix Handle
h (Sequence
x, Sequence
y, Matrix Int
xs) = do
Handle -> ByteString -> IO ()
BL.hPutStrLn Handle
h forall a b. (a -> b) -> a -> b
$ ByteString
"> " forall a. Semigroup a => a -> a -> a
<> Sequence -> ByteString
Seq.name Sequence
x forall a. Semigroup a => a -> a -> a
<> ByteString
", " forall a. Semigroup a => a -> a -> a
<> Sequence -> ByteString
Seq.name Sequence
y
Handle -> String -> IO ()
hPutStr Handle
h forall a b. (a -> b) -> a -> b
$ Int -> Matrix Double -> String
L.dispf Int
0 Matrix Double
m
where
n :: Int
n = forall a. Context a => Matrix a -> Int
MU.rows Matrix Int
xs
m :: Matrix Double
m = Int -> [Double] -> Matrix Double
L.matrix Int
n forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map forall a b. (Integral a, Num b) => a -> b
fromIntegral forall a b. (a -> b) -> a -> b
$ forall a. Context a => Matrix a -> [a]
MU.toList Matrix Int
xs
computeDivergenceMatrices :: [Seq.Sequence] -> ELynx ExamineArguments ()
computeDivergenceMatrices :: [Sequence] -> ELynx ExamineArguments ()
computeDivergenceMatrices [Sequence]
ss = do
forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
String -> Logger e ()
logInfoS String
"Compute divergence matrices."
let xys :: [(Sequence, Sequence)]
xys = forall a. [a] -> [(a, a)]
tuples [Sequence]
ss
ds :: [(Sequence, Sequence, Matrix Int)]
ds = forall a b. (a -> b) -> [a] -> [b]
map (\(Sequence
x, Sequence
y) -> (Sequence
x, Sequence
y, forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either forall a. HasCallStack => String -> a
error forall a. a -> a
id forall a b. (a -> b) -> a -> b
$ Sequence -> Sequence -> Either String (Matrix Int)
divergence Sequence
x Sequence
y)) [(Sequence, Sequence)]
xys
Handle
h <- forall a. Reproducible a => String -> String -> ELynx a Handle
outHandle String
"divergence matrices" String
".div"
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ (forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall b c a. (b -> c) -> (a -> b) -> a -> c
. Handle -> (Sequence, Sequence, Matrix Int) -> IO ()
writeDivergenceMatrix Handle
h) [(Sequence, Sequence, Matrix Int)]
ds
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall a b. (a -> b) -> a -> b
$ Handle -> IO ()
hClose Handle
h
examineCmd :: ELynx ExamineArguments ()
examineCmd :: ELynx ExamineArguments ()
examineCmd = do
(ExamineArguments Alphabet
al String
inFile Bool
perSiteFlag Bool
divergenceFlag) <- forall a. Environment a -> a
localArguments forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) r. Monad m => ReaderT r m r
ask
[Sequence]
ss <- forall e.
(HasLock e, HasLogHandles e, HasVerbosity e) =>
Alphabet -> String -> Logger e [Sequence]
readSeqs Alphabet
al String
inFile
let result :: ByteString
result = Bool -> [Sequence] -> ByteString
examine Bool
perSiteFlag [Sequence]
ss
forall a.
Reproducible a =>
String -> ByteString -> String -> ELynx a ()
out String
"result of examination" ByteString
result String
".out"
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
divergenceFlag ([Sequence] -> ELynx ExamineArguments ()
computeDivergenceMatrices [Sequence]
ss)