-- | This module provides statistics needed for substitution matrices. It is a
-- very modest attempt to replicate some of the Blast statistics.

module Biobase.SubstMatrix.Statistics where

import Data.Vector.Unboxed (Unbox)
import Data.Vector.Fusion.Util
import Data.Vector.Fusion.Stream.Monadic as SM
import Debug.Trace

import Data.PrimitiveArray as PA
import           Biobase.Primary.Letter

import Biobase.SubstMatrix.Types



-- | estimate Blast lambda.
--
-- TODO use ExceptT

estimateLambda
  :: (Unbox s, Num s, Real s)
  => AASubstMat t s a b -> Double
{-# Inlinable estimateLambda #-}
estimateLambda :: AASubstMat t s a b -> Double
estimateLambda (AASubstMat Unboxed ((Z :. Letter AA a) :. Letter AA b) s
mat Double
_) = Integer -> Double -> Double -> Double -> Double
go Integer
1000 Double
1 Double
2 Double
0 where
  go :: Integer -> Double -> Double -> Double -> Double
go Integer
count Double
lambda Double
high Double
low
    | Integer
count Integer -> Integer -> Bool
forall a. Ord a => a -> a -> Bool
<= Integer
0 = [Char] -> Double
forall a. HasCallStack => [Char] -> a
error [Char]
"no convergence?!"
    | (Double
highDouble -> Double -> Double
forall a. Num a => a -> a -> a
-Double
low) Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
<= Double
0.001 = Double
lambda
    | Double
s Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
>  Double
1              = Integer -> Double -> Double -> Double -> Double
go (Integer
countInteger -> Integer -> Integer
forall a. Num a => a -> a -> a
-Integer
1) ((Double
lambdaDouble -> Double -> Double
forall a. Num a => a -> a -> a
+Double
low)Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/Double
2)  Double
lambda Double
low
    | Double
s Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
<= Double
1              = Integer -> Double -> Double -> Double -> Double
go (Integer
countInteger -> Integer -> Integer
forall a. Num a => a -> a -> a
-Integer
1) ((Double
lambdaDouble -> Double -> Double
forall a. Num a => a -> a -> a
+Double
high)Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/Double
2) Double
high   Double
lambda
    where
      -- get the rows and columns, needed to get the probs for each row/column right.
      (ZZ:..r':..c') = Unboxed ((Z :. Letter AA a) :. Letter AA b) s
-> LimitType ((Z :. Letter AA a) :. Letter AA b)
forall (arr :: * -> * -> *) sh elm.
PrimArrayOps arr sh elm =>
arr sh elm -> LimitType sh
PA.upperBound Unboxed ((Z :. Letter AA a) :. Letter AA b) s
mat
      r :: Double
r = Rational -> Double
forall a. Fractional a => Rational -> a
fromRational (Rational -> Double) -> Rational -> Double
forall a b. (a -> b) -> a -> b
$ Int -> Rational
forall a. Real a => a -> Rational
toRational (Int -> Rational) -> Int -> Rational
forall a b. (a -> b) -> a -> b
$ LimitType (Letter AA a) -> Int
forall i. Index i => LimitType i -> Int
size LimitType (Letter AA a)
r'
      c :: Double
c = Rational -> Double
forall a. Fractional a => Rational -> a
fromRational (Rational -> Double) -> Rational -> Double
forall a b. (a -> b) -> a -> b
$ Int -> Rational
forall a. Real a => a -> Rational
toRational (Int -> Rational) -> Int -> Rational
forall a b. (a -> b) -> a -> b
$ LimitType (Letter AA b) -> Int
forall i. Index i => LimitType i -> Int
size LimitType (Letter AA b)
c'
      -- sum of all scores
      s :: Double
s = Id Double -> Double
forall a. Id a -> a
unId (Id Double -> Double)
-> (Stream Id Double -> Id Double) -> Stream Id Double -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Double -> Double -> Double)
-> Double -> Stream Id Double -> Id Double
forall (m :: * -> *) a b.
Monad m =>
(a -> b -> a) -> a -> Stream m b -> m a
SM.foldl' Double -> Double -> Double
forall a. Num a => a -> a -> a
(+) Double
0 (Stream Id Double -> Double) -> Stream Id Double -> Double
forall a b. (a -> b) -> a -> b
$ (((Z :. Letter AA a) :. Letter AA b, s) -> Double)
-> Stream Id ((Z :. Letter AA a) :. Letter AA b, s)
-> Stream Id Double
forall (m :: * -> *) a b.
Monad m =>
(a -> b) -> Stream m a -> Stream m b
SM.map ((Z :. Letter AA a) :. Letter AA b, s) -> Double
eachElem (Stream Id ((Z :. Letter AA a) :. Letter AA b, s)
 -> Stream Id Double)
-> Stream Id ((Z :. Letter AA a) :. Letter AA b, s)
-> Stream Id Double
forall a b. (a -> b) -> a -> b
$ Unboxed ((Z :. Letter AA a) :. Letter AA b) s
-> Stream Id ((Z :. Letter AA a) :. Letter AA b, s)
forall (m :: * -> *) (arr :: * -> * -> *) sh elm.
(Monad m, IndexStream sh, PrimArrayOps arr sh elm) =>
arr sh elm -> Stream m (sh, elm)
PA.assocsS Unboxed ((Z :. Letter AA a) :. Letter AA b) s
mat
      eachElem :: ((Z :. Letter AA a) :. Letter AA b, s) -> Double
eachElem (Z
Z:.Letter AA a
i:.Letter AA b
j, s
z) = (Double
1Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/Double
r) Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Double
1Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/Double
c) Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double -> Double
forall a. Floating a => a -> a
exp (Double
lambda Double -> Double -> Double
forall a. Num a => a -> a -> a
* (Rational -> Double
forall a. Fractional a => Rational -> a
fromRational (Rational -> Double) -> Rational -> Double
forall a b. (a -> b) -> a -> b
$ s -> Rational
forall a. Real a => a -> Rational
toRational s
z))