{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE FlexibleContexts #-}
module Bio.Motif.Merge
    ( mergePWM
    , mergePWMWeighted
    , dilute
    , trim
    , mergeTree
    , mergeTreeWeighted
    , iterativeMerge
    , buildTree
    , cutTreeBy
    )where

import           AI.Clustering.Hierarchical    hiding (size)
import Control.Arrow (first)
import           Control.Monad                 (forM_, when)
import           Control.Monad.ST              (runST, ST)
import qualified Data.ByteString.Char8         as B
import           Data.List                     (dropWhileEnd)
import qualified Data.Matrix.Symmetric.Generic.Mutable as MSU
import qualified Data.Matrix.Unboxed           as MU
import           Data.Maybe
import qualified Data.Vector                   as V
import qualified Data.Vector.Mutable           as VM
import qualified Data.Vector.Unboxed           as U

import           Bio.Motif
import           Bio.Motif.Alignment
import           Bio.Utils.Functions           (kld)

mergePWM :: (PWM, PWM, Int) -> PWM
mergePWM :: (PWM, PWM, Int) -> PWM
mergePWM (PWM
m1, PWM
m2, Int
shift) | Int
shift Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 = Int -> Matrix Vector Double -> Matrix Vector Double -> PWM
merge Int
shift (PWM -> Matrix Vector Double
_mat PWM
m1) (Matrix Vector Double -> PWM) -> Matrix Vector Double -> PWM
forall a b. (a -> b) -> a -> b
$ PWM -> Matrix Vector Double
_mat PWM
m2
                         | Bool
otherwise = Int -> Matrix Vector Double -> Matrix Vector Double -> PWM
merge (-Int
shift) (PWM -> Matrix Vector Double
_mat PWM
m2) (Matrix Vector Double -> PWM) -> Matrix Vector Double -> PWM
forall a b. (a -> b) -> a -> b
$ PWM -> Matrix Vector Double
_mat PWM
m1
  where
    merge :: Int -> Matrix Vector Double -> Matrix Vector Double -> PWM
merge Int
s Matrix Vector Double
a Matrix Vector Double
b = Maybe Int -> Matrix Vector Double -> PWM
PWM Maybe Int
forall a. Maybe a
Nothing (Matrix Vector Double -> PWM) -> Matrix Vector Double -> PWM
forall a b. (a -> b) -> a -> b
$ [Vector Double] -> Matrix Vector Double
forall a. Context a => [Vector a] -> Matrix a
MU.fromRows ([Vector Double] -> Matrix Vector Double)
-> [Vector Double] -> Matrix Vector Double
forall a b. (a -> b) -> a -> b
$ Int -> [Vector Double]
loop Int
0
      where
        n1 :: Int
n1 = Matrix Vector Double -> Int
forall a. Context a => Matrix a -> Int
MU.rows Matrix Vector Double
a
        n2 :: Int
n2 = Matrix Vector Double -> Int
forall a. Context a => Matrix a -> Int
MU.rows Matrix Vector Double
b
        loop :: Int -> [Vector Double]
loop Int
i | Int
i' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 Bool -> Bool -> Bool
|| (Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
n1 Bool -> Bool -> Bool
&& Int
i' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
n2) = Matrix Vector Double -> Int -> Vector Double
forall a. Context a => Matrix a -> Int -> Vector a
MU.takeRow Matrix Vector Double
a Int
i Vector Double -> [Vector Double] -> [Vector Double]
forall a. a -> [a] -> [a]
: Int -> [Vector Double]
loop (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
               | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
n1 Bool -> Bool -> Bool
&& Int
i' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
n2 = Vector Double -> Vector Double -> Vector Double
f (Matrix Vector Double -> Int -> Vector Double
forall a. Context a => Matrix a -> Int -> Vector a
MU.takeRow Matrix Vector Double
a Int
i) (Matrix Vector Double -> Int -> Vector Double
forall a. Context a => Matrix a -> Int -> Vector a
MU.takeRow Matrix Vector Double
b Int
i') Vector Double -> [Vector Double] -> [Vector Double]
forall a. a -> [a] -> [a]
: Int -> [Vector Double]
loop (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
               | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
n1 Bool -> Bool -> Bool
&& Int
i' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
n2 = Matrix Vector Double -> Int -> Vector Double
forall a. Context a => Matrix a -> Int -> Vector a
MU.takeRow Matrix Vector Double
b Int
i' Vector Double -> [Vector Double] -> [Vector Double]
forall a. a -> [a] -> [a]
: Int -> [Vector Double]
loop (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
               | Bool
otherwise = []
          where
            i' :: Int
i' = Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
s
            f :: Vector Double -> Vector Double -> Vector Double
f = (Double -> Double -> Double)
-> Vector Double -> Vector Double -> Vector Double
forall a b c.
(Unbox a, Unbox b, Unbox c) =>
(a -> b -> c) -> Vector a -> Vector b -> Vector c
U.zipWith (\Double
x Double
y -> (Double
xDouble -> Double -> Double
forall a. Num a => a -> a -> a
+Double
y)Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/Double
2)

mergePWMWeighted :: (PWM, [Int])  -- ^ pwm and weights at each position
                 -> (PWM, [Int])
                 -> Int                  -- ^ shift
                 -> (PWM, [Int])
mergePWMWeighted :: (PWM, [Int]) -> (PWM, [Int]) -> Int -> (PWM, [Int])
mergePWMWeighted (PWM, [Int])
m1 (PWM, [Int])
m2 Int
shift
    | Int
shift Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
0 = Int
-> (Matrix Vector Double, [Int])
-> (Matrix Vector Double, [Int])
-> (PWM, [Int])
forall b.
Integral b =>
Int
-> (Matrix Vector Double, [b])
-> (Matrix Vector Double, [b])
-> (PWM, [b])
merge Int
shift ((PWM -> Matrix Vector Double)
-> (PWM, [Int]) -> (Matrix Vector Double, [Int])
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first PWM -> Matrix Vector Double
_mat (PWM, [Int])
m1) ((Matrix Vector Double, [Int]) -> (PWM, [Int]))
-> (Matrix Vector Double, [Int]) -> (PWM, [Int])
forall a b. (a -> b) -> a -> b
$ (PWM -> Matrix Vector Double)
-> (PWM, [Int]) -> (Matrix Vector Double, [Int])
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first PWM -> Matrix Vector Double
_mat (PWM, [Int])
m2
    | Bool
otherwise = Int
-> (Matrix Vector Double, [Int])
-> (Matrix Vector Double, [Int])
-> (PWM, [Int])
forall b.
Integral b =>
Int
-> (Matrix Vector Double, [b])
-> (Matrix Vector Double, [b])
-> (PWM, [b])
merge (-Int
shift) ((PWM -> Matrix Vector Double)
-> (PWM, [Int]) -> (Matrix Vector Double, [Int])
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first PWM -> Matrix Vector Double
_mat (PWM, [Int])
m2) ((Matrix Vector Double, [Int]) -> (PWM, [Int]))
-> (Matrix Vector Double, [Int]) -> (PWM, [Int])
forall a b. (a -> b) -> a -> b
$ (PWM -> Matrix Vector Double)
-> (PWM, [Int]) -> (Matrix Vector Double, [Int])
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first PWM -> Matrix Vector Double
_mat (PWM, [Int])
m1

  where
    merge :: Int
-> (Matrix Vector Double, [b])
-> (Matrix Vector Double, [b])
-> (PWM, [b])
merge Int
s (Matrix Vector Double
p1,[b]
w1) (Matrix Vector Double
p2,[b]
w2) = ([Vector Double] -> PWM) -> ([Vector Double], [b]) -> (PWM, [b])
forall (a :: * -> * -> *) b c d.
Arrow a =>
a b c -> a (b, d) (c, d)
first (Maybe Int -> Matrix Vector Double -> PWM
PWM Maybe Int
forall a. Maybe a
Nothing (Matrix Vector Double -> PWM)
-> ([Vector Double] -> Matrix Vector Double)
-> [Vector Double]
-> PWM
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Vector Double] -> Matrix Vector Double
forall a. Context a => [Vector a] -> Matrix a
MU.fromRows) (([Vector Double], [b]) -> (PWM, [b]))
-> ([Vector Double], [b]) -> (PWM, [b])
forall a b. (a -> b) -> a -> b
$ [(Vector Double, b)] -> ([Vector Double], [b])
forall a b. [(a, b)] -> ([a], [b])
unzip ([(Vector Double, b)] -> ([Vector Double], [b]))
-> [(Vector Double, b)] -> ([Vector Double], [b])
forall a b. (a -> b) -> a -> b
$ Int -> [(Vector Double, b)]
loop Int
0
      where
        a :: Vector (Vector Double, b)
a = [(Vector Double, b)] -> Vector (Vector Double, b)
forall a. [a] -> Vector a
V.fromList ([(Vector Double, b)] -> Vector (Vector Double, b))
-> [(Vector Double, b)] -> Vector (Vector Double, b)
forall a b. (a -> b) -> a -> b
$ [Vector Double] -> [b] -> [(Vector Double, b)]
forall a b. [a] -> [b] -> [(a, b)]
zip (Matrix Vector Double -> [Vector Double]
forall a. Context a => Matrix a -> [Vector a]
MU.toRows Matrix Vector Double
p1) [b]
w1
        b :: Vector (Vector Double, b)
b = [(Vector Double, b)] -> Vector (Vector Double, b)
forall a. [a] -> Vector a
V.fromList ([(Vector Double, b)] -> Vector (Vector Double, b))
-> [(Vector Double, b)] -> Vector (Vector Double, b)
forall a b. (a -> b) -> a -> b
$ [Vector Double] -> [b] -> [(Vector Double, b)]
forall a b. [a] -> [b] -> [(a, b)]
zip (Matrix Vector Double -> [Vector Double]
forall a. Context a => Matrix a -> [Vector a]
MU.toRows Matrix Vector Double
p2) [b]
w2
        n1 :: Int
n1 = Vector (Vector Double, b) -> Int
forall a. Vector a -> Int
V.length Vector (Vector Double, b)
a
        n2 :: Int
n2 = Vector (Vector Double, b) -> Int
forall a. Vector a -> Int
V.length Vector (Vector Double, b)
b
        loop :: Int -> [(Vector Double, b)]
loop Int
i | Int
i' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 Bool -> Bool -> Bool
|| (Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
n1 Bool -> Bool -> Bool
&& Int
i' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
n2) = Vector (Vector Double, b)
a Vector (Vector Double, b) -> Int -> (Vector Double, b)
forall a. Vector a -> Int -> a
V.! Int
i (Vector Double, b) -> [(Vector Double, b)] -> [(Vector Double, b)]
forall a. a -> [a] -> [a]
: Int -> [(Vector Double, b)]
loop (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
               | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
n1 Bool -> Bool -> Bool
&& Int
i' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
n2 = (Vector Double, b) -> (Vector Double, b) -> (Vector Double, b)
forall c b.
(Unbox c, Fractional c, Integral b) =>
(Vector c, b) -> (Vector c, b) -> (Vector c, b)
f (Vector (Vector Double, b)
a Vector (Vector Double, b) -> Int -> (Vector Double, b)
forall a. Vector a -> Int -> a
V.! Int
i) (Vector (Vector Double, b)
b Vector (Vector Double, b) -> Int -> (Vector Double, b)
forall a. Vector a -> Int -> a
V.! Int
i') (Vector Double, b) -> [(Vector Double, b)] -> [(Vector Double, b)]
forall a. a -> [a] -> [a]
: Int -> [(Vector Double, b)]
loop (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
               | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
n1 Bool -> Bool -> Bool
&& Int
i' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
n2 = Vector (Vector Double, b)
b Vector (Vector Double, b) -> Int -> (Vector Double, b)
forall a. Vector a -> Int -> a
V.! Int
i' (Vector Double, b) -> [(Vector Double, b)] -> [(Vector Double, b)]
forall a. a -> [a] -> [a]
: Int -> [(Vector Double, b)]
loop (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
               | Bool
otherwise = []
          where
            i' :: Int
i' = Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
s
            f :: (Vector c, b) -> (Vector c, b) -> (Vector c, b)
f (Vector c
xs, b
wx) (Vector c
ys, b
wy) = ((c -> c -> c) -> Vector c -> Vector c -> Vector c
forall a b c.
(Unbox a, Unbox b, Unbox c) =>
(a -> b -> c) -> Vector a -> Vector b -> Vector c
U.zipWith (\c
x c
y ->
                (b -> c
forall a b. (Integral a, Num b) => a -> b
fromIntegral b
wx c -> c -> c
forall a. Num a => a -> a -> a
* c
x c -> c -> c
forall a. Num a => a -> a -> a
+ b -> c
forall a b. (Integral a, Num b) => a -> b
fromIntegral b
wy c -> c -> c
forall a. Num a => a -> a -> a
* c
y) c -> c -> c
forall a. Fractional a => a -> a -> a
/
                b -> c
forall a b. (Integral a, Num b) => a -> b
fromIntegral (b
wx b -> b -> b
forall a. Num a => a -> a -> a
+ b
wy)) Vector c
xs Vector c
ys, b
wx b -> b -> b
forall a. Num a => a -> a -> a
+ b
wy)

-- | dilute positions in a PWM that are associated with low weights
dilute :: (PWM, [Int]) -> PWM
dilute :: (PWM, [Int]) -> PWM
dilute (PWM
pwm, [Int]
ws) = Maybe Int -> Matrix Vector Double -> PWM
PWM Maybe Int
forall a. Maybe a
Nothing (Matrix Vector Double -> PWM) -> Matrix Vector Double -> PWM
forall a b. (a -> b) -> a -> b
$ [Vector Double] -> Matrix Vector Double
forall a. Context a => [Vector a] -> Matrix a
MU.fromRows ([Vector Double] -> Matrix Vector Double)
-> [Vector Double] -> Matrix Vector Double
forall a b. (a -> b) -> a -> b
$ (Int -> Vector Double -> Vector Double)
-> [Int] -> [Vector Double] -> [Vector Double]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith Int -> Vector Double -> Vector Double
forall b. (Unbox b, Fractional b) => Int -> Vector b -> Vector b
f [Int]
ws ([Vector Double] -> [Vector Double])
-> [Vector Double] -> [Vector Double]
forall a b. (a -> b) -> a -> b
$ Matrix Vector Double -> [Vector Double]
forall a. Context a => Matrix a -> [Vector a]
MU.toRows (Matrix Vector Double -> [Vector Double])
-> Matrix Vector Double -> [Vector Double]
forall a b. (a -> b) -> a -> b
$ PWM -> Matrix Vector Double
_mat PWM
pwm
  where
    f :: Int -> Vector b -> Vector b
f Int
w Vector b
r | Int
w Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
n = let d :: b
d = Int -> b
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> b) -> Int -> b
forall a b. (a -> b) -> a -> b
$ Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
w
                    in (b -> b) -> Vector b -> Vector b
forall a b. (Unbox a, Unbox b) => (a -> b) -> Vector a -> Vector b
U.map (\b
x -> (Int -> b
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
w b -> b -> b
forall a. Num a => a -> a -> a
* b
x b -> b -> b
forall a. Num a => a -> a -> a
+ b
0.25 b -> b -> b
forall a. Num a => a -> a -> a
* b
d) b -> b -> b
forall a. Fractional a => a -> a -> a
/ Int -> b
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n) Vector b
r
          | Bool
otherwise = Vector b
r
    n :: Int
n = [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum [Int]
ws
{-# INLINE dilute #-}

trim :: Bkgd -> Double -> PWM -> PWM
trim :: Bkgd -> Double -> PWM -> PWM
trim (BG (Double
a,Double
c,Double
g,Double
t)) Double
cutoff PWM
pwm = Maybe Int -> Matrix Vector Double -> PWM
PWM Maybe Int
forall a. Maybe a
Nothing (Matrix Vector Double -> PWM) -> Matrix Vector Double -> PWM
forall a b. (a -> b) -> a -> b
$ [Vector Double] -> Matrix Vector Double
forall a. Context a => [Vector a] -> Matrix a
MU.fromRows ([Vector Double] -> Matrix Vector Double)
-> [Vector Double] -> Matrix Vector Double
forall a b. (a -> b) -> a -> b
$ (Vector Double -> Bool) -> [Vector Double] -> [Vector Double]
forall a. (a -> Bool) -> [a] -> [a]
dropWhileEnd Vector Double -> Bool
f ([Vector Double] -> [Vector Double])
-> [Vector Double] -> [Vector Double]
forall a b. (a -> b) -> a -> b
$
    (Vector Double -> Bool) -> [Vector Double] -> [Vector Double]
forall a. (a -> Bool) -> [a] -> [a]
dropWhile Vector Double -> Bool
f [Vector Double]
rs
  where
    f :: Vector Double -> Bool
f Vector Double
x = Vector Double -> Vector Double -> Double
forall (v :: * -> *).
(Vector v Double, Vector v (Double, Double)) =>
v Double -> v Double -> Double
kld Vector Double
x Vector Double
bg Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double
cutoff
    rs :: [Vector Double]
rs = Matrix Vector Double -> [Vector Double]
forall a. Context a => Matrix a -> [Vector a]
MU.toRows (Matrix Vector Double -> [Vector Double])
-> Matrix Vector Double -> [Vector Double]
forall a b. (a -> b) -> a -> b
$ PWM -> Matrix Vector Double
_mat PWM
pwm
    bg :: Vector Double
bg = [Double] -> Vector Double
forall a. Unbox a => [a] -> Vector a
U.fromList [Double
a,Double
c,Double
g,Double
t]
{-# INLINE trim #-}

mergeTree :: AlignFn -> Dendrogram Motif -> PWM
mergeTree :: AlignFn -> Dendrogram Motif -> PWM
mergeTree AlignFn
align Dendrogram Motif
t = case Dendrogram Motif
t of
    Branch Int
_ Double
_ Dendrogram Motif
left Dendrogram Motif
right -> PWM -> PWM -> PWM
f (AlignFn -> Dendrogram Motif -> PWM
mergeTree AlignFn
align Dendrogram Motif
left) (PWM -> PWM) -> PWM -> PWM
forall a b. (a -> b) -> a -> b
$ AlignFn -> Dendrogram Motif -> PWM
mergeTree AlignFn
align Dendrogram Motif
right
    Leaf Motif
a -> Motif -> PWM
_pwm Motif
a
  where
    f :: PWM -> PWM -> PWM
f PWM
a PWM
b | Bool
isSame = (PWM, PWM, Int) -> PWM
mergePWM (PWM
a, PWM
b, Int
i)
          | Bool
otherwise = (PWM, PWM, Int) -> PWM
mergePWM (PWM
a, PWM -> PWM
rcPWM PWM
b, Int
i)
      where (Double
_, (Bool
isSame, Int
i)) = AlignFn
align PWM
a PWM
b

mergeTreeWeighted :: AlignFn -> Dendrogram Motif -> (PWM, [Int])
mergeTreeWeighted :: AlignFn -> Dendrogram Motif -> (PWM, [Int])
mergeTreeWeighted AlignFn
align Dendrogram Motif
t = case Dendrogram Motif
t of
    Branch Int
_ Double
_ Dendrogram Motif
left Dendrogram Motif
right -> (PWM, [Int]) -> (PWM, [Int]) -> (PWM, [Int])
f (AlignFn -> Dendrogram Motif -> (PWM, [Int])
mergeTreeWeighted AlignFn
align Dendrogram Motif
left) ((PWM, [Int]) -> (PWM, [Int])) -> (PWM, [Int]) -> (PWM, [Int])
forall a b. (a -> b) -> a -> b
$
        AlignFn -> Dendrogram Motif -> (PWM, [Int])
mergeTreeWeighted AlignFn
align Dendrogram Motif
right
    Leaf Motif
a -> (Motif -> PWM
_pwm Motif
a, Int -> Int -> [Int]
forall a. Int -> a -> [a]
replicate (PWM -> Int
size (PWM -> Int) -> PWM -> Int
forall a b. (a -> b) -> a -> b
$ Motif -> PWM
_pwm Motif
a) Int
1)
  where
    f :: (PWM, [Int]) -> (PWM, [Int]) -> (PWM, [Int])
f (PWM
a,[Int]
w1) (PWM
b,[Int]
w2)
        | Bool
isSame = (PWM, [Int]) -> (PWM, [Int]) -> Int -> (PWM, [Int])
mergePWMWeighted (PWM
a,[Int]
w1) (PWM
b,[Int]
w2) Int
i
        | Bool
otherwise = (PWM, [Int]) -> (PWM, [Int]) -> Int -> (PWM, [Int])
mergePWMWeighted (PWM
a,[Int]
w1) (PWM -> PWM
rcPWM PWM
b, [Int] -> [Int]
forall a. [a] -> [a]
reverse [Int]
w2) Int
i
      where (Double
_, (Bool
isSame, Int
i)) = AlignFn
align PWM
a PWM
b
{-# INLINE mergeTreeWeighted #-}

iterativeMerge :: AlignFn
               -> Double    -- cutoff
               -> [Motif]   -- ^ Motifs to be merged. Motifs must have unique name.
               -> [([B.ByteString], PWM, [Int])]
iterativeMerge :: AlignFn -> Double -> [Motif] -> [([ByteString], PWM, [Int])]
iterativeMerge AlignFn
align Double
th [Motif]
motifs = (forall s. ST s [([ByteString], PWM, [Int])])
-> [([ByteString], PWM, [Int])]
forall a. (forall s. ST s a) -> a
runST ((forall s. ST s [([ByteString], PWM, [Int])])
 -> [([ByteString], PWM, [Int])])
-> (forall s. ST s [([ByteString], PWM, [Int])])
-> [([ByteString], PWM, [Int])]
forall a b. (a -> b) -> a -> b
$ do
    MVector s (Maybe ([ByteString], PWM, [Int]))
motifs' <- Vector (Maybe ([ByteString], PWM, [Int]))
-> ST
     s (MVector (PrimState (ST s)) (Maybe ([ByteString], PWM, [Int])))
forall (m :: * -> *) a.
PrimMonad m =>
Vector a -> m (MVector (PrimState m) a)
V.unsafeThaw (Vector (Maybe ([ByteString], PWM, [Int]))
 -> ST
      s (MVector (PrimState (ST s)) (Maybe ([ByteString], PWM, [Int]))))
-> Vector (Maybe ([ByteString], PWM, [Int]))
-> ST
     s (MVector (PrimState (ST s)) (Maybe ([ByteString], PWM, [Int])))
forall a b. (a -> b) -> a -> b
$ [Maybe ([ByteString], PWM, [Int])]
-> Vector (Maybe ([ByteString], PWM, [Int]))
forall a. [a] -> Vector a
V.fromList ([Maybe ([ByteString], PWM, [Int])]
 -> Vector (Maybe ([ByteString], PWM, [Int])))
-> [Maybe ([ByteString], PWM, [Int])]
-> Vector (Maybe ([ByteString], PWM, [Int]))
forall a b. (a -> b) -> a -> b
$ ((Motif -> Maybe ([ByteString], PWM, [Int]))
 -> [Motif] -> [Maybe ([ByteString], PWM, [Int])])
-> [Motif]
-> (Motif -> Maybe ([ByteString], PWM, [Int]))
-> [Maybe ([ByteString], PWM, [Int])]
forall a b c. (a -> b -> c) -> b -> a -> c
flip (Motif -> Maybe ([ByteString], PWM, [Int]))
-> [Motif] -> [Maybe ([ByteString], PWM, [Int])]
forall a b. (a -> b) -> [a] -> [b]
map [Motif]
motifs ((Motif -> Maybe ([ByteString], PWM, [Int]))
 -> [Maybe ([ByteString], PWM, [Int])])
-> (Motif -> Maybe ([ByteString], PWM, [Int]))
-> [Maybe ([ByteString], PWM, [Int])]
forall a b. (a -> b) -> a -> b
$ \Motif
x ->
        ([ByteString], PWM, [Int]) -> Maybe ([ByteString], PWM, [Int])
forall a. a -> Maybe a
Just ([Motif -> ByteString
_name Motif
x], Motif -> PWM
_pwm Motif
x, Int -> Int -> [Int]
forall a. Int -> a -> [a]
replicate (PWM -> Int
size (PWM -> Int) -> PWM -> Int
forall a b. (a -> b) -> a -> b
$ Motif -> PWM
_pwm Motif
x) Int
1)

    let n :: Int
n = MVector s (Maybe ([ByteString], PWM, [Int])) -> Int
forall s a. MVector s a -> Int
VM.length MVector s (Maybe ([ByteString], PWM, [Int]))
motifs'
        iter :: m v (PrimState (ST s)) (Maybe (Double, (Bool, Int))) -> ST s ()
iter m v (PrimState (ST s)) (Maybe (Double, (Bool, Int)))
mat = do
            -- retrieve the minimum value
            ((Int
i, Int
j), (Double
d, (Bool
isSame, Int
pos))) <- ((Int, Int), (Double, (Bool, Int)))
-> Int -> Int -> ST s ((Int, Int), (Double, (Bool, Int)))
loop ((-Int
1,-Int
1), (Double
1Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/Double
0, (Bool, Int)
forall a. HasCallStack => a
undefined)) Int
0 Int
1
            if Double
d Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double
th
                then do
                    Just ([ByteString]
nm1, PWM
pwm1, [Int]
w1) <- MVector (PrimState (ST s)) (Maybe ([ByteString], PWM, [Int]))
-> Int -> ST s (Maybe ([ByteString], PWM, [Int]))
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> m a
VM.unsafeRead MVector s (Maybe ([ByteString], PWM, [Int]))
MVector (PrimState (ST s)) (Maybe ([ByteString], PWM, [Int]))
motifs' Int
i
                    Just ([ByteString]
nm2, PWM
pwm2, [Int]
w2) <- MVector (PrimState (ST s)) (Maybe ([ByteString], PWM, [Int]))
-> Int -> ST s (Maybe ([ByteString], PWM, [Int]))
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> m a
VM.unsafeRead MVector s (Maybe ([ByteString], PWM, [Int]))
MVector (PrimState (ST s)) (Maybe ([ByteString], PWM, [Int]))
motifs' Int
j
                    let merged :: ([ByteString], PWM, [Int])
merged = ([ByteString]
nm1 [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ [ByteString]
nm2, PWM
pwm', [Int]
w')
                        (PWM
pwm',[Int]
w') | Bool
isSame = (PWM, [Int]) -> (PWM, [Int]) -> Int -> (PWM, [Int])
mergePWMWeighted (PWM
pwm1, [Int]
w1) (PWM
pwm2, [Int]
w2) Int
pos
                                  | Bool
otherwise = (PWM, [Int]) -> (PWM, [Int]) -> Int -> (PWM, [Int])
mergePWMWeighted (PWM
pwm1, [Int]
w1)
                                              (PWM -> PWM
rcPWM (PWM -> PWM) -> PWM -> PWM
forall a b. (a -> b) -> a -> b
$ PWM
pwm2, [Int] -> [Int]
forall a. [a] -> [a]
reverse [Int]
w2) Int
pos

                    -- update
                    [Int] -> (Int -> ST s ()) -> ST s ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Int
0..Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1] ((Int -> ST s ()) -> ST s ()) -> (Int -> ST s ()) -> ST s ()
forall a b. (a -> b) -> a -> b
$ \Int
i' -> m v (PrimState (ST s)) (Maybe (Double, (Bool, Int)))
-> (Int, Int) -> Maybe (Double, (Bool, Int)) -> ST s ()
forall (m :: (* -> * -> *) -> * -> * -> *) (v :: * -> * -> *) a
       (s :: * -> *).
(MMatrix m v a, PrimMonad s) =>
m v (PrimState s) a -> (Int, Int) -> a -> s ()
MSU.unsafeWrite m v (PrimState (ST s)) (Maybe (Double, (Bool, Int)))
mat (Int
i',Int
j) Maybe (Double, (Bool, Int))
forall a. Maybe a
Nothing
                    MVector (PrimState (ST s)) (Maybe ([ByteString], PWM, [Int]))
-> Int -> Maybe ([ByteString], PWM, [Int]) -> ST s ()
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> a -> m ()
VM.unsafeWrite MVector s (Maybe ([ByteString], PWM, [Int]))
MVector (PrimState (ST s)) (Maybe ([ByteString], PWM, [Int]))
motifs' Int
i (Maybe ([ByteString], PWM, [Int]) -> ST s ())
-> Maybe ([ByteString], PWM, [Int]) -> ST s ()
forall a b. (a -> b) -> a -> b
$ ([ByteString], PWM, [Int]) -> Maybe ([ByteString], PWM, [Int])
forall a. a -> Maybe a
Just ([ByteString], PWM, [Int])
merged
                    MVector (PrimState (ST s)) (Maybe ([ByteString], PWM, [Int]))
-> Int -> Maybe ([ByteString], PWM, [Int]) -> ST s ()
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> a -> m ()
VM.unsafeWrite MVector s (Maybe ([ByteString], PWM, [Int]))
MVector (PrimState (ST s)) (Maybe ([ByteString], PWM, [Int]))
motifs' Int
j Maybe ([ByteString], PWM, [Int])
forall a. Maybe a
Nothing
                    [Int] -> (Int -> ST s ()) -> ST s ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Int
0..Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1] ((Int -> ST s ()) -> ST s ()) -> (Int -> ST s ()) -> ST s ()
forall a b. (a -> b) -> a -> b
$ \Int
j' -> Bool -> ST s () -> ST s ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
/= Int
j') (ST s () -> ST s ()) -> ST s () -> ST s ()
forall a b. (a -> b) -> a -> b
$ do
                        Maybe ([ByteString], PWM, [Int])
x <- MVector (PrimState (ST s)) (Maybe ([ByteString], PWM, [Int]))
-> Int -> ST s (Maybe ([ByteString], PWM, [Int]))
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> m a
VM.unsafeRead MVector s (Maybe ([ByteString], PWM, [Int]))
MVector (PrimState (ST s)) (Maybe ([ByteString], PWM, [Int]))
motifs' Int
j'
                        case Maybe ([ByteString], PWM, [Int])
x of
                            Just ([ByteString]
_, PWM
pwm2',[Int]
_) -> do
                                let ali :: Maybe (Double, (Bool, Int))
ali | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
j' = (Double, (Bool, Int)) -> Maybe (Double, (Bool, Int))
forall a. a -> Maybe a
Just ((Double, (Bool, Int)) -> Maybe (Double, (Bool, Int)))
-> (Double, (Bool, Int)) -> Maybe (Double, (Bool, Int))
forall a b. (a -> b) -> a -> b
$ AlignFn
align PWM
pwm' PWM
pwm2'
                                        | Bool
otherwise = (Double, (Bool, Int)) -> Maybe (Double, (Bool, Int))
forall a. a -> Maybe a
Just ((Double, (Bool, Int)) -> Maybe (Double, (Bool, Int)))
-> (Double, (Bool, Int)) -> Maybe (Double, (Bool, Int))
forall a b. (a -> b) -> a -> b
$ AlignFn
align PWM
pwm2' PWM
pwm'
                                m v (PrimState (ST s)) (Maybe (Double, (Bool, Int)))
-> (Int, Int) -> Maybe (Double, (Bool, Int)) -> ST s ()
forall (m :: (* -> * -> *) -> * -> * -> *) (v :: * -> * -> *) a
       (s :: * -> *).
(MMatrix m v a, PrimMonad s) =>
m v (PrimState s) a -> (Int, Int) -> a -> s ()
MSU.unsafeWrite m v (PrimState (ST s)) (Maybe (Double, (Bool, Int)))
mat (Int
i,Int
j') Maybe (Double, (Bool, Int))
ali
                            Maybe ([ByteString], PWM, [Int])
_ -> () -> ST s ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
                    m v (PrimState (ST s)) (Maybe (Double, (Bool, Int))) -> ST s ()
iter m v (PrimState (ST s)) (Maybe (Double, (Bool, Int)))
mat
                else () -> ST s ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
          where
            loop :: ((Int, Int), (Double, (Bool, Int)))
-> Int -> Int -> ST s ((Int, Int), (Double, (Bool, Int)))
loop ((Int
i_min, Int
j_min), (Double, (Bool, Int))
d_min) Int
i Int
j
                | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
n = ((Int, Int), (Double, (Bool, Int)))
-> ST s ((Int, Int), (Double, (Bool, Int)))
forall (m :: * -> *) a. Monad m => a -> m a
return ((Int
i_min, Int
j_min), (Double, (Bool, Int))
d_min)
                | Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
n = ((Int, Int), (Double, (Bool, Int)))
-> Int -> Int -> ST s ((Int, Int), (Double, (Bool, Int)))
loop ((Int
i_min, Int
j_min), (Double, (Bool, Int))
d_min) (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
2)
                | Bool
otherwise = do
                    Maybe (Double, (Bool, Int))
x <- m v (PrimState (ST s)) (Maybe (Double, (Bool, Int)))
-> (Int, Int) -> ST s (Maybe (Double, (Bool, Int)))
forall (m :: (* -> * -> *) -> * -> * -> *) (v :: * -> * -> *) a
       (s :: * -> *).
(MMatrix m v a, PrimMonad s) =>
m v (PrimState s) a -> (Int, Int) -> s a
MSU.unsafeRead m v (PrimState (ST s)) (Maybe (Double, (Bool, Int)))
mat (Int
i,Int
j)
                    case Maybe (Double, (Bool, Int))
x of
                        Just (Double, (Bool, Int))
d -> if (Double, (Bool, Int)) -> Double
forall a b. (a, b) -> a
fst (Double, (Bool, Int))
d Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< (Double, (Bool, Int)) -> Double
forall a b. (a, b) -> a
fst (Double, (Bool, Int))
d_min
                            then ((Int, Int), (Double, (Bool, Int)))
-> Int -> Int -> ST s ((Int, Int), (Double, (Bool, Int)))
loop ((Int
i,Int
j), (Double, (Bool, Int))
d) Int
i (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
                            else ((Int, Int), (Double, (Bool, Int)))
-> Int -> Int -> ST s ((Int, Int), (Double, (Bool, Int)))
loop ((Int
i_min, Int
j_min), (Double, (Bool, Int))
d_min) Int
i (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
                        Maybe (Double, (Bool, Int))
_ -> ((Int, Int), (Double, (Bool, Int)))
-> Int -> Int -> ST s ((Int, Int), (Double, (Bool, Int)))
loop ((Int
i_min, Int
j_min), (Double, (Bool, Int))
d_min) Int
i (Int
jInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)

    -- initialization
    SymMMatrix MVector s (Maybe (Double, (Bool, Int)))
mat <- (Int, Int)
-> Maybe (Double, (Bool, Int))
-> ST
     s
     (SymMMatrix
        MVector (PrimState (ST s)) (Maybe (Double, (Bool, Int))))
forall (m :: (* -> * -> *) -> * -> * -> *) (v :: * -> * -> *) a
       (s :: * -> *).
(MMatrix m v a, PrimMonad s) =>
(Int, Int) -> a -> s (m v (PrimState s) a)
MSU.replicate (Int
n,Int
n) Maybe (Double, (Bool, Int))
forall a. Maybe a
Nothing :: ST s (MSU.SymMMatrix VM.MVector s (Maybe (Double, (Bool, Int))))
    [Int] -> (Int -> ST s ()) -> ST s ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Int
0..Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1] ((Int -> ST s ()) -> ST s ()) -> (Int -> ST s ()) -> ST s ()
forall a b. (a -> b) -> a -> b
$ \Int
i -> [Int] -> (Int -> ST s ()) -> ST s ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1 .. Int
nInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1] ((Int -> ST s ()) -> ST s ()) -> (Int -> ST s ()) -> ST s ()
forall a b. (a -> b) -> a -> b
$ \Int
j -> do
        Just ([ByteString]
_, PWM
pwm1, [Int]
_) <- MVector (PrimState (ST s)) (Maybe ([ByteString], PWM, [Int]))
-> Int -> ST s (Maybe ([ByteString], PWM, [Int]))
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> m a
VM.unsafeRead MVector s (Maybe ([ByteString], PWM, [Int]))
MVector (PrimState (ST s)) (Maybe ([ByteString], PWM, [Int]))
motifs' Int
i
        Just ([ByteString]
_, PWM
pwm2, [Int]
_) <- MVector (PrimState (ST s)) (Maybe ([ByteString], PWM, [Int]))
-> Int -> ST s (Maybe ([ByteString], PWM, [Int]))
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> Int -> m a
VM.unsafeRead MVector s (Maybe ([ByteString], PWM, [Int]))
MVector (PrimState (ST s)) (Maybe ([ByteString], PWM, [Int]))
motifs' Int
j
        SymMMatrix MVector (PrimState (ST s)) (Maybe (Double, (Bool, Int)))
-> (Int, Int) -> Maybe (Double, (Bool, Int)) -> ST s ()
forall (m :: (* -> * -> *) -> * -> * -> *) (v :: * -> * -> *) a
       (s :: * -> *).
(MMatrix m v a, PrimMonad s) =>
m v (PrimState s) a -> (Int, Int) -> a -> s ()
MSU.unsafeWrite SymMMatrix MVector s (Maybe (Double, (Bool, Int)))
SymMMatrix MVector (PrimState (ST s)) (Maybe (Double, (Bool, Int)))
mat (Int
i,Int
j) (Maybe (Double, (Bool, Int)) -> ST s ())
-> Maybe (Double, (Bool, Int)) -> ST s ()
forall a b. (a -> b) -> a -> b
$ (Double, (Bool, Int)) -> Maybe (Double, (Bool, Int))
forall a. a -> Maybe a
Just ((Double, (Bool, Int)) -> Maybe (Double, (Bool, Int)))
-> (Double, (Bool, Int)) -> Maybe (Double, (Bool, Int))
forall a b. (a -> b) -> a -> b
$ AlignFn
align PWM
pwm1 PWM
pwm2

    SymMMatrix MVector (PrimState (ST s)) (Maybe (Double, (Bool, Int)))
-> ST s ()
forall (m :: (* -> * -> *) -> * -> * -> *) (v :: * -> * -> *).
MMatrix m v (Maybe (Double, (Bool, Int))) =>
m v (PrimState (ST s)) (Maybe (Double, (Bool, Int))) -> ST s ()
iter SymMMatrix MVector s (Maybe (Double, (Bool, Int)))
SymMMatrix MVector (PrimState (ST s)) (Maybe (Double, (Bool, Int)))
mat
    Vector (Maybe ([ByteString], PWM, [Int]))
results <- MVector (PrimState (ST s)) (Maybe ([ByteString], PWM, [Int]))
-> ST s (Vector (Maybe ([ByteString], PWM, [Int])))
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> m (Vector a)
V.unsafeFreeze MVector s (Maybe ([ByteString], PWM, [Int]))
MVector (PrimState (ST s)) (Maybe ([ByteString], PWM, [Int]))
motifs'
    [([ByteString], PWM, [Int])] -> ST s [([ByteString], PWM, [Int])]
forall (m :: * -> *) a. Monad m => a -> m a
return ([([ByteString], PWM, [Int])] -> ST s [([ByteString], PWM, [Int])])
-> [([ByteString], PWM, [Int])]
-> ST s [([ByteString], PWM, [Int])]
forall a b. (a -> b) -> a -> b
$ Vector ([ByteString], PWM, [Int]) -> [([ByteString], PWM, [Int])]
forall a. Vector a -> [a]
V.toList (Vector ([ByteString], PWM, [Int]) -> [([ByteString], PWM, [Int])])
-> Vector ([ByteString], PWM, [Int])
-> [([ByteString], PWM, [Int])]
forall a b. (a -> b) -> a -> b
$ (Maybe ([ByteString], PWM, [Int]) -> ([ByteString], PWM, [Int]))
-> Vector (Maybe ([ByteString], PWM, [Int]))
-> Vector ([ByteString], PWM, [Int])
forall a b. (a -> b) -> Vector a -> Vector b
V.map Maybe ([ByteString], PWM, [Int]) -> ([ByteString], PWM, [Int])
forall a. HasCallStack => Maybe a -> a
fromJust (Vector (Maybe ([ByteString], PWM, [Int]))
 -> Vector ([ByteString], PWM, [Int]))
-> Vector (Maybe ([ByteString], PWM, [Int]))
-> Vector ([ByteString], PWM, [Int])
forall a b. (a -> b) -> a -> b
$ (Maybe ([ByteString], PWM, [Int]) -> Bool)
-> Vector (Maybe ([ByteString], PWM, [Int]))
-> Vector (Maybe ([ByteString], PWM, [Int]))
forall a. (a -> Bool) -> Vector a -> Vector a
V.filter Maybe ([ByteString], PWM, [Int]) -> Bool
forall a. Maybe a -> Bool
isJust Vector (Maybe ([ByteString], PWM, [Int]))
results
{-# INLINE iterativeMerge #-}

-- | build a guide tree from a set of motifs
buildTree :: AlignFn -> [Motif] -> Dendrogram Motif
buildTree :: AlignFn -> [Motif] -> Dendrogram Motif
buildTree AlignFn
align [Motif]
motifs = Linkage -> Vector Motif -> DistFn Motif -> Dendrogram Motif
forall (v :: * -> *) a.
Vector v a =>
Linkage -> v a -> DistFn a -> Dendrogram a
hclust Linkage
Average ([Motif] -> Vector Motif
forall a. [a] -> Vector a
V.fromList [Motif]
motifs) DistFn Motif
δ
  where
    δ :: DistFn Motif
δ (Motif ByteString
_ PWM
x) (Motif ByteString
_ PWM
y) = (Double, (Bool, Int)) -> Double
forall a b. (a, b) -> a
fst ((Double, (Bool, Int)) -> Double)
-> (Double, (Bool, Int)) -> Double
forall a b. (a -> b) -> a -> b
$ AlignFn
align PWM
x PWM
y

cutTreeBy :: Double   -- ^ start
          -> Double   -- ^ step
          -> ([Dendrogram a] -> Bool) -> Dendrogram a -> [Dendrogram a]
cutTreeBy :: Double
-> Double
-> ([Dendrogram a] -> Bool)
-> Dendrogram a
-> [Dendrogram a]
cutTreeBy Double
start Double
step [Dendrogram a] -> Bool
fn Dendrogram a
tree = Double -> [Dendrogram a]
go Double
start
  where
    go :: Double -> [Dendrogram a]
go Double
x | [Dendrogram a] -> Bool
fn [Dendrogram a]
clusters = [Dendrogram a]
clusters
         | Double
x Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
step Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
> Double
0 = Double -> [Dendrogram a]
go (Double -> [Dendrogram a]) -> Double -> [Dendrogram a]
forall a b. (a -> b) -> a -> b
$ Double
x Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
step
         | Bool
otherwise = [Dendrogram a]
clusters
      where clusters :: [Dendrogram a]
clusters = Dendrogram a -> Double -> [Dendrogram a]
forall a. Dendrogram a -> Double -> [Dendrogram a]
cutAt Dendrogram a
tree Double
x