{-# LANGUAGE BangPatterns      #-}
{-# LANGUAGE OverloadedStrings #-}

module Bio.Motif
    ( PWM(..)
    , size
    , subPWM
    , rcPWM
    , gcContentPWM
    , Motif(..)
    , Bkgd(..)
    , toPWM
    , ic
    , scores
    , scores'
    , score
    , optimalScore
    , CDF(..)
    , cdf
    , cdf'
    , truncateCDF
    , scoreCDF
    , pValueToScore
    , pValueToScoreExact
    , toIUPAC
    , readMEME
    , toMEME
    , fromMEME
    , writeMEME
    , writeFasta

    -- * References
    -- $references
    ) where

import           Conduit
import           Control.Arrow                     ((&&&))
import           Control.Monad.State.Lazy
import qualified Data.ByteString.Char8             as B
import           Data.Default.Class
import           Data.Double.Conversion.ByteString (toShortest)
import           Data.List                         (foldl', sortBy)
import qualified Data.Matrix.Unboxed               as M
import           Data.Maybe                        (fromJust, isNothing)
import           Data.Ord                          (comparing)
import qualified Data.Vector.Algorithms.Intro      as I
import qualified Data.Vector.Unboxed               as U
import qualified Data.Vector.Unboxed.Mutable       as UM
import           Numeric.MathFunctions.Constants   (m_epsilon)
import           Prelude                           hiding (sum)
import           Text.Printf                       (printf)
import Statistics.Function (minMax)

import           Bio.Seq
import           Bio.Utils.Functions               (binarySearchBy)
import           Bio.Utils.Misc                    (readDouble, readInt)

-- | k x 4 position weight matrix for motifs
data PWM = PWM
    { PWM -> Maybe Int
_nSites :: !(Maybe Int)  -- ^ number of sites used to generate this matrix
    , PWM -> Matrix Double
_mat    :: !(M.Matrix Double)
    } deriving (Int -> PWM -> ShowS
[PWM] -> ShowS
PWM -> String
(Int -> PWM -> ShowS)
-> (PWM -> String) -> ([PWM] -> ShowS) -> Show PWM
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [PWM] -> ShowS
$cshowList :: [PWM] -> ShowS
show :: PWM -> String
$cshow :: PWM -> String
showsPrec :: Int -> PWM -> ShowS
$cshowsPrec :: Int -> PWM -> ShowS
Show, ReadPrec [PWM]
ReadPrec PWM
Int -> ReadS PWM
ReadS [PWM]
(Int -> ReadS PWM)
-> ReadS [PWM] -> ReadPrec PWM -> ReadPrec [PWM] -> Read PWM
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [PWM]
$creadListPrec :: ReadPrec [PWM]
readPrec :: ReadPrec PWM
$creadPrec :: ReadPrec PWM
readList :: ReadS [PWM]
$creadList :: ReadS [PWM]
readsPrec :: Int -> ReadS PWM
$creadsPrec :: Int -> ReadS PWM
Read)

size :: PWM -> Int
size :: PWM -> Int
size (PWM Maybe Int
_ Matrix Double
mat) = Matrix Double -> Int
forall a. Context a => Matrix a -> Int
M.rows Matrix Double
mat

-- | Information content of a poistion in pwm. (Not implemented)
ic :: PWM -> Int -> Double
ic :: PWM -> Int -> Double
ic = PWM -> Int -> Double
forall a. HasCallStack => a
undefined

-- | Extract sub-PWM given starting position and length, zero indexed.
subPWM :: Int -> Int -> PWM -> PWM
subPWM :: Int -> Int -> PWM -> PWM
subPWM Int
i Int
l (PWM Maybe Int
n Matrix Double
mat) = Maybe Int -> Matrix Double -> PWM
PWM Maybe Int
n (Matrix Double -> PWM) -> Matrix Double -> PWM
forall a b. (a -> b) -> a -> b
$ (Int, Int) -> (Int, Int) -> Matrix Double -> Matrix Double
forall a.
Context a =>
(Int, Int) -> (Int, Int) -> Matrix a -> Matrix a
M.subMatrix (Int
i,Int
0) (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
l,Int
3) Matrix Double
mat
{-# INLINE subPWM #-}

-- | Reverse complementary of PWM.
rcPWM :: PWM -> PWM
rcPWM :: PWM -> PWM
rcPWM (PWM Maybe Int
n Matrix Double
mat) = Maybe Int -> Matrix Double -> PWM
PWM Maybe Int
n (Matrix Double -> PWM)
-> (Matrix Double -> Matrix Double) -> Matrix Double -> PWM
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int, Int) -> Vector Double -> Matrix Double
forall a. Context a => (Int, Int) -> Vector a -> Matrix a
M.fromVector (Int, Int)
d (Vector Double -> Matrix Double)
-> (Matrix Double -> Vector Double)
-> Matrix Double
-> Matrix Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector Double -> Vector Double
forall a. Unbox a => Vector a -> Vector a
U.reverse (Vector Double -> Vector Double)
-> (Matrix Double -> Vector Double)
-> Matrix Double
-> Vector Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Matrix Double -> Vector Double
forall a. Context a => Matrix a -> Vector a
M.flatten (Matrix Double -> PWM) -> Matrix Double -> PWM
forall a b. (a -> b) -> a -> b
$ Matrix Double
mat
  where
    d :: (Int, Int)
d = Matrix Double -> (Int, Int)
forall a. Context a => Matrix a -> (Int, Int)
M.dim Matrix Double
mat
{-# INLINE rcPWM #-}

-- | GC content of PWM.
gcContentPWM :: PWM -> Double
gcContentPWM :: PWM -> Double
gcContentPWM (PWM Maybe Int
_ Matrix Double
mat) = Double -> Int -> Double
loop Double
0 Int
0 Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
m
  where
    m :: Int
m = Matrix Double -> Int
forall a. Context a => Matrix a -> Int
M.rows Matrix Double
mat
    loop :: Double -> Int -> Double
loop !Double
acc !Int
i
        | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
m = Double
acc
        | Bool
otherwise =
            let acc' :: Double
acc' = Double
acc Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Matrix Double -> (Int, Int) -> Double
forall a. Context a => Matrix a -> (Int, Int) -> a
M.unsafeIndex Matrix Double
mat (Int
i,Int
1) Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Matrix Double -> (Int, Int) -> Double
forall a. Context a => Matrix a -> (Int, Int) -> a
M.unsafeIndex Matrix Double
mat (Int
i,Int
2)
            in Double -> Int -> Double
loop Double
acc' (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)

data Motif = Motif
    { Motif -> ByteString
_name :: !B.ByteString
    , Motif -> PWM
_pwm  :: !PWM
    } deriving (Int -> Motif -> ShowS
[Motif] -> ShowS
Motif -> String
(Int -> Motif -> ShowS)
-> (Motif -> String) -> ([Motif] -> ShowS) -> Show Motif
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Motif] -> ShowS
$cshowList :: [Motif] -> ShowS
show :: Motif -> String
$cshow :: Motif -> String
showsPrec :: Int -> Motif -> ShowS
$cshowsPrec :: Int -> Motif -> ShowS
Show, ReadPrec [Motif]
ReadPrec Motif
Int -> ReadS Motif
ReadS [Motif]
(Int -> ReadS Motif)
-> ReadS [Motif]
-> ReadPrec Motif
-> ReadPrec [Motif]
-> Read Motif
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [Motif]
$creadListPrec :: ReadPrec [Motif]
readPrec :: ReadPrec Motif
$creadPrec :: ReadPrec Motif
readList :: ReadS [Motif]
$creadList :: ReadS [Motif]
readsPrec :: Int -> ReadS Motif
$creadsPrec :: Int -> ReadS Motif
Read)

-- | background model which consists of single nucletide frequencies, and di-nucletide
-- frequencies.
newtype Bkgd = BG (Double, Double, Double, Double)

instance Default Bkgd where
    def :: Bkgd
def = (Double, Double, Double, Double) -> Bkgd
BG (Double
0.25, Double
0.25, Double
0.25, Double
0.25)

-- | Convert pwm to consensus sequence, see D. R. Cavener (1987).
toIUPAC :: PWM -> DNA IUPAC
toIUPAC :: PWM -> DNA IUPAC
toIUPAC (PWM Maybe Int
_ Matrix Double
pwm) = ByteString -> DNA IUPAC
forall (s :: * -> *) a. BioSeq' s => ByteString -> s a
unsafeFromBS (ByteString -> DNA IUPAC)
-> ([Vector Double] -> ByteString) -> [Vector Double] -> DNA IUPAC
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ByteString
B.pack (String -> ByteString)
-> ([Vector Double] -> String) -> [Vector Double] -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Vector Double -> Char) -> [Vector Double] -> String
forall a b. (a -> b) -> [a] -> [b]
map Vector Double -> Char
forall b. (Fractional b, Ord b, Unbox b) => Vector b -> Char
f ([Vector Double] -> DNA IUPAC) -> [Vector Double] -> DNA IUPAC
forall a b. (a -> b) -> a -> b
$ Matrix Double -> [Vector Double]
forall a. Context a => Matrix a -> [Vector a]
M.toRows Matrix Double
pwm
  where
    f :: Vector b -> Char
f Vector b
v | (Char, b) -> b
forall a b. (a, b) -> b
snd (Char, b)
a b -> b -> Bool
forall a. Ord a => a -> a -> Bool
> b
0.5 Bool -> Bool -> Bool
&& (Char, b) -> b
forall a b. (a, b) -> b
snd (Char, b)
a b -> b -> Bool
forall a. Ord a => a -> a -> Bool
> b
2 b -> b -> b
forall a. Num a => a -> a -> a
* (Char, b) -> b
forall a b. (a, b) -> b
snd (Char, b)
b = (Char, b) -> Char
forall a b. (a, b) -> a
fst (Char, b)
a
        | (Char, b) -> b
forall a b. (a, b) -> b
snd (Char, b)
a b -> b -> b
forall a. Num a => a -> a -> a
+ (Char, b) -> b
forall a b. (a, b) -> b
snd (Char, b)
b b -> b -> Bool
forall a. Ord a => a -> a -> Bool
> b
0.75             = (Char, Char) -> Char
iupac ((Char, b) -> Char
forall a b. (a, b) -> a
fst (Char, b)
a, (Char, b) -> Char
forall a b. (a, b) -> a
fst (Char, b)
b)
        | Bool
otherwise                        = Char
'N'
      where
        [(Char, b)
a, (Char, b)
b, (Char, b)
_, (Char, b)
_] = ((Char, b) -> (Char, b) -> Ordering) -> [(Char, b)] -> [(Char, b)]
forall a. (a -> a -> Ordering) -> [a] -> [a]
sortBy (((Char, b) -> (Char, b) -> Ordering)
-> (Char, b) -> (Char, b) -> Ordering
forall a b c. (a -> b -> c) -> b -> a -> c
flip (((Char, b) -> b) -> (Char, b) -> (Char, b) -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (Char, b) -> b
forall a b. (a, b) -> b
snd)) ([(Char, b)] -> [(Char, b)]) -> [(Char, b)] -> [(Char, b)]
forall a b. (a -> b) -> a -> b
$ String -> [b] -> [(Char, b)]
forall a b. [a] -> [b] -> [(a, b)]
zip String
"ACGT" ([b] -> [(Char, b)]) -> [b] -> [(Char, b)]
forall a b. (a -> b) -> a -> b
$ Vector b -> [b]
forall a. Unbox a => Vector a -> [a]
U.toList Vector b
v
    iupac :: (Char, Char) -> Char
iupac (Char, Char)
x = case (Char, Char) -> (Char, Char)
forall b. Ord b => (b, b) -> (b, b)
sort' (Char, Char)
x of
        (Char
'A', Char
'C') -> Char
'M'
        (Char
'G', Char
'T') -> Char
'K'
        (Char
'A', Char
'T') -> Char
'W'
        (Char
'C', Char
'G') -> Char
'S'
        (Char
'C', Char
'T') -> Char
'Y'
        (Char
'A', Char
'G') -> Char
'R'
        (Char, Char)
_ -> Char
forall a. HasCallStack => a
undefined
    sort' :: (b, b) -> (b, b)
sort' (b
x, b
y) | b
x b -> b -> Bool
forall a. Ord a => a -> a -> Bool
> b
y = (b
y, b
x)
                 | Bool
otherwise = (b
x, b
y)

-- | Get scores of a long sequences at each position.
scores :: Bkgd -> PWM -> DNA a -> [Double]
scores :: Bkgd -> PWM -> DNA a -> [Double]
scores Bkgd
bg p :: PWM
p@(PWM Maybe Int
_ Matrix Double
pwm) DNA a
dna = ByteString -> [Double]
go (ByteString -> [Double]) -> ByteString -> [Double]
forall a b. (a -> b) -> a -> b
$! DNA a -> ByteString
forall (s :: * -> *) a. BioSeq' s => s a -> ByteString
toBS DNA a
dna
  where
    go :: ByteString -> [Double]
go ByteString
s | ByteString -> Int
B.length ByteString
s Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
len = Bkgd -> PWM -> ByteString -> Double
scoreHelp Bkgd
bg PWM
p (Int -> ByteString -> ByteString
B.take Int
len ByteString
s) Double -> [Double] -> [Double]
forall a. a -> [a] -> [a]
: ByteString -> [Double]
go (ByteString -> ByteString
B.tail ByteString
s)
         | Bool
otherwise = []
    len :: Int
len = Matrix Double -> Int
forall a. Context a => Matrix a -> Int
M.rows Matrix Double
pwm
{-# INLINE scores #-}

-- | A streaming version of scores.
scores' :: Monad m => Bkgd -> PWM -> DNA a -> ConduitT i Double m ()
scores' :: Bkgd -> PWM -> DNA a -> ConduitT i Double m ()
scores' Bkgd
bg p :: PWM
p@(PWM Maybe Int
_ Matrix Double
pwm) DNA a
dna = Int -> ConduitT i Double m ()
forall (m :: * -> *) i. Monad m => Int -> ConduitT i Double m ()
go Int
0
  where
    go :: Int -> ConduitT i Double m ()
go Int
i | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
n Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
len Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1 = do Double -> ConduitT i Double m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield (Double -> ConduitT i Double m ())
-> Double -> ConduitT i Double m ()
forall a b. (a -> b) -> a -> b
$ Bkgd -> PWM -> ByteString -> Double
scoreHelp Bkgd
bg PWM
p (ByteString -> Double) -> ByteString -> Double
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
B.take Int
len (ByteString -> ByteString) -> ByteString -> ByteString
forall a b. (a -> b) -> a -> b
$ Int -> ByteString -> ByteString
B.drop Int
i ByteString
s
                                Int -> ConduitT i Double m ()
go (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
         | Bool
otherwise = () -> ConduitT i Double m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
    s :: ByteString
s = DNA a -> ByteString
forall (s :: * -> *) a. BioSeq' s => s a -> ByteString
toBS DNA a
dna
    n :: Int
n = ByteString -> Int
B.length ByteString
s
    len :: Int
len = Matrix Double -> Int
forall a. Context a => Matrix a -> Int
M.rows Matrix Double
pwm
{-# INLINE scores' #-}

score :: Bkgd -> PWM -> DNA a -> Double
score :: Bkgd -> PWM -> DNA a -> Double
score Bkgd
bg PWM
p DNA a
dna = Bkgd -> PWM -> ByteString -> Double
scoreHelp Bkgd
bg PWM
p (ByteString -> Double) -> ByteString -> Double
forall a b. (a -> b) -> a -> b
$! DNA a -> ByteString
forall (s :: * -> *) a. BioSeq' s => s a -> ByteString
toBS DNA a
dna
{-# INLINE score #-}

-- | The best possible matching score of a pwm.
optimalScore :: Bkgd -> PWM -> Double
optimalScore :: Bkgd -> PWM -> Double
optimalScore (BG (Double
a, Double
c, Double
g, Double
t)) (PWM Maybe Int
_ Matrix Double
pwm) = (Double -> Double -> Double) -> Double -> [Double] -> Double
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' Double -> Double -> Double
forall a. Num a => a -> a -> a
(+) Double
0 ([Double] -> Double)
-> (Matrix Double -> [Double]) -> Matrix Double -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Vector Double -> Double) -> [Vector Double] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map Vector Double -> Double
f ([Vector Double] -> [Double])
-> (Matrix Double -> [Vector Double]) -> Matrix Double -> [Double]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Matrix Double -> [Vector Double]
forall a. Context a => Matrix a -> [Vector a]
M.toRows (Matrix Double -> Double) -> Matrix Double -> Double
forall a b. (a -> b) -> a -> b
$ Matrix Double
pwm
  where f :: Vector Double -> Double
f Vector Double
xs = let (Int
i, Double
s) = ((Int, Double) -> (Int, Double) -> Ordering)
-> Vector (Int, Double) -> (Int, Double)
forall a. Unbox a => (a -> a -> Ordering) -> Vector a -> a
U.maximumBy (((Int, Double) -> Double)
-> (Int, Double) -> (Int, Double) -> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (Int, Double) -> Double
forall a b. (a, b) -> b
snd) (Vector (Int, Double) -> (Int, Double))
-> (Vector Double -> Vector (Int, Double))
-> Vector Double
-> (Int, Double)
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
                                Vector Int -> Vector Double -> Vector (Int, Double)
forall a b.
(Unbox a, Unbox b) =>
Vector a -> Vector b -> Vector (a, b)
U.zip ([Int] -> Vector Int
forall a. Unbox a => [a] -> Vector a
U.fromList ([Int
0..Int
3] :: [Int])) (Vector Double -> (Int, Double)) -> Vector Double -> (Int, Double)
forall a b. (a -> b) -> a -> b
$ Vector Double
xs
               in case Int
i of
                   Int
0 -> Double -> Double
forall a. Floating a => a -> a
log (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$ Double
s Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
a
                   Int
1 -> Double -> Double
forall a. Floating a => a -> a
log (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$ Double
s Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
c
                   Int
2 -> Double -> Double
forall a. Floating a => a -> a
log (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$ Double
s Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
g
                   Int
3 -> Double -> Double
forall a. Floating a => a -> a
log (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$ Double
s Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
t
                   Int
_ -> Double
forall a. HasCallStack => a
undefined
{-# INLINE optimalScore #-}

scoreHelp :: Bkgd -> PWM -> B.ByteString -> Double
scoreHelp :: Bkgd -> PWM -> ByteString -> Double
scoreHelp (BG (Double
a, Double
c, Double
g, Double
t)) (PWM Maybe Int
_ Matrix Double
pwm) ByteString
dna = (Double, Int) -> Double
forall a b. (a, b) -> a
fst ((Double, Int) -> Double)
-> (ByteString -> (Double, Int)) -> ByteString -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ((Double, Int) -> Char -> (Double, Int))
-> (Double, Int) -> ByteString -> (Double, Int)
forall a. (a -> Char -> a) -> a -> ByteString -> a
B.foldl (Double, Int) -> Char -> (Double, Int)
f (Double
0,Int
0) (ByteString -> Double) -> ByteString -> Double
forall a b. (a -> b) -> a -> b
$ ByteString
dna
  where
    f :: (Double, Int) -> Char -> (Double, Int)
f (!Double
acc,!Int
i) Char
x = (Double
acc Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
sc, Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
      where
        sc :: Double
sc = case Char
x of
              Char
'A' -> Double -> Double
forall a. Floating a => a -> a
log (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$! Double
matchA Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
a
              Char
'C' -> Double -> Double
forall a. Floating a => a -> a
log (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$! Double
matchC Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
c
              Char
'G' -> Double -> Double
forall a. Floating a => a -> a
log (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$! Double
matchG Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
g
              Char
'T' -> Double -> Double
forall a. Floating a => a -> a
log (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$! Double
matchT Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
t
              Char
'N' -> Double
0
              Char
'V' -> Double -> Double
forall a. Floating a => a -> a
log (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$! (Double
matchA Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
matchC Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
matchG) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double
a Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
c Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
g)
              Char
'H' -> Double -> Double
forall a. Floating a => a -> a
log (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$! (Double
matchA Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
matchC Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
matchT) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double
a Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
c Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
t)
              Char
'D' -> Double -> Double
forall a. Floating a => a -> a
log (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$! (Double
matchA Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
matchG Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
matchT) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double
a Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
g Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
t)
              Char
'B' -> Double -> Double
forall a. Floating a => a -> a
log (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$! (Double
matchC Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
matchG Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
matchT) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double
c Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
g Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
t)
              Char
'M' -> Double -> Double
forall a. Floating a => a -> a
log (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$! (Double
matchA Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
matchC) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double
a Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
c)
              Char
'K' -> Double -> Double
forall a. Floating a => a -> a
log (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$! (Double
matchG Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
matchT) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double
g Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
t)
              Char
'W' -> Double -> Double
forall a. Floating a => a -> a
log (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$! (Double
matchA Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
matchT) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double
a Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
t)
              Char
'S' -> Double -> Double
forall a. Floating a => a -> a
log (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$! (Double
matchC Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
matchG) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double
c Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
g)
              Char
'Y' -> Double -> Double
forall a. Floating a => a -> a
log (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$! (Double
matchC Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
matchT) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double
c Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
t)
              Char
'R' -> Double -> Double
forall a. Floating a => a -> a
log (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$! (Double
matchA Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
matchG) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double
a Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
g)
              Char
_   -> String -> Double
forall a. HasCallStack => String -> a
error String
"Bio.Motif.score: invalid nucleotide"
        matchA :: Double
matchA = Double -> Double
addSome (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$ Matrix Double -> (Int, Int) -> Double
forall a. Context a => Matrix a -> (Int, Int) -> a
M.unsafeIndex Matrix Double
pwm (Int
i,Int
0)
        matchC :: Double
matchC = Double -> Double
addSome (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$ Matrix Double -> (Int, Int) -> Double
forall a. Context a => Matrix a -> (Int, Int) -> a
M.unsafeIndex Matrix Double
pwm (Int
i,Int
1)
        matchG :: Double
matchG = Double -> Double
addSome (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$ Matrix Double -> (Int, Int) -> Double
forall a. Context a => Matrix a -> (Int, Int) -> a
M.unsafeIndex Matrix Double
pwm (Int
i,Int
2)
        matchT :: Double
matchT = Double -> Double
addSome (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$ Matrix Double -> (Int, Int) -> Double
forall a. Context a => Matrix a -> (Int, Int) -> a
M.unsafeIndex Matrix Double
pwm (Int
i,Int
3)
        addSome :: Double -> Double
addSome !Double
y | Double
y Double -> Double -> Bool
forall a. Eq a => a -> a -> Bool
== Double
0 = Double
pseudoCount
                   | Bool
otherwise = Double
y
        pseudoCount :: Double
pseudoCount = Double
0.0001
{-# INLINE scoreHelp #-}

-- | The probability that a kmer is generated by background based on a
-- 0-orderd Markov model.
pBkgd :: Bkgd -> B.ByteString -> Double
pBkgd :: Bkgd -> ByteString -> Double
pBkgd (BG (Double
a, Double
c, Double
g, Double
t)) = (Double -> Char -> Double) -> Double -> ByteString -> Double
forall a. (a -> Char -> a) -> a -> ByteString -> a
B.foldl' Double -> Char -> Double
f Double
1
  where
    f :: Double -> Char -> Double
f Double
acc Char
x = Double
acc Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
sc
      where
        sc :: Double
sc = case Char
x of
              Char
'A' -> Double
a
              Char
'C' -> Double
c
              Char
'G' -> Double
g
              Char
'T' -> Double
t
              Char
_ -> Double
forall a. HasCallStack => a
undefined
{-# INLINE pBkgd #-}

-- | calculate the minimum motif mathching score that would produce a kmer with
-- p-Value less than the given number. This score would then be used to search
-- for motif occurrences with significant p-Value
pValueToScore :: Double -> Bkgd -> PWM -> Double
pValueToScore :: Double -> Bkgd -> PWM -> Double
pValueToScore Double
p Bkgd
bg PWM
pwm = CDF -> Double -> Double
cdf' (Bkgd -> PWM -> CDF
scoreCDF Bkgd
bg PWM
pwm) (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$ Double
1 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
p

-- | Unlike pValueToScore, this version compute the exact score but much slower
-- and is inpractical for long motifs.
pValueToScoreExact :: Double -- ^ desirable p-Value
              -> Bkgd
              -> PWM
              -> Double
pValueToScoreExact :: Double -> Bkgd -> PWM -> Double
pValueToScoreExact Double
p Bkgd
bg PWM
pwm = Double -> Int -> Vector (Double, Double) -> Double
forall a. Unbox a => Double -> Int -> Vector (a, Double) -> a
go Double
0 Int
0 (Vector (Double, Double) -> Double)
-> Vector (Double, Double) -> Double
forall a b. (a -> b) -> a -> b
$ [(Double, Double)] -> Vector (Double, Double)
forall a b. (Unbox a, Unbox b, Ord a) => [(a, b)] -> Vector (a, b)
sort' ([(Double, Double)] -> Vector (Double, Double))
-> [(Double, Double)] -> Vector (Double, Double)
forall a b. (a -> b) -> a -> b
$
    (String -> (Double, Double)) -> [String] -> [(Double, Double)]
forall a b. (a -> b) -> [a] -> [b]
map ((Bkgd -> PWM -> ByteString -> Double
scoreHelp Bkgd
bg PWM
pwm (ByteString -> Double)
-> (ByteString -> Double) -> ByteString -> (Double, Double)
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& Bkgd -> ByteString -> Double
pBkgd Bkgd
bg) (ByteString -> (Double, Double))
-> (String -> ByteString) -> String -> (Double, Double)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> ByteString
B.pack) ([String] -> [(Double, Double)]) -> [String] -> [(Double, Double)]
forall a b. (a -> b) -> a -> b
$ Int -> String -> [String]
forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
replicateM Int
l String
"ACGT"
  where
    sort' :: [(a, b)] -> Vector (a, b)
sort' [(a, b)]
xs = (forall s. ST s (MVector s (a, b))) -> Vector (a, b)
forall a. Unbox a => (forall s. ST s (MVector s a)) -> Vector a
U.create ((forall s. ST s (MVector s (a, b))) -> Vector (a, b))
-> (forall s. ST s (MVector s (a, b))) -> Vector (a, b)
forall a b. (a -> b) -> a -> b
$ do MVector s (a, b)
v <- Vector (a, b) -> ST s (MVector s (a, b))
forall a (m :: * -> *).
(Unbox a, PrimMonad m) =>
Vector a -> m (MVector (PrimState m) a)
U.unsafeThaw (Vector (a, b) -> ST s (MVector s (a, b)))
-> ([(a, b)] -> Vector (a, b))
-> [(a, b)]
-> ST s (MVector s (a, b))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(a, b)] -> Vector (a, b)
forall a. Unbox a => [a] -> Vector a
U.fromList ([(a, b)] -> ST s (MVector s (a, b)))
-> [(a, b)] -> ST s (MVector s (a, b))
forall a b. (a -> b) -> a -> b
$ [(a, b)]
xs
                             Comparison (a, b) -> MVector (PrimState (ST s)) (a, b) -> ST s ()
forall (m :: * -> *) (v :: * -> * -> *) e.
(PrimMonad m, MVector v e) =>
Comparison e -> v (PrimState m) e -> m ()
I.sortBy (Comparison (a, b) -> Comparison (a, b)
forall a b c. (a -> b -> c) -> b -> a -> c
flip (((a, b) -> a) -> Comparison (a, b)
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (a, b) -> a
forall a b. (a, b) -> a
fst)) MVector s (a, b)
MVector (PrimState (ST s)) (a, b)
v
                             MVector s (a, b) -> ST s (MVector s (a, b))
forall (m :: * -> *) a. Monad m => a -> m a
return MVector s (a, b)
v
    go :: Double -> Int -> Vector (a, Double) -> a
go !Double
acc !Int
i Vector (a, Double)
vec | Double
acc Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
> Double
p = (a, Double) -> a
forall a b. (a, b) -> a
fst ((a, Double) -> a) -> (a, Double) -> a
forall a b. (a -> b) -> a -> b
$ Vector (a, Double)
vec Vector (a, Double) -> Int -> (a, Double)
forall a. Unbox a => Vector a -> Int -> a
U.! (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1)
                   | Bool
otherwise = Double -> Int -> Vector (a, Double) -> a
go (Double
acc Double -> Double -> Double
forall a. Num a => a -> a -> a
+ (a, Double) -> Double
forall a b. (a, b) -> b
snd (Vector (a, Double)
vec Vector (a, Double) -> Int -> (a, Double)
forall a. Unbox a => Vector a -> Int -> a
U.! Int
i)) (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1) Vector (a, Double)
vec
    l :: Int
l = PWM -> Int
size PWM
pwm

-- | The cumulative distribution function in the form of (x, P(X <= x)).
newtype CDF = CDF (U.Vector (Double, Double)) deriving (ReadPrec [CDF]
ReadPrec CDF
Int -> ReadS CDF
ReadS [CDF]
(Int -> ReadS CDF)
-> ReadS [CDF] -> ReadPrec CDF -> ReadPrec [CDF] -> Read CDF
forall a.
(Int -> ReadS a)
-> ReadS [a] -> ReadPrec a -> ReadPrec [a] -> Read a
readListPrec :: ReadPrec [CDF]
$creadListPrec :: ReadPrec [CDF]
readPrec :: ReadPrec CDF
$creadPrec :: ReadPrec CDF
readList :: ReadS [CDF]
$creadList :: ReadS [CDF]
readsPrec :: Int -> ReadS CDF
$creadsPrec :: Int -> ReadS CDF
Read, Int -> CDF -> ShowS
[CDF] -> ShowS
CDF -> String
(Int -> CDF -> ShowS)
-> (CDF -> String) -> ([CDF] -> ShowS) -> Show CDF
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CDF] -> ShowS
$cshowList :: [CDF] -> ShowS
show :: CDF -> String
$cshow :: CDF -> String
showsPrec :: Int -> CDF -> ShowS
$cshowsPrec :: Int -> CDF -> ShowS
Show)

-- P(X <= x)
cdf :: CDF -> Double -> Double
cdf :: CDF -> Double -> Double
cdf (CDF Vector (Double, Double)
v) Double
x = let i :: Int
i = ((Double, Double) -> Double -> Ordering)
-> Vector (Double, Double) -> Double -> Int
forall (v :: * -> *) e a.
Vector v e =>
(e -> a -> Ordering) -> v e -> a -> Int
binarySearchBy (Double, Double) -> Double -> Ordering
forall a b. Ord a => (a, b) -> a -> Ordering
cmp Vector (Double, Double)
v Double
x
                in case () of
                    ()
_ | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
n -> (Double, Double) -> Double
forall a b. (a, b) -> b
snd ((Double, Double) -> Double) -> (Double, Double) -> Double
forall a b. (a -> b) -> a -> b
$ Vector (Double, Double) -> (Double, Double)
forall a. Unbox a => Vector a -> a
U.last Vector (Double, Double)
v
                      | Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 -> if Double
x Double -> Double -> Bool
forall a. Eq a => a -> a -> Bool
== (Double, Double) -> Double
forall a b. (a, b) -> a
fst (Vector (Double, Double) -> (Double, Double)
forall a. Unbox a => Vector a -> a
U.head Vector (Double, Double)
v) then (Double, Double) -> Double
forall a b. (a, b) -> b
snd (Vector (Double, Double) -> (Double, Double)
forall a. Unbox a => Vector a -> a
U.head Vector (Double, Double)
v) else Double
0.0
                      | Bool
otherwise -> let (Double
a, Double
a') = Vector (Double, Double)
v Vector (Double, Double) -> Int -> (Double, Double)
forall a. Unbox a => Vector a -> Int -> a
U.! (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)
                                         (Double
b, Double
b') = Vector (Double, Double)
v Vector (Double, Double) -> Int -> (Double, Double)
forall a. Unbox a => Vector a -> Int -> a
U.! Int
i
                                         α :: Double
α = (Double
b Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
x) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double
b Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
a)
                                         β :: Double
β = (Double
x Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
a) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double
b Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
a)
                                     in Double
α Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
a' Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
β Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
b'
  where
    cmp :: (a, b) -> a -> Ordering
cmp (a
a,b
_) = a -> a -> Ordering
forall a. Ord a => a -> a -> Ordering
compare a
a
    n :: Int
n = Vector (Double, Double) -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector (Double, Double)
v
{-# INLINE cdf #-}

-- | The inverse of cdf.
cdf' :: CDF -> Double -> Double
cdf' :: CDF -> Double -> Double
cdf' (CDF Vector (Double, Double)
v) Double
p
    | Double
p Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
> Double
1 Bool -> Bool -> Bool
|| Double
p Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double
0 = String -> Double
forall a. HasCallStack => String -> a
error String
"p must be in [0,1]"
    | Bool
otherwise =
        let i :: Int
i = ((Double, Double) -> Double -> Ordering)
-> Vector (Double, Double) -> Double -> Int
forall (v :: * -> *) e a.
Vector v e =>
(e -> a -> Ordering) -> v e -> a -> Int
binarySearchBy (Double, Double) -> Double -> Ordering
forall a a. Ord a => (a, a) -> a -> Ordering
cmp Vector (Double, Double)
v Double
p
        in case () of
          ()
_ | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
n -> Double
1Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/Double
0
            | Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 -> if Double
p Double -> Double -> Bool
forall a. Eq a => a -> a -> Bool
== (Double, Double) -> Double
forall a b. (a, b) -> b
snd (Vector (Double, Double) -> (Double, Double)
forall a. Unbox a => Vector a -> a
U.head Vector (Double, Double)
v)
                then (Double, Double) -> Double
forall a b. (a, b) -> a
fst (Vector (Double, Double) -> (Double, Double)
forall a. Unbox a => Vector a -> a
U.head Vector (Double, Double)
v)
                else String -> Double
forall a. HasCallStack => String -> a
error String
"cdf': impossible!"
            | Bool
otherwise -> let (Double
a, Double
a') = Vector (Double, Double)
v Vector (Double, Double) -> Int -> (Double, Double)
forall a. Unbox a => Vector a -> Int -> a
U.! (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)
                               (Double
b, Double
b') = Vector (Double, Double)
v Vector (Double, Double) -> Int -> (Double, Double)
forall a. Unbox a => Vector a -> Int -> a
U.! Int
i
                               α :: Double
α = (Double
b' Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
p) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double
b' Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
a')
                               β :: Double
β = (Double
p Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
a') Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Double
b' Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
a')
                           in Double
α Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
a Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
β Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
b
  where
    cmp :: (a, a) -> a -> Ordering
cmp (a
_,a
a) = a -> a -> Ordering
forall a. Ord a => a -> a -> Ordering
compare a
a
    n :: Int
n = Vector (Double, Double) -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector (Double, Double)
v
{-# INLINE cdf' #-}

-- | Truncate the CDF by a value, in order to reduce the memory usage.
truncateCDF :: Double -> CDF -> CDF
truncateCDF :: Double -> CDF -> CDF
truncateCDF Double
x (CDF Vector (Double, Double)
v) = Vector (Double, Double) -> CDF
CDF (Vector (Double, Double) -> CDF) -> Vector (Double, Double) -> CDF
forall a b. (a -> b) -> a -> b
$ ((Double, Double) -> Bool)
-> Vector (Double, Double) -> Vector (Double, Double)
forall a. Unbox a => (a -> Bool) -> Vector a -> Vector a
U.filter ((Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
>=Double
x) (Double -> Bool)
-> ((Double, Double) -> Double) -> (Double, Double) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Double, Double) -> Double
forall a b. (a, b) -> b
snd) Vector (Double, Double)
v
{-# INLINE truncateCDF #-}

-- | Approximate the cdf of motif matching scores using dynamic programming.
-- Algorithm:
-- Scan the PWM from left to right. For each position $i$, compute a score
-- density function $s_i$ such that $s_i(x)$ is the total number of sequences
-- with score $x$.
scoreCDF :: Bkgd -> PWM -> CDF
scoreCDF :: Bkgd -> PWM -> CDF
scoreCDF (BG (Double
a,Double
c,Double
g,Double
t)) PWM
pwm = (Vector Double, Int -> Double) -> CDF
toCDF ((Vector Double, Int -> Double) -> CDF)
-> (Vector Double, Int -> Double) -> CDF
forall a b. (a -> b) -> a -> b
$ (Vector Double, Int -> Double)
-> Int -> (Vector Double, Int -> Double)
loop (Double -> Vector Double
forall a. Unbox a => a -> Vector a
U.singleton Double
1, Double -> Int -> Double
forall a b. a -> b -> a
const Double
0) Int
0
  where
    loop :: (Vector Double, Int -> Double)
-> Int -> (Vector Double, Int -> Double)
loop (Vector Double
prev,Int -> Double
scFn) Int
i
        | Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
n = (Vector Double
prev, Int -> Double
scFn)
        | Double
lo Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
< Double
hi =
            let v :: Vector Double
v = (forall s. ST s (MVector s Double)) -> Vector Double
forall a. Unbox a => (forall s. ST s (MVector s a)) -> Vector a
U.create ((forall s. ST s (MVector s Double)) -> Vector Double)
-> (forall s. ST s (MVector s Double)) -> Vector Double
forall a b. (a -> b) -> a -> b
$ do
                    MVector s Double
new <- Int -> Double -> ST s (MVector (PrimState (ST s)) Double)
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
Int -> a -> m (MVector (PrimState m) a)
UM.replicate Int
nBin' Double
0
                    ((Int -> Double -> ST s ()) -> Vector Double -> ST s ())
-> Vector Double -> (Int -> Double -> ST s ()) -> ST s ()
forall a b c. (a -> b -> c) -> b -> a -> c
flip (Int -> Double -> ST s ()) -> Vector Double -> ST s ()
forall (m :: * -> *) a b.
(Monad m, Unbox a) =>
(Int -> a -> m b) -> Vector a -> m ()
U.imapM_ Vector Double
prev ((Int -> Double -> ST s ()) -> ST s ())
-> (Int -> Double -> ST s ()) -> ST s ()
forall a b. (a -> b) -> a -> b
$ \Int
x Double
prob ->
                        Bool -> ST s () -> ST s ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (Double
prob Double -> Double -> Bool
forall a. Eq a => a -> a -> Bool
/= Double
0) (ST s () -> ST s ()) -> ST s () -> ST s ()
forall a b. (a -> b) -> a -> b
$ do
                            let sc :: Double
sc = Int -> Double
scFn Int
x
                            MVector (PrimState (ST s)) Double
-> (Double -> Double) -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> (a -> a) -> Int -> m ()
UM.modify MVector s Double
MVector (PrimState (ST s)) Double
new (Double
a Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
prob Double -> Double -> Double
forall a. Num a => a -> a -> a
+) (Int -> ST s ()) -> Int -> ST s ()
forall a b. (a -> b) -> a -> b
$ Double -> Int
getIdx (Double -> Int) -> Double -> Int
forall a b. (a -> b) -> a -> b
$ Double
sc Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Int -> Double
a_at Int
i
                            MVector (PrimState (ST s)) Double
-> (Double -> Double) -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> (a -> a) -> Int -> m ()
UM.modify MVector s Double
MVector (PrimState (ST s)) Double
new (Double
c Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
prob Double -> Double -> Double
forall a. Num a => a -> a -> a
+) (Int -> ST s ()) -> Int -> ST s ()
forall a b. (a -> b) -> a -> b
$ Double -> Int
getIdx (Double -> Int) -> Double -> Int
forall a b. (a -> b) -> a -> b
$ Double
sc Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Int -> Double
c_at Int
i
                            MVector (PrimState (ST s)) Double
-> (Double -> Double) -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> (a -> a) -> Int -> m ()
UM.modify MVector s Double
MVector (PrimState (ST s)) Double
new (Double
g Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
prob Double -> Double -> Double
forall a. Num a => a -> a -> a
+) (Int -> ST s ()) -> Int -> ST s ()
forall a b. (a -> b) -> a -> b
$ Double -> Int
getIdx (Double -> Int) -> Double -> Int
forall a b. (a -> b) -> a -> b
$ Double
sc Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Int -> Double
g_at Int
i
                            MVector (PrimState (ST s)) Double
-> (Double -> Double) -> Int -> ST s ()
forall (m :: * -> *) a.
(PrimMonad m, Unbox a) =>
MVector (PrimState m) a -> (a -> a) -> Int -> m ()
UM.modify MVector s Double
MVector (PrimState (ST s)) Double
new (Double
t Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
prob Double -> Double -> Double
forall a. Num a => a -> a -> a
+) (Int -> ST s ()) -> Int -> ST s ()
forall a b. (a -> b) -> a -> b
$ Double -> Int
getIdx (Double -> Int) -> Double -> Int
forall a b. (a -> b) -> a -> b
$ Double
sc Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Int -> Double
t_at Int
i
                    MVector s Double -> ST s (MVector s Double)
forall (m :: * -> *) a. Monad m => a -> m a
return MVector s Double
new
            in (Vector Double, Int -> Double)
-> Int -> (Vector Double, Int -> Double)
loop (Vector Double
v, \Int
x -> (Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
x Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
0.5) Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
step Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
lo) (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
        | Bool
otherwise = (Vector Double, Int -> Double)
-> Int -> (Vector Double, Int -> Double)
loop (Vector Double
prev, Int -> Double
scFn) (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)
      where
        getIdx :: Double -> Int
getIdx Double
x = let j :: Int
j = Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
truncate (Double -> Int) -> Double -> Int
forall a b. (a -> b) -> a -> b
$ (Double
x Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
lo) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
step
                   in if Int
j Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
nBin' then Int
nBin' Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1 else Int
j
        lo :: Double
lo = Double
lo' Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
min'
        hi :: Double
hi = Double
hi' Double -> Double -> Double
forall a. Num a => a -> a -> a
+ Double
max'
        nBin' :: Int
nBin' = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
200000 (Int -> Int) -> Int -> Int
forall a b. (a -> b) -> a -> b
$ Double -> Int
forall a b. (RealFrac a, Integral b) => a -> b
ceiling (Double -> Int) -> Double -> Int
forall a b. (a -> b) -> a -> b
$ (Double
hi Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
lo) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
precision
        step :: Double
step = (Double
hi Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
lo) Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
nBin'
        lo' :: Double
lo' = Int -> Double
scFn (Int -> Double) -> Int -> Double
forall a b. (a -> b) -> a -> b
$ (Int, Double) -> Int
forall a b. (a, b) -> a
fst ((Int, Double) -> Int)
-> (Vector (Int, Double) -> (Int, Double))
-> Vector (Int, Double)
-> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector (Int, Double) -> (Int, Double)
forall a. Unbox a => Vector a -> a
U.head (Vector (Int, Double) -> Int) -> Vector (Int, Double) -> Int
forall a b. (a -> b) -> a -> b
$ ((Int, Double) -> Bool)
-> Vector (Int, Double) -> Vector (Int, Double)
forall a. Unbox a => (a -> Bool) -> Vector a -> Vector a
U.dropWhile ((Double -> Double -> Bool
forall a. Eq a => a -> a -> Bool
==Double
0) (Double -> Bool)
-> ((Int, Double) -> Double) -> (Int, Double) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int, Double) -> Double
forall a b. (a, b) -> b
snd) (Vector (Int, Double) -> Vector (Int, Double))
-> Vector (Int, Double) -> Vector (Int, Double)
forall a b. (a -> b) -> a -> b
$ Vector Double -> Vector (Int, Double)
forall a. Unbox a => Vector a -> Vector (Int, a)
U.indexed Vector Double
prev
        hi' :: Double
hi' = Int -> Double
scFn (Int -> Double) -> Int -> Double
forall a b. (a -> b) -> a -> b
$ (Int, Double) -> Int
forall a b. (a, b) -> a
fst ((Int, Double) -> Int)
-> (Vector (Int, Double) -> (Int, Double))
-> Vector (Int, Double)
-> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector (Int, Double) -> (Int, Double)
forall a. Unbox a => Vector a -> a
U.head (Vector (Int, Double) -> Int) -> Vector (Int, Double) -> Int
forall a b. (a -> b) -> a -> b
$ ((Int, Double) -> Bool)
-> Vector (Int, Double) -> Vector (Int, Double)
forall a. Unbox a => (a -> Bool) -> Vector a -> Vector a
U.dropWhile ((Double -> Double -> Bool
forall a. Eq a => a -> a -> Bool
==Double
0) (Double -> Bool)
-> ((Int, Double) -> Double) -> (Int, Double) -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int, Double) -> Double
forall a b. (a, b) -> b
snd) (Vector (Int, Double) -> Vector (Int, Double))
-> Vector (Int, Double) -> Vector (Int, Double)
forall a b. (a -> b) -> a -> b
$ Vector (Int, Double) -> Vector (Int, Double)
forall a. Unbox a => Vector a -> Vector a
U.reverse (Vector (Int, Double) -> Vector (Int, Double))
-> Vector (Int, Double) -> Vector (Int, Double)
forall a b. (a -> b) -> a -> b
$
            Vector Double -> Vector (Int, Double)
forall a. Unbox a => Vector a -> Vector (Int, a)
U.indexed Vector Double
prev
        (Double
min', Double
max') = Vector Double -> (Double, Double)
forall (v :: * -> *).
Vector v Double =>
v Double -> (Double, Double)
minMax (Vector Double -> (Double, Double))
-> Vector Double -> (Double, Double)
forall a b. (a -> b) -> a -> b
$ [Double] -> Vector Double
forall a. Unbox a => [a] -> Vector a
U.fromList [Int -> Double
a_at Int
i, Int -> Double
c_at Int
i, Int -> Double
g_at Int
i, Int -> Double
t_at Int
i]
    a_at :: Int -> Double
a_at Int
i = Double -> Double
forall p. (Eq p, Floating p) => p -> p
log' (Matrix Double -> (Int, Int) -> Double
forall a. Context a => Matrix a -> (Int, Int) -> a
M.unsafeIndex (PWM -> Matrix Double
_mat PWM
pwm) (Int
i,Int
0)) Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double -> Double
forall a. Floating a => a -> a
log Double
a
    c_at :: Int -> Double
c_at Int
i = Double -> Double
forall p. (Eq p, Floating p) => p -> p
log' (Matrix Double -> (Int, Int) -> Double
forall a. Context a => Matrix a -> (Int, Int) -> a
M.unsafeIndex (PWM -> Matrix Double
_mat PWM
pwm) (Int
i,Int
1)) Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double -> Double
forall a. Floating a => a -> a
log Double
c
    g_at :: Int -> Double
g_at Int
i = Double -> Double
forall p. (Eq p, Floating p) => p -> p
log' (Matrix Double -> (Int, Int) -> Double
forall a. Context a => Matrix a -> (Int, Int) -> a
M.unsafeIndex (PWM -> Matrix Double
_mat PWM
pwm) (Int
i,Int
2)) Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double -> Double
forall a. Floating a => a -> a
log Double
g
    t_at :: Int -> Double
t_at Int
i = Double -> Double
forall p. (Eq p, Floating p) => p -> p
log' (Matrix Double -> (Int, Int) -> Double
forall a. Context a => Matrix a -> (Int, Int) -> a
M.unsafeIndex (PWM -> Matrix Double
_mat PWM
pwm) (Int
i,Int
3)) Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double -> Double
forall a. Floating a => a -> a
log Double
t
    toCDF :: (Vector Double, Int -> Double) -> CDF
toCDF (Vector Double
v, Int -> Double
scFn) = Vector (Double, Double) -> CDF
CDF (Vector (Double, Double) -> CDF) -> Vector (Double, Double) -> CDF
forall a b. (a -> b) -> a -> b
$ Vector (Double, Double) -> Vector (Double, Double)
forall a. Unbox a => Vector (a, Double) -> Vector (a, Double)
compressCDF (Vector (Double, Double) -> Vector (Double, Double))
-> Vector (Double, Double) -> Vector (Double, Double)
forall a b. (a -> b) -> a -> b
$ (Int -> Double -> (Double, Double))
-> Vector Double -> Vector (Double, Double)
forall a b.
(Unbox a, Unbox b) =>
(Int -> a -> b) -> Vector a -> Vector b
U.imap (\Int
i Double
x -> (Int -> Double
scFn Int
i, Double
x)) (Vector Double -> Vector (Double, Double))
-> Vector Double -> Vector (Double, Double)
forall a b. (a -> b) -> a -> b
$ (Double -> Double -> Double) -> Vector Double -> Vector Double
forall a. Unbox a => (a -> a -> a) -> Vector a -> Vector a
U.scanl1 Double -> Double -> Double
forall a. Num a => a -> a -> a
(+) Vector Double
v
    compressCDF :: Vector (a, Double) -> Vector (a, Double)
compressCDF Vector (a, Double)
v = (Int -> (a, Double) -> Bool)
-> Vector (a, Double) -> Vector (a, Double)
forall a. Unbox a => (Int -> a -> Bool) -> Vector a -> Vector a
U.ifilter Int -> (a, Double) -> Bool
forall a. Int -> (a, Double) -> Bool
f Vector (a, Double)
v
      where
        len :: Int
len = Vector (a, Double) -> Int
forall a. Unbox a => Vector a -> Int
U.length Vector (a, Double)
v
        f :: Int -> (a, Double) -> Bool
f Int
i (a
_, Double
x) | Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
0 Bool -> Bool -> Bool
|| Int
i Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
len = Bool
True
                   | Bool
otherwise = Double
x Double -> Double -> Double
forall a. Num a => a -> a -> a
- (a, Double) -> Double
forall a b. (a, b) -> b
snd (Vector (a, Double)
v Vector (a, Double) -> Int -> (a, Double)
forall a. Unbox a => Vector a -> Int -> a
`U.unsafeIndex` (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
1)) Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
> Double
m_epsilon Bool -> Bool -> Bool
||
                        (a, Double) -> Double
forall a b. (a, b) -> b
snd (Vector (a, Double)
v Vector (a, Double) -> Int -> (a, Double)
forall a. Unbox a => Vector a -> Int -> a
`U.unsafeIndex` (Int
iInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
1)) Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
x Double -> Double -> Bool
forall a. Ord a => a -> a -> Bool
> Double
m_epsilon
    precision :: Double
precision = Double
1e-5
    n :: Int
n = PWM -> Int
size PWM
pwm
    log' :: p -> p
log' p
x | p
x p -> p -> Bool
forall a. Eq a => a -> a -> Bool
== p
0 = p -> p
forall a. Floating a => a -> a
log p
0.001
           | Bool
otherwise = p -> p
forall a. Floating a => a -> a
log p
x
{-# INLINE scoreCDF #-}

-- | Get pwm from a matrix.
toPWM :: [B.ByteString] -> PWM
toPWM :: [ByteString] -> PWM
toPWM [ByteString]
x = Maybe Int -> Matrix Double -> PWM
PWM Maybe Int
forall a. Maybe a
Nothing (Matrix Double -> PWM)
-> ([ByteString] -> Matrix Double) -> [ByteString] -> PWM
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [[Double]] -> Matrix Double
forall a. Context a => [[a]] -> Matrix a
M.fromLists ([[Double]] -> Matrix Double)
-> ([ByteString] -> [[Double]]) -> [ByteString] -> Matrix Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString -> [Double]) -> [ByteString] -> [[Double]]
forall a b. (a -> b) -> [a] -> [b]
map ((ByteString -> Double) -> [ByteString] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map ByteString -> Double
readDouble([ByteString] -> [Double])
-> (ByteString -> [ByteString]) -> ByteString -> [Double]
forall b c a. (b -> c) -> (a -> b) -> a -> c
.ByteString -> [ByteString]
B.words) ([ByteString] -> PWM) -> [ByteString] -> PWM
forall a b. (a -> b) -> a -> b
$ [ByteString]
x

-- | Convert pwm to bytestring.
fromPWM :: PWM -> B.ByteString
fromPWM :: PWM -> ByteString
fromPWM = [ByteString] -> ByteString
B.unlines ([ByteString] -> ByteString)
-> (PWM -> [ByteString]) -> PWM -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Double] -> ByteString) -> [[Double]] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map ([ByteString] -> ByteString
B.unwords ([ByteString] -> ByteString)
-> ([Double] -> [ByteString]) -> [Double] -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Double -> ByteString) -> [Double] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map Double -> ByteString
toShortest) ([[Double]] -> [ByteString])
-> (PWM -> [[Double]]) -> PWM -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Matrix Double -> [[Double]]
forall a. Context a => Matrix a -> [[a]]
M.toLists (Matrix Double -> [[Double]])
-> (PWM -> Matrix Double) -> PWM -> [[Double]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PWM -> Matrix Double
_mat

writeFasta :: FilePath -> [Motif] -> IO ()
writeFasta :: String -> [Motif] -> IO ()
writeFasta String
fl [Motif]
motifs = String -> ByteString -> IO ()
B.writeFile String
fl ByteString
contents
  where
    contents :: ByteString
contents = [ByteString] -> ByteString
B.unlines ([ByteString] -> ByteString)
-> ([Motif] -> [ByteString]) -> [Motif] -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Motif -> [ByteString]) -> [Motif] -> [ByteString]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Motif -> [ByteString]
f ([Motif] -> ByteString) -> [Motif] -> ByteString
forall a b. (a -> b) -> a -> b
$ [Motif]
motifs
    f :: Motif -> [ByteString]
f Motif
x = [ByteString
">" ByteString -> ByteString -> ByteString
`B.append` Motif -> ByteString
_name Motif
x, PWM -> ByteString
fromPWM (PWM -> ByteString) -> PWM -> ByteString
forall a b. (a -> b) -> a -> b
$ Motif -> PWM
_pwm Motif
x]

readMEME :: FilePath -> IO [Motif]
readMEME :: String -> IO [Motif]
readMEME = (ByteString -> [Motif]) -> IO ByteString -> IO [Motif]
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM ByteString -> [Motif]
fromMEME (IO ByteString -> IO [Motif])
-> (String -> IO ByteString) -> String -> IO [Motif]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. String -> IO ByteString
B.readFile

writeMEME :: FilePath -> [Motif] -> Bkgd -> IO ()
writeMEME :: String -> [Motif] -> Bkgd -> IO ()
writeMEME String
fl [Motif]
xs Bkgd
bg = String -> ByteString -> IO ()
B.writeFile String
fl (ByteString -> IO ()) -> ByteString -> IO ()
forall a b. (a -> b) -> a -> b
$ [Motif] -> Bkgd -> ByteString
toMEME [Motif]
xs Bkgd
bg

toMEME :: [Motif] -> Bkgd -> B.ByteString
toMEME :: [Motif] -> Bkgd -> ByteString
toMEME [Motif]
xs (BG (Double
a,Double
c,Double
g,Double
t)) = ByteString -> [ByteString] -> ByteString
B.intercalate ByteString
"" ([ByteString] -> ByteString) -> [ByteString] -> ByteString
forall a b. (a -> b) -> a -> b
$ ByteString
header ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
: (Motif -> ByteString) -> [Motif] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map Motif -> ByteString
f [Motif]
xs
  where
    header :: ByteString
header = String -> ByteString
B.pack (String -> ByteString) -> String -> ByteString
forall a b. (a -> b) -> a -> b
$ String -> Double -> Double -> Double -> Double -> String
forall r. PrintfType r => String -> r
printf String
"MEME version 4\n\nALPHABET= ACGT\n\nstrands: + -\n\nBackground letter frequencies\nA %f C %f G %f T %f\n\n" Double
a Double
c Double
g Double
t
    f :: Motif -> ByteString
f (Motif ByteString
nm PWM
pwm) =
        let x :: ByteString
x = ByteString
"MOTIF " ByteString -> ByteString -> ByteString
`B.append` ByteString
nm
            y :: ByteString
y = String -> ByteString
B.pack (String -> ByteString) -> String -> ByteString
forall a b. (a -> b) -> a -> b
$ String -> Int -> Int -> String
forall r. PrintfType r => String -> r
printf String
"letter-probability matrix: alength= 4 w= %d nsites= %d E= 0" (PWM -> Int
size PWM
pwm) Int
sites
            z :: ByteString
z = [ByteString] -> ByteString
B.unlines ([ByteString] -> ByteString)
-> (PWM -> [ByteString]) -> PWM -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Double] -> ByteString) -> [[Double]] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map ([ByteString] -> ByteString
B.unwords ([ByteString] -> ByteString)
-> ([Double] -> [ByteString]) -> [Double] -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString
""ByteString -> [ByteString] -> [ByteString]
forall a. a -> [a] -> [a]
:) ([ByteString] -> [ByteString])
-> ([Double] -> [ByteString]) -> [Double] -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Double -> ByteString) -> [Double] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map Double -> ByteString
toShortest) ([[Double]] -> [ByteString])
-> (PWM -> [[Double]]) -> PWM -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Matrix Double -> [[Double]]
forall a. Context a => Matrix a -> [[a]]
M.toLists (Matrix Double -> [[Double]])
-> (PWM -> Matrix Double) -> PWM -> [[Double]]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. PWM -> Matrix Double
_mat (PWM -> ByteString) -> PWM -> ByteString
forall a b. (a -> b) -> a -> b
$ PWM
pwm
            sites :: Int
sites | Maybe Int -> Bool
forall a. Maybe a -> Bool
isNothing (PWM -> Maybe Int
_nSites PWM
pwm) = Int
1
                  | Bool
otherwise = Maybe Int -> Int
forall a. HasCallStack => Maybe a -> a
fromJust (Maybe Int -> Int) -> Maybe Int -> Int
forall a b. (a -> b) -> a -> b
$ PWM -> Maybe Int
_nSites PWM
pwm
        in [ByteString] -> ByteString
B.unlines [ByteString
x,ByteString
y,ByteString
z]
{-# INLINE toMEME #-}

fromMEME :: B.ByteString -> [Motif]
fromMEME :: ByteString -> [Motif]
fromMEME ByteString
meme = State (Int, [ByteString]) [Motif] -> (Int, [ByteString]) -> [Motif]
forall s a. State s a -> s -> a
evalState ([ByteString] -> State (Int, [ByteString]) [Motif]
go ([ByteString] -> State (Int, [ByteString]) [Motif])
-> [ByteString] -> State (Int, [ByteString]) [Motif]
forall a b. (a -> b) -> a -> b
$ ByteString -> [ByteString]
B.lines ByteString
meme) (Int
0, [])
  where
    go :: [B.ByteString] -> State (Int, [B.ByteString]) [Motif]
    go :: [ByteString] -> State (Int, [ByteString]) [Motif]
go (ByteString
x:[ByteString]
xs)
      | ByteString
"MOTIF" ByteString -> ByteString -> Bool
`B.isPrefixOf` ByteString
x = (Int, [ByteString]) -> StateT (Int, [ByteString]) Identity ()
forall s (m :: * -> *). MonadState s m => s -> m ()
put (Int
1, [Int -> ByteString -> ByteString
B.drop Int
6 ByteString
x]) StateT (Int, [ByteString]) Identity ()
-> State (Int, [ByteString]) [Motif]
-> State (Int, [ByteString]) [Motif]
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> [ByteString] -> State (Int, [ByteString]) [Motif]
go [ByteString]
xs
      | Bool
otherwise = do
          (Int
st, [ByteString]
str) <- StateT (Int, [ByteString]) Identity (Int, [ByteString])
forall s (m :: * -> *). MonadState s m => m s
get
          case Int
st of
              Int
1 -> do Bool
-> StateT (Int, [ByteString]) Identity ()
-> StateT (Int, [ByteString]) Identity ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when (ByteString -> Bool
startOfPwm ByteString
x) (StateT (Int, [ByteString]) Identity ()
 -> StateT (Int, [ByteString]) Identity ())
-> StateT (Int, [ByteString]) Identity ()
-> StateT (Int, [ByteString]) Identity ()
forall a b. (a -> b) -> a -> b
$ (Int, [ByteString]) -> StateT (Int, [ByteString]) Identity ()
forall s (m :: * -> *). MonadState s m => s -> m ()
put (Int
2, [ByteString]
str [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ [ByteString -> [ByteString]
B.words ByteString
x [ByteString] -> Int -> ByteString
forall a. [a] -> Int -> a
!! Int
7])
                      [ByteString] -> State (Int, [ByteString]) [Motif]
go [ByteString]
xs
              Int
2 -> let x' :: ByteString
x' = (Char -> Bool) -> ByteString -> ByteString
B.dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
' ') ByteString
x
                   in if ByteString -> Bool
B.null ByteString
x' Bool -> Bool -> Bool
|| ByteString
"URL" ByteString -> ByteString -> Bool
`B.isPrefixOf` ByteString
x'
                         then do (Int, [ByteString]) -> StateT (Int, [ByteString]) Identity ()
forall s (m :: * -> *). MonadState s m => s -> m ()
put (Int
0, [])
                                 [Motif]
r <- [ByteString] -> State (Int, [ByteString]) [Motif]
go [ByteString]
xs
                                 [Motif] -> State (Int, [ByteString]) [Motif]
forall (m :: * -> *) a. Monad m => a -> m a
return ([ByteString] -> Motif
toMotif [ByteString]
str Motif -> [Motif] -> [Motif]
forall a. a -> [a] -> [a]
: [Motif]
r)
                         else (Int, [ByteString]) -> StateT (Int, [ByteString]) Identity ()
forall s (m :: * -> *). MonadState s m => s -> m ()
put (Int
2, [ByteString]
str [ByteString] -> [ByteString] -> [ByteString]
forall a. [a] -> [a] -> [a]
++ [ByteString
x']) StateT (Int, [ByteString]) Identity ()
-> State (Int, [ByteString]) [Motif]
-> State (Int, [ByteString]) [Motif]
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> [ByteString] -> State (Int, [ByteString]) [Motif]
go [ByteString]
xs
              Int
_ -> [ByteString] -> State (Int, [ByteString]) [Motif]
go [ByteString]
xs
    go [] = do (Int
st, [ByteString]
str) <- StateT (Int, [ByteString]) Identity (Int, [ByteString])
forall s (m :: * -> *). MonadState s m => m s
get
               [Motif] -> State (Int, [ByteString]) [Motif]
forall (m :: * -> *) a. Monad m => a -> m a
return [[ByteString] -> Motif
toMotif [ByteString]
str | Int
st Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
2]
    startOfPwm :: ByteString -> Bool
startOfPwm = ByteString -> ByteString -> Bool
B.isPrefixOf ByteString
"letter-probability matrix:"
    toMotif :: [ByteString] -> Motif
toMotif (ByteString
name:ByteString
n:[ByteString]
xs) = ByteString -> PWM -> Motif
Motif ByteString
name PWM
pwm
      where
        pwm :: PWM
pwm = Maybe Int -> Matrix Double -> PWM
PWM (Int -> Maybe Int
forall a. a -> Maybe a
Just (Int -> Maybe Int) -> Int -> Maybe Int
forall a b. (a -> b) -> a -> b
$ ByteString -> Int
readInt ByteString
n) (Matrix Double -> PWM) -> Matrix Double -> PWM
forall a b. (a -> b) -> a -> b
$ [[Double]] -> Matrix Double
forall a. Context a => [[a]] -> Matrix a
M.fromLists ([[Double]] -> Matrix Double)
-> ([ByteString] -> [[Double]]) -> [ByteString] -> Matrix Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString -> [Double]) -> [ByteString] -> [[Double]]
forall a b. (a -> b) -> [a] -> [b]
map ((ByteString -> Double) -> [ByteString] -> [Double]
forall a b. (a -> b) -> [a] -> [b]
map ByteString -> Double
readDouble([ByteString] -> [Double])
-> (ByteString -> [ByteString]) -> ByteString -> [Double]
forall b c a. (b -> c) -> (a -> b) -> a -> c
.ByteString -> [ByteString]
B.words) ([ByteString] -> Matrix Double) -> [ByteString] -> Matrix Double
forall a b. (a -> b) -> a -> b
$ [ByteString]
xs
    toMotif [ByteString]
_ = String -> Motif
forall a. HasCallStack => String -> a
error String
"error"
{-# INLINE fromMEME #-}

-- $references
--
-- * Douglas R. Cavener. (1987) Comparison of the consensus sequence flanking
-- translational start sites in Drosophila and vertebrates.
-- /Nucleic Acids Research/ 15 (4): 1353–1361.
-- <doi:10.1093/nar/15.4.1353 http://nar.oxfordjournals.org/content/15/4/1353>