{-# LANGUAGE FlexibleContexts #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE BangPatterns #-}
module Bio.Data.Bed.Utils
( fetchSeq
, clipBed
, CutoffMotif(..)
, mkCutoffMotif
, scanMotif
, monoColonalize
, BaseMap(..)
, baseMap
, queryBaseMap
, rpkmBed
, rpkmSortedBed
, countTagsBinBed
, countTagsBinBed'
, tagCountDistr
, peakCluster
) where
import Conduit
import Lens.Micro
import Control.Monad.State.Strict
import qualified Data.ByteString.Char8 as B
import qualified Data.Foldable as F
import Data.Function (on)
import qualified Data.HashMap.Strict as M
import qualified Data.IntervalMap.Strict as IM
import Data.Maybe (fromJust, fromMaybe)
import qualified Data.Vector as V
import qualified Data.Vector.Algorithms.Intro as I
import qualified Data.Vector.Generic as G
import qualified Data.Vector.Generic.Mutable as GM
import qualified Data.Vector.Unboxed as U
import System.IO
import Bio.Data.Bed
import Bio.Data.Bed.Types
import Bio.Motif (Bkgd (..), Motif (..), CDF, PWM)
import qualified Bio.Motif as Motif
import qualified Bio.Motif.Search as Motif
import Bio.Seq hiding (length)
import Bio.Seq.IO
import qualified Bio.Utils.BitVector as BV
clipBed :: (BEDLike b, Monad m)
=> [(B.ByteString, Int)]
-> ConduitT b b m ()
clipBed :: [(ByteString, Int)] -> ConduitT b b m ()
clipBed [(ByteString, Int)]
chrsize = (b -> Maybe b) -> ConduitT b (Element (Maybe b)) m ()
forall (m :: * -> *) mono a.
(Monad m, MonoFoldable mono) =>
(a -> mono) -> ConduitT a (Element mono) m ()
concatMapC b -> Maybe b
forall a. BEDLike a => a -> Maybe a
f
where
f :: a -> Maybe a
f a
x = case ByteString -> HashMap ByteString Int -> Maybe Int
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup (a
xa -> Getting ByteString a ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString a ByteString
forall b. BEDLike b => Lens' b ByteString
chrom) HashMap ByteString Int
chrsize' of
Maybe Int
Nothing -> Maybe a
forall a. Maybe a
Nothing
Just Int
n -> if a
xa -> Getting Int a Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int a Int
forall b. BEDLike b => Lens' b Int
chromStart Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
n
then Maybe a
forall a. Maybe a
Nothing
else a -> Maybe a
forall a. a -> Maybe a
Just (a -> Maybe a) -> a -> Maybe a
forall a b. (a -> b) -> a -> b
$ (Int -> Identity Int) -> a -> Identity a
forall b. BEDLike b => Lens' b Int
chromStart ((Int -> Identity Int) -> a -> Identity a)
-> (Int -> Int) -> a -> a
forall s t a b. ASetter s t a b -> (a -> b) -> s -> t
%~ Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
0 (a -> a) -> a -> a
forall a b. (a -> b) -> a -> b
$ (Int -> Identity Int) -> a -> Identity a
forall b. BEDLike b => Lens' b Int
chromEnd ((Int -> Identity Int) -> a -> Identity a)
-> (Int -> Int) -> a -> a
forall s t a b. ASetter s t a b -> (a -> b) -> s -> t
%~ Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
n (a -> a) -> a -> a
forall a b. (a -> b) -> a -> b
$ a
x
chrsize' :: HashMap ByteString Int
chrsize' = (Int -> Int -> Int)
-> [(ByteString, Int)] -> HashMap ByteString Int
forall k v.
(Eq k, Hashable k) =>
(v -> v -> v) -> [(k, v)] -> HashMap k v
M.fromListWith ([Char] -> Int -> Int -> Int
forall a. HasCallStack => [Char] -> a
error [Char]
"redundant chromosomes") [(ByteString, Int)]
chrsize
{-# INLINE clipBed #-}
fetchSeq :: BioSeq DNA a
=> Genome
-> BED
-> IO (Either String (DNA a))
fetchSeq :: Genome -> BED -> IO (Either [Char] (DNA a))
fetchSeq Genome
g BED
bed = do
Either [Char] (DNA a)
dna <- Genome -> Query -> IO (Either [Char] (DNA a))
forall (s :: * -> *) a.
BioSeq s a =>
Genome -> Query -> IO (Either [Char] (s a))
getSeq Genome
g (BED
bedBED -> Getting ByteString BED ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString BED ByteString
forall b. BEDLike b => Lens' b ByteString
chrom, BED
bedBED -> Getting Int BED Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int BED Int
forall b. BEDLike b => Lens' b Int
chromStart, BED
bedBED -> Getting Int BED Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int BED Int
forall b. BEDLike b => Lens' b Int
chromEnd)
Either [Char] (DNA a) -> IO (Either [Char] (DNA a))
forall (m :: * -> *) a. Monad m => a -> m a
return (Either [Char] (DNA a) -> IO (Either [Char] (DNA a)))
-> Either [Char] (DNA a) -> IO (Either [Char] (DNA a))
forall a b. (a -> b) -> a -> b
$ case BED
bedBED -> Getting (Maybe Bool) BED (Maybe Bool) -> Maybe Bool
forall s a. s -> Getting a s a -> a
^.Getting (Maybe Bool) BED (Maybe Bool)
forall b. BEDLike b => Lens' b (Maybe Bool)
strand of
Just Bool
False -> DNA a -> DNA a
forall alphabet. DNA alphabet -> DNA alphabet
rc (DNA a -> DNA a) -> Either [Char] (DNA a) -> Either [Char] (DNA a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Either [Char] (DNA a)
dna
Maybe Bool
_ -> Either [Char] (DNA a)
dna
{-# INLINE fetchSeq #-}
data CutoffMotif = CutoffMotif
{ CutoffMotif -> ByteString
_motif_name :: B.ByteString
, CutoffMotif -> PWM
_motif_pwm :: PWM
, CutoffMotif -> Vector Double
_motif_sigma :: U.Vector Double
, CutoffMotif -> PWM
_motif_pwm_rc :: PWM
, CutoffMotif -> Vector Double
_motif_sigma_rc :: U.Vector Double
, CutoffMotif -> Bkgd
_background :: Bkgd
, CutoffMotif -> Double
_cutoff :: Double
, CutoffMotif -> CDF
_cdf :: CDF }
mkCutoffMotif :: Bkgd
-> Double
-> Motif -> CutoffMotif
mkCutoffMotif :: Bkgd -> Double -> Motif -> CutoffMotif
mkCutoffMotif Bkgd
bg Double
p Motif
motif = ByteString
-> PWM
-> Vector Double
-> PWM
-> Vector Double
-> Bkgd
-> Double
-> CDF
-> CutoffMotif
CutoffMotif (Motif -> ByteString
_name Motif
motif) (Motif -> PWM
_pwm Motif
motif) Vector Double
sigma PWM
pwm'
Vector Double
sigma' Bkgd
bg Double
sc (CDF -> CutoffMotif) -> CDF -> CutoffMotif
forall a b. (a -> b) -> a -> b
$ Double -> CDF -> CDF
Motif.truncateCDF (Double
1 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
p Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
10) CDF
cdf
where
cdf :: CDF
cdf = Bkgd -> PWM -> CDF
Motif.scoreCDF Bkgd
bg (PWM -> CDF) -> PWM -> CDF
forall a b. (a -> b) -> a -> b
$ Motif -> PWM
_pwm Motif
motif
sc :: Double
sc = CDF -> Double -> Double
Motif.cdf' CDF
cdf (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$ Double
1 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
p
sigma :: Vector Double
sigma = Bkgd -> PWM -> Vector Double
Motif.optimalScoresSuffix Bkgd
bg (PWM -> Vector Double) -> PWM -> Vector Double
forall a b. (a -> b) -> a -> b
$ Motif -> PWM
_pwm Motif
motif
pwm' :: PWM
pwm' = PWM -> PWM
Motif.rcPWM (PWM -> PWM) -> PWM -> PWM
forall a b. (a -> b) -> a -> b
$ Motif -> PWM
_pwm Motif
motif
sigma' :: Vector Double
sigma' = Bkgd -> PWM -> Vector Double
Motif.optimalScoresSuffix Bkgd
bg PWM
pwm'
scanMotif :: (BEDLike b, MonadIO m)
=> Genome -> [CutoffMotif] -> ConduitT b BED m ()
scanMotif :: Genome -> [CutoffMotif] -> ConduitT b BED m ()
scanMotif Genome
g [CutoffMotif]
motifs = (b -> ConduitT b BED m ()) -> ConduitT b BED m ()
forall (m :: * -> *) i o r.
Monad m =>
(i -> ConduitT i o m r) -> ConduitT i o m ()
awaitForever ((b -> ConduitT b BED m ()) -> ConduitT b BED m ())
-> (b -> ConduitT b BED m ()) -> ConduitT b BED m ()
forall a b. (a -> b) -> a -> b
$ \b
bed -> do
let (ByteString
chr, Int
start, Int
end) = (b
bedb -> Getting ByteString b ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString b ByteString
forall b. BEDLike b => Lens' b ByteString
chrom, b
bedb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromStart, b
bedb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromEnd)
IO (Either [Char] (DNA IUPAC))
-> ConduitT b BED m (Either [Char] (DNA IUPAC))
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (Genome -> Query -> IO (Either [Char] (DNA IUPAC))
forall (s :: * -> *) a.
BioSeq s a =>
Genome -> Query -> IO (Either [Char] (s a))
getSeq Genome
g (ByteString
chr, Int
start, Int
end)) ConduitT b BED m (Either [Char] (DNA IUPAC))
-> (Either [Char] (DNA IUPAC) -> ConduitT b BED m ())
-> ConduitT b BED m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
Left [Char]
_ -> IO () -> ConduitT b BED m ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ConduitT b BED m ()) -> IO () -> ConduitT b BED m ()
forall a b. (a -> b) -> a -> b
$ Handle -> [Char] -> IO ()
hPutStrLn Handle
stderr ([Char] -> IO ()) -> [Char] -> IO ()
forall a b. (a -> b) -> a -> b
$
[Char]
"Warning: no sequence for region: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Query -> [Char]
forall a. Show a => a -> [Char]
show (ByteString
chr, Int
start, Int
end)
Right DNA IUPAC
dna -> [CutoffMotif]
-> (CutoffMotif -> ConduitT b BED m ()) -> ConduitT b BED m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [CutoffMotif]
motifs ((CutoffMotif -> ConduitT b BED m ()) -> ConduitT b BED m ())
-> (CutoffMotif -> ConduitT b BED m ()) -> ConduitT b BED m ()
forall a b. (a -> b) -> a -> b
$ \CutoffMotif{Double
ByteString
Vector Double
CDF
Bkgd
PWM
_cdf :: CDF
_cutoff :: Double
_background :: Bkgd
_motif_sigma_rc :: Vector Double
_motif_pwm_rc :: PWM
_motif_sigma :: Vector Double
_motif_pwm :: PWM
_motif_name :: ByteString
_cdf :: CutoffMotif -> CDF
_cutoff :: CutoffMotif -> Double
_background :: CutoffMotif -> Bkgd
_motif_sigma_rc :: CutoffMotif -> Vector Double
_motif_pwm_rc :: CutoffMotif -> PWM
_motif_sigma :: CutoffMotif -> Vector Double
_motif_pwm :: CutoffMotif -> PWM
_motif_name :: CutoffMotif -> ByteString
..} -> do
let mkBed :: Bool -> (Int, Double) -> BED
mkBed Bool
str (Int
i, Double
sc) = ByteString
-> Int -> Int -> Maybe ByteString -> Maybe Int -> Maybe Bool -> BED
BED ByteString
chr (Int
start Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i) (Int
start Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n)
(ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just (ByteString -> Maybe ByteString) -> ByteString -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ ByteString
_motif_name) (Int -> Maybe Int
forall a. a -> Maybe a
Just (Int -> Maybe Int) -> Int -> Maybe Int
forall a b. (a -> b) -> a -> b
$ Double -> Int
forall a b. (RealFrac a, Integral b, Floating a) => a -> b
toAffinity (Double -> Int) -> Double -> Int
forall a b. (a -> b) -> a -> b
$ Double
1 Double -> Double -> Double
forall a. Num a => a -> a -> a
- CDF -> Double -> Double
Motif.cdf CDF
_cdf Double
sc)
(Bool -> Maybe Bool
forall a. a -> Maybe a
Just Bool
str)
n :: Int
n = PWM -> Int
Motif.size PWM
_motif_pwm
Vector Double
-> Bkgd
-> PWM
-> DNA IUPAC
-> Double
-> Bool
-> ConduitT b (Int, Double) m ()
forall (m :: * -> *) a i.
Monad m =>
Vector Double
-> Bkgd
-> PWM
-> DNA a
-> Double
-> Bool
-> ConduitT i (Int, Double) m ()
Motif.findTFBSWith Vector Double
_motif_sigma Bkgd
_background PWM
_motif_pwm
(DNA IUPAC
dna :: DNA IUPAC) Double
_cutoff Bool
True ConduitT b (Int, Double) m ()
-> ConduitM (Int, Double) BED m () -> ConduitT b BED m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ((Int, Double) -> BED) -> ConduitM (Int, Double) BED m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
mapC (Bool -> (Int, Double) -> BED
mkBed Bool
True)
Vector Double
-> Bkgd
-> PWM
-> DNA IUPAC
-> Double
-> Bool
-> ConduitT b (Int, Double) m ()
forall (m :: * -> *) a i.
Monad m =>
Vector Double
-> Bkgd
-> PWM
-> DNA a
-> Double
-> Bool
-> ConduitT i (Int, Double) m ()
Motif.findTFBSWith Vector Double
_motif_sigma_rc Bkgd
_background PWM
_motif_pwm_rc
DNA IUPAC
dna Double
_cutoff Bool
True ConduitT b (Int, Double) m ()
-> ConduitM (Int, Double) BED m () -> ConduitT b BED m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ((Int, Double) -> BED) -> ConduitM (Int, Double) BED m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
mapC (Bool -> (Int, Double) -> BED
mkBed Bool
False)
where
toAffinity :: a -> b
toAffinity a
x' = a -> b
forall a b. (RealFrac a, Integral b) => a -> b
round (a -> b) -> a -> b
forall a b. (a -> b) -> a -> b
$ a
sc a -> a -> a
forall a. Num a => a -> a -> a
* a
1000
where
sc :: a
sc = a
1 a -> a -> a
forall a. Fractional a => a -> a -> a
/ (a
1 a -> a -> a
forall a. Num a => a -> a -> a
+ a -> a
forall a. Floating a => a -> a
exp (-(a
x a -> a -> a
forall a. Num a => a -> a -> a
- a
5)))
x :: a
x = a -> a
forall a. Num a => a -> a
negate (a -> a) -> a -> a
forall a b. (a -> b) -> a -> b
$ a -> a -> a
forall a. Floating a => a -> a -> a
logBase a
10 (a -> a) -> a -> a
forall a b. (a -> b) -> a -> b
$ a -> a -> a
forall a. Ord a => a -> a -> a
max a
1e-20 a
x'
{-# INLINE scanMotif #-}
monoColonalize :: Monad m => ConduitT BED BED m ()
monoColonalize :: ConduitT BED BED m ()
monoColonalize = do
Maybe BED
x <- ConduitT BED BED m (Maybe BED)
forall (m :: * -> *) a o. Monad m => ConduitT a o m (Maybe a)
headC
case Maybe BED
x of
Just BED
b -> BED -> ConduitT BED BED m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield BED
b ConduitT BED BED m ()
-> ConduitT BED BED m () -> ConduitT BED BED m ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (BED -> BED -> (BED, [BED])) -> BED -> ConduitT BED BED m ()
forall (m :: * -> *) a accum b.
Monad m =>
(a -> accum -> (accum, [b])) -> accum -> ConduitT a b m ()
concatMapAccumC BED -> BED -> (BED, [BED])
forall s s.
(BEDLike s, BEDLike s, Show s, Show s) =>
s -> s -> (s, [s])
f BED
b
Maybe BED
Nothing -> () -> ConduitT BED BED m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
where
f :: s -> s -> (s, [s])
f s
cur s
prev = case s -> s -> Ordering
forall b1 b2. (BEDLike b1, BEDLike b2) => b1 -> b2 -> Ordering
compareBed s
prev s
cur of
Ordering
GT -> [Char] -> (s, [s])
forall a. HasCallStack => [Char] -> a
error ([Char] -> (s, [s])) -> [Char] -> (s, [s])
forall a b. (a -> b) -> a -> b
$
[Char]
"Input is not sorted: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ s -> [Char]
forall a. Show a => a -> [Char]
show s
prev [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" > " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ s -> [Char]
forall a. Show a => a -> [Char]
show s
cur
Ordering
LT -> (s
cur, [s
cur])
Ordering
_ -> if s
prevs -> Getting (Maybe Bool) s (Maybe Bool) -> Maybe Bool
forall s a. s -> Getting a s a -> a
^.Getting (Maybe Bool) s (Maybe Bool)
forall b. BEDLike b => Lens' b (Maybe Bool)
strand Maybe Bool -> Maybe Bool -> Bool
forall a. Eq a => a -> a -> Bool
== s
curs -> Getting (Maybe Bool) s (Maybe Bool) -> Maybe Bool
forall s a. s -> Getting a s a -> a
^.Getting (Maybe Bool) s (Maybe Bool)
forall b. BEDLike b => Lens' b (Maybe Bool)
strand then (s
cur, []) else (s
cur, [s
cur])
{-# INLINE monoColonalize #-}
newtype BaseMap = BaseMap (M.HashMap B.ByteString BV.BitVector)
baseMap :: PrimMonad m
=> [(B.ByteString, Int)]
-> ConduitT BED o m BaseMap
baseMap :: [(ByteString, Int)] -> ConduitT BED o m BaseMap
baseMap [(ByteString, Int)]
chrs = do
HashMap ByteString (BitMVector (PrimState m))
bvs <- m (HashMap ByteString (BitMVector (PrimState m)))
-> ConduitT BED o m (HashMap ByteString (BitMVector (PrimState m)))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (HashMap ByteString (BitMVector (PrimState m)))
-> ConduitT
BED o m (HashMap ByteString (BitMVector (PrimState m))))
-> m (HashMap ByteString (BitMVector (PrimState m)))
-> ConduitT BED o m (HashMap ByteString (BitMVector (PrimState m)))
forall a b. (a -> b) -> a -> b
$ ([(ByteString, BitMVector (PrimState m))]
-> HashMap ByteString (BitMVector (PrimState m)))
-> m [(ByteString, BitMVector (PrimState m))]
-> m (HashMap ByteString (BitMVector (PrimState m)))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [(ByteString, BitMVector (PrimState m))]
-> HashMap ByteString (BitMVector (PrimState m))
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
M.fromList (m [(ByteString, BitMVector (PrimState m))]
-> m (HashMap ByteString (BitMVector (PrimState m))))
-> m [(ByteString, BitMVector (PrimState m))]
-> m (HashMap ByteString (BitMVector (PrimState m)))
forall a b. (a -> b) -> a -> b
$ [(ByteString, Int)]
-> ((ByteString, Int) -> m (ByteString, BitMVector (PrimState m)))
-> m [(ByteString, BitMVector (PrimState m))]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [(ByteString, Int)]
chrs (((ByteString, Int) -> m (ByteString, BitMVector (PrimState m)))
-> m [(ByteString, BitMVector (PrimState m))])
-> ((ByteString, Int) -> m (ByteString, BitMVector (PrimState m)))
-> m [(ByteString, BitMVector (PrimState m))]
forall a b. (a -> b) -> a -> b
$ \(ByteString
chr, Int
n) -> do
BitMVector (PrimState m)
bv <- Int -> m (BitMVector (PrimState m))
forall (m :: * -> *).
PrimMonad m =>
Int -> m (BitMVector (PrimState m))
BV.zeros Int
n
(ByteString, BitMVector (PrimState m))
-> m (ByteString, BitMVector (PrimState m))
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString
chr, BitMVector (PrimState m)
bv)
(BED -> m ()) -> ConduitT BED o m ()
forall (m :: * -> *) a o.
Monad m =>
(a -> m ()) -> ConduitT a o m ()
mapM_C ((BED -> m ()) -> ConduitT BED o m ())
-> (BED -> m ()) -> ConduitT BED o m ()
forall a b. (a -> b) -> a -> b
$ \BED
bed -> case ByteString
-> HashMap ByteString (BitMVector (PrimState m))
-> Maybe (BitMVector (PrimState m))
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup (BED
bedBED -> Getting ByteString BED ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString BED ByteString
forall b. BEDLike b => Lens' b ByteString
chrom) HashMap ByteString (BitMVector (PrimState m))
bvs of
Maybe (BitMVector (PrimState m))
Nothing -> () -> m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
Just BitMVector (PrimState m)
bv -> if Bool -> Maybe Bool -> Bool
forall a. a -> Maybe a -> a
fromMaybe Bool
True (Maybe Bool -> Bool) -> Maybe Bool -> Bool
forall a b. (a -> b) -> a -> b
$ BED
bedBED -> Getting (Maybe Bool) BED (Maybe Bool) -> Maybe Bool
forall s a. s -> Getting a s a -> a
^.Getting (Maybe Bool) BED (Maybe Bool)
forall b. BEDLike b => Lens' b (Maybe Bool)
strand
then BitMVector (PrimState m) -> Int -> m ()
forall (m :: * -> *).
PrimMonad m =>
BitMVector (PrimState m) -> Int -> m ()
BV.set BitMVector (PrimState m)
bv (Int -> m ()) -> Int -> m ()
forall a b. (a -> b) -> a -> b
$ BED
bedBED -> Getting Int BED Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int BED Int
forall b. BEDLike b => Lens' b Int
chromStart
else BitMVector (PrimState m) -> Int -> m ()
forall (m :: * -> *).
PrimMonad m =>
BitMVector (PrimState m) -> Int -> m ()
BV.set BitMVector (PrimState m)
bv (Int -> m ()) -> Int -> m ()
forall a b. (a -> b) -> a -> b
$ BED
bedBED -> Getting Int BED Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int BED Int
forall b. BEDLike b => Lens' b Int
chromEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
m BaseMap -> ConduitT BED o m BaseMap
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m BaseMap -> ConduitT BED o m BaseMap)
-> m BaseMap -> ConduitT BED o m BaseMap
forall a b. (a -> b) -> a -> b
$ (HashMap ByteString BitVector -> BaseMap)
-> m (HashMap ByteString BitVector) -> m BaseMap
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap HashMap ByteString BitVector -> BaseMap
BaseMap (m (HashMap ByteString BitVector) -> m BaseMap)
-> m (HashMap ByteString BitVector) -> m BaseMap
forall a b. (a -> b) -> a -> b
$ HashMap ByteString (m BitVector)
-> m (HashMap ByteString BitVector)
forall (t :: * -> *) (m :: * -> *) a.
(Traversable t, Monad m) =>
t (m a) -> m (t a)
sequence (HashMap ByteString (m BitVector)
-> m (HashMap ByteString BitVector))
-> HashMap ByteString (m BitVector)
-> m (HashMap ByteString BitVector)
forall a b. (a -> b) -> a -> b
$ (BitMVector (PrimState m) -> m BitVector)
-> HashMap ByteString (BitMVector (PrimState m))
-> HashMap ByteString (m BitVector)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap BitMVector (PrimState m) -> m BitVector
forall (m :: * -> *).
PrimMonad m =>
BitMVector (PrimState m) -> m BitVector
BV.unsafeFreeze HashMap ByteString (BitMVector (PrimState m))
bvs
queryBaseMap :: BEDLike b => b -> BaseMap -> Maybe [Bool]
queryBaseMap :: b -> BaseMap -> Maybe [Bool]
queryBaseMap b
bed (BaseMap HashMap ByteString BitVector
bm) = case ByteString -> HashMap ByteString BitVector -> Maybe BitVector
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup (b
bedb -> Getting ByteString b ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString b ByteString
forall b. BEDLike b => Lens' b ByteString
chrom) HashMap ByteString BitVector
bm of
Maybe BitVector
Nothing -> Maybe [Bool]
forall a. Maybe a
Nothing
Just BitVector
bv ->
let res :: [Bool]
res = (Int -> Bool) -> [Int] -> [Bool]
forall a b. (a -> b) -> [a] -> [b]
map (BitVector
bv BitVector -> Int -> Bool
BV.!) [b
bedb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromStart .. b
bedb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1]
in case b
bedb -> Getting (Maybe Bool) b (Maybe Bool) -> Maybe Bool
forall s a. s -> Getting a s a -> a
^.Getting (Maybe Bool) b (Maybe Bool)
forall b. BEDLike b => Lens' b (Maybe Bool)
strand of
Just Bool
False -> [Bool] -> Maybe [Bool]
forall a. a -> Maybe a
Just ([Bool] -> Maybe [Bool]) -> [Bool] -> Maybe [Bool]
forall a b. (a -> b) -> a -> b
$ [Bool] -> [Bool]
forall a. [a] -> [a]
reverse [Bool]
res
Maybe Bool
_ -> [Bool] -> Maybe [Bool]
forall a. a -> Maybe a
Just [Bool]
res
rpkmBed :: (PrimMonad m, BEDLike b, G.Vector v Double)
=> [b] -> ConduitT BED o m (v Double)
rpkmBed :: [b] -> ConduitT BED o m (v Double)
rpkmBed [b]
regions = do
Vector (Int, b)
v <- m (Vector (Int, b)) -> ConduitT BED o m (Vector (Int, b))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (Vector (Int, b)) -> ConduitT BED o m (Vector (Int, b)))
-> m (Vector (Int, b)) -> ConduitT BED o m (Vector (Int, b))
forall a b. (a -> b) -> a -> b
$ do MVector (PrimState m) (Int, b)
v' <- Vector (Int, b) -> m (MVector (PrimState m) (Int, b))
forall (m :: * -> *) a.
PrimMonad m =>
Vector a -> m (MVector (PrimState m) a)
V.unsafeThaw (Vector (Int, b) -> m (MVector (PrimState m) (Int, b)))
-> ([b] -> Vector (Int, b))
-> [b]
-> m (MVector (PrimState m) (Int, b))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Int, b)] -> Vector (Int, b)
forall a. [a] -> Vector a
V.fromList ([(Int, b)] -> Vector (Int, b))
-> ([b] -> [(Int, b)]) -> [b] -> Vector (Int, b)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Int] -> [b] -> [(Int, b)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
0..] ([b] -> m (MVector (PrimState m) (Int, b)))
-> [b] -> m (MVector (PrimState m) (Int, b))
forall a b. (a -> b) -> a -> b
$ [b]
regions
Comparison (Int, b) -> MVector (PrimState m) (Int, b) -> m ()
forall (m :: * -> *) (v :: * -> * -> *) e.
(PrimMonad m, MVector v e) =>
Comparison e -> v (PrimState m) e -> m ()
I.sortBy (b -> b -> Ordering
forall b1 b2. (BEDLike b1, BEDLike b2) => b1 -> b2 -> Ordering
compareBed (b -> b -> Ordering) -> ((Int, b) -> b) -> Comparison (Int, b)
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Int, b) -> b
forall a b. (a, b) -> b
snd) MVector (PrimState m) (Int, b)
v'
MVector (PrimState m) (Int, b) -> m (Vector (Int, b))
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> m (Vector a)
V.unsafeFreeze MVector (PrimState m) (Int, b)
v'
let (Vector Int
idx, Vector b
sortedRegions) = Vector (Int, b) -> (Vector Int, Vector b)
forall a b. Vector (a, b) -> (Vector a, Vector b)
V.unzip Vector (Int, b)
v
n :: Int
n = Vector Int -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
G.length Vector Int
idx
Vector Double
rc <- Sorted (Vector b) -> ConduitT BED o m (Vector Double)
forall (m :: * -> *) b (v :: * -> *) o.
(PrimMonad m, BEDLike b, Vector v Double) =>
Sorted (Vector b) -> ConduitT BED o m (v Double)
rpkmSortedBed (Sorted (Vector b) -> ConduitT BED o m (Vector Double))
-> Sorted (Vector b) -> ConduitT BED o m (Vector Double)
forall a b. (a -> b) -> a -> b
$ Vector b -> Sorted (Vector b)
forall b. b -> Sorted b
Sorted Vector b
sortedRegions
m (v Double) -> ConduitT BED o m (v Double)
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (v Double) -> ConduitT BED o m (v Double))
-> m (v Double) -> ConduitT BED o m (v Double)
forall a b. (a -> b) -> a -> b
$ do
Mutable v (PrimState m) Double
result <- Int -> m (Mutable v (PrimState m) Double)
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
Int -> m (v (PrimState m) a)
GM.new Int
n
Vector (m ()) -> m ()
forall (m :: * -> *) (v :: * -> *) a.
(Monad m, Vector v (m a)) =>
v (m a) -> m ()
G.sequence_ (Vector (m ()) -> m ())
-> (Vector Int -> Vector (m ())) -> Vector Int -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int -> Int -> m ()) -> Vector Int -> Vector (m ())
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(Int -> a -> b) -> v a -> v b
G.imap (\Int
x Int
i -> Mutable v (PrimState m) Double -> Int -> Double -> m ()
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
v (PrimState m) a -> Int -> a -> m ()
GM.unsafeWrite Mutable v (PrimState m) Double
result Int
i (Vector Double
rc Vector Double -> Int -> Double
forall a. Unbox a => Vector a -> Int -> a
U.! Int
x)) (Vector Int -> m ()) -> Vector Int -> m ()
forall a b. (a -> b) -> a -> b
$ Vector Int
idx
Mutable v (PrimState m) Double -> m (v Double)
forall (m :: * -> *) (v :: * -> *) a.
(PrimMonad m, Vector v a) =>
Mutable v (PrimState m) a -> m (v a)
G.unsafeFreeze Mutable v (PrimState m) Double
result
{-# INLINE rpkmBed #-}
rpkmSortedBed :: (PrimMonad m, BEDLike b, G.Vector v Double)
=> Sorted (V.Vector b) -> ConduitT BED o m (v Double)
rpkmSortedBed :: Sorted (Vector b) -> ConduitT BED o m (v Double)
rpkmSortedBed (Sorted Vector b
regions) = do
Mutable v (PrimState m) Double
vec <- m (Mutable v (PrimState m) Double)
-> ConduitT BED o m (Mutable v (PrimState m) Double)
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (Mutable v (PrimState m) Double)
-> ConduitT BED o m (Mutable v (PrimState m) Double))
-> m (Mutable v (PrimState m) Double)
-> ConduitT BED o m (Mutable v (PrimState m) Double)
forall a b. (a -> b) -> a -> b
$ Int -> Double -> m (Mutable v (PrimState m) Double)
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
Int -> a -> m (v (PrimState m) a)
GM.replicate Int
l Double
0
Int
n <- (Int -> BED -> m Int) -> Int -> ConduitT BED o m Int
forall (m :: * -> *) a b o.
Monad m =>
(a -> b -> m a) -> a -> ConduitT b o m a
foldMC (Mutable v (PrimState m) Double -> Int -> BED -> m Int
forall (m :: * -> *) (v :: * -> * -> *) a b s.
(PrimMonad m, MVector v a, Num a, Enum b, BEDLike s) =>
v (PrimState m) a -> b -> s -> m b
count Mutable v (PrimState m) Double
vec) (Int
0 :: Int)
let factor :: Double
factor = Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
1e9
m (v Double) -> ConduitT BED o m (v Double)
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (v Double) -> ConduitT BED o m (v Double))
-> m (v Double) -> ConduitT BED o m (v Double)
forall a b. (a -> b) -> a -> b
$ (v Double -> v Double) -> m (v Double) -> m (v Double)
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM ((Int -> Double -> Double) -> v Double -> v Double
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(Int -> a -> b) -> v a -> v b
G.imap (\Int
i Double
x -> Double
x Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
factor Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Double) -> (b -> Int) -> b -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. b -> Int
forall b. BEDLike b => b -> Int
size) (Vector b
regions Vector b -> Int -> b
forall a. Vector a -> Int -> a
V.! Int
i)))
(m (v Double) -> m (v Double)) -> m (v Double) -> m (v Double)
forall a b. (a -> b) -> a -> b
$ Mutable v (PrimState m) Double -> m (v Double)
forall (m :: * -> *) (v :: * -> *) a.
(PrimMonad m, Vector v a) =>
Mutable v (PrimState m) a -> m (v a)
G.unsafeFreeze Mutable v (PrimState m) Double
vec
where
count :: v (PrimState m) a -> b -> s -> m b
count v (PrimState m) a
v b
nTags s
tag = do
let p :: Int
p | s
tags -> Getting (Maybe Bool) s (Maybe Bool) -> Maybe Bool
forall s a. s -> Getting a s a -> a
^.Getting (Maybe Bool) s (Maybe Bool)
forall b. BEDLike b => Lens' b (Maybe Bool)
strand Maybe Bool -> Maybe Bool -> Bool
forall a. Eq a => a -> a -> Bool
== Bool -> Maybe Bool
forall a. a -> Maybe a
Just Bool
True = s
tags -> Getting Int s Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int s Int
forall b. BEDLike b => Lens' b Int
chromStart
| s
tags -> Getting (Maybe Bool) s (Maybe Bool) -> Maybe Bool
forall s a. s -> Getting a s a -> a
^.Getting (Maybe Bool) s (Maybe Bool)
forall b. BEDLike b => Lens' b (Maybe Bool)
strand Maybe Bool -> Maybe Bool -> Bool
forall a. Eq a => a -> a -> Bool
== Bool -> Maybe Bool
forall a. a -> Maybe a
Just Bool
False = s
tags -> Getting Int s Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int s Int
forall b. BEDLike b => Lens' b Int
chromEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
| Bool
otherwise = [Char] -> Int
forall a. HasCallStack => [Char] -> a
error [Char]
"Unkown strand"
xs :: [Int]
xs = [[Int]] -> [Int]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[Int]] -> [Int]) -> [[Int]] -> [Int]
forall a b. (a -> b) -> a -> b
$ IntervalMap (Interval Int) [Int] -> [[Int]]
forall k v. IntervalMap k v -> [v]
IM.elems (IntervalMap (Interval Int) [Int] -> [[Int]])
-> IntervalMap (Interval Int) [Int] -> [[Int]]
forall a b. (a -> b) -> a -> b
$
IntervalMap (Interval Int) [Int]
-> Int -> IntervalMap (Interval Int) [Int]
forall k e v.
Interval k e =>
IntervalMap k v -> e -> IntervalMap k v
IM.containing (IntervalMap (Interval Int) [Int]
-> ByteString
-> HashMap ByteString (IntervalMap (Interval Int) [Int])
-> IntervalMap (Interval Int) [Int]
forall k v. (Eq k, Hashable k) => v -> k -> HashMap k v -> v
M.lookupDefault IntervalMap (Interval Int) [Int]
forall k v. IntervalMap k v
IM.empty (s
tags -> Getting ByteString s ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString s ByteString
forall b. BEDLike b => Lens' b ByteString
chrom) HashMap ByteString (IntervalMap (Interval Int) [Int])
intervalMap) Int
p
v (PrimState m) a -> [Int] -> m ()
forall (t :: * -> *) (m :: * -> *) (v :: * -> * -> *) a.
(Foldable t, PrimMonad m, MVector v a, Num a) =>
v (PrimState m) a -> t Int -> m ()
addOne v (PrimState m) a
v [Int]
xs
b -> m b
forall (m :: * -> *) a. Monad m => a -> m a
return (b -> m b) -> b -> m b
forall a b. (a -> b) -> a -> b
$ b -> b
forall a. Enum a => a -> a
succ b
nTags
intervalMap :: HashMap ByteString (IntervalMap (Interval Int) [Int])
intervalMap = ([Int] -> [Int] -> [Int])
-> Sorted [(b, [Int])]
-> HashMap ByteString (IntervalMap (Interval Int) [Int])
forall b (f :: * -> *) a.
(BEDLike b, Foldable f) =>
(a -> a -> a) -> Sorted (f (b, a)) -> BEDTree a
sortedBedToTree [Int] -> [Int] -> [Int]
forall a. [a] -> [a] -> [a]
(++) (Sorted [(b, [Int])]
-> HashMap ByteString (IntervalMap (Interval Int) [Int]))
-> (Int -> Sorted [(b, [Int])])
-> Int
-> HashMap ByteString (IntervalMap (Interval Int) [Int])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(b, [Int])] -> Sorted [(b, [Int])]
forall b. b -> Sorted b
Sorted ([(b, [Int])] -> Sorted [(b, [Int])])
-> (Int -> [(b, [Int])]) -> Int -> Sorted [(b, [Int])]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector (b, [Int]) -> [(b, [Int])]
forall (v :: * -> *) a. Vector v a => v a -> [a]
G.toList (Vector (b, [Int]) -> [(b, [Int])])
-> (Int -> Vector (b, [Int])) -> Int -> [(b, [Int])]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector b -> Vector [Int] -> Vector (b, [Int])
forall (v :: * -> *) a b.
(Vector v a, Vector v b, Vector v (a, b)) =>
v a -> v b -> v (a, b)
G.zip Vector b
regions (Vector [Int] -> Vector (b, [Int]))
-> (Int -> Vector [Int]) -> Int -> Vector (b, [Int])
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
(Int -> [Int]) -> Vector Int -> Vector [Int]
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
G.map Int -> [Int]
forall (m :: * -> *) a. Monad m => a -> m a
return (Vector Int -> Vector [Int])
-> (Int -> Vector Int) -> Int -> Vector [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Int -> Vector Int
forall (v :: * -> *) a. (Vector v a, Num a) => a -> Int -> v a
G.enumFromN Int
0 (Int -> HashMap ByteString (IntervalMap (Interval Int) [Int]))
-> Int -> HashMap ByteString (IntervalMap (Interval Int) [Int])
forall a b. (a -> b) -> a -> b
$ Int
l
addOne :: v (PrimState m) a -> t Int -> m ()
addOne v (PrimState m) a
v' = (Int -> m ()) -> t Int -> m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ ((Int -> m ()) -> t Int -> m ()) -> (Int -> m ()) -> t Int -> m ()
forall a b. (a -> b) -> a -> b
$ \Int
x -> v (PrimState m) a -> Int -> m a
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
v (PrimState m) a -> Int -> m a
GM.unsafeRead v (PrimState m) a
v' Int
x m a -> (a -> m ()) -> m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= v (PrimState m) a -> Int -> a -> m ()
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
v (PrimState m) a -> Int -> a -> m ()
GM.unsafeWrite v (PrimState m) a
v' Int
x (a -> m ()) -> (a -> a) -> a -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> a -> a
forall a. Num a => a -> a -> a
+a
1)
l :: Int
l = Vector b -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
G.length Vector b
regions
{-# INLINE rpkmSortedBed #-}
countTagsBinBed :: (Integral a, PrimMonad m, G.Vector v a, BEDLike b)
=> Int
-> [b]
-> ConduitT BED o m ([v a], Int)
countTagsBinBed :: Int -> [b] -> ConduitT BED o m ([v a], Int)
countTagsBinBed Int
k [b]
beds = do
Vector (Mutable v (PrimState m) a, Int -> Int)
vs <- m (Vector (Mutable v (PrimState m) a, Int -> Int))
-> ConduitT
BED o m (Vector (Mutable v (PrimState m) a, Int -> Int))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (Vector (Mutable v (PrimState m) a, Int -> Int))
-> ConduitT
BED o m (Vector (Mutable v (PrimState m) a, Int -> Int)))
-> m (Vector (Mutable v (PrimState m) a, Int -> Int))
-> ConduitT
BED o m (Vector (Mutable v (PrimState m) a, Int -> Int))
forall a b. (a -> b) -> a -> b
$ ([(Mutable v (PrimState m) a, Int -> Int)]
-> Vector (Mutable v (PrimState m) a, Int -> Int))
-> m [(Mutable v (PrimState m) a, Int -> Int)]
-> m (Vector (Mutable v (PrimState m) a, Int -> Int))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [(Mutable v (PrimState m) a, Int -> Int)]
-> Vector (Mutable v (PrimState m) a, Int -> Int)
forall a. [a] -> Vector a
V.fromList (m [(Mutable v (PrimState m) a, Int -> Int)]
-> m (Vector (Mutable v (PrimState m) a, Int -> Int)))
-> m [(Mutable v (PrimState m) a, Int -> Int)]
-> m (Vector (Mutable v (PrimState m) a, Int -> Int))
forall a b. (a -> b) -> a -> b
$ [b]
-> (b -> m (Mutable v (PrimState m) a, Int -> Int))
-> m [(Mutable v (PrimState m) a, Int -> Int)]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [b]
beds ((b -> m (Mutable v (PrimState m) a, Int -> Int))
-> m [(Mutable v (PrimState m) a, Int -> Int)])
-> (b -> m (Mutable v (PrimState m) a, Int -> Int))
-> m [(Mutable v (PrimState m) a, Int -> Int)]
forall a b. (a -> b) -> a -> b
$ \b
bed -> do
let start :: Int
start = b
bedb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromStart
num :: Int
num = ((b
bedb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromEnd) Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
start) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
k
index :: Int -> Int
index Int
i = (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
start) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
k
Mutable v (PrimState m) a
v <- Int -> a -> m (Mutable v (PrimState m) a)
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
Int -> a -> m (v (PrimState m) a)
GM.replicate Int
num a
0
(Mutable v (PrimState m) a, Int -> Int)
-> m (Mutable v (PrimState m) a, Int -> Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Mutable v (PrimState m) a
v, Int -> Int
index)
Int
nTags <- (Int -> BED -> m Int) -> Int -> ConduitT BED o m Int
forall (m :: * -> *) a b o.
Monad m =>
(a -> b -> m a) -> a -> ConduitT b o m a
foldMC (Vector (Mutable v (PrimState m) a, Int -> Int)
-> Int -> BED -> m Int
forall (m :: * -> *) (v :: * -> * -> *) a b s (v :: * -> *).
(PrimMonad m, Num a, Num b, BEDLike s,
Vector v (v (PrimState m) a, Int -> Int), MVector v a) =>
v (v (PrimState m) a, Int -> Int) -> b -> s -> m b
f Vector (Mutable v (PrimState m) a, Int -> Int)
vs) Int
0
[v a]
rc <- m [v a] -> ConduitT BED o m [v a]
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m [v a] -> ConduitT BED o m [v a])
-> m [v a] -> ConduitT BED o m [v a]
forall a b. (a -> b) -> a -> b
$ ((Mutable v (PrimState m) a, Int -> Int) -> m (v a))
-> [(Mutable v (PrimState m) a, Int -> Int)] -> m [v a]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (Mutable v (PrimState m) a -> m (v a)
forall (m :: * -> *) (v :: * -> *) a.
(PrimMonad m, Vector v a) =>
Mutable v (PrimState m) a -> m (v a)
G.unsafeFreeze (Mutable v (PrimState m) a -> m (v a))
-> ((Mutable v (PrimState m) a, Int -> Int)
-> Mutable v (PrimState m) a)
-> (Mutable v (PrimState m) a, Int -> Int)
-> m (v a)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Mutable v (PrimState m) a, Int -> Int)
-> Mutable v (PrimState m) a
forall a b. (a, b) -> a
fst) ([(Mutable v (PrimState m) a, Int -> Int)] -> m [v a])
-> [(Mutable v (PrimState m) a, Int -> Int)] -> m [v a]
forall a b. (a -> b) -> a -> b
$ Vector (Mutable v (PrimState m) a, Int -> Int)
-> [(Mutable v (PrimState m) a, Int -> Int)]
forall (v :: * -> *) a. Vector v a => v a -> [a]
G.toList Vector (Mutable v (PrimState m) a, Int -> Int)
vs
([v a], Int) -> ConduitT BED o m ([v a], Int)
forall (m :: * -> *) a. Monad m => a -> m a
return ([v a]
rc, Int
nTags)
where
f :: v (v (PrimState m) a, Int -> Int) -> b -> s -> m b
f v (v (PrimState m) a, Int -> Int)
vs b
n s
bed = do
let pos :: Int
pos | s
beds -> Getting (Maybe Bool) s (Maybe Bool) -> Maybe Bool
forall s a. s -> Getting a s a -> a
^.Getting (Maybe Bool) s (Maybe Bool)
forall b. BEDLike b => Lens' b (Maybe Bool)
strand Maybe Bool -> Maybe Bool -> Bool
forall a. Eq a => a -> a -> Bool
== Bool -> Maybe Bool
forall a. a -> Maybe a
Just Bool
True = s
beds -> Getting Int s Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int s Int
forall b. BEDLike b => Lens' b Int
chromStart
| s
beds -> Getting (Maybe Bool) s (Maybe Bool) -> Maybe Bool
forall s a. s -> Getting a s a -> a
^.Getting (Maybe Bool) s (Maybe Bool)
forall b. BEDLike b => Lens' b (Maybe Bool)
strand Maybe Bool -> Maybe Bool -> Bool
forall a. Eq a => a -> a -> Bool
== Bool -> Maybe Bool
forall a. a -> Maybe a
Just Bool
False = s
beds -> Getting Int s Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int s Int
forall b. BEDLike b => Lens' b Int
chromEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
| Bool
otherwise = [Char] -> Int
forall a. HasCallStack => [Char] -> a
error [Char]
"unkown strand."
overlaps :: [Int]
overlaps = [[Int]] -> [Int]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[Int]] -> [Int]) -> [[Int]] -> [Int]
forall a b. (a -> b) -> a -> b
$ IntervalMap (Interval Int) [Int] -> [[Int]]
forall k v. IntervalMap k v -> [v]
IM.elems (IntervalMap (Interval Int) [Int] -> [[Int]])
-> IntervalMap (Interval Int) [Int] -> [[Int]]
forall a b. (a -> b) -> a -> b
$ IntervalMap (Interval Int) [Int]
-> Int -> IntervalMap (Interval Int) [Int]
forall k e v.
Interval k e =>
IntervalMap k v -> e -> IntervalMap k v
IM.containing
(IntervalMap (Interval Int) [Int]
-> ByteString
-> HashMap ByteString (IntervalMap (Interval Int) [Int])
-> IntervalMap (Interval Int) [Int]
forall k v. (Eq k, Hashable k) => v -> k -> HashMap k v -> v
M.lookupDefault IntervalMap (Interval Int) [Int]
forall k v. IntervalMap k v
IM.empty (s
beds -> Getting ByteString s ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString s ByteString
forall b. BEDLike b => Lens' b ByteString
chrom) HashMap ByteString (IntervalMap (Interval Int) [Int])
intervalMap) Int
pos
[Int] -> (Int -> m ()) -> m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Int]
overlaps ((Int -> m ()) -> m ()) -> (Int -> m ()) -> m ()
forall a b. (a -> b) -> a -> b
$ \Int
x -> do
let (v (PrimState m) a
v, Int -> Int
idxFn) = v (v (PrimState m) a, Int -> Int)
vs v (v (PrimState m) a, Int -> Int)
-> Int -> (v (PrimState m) a, Int -> Int)
forall (v :: * -> *) a. Vector v a => v a -> Int -> a
`G.unsafeIndex` Int
x
i :: Int
i = let i' :: Int
i' = Int -> Int
idxFn Int
pos
l :: Int
l = v (PrimState m) a -> Int
forall (v :: * -> * -> *) a s. MVector v a => v s a -> Int
GM.length v (PrimState m) a
v
in if Int
i' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
l then Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1 else Int
i'
v (PrimState m) a -> (a -> a) -> Int -> m ()
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
v (PrimState m) a -> (a -> a) -> Int -> m ()
GM.unsafeModify v (PrimState m) a
v (a -> a -> a
forall a. Num a => a -> a -> a
+a
1) Int
i
b -> m b
forall (m :: * -> *) a. Monad m => a -> m a
return (b -> m b) -> b -> m b
forall a b. (a -> b) -> a -> b
$ b
n b -> b -> b
forall a. Num a => a -> a -> a
+ b
1
intervalMap :: HashMap ByteString (IntervalMap (Interval Int) [Int])
intervalMap = ([Int] -> [Int] -> [Int])
-> [(b, [Int])]
-> HashMap ByteString (IntervalMap (Interval Int) [Int])
forall b a. BEDLike b => (a -> a -> a) -> [(b, a)] -> BEDTree a
bedToTree [Int] -> [Int] -> [Int]
forall a. [a] -> [a] -> [a]
(++) ([(b, [Int])]
-> HashMap ByteString (IntervalMap (Interval Int) [Int]))
-> [(b, [Int])]
-> HashMap ByteString (IntervalMap (Interval Int) [Int])
forall a b. (a -> b) -> a -> b
$ [b] -> [[Int]] -> [(b, [Int])]
forall a b. [a] -> [b] -> [(a, b)]
zip [b]
beds ([[Int]] -> [(b, [Int])]) -> [[Int]] -> [(b, [Int])]
forall a b. (a -> b) -> a -> b
$ (Int -> [Int]) -> [Int] -> [[Int]]
forall a b. (a -> b) -> [a] -> [b]
map Int -> [Int]
forall (m :: * -> *) a. Monad m => a -> m a
return [Int
0..]
{-# INLINE countTagsBinBed #-}
countTagsBinBed' :: (Integral a, PrimMonad m, G.Vector v a, BEDLike b1, BEDLike b2)
=> Int
-> [b1]
-> ConduitT b2 o m ([v a], Int)
countTagsBinBed' :: Int -> [b1] -> ConduitT b2 o m ([v a], Int)
countTagsBinBed' Int
k [b1]
beds = do
[(Mutable v (PrimState m) a, Int -> Int)]
initRC <- m [(Mutable v (PrimState m) a, Int -> Int)]
-> ConduitT b2 o m [(Mutable v (PrimState m) a, Int -> Int)]
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m [(Mutable v (PrimState m) a, Int -> Int)]
-> ConduitT b2 o m [(Mutable v (PrimState m) a, Int -> Int)])
-> m [(Mutable v (PrimState m) a, Int -> Int)]
-> ConduitT b2 o m [(Mutable v (PrimState m) a, Int -> Int)]
forall a b. (a -> b) -> a -> b
$ [b1]
-> (b1 -> m (Mutable v (PrimState m) a, Int -> Int))
-> m [(Mutable v (PrimState m) a, Int -> Int)]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [b1]
beds ((b1 -> m (Mutable v (PrimState m) a, Int -> Int))
-> m [(Mutable v (PrimState m) a, Int -> Int)])
-> (b1 -> m (Mutable v (PrimState m) a, Int -> Int))
-> m [(Mutable v (PrimState m) a, Int -> Int)]
forall a b. (a -> b) -> a -> b
$ \b1
bed -> do
let start :: Int
start = b1
bedb1 -> Getting Int b1 Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b1 Int
forall b. BEDLike b => Lens' b Int
chromStart
end :: Int
end = b1
bedb1 -> Getting Int b1 Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b1 Int
forall b. BEDLike b => Lens' b Int
chromEnd
num :: Int
num = (Int
end Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
start) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
k
index :: Int -> Int
index Int
i = (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
start) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
k
Mutable v (PrimState m) a
v <- Int -> a -> m (Mutable v (PrimState m) a)
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
Int -> a -> m (v (PrimState m) a)
GM.replicate Int
num a
0
(Mutable v (PrimState m) a, Int -> Int)
-> m (Mutable v (PrimState m) a, Int -> Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Mutable v (PrimState m) a
v, Int -> Int
index)
Int
-> Vector (Mutable v (PrimState m) a, Int -> Int)
-> ConduitT b2 o m ([v a], Int)
forall (m :: * -> *) a b (v :: * -> *) (v :: * -> *) i o.
(PrimMonad m, Num a, Num b, Vector v a, BEDLike i,
Vector v (Mutable v (PrimState m) a, Int -> Int)) =>
b
-> v (Mutable v (PrimState m) a, Int -> Int)
-> ConduitT i o m ([v a], b)
sink Int
0 (Vector (Mutable v (PrimState m) a, Int -> Int)
-> ConduitT b2 o m ([v a], Int))
-> Vector (Mutable v (PrimState m) a, Int -> Int)
-> ConduitT b2 o m ([v a], Int)
forall a b. (a -> b) -> a -> b
$ [(Mutable v (PrimState m) a, Int -> Int)]
-> Vector (Mutable v (PrimState m) a, Int -> Int)
forall a. [a] -> Vector a
V.fromList [(Mutable v (PrimState m) a, Int -> Int)]
initRC
where
sink :: b
-> v (Mutable v (PrimState m) a, Int -> Int)
-> ConduitT i o m ([v a], b)
sink !b
nTags v (Mutable v (PrimState m) a, Int -> Int)
vs = do
Maybe i
tag <- ConduitT i o m (Maybe i)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await
case Maybe i
tag of
Just i
bed -> do
let chr :: ByteString
chr = i
bedi -> Getting ByteString i ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString i ByteString
forall b. BEDLike b => Lens' b ByteString
chrom
start :: Int
start = i
bedi -> Getting Int i Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int i Int
forall b. BEDLike b => Lens' b Int
chromStart
end :: Int
end = i
bedi -> Getting Int i Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int i Int
forall b. BEDLike b => Lens' b Int
chromEnd
overlaps :: [Int]
overlaps = [[Int]] -> [Int]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[Int]] -> [Int]) -> [[Int]] -> [Int]
forall a b. (a -> b) -> a -> b
$ IntervalMap (Interval Int) [Int] -> [[Int]]
forall k v. IntervalMap k v -> [v]
IM.elems (IntervalMap (Interval Int) [Int] -> [[Int]])
-> IntervalMap (Interval Int) [Int] -> [[Int]]
forall a b. (a -> b) -> a -> b
$ IntervalMap (Interval Int) [Int]
-> Interval Int -> IntervalMap (Interval Int) [Int]
forall k e v.
Interval k e =>
IntervalMap k v -> k -> IntervalMap k v
IM.intersecting
(IntervalMap (Interval Int) [Int]
-> ByteString
-> HashMap ByteString (IntervalMap (Interval Int) [Int])
-> IntervalMap (Interval Int) [Int]
forall k v. (Eq k, Hashable k) => v -> k -> HashMap k v -> v
M.lookupDefault IntervalMap (Interval Int) [Int]
forall k v. IntervalMap k v
IM.empty ByteString
chr HashMap ByteString (IntervalMap (Interval Int) [Int])
intervalMap) (Interval Int -> IntervalMap (Interval Int) [Int])
-> Interval Int -> IntervalMap (Interval Int) [Int]
forall a b. (a -> b) -> a -> b
$ Int -> Int -> Interval Int
forall a. a -> a -> Interval a
IM.IntervalCO Int
start Int
end
m () -> ConduitT i o m ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m () -> ConduitT i o m ()) -> m () -> ConduitT i o m ()
forall a b. (a -> b) -> a -> b
$ [Int] -> (Int -> m ()) -> m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Int]
overlaps ((Int -> m ()) -> m ()) -> (Int -> m ()) -> m ()
forall a b. (a -> b) -> a -> b
$ \Int
x -> do
let (Mutable v (PrimState m) a
v, Int -> Int
idxFn) = v (Mutable v (PrimState m) a, Int -> Int)
vs v (Mutable v (PrimState m) a, Int -> Int)
-> Int -> (Mutable v (PrimState m) a, Int -> Int)
forall (v :: * -> *) a. Vector v a => v a -> Int -> a
`G.unsafeIndex` Int
x
lo :: Int
lo = let i :: Int
i = Int -> Int
idxFn Int
start
in if Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 then Int
0 else Int
i
hi :: Int
hi = let i :: Int
i = Int -> Int
idxFn Int
end
l :: Int
l = Mutable v (PrimState m) a -> Int
forall (v :: * -> * -> *) a s. MVector v a => v s a -> Int
GM.length Mutable v (PrimState m) a
v
in if Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
l then Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1 else Int
i
[Int] -> (Int -> m ()) -> m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Int
lo..Int
hi] ((Int -> m ()) -> m ()) -> (Int -> m ()) -> m ()
forall a b. (a -> b) -> a -> b
$ \Int
i ->
Mutable v (PrimState m) a -> Int -> m a
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
v (PrimState m) a -> Int -> m a
GM.unsafeRead Mutable v (PrimState m) a
v Int
i m a -> (a -> m ()) -> m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Mutable v (PrimState m) a -> Int -> a -> m ()
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
v (PrimState m) a -> Int -> a -> m ()
GM.unsafeWrite Mutable v (PrimState m) a
v Int
i (a -> m ()) -> (a -> a) -> a -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> a -> a
forall a. Num a => a -> a -> a
+a
1)
b
-> v (Mutable v (PrimState m) a, Int -> Int)
-> ConduitT i o m ([v a], b)
sink (b
nTagsb -> b -> b
forall a. Num a => a -> a -> a
+b
1) v (Mutable v (PrimState m) a, Int -> Int)
vs
Maybe i
_ -> do [v a]
rc <- m [v a] -> ConduitT i o m [v a]
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m [v a] -> ConduitT i o m [v a])
-> m [v a] -> ConduitT i o m [v a]
forall a b. (a -> b) -> a -> b
$ ((Mutable v (PrimState m) a, Int -> Int) -> m (v a))
-> [(Mutable v (PrimState m) a, Int -> Int)] -> m [v a]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (Mutable v (PrimState m) a -> m (v a)
forall (m :: * -> *) (v :: * -> *) a.
(PrimMonad m, Vector v a) =>
Mutable v (PrimState m) a -> m (v a)
G.unsafeFreeze (Mutable v (PrimState m) a -> m (v a))
-> ((Mutable v (PrimState m) a, Int -> Int)
-> Mutable v (PrimState m) a)
-> (Mutable v (PrimState m) a, Int -> Int)
-> m (v a)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Mutable v (PrimState m) a, Int -> Int)
-> Mutable v (PrimState m) a
forall a b. (a, b) -> a
fst) ([(Mutable v (PrimState m) a, Int -> Int)] -> m [v a])
-> [(Mutable v (PrimState m) a, Int -> Int)] -> m [v a]
forall a b. (a -> b) -> a -> b
$ v (Mutable v (PrimState m) a, Int -> Int)
-> [(Mutable v (PrimState m) a, Int -> Int)]
forall (v :: * -> *) a. Vector v a => v a -> [a]
G.toList v (Mutable v (PrimState m) a, Int -> Int)
vs
([v a], b) -> ConduitT i o m ([v a], b)
forall (m :: * -> *) a. Monad m => a -> m a
return ([v a]
rc, b
nTags)
intervalMap :: HashMap ByteString (IntervalMap (Interval Int) [Int])
intervalMap = ([Int] -> [Int] -> [Int])
-> [(b1, [Int])]
-> HashMap ByteString (IntervalMap (Interval Int) [Int])
forall b a. BEDLike b => (a -> a -> a) -> [(b, a)] -> BEDTree a
bedToTree [Int] -> [Int] -> [Int]
forall a. [a] -> [a] -> [a]
(++) ([(b1, [Int])]
-> HashMap ByteString (IntervalMap (Interval Int) [Int]))
-> [(b1, [Int])]
-> HashMap ByteString (IntervalMap (Interval Int) [Int])
forall a b. (a -> b) -> a -> b
$ [b1] -> [[Int]] -> [(b1, [Int])]
forall a b. [a] -> [b] -> [(a, b)]
zip [b1]
beds ([[Int]] -> [(b1, [Int])]) -> [[Int]] -> [(b1, [Int])]
forall a b. (a -> b) -> a -> b
$ (Int -> [Int]) -> [Int] -> [[Int]]
forall a b. (a -> b) -> [a] -> [b]
map Int -> [Int]
forall (m :: * -> *) a. Monad m => a -> m a
return [Int
0..]
{-# INLINE countTagsBinBed' #-}
tagCountDistr :: PrimMonad m => G.Vector v Int => ConduitT BED o m (v Int)
tagCountDistr :: ConduitT BED o m (v Int)
tagCountDistr = HashMap ByteString (HashMap Int Int) -> ConduitT BED o m (v Int)
forall (m :: * -> *) s (v :: * -> *) a o.
(PrimMonad m, Num a, Vector v a, BEDLike s) =>
HashMap ByteString (HashMap Int Int) -> ConduitT s o m (v a)
loop HashMap ByteString (HashMap Int Int)
forall k v. HashMap k v
M.empty
where
loop :: HashMap ByteString (HashMap Int Int) -> ConduitT s o m (v a)
loop HashMap ByteString (HashMap Int Int)
m = do
Maybe s
x <- ConduitT s o m (Maybe s)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await
case Maybe s
x of
Just s
bed -> do
let p :: Int
p | Bool -> Maybe Bool -> Bool
forall a. a -> Maybe a -> a
fromMaybe Bool
True (s
beds -> Getting (Maybe Bool) s (Maybe Bool) -> Maybe Bool
forall s a. s -> Getting a s a -> a
^.Getting (Maybe Bool) s (Maybe Bool)
forall b. BEDLike b => Lens' b (Maybe Bool)
strand) = s
beds -> Getting Int s Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int s Int
forall b. BEDLike b => Lens' b Int
chromStart
| Bool
otherwise = Int
1 Int -> Int -> Int
forall a. Num a => a -> a -> a
- s
beds -> Getting Int s Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int s Int
forall b. BEDLike b => Lens' b Int
chromEnd
case ByteString
-> HashMap ByteString (HashMap Int Int) -> Maybe (HashMap Int Int)
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup (s
beds -> Getting ByteString s ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString s ByteString
forall b. BEDLike b => Lens' b ByteString
chrom) HashMap ByteString (HashMap Int Int)
m of
Just HashMap Int Int
table -> HashMap ByteString (HashMap Int Int) -> ConduitT s o m (v a)
loop (HashMap ByteString (HashMap Int Int) -> ConduitT s o m (v a))
-> HashMap ByteString (HashMap Int Int) -> ConduitT s o m (v a)
forall a b. (a -> b) -> a -> b
$ ByteString
-> HashMap Int Int
-> HashMap ByteString (HashMap Int Int)
-> HashMap ByteString (HashMap Int Int)
forall k v.
(Eq k, Hashable k) =>
k -> v -> HashMap k v -> HashMap k v
M.insert (s
beds -> Getting ByteString s ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString s ByteString
forall b. BEDLike b => Lens' b ByteString
chrom) ((Int -> Int -> Int)
-> Int -> Int -> HashMap Int Int -> HashMap Int Int
forall k v.
(Eq k, Hashable k) =>
(v -> v -> v) -> k -> v -> HashMap k v -> HashMap k v
M.insertWith Int -> Int -> Int
forall a. Num a => a -> a -> a
(+) Int
p Int
1 HashMap Int Int
table) HashMap ByteString (HashMap Int Int)
m
Maybe (HashMap Int Int)
_ -> HashMap ByteString (HashMap Int Int) -> ConduitT s o m (v a)
loop (HashMap ByteString (HashMap Int Int) -> ConduitT s o m (v a))
-> HashMap ByteString (HashMap Int Int) -> ConduitT s o m (v a)
forall a b. (a -> b) -> a -> b
$ ByteString
-> HashMap Int Int
-> HashMap ByteString (HashMap Int Int)
-> HashMap ByteString (HashMap Int Int)
forall k v.
(Eq k, Hashable k) =>
k -> v -> HashMap k v -> HashMap k v
M.insert (s
beds -> Getting ByteString s ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString s ByteString
forall b. BEDLike b => Lens' b ByteString
chrom) ([(Int, Int)] -> HashMap Int Int
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
M.fromList [(Int
p,Int
1)]) HashMap ByteString (HashMap Int Int)
m
Maybe s
_ -> m (v a) -> ConduitT s o m (v a)
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (v a) -> ConduitT s o m (v a))
-> m (v a) -> ConduitT s o m (v a)
forall a b. (a -> b) -> a -> b
$ do
Mutable v (PrimState m) a
vec <- Int -> a -> m (Mutable v (PrimState m) a)
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
Int -> a -> m (v (PrimState m) a)
GM.replicate Int
100 a
0
HashMap ByteString (HashMap Int Int)
-> (HashMap Int Int -> m ()) -> m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
F.forM_ HashMap ByteString (HashMap Int Int)
m ((HashMap Int Int -> m ()) -> m ())
-> (HashMap Int Int -> m ()) -> m ()
forall a b. (a -> b) -> a -> b
$ \HashMap Int Int
table ->
HashMap Int Int -> (Int -> m ()) -> m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
F.forM_ HashMap Int Int
table ((Int -> m ()) -> m ()) -> (Int -> m ()) -> m ()
forall a b. (a -> b) -> a -> b
$ \Int
v -> do
let i :: Int
i = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
99 Int
v
Mutable v (PrimState m) a -> Int -> m a
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
v (PrimState m) a -> Int -> m a
GM.unsafeRead Mutable v (PrimState m) a
vec Int
i m a -> (a -> m ()) -> m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Mutable v (PrimState m) a -> Int -> a -> m ()
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
v (PrimState m) a -> Int -> a -> m ()
GM.unsafeWrite Mutable v (PrimState m) a
vec Int
i (a -> m ()) -> (a -> a) -> a -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> a -> a
forall a. Num a => a -> a -> a
+a
1)
Mutable v (PrimState m) a -> m (v a)
forall (m :: * -> *) (v :: * -> *) a.
(PrimMonad m, Vector v a) =>
Mutable v (PrimState m) a -> m (v a)
G.unsafeFreeze Mutable v (PrimState m) a
vec
{-# INLINE tagCountDistr #-}
peakCluster :: (BEDLike b, Monad m)
=> [b]
-> Int
-> Int
-> ConduitT o BED m ()
peakCluster :: [b] -> Int -> Int -> ConduitT o BED m ()
peakCluster [b]
peaks Int
r Int
th = ([BED3] -> BED) -> [BED3] -> ConduitT o BED m ()
forall b (m :: * -> *) a i.
(BEDLike b, Monad m) =>
([b] -> a) -> [b] -> ConduitT i a m ()
mergeBedWith [BED3] -> BED
forall b s. (BEDConvert b, BEDLike s) => [s] -> b
mergeFn [BED3]
peaks' ConduitT o BED m () -> ConduitM BED BED m () -> ConduitT o BED m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| (BED -> Bool) -> ConduitM BED BED m ()
forall (m :: * -> *) a. Monad m => (a -> Bool) -> ConduitT a a m ()
filterC BED -> Bool
forall s. BEDLike s => s -> Bool
g
where
peaks' :: [BED3]
peaks' = (b -> BED3) -> [b] -> [BED3]
forall a b. (a -> b) -> [a] -> [b]
map b -> BED3
forall s. BEDLike s => s -> BED3
f [b]
peaks
f :: s -> BED3
f s
b = let c :: Int
c = (s
bs -> Getting Int s Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int s Int
forall b. BEDLike b => Lens' b Int
chromStart Int -> Int -> Int
forall a. Num a => a -> a -> a
+ s
bs -> Getting Int s Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int s Int
forall b. BEDLike b => Lens' b Int
chromEnd) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
2
in ByteString -> Int -> Int -> BED3
forall b. BEDConvert b => ByteString -> Int -> Int -> b
asBed (s
bs -> Getting ByteString s ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString s ByteString
forall b. BEDLike b => Lens' b ByteString
chrom) (Int
cInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
r) (Int
cInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
r) :: BED3
mergeFn :: [s] -> b
mergeFn [s]
xs = ByteString -> Int -> Int -> b
forall b. BEDConvert b => ByteString -> Int -> Int -> b
asBed ([s] -> s
forall a. [a] -> a
head [s]
xs s -> Getting ByteString s ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^. Getting ByteString s ByteString
forall b. BEDLike b => Lens' b ByteString
chrom) Int
lo Int
hi b -> (b -> b) -> b
forall a b. a -> (a -> b) -> b
& (Maybe Int -> Identity (Maybe Int)) -> b -> Identity b
forall b. BEDLike b => Lens' b (Maybe Int)
score ((Maybe Int -> Identity (Maybe Int)) -> b -> Identity b)
-> Maybe Int -> b -> b
forall s t a b. ASetter s t a b -> b -> s -> t
.~ Int -> Maybe Int
forall a. a -> Maybe a
Just (Int -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Int) -> Int -> Int
forall a b. (a -> b) -> a -> b
$ [s] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [s]
xs)
where
lo :: Int
lo = [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
minimum ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ (s -> Int) -> [s] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map (s -> Getting Int s Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int s Int
forall b. BEDLike b => Lens' b Int
chromStart) [s]
xs
hi :: Int
hi = [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ (s -> Int) -> [s] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map (s -> Getting Int s Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int s Int
forall b. BEDLike b => Lens' b Int
chromEnd) [s]
xs
g :: s -> Bool
g s
b = Maybe Int -> Int
forall a. HasCallStack => Maybe a -> a
fromJust (s
bs -> Getting (Maybe Int) s (Maybe Int) -> Maybe Int
forall s a. s -> Getting a s a -> a
^.Getting (Maybe Int) s (Maybe Int)
forall b. BEDLike b => Lens' b (Maybe Int)
score) Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
th
{-# INLINE peakCluster #-}