{-# LANGUAGE FlexibleContexts      #-}
{-# LANGUAGE OverloadedStrings     #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE BangPatterns #-}

module Bio.Data.Bed.Utils
    ( fetchSeq
    , clipBed
    , CutoffMotif(..)
    , mkCutoffMotif
    , scanMotif 
    , monoColonalize
    , BaseMap(..)
    , baseMap
    , queryBaseMap
    , rpkmBed
    , rpkmSortedBed
    , countTagsBinBed
    , countTagsBinBed'
    , tagCountDistr
    , peakCluster
    ) where

import           Conduit
import           Lens.Micro
import           Control.Monad.State.Strict
import qualified Data.ByteString.Char8 as B
import qualified Data.Foldable                as F
import           Data.Function                (on)
import qualified Data.HashMap.Strict          as M
import qualified Data.IntervalMap.Strict      as IM
import           Data.Maybe                   (fromJust, fromMaybe)
import qualified Data.Vector                  as V
import qualified Data.Vector.Algorithms.Intro as I
import qualified Data.Vector.Generic          as G
import qualified Data.Vector.Generic.Mutable  as GM
import qualified Data.Vector.Unboxed          as U
import System.IO

import           Bio.Data.Bed
import           Bio.Data.Bed.Types
import           Bio.Motif                    (Bkgd (..), Motif (..), CDF, PWM)
import qualified Bio.Motif                    as Motif
import qualified Bio.Motif.Search             as Motif
import           Bio.Seq hiding (length)
import           Bio.Seq.IO
import qualified Bio.Utils.BitVector as BV

clipBed :: (BEDLike b, Monad m)
        => [(B.ByteString, Int)]  -- ^ Chromosome sizes
        -> ConduitT b b m ()
clipBed :: [(ByteString, Int)] -> ConduitT b b m ()
clipBed [(ByteString, Int)]
chrsize = (b -> Maybe b) -> ConduitT b (Element (Maybe b)) m ()
forall (m :: * -> *) mono a.
(Monad m, MonoFoldable mono) =>
(a -> mono) -> ConduitT a (Element mono) m ()
concatMapC b -> Maybe b
forall a. BEDLike a => a -> Maybe a
f
  where
    f :: a -> Maybe a
f a
x = case ByteString -> HashMap ByteString Int -> Maybe Int
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup (a
xa -> Getting ByteString a ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString a ByteString
forall b. BEDLike b => Lens' b ByteString
chrom) HashMap ByteString Int
chrsize' of
        Maybe Int
Nothing -> Maybe a
forall a. Maybe a
Nothing
        Just Int
n -> if a
xa -> Getting Int a Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int a Int
forall b. BEDLike b => Lens' b Int
chromStart Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
n
            then Maybe a
forall a. Maybe a
Nothing
            else a -> Maybe a
forall a. a -> Maybe a
Just (a -> Maybe a) -> a -> Maybe a
forall a b. (a -> b) -> a -> b
$ (Int -> Identity Int) -> a -> Identity a
forall b. BEDLike b => Lens' b Int
chromStart ((Int -> Identity Int) -> a -> Identity a)
-> (Int -> Int) -> a -> a
forall s t a b. ASetter s t a b -> (a -> b) -> s -> t
%~ Int -> Int -> Int
forall a. Ord a => a -> a -> a
max Int
0 (a -> a) -> a -> a
forall a b. (a -> b) -> a -> b
$ (Int -> Identity Int) -> a -> Identity a
forall b. BEDLike b => Lens' b Int
chromEnd ((Int -> Identity Int) -> a -> Identity a)
-> (Int -> Int) -> a -> a
forall s t a b. ASetter s t a b -> (a -> b) -> s -> t
%~ Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
n (a -> a) -> a -> a
forall a b. (a -> b) -> a -> b
$ a
x
    chrsize' :: HashMap ByteString Int
chrsize' = (Int -> Int -> Int)
-> [(ByteString, Int)] -> HashMap ByteString Int
forall k v.
(Eq k, Hashable k) =>
(v -> v -> v) -> [(k, v)] -> HashMap k v
M.fromListWith ([Char] -> Int -> Int -> Int
forall a. HasCallStack => [Char] -> a
error [Char]
"redundant chromosomes") [(ByteString, Int)]
chrsize
{-# INLINE clipBed #-}

-- | retreive sequences
fetchSeq :: BioSeq DNA a
         => Genome
         -> BED
         -> IO (Either String (DNA a))
fetchSeq :: Genome -> BED -> IO (Either [Char] (DNA a))
fetchSeq Genome
g BED
bed = do
    Either [Char] (DNA a)
dna <- Genome -> Query -> IO (Either [Char] (DNA a))
forall (s :: * -> *) a.
BioSeq s a =>
Genome -> Query -> IO (Either [Char] (s a))
getSeq Genome
g (BED
bedBED -> Getting ByteString BED ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString BED ByteString
forall b. BEDLike b => Lens' b ByteString
chrom, BED
bedBED -> Getting Int BED Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int BED Int
forall b. BEDLike b => Lens' b Int
chromStart, BED
bedBED -> Getting Int BED Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int BED Int
forall b. BEDLike b => Lens' b Int
chromEnd)
    Either [Char] (DNA a) -> IO (Either [Char] (DNA a))
forall (m :: * -> *) a. Monad m => a -> m a
return (Either [Char] (DNA a) -> IO (Either [Char] (DNA a)))
-> Either [Char] (DNA a) -> IO (Either [Char] (DNA a))
forall a b. (a -> b) -> a -> b
$ case BED
bedBED -> Getting (Maybe Bool) BED (Maybe Bool) -> Maybe Bool
forall s a. s -> Getting a s a -> a
^.Getting (Maybe Bool) BED (Maybe Bool)
forall b. BEDLike b => Lens' b (Maybe Bool)
strand of
        Just Bool
False -> DNA a -> DNA a
forall alphabet. DNA alphabet -> DNA alphabet
rc (DNA a -> DNA a) -> Either [Char] (DNA a) -> Either [Char] (DNA a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Either [Char] (DNA a)
dna
        Maybe Bool
_          -> Either [Char] (DNA a)
dna
{-# INLINE fetchSeq #-}

-- | Motif with predefined cutoff score. All necessary intermediate data
-- structure for motif scanning are stored.
data CutoffMotif = CutoffMotif
    { CutoffMotif -> ByteString
_motif_name :: B.ByteString
    , CutoffMotif -> PWM
_motif_pwm :: PWM
    , CutoffMotif -> Vector Double
_motif_sigma :: U.Vector Double
    , CutoffMotif -> PWM
_motif_pwm_rc :: PWM
    , CutoffMotif -> Vector Double
_motif_sigma_rc :: U.Vector Double
    , CutoffMotif -> Bkgd
_background :: Bkgd
    , CutoffMotif -> Double
_cutoff :: Double
    , CutoffMotif -> CDF
_cdf :: CDF }

mkCutoffMotif :: Bkgd
              -> Double  -- ^ p-value
              -> Motif -> CutoffMotif
mkCutoffMotif :: Bkgd -> Double -> Motif -> CutoffMotif
mkCutoffMotif Bkgd
bg Double
p Motif
motif = ByteString
-> PWM
-> Vector Double
-> PWM
-> Vector Double
-> Bkgd
-> Double
-> CDF
-> CutoffMotif
CutoffMotif (Motif -> ByteString
_name Motif
motif) (Motif -> PWM
_pwm Motif
motif) Vector Double
sigma PWM
pwm'
    Vector Double
sigma' Bkgd
bg Double
sc (CDF -> CutoffMotif) -> CDF -> CutoffMotif
forall a b. (a -> b) -> a -> b
$ Double -> CDF -> CDF
Motif.truncateCDF (Double
1 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
p Double -> Double -> Double
forall a. Num a => a -> a -> a
* Double
10) CDF
cdf
  where
    cdf :: CDF
cdf = Bkgd -> PWM -> CDF
Motif.scoreCDF Bkgd
bg (PWM -> CDF) -> PWM -> CDF
forall a b. (a -> b) -> a -> b
$ Motif -> PWM
_pwm Motif
motif
    sc :: Double
sc = CDF -> Double -> Double
Motif.cdf' CDF
cdf (Double -> Double) -> Double -> Double
forall a b. (a -> b) -> a -> b
$ Double
1 Double -> Double -> Double
forall a. Num a => a -> a -> a
- Double
p
    sigma :: Vector Double
sigma = Bkgd -> PWM -> Vector Double
Motif.optimalScoresSuffix Bkgd
bg (PWM -> Vector Double) -> PWM -> Vector Double
forall a b. (a -> b) -> a -> b
$ Motif -> PWM
_pwm Motif
motif
    pwm' :: PWM
pwm' = PWM -> PWM
Motif.rcPWM (PWM -> PWM) -> PWM -> PWM
forall a b. (a -> b) -> a -> b
$ Motif -> PWM
_pwm Motif
motif
    sigma' :: Vector Double
sigma' = Bkgd -> PWM -> Vector Double
Motif.optimalScoresSuffix Bkgd
bg PWM
pwm'

-- | Motif score is in [0, 1000]: ( 1 / (1 + exp (-(-logP - 5))) ) * 1000.
scanMotif :: (BEDLike b, MonadIO m)
          => Genome -> [CutoffMotif] -> ConduitT b BED m ()
scanMotif :: Genome -> [CutoffMotif] -> ConduitT b BED m ()
scanMotif Genome
g [CutoffMotif]
motifs = (b -> ConduitT b BED m ()) -> ConduitT b BED m ()
forall (m :: * -> *) i o r.
Monad m =>
(i -> ConduitT i o m r) -> ConduitT i o m ()
awaitForever ((b -> ConduitT b BED m ()) -> ConduitT b BED m ())
-> (b -> ConduitT b BED m ()) -> ConduitT b BED m ()
forall a b. (a -> b) -> a -> b
$ \b
bed -> do
    let (ByteString
chr, Int
start, Int
end) = (b
bedb -> Getting ByteString b ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString b ByteString
forall b. BEDLike b => Lens' b ByteString
chrom, b
bedb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromStart, b
bedb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromEnd)
    IO (Either [Char] (DNA IUPAC))
-> ConduitT b BED m (Either [Char] (DNA IUPAC))
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (Genome -> Query -> IO (Either [Char] (DNA IUPAC))
forall (s :: * -> *) a.
BioSeq s a =>
Genome -> Query -> IO (Either [Char] (s a))
getSeq Genome
g (ByteString
chr, Int
start, Int
end)) ConduitT b BED m (Either [Char] (DNA IUPAC))
-> (Either [Char] (DNA IUPAC) -> ConduitT b BED m ())
-> ConduitT b BED m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
        Left [Char]
_    -> IO () -> ConduitT b BED m ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> ConduitT b BED m ()) -> IO () -> ConduitT b BED m ()
forall a b. (a -> b) -> a -> b
$ Handle -> [Char] -> IO ()
hPutStrLn Handle
stderr ([Char] -> IO ()) -> [Char] -> IO ()
forall a b. (a -> b) -> a -> b
$
            [Char]
"Warning: no sequence for region: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ Query -> [Char]
forall a. Show a => a -> [Char]
show (ByteString
chr, Int
start, Int
end)
        Right DNA IUPAC
dna -> [CutoffMotif]
-> (CutoffMotif -> ConduitT b BED m ()) -> ConduitT b BED m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [CutoffMotif]
motifs ((CutoffMotif -> ConduitT b BED m ()) -> ConduitT b BED m ())
-> (CutoffMotif -> ConduitT b BED m ()) -> ConduitT b BED m ()
forall a b. (a -> b) -> a -> b
$ \CutoffMotif{Double
ByteString
Vector Double
CDF
Bkgd
PWM
_cdf :: CDF
_cutoff :: Double
_background :: Bkgd
_motif_sigma_rc :: Vector Double
_motif_pwm_rc :: PWM
_motif_sigma :: Vector Double
_motif_pwm :: PWM
_motif_name :: ByteString
_cdf :: CutoffMotif -> CDF
_cutoff :: CutoffMotif -> Double
_background :: CutoffMotif -> Bkgd
_motif_sigma_rc :: CutoffMotif -> Vector Double
_motif_pwm_rc :: CutoffMotif -> PWM
_motif_sigma :: CutoffMotif -> Vector Double
_motif_pwm :: CutoffMotif -> PWM
_motif_name :: CutoffMotif -> ByteString
..} -> do
            let mkBed :: Bool -> (Int, Double) -> BED
mkBed Bool
str (Int
i, Double
sc) = ByteString
-> Int -> Int -> Maybe ByteString -> Maybe Int -> Maybe Bool -> BED
BED ByteString
chr (Int
start Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i) (Int
start Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
n)
                    (ByteString -> Maybe ByteString
forall a. a -> Maybe a
Just (ByteString -> Maybe ByteString) -> ByteString -> Maybe ByteString
forall a b. (a -> b) -> a -> b
$ ByteString
_motif_name) (Int -> Maybe Int
forall a. a -> Maybe a
Just (Int -> Maybe Int) -> Int -> Maybe Int
forall a b. (a -> b) -> a -> b
$ Double -> Int
forall a b. (RealFrac a, Integral b, Floating a) => a -> b
toAffinity (Double -> Int) -> Double -> Int
forall a b. (a -> b) -> a -> b
$ Double
1 Double -> Double -> Double
forall a. Num a => a -> a -> a
- CDF -> Double -> Double
Motif.cdf CDF
_cdf Double
sc)
                    (Bool -> Maybe Bool
forall a. a -> Maybe a
Just Bool
str)
                n :: Int
n = PWM -> Int
Motif.size PWM
_motif_pwm
            -- Scan forward strand
            Vector Double
-> Bkgd
-> PWM
-> DNA IUPAC
-> Double
-> Bool
-> ConduitT b (Int, Double) m ()
forall (m :: * -> *) a i.
Monad m =>
Vector Double
-> Bkgd
-> PWM
-> DNA a
-> Double
-> Bool
-> ConduitT i (Int, Double) m ()
Motif.findTFBSWith Vector Double
_motif_sigma Bkgd
_background PWM
_motif_pwm
                (DNA IUPAC
dna :: DNA IUPAC) Double
_cutoff Bool
True ConduitT b (Int, Double) m ()
-> ConduitM (Int, Double) BED m () -> ConduitT b BED m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ((Int, Double) -> BED) -> ConduitM (Int, Double) BED m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
mapC (Bool -> (Int, Double) -> BED
mkBed Bool
True)
            -- Scan reverse strand
            Vector Double
-> Bkgd
-> PWM
-> DNA IUPAC
-> Double
-> Bool
-> ConduitT b (Int, Double) m ()
forall (m :: * -> *) a i.
Monad m =>
Vector Double
-> Bkgd
-> PWM
-> DNA a
-> Double
-> Bool
-> ConduitT i (Int, Double) m ()
Motif.findTFBSWith Vector Double
_motif_sigma_rc Bkgd
_background PWM
_motif_pwm_rc
                DNA IUPAC
dna Double
_cutoff Bool
True ConduitT b (Int, Double) m ()
-> ConduitM (Int, Double) BED m () -> ConduitT b BED m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| ((Int, Double) -> BED) -> ConduitM (Int, Double) BED m ()
forall (m :: * -> *) a b. Monad m => (a -> b) -> ConduitT a b m ()
mapC (Bool -> (Int, Double) -> BED
mkBed Bool
False)
  where
    toAffinity :: a -> b
toAffinity a
x' = a -> b
forall a b. (RealFrac a, Integral b) => a -> b
round (a -> b) -> a -> b
forall a b. (a -> b) -> a -> b
$ a
sc a -> a -> a
forall a. Num a => a -> a -> a
* a
1000
      where
        sc :: a
sc = a
1 a -> a -> a
forall a. Fractional a => a -> a -> a
/ (a
1 a -> a -> a
forall a. Num a => a -> a -> a
+ a -> a
forall a. Floating a => a -> a
exp (-(a
x a -> a -> a
forall a. Num a => a -> a -> a
- a
5)))
        x :: a
x = a -> a
forall a. Num a => a -> a
negate (a -> a) -> a -> a
forall a b. (a -> b) -> a -> b
$ a -> a -> a
forall a. Floating a => a -> a -> a
logBase a
10 (a -> a) -> a -> a
forall a b. (a -> b) -> a -> b
$ a -> a -> a
forall a. Ord a => a -> a -> a
max a
1e-20 a
x'
{-# INLINE scanMotif #-}

-- | process a sorted BED stream, keep only mono-colonal tags
monoColonalize :: Monad m => ConduitT BED BED m ()
monoColonalize :: ConduitT BED BED m ()
monoColonalize = do
    Maybe BED
x <- ConduitT BED BED m (Maybe BED)
forall (m :: * -> *) a o. Monad m => ConduitT a o m (Maybe a)
headC
    case Maybe BED
x of
        Just BED
b -> BED -> ConduitT BED BED m ()
forall (m :: * -> *) o i. Monad m => o -> ConduitT i o m ()
yield BED
b ConduitT BED BED m ()
-> ConduitT BED BED m () -> ConduitT BED BED m ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (BED -> BED -> (BED, [BED])) -> BED -> ConduitT BED BED m ()
forall (m :: * -> *) a accum b.
Monad m =>
(a -> accum -> (accum, [b])) -> accum -> ConduitT a b m ()
concatMapAccumC BED -> BED -> (BED, [BED])
forall s s.
(BEDLike s, BEDLike s, Show s, Show s) =>
s -> s -> (s, [s])
f BED
b
        Maybe BED
Nothing -> () -> ConduitT BED BED m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
  where
    f :: s -> s -> (s, [s])
f s
cur s
prev = case s -> s -> Ordering
forall b1 b2. (BEDLike b1, BEDLike b2) => b1 -> b2 -> Ordering
compareBed s
prev s
cur of
        Ordering
GT -> [Char] -> (s, [s])
forall a. HasCallStack => [Char] -> a
error ([Char] -> (s, [s])) -> [Char] -> (s, [s])
forall a b. (a -> b) -> a -> b
$
            [Char]
"Input is not sorted: " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ s -> [Char]
forall a. Show a => a -> [Char]
show s
prev [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ [Char]
" > " [Char] -> [Char] -> [Char]
forall a. [a] -> [a] -> [a]
++ s -> [Char]
forall a. Show a => a -> [Char]
show s
cur
        Ordering
LT -> (s
cur, [s
cur])
        Ordering
_ -> if s
prevs -> Getting (Maybe Bool) s (Maybe Bool) -> Maybe Bool
forall s a. s -> Getting a s a -> a
^.Getting (Maybe Bool) s (Maybe Bool)
forall b. BEDLike b => Lens' b (Maybe Bool)
strand Maybe Bool -> Maybe Bool -> Bool
forall a. Eq a => a -> a -> Bool
== s
curs -> Getting (Maybe Bool) s (Maybe Bool) -> Maybe Bool
forall s a. s -> Getting a s a -> a
^.Getting (Maybe Bool) s (Maybe Bool)
forall b. BEDLike b => Lens' b (Maybe Bool)
strand then (s
cur, []) else (s
cur, [s
cur])
{-# INLINE monoColonalize #-}

newtype BaseMap = BaseMap (M.HashMap B.ByteString BV.BitVector)

-- | Count the tags (starting positions) at each position in the genome.
baseMap :: PrimMonad m
        => [(B.ByteString, Int)]   -- ^ chromosomes and their sizes
        -> ConduitT BED o m BaseMap
baseMap :: [(ByteString, Int)] -> ConduitT BED o m BaseMap
baseMap [(ByteString, Int)]
chrs = do
    HashMap ByteString (BitMVector (PrimState m))
bvs <- m (HashMap ByteString (BitMVector (PrimState m)))
-> ConduitT BED o m (HashMap ByteString (BitMVector (PrimState m)))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (HashMap ByteString (BitMVector (PrimState m)))
 -> ConduitT
      BED o m (HashMap ByteString (BitMVector (PrimState m))))
-> m (HashMap ByteString (BitMVector (PrimState m)))
-> ConduitT BED o m (HashMap ByteString (BitMVector (PrimState m)))
forall a b. (a -> b) -> a -> b
$ ([(ByteString, BitMVector (PrimState m))]
 -> HashMap ByteString (BitMVector (PrimState m)))
-> m [(ByteString, BitMVector (PrimState m))]
-> m (HashMap ByteString (BitMVector (PrimState m)))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [(ByteString, BitMVector (PrimState m))]
-> HashMap ByteString (BitMVector (PrimState m))
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
M.fromList (m [(ByteString, BitMVector (PrimState m))]
 -> m (HashMap ByteString (BitMVector (PrimState m))))
-> m [(ByteString, BitMVector (PrimState m))]
-> m (HashMap ByteString (BitMVector (PrimState m)))
forall a b. (a -> b) -> a -> b
$ [(ByteString, Int)]
-> ((ByteString, Int) -> m (ByteString, BitMVector (PrimState m)))
-> m [(ByteString, BitMVector (PrimState m))]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [(ByteString, Int)]
chrs (((ByteString, Int) -> m (ByteString, BitMVector (PrimState m)))
 -> m [(ByteString, BitMVector (PrimState m))])
-> ((ByteString, Int) -> m (ByteString, BitMVector (PrimState m)))
-> m [(ByteString, BitMVector (PrimState m))]
forall a b. (a -> b) -> a -> b
$ \(ByteString
chr, Int
n) -> do
        BitMVector (PrimState m)
bv <- Int -> m (BitMVector (PrimState m))
forall (m :: * -> *).
PrimMonad m =>
Int -> m (BitMVector (PrimState m))
BV.zeros Int
n
        (ByteString, BitMVector (PrimState m))
-> m (ByteString, BitMVector (PrimState m))
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString
chr, BitMVector (PrimState m)
bv)

    (BED -> m ()) -> ConduitT BED o m ()
forall (m :: * -> *) a o.
Monad m =>
(a -> m ()) -> ConduitT a o m ()
mapM_C ((BED -> m ()) -> ConduitT BED o m ())
-> (BED -> m ()) -> ConduitT BED o m ()
forall a b. (a -> b) -> a -> b
$ \BED
bed -> case ByteString
-> HashMap ByteString (BitMVector (PrimState m))
-> Maybe (BitMVector (PrimState m))
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup (BED
bedBED -> Getting ByteString BED ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString BED ByteString
forall b. BEDLike b => Lens' b ByteString
chrom) HashMap ByteString (BitMVector (PrimState m))
bvs of
        Maybe (BitMVector (PrimState m))
Nothing -> () -> m ()
forall (m :: * -> *) a. Monad m => a -> m a
return ()
        Just BitMVector (PrimState m)
bv -> if Bool -> Maybe Bool -> Bool
forall a. a -> Maybe a -> a
fromMaybe Bool
True (Maybe Bool -> Bool) -> Maybe Bool -> Bool
forall a b. (a -> b) -> a -> b
$ BED
bedBED -> Getting (Maybe Bool) BED (Maybe Bool) -> Maybe Bool
forall s a. s -> Getting a s a -> a
^.Getting (Maybe Bool) BED (Maybe Bool)
forall b. BEDLike b => Lens' b (Maybe Bool)
strand
            then BitMVector (PrimState m) -> Int -> m ()
forall (m :: * -> *).
PrimMonad m =>
BitMVector (PrimState m) -> Int -> m ()
BV.set BitMVector (PrimState m)
bv (Int -> m ()) -> Int -> m ()
forall a b. (a -> b) -> a -> b
$ BED
bedBED -> Getting Int BED Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int BED Int
forall b. BEDLike b => Lens' b Int
chromStart
            else BitMVector (PrimState m) -> Int -> m ()
forall (m :: * -> *).
PrimMonad m =>
BitMVector (PrimState m) -> Int -> m ()
BV.set BitMVector (PrimState m)
bv (Int -> m ()) -> Int -> m ()
forall a b. (a -> b) -> a -> b
$ BED
bedBED -> Getting Int BED Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int BED Int
forall b. BEDLike b => Lens' b Int
chromEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1

    m BaseMap -> ConduitT BED o m BaseMap
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m BaseMap -> ConduitT BED o m BaseMap)
-> m BaseMap -> ConduitT BED o m BaseMap
forall a b. (a -> b) -> a -> b
$ (HashMap ByteString BitVector -> BaseMap)
-> m (HashMap ByteString BitVector) -> m BaseMap
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap HashMap ByteString BitVector -> BaseMap
BaseMap (m (HashMap ByteString BitVector) -> m BaseMap)
-> m (HashMap ByteString BitVector) -> m BaseMap
forall a b. (a -> b) -> a -> b
$ HashMap ByteString (m BitVector)
-> m (HashMap ByteString BitVector)
forall (t :: * -> *) (m :: * -> *) a.
(Traversable t, Monad m) =>
t (m a) -> m (t a)
sequence (HashMap ByteString (m BitVector)
 -> m (HashMap ByteString BitVector))
-> HashMap ByteString (m BitVector)
-> m (HashMap ByteString BitVector)
forall a b. (a -> b) -> a -> b
$ (BitMVector (PrimState m) -> m BitVector)
-> HashMap ByteString (BitMVector (PrimState m))
-> HashMap ByteString (m BitVector)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap BitMVector (PrimState m) -> m BitVector
forall (m :: * -> *).
PrimMonad m =>
BitMVector (PrimState m) -> m BitVector
BV.unsafeFreeze HashMap ByteString (BitMVector (PrimState m))
bvs 

queryBaseMap :: BEDLike b => b -> BaseMap -> Maybe [Bool]
queryBaseMap :: b -> BaseMap -> Maybe [Bool]
queryBaseMap b
bed (BaseMap HashMap ByteString BitVector
bm) = case ByteString -> HashMap ByteString BitVector -> Maybe BitVector
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup (b
bedb -> Getting ByteString b ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString b ByteString
forall b. BEDLike b => Lens' b ByteString
chrom) HashMap ByteString BitVector
bm of
    Maybe BitVector
Nothing -> Maybe [Bool]
forall a. Maybe a
Nothing
    Just BitVector
bv ->
        let res :: [Bool]
res = (Int -> Bool) -> [Int] -> [Bool]
forall a b. (a -> b) -> [a] -> [b]
map (BitVector
bv BitVector -> Int -> Bool
BV.!) [b
bedb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromStart .. b
bedb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1]
        in case b
bedb -> Getting (Maybe Bool) b (Maybe Bool) -> Maybe Bool
forall s a. s -> Getting a s a -> a
^.Getting (Maybe Bool) b (Maybe Bool)
forall b. BEDLike b => Lens' b (Maybe Bool)
strand of
            Just Bool
False -> [Bool] -> Maybe [Bool]
forall a. a -> Maybe a
Just ([Bool] -> Maybe [Bool]) -> [Bool] -> Maybe [Bool]
forall a b. (a -> b) -> a -> b
$ [Bool] -> [Bool]
forall a. [a] -> [a]
reverse [Bool]
res
            Maybe Bool
_ -> [Bool] -> Maybe [Bool]
forall a. a -> Maybe a
Just [Bool]
res

-- | calculate RPKM on a set of unique regions. Regions (in bed format) would be kept in
-- memory but not tag file.
-- RPKM: Readcounts per kilobase per million reads. Only counts the starts of tags
rpkmBed :: (PrimMonad m, BEDLike b, G.Vector v Double)
     => [b] -> ConduitT BED o m (v Double)
rpkmBed :: [b] -> ConduitT BED o m (v Double)
rpkmBed [b]
regions = do
    Vector (Int, b)
v <- m (Vector (Int, b)) -> ConduitT BED o m (Vector (Int, b))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (Vector (Int, b)) -> ConduitT BED o m (Vector (Int, b)))
-> m (Vector (Int, b)) -> ConduitT BED o m (Vector (Int, b))
forall a b. (a -> b) -> a -> b
$ do MVector (PrimState m) (Int, b)
v' <- Vector (Int, b) -> m (MVector (PrimState m) (Int, b))
forall (m :: * -> *) a.
PrimMonad m =>
Vector a -> m (MVector (PrimState m) a)
V.unsafeThaw (Vector (Int, b) -> m (MVector (PrimState m) (Int, b)))
-> ([b] -> Vector (Int, b))
-> [b]
-> m (MVector (PrimState m) (Int, b))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(Int, b)] -> Vector (Int, b)
forall a. [a] -> Vector a
V.fromList ([(Int, b)] -> Vector (Int, b))
-> ([b] -> [(Int, b)]) -> [b] -> Vector (Int, b)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Int] -> [b] -> [(Int, b)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int
0..] ([b] -> m (MVector (PrimState m) (Int, b)))
-> [b] -> m (MVector (PrimState m) (Int, b))
forall a b. (a -> b) -> a -> b
$ [b]
regions
                   Comparison (Int, b) -> MVector (PrimState m) (Int, b) -> m ()
forall (m :: * -> *) (v :: * -> * -> *) e.
(PrimMonad m, MVector v e) =>
Comparison e -> v (PrimState m) e -> m ()
I.sortBy (b -> b -> Ordering
forall b1 b2. (BEDLike b1, BEDLike b2) => b1 -> b2 -> Ordering
compareBed (b -> b -> Ordering) -> ((Int, b) -> b) -> Comparison (Int, b)
forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
`on` (Int, b) -> b
forall a b. (a, b) -> b
snd) MVector (PrimState m) (Int, b)
v'
                   MVector (PrimState m) (Int, b) -> m (Vector (Int, b))
forall (m :: * -> *) a.
PrimMonad m =>
MVector (PrimState m) a -> m (Vector a)
V.unsafeFreeze MVector (PrimState m) (Int, b)
v'
    let (Vector Int
idx, Vector b
sortedRegions) = Vector (Int, b) -> (Vector Int, Vector b)
forall a b. Vector (a, b) -> (Vector a, Vector b)
V.unzip Vector (Int, b)
v
        n :: Int
n = Vector Int -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
G.length Vector Int
idx
    Vector Double
rc <- Sorted (Vector b) -> ConduitT BED o m (Vector Double)
forall (m :: * -> *) b (v :: * -> *) o.
(PrimMonad m, BEDLike b, Vector v Double) =>
Sorted (Vector b) -> ConduitT BED o m (v Double)
rpkmSortedBed (Sorted (Vector b) -> ConduitT BED o m (Vector Double))
-> Sorted (Vector b) -> ConduitT BED o m (Vector Double)
forall a b. (a -> b) -> a -> b
$ Vector b -> Sorted (Vector b)
forall b. b -> Sorted b
Sorted Vector b
sortedRegions

    m (v Double) -> ConduitT BED o m (v Double)
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (v Double) -> ConduitT BED o m (v Double))
-> m (v Double) -> ConduitT BED o m (v Double)
forall a b. (a -> b) -> a -> b
$ do
        Mutable v (PrimState m) Double
result <- Int -> m (Mutable v (PrimState m) Double)
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
Int -> m (v (PrimState m) a)
GM.new Int
n
        Vector (m ()) -> m ()
forall (m :: * -> *) (v :: * -> *) a.
(Monad m, Vector v (m a)) =>
v (m a) -> m ()
G.sequence_ (Vector (m ()) -> m ())
-> (Vector Int -> Vector (m ())) -> Vector Int -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Int -> Int -> m ()) -> Vector Int -> Vector (m ())
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(Int -> a -> b) -> v a -> v b
G.imap (\Int
x Int
i -> Mutable v (PrimState m) Double -> Int -> Double -> m ()
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
v (PrimState m) a -> Int -> a -> m ()
GM.unsafeWrite Mutable v (PrimState m) Double
result Int
i (Vector Double
rc Vector Double -> Int -> Double
forall a. Unbox a => Vector a -> Int -> a
U.! Int
x)) (Vector Int -> m ()) -> Vector Int -> m ()
forall a b. (a -> b) -> a -> b
$ Vector Int
idx
        Mutable v (PrimState m) Double -> m (v Double)
forall (m :: * -> *) (v :: * -> *) a.
(PrimMonad m, Vector v a) =>
Mutable v (PrimState m) a -> m (v a)
G.unsafeFreeze Mutable v (PrimState m) Double
result
{-# INLINE rpkmBed #-}

-- | calculate RPKM on a set of regions. Regions must be sorted. The Sorted data
-- type is used to remind users to sort their data.
rpkmSortedBed :: (PrimMonad m, BEDLike b, G.Vector v Double)
              => Sorted (V.Vector b) -> ConduitT BED o m (v Double)
rpkmSortedBed :: Sorted (Vector b) -> ConduitT BED o m (v Double)
rpkmSortedBed (Sorted Vector b
regions) = do
    Mutable v (PrimState m) Double
vec <- m (Mutable v (PrimState m) Double)
-> ConduitT BED o m (Mutable v (PrimState m) Double)
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (Mutable v (PrimState m) Double)
 -> ConduitT BED o m (Mutable v (PrimState m) Double))
-> m (Mutable v (PrimState m) Double)
-> ConduitT BED o m (Mutable v (PrimState m) Double)
forall a b. (a -> b) -> a -> b
$ Int -> Double -> m (Mutable v (PrimState m) Double)
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
Int -> a -> m (v (PrimState m) a)
GM.replicate Int
l Double
0
    Int
n <- (Int -> BED -> m Int) -> Int -> ConduitT BED o m Int
forall (m :: * -> *) a b o.
Monad m =>
(a -> b -> m a) -> a -> ConduitT b o m a
foldMC (Mutable v (PrimState m) Double -> Int -> BED -> m Int
forall (m :: * -> *) (v :: * -> * -> *) a b s.
(PrimMonad m, MVector v a, Num a, Enum b, BEDLike s) =>
v (PrimState m) a -> b -> s -> m b
count Mutable v (PrimState m) Double
vec) (Int
0 :: Int)
    let factor :: Double
factor = Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
n Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
1e9
    m (v Double) -> ConduitT BED o m (v Double)
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (v Double) -> ConduitT BED o m (v Double))
-> m (v Double) -> ConduitT BED o m (v Double)
forall a b. (a -> b) -> a -> b
$ (v Double -> v Double) -> m (v Double) -> m (v Double)
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
liftM ((Int -> Double -> Double) -> v Double -> v Double
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(Int -> a -> b) -> v a -> v b
G.imap (\Int
i Double
x -> Double
x Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ Double
factor Double -> Double -> Double
forall a. Fractional a => a -> a -> a
/ (Int -> Double
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Double) -> (b -> Int) -> b -> Double
forall b c a. (b -> c) -> (a -> b) -> a -> c
. b -> Int
forall b. BEDLike b => b -> Int
size) (Vector b
regions Vector b -> Int -> b
forall a. Vector a -> Int -> a
V.! Int
i)))
         (m (v Double) -> m (v Double)) -> m (v Double) -> m (v Double)
forall a b. (a -> b) -> a -> b
$ Mutable v (PrimState m) Double -> m (v Double)
forall (m :: * -> *) (v :: * -> *) a.
(PrimMonad m, Vector v a) =>
Mutable v (PrimState m) a -> m (v a)
G.unsafeFreeze Mutable v (PrimState m) Double
vec
  where
    count :: v (PrimState m) a -> b -> s -> m b
count v (PrimState m) a
v b
nTags s
tag = do
        let p :: Int
p | s
tags -> Getting (Maybe Bool) s (Maybe Bool) -> Maybe Bool
forall s a. s -> Getting a s a -> a
^.Getting (Maybe Bool) s (Maybe Bool)
forall b. BEDLike b => Lens' b (Maybe Bool)
strand Maybe Bool -> Maybe Bool -> Bool
forall a. Eq a => a -> a -> Bool
== Bool -> Maybe Bool
forall a. a -> Maybe a
Just Bool
True = s
tags -> Getting Int s Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int s Int
forall b. BEDLike b => Lens' b Int
chromStart
              | s
tags -> Getting (Maybe Bool) s (Maybe Bool) -> Maybe Bool
forall s a. s -> Getting a s a -> a
^.Getting (Maybe Bool) s (Maybe Bool)
forall b. BEDLike b => Lens' b (Maybe Bool)
strand Maybe Bool -> Maybe Bool -> Bool
forall a. Eq a => a -> a -> Bool
== Bool -> Maybe Bool
forall a. a -> Maybe a
Just Bool
False = s
tags -> Getting Int s Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int s Int
forall b. BEDLike b => Lens' b Int
chromEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
              | Bool
otherwise = [Char] -> Int
forall a. HasCallStack => [Char] -> a
error [Char]
"Unkown strand"
            xs :: [Int]
xs = [[Int]] -> [Int]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[Int]] -> [Int]) -> [[Int]] -> [Int]
forall a b. (a -> b) -> a -> b
$ IntervalMap (Interval Int) [Int] -> [[Int]]
forall k v. IntervalMap k v -> [v]
IM.elems (IntervalMap (Interval Int) [Int] -> [[Int]])
-> IntervalMap (Interval Int) [Int] -> [[Int]]
forall a b. (a -> b) -> a -> b
$
                IntervalMap (Interval Int) [Int]
-> Int -> IntervalMap (Interval Int) [Int]
forall k e v.
Interval k e =>
IntervalMap k v -> e -> IntervalMap k v
IM.containing (IntervalMap (Interval Int) [Int]
-> ByteString
-> HashMap ByteString (IntervalMap (Interval Int) [Int])
-> IntervalMap (Interval Int) [Int]
forall k v. (Eq k, Hashable k) => v -> k -> HashMap k v -> v
M.lookupDefault IntervalMap (Interval Int) [Int]
forall k v. IntervalMap k v
IM.empty (s
tags -> Getting ByteString s ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString s ByteString
forall b. BEDLike b => Lens' b ByteString
chrom) HashMap ByteString (IntervalMap (Interval Int) [Int])
intervalMap) Int
p
        v (PrimState m) a -> [Int] -> m ()
forall (t :: * -> *) (m :: * -> *) (v :: * -> * -> *) a.
(Foldable t, PrimMonad m, MVector v a, Num a) =>
v (PrimState m) a -> t Int -> m ()
addOne v (PrimState m) a
v [Int]
xs
        b -> m b
forall (m :: * -> *) a. Monad m => a -> m a
return (b -> m b) -> b -> m b
forall a b. (a -> b) -> a -> b
$ b -> b
forall a. Enum a => a -> a
succ b
nTags

    intervalMap :: HashMap ByteString (IntervalMap (Interval Int) [Int])
intervalMap = ([Int] -> [Int] -> [Int])
-> Sorted [(b, [Int])]
-> HashMap ByteString (IntervalMap (Interval Int) [Int])
forall b (f :: * -> *) a.
(BEDLike b, Foldable f) =>
(a -> a -> a) -> Sorted (f (b, a)) -> BEDTree a
sortedBedToTree [Int] -> [Int] -> [Int]
forall a. [a] -> [a] -> [a]
(++) (Sorted [(b, [Int])]
 -> HashMap ByteString (IntervalMap (Interval Int) [Int]))
-> (Int -> Sorted [(b, [Int])])
-> Int
-> HashMap ByteString (IntervalMap (Interval Int) [Int])
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [(b, [Int])] -> Sorted [(b, [Int])]
forall b. b -> Sorted b
Sorted ([(b, [Int])] -> Sorted [(b, [Int])])
-> (Int -> [(b, [Int])]) -> Int -> Sorted [(b, [Int])]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector (b, [Int]) -> [(b, [Int])]
forall (v :: * -> *) a. Vector v a => v a -> [a]
G.toList (Vector (b, [Int]) -> [(b, [Int])])
-> (Int -> Vector (b, [Int])) -> Int -> [(b, [Int])]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector b -> Vector [Int] -> Vector (b, [Int])
forall (v :: * -> *) a b.
(Vector v a, Vector v b, Vector v (a, b)) =>
v a -> v b -> v (a, b)
G.zip Vector b
regions (Vector [Int] -> Vector (b, [Int]))
-> (Int -> Vector [Int]) -> Int -> Vector (b, [Int])
forall b c a. (b -> c) -> (a -> b) -> a -> c
.
                  (Int -> [Int]) -> Vector Int -> Vector [Int]
forall (v :: * -> *) a b.
(Vector v a, Vector v b) =>
(a -> b) -> v a -> v b
G.map Int -> [Int]
forall (m :: * -> *) a. Monad m => a -> m a
return (Vector Int -> Vector [Int])
-> (Int -> Vector Int) -> Int -> Vector [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Int -> Vector Int
forall (v :: * -> *) a. (Vector v a, Num a) => a -> Int -> v a
G.enumFromN Int
0 (Int -> HashMap ByteString (IntervalMap (Interval Int) [Int]))
-> Int -> HashMap ByteString (IntervalMap (Interval Int) [Int])
forall a b. (a -> b) -> a -> b
$ Int
l
    addOne :: v (PrimState m) a -> t Int -> m ()
addOne v (PrimState m) a
v' = (Int -> m ()) -> t Int -> m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ ((Int -> m ()) -> t Int -> m ()) -> (Int -> m ()) -> t Int -> m ()
forall a b. (a -> b) -> a -> b
$ \Int
x -> v (PrimState m) a -> Int -> m a
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
v (PrimState m) a -> Int -> m a
GM.unsafeRead v (PrimState m) a
v' Int
x m a -> (a -> m ()) -> m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= v (PrimState m) a -> Int -> a -> m ()
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
v (PrimState m) a -> Int -> a -> m ()
GM.unsafeWrite v (PrimState m) a
v' Int
x (a -> m ()) -> (a -> a) -> a -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> a -> a
forall a. Num a => a -> a -> a
+a
1)
    l :: Int
l = Vector b -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
G.length Vector b
regions
{-# INLINE rpkmSortedBed #-}

-- | divide each region into consecutive bins, and count tags for each bin and
-- return the number of all tags. Note: a tag is considered to be overlapped
-- with a region only if the starting position of the tag is in the region. For
-- the common sense overlapping, use countTagsBinBed'.
countTagsBinBed :: (Integral a, PrimMonad m, G.Vector v a, BEDLike b)
           => Int   -- ^ bin size
           -> [b]   -- ^ regions
           -> ConduitT BED o m ([v a], Int)
countTagsBinBed :: Int -> [b] -> ConduitT BED o m ([v a], Int)
countTagsBinBed Int
k [b]
beds = do
    Vector (Mutable v (PrimState m) a, Int -> Int)
vs <- m (Vector (Mutable v (PrimState m) a, Int -> Int))
-> ConduitT
     BED o m (Vector (Mutable v (PrimState m) a, Int -> Int))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (Vector (Mutable v (PrimState m) a, Int -> Int))
 -> ConduitT
      BED o m (Vector (Mutable v (PrimState m) a, Int -> Int)))
-> m (Vector (Mutable v (PrimState m) a, Int -> Int))
-> ConduitT
     BED o m (Vector (Mutable v (PrimState m) a, Int -> Int))
forall a b. (a -> b) -> a -> b
$ ([(Mutable v (PrimState m) a, Int -> Int)]
 -> Vector (Mutable v (PrimState m) a, Int -> Int))
-> m [(Mutable v (PrimState m) a, Int -> Int)]
-> m (Vector (Mutable v (PrimState m) a, Int -> Int))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap [(Mutable v (PrimState m) a, Int -> Int)]
-> Vector (Mutable v (PrimState m) a, Int -> Int)
forall a. [a] -> Vector a
V.fromList (m [(Mutable v (PrimState m) a, Int -> Int)]
 -> m (Vector (Mutable v (PrimState m) a, Int -> Int)))
-> m [(Mutable v (PrimState m) a, Int -> Int)]
-> m (Vector (Mutable v (PrimState m) a, Int -> Int))
forall a b. (a -> b) -> a -> b
$ [b]
-> (b -> m (Mutable v (PrimState m) a, Int -> Int))
-> m [(Mutable v (PrimState m) a, Int -> Int)]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [b]
beds ((b -> m (Mutable v (PrimState m) a, Int -> Int))
 -> m [(Mutable v (PrimState m) a, Int -> Int)])
-> (b -> m (Mutable v (PrimState m) a, Int -> Int))
-> m [(Mutable v (PrimState m) a, Int -> Int)]
forall a b. (a -> b) -> a -> b
$ \b
bed -> do
        let start :: Int
start = b
bedb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromStart
            num :: Int
num = ((b
bedb -> Getting Int b Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b Int
forall b. BEDLike b => Lens' b Int
chromEnd) Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
start) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
k
            index :: Int -> Int
index Int
i = (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
start) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
k
        Mutable v (PrimState m) a
v <- Int -> a -> m (Mutable v (PrimState m) a)
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
Int -> a -> m (v (PrimState m) a)
GM.replicate Int
num a
0
        (Mutable v (PrimState m) a, Int -> Int)
-> m (Mutable v (PrimState m) a, Int -> Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Mutable v (PrimState m) a
v, Int -> Int
index)
    Int
nTags <- (Int -> BED -> m Int) -> Int -> ConduitT BED o m Int
forall (m :: * -> *) a b o.
Monad m =>
(a -> b -> m a) -> a -> ConduitT b o m a
foldMC (Vector (Mutable v (PrimState m) a, Int -> Int)
-> Int -> BED -> m Int
forall (m :: * -> *) (v :: * -> * -> *) a b s (v :: * -> *).
(PrimMonad m, Num a, Num b, BEDLike s,
 Vector v (v (PrimState m) a, Int -> Int), MVector v a) =>
v (v (PrimState m) a, Int -> Int) -> b -> s -> m b
f Vector (Mutable v (PrimState m) a, Int -> Int)
vs) Int
0
    [v a]
rc <- m [v a] -> ConduitT BED o m [v a]
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m [v a] -> ConduitT BED o m [v a])
-> m [v a] -> ConduitT BED o m [v a]
forall a b. (a -> b) -> a -> b
$ ((Mutable v (PrimState m) a, Int -> Int) -> m (v a))
-> [(Mutable v (PrimState m) a, Int -> Int)] -> m [v a]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (Mutable v (PrimState m) a -> m (v a)
forall (m :: * -> *) (v :: * -> *) a.
(PrimMonad m, Vector v a) =>
Mutable v (PrimState m) a -> m (v a)
G.unsafeFreeze (Mutable v (PrimState m) a -> m (v a))
-> ((Mutable v (PrimState m) a, Int -> Int)
    -> Mutable v (PrimState m) a)
-> (Mutable v (PrimState m) a, Int -> Int)
-> m (v a)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Mutable v (PrimState m) a, Int -> Int)
-> Mutable v (PrimState m) a
forall a b. (a, b) -> a
fst) ([(Mutable v (PrimState m) a, Int -> Int)] -> m [v a])
-> [(Mutable v (PrimState m) a, Int -> Int)] -> m [v a]
forall a b. (a -> b) -> a -> b
$ Vector (Mutable v (PrimState m) a, Int -> Int)
-> [(Mutable v (PrimState m) a, Int -> Int)]
forall (v :: * -> *) a. Vector v a => v a -> [a]
G.toList Vector (Mutable v (PrimState m) a, Int -> Int)
vs
    ([v a], Int) -> ConduitT BED o m ([v a], Int)
forall (m :: * -> *) a. Monad m => a -> m a
return ([v a]
rc, Int
nTags)
  where
    f :: v (v (PrimState m) a, Int -> Int) -> b -> s -> m b
f v (v (PrimState m) a, Int -> Int)
vs b
n s
bed = do
        let pos :: Int
pos | s
beds -> Getting (Maybe Bool) s (Maybe Bool) -> Maybe Bool
forall s a. s -> Getting a s a -> a
^.Getting (Maybe Bool) s (Maybe Bool)
forall b. BEDLike b => Lens' b (Maybe Bool)
strand Maybe Bool -> Maybe Bool -> Bool
forall a. Eq a => a -> a -> Bool
== Bool -> Maybe Bool
forall a. a -> Maybe a
Just Bool
True = s
beds -> Getting Int s Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int s Int
forall b. BEDLike b => Lens' b Int
chromStart
                | s
beds -> Getting (Maybe Bool) s (Maybe Bool) -> Maybe Bool
forall s a. s -> Getting a s a -> a
^.Getting (Maybe Bool) s (Maybe Bool)
forall b. BEDLike b => Lens' b (Maybe Bool)
strand Maybe Bool -> Maybe Bool -> Bool
forall a. Eq a => a -> a -> Bool
== Bool -> Maybe Bool
forall a. a -> Maybe a
Just Bool
False = s
beds -> Getting Int s Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int s Int
forall b. BEDLike b => Lens' b Int
chromEnd Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1
                | Bool
otherwise = [Char] -> Int
forall a. HasCallStack => [Char] -> a
error [Char]
"unkown strand."
            overlaps :: [Int]
overlaps = [[Int]] -> [Int]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[Int]] -> [Int]) -> [[Int]] -> [Int]
forall a b. (a -> b) -> a -> b
$ IntervalMap (Interval Int) [Int] -> [[Int]]
forall k v. IntervalMap k v -> [v]
IM.elems (IntervalMap (Interval Int) [Int] -> [[Int]])
-> IntervalMap (Interval Int) [Int] -> [[Int]]
forall a b. (a -> b) -> a -> b
$ IntervalMap (Interval Int) [Int]
-> Int -> IntervalMap (Interval Int) [Int]
forall k e v.
Interval k e =>
IntervalMap k v -> e -> IntervalMap k v
IM.containing
                (IntervalMap (Interval Int) [Int]
-> ByteString
-> HashMap ByteString (IntervalMap (Interval Int) [Int])
-> IntervalMap (Interval Int) [Int]
forall k v. (Eq k, Hashable k) => v -> k -> HashMap k v -> v
M.lookupDefault IntervalMap (Interval Int) [Int]
forall k v. IntervalMap k v
IM.empty (s
beds -> Getting ByteString s ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString s ByteString
forall b. BEDLike b => Lens' b ByteString
chrom) HashMap ByteString (IntervalMap (Interval Int) [Int])
intervalMap) Int
pos
        [Int] -> (Int -> m ()) -> m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Int]
overlaps ((Int -> m ()) -> m ()) -> (Int -> m ()) -> m ()
forall a b. (a -> b) -> a -> b
$ \Int
x -> do
            let (v (PrimState m) a
v, Int -> Int
idxFn) = v (v (PrimState m) a, Int -> Int)
vs v (v (PrimState m) a, Int -> Int)
-> Int -> (v (PrimState m) a, Int -> Int)
forall (v :: * -> *) a. Vector v a => v a -> Int -> a
`G.unsafeIndex` Int
x
                i :: Int
i = let i' :: Int
i' = Int -> Int
idxFn Int
pos
                        l :: Int
l = v (PrimState m) a -> Int
forall (v :: * -> * -> *) a s. MVector v a => v s a -> Int
GM.length v (PrimState m) a
v
                    in if Int
i' Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
l then Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1 else Int
i'
            v (PrimState m) a -> (a -> a) -> Int -> m ()
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
v (PrimState m) a -> (a -> a) -> Int -> m ()
GM.unsafeModify v (PrimState m) a
v (a -> a -> a
forall a. Num a => a -> a -> a
+a
1) Int
i
        b -> m b
forall (m :: * -> *) a. Monad m => a -> m a
return (b -> m b) -> b -> m b
forall a b. (a -> b) -> a -> b
$ b
n b -> b -> b
forall a. Num a => a -> a -> a
+ b
1
    intervalMap :: HashMap ByteString (IntervalMap (Interval Int) [Int])
intervalMap = ([Int] -> [Int] -> [Int])
-> [(b, [Int])]
-> HashMap ByteString (IntervalMap (Interval Int) [Int])
forall b a. BEDLike b => (a -> a -> a) -> [(b, a)] -> BEDTree a
bedToTree [Int] -> [Int] -> [Int]
forall a. [a] -> [a] -> [a]
(++) ([(b, [Int])]
 -> HashMap ByteString (IntervalMap (Interval Int) [Int]))
-> [(b, [Int])]
-> HashMap ByteString (IntervalMap (Interval Int) [Int])
forall a b. (a -> b) -> a -> b
$ [b] -> [[Int]] -> [(b, [Int])]
forall a b. [a] -> [b] -> [(a, b)]
zip [b]
beds ([[Int]] -> [(b, [Int])]) -> [[Int]] -> [(b, [Int])]
forall a b. (a -> b) -> a -> b
$ (Int -> [Int]) -> [Int] -> [[Int]]
forall a b. (a -> b) -> [a] -> [b]
map Int -> [Int]
forall (m :: * -> *) a. Monad m => a -> m a
return [Int
0..]
{-# INLINE countTagsBinBed #-}

-- | Same as countTagsBinBed, except that tags are treated as complete intervals
-- instead of single points.
countTagsBinBed' :: (Integral a, PrimMonad m, G.Vector v a, BEDLike b1, BEDLike b2)
                 => Int   -- ^ bin size
                 -> [b1]   -- ^ regions
                 -> ConduitT b2 o m ([v a], Int)
countTagsBinBed' :: Int -> [b1] -> ConduitT b2 o m ([v a], Int)
countTagsBinBed' Int
k [b1]
beds = do
    [(Mutable v (PrimState m) a, Int -> Int)]
initRC <- m [(Mutable v (PrimState m) a, Int -> Int)]
-> ConduitT b2 o m [(Mutable v (PrimState m) a, Int -> Int)]
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m [(Mutable v (PrimState m) a, Int -> Int)]
 -> ConduitT b2 o m [(Mutable v (PrimState m) a, Int -> Int)])
-> m [(Mutable v (PrimState m) a, Int -> Int)]
-> ConduitT b2 o m [(Mutable v (PrimState m) a, Int -> Int)]
forall a b. (a -> b) -> a -> b
$ [b1]
-> (b1 -> m (Mutable v (PrimState m) a, Int -> Int))
-> m [(Mutable v (PrimState m) a, Int -> Int)]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
t a -> (a -> m b) -> m (t b)
forM [b1]
beds ((b1 -> m (Mutable v (PrimState m) a, Int -> Int))
 -> m [(Mutable v (PrimState m) a, Int -> Int)])
-> (b1 -> m (Mutable v (PrimState m) a, Int -> Int))
-> m [(Mutable v (PrimState m) a, Int -> Int)]
forall a b. (a -> b) -> a -> b
$ \b1
bed -> do
        let start :: Int
start = b1
bedb1 -> Getting Int b1 Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b1 Int
forall b. BEDLike b => Lens' b Int
chromStart
            end :: Int
end = b1
bedb1 -> Getting Int b1 Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int b1 Int
forall b. BEDLike b => Lens' b Int
chromEnd
            num :: Int
num = (Int
end Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
start) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
k
            index :: Int -> Int
index Int
i = (Int
i Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
start) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
k
        Mutable v (PrimState m) a
v <- Int -> a -> m (Mutable v (PrimState m) a)
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
Int -> a -> m (v (PrimState m) a)
GM.replicate Int
num a
0
        (Mutable v (PrimState m) a, Int -> Int)
-> m (Mutable v (PrimState m) a, Int -> Int)
forall (m :: * -> *) a. Monad m => a -> m a
return (Mutable v (PrimState m) a
v, Int -> Int
index)

    Int
-> Vector (Mutable v (PrimState m) a, Int -> Int)
-> ConduitT b2 o m ([v a], Int)
forall (m :: * -> *) a b (v :: * -> *) (v :: * -> *) i o.
(PrimMonad m, Num a, Num b, Vector v a, BEDLike i,
 Vector v (Mutable v (PrimState m) a, Int -> Int)) =>
b
-> v (Mutable v (PrimState m) a, Int -> Int)
-> ConduitT i o m ([v a], b)
sink Int
0 (Vector (Mutable v (PrimState m) a, Int -> Int)
 -> ConduitT b2 o m ([v a], Int))
-> Vector (Mutable v (PrimState m) a, Int -> Int)
-> ConduitT b2 o m ([v a], Int)
forall a b. (a -> b) -> a -> b
$ [(Mutable v (PrimState m) a, Int -> Int)]
-> Vector (Mutable v (PrimState m) a, Int -> Int)
forall a. [a] -> Vector a
V.fromList [(Mutable v (PrimState m) a, Int -> Int)]
initRC
  where
    sink :: b
-> v (Mutable v (PrimState m) a, Int -> Int)
-> ConduitT i o m ([v a], b)
sink !b
nTags v (Mutable v (PrimState m) a, Int -> Int)
vs = do
        Maybe i
tag <- ConduitT i o m (Maybe i)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await
        case Maybe i
tag of
            Just i
bed -> do
                let chr :: ByteString
chr = i
bedi -> Getting ByteString i ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString i ByteString
forall b. BEDLike b => Lens' b ByteString
chrom
                    start :: Int
start = i
bedi -> Getting Int i Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int i Int
forall b. BEDLike b => Lens' b Int
chromStart
                    end :: Int
end = i
bedi -> Getting Int i Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int i Int
forall b. BEDLike b => Lens' b Int
chromEnd
                    overlaps :: [Int]
overlaps = [[Int]] -> [Int]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[Int]] -> [Int]) -> [[Int]] -> [Int]
forall a b. (a -> b) -> a -> b
$ IntervalMap (Interval Int) [Int] -> [[Int]]
forall k v. IntervalMap k v -> [v]
IM.elems (IntervalMap (Interval Int) [Int] -> [[Int]])
-> IntervalMap (Interval Int) [Int] -> [[Int]]
forall a b. (a -> b) -> a -> b
$ IntervalMap (Interval Int) [Int]
-> Interval Int -> IntervalMap (Interval Int) [Int]
forall k e v.
Interval k e =>
IntervalMap k v -> k -> IntervalMap k v
IM.intersecting
                        (IntervalMap (Interval Int) [Int]
-> ByteString
-> HashMap ByteString (IntervalMap (Interval Int) [Int])
-> IntervalMap (Interval Int) [Int]
forall k v. (Eq k, Hashable k) => v -> k -> HashMap k v -> v
M.lookupDefault IntervalMap (Interval Int) [Int]
forall k v. IntervalMap k v
IM.empty ByteString
chr HashMap ByteString (IntervalMap (Interval Int) [Int])
intervalMap) (Interval Int -> IntervalMap (Interval Int) [Int])
-> Interval Int -> IntervalMap (Interval Int) [Int]
forall a b. (a -> b) -> a -> b
$ Int -> Int -> Interval Int
forall a. a -> a -> Interval a
IM.IntervalCO Int
start Int
end
                m () -> ConduitT i o m ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m () -> ConduitT i o m ()) -> m () -> ConduitT i o m ()
forall a b. (a -> b) -> a -> b
$ [Int] -> (Int -> m ()) -> m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Int]
overlaps ((Int -> m ()) -> m ()) -> (Int -> m ()) -> m ()
forall a b. (a -> b) -> a -> b
$ \Int
x -> do
                    let (Mutable v (PrimState m) a
v, Int -> Int
idxFn) = v (Mutable v (PrimState m) a, Int -> Int)
vs v (Mutable v (PrimState m) a, Int -> Int)
-> Int -> (Mutable v (PrimState m) a, Int -> Int)
forall (v :: * -> *) a. Vector v a => v a -> Int -> a
`G.unsafeIndex` Int
x
                        lo :: Int
lo = let i :: Int
i = Int -> Int
idxFn Int
start
                             in if Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
0 then Int
0 else Int
i
                        hi :: Int
hi = let i :: Int
i = Int -> Int
idxFn Int
end
                                 l :: Int
l = Mutable v (PrimState m) a -> Int
forall (v :: * -> * -> *) a s. MVector v a => v s a -> Int
GM.length Mutable v (PrimState m) a
v
                             in if Int
i Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int
l then Int
l Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1 else Int
i
                    [Int] -> (Int -> m ()) -> m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ [Int
lo..Int
hi] ((Int -> m ()) -> m ()) -> (Int -> m ()) -> m ()
forall a b. (a -> b) -> a -> b
$ \Int
i ->
                        Mutable v (PrimState m) a -> Int -> m a
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
v (PrimState m) a -> Int -> m a
GM.unsafeRead Mutable v (PrimState m) a
v Int
i m a -> (a -> m ()) -> m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Mutable v (PrimState m) a -> Int -> a -> m ()
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
v (PrimState m) a -> Int -> a -> m ()
GM.unsafeWrite Mutable v (PrimState m) a
v Int
i (a -> m ()) -> (a -> a) -> a -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> a -> a
forall a. Num a => a -> a -> a
+a
1)
                b
-> v (Mutable v (PrimState m) a, Int -> Int)
-> ConduitT i o m ([v a], b)
sink (b
nTagsb -> b -> b
forall a. Num a => a -> a -> a
+b
1) v (Mutable v (PrimState m) a, Int -> Int)
vs

            Maybe i
_ -> do [v a]
rc <- m [v a] -> ConduitT i o m [v a]
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m [v a] -> ConduitT i o m [v a])
-> m [v a] -> ConduitT i o m [v a]
forall a b. (a -> b) -> a -> b
$ ((Mutable v (PrimState m) a, Int -> Int) -> m (v a))
-> [(Mutable v (PrimState m) a, Int -> Int)] -> m [v a]
forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (Mutable v (PrimState m) a -> m (v a)
forall (m :: * -> *) (v :: * -> *) a.
(PrimMonad m, Vector v a) =>
Mutable v (PrimState m) a -> m (v a)
G.unsafeFreeze (Mutable v (PrimState m) a -> m (v a))
-> ((Mutable v (PrimState m) a, Int -> Int)
    -> Mutable v (PrimState m) a)
-> (Mutable v (PrimState m) a, Int -> Int)
-> m (v a)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Mutable v (PrimState m) a, Int -> Int)
-> Mutable v (PrimState m) a
forall a b. (a, b) -> a
fst) ([(Mutable v (PrimState m) a, Int -> Int)] -> m [v a])
-> [(Mutable v (PrimState m) a, Int -> Int)] -> m [v a]
forall a b. (a -> b) -> a -> b
$ v (Mutable v (PrimState m) a, Int -> Int)
-> [(Mutable v (PrimState m) a, Int -> Int)]
forall (v :: * -> *) a. Vector v a => v a -> [a]
G.toList v (Mutable v (PrimState m) a, Int -> Int)
vs
                    ([v a], b) -> ConduitT i o m ([v a], b)
forall (m :: * -> *) a. Monad m => a -> m a
return ([v a]
rc, b
nTags)

    intervalMap :: HashMap ByteString (IntervalMap (Interval Int) [Int])
intervalMap = ([Int] -> [Int] -> [Int])
-> [(b1, [Int])]
-> HashMap ByteString (IntervalMap (Interval Int) [Int])
forall b a. BEDLike b => (a -> a -> a) -> [(b, a)] -> BEDTree a
bedToTree [Int] -> [Int] -> [Int]
forall a. [a] -> [a] -> [a]
(++) ([(b1, [Int])]
 -> HashMap ByteString (IntervalMap (Interval Int) [Int]))
-> [(b1, [Int])]
-> HashMap ByteString (IntervalMap (Interval Int) [Int])
forall a b. (a -> b) -> a -> b
$ [b1] -> [[Int]] -> [(b1, [Int])]
forall a b. [a] -> [b] -> [(a, b)]
zip [b1]
beds ([[Int]] -> [(b1, [Int])]) -> [[Int]] -> [(b1, [Int])]
forall a b. (a -> b) -> a -> b
$ (Int -> [Int]) -> [Int] -> [[Int]]
forall a b. (a -> b) -> [a] -> [b]
map Int -> [Int]
forall (m :: * -> *) a. Monad m => a -> m a
return [Int
0..]
{-# INLINE countTagsBinBed' #-}

tagCountDistr :: PrimMonad m => G.Vector v Int => ConduitT BED o m (v Int)
tagCountDistr :: ConduitT BED o m (v Int)
tagCountDistr = HashMap ByteString (HashMap Int Int) -> ConduitT BED o m (v Int)
forall (m :: * -> *) s (v :: * -> *) a o.
(PrimMonad m, Num a, Vector v a, BEDLike s) =>
HashMap ByteString (HashMap Int Int) -> ConduitT s o m (v a)
loop HashMap ByteString (HashMap Int Int)
forall k v. HashMap k v
M.empty
  where
    loop :: HashMap ByteString (HashMap Int Int) -> ConduitT s o m (v a)
loop HashMap ByteString (HashMap Int Int)
m = do
        Maybe s
x <- ConduitT s o m (Maybe s)
forall (m :: * -> *) i. Monad m => Consumer i m (Maybe i)
await
        case Maybe s
x of
            Just s
bed -> do
                let p :: Int
p | Bool -> Maybe Bool -> Bool
forall a. a -> Maybe a -> a
fromMaybe Bool
True (s
beds -> Getting (Maybe Bool) s (Maybe Bool) -> Maybe Bool
forall s a. s -> Getting a s a -> a
^.Getting (Maybe Bool) s (Maybe Bool)
forall b. BEDLike b => Lens' b (Maybe Bool)
strand) = s
beds -> Getting Int s Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int s Int
forall b. BEDLike b => Lens' b Int
chromStart
                      | Bool
otherwise = Int
1 Int -> Int -> Int
forall a. Num a => a -> a -> a
- s
beds -> Getting Int s Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int s Int
forall b. BEDLike b => Lens' b Int
chromEnd
                case ByteString
-> HashMap ByteString (HashMap Int Int) -> Maybe (HashMap Int Int)
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup (s
beds -> Getting ByteString s ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString s ByteString
forall b. BEDLike b => Lens' b ByteString
chrom) HashMap ByteString (HashMap Int Int)
m of
                    Just HashMap Int Int
table -> HashMap ByteString (HashMap Int Int) -> ConduitT s o m (v a)
loop (HashMap ByteString (HashMap Int Int) -> ConduitT s o m (v a))
-> HashMap ByteString (HashMap Int Int) -> ConduitT s o m (v a)
forall a b. (a -> b) -> a -> b
$ ByteString
-> HashMap Int Int
-> HashMap ByteString (HashMap Int Int)
-> HashMap ByteString (HashMap Int Int)
forall k v.
(Eq k, Hashable k) =>
k -> v -> HashMap k v -> HashMap k v
M.insert (s
beds -> Getting ByteString s ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString s ByteString
forall b. BEDLike b => Lens' b ByteString
chrom) ((Int -> Int -> Int)
-> Int -> Int -> HashMap Int Int -> HashMap Int Int
forall k v.
(Eq k, Hashable k) =>
(v -> v -> v) -> k -> v -> HashMap k v -> HashMap k v
M.insertWith Int -> Int -> Int
forall a. Num a => a -> a -> a
(+) Int
p Int
1 HashMap Int Int
table) HashMap ByteString (HashMap Int Int)
m
                    Maybe (HashMap Int Int)
_ -> HashMap ByteString (HashMap Int Int) -> ConduitT s o m (v a)
loop (HashMap ByteString (HashMap Int Int) -> ConduitT s o m (v a))
-> HashMap ByteString (HashMap Int Int) -> ConduitT s o m (v a)
forall a b. (a -> b) -> a -> b
$ ByteString
-> HashMap Int Int
-> HashMap ByteString (HashMap Int Int)
-> HashMap ByteString (HashMap Int Int)
forall k v.
(Eq k, Hashable k) =>
k -> v -> HashMap k v -> HashMap k v
M.insert (s
beds -> Getting ByteString s ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString s ByteString
forall b. BEDLike b => Lens' b ByteString
chrom) ([(Int, Int)] -> HashMap Int Int
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
M.fromList [(Int
p,Int
1)]) HashMap ByteString (HashMap Int Int)
m
            Maybe s
_ -> m (v a) -> ConduitT s o m (v a)
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (v a) -> ConduitT s o m (v a))
-> m (v a) -> ConduitT s o m (v a)
forall a b. (a -> b) -> a -> b
$ do
                Mutable v (PrimState m) a
vec <- Int -> a -> m (Mutable v (PrimState m) a)
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
Int -> a -> m (v (PrimState m) a)
GM.replicate Int
100 a
0
                HashMap ByteString (HashMap Int Int)
-> (HashMap Int Int -> m ()) -> m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
F.forM_ HashMap ByteString (HashMap Int Int)
m ((HashMap Int Int -> m ()) -> m ())
-> (HashMap Int Int -> m ()) -> m ()
forall a b. (a -> b) -> a -> b
$ \HashMap Int Int
table ->
                    HashMap Int Int -> (Int -> m ()) -> m ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
F.forM_ HashMap Int Int
table ((Int -> m ()) -> m ()) -> (Int -> m ()) -> m ()
forall a b. (a -> b) -> a -> b
$ \Int
v -> do
                        let i :: Int
i = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
99 Int
v
                        Mutable v (PrimState m) a -> Int -> m a
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
v (PrimState m) a -> Int -> m a
GM.unsafeRead Mutable v (PrimState m) a
vec Int
i m a -> (a -> m ()) -> m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Mutable v (PrimState m) a -> Int -> a -> m ()
forall (m :: * -> *) (v :: * -> * -> *) a.
(PrimMonad m, MVector v a) =>
v (PrimState m) a -> Int -> a -> m ()
GM.unsafeWrite Mutable v (PrimState m) a
vec Int
i (a -> m ()) -> (a -> a) -> a -> m ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (a -> a -> a
forall a. Num a => a -> a -> a
+a
1)
                Mutable v (PrimState m) a -> m (v a)
forall (m :: * -> *) (v :: * -> *) a.
(PrimMonad m, Vector v a) =>
Mutable v (PrimState m) a -> m (v a)
G.unsafeFreeze Mutable v (PrimState m) a
vec
{-# INLINE tagCountDistr #-}

-- | cluster peaks
peakCluster :: (BEDLike b, Monad m)
            => [b]   -- ^ peaks
            -> Int   -- ^ radius
            -> Int   -- ^ cutoff
            -> ConduitT o BED m ()
peakCluster :: [b] -> Int -> Int -> ConduitT o BED m ()
peakCluster [b]
peaks Int
r Int
th = ([BED3] -> BED) -> [BED3] -> ConduitT o BED m ()
forall b (m :: * -> *) a i.
(BEDLike b, Monad m) =>
([b] -> a) -> [b] -> ConduitT i a m ()
mergeBedWith [BED3] -> BED
forall b s. (BEDConvert b, BEDLike s) => [s] -> b
mergeFn [BED3]
peaks' ConduitT o BED m () -> ConduitM BED BED m () -> ConduitT o BED m ()
forall (m :: * -> *) a b c r.
Monad m =>
ConduitM a b m () -> ConduitM b c m r -> ConduitM a c m r
.| (BED -> Bool) -> ConduitM BED BED m ()
forall (m :: * -> *) a. Monad m => (a -> Bool) -> ConduitT a a m ()
filterC BED -> Bool
forall s. BEDLike s => s -> Bool
g
  where
    peaks' :: [BED3]
peaks' = (b -> BED3) -> [b] -> [BED3]
forall a b. (a -> b) -> [a] -> [b]
map b -> BED3
forall s. BEDLike s => s -> BED3
f [b]
peaks
    f :: s -> BED3
f s
b = let c :: Int
c = (s
bs -> Getting Int s Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int s Int
forall b. BEDLike b => Lens' b Int
chromStart Int -> Int -> Int
forall a. Num a => a -> a -> a
+ s
bs -> Getting Int s Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int s Int
forall b. BEDLike b => Lens' b Int
chromEnd) Int -> Int -> Int
forall a. Integral a => a -> a -> a
`div` Int
2
          in ByteString -> Int -> Int -> BED3
forall b. BEDConvert b => ByteString -> Int -> Int -> b
asBed (s
bs -> Getting ByteString s ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^.Getting ByteString s ByteString
forall b. BEDLike b => Lens' b ByteString
chrom) (Int
cInt -> Int -> Int
forall a. Num a => a -> a -> a
-Int
r) (Int
cInt -> Int -> Int
forall a. Num a => a -> a -> a
+Int
r) :: BED3
    mergeFn :: [s] -> b
mergeFn [s]
xs = ByteString -> Int -> Int -> b
forall b. BEDConvert b => ByteString -> Int -> Int -> b
asBed ([s] -> s
forall a. [a] -> a
head [s]
xs s -> Getting ByteString s ByteString -> ByteString
forall s a. s -> Getting a s a -> a
^. Getting ByteString s ByteString
forall b. BEDLike b => Lens' b ByteString
chrom) Int
lo Int
hi b -> (b -> b) -> b
forall a b. a -> (a -> b) -> b
& (Maybe Int -> Identity (Maybe Int)) -> b -> Identity b
forall b. BEDLike b => Lens' b (Maybe Int)
score ((Maybe Int -> Identity (Maybe Int)) -> b -> Identity b)
-> Maybe Int -> b -> b
forall s t a b. ASetter s t a b -> b -> s -> t
.~ Int -> Maybe Int
forall a. a -> Maybe a
Just (Int -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Int) -> Int -> Int
forall a b. (a -> b) -> a -> b
$ [s] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [s]
xs)
      where
        lo :: Int
lo = [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
minimum ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ (s -> Int) -> [s] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map (s -> Getting Int s Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int s Int
forall b. BEDLike b => Lens' b Int
chromStart) [s]
xs
        hi :: Int
hi = [Int] -> Int
forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum ([Int] -> Int) -> [Int] -> Int
forall a b. (a -> b) -> a -> b
$ (s -> Int) -> [s] -> [Int]
forall a b. (a -> b) -> [a] -> [b]
map (s -> Getting Int s Int -> Int
forall s a. s -> Getting a s a -> a
^.Getting Int s Int
forall b. BEDLike b => Lens' b Int
chromEnd) [s]
xs
    g :: s -> Bool
g s
b = Maybe Int -> Int
forall a. HasCallStack => Maybe a -> a
fromJust (s
bs -> Getting (Maybe Int) s (Maybe Int) -> Maybe Int
forall s a. s -> Getting a s a -> a
^.Getting (Maybe Int) s (Maybe Int)
forall b. BEDLike b => Lens' b (Maybe Int)
score) Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
>= Int -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
th
{-# INLINE peakCluster #-}