Safe Haskell	None
Language	Haskell2010

Bio.Data.Bed.Utils

Synopsis

fetchSeq :: (BioSeq DNA a, MonadIO m) => Genome -> ConduitT BED (Either String (DNA a)) m ()
fetchSeq' :: (BioSeq DNA a, MonadIO m) => Genome -> [BED] -> m [Either String (DNA a)]
motifScan :: (BEDLike b, MonadIO m) => Genome -> [Motif] -> Bkgd -> Double -> ConduitT b BED m ()
getMotifScore :: MonadIO m => Genome -> [Motif] -> Bkgd -> ConduitT BED BED m ()
getMotifPValue :: Monad m => Maybe Double -> [Motif] -> Bkgd -> ConduitT BED BED m ()
monoColonalize :: Monad m => ConduitT BED BED m ()
newtype BaseMap = BaseMap (HashMap ByteString BitVector)
baseMap :: PrimMonad m => [(ByteString, Int)] -> ConduitT BED o m BaseMap
queryBaseMap :: BEDLike b => b -> BaseMap -> Maybe [Bool]
rpkmBed :: (PrimMonad m, BEDLike b, Vector v Double) => [b] -> ConduitT BED o m (v Double)
rpkmSortedBed :: (PrimMonad m, BEDLike b, Vector v Double) => Sorted (Vector b) -> ConduitT BED o m (v Double)
countTagsBinBed :: (Integral a, PrimMonad m, Vector v a, BEDLike b) => Int -> [b] -> ConduitT BED o m ([v a], Int)
countTagsBinBed' :: (Integral a, PrimMonad m, Vector v a, BEDLike b1, BEDLike b2) => Int -> [b1] -> ConduitT b2 o m ([v a], Int)
tagCountDistr :: PrimMonad m => Vector v Int => ConduitT BED o m (v Int)
peakCluster :: (BEDLike b, Monad m) => [b] -> Int -> Int -> ConduitT o BED m ()

Documentation

fetchSeq :: (BioSeq DNA a, MonadIO m) => Genome -> ConduitT BED (Either String (DNA a)) m () Source #

Retrieve sequences.

fetchSeq' :: (BioSeq DNA a, MonadIO m) => Genome -> [BED] -> m [Either String (DNA a)] Source #

motifScan :: (BEDLike b, MonadIO m) => Genome -> [Motif] -> Bkgd -> Double -> ConduitT b BED m () Source #

Identify motif binding sites

getMotifScore :: MonadIO m => Genome -> [Motif] -> Bkgd -> ConduitT BED BED m () Source #

Retrieve motif matching scores

getMotifPValue Source #

Arguments

:: Monad m
=> Maybe Double	whether to truncate the motif score CDF. Doing this will significantly reduce memory usage without sacrificing accuracy.
-> [Motif]
-> Bkgd
-> ConduitT BED BED m ()

monoColonalize :: Monad m => ConduitT BED BED m () Source #

Process a sorted BED stream, keeping only monoclonal tags.

newtype BaseMap Source #

Constructors

BaseMap (HashMap ByteString BitVector)

baseMap Source #

Arguments

:: PrimMonad m
=> [(ByteString, Int)]	chromosomes and their sizes
-> ConduitT BED o m BaseMap

Count the tags (starting positions) at each position in the genome.

queryBaseMap :: BEDLike b => b -> BaseMap -> Maybe [Bool] Source #

rpkmBed :: (PrimMonad m, BEDLike b, Vector v Double) => [b] -> ConduitT BED o m (v Double) Source #

Calculate RPKM (read counts per kilobase per million reads) on a set of unique regions. The regions (in BED format) are kept in memory, but the tag file is not. Only the start positions of tags are counted.

rpkmSortedBed :: (PrimMonad m, BEDLike b, Vector v Double) => Sorted (Vector b) -> ConduitT BED o m (v Double) Source #

calculate RPKM on a set of regions. Regions must be sorted. The Sorted data type is used to remind users to sort their data.

countTagsBinBed Source #

Arguments

:: (Integral a, PrimMonad m, Vector v a, BEDLike b)
=> Int	bin size
-> [b]	regions
-> ConduitT BED o m ([v a], Int)

Divide each region into consecutive bins, count the tags in each bin, and also return the total number of tags. Note: a tag is considered to overlap a region only if the starting position of the tag lies within the region. For conventional interval overlap, use countTagsBinBed'.

countTagsBinBed' Source #

Arguments

:: (Integral a, PrimMonad m, Vector v a, BEDLike b1, BEDLike b2)
=> Int	bin size
-> [b1]	regions
-> ConduitT b2 o m ([v a], Int)

Same as countTagsBinBed, except that tags are treated as complete intervals instead of single points.

tagCountDistr :: PrimMonad m => Vector v Int => ConduitT BED o m (v Int) Source #

peakCluster Source #

Arguments

:: (BEDLike b, Monad m)
=> [b]	peaks
-> Int	radius
-> Int	cutoff
-> ConduitT o BED m ()

cluster peaks