bioinformatics-toolkit-0.3.1: A collection of bioinformatics tools

Safe HaskellNone
LanguageHaskell2010

Bio.Data.Bed

Contents

Synopsis

Documentation

class BEDLike b where Source #

A class representing BED-like data, e.g., BED3, BED6 and BED12. BED format uses 0-based index (see documentation).

Methods

asBed :: ByteString -> Int -> Int -> b Source #

Construct bed record from chromsomoe, start location and end location

fromLine :: ByteString -> b Source #

Convert bytestring to bed format

toLine :: b -> ByteString Source #

Convert bed to bytestring

chrom :: b -> ByteString Source #

Field accessor

chromStart :: b -> Int Source #

chromEnd :: b -> Int Source #

bedName :: b -> Maybe ByteString Source #

bedScore :: b -> Maybe Double Source #

bedStrand :: b -> Maybe Bool Source #

convert :: BEDLike b' => b' -> b Source #

bedSize :: b -> Int Source #

Return the size of a bed region.

BED6 format

BED3 format

NarrowPeak format

bedToTree :: BEDLike b => (a -> a -> a) -> [(b, a)] -> BEDTree a Source #

sortedBedToTree :: (BEDLike b, Foldable f) => (a -> a -> a) -> Sorted (f (b, a)) -> BEDTree a Source #

convert a set of bed records to interval tree, with combining function for equal keys

sizeOverlapped :: (BEDLike b1, BEDLike b2) => b1 -> b2 -> Int Source #

splitBed :: BEDLike b => Int -> b -> [b] Source #

split a bed region into k consecutive subregions, discarding leftovers

splitBedBySize :: BEDLike b => Int -> b -> [b] Source #

split a bed region into consecutive fixed size subregions, discarding leftovers

splitBedBySizeLeft :: BEDLike b => Int -> b -> [b] Source #

split a bed region into consecutive fixed size subregions, including leftovers

splitBedBySizeOverlap Source #

Arguments

:: BEDLike b 
=> Int

bin size

-> Int

overlap size

-> b 
-> [b] 

newtype Sorted b Source #

a type to imply that underlying data structure is sorted

Constructors

Sorted 

Fields

Instances

Eq b => Eq (Sorted b) Source # 

Methods

(==) :: Sorted b -> Sorted b -> Bool #

(/=) :: Sorted b -> Sorted b -> Bool #

Read b => Read (Sorted b) Source # 
Show b => Show (Sorted b) Source # 

Methods

showsPrec :: Int -> Sorted b -> ShowS #

show :: Sorted b -> String #

showList :: [Sorted b] -> ShowS #

sortBed :: BEDLike b => [b] -> Sorted (Vector b) Source #

sort BED, first by chromosome (alphabetical order), then by chromStart, last by chromEnd

intersectBed :: (BEDLike b1, BEDLike b2, Monad m) => [b2] -> Conduit b1 m b1 Source #

return records in A that are overlapped with records in B

intersectBedWith :: (BEDLike b1, BEDLike b2, Monad m) => ([b2] -> a) -> [b2] -> Conduit b1 m (b1, a) Source #

intersectSortedBed :: (BEDLike b1, BEDLike b2, Monad m) => Sorted (Vector b2) -> Conduit b1 m b1 Source #

return records in A that are overlapped with records in B

intersectSortedBedWith :: (BEDLike b1, BEDLike b2, Monad m) => ([b2] -> a) -> Sorted (Vector b2) -> Conduit b1 m (b1, a) Source #

isOverlapped :: (BEDLike b1, BEDLike b2) => b1 -> b2 -> Bool Source #

mergeBed :: (BEDLike b, Monad m) => [b] -> Source m b Source #

mergeBedWith :: (BEDLike b, Monad m) => ([b] -> a) -> [b] -> Source m a Source #

mergeSortedBedWith :: (BEDLike b, Monad m) => ([b] -> a) -> Sorted (Vector b) -> Source m a Source #

splitOverlapped :: BEDLike b => ([b] -> a) -> [b] -> [(BED3, a)] Source #

Split overlapped regions into non-overlapped regions. The input must be overlapped. This function is usually used with mergeBedWith.

hReadBed :: (BEDLike b, MonadIO m) => Handle -> Source m b Source #

Read records from a bed file handler in a streaming fashion.

hReadBed' :: (BEDLike b, MonadIO m) => Handle -> m [b] Source #

Non-streaming version.

readBed :: (BEDLike b, MonadIO m) => FilePath -> Source m b Source #

Read records from a bed file in a streaming fashion.

readBed' :: (BEDLike b, MonadIO m) => FilePath -> m [b] Source #

Non-streaming version.

hWriteBed :: (BEDLike b, MonadIO m) => Handle -> Sink b m () Source #

hWriteBed' :: (BEDLike b, MonadIO m) => Handle -> [b] -> m () Source #

writeBed :: (BEDLike b, MonadIO m) => FilePath -> Sink b m () Source #

writeBed' :: (BEDLike b, MonadIO m) => FilePath -> [b] -> m () Source #

Utilities

fetchSeq :: (BioSeq DNA a, MonadIO m) => Genome -> Conduit BED m (Either String (DNA a)) Source #

retreive sequences

fetchSeq' :: (BioSeq DNA a, MonadIO m) => Genome -> [BED] -> m [Either String (DNA a)] Source #

motifScan :: (BEDLike b, MonadIO m) => Genome -> [Motif] -> Bkgd -> Double -> Conduit b m BED Source #

Identify motif binding sites

getMotifScore :: MonadIO m => Genome -> [Motif] -> Bkgd -> Conduit BED m BED Source #

Retrieve motif matching scores

getMotifPValue Source #

Arguments

:: Monad m 
=> Maybe Double

whether to truncate the motif score CDF. Doing this will significantly reduce memory usage without sacrifice accuracy.

-> [Motif] 
-> Bkgd 
-> Conduit BED m BED 

compareBed :: (BEDLike b1, BEDLike b2) => b1 -> b2 -> Ordering Source #