Safe Haskell	None
Language	Haskell2010

Bio.Motif

Contents

References

Synopsis

data PWM = PWM {
- _nSites :: !(Maybe Int)
- _mat :: !(Matrix Double)
}
size :: PWM -> Int
subPWM :: Int -> Int -> PWM -> PWM
rcPWM :: PWM -> PWM
gcContentPWM :: PWM -> Double
data Motif = Motif {
- _name :: !ByteString
- _pwm :: !PWM
}
newtype Bkgd = BG (Double, Double, Double, Double)
toPWM :: [ByteString] -> PWM
ic :: PWM -> Int -> Double
scores :: Bkgd -> PWM -> DNA a -> [Double]
scores' :: Monad m => Bkgd -> PWM -> DNA a -> ConduitT i Double m ()
score :: Bkgd -> PWM -> DNA a -> Double
optimalScore :: Bkgd -> PWM -> Double
newtype CDF = CDF (Vector (Double, Double))
cdf :: CDF -> Double -> Double
cdf' :: CDF -> Double -> Double
truncateCDF :: Double -> CDF -> CDF
scoreCDF :: Bkgd -> PWM -> CDF
pValueToScore :: Double -> Bkgd -> PWM -> Double
pValueToScoreExact :: Double -> Bkgd -> PWM -> Double
toIUPAC :: PWM -> DNA IUPAC
readMEME :: FilePath -> IO [Motif]
toMEME :: [Motif] -> Bkgd -> ByteString
fromMEME :: ByteString -> [Motif]
writeMEME :: FilePath -> [Motif] -> Bkgd -> IO ()
writeFasta :: FilePath -> [Motif] -> IO ()

Documentation

data PWM Source #

k x 4 position weight matrix for motifs

Constructors

PWM
Fields _nSites :: !(Maybe Int) number of sites used to generate this matrix _mat :: !(Matrix Double)

Instances

Read PWM Source #
Instance details Defined in Bio.Motif Methods readsPrec :: Int -> ReadS PWM # readList :: ReadS [PWM] # readPrec :: ReadPrec PWM # readListPrec :: ReadPrec [PWM] #
Show PWM Source #
Instance details Defined in Bio.Motif Methods showsPrec :: Int -> PWM -> ShowS # show :: PWM -> String # showList :: [PWM] -> ShowS #

size :: PWM -> Int Source #

subPWM :: Int -> Int -> PWM -> PWM Source #

Extract sub-PWM given starting position and length, zero indexed.

rcPWM :: PWM -> PWM Source #

Reverse complement of PWM.

gcContentPWM :: PWM -> Double Source #

GC content of PWM.

data Motif Source #

Constructors

Motif
Fields _name :: !ByteString _pwm :: !PWM

Instances

Read Motif Source #
Instance details Defined in Bio.Motif Methods readsPrec :: Int -> ReadS Motif # readList :: ReadS [Motif] # readPrec :: ReadPrec Motif # readListPrec :: ReadPrec [Motif] #
Show Motif Source #
Instance details Defined in Bio.Motif Methods showsPrec :: Int -> Motif -> ShowS # show :: Motif -> String # showList :: [Motif] -> ShowS #
FastaLike Motif Source #
Instance details Defined in Bio.Data.Fasta Methods fromFastaRecord :: (ByteString, [ByteString]) -> Motif Source # readFasta :: FilePath -> ConduitT i Motif (ResourceT IO) () Source # readFasta' :: FilePath -> IO [Motif] Source #

newtype Bkgd Source #

Background model which consists of single nucleotide frequencies, and di-nucleotide frequencies.

Constructors

BG (Double, Double, Double, Double)

Instances

Default Bkgd Source #
Instance details Defined in Bio.Motif Methods def :: Bkgd #

toPWM :: [ByteString] -> PWM Source #

Get pwm from a matrix.

ic :: PWM -> Int -> Double Source #

Information content of a position in pwm. (Not implemented)

scores :: Bkgd -> PWM -> DNA a -> [Double] Source #

Get scores of a long sequence at each position.

scores' :: Monad m => Bkgd -> PWM -> DNA a -> ConduitT i Double m () Source #

A streaming version of scores.

score :: Bkgd -> PWM -> DNA a -> Double Source #

optimalScore :: Bkgd -> PWM -> Double Source #

The best possible matching score of a pwm.

newtype CDF Source #

The cumulative distribution function in the form of (x, P(X <= x)).

Constructors

CDF (Vector (Double, Double))

Instances

Read CDF Source #
Instance details Defined in Bio.Motif Methods readsPrec :: Int -> ReadS CDF # readList :: ReadS [CDF] # readPrec :: ReadPrec CDF # readListPrec :: ReadPrec [CDF] #
Show CDF Source #
Instance details Defined in Bio.Motif Methods showsPrec :: Int -> CDF -> ShowS # show :: CDF -> String # showList :: [CDF] -> ShowS #

cdf :: CDF -> Double -> Double Source #

cdf' :: CDF -> Double -> Double Source #

The inverse of cdf.

truncateCDF :: Double -> CDF -> CDF Source #

Truncate the CDF by a value, in order to reduce the memory usage.

scoreCDF :: Bkgd -> PWM -> CDF Source #

Approximate the cdf of motif matching scores using dynamic programming. Algorithm: Scan the PWM from left to right. For each position $i$, compute a score density function $s_i$ such that $s_i(x)$ is the total number of sequences with score $x$.

pValueToScore :: Double -> Bkgd -> PWM -> Double Source #

Calculate the minimum motif matching score that would produce a kmer with p-Value less than the given number. This score would then be used to search for motif occurrences with significant p-Value.

pValueToScoreExact Source #

Arguments

:: Double	desirable p-Value
-> Bkgd
-> PWM
-> Double

Unlike pValueToScore, this version computes the exact score, but it is much slower and is impractical for long motifs.

toIUPAC :: PWM -> DNA IUPAC Source #

Convert pwm to consensus sequence, see D. R. Cavener (1987).

readMEME :: FilePath -> IO [Motif] Source #

toMEME :: [Motif] -> Bkgd -> ByteString Source #

fromMEME :: ByteString -> [Motif] Source #

writeMEME :: FilePath -> [Motif] -> Bkgd -> IO () Source #

writeFasta :: FilePath -> [Motif] -> IO () Source #

References

Douglas R. Cavener. (1987) Comparison of the consensus sequence flanking translational start sites in Drosophila and vertebrates. Nucleic Acids Research 15 (4): 1353–1361. http://nar.oxfordjournals.org/content/15/4/1353