Safe Haskell | None |
---|---|
Language | Haskell2010 |
Synopsis
- data PWM = PWM {}
- size :: PWM -> Int
- subPWM :: Int -> Int -> PWM -> PWM
- rcPWM :: PWM -> PWM
- gcContentPWM :: PWM -> Double
- data Motif = Motif {
- _name :: !ByteString
- _pwm :: !PWM
- newtype Bkgd = BG (Double, Double, Double, Double)
- toPWM :: [ByteString] -> PWM
- ic :: PWM -> Int -> Double
- scores :: Bkgd -> PWM -> DNA a -> [Double]
- scores' :: Monad m => Bkgd -> PWM -> DNA a -> ConduitT i Double m ()
- score :: Bkgd -> PWM -> DNA a -> Double
- optimalScore :: Bkgd -> PWM -> Double
- newtype CDF = CDF (Vector (Double, Double))
- cdf :: CDF -> Double -> Double
- cdf' :: CDF -> Double -> Double
- truncateCDF :: Double -> CDF -> CDF
- scoreCDF :: Bkgd -> PWM -> CDF
- pValueToScore :: Double -> Bkgd -> PWM -> Double
- pValueToScoreExact :: Double -> Bkgd -> PWM -> Double
- toIUPAC :: PWM -> DNA IUPAC
- readMEME :: FilePath -> IO [Motif]
- toMEME :: [Motif] -> Bkgd -> ByteString
- fromMEME :: ByteString -> [Motif]
- writeMEME :: FilePath -> [Motif] -> Bkgd -> IO ()
- writeFasta :: FilePath -> [Motif] -> IO ()
Documentation
k x 4 position weight matrix for motifs
subPWM :: Int -> Int -> PWM -> PWM Source #
Extract sub-PWM given starting position and length, zero indexed.
gcContentPWM :: PWM -> Double Source #
GC content of PWM.
Motif | |
|
Background model which consists of single nucleotide frequencies, and di-nucleotide frequencies.
toPWM :: [ByteString] -> PWM Source #
Get pwm from a matrix.
scores' :: Monad m => Bkgd -> PWM -> DNA a -> ConduitT i Double m () Source #
A streaming version of scores.
The cumulative distribution function in the form of (x, P(X <= x)).
truncateCDF :: Double -> CDF -> CDF Source #
Truncate the CDF by a value, in order to reduce the memory usage.
scoreCDF :: Bkgd -> PWM -> CDF Source #
Approximate the cdf of motif matching scores using dynamic programming. Algorithm: Scan the PWM from left to right. For each position $i$, compute a score density function $s_i$ such that $s_i(x)$ is the total number of sequences with score $x$.
pValueToScore :: Double -> Bkgd -> PWM -> Double Source #
Calculate the minimum motif matching score that would produce a k-mer with a p-value less than the given number. This score would then be used to search for motif occurrences with significant p-values.
Unlike pValueToScore, this version computes the exact score, but it is much slower and is impractical for long motifs.
fromMEME :: ByteString -> [Motif] Source #
References
- Douglas R. Cavener. (1987) Comparison of the consensus sequence flanking translational start sites in Drosophila and vertebrates. Nucleic Acids Research 15 (4): 1353–1361. http://nar.oxfordjournals.org/content/15/4/1353