module BioInf.ViennaRNA.RNAfold
( module BioInf.ViennaRNA.RNAfold
, module BioInf.ViennaRNA.Types
) where
import Control.Arrow
import Control.DeepSeq
import Control.Lens
import Control.Monad (guard)
import Data.Attoparsec.ByteString as A
import Data.Attoparsec.ByteString.Char8 as A8
import Data.ByteString.Builder
import Data.ByteString (ByteString)
import Data.ByteString.Char8 as BS
import Data.Char (toUpper)
import Data.Maybe.Strict
import Data.Monoid
import Data.Tuple (swap)
import Debug.Trace
import GHC.Generics (Generic)
import Prelude hiding(Maybe(..))
import Test.QuickCheck as QC
import Text.Printf
import Biobase.Types.Energy
import Biobase.Types.Sequence (mkRNAseq, RNAseq(..), rnaseq)
import Biobase.Types.Structure
import qualified BioInf.ViennaRNA.Bindings as Bindings
import BioInf.ViennaRNA.Internal
import BioInf.ViennaRNA.Types
data RNAfold = RNAfold
{ _sequenceID ∷ !ByteString
, _input ∷ !RNAseq
, _mfe ∷ !Folded
, _mfeFrequency ∷ !Double
, _ensemble ∷ !Folded
, _centroid ∷ !Folded
, _centroidDistance ∷ !Double
, _diversity ∷ !Double
, _temperature ∷ !(Maybe Double)
}
deriving (Read,Show,Eq,Ord,Generic)
makeLensesWith (lensRules & generateUpdateableOptics .~ False) ''RNAfold
makeLensesFor [("_sequenceID", "sequenceIDlens")] ''RNAfold
instance NFData RNAfold
rnafold ∷ RNAseq → RNAfold
rnafold _input = unsafePerformIO . withMutex $! do
let _temperature = Just 37
let _sequenceID = ""
_mfe ← uncurry Folded . swap <$> (DG *** RNAss) <$> Bindings.mfe (_input^.rnaseq)
(_centroid, _centroidDistance) ← (\(e,s,d) → (Folded (RNAss s) (DG e), d)) <$> Bindings.centroidTemp 37 (_input^.rnaseq)
let k0 = 273.15
let gasconst = 1.98717
let kT = (k0 + 37) * gasconst * 1000
let _ensemble = Folded (RNAss "DO NOT USE ME") (DG 999999)
let _diversity = 999999
let d1mfeenergy = 0
let _mfeFrequency = exp $ (_centroid^.foldedEnergy.to dG d1mfeenergy) / kT
return $! RNAfold {..}
data RNArewrite
= NoRewrite
| ForceRNA
deriving (Read,Show,Eq,Ord,Generic)
pRNAfold
∷ RNArewrite
→ Double
→ Parser RNAfold
pRNAfold r celsius = do
let _temperature = Just celsius
let endedLine = A.takeTill isEndOfLine <* endOfLine
let s2line = A8.takeWhile1 (`BS.elem` "(.)") <* skipSpace
let ensline = A8.takeWhile1 (`BS.elem` "(){}.,|") <* skipSpace
let nope = Folded (RNAss "") (DG 0)
let rewrite = case r of
NoRewrite → id
ForceRNA → BS.map (go . toUpper) where
go x
| x `BS.elem` "ACGUacgu" = x
go x = 'N'
_sequenceID ← option "" $ char '>' *> endedLine
_input ← RNAseq <$> rewrite <$> endedLine
let l = _input^.rnaseq.to BS.length
let lenGuard s2 = guard (BS.length s2 == l)
<?> ("s2 line length /= _input length: " ++ show (s2,_input) ++ "")
_mfe ← do s2 ← s2line
lenGuard s2
skipSpace
e ← char '(' *> skipSpace *> signed double <* char ')'
let _foldedStructure = RNAss s2
let _foldedEnergy = DG e
return Folded{..}
<?> "mfe"
_ensemble ← option nope $
(do endOfLine
s2 ← ensline
lenGuard s2
skipSpace
e ← char '[' *> skipSpace *> signed double <* char ']'
let _foldedStructure = RNAss s2
let _foldedEnergy = DG e
return Folded{..}
<?> "ensemble")
(_centroid, _centroidDistance) ← option (nope, 0) $
(do endOfLine
s2 ← s2line
lenGuard s2
e ← char '{' *> skipSpace *> signed double
d ← skipSpace *> "d=" *> skipSpace *> double
skipSpace *> char '}'
let _foldedStructure = RNAss s2
let _foldedEnergy = DG e
return (Folded{..}, d)
<?> "centroid")
(_mfeFrequency, _diversity) ← option (0, 0) $
(do endOfLine
skipSpace
"frequency of mfe structure in ensemble"
skipSpace
f ← double
"; ensemble diversity"
skipSpace
ed ← double
skipSpace
return (f, ed)
<?> "mfe frequency / diversity")
return RNAfold{..}
builderRNAfold ∷ RNAfold → Builder
builderRNAfold RNAfold{..} = mconcat [hdr, sqnce, emfe, nsmbl, cntrd]
where
dbl d = byteString . BS.pack $ printf "%.2f" (d∷Double)
addFolded l r Folded{..} = if BS.null (_rnass _foldedStructure)
then mempty
else nl <> byteString (_rnass _foldedStructure) <> char7 ' '
<> char7 l <> dbl (dG _foldedEnergy) <> char7 r
nl = char7 '\n'
hdr = if BS.null _sequenceID then mempty else char7 '>' <> byteString _sequenceID <> nl
sqnce = byteString (_input^.rnaseq)
emfe = addFolded '(' ')' _mfe
nsmbl = addFolded '[' ']' _ensemble
cntrd = let s = _centroid^.foldedStructure.rnass
in if BS.null (_centroid^.foldedStructure.rnass)
then mempty
else nl <> byteString s <> " {"
<> dbl (_centroid^.foldedEnergy.to dG)
<> " d=" <> dbl _diversity <> "}"
instance Arbitrary RNAfold where
arbitrary = choose (0,100)
>>= \k → rnafold <$> mkRNAseq <$> BS.pack <$> vectorOf k (QC.elements "ACGU")
shrink rna = [ rnafold $ mkRNAseq $ BS.pack s | s ← shrink $ rna^.input.rnaseq.to unpack ]