{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} -- | FR3D provides a very convenient library of explored RNA structures. We are -- mostly interested in the "basepairs" files. In contrast to the RNAstrand -- library or melting experiments, these data sets provide non-canonical RNA -- pairing. -- -- NOTE that FR3D entries contain basepairs both in (i,j) as well as (j,i) -- orientation (with i3' order; then produces one sequence with "&" -- separating the sequences and pairs reduced to (Int,Int,cWW). linearizeFR3D :: FR3D -> LinFR3D linearizeFR3D FR3D{..} = LinFR3D { pdbID = pdbid , sequence = BS.intercalate "&" $ L.map snd chains , pairs = L.map f basepairs } where trans = snd $ L.mapAccumL ( \acc (x,y) -> (acc + 1 + BS.length y, (x,acc)) ) 0 chains f bp@Basepair{..} = (pi,p,bp) where pi = ( ( maybe (-1) (\v -> v+seqpos1) $ L.lookup chain1 trans , maybe (-1) (\v -> v+seqpos2) $ L.lookup chain2 trans ) , interaction ) p = ( (mkNuc nucleotide1, mkNuc nucleotide2), interaction ) class RemoveDuplicatePairs a where removeDuplicatePairs :: a -> a instance RemoveDuplicatePairs FR3D where removeDuplicatePairs x@FR3D{..} = x{basepairs = L.filter f basepairs} where f Basepair{..} = (chain1,seqpos1) < (chain2,seqpos2) instance RemoveDuplicatePairs LinFR3D where removeDuplicatePairs x@LinFR3D{..} = x{pairs = L.filter (f.sel1) pairs} where f ((x,y),_) = x= BS.length c1 || seqpos2 x >= BS.length c2 || nucleotide1 x /= c1 `BS.index` seqpos1 x || nucleotide2 x /= c2 `BS.index` seqpos2 x ] checkLinFR3D linfr3d@LinFR3D{..} | L.null xs = Right linfr3d | otherwise = Left (linfr3d,xs) where xs = [ x | x@(pi,p,_) <- pairs , baseL pi < 0 || baseR pi < 0 || baseL pi >= BS.length sequence || baseR pi >= BS.length sequence || mkNuc (sequence `BS.index` baseL pi) /= baseL p || mkNuc (sequence `BS.index` baseR pi) /= baseR p ]