-- | Wrappers for structural data. Encoded as bytestrings. This differs from -- @BiobaseXNA@, where specialized encodings are used. These structures are -- supposedly "short", they need to fit into a strict bytestring. -- -- TODO Consider where to move each type. There are merge possibilities between -- BiobaseXNA and BiobaseTypes. module Biobase.Types.Structure where import Control.DeepSeq import Control.Lens import Control.Monad.Error.Class import Control.Monad (foldM,unless) import Data.ByteString (ByteString) import Data.Data import Data.Set (Set) import GHC.Generics (Generic) import qualified Data.ByteString.Char8 as BS8 import qualified Data.List as L import qualified Data.Set as Set -- | Secondary structure using @()@ for paired elements, and @.@ for unpaired -- ones. It is assumed that the @()@ match up. These structures from a Monoid. newtype RNAss = RNAss { _rnass ∷ ByteString } deriving (Eq,Ord,Show,Read,Data,Typeable,Generic,Monoid) makeLenses ''RNAss instance NFData RNAss -- | Ensemble structure encoding. *Very* different type ctor name chosen! The -- structure of this string makes verification much more complicated. -- -- TODO describe encoding used by RNAfold for the ensemble string. newtype RNAensembleStructure = RNAes { _rnaes ∷ ByteString } deriving (Eq,Ord,Show,Read,Data,Typeable,Generic) makeLenses ''RNAensembleStructure instance NFData RNAensembleStructure -- | Cofolded structure. data RNAds = RNAds { _rnadsL ∷ !ByteString , _rnadsR ∷ !ByteString } deriving (Eq,Ord,Show,Read,Data,Typeable,Generic) makeLenses ''RNAds instance NFData RNAds -- | A Prism that turns ByteStrings with a single @&@ into @RNAds@. rnads ∷ Prism' ByteString RNAds rnads = prism (\(RNAds l r) → BS8.concat [l, "&", r]) (\s → case BS8.split '&' s of [l,r] → Right (RNAds l r) ; _ → Left s) {-# Inline rnads #-} -- | Isomorphism from @RNAds@ to @(RNAss,RNAss)@. The @RNAss@ are only -- legal if taken both: @rnassFromDimer . both@. rnads2rnassPair ∷ Iso' RNAds (RNAss, RNAss) rnads2rnassPair = iso (\(RNAds l r) → (RNAss l, RNAss r)) (\(RNAss l, RNAss r) → RNAds l r) {-# Inline rnads2rnassPair #-} -- | Try to create a dimeric structure. mkRNAds ∷ (Monad m, MonadError RNAStructureError m) ⇒ ByteString → m RNAds mkRNAds q = BS8.split '&' q & \case [l,r] → do -- TODO can still fail with unmatched brackets. return $ RNAds { _rnadsL = l , _rnadsR = r } _ → throwError $ RNAStructureError "mkRNAds: not a dimer" q {-# Inline mkRNAds #-} -- | Capture what might be wrong with the RNAss. data RNAStructureError = RNAStructureError { _rnaStructureError ∷ String , _rnaOffender ∷ ByteString } deriving (Show,Generic) instance NFData RNAStructureError -- | Verifies that the given RNAss is properly formatted. Otherwise, error out. -- -- TODO Implement! Check with BiobaseXNA and the stack effort in there. This -- might influence if the verification goes into BiobaseXNA and happens via an -- @Iso'@. verifyRNAss ∷ (Monad m, MonadError RNAStructureError m) ⇒ RNAss → m RNAss verifyRNAss ss = do return ss newtype RNApset = RNApset { _rnapset ∷ Set (Int,Int) } deriving (Read,Show,Eq,Ord,Generic) makeLenses ''RNApset instance NFData RNApset -- | Transform an 'RNAss' into a set of base pairs @(i,j)@. The pairs are -- 0-based. rnassPairSet ∷ (MonadError String m) ⇒ RNAss → m RNApset rnassPairSet (RNAss s2) = do let go (set,ks ) (i,'(') = return (set,i:ks) go (set,i:is) (j,')') = return (Set.insert (i,j) set, is) go (set,[] ) (j,')') = throwError $ "unequal brackets in \"" ++ BS8.unpack s2 ++ "\" at position: " ++ show j go (set,ks ) (_,'.') = return (set,ks) (set,ss) ← foldM go (Set.empty,[]) . L.zip [0..] $ BS8.unpack s2 unless (null ss) $ throwError $ "unequal brackets in \"" ++ BS8.unpack s2 ++ "\" with opening bracket(s): " ++ show ss return $ RNApset set {-# Inlinable rnassPairSet #-} -- | RNA pair set, but a transformation error calls @error@. rnassPairSet' ∷ RNAss → RNApset rnassPairSet' = either error id . rnassPairSet -- | Calculates the number of different base pairs betwwen two structures. pairDist ∷ RNApset → RNApset → Int pairDist (RNApset p1) (RNApset p2) = Set.size z1 + Set.size z2 where i = Set.intersection p1 p2 z1 = p1 `Set.difference` i z2 = p2 `Set.difference` i