{-# LANGUAGE RecordWildCards #-} -- | Holds the 'FR3DBasepairs' structure extracted from a FR3D basepairs file. -- The other files are still on the TODO list -- -- TODO write data structures and parsers for the other files. -- -- TODO rewrite by reducing all those data-structures to something more -- intelligent. Use 'Nucleotide's for encoding. Have a converter to 'Complex', -- too. module Biobase.DataSource.FR3D where import Data.Map (Map) import qualified Data.Map as M import Data.Char (isDigit) import Control.Monad.Error -- TODO GHC < 7.0.0 only ??? import Data.List (nubBy) import qualified Biobase.RNA.Complex as C import Biobase.RNA.Pairs (ExtPair) -- * FR3D data structures -- | Wrap everything up, giving us our data source. data FR3D = FR3D { complex :: C.Complex ExtPair -- ^ parsed PDB/FR3D as normal 'Complex' , fr3dBasepairs :: FR3DBasepairs -- ^ the source information } deriving (Show) -- | A complete 'FR3DBasepairs' structure. We have the 'pdbID' from which the FR3D -- came from, the 'versions' of different programs, the 'chains' or different -- nucleotide sequences used in the structure, the 'pairs' themselves and other -- 'comments' which could not be identified correctly. data FR3DBasepairs = FR3DBasepairs { pdbID :: String , versions :: String , chains :: Map String String , pairs :: [Pair] , comments :: [String] -- unknown comments, maybe really just comments } deriving (Read,Show) -- | Each 'Pair' is a triple of the 'interaction' between the two 'Nucleotide's -- 'nuc1' and nuc2'. data Pair = Pair { interaction :: Interaction , nuc1 :: Nucleotide , nuc2 :: Nucleotide } deriving (Read,Show,Eq,Ord) -- | Each 'Nucleotide' is described by the 'nuc' itself (A,C,G,U) (something -- else?), the pdb 'num'ber (whatever that is), the 'chain' id and the -- 'pos'ition in the chain. data Nucleotide = Nucleotide { nuc :: Char , num :: String -- should be "20" but could be "20A" (grr) , chain :: String -- could be "chain 1" or "chain E" or so , pos :: Int } deriving (Read,Show,Eq,Ord) type Interaction = String -- ? -- | fix the 'pos' in all 'Nucleotide's. Not actually clear, if this /DOES/ fix -- the problem! fixLengths :: FR3DBasepairs -> FR3DBasepairs fixLengths b = b {pairs = map f $ pairs b} where f p = p{nuc1 = g $ nuc1 p, nuc2 = g $ nuc2 p} g n | pos n > (length $ chains b M.! chain n) = n{pos = read $ filter isDigit $ num n} | otherwise = n -- | Check if a given 'FR3DBasepairs' structure is correct, or if there are errors checkFR3DBasepairs :: FR3DBasepairs -> Either String FR3DBasepairs checkFR3DBasepairs b' = checkChains b' >>= checkLength >>= return where checkChains b | null cnucs = Right b | otherwise = Left $ "CHAIN: " ++ show cnucs where cnucs = filter (\n -> not $ chain n `M.member` chains b) ps checkLength b | null cnucs = Right b | otherwise = Left $ "LENGTH: " ++ show b ++ show cnucs where cnucs = filter (\n -> length (chains b M.! chain n) < pos n) ps -- positions start countaing at "1" checkNucs b | null cnucs = Right b | otherwise = Left $ "NUCS: " ++ show cnucs where cnucs = filter (\n -> (chains b M.! chain n !! (pos n -1)) == nuc n) ps ps = concatMap (\x -> [nuc1 x, nuc2 x]) $ pairs b' -- * Transform 'FR3DBasepairs' into 'Complex' -- | Aquire a 'Complex' from the data read. -- -- TODO write me makeComplex :: FR3DBasepairs -> C.Complex ExtPair makeComplex b@FR3DBasepairs{..} | length sqnc /= length chns = error $ "makeComplex: " ++ show b | otherwise = C.mkComplex "TODO write me" {- sqnc strc chns -} where sqnc = concatMap snd $ M.toAscList chains strc = nubBy eqT12 $ map (\(Pair z n1 n2) -> (pos n1-1,pos n2-1,z)) pairs chns = concatMap (\(k,v) -> concat $ replicate (length v) k) $ M.toAscList chains eqT12 (a,b,_) (x,y,_) = (a,b)==(x,y) || (a,b)==(y,x) -- | Simple wrapper fixing up an 'FR3D' mkFR3D :: FR3DBasepairs -> FR3D mkFR3D b = FR3D {complex=makeComplex b, fr3dBasepairs=b}