{-# OPTIONS_GHC -fno-warn-orphans #-}
module Bio.PDB
( modelsFromPDBText, modelsToPDBText
, modelsFromPDBFile, modelsToPDBFile
) where
import Bio.PDB.BondRestoring (residueID,
restoreChainLocalBonds,
restoreModelGlobalBonds)
import Bio.PDB.Functions (groupChainByResidue)
import Bio.PDB.Reader (PDBWarnings, fromTextPDB)
import qualified Bio.PDB.Type as PDB
import Bio.PDB.Writer (pdbToFile, pdbToText)
import Bio.Structure (Residue(..),
Bond,
Atom(..),
LocalID,
GlobalID(GlobalID, getGlobalID),
SecondaryStructure(Undefined),
Chain(..),
Model(Model, modelChains),
StructureSerializable(..),
StructureModels(..))
import qualified Bio.Utils.Map as M ((!?!))
import qualified Bio.Utils.Vector as V ((!?!))
import Control.Arrow ((&&&))
import Control.Lens ((^.))
import Control.Monad (join)
import Control.Monad.IO.Class (MonadIO, liftIO)
import Data.List (sort)
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M (fromList)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import qualified Data.Text as T (head, pack, singleton, strip,
unpack)
import Data.Text.IO as TIO (readFile)
import Data.Vector (Vector)
import qualified Data.Vector as V (toList,
concat,
fromList,
length)
import Linear.V3 (V3 (..), _x, _y, _z)
import Text.Read (readMaybe)
instance StructureModels PDB.PDB where
modelsOf :: PDB -> Vector Model
modelsOf PDB.PDB {Vector Model
Text
Map RemarkCode RemarkData
Map FieldType RemarkData
otherFields :: PDB -> Map FieldType RemarkData
remarks :: PDB -> Map RemarkCode RemarkData
models :: PDB -> Vector Model
title :: PDB -> Text
otherFields :: Map FieldType RemarkData
remarks :: Map RemarkCode RemarkData
models :: Vector Model
title :: Text
..} = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Model -> Model
mkModel Vector Model
models
where
mkModel :: PDB.Model -> Model
mkModel :: Model -> Model
mkModel Model
model = if forall (t :: * -> *) a. Foldable t => t a -> Int
length Map Atom Int
atomToNilBasedIndex forall a. Eq a => a -> a -> Bool
== forall (t :: * -> *) a. Foldable t => t a -> Int
length [Atom]
allModelAtoms
then Vector Chain -> Vector (Bond GlobalID) -> Model
Model (forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Chain -> Chain
mkChain Model
model) (Map Atom Int -> Model -> Vector (Bond GlobalID)
restoreModelGlobalBonds Map Atom Int
atomToNilBasedIndex Model
model)
else forall a. HasCallStack => [Char] -> a
error [Char]
"Mapping from PDB id to nil based index must be a bijection."
where
atomToNilBasedIndex :: Map PDB.Atom Int
atomToNilBasedIndex :: Map Atom Int
atomToNilBasedIndex = forall k a. Ord k => [(k, a)] -> Map k a
M.fromList forall a b. (a -> b) -> a -> b
$ [Atom]
allModelAtoms forall a b. [a] -> [b] -> [(a, b)]
`zip` [Int
0..]
allModelAtoms :: [PDB.Atom]
allModelAtoms :: [Atom]
allModelAtoms = forall a. Ord a => [a] -> [a]
sort forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Vector a -> [a]
V.toList forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [Vector a] -> Vector a
V.concat forall a b. (a -> b) -> a -> b
$ forall a. Vector a -> [a]
V.toList Model
model
mkChain :: PDB.Chain -> Chain
mkChain :: Chain -> Chain
mkChain = forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Text -> Vector Residue -> Chain
Chain forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Chain -> Text
mkChainName forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& Chain -> Vector Residue
mkChainResidues)
mkChainName :: PDB.Chain -> Text
mkChainName :: Chain -> Text
mkChainName = Char -> Text
T.singleton forall b c a. (b -> c) -> (a -> b) -> a -> c
. Atom -> Char
PDB.atomChainID forall b c a. (b -> c) -> (a -> b) -> a -> c
. Chain -> Atom
safeFirstAtom
mkChainResidues :: PDB.Chain -> Vector Residue
mkChainResidues :: Chain -> Vector Residue
mkChainResidues Chain
chain = forall a. [a] -> Vector a
V.fromList forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Map Text (Vector (Bond LocalID)) -> [Atom] -> Residue
mkResidue (Chain -> Map Text (Vector (Bond LocalID))
restoreChainLocalBonds Chain
chain)) forall a b. (a -> b) -> a -> b
$ Chain -> [[Atom]]
groupChainByResidue Chain
chain
safeFirstAtom :: Vector PDB.Atom -> PDB.Atom
safeFirstAtom :: Chain -> Atom
safeFirstAtom Chain
arr | forall a. Vector a -> Int
V.length Chain
arr forall a. Ord a => a -> a -> Bool
> Int
0 = Chain
arr forall a. (HasCallStack, Show a) => Vector a -> Int -> a
V.!?! Int
0
| Bool
otherwise = forall a. HasCallStack => [Char] -> a
error [Char]
"Could not pick first atom"
mkResidue :: Map Text (Vector (Bond LocalID)) -> [PDB.Atom] -> Residue
mkResidue :: Map Text (Vector (Bond LocalID)) -> [Atom] -> Residue
mkResidue Map Text (Vector (Bond LocalID))
_ [] = forall a. HasCallStack => [Char] -> a
error [Char]
"Cound not make residue from empty list"
mkResidue Map Text (Vector (Bond LocalID))
localBondsMap [Atom]
atoms' = Text
-> Int
-> Char
-> Vector Atom
-> Vector (Bond LocalID)
-> SecondaryStructure
-> Text
-> Residue
Residue (Text -> Text
T.strip forall a b. (a -> b) -> a -> b
$ Atom -> Text
PDB.atomResName Atom
firstResidueAtom)
(Atom -> Int
PDB.atomResSeq Atom
firstResidueAtom)
(Atom -> Char
PDB.atomICode Atom
firstResidueAtom)
(forall a. [a] -> Vector a
V.fromList forall a b. (a -> b) -> a -> b
$ Atom -> Atom
mkAtom forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Atom]
atoms')
(Map Text (Vector (Bond LocalID))
localBondsMap forall k a.
(HasCallStack, Ord k, Show k, Show a) =>
Map k a -> k -> a
M.!?! Atom -> Text
residueID Atom
firstResidueAtom)
SecondaryStructure
Undefined
Text
""
where
firstResidueAtom :: Atom
firstResidueAtom = forall a. [a] -> a
head [Atom]
atoms'
mkAtom :: PDB.Atom -> Atom
mkAtom :: Atom -> Atom
mkAtom atom :: Atom
atom@PDB.Atom{Char
Float
Int
Text
atomCharge :: Atom -> Text
atomElement :: Atom -> Text
atomTempFactor :: Atom -> Float
atomOccupancy :: Atom -> Float
atomZ :: Atom -> Float
atomY :: Atom -> Float
atomX :: Atom -> Float
atomAltLoc :: Atom -> Char
atomName :: Atom -> Text
atomSerial :: Atom -> Int
atomCharge :: Text
atomElement :: Text
atomTempFactor :: Float
atomOccupancy :: Float
atomZ :: Float
atomY :: Float
atomX :: Float
atomICode :: Char
atomResSeq :: Int
atomChainID :: Char
atomResName :: Text
atomAltLoc :: Char
atomName :: Text
atomSerial :: Int
atomICode :: Atom -> Char
atomResSeq :: Atom -> Int
atomResName :: Atom -> Text
atomChainID :: Atom -> Char
..} = GlobalID
-> Int -> Text -> Text -> V3 Float -> Int -> Float -> Float -> Atom
Atom (Int -> GlobalID
GlobalID forall a b. (a -> b) -> a -> b
$ Map Atom Int
atomToNilBasedIndex forall k a.
(HasCallStack, Ord k, Show k, Show a) =>
Map k a -> k -> a
M.!?! Atom
atom)
Int
atomSerial
(Text -> Text
T.strip Text
atomName)
Text
atomElement
(forall a. a -> a -> a -> V3 a
V3 Float
atomX Float
atomY Float
atomZ)
(forall a. a -> Maybe a -> a
fromMaybe Int
0 forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Read a => [Char] -> Maybe a
readMaybe forall a b. (a -> b) -> a -> b
$ Text -> [Char]
T.unpack Text
atomCharge)
Float
atomTempFactor
Float
atomOccupancy
modelsFromPDBFile :: (MonadIO m) => FilePath -> m (Either Text ([PDBWarnings], Vector Model))
modelsFromPDBFile :: forall (m :: * -> *).
MonadIO m =>
[Char] -> m (Either Text ([PDBWarnings], Vector Model))
modelsFromPDBFile = forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> Either Text ([PDBWarnings], Vector Model)
modelsFromPDBText forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> IO Text
TIO.readFile
modelsFromPDBText :: Text -> Either Text ([PDBWarnings], Vector Model)
modelsFromPDBText :: Text -> Either Text ([PDBWarnings], Vector Model)
modelsFromPDBText Text
pdbText = do
([PDBWarnings]
warnings, PDB
parsedPDB) <- Text -> Either Text ([PDBWarnings], PDB)
fromTextPDB Text
pdbText
let models :: Vector Model
models = forall a. StructureModels a => a -> Vector Model
modelsOf PDB
parsedPDB
forall (f :: * -> *) a. Applicative f => a -> f a
pure ([PDBWarnings]
warnings, Vector Model
models)
instance StructureSerializable PDB.PDB where
serializeModels :: Vector Model -> PDB
serializeModels Vector Model
models = Text
-> Vector Model
-> Map RemarkCode RemarkData
-> Map FieldType RemarkData
-> PDB
PDB.PDB Text
"Serialized model" Vector Model
pdbModels forall a. Monoid a => a
mempty forall a. Monoid a => a
mempty
where
pdbModels :: Vector Model
pdbModels = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Model -> Model
toPDBModel Vector Model
models
toPDBModel :: Model -> PDB.Model
toPDBModel :: Model -> Model
toPDBModel = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Chain -> Chain
toPDBChain forall b c a. (b -> c) -> (a -> b) -> a -> c
. Model -> Vector Chain
modelChains
toPDBChain :: Chain -> PDB.Chain
toPDBChain :: Chain -> Chain
toPDBChain Chain
ch = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Chain, Residue, Atom) -> Atom
toPDBAtom forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (m :: * -> *) a. Monad m => m (m a) -> m a
join forall a b. (a -> b) -> a -> b
$ (\Residue
r -> forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((,,) Chain
ch Residue
r) forall a b. (a -> b) -> a -> b
$ Residue -> Vector Atom
resAtoms Residue
r) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Chain -> Vector Residue
chainResidues Chain
ch
toPDBAtom :: (Chain, Residue, Atom) -> PDB.Atom
toPDBAtom :: (Chain, Residue, Atom) -> Atom
toPDBAtom (Chain{Vector Residue
Text
chainName :: Chain -> Text
chainResidues :: Vector Residue
chainName :: Text
chainResidues :: Chain -> Vector Residue
..}, Residue{Char
Int
Vector (Bond LocalID)
Vector Atom
Text
SecondaryStructure
resChemCompType :: Residue -> Text
resSecondary :: Residue -> SecondaryStructure
resBonds :: Residue -> Vector (Bond LocalID)
resInsertionCode :: Residue -> Char
resNumber :: Residue -> Int
resName :: Residue -> Text
resChemCompType :: Text
resSecondary :: SecondaryStructure
resBonds :: Vector (Bond LocalID)
resAtoms :: Vector Atom
resInsertionCode :: Char
resNumber :: Int
resName :: Text
resAtoms :: Residue -> Vector Atom
..}, Atom{Float
Int
Text
V3 Float
GlobalID
occupancy :: Atom -> Float
bFactor :: Atom -> Float
formalCharge :: Atom -> Int
atomCoords :: Atom -> V3 Float
atomElement :: Atom -> Text
atomName :: Atom -> Text
atomInputIndex :: Atom -> Int
atomId :: Atom -> GlobalID
occupancy :: Float
bFactor :: Float
formalCharge :: Int
atomCoords :: V3 Float
atomElement :: Text
atomName :: Text
atomInputIndex :: Int
atomId :: GlobalID
..}) = Atom
res
where
res :: Atom
res =
Int
-> Text
-> Char
-> Text
-> Char
-> Int
-> Char
-> Float
-> Float
-> Float
-> Float
-> Float
-> Text
-> Text
-> Atom
PDB.Atom
(GlobalID -> Int
getGlobalID GlobalID
atomId forall a. Num a => a -> a -> a
+ Int
1)
Text
atomName
Char
nullAltLoc
Text
resName
(Text -> Char
T.head Text
chainName)
Int
resNumber
Char
resInsertionCode
(V3 Float
atomCoords forall s a. s -> Getting a s a -> a
^. forall (t :: * -> *) a. R1 t => Lens' (t a) a
_x)
(V3 Float
atomCoords forall s a. s -> Getting a s a -> a
^. forall (t :: * -> *) a. R2 t => Lens' (t a) a
_y)
(V3 Float
atomCoords forall s a. s -> Getting a s a -> a
^. forall (t :: * -> *) a. R3 t => Lens' (t a) a
_z)
Float
occupancy
Float
bFactor
Text
atomElement
([Char] -> Text
T.pack forall a b. (a -> b) -> a -> b
$ forall a. Show a => a -> [Char]
show Int
formalCharge)
nullAltLoc :: Char
nullAltLoc :: Char
nullAltLoc = Char
' '
modelsToPDBFile :: MonadIO m => Vector Model -> FilePath -> m ()
modelsToPDBFile :: forall (m :: * -> *). MonadIO m => Vector Model -> [Char] -> m ()
modelsToPDBFile Vector Model
models = forall (m :: * -> *). MonadIO m => PDB -> [Char] -> m ()
pdbToFile forall a b. (a -> b) -> a -> b
$ forall a. StructureSerializable a => Vector Model -> a
serializeModels Vector Model
models
modelsToPDBText :: Vector Model -> Text
modelsToPDBText :: Vector Model -> Text
modelsToPDBText = PDB -> Text
pdbToText forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. StructureSerializable a => Vector Model -> a
serializeModels