{-# OPTIONS_GHC -fno-warn-orphans #-}
module Bio.PDB
( modelsFromPDBText, modelsToPDBText
, modelsFromPDBFile, modelsToPDBFile
) where
import Bio.PDB.BondRestoring (residueID,
restoreChainLocalBonds,
restoreModelGlobalBonds)
import Bio.PDB.Functions (groupChainByResidue)
import Bio.PDB.Reader (PDBWarnings, fromTextPDB)
import qualified Bio.PDB.Type as PDB
import Bio.PDB.Writer (pdbToFile, pdbToText)
import Bio.Structure (Residue(..),
Bond,
Atom(..),
LocalID,
GlobalID(GlobalID, getGlobalID),
SecondaryStructure(Undefined),
Chain(..),
Model(Model, modelChains),
StructureSerializable(..),
StructureModels(..))
import qualified Bio.Utils.Map as M ((!?!))
import qualified Bio.Utils.Vector as V ((!?!))
import Control.Arrow ((&&&))
import Control.Lens ((^.))
import Control.Monad (join)
import Control.Monad.IO.Class (MonadIO, liftIO)
import Data.List (sort)
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M (fromList)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import qualified Data.Text as T (head, pack, singleton, strip,
unpack)
import Data.Text.IO as TIO (readFile)
import Data.Vector (Vector)
import qualified Data.Vector as V (toList,
concat,
fromList,
length)
import Linear.V3 (V3 (..), _x, _y, _z)
import Text.Read (readMaybe)
instance StructureModels PDB.PDB where
modelsOf :: PDB -> Vector Model
modelsOf PDB.PDB {Text
Map RemarkCode RemarkData
Map FieldType RemarkData
Vector Model
otherFields :: PDB -> Map FieldType RemarkData
remarks :: PDB -> Map RemarkCode RemarkData
models :: PDB -> Vector Model
title :: PDB -> Text
otherFields :: Map FieldType RemarkData
remarks :: Map RemarkCode RemarkData
models :: Vector Model
title :: Text
..} = (Model -> Model) -> Vector Model -> Vector Model
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Model -> Model
mkModel Vector Model
models
where
mkModel :: PDB.Model -> Model
mkModel :: Model -> Model
mkModel Model
model = if Map Atom Int -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Map Atom Int
atomToNilBasedIndex Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== [Atom] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Atom]
allModelAtoms
then Vector Chain -> Vector (Bond GlobalID) -> Model
Model ((Chain -> Chain) -> Model -> Vector Chain
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Chain -> Chain
mkChain Model
model) (Map Atom Int -> Model -> Vector (Bond GlobalID)
restoreModelGlobalBonds Map Atom Int
atomToNilBasedIndex Model
model)
else [Char] -> Model
forall a. HasCallStack => [Char] -> a
error [Char]
"Mapping from PDB id to nil based index must be a bijection."
where
atomToNilBasedIndex :: Map PDB.Atom Int
atomToNilBasedIndex :: Map Atom Int
atomToNilBasedIndex = [(Atom, Int)] -> Map Atom Int
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList ([(Atom, Int)] -> Map Atom Int) -> [(Atom, Int)] -> Map Atom Int
forall a b. (a -> b) -> a -> b
$ [Atom]
allModelAtoms [Atom] -> [Int] -> [(Atom, Int)]
forall a b. [a] -> [b] -> [(a, b)]
`zip` [Int
0..]
allModelAtoms :: [PDB.Atom]
allModelAtoms :: [Atom]
allModelAtoms = [Atom] -> [Atom]
forall a. Ord a => [a] -> [a]
sort ([Atom] -> [Atom]) -> ([Chain] -> [Atom]) -> [Chain] -> [Atom]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Chain -> [Atom]
forall a. Vector a -> [a]
V.toList (Chain -> [Atom]) -> ([Chain] -> Chain) -> [Chain] -> [Atom]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Chain] -> Chain
forall a. [Vector a] -> Vector a
V.concat ([Chain] -> [Atom]) -> [Chain] -> [Atom]
forall a b. (a -> b) -> a -> b
$ Model -> [Chain]
forall a. Vector a -> [a]
V.toList Model
model
mkChain :: PDB.Chain -> Chain
mkChain :: Chain -> Chain
mkChain = (Text -> Vector Residue -> Chain)
-> (Text, Vector Residue) -> Chain
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Text -> Vector Residue -> Chain
Chain ((Text, Vector Residue) -> Chain)
-> (Chain -> (Text, Vector Residue)) -> Chain -> Chain
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Chain -> Text
mkChainName (Chain -> Text)
-> (Chain -> Vector Residue) -> Chain -> (Text, Vector Residue)
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& Chain -> Vector Residue
mkChainResidues)
mkChainName :: PDB.Chain -> Text
mkChainName :: Chain -> Text
mkChainName = Char -> Text
T.singleton (Char -> Text) -> (Chain -> Char) -> Chain -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Atom -> Char
PDB.atomChainID (Atom -> Char) -> (Chain -> Atom) -> Chain -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Chain -> Atom
safeFirstAtom
mkChainResidues :: PDB.Chain -> Vector Residue
mkChainResidues :: Chain -> Vector Residue
mkChainResidues Chain
chain = [Residue] -> Vector Residue
forall a. [a] -> Vector a
V.fromList ([Residue] -> Vector Residue)
-> ([[Atom]] -> [Residue]) -> [[Atom]] -> Vector Residue
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Atom] -> Residue) -> [[Atom]] -> [Residue]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Map Text (Vector (Bond LocalID)) -> [Atom] -> Residue
mkResidue (Chain -> Map Text (Vector (Bond LocalID))
restoreChainLocalBonds Chain
chain)) ([[Atom]] -> Vector Residue) -> [[Atom]] -> Vector Residue
forall a b. (a -> b) -> a -> b
$ Chain -> [[Atom]]
groupChainByResidue Chain
chain
safeFirstAtom :: Vector PDB.Atom -> PDB.Atom
safeFirstAtom :: Chain -> Atom
safeFirstAtom Chain
arr | Chain -> Int
forall a. Vector a -> Int
V.length Chain
arr Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0 = Chain
arr Chain -> Int -> Atom
forall a. (HasCallStack, Show a) => Vector a -> Int -> a
V.!?! Int
0
| Bool
otherwise = [Char] -> Atom
forall a. HasCallStack => [Char] -> a
error [Char]
"Could not pick first atom"
mkResidue :: Map Text (Vector (Bond LocalID)) -> [PDB.Atom] -> Residue
mkResidue :: Map Text (Vector (Bond LocalID)) -> [Atom] -> Residue
mkResidue Map Text (Vector (Bond LocalID))
_ [] = [Char] -> Residue
forall a. HasCallStack => [Char] -> a
error [Char]
"Cound not make residue from empty list"
mkResidue Map Text (Vector (Bond LocalID))
localBondsMap [Atom]
atoms' = Text
-> Int
-> Char
-> Vector Atom
-> Vector (Bond LocalID)
-> SecondaryStructure
-> Text
-> Residue
Residue (Text -> Text
T.strip (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Atom -> Text
PDB.atomResName Atom
firstResidueAtom)
(Atom -> Int
PDB.atomResSeq Atom
firstResidueAtom)
(Atom -> Char
PDB.atomICode Atom
firstResidueAtom)
([Atom] -> Vector Atom
forall a. [a] -> Vector a
V.fromList ([Atom] -> Vector Atom) -> [Atom] -> Vector Atom
forall a b. (a -> b) -> a -> b
$ Atom -> Atom
mkAtom (Atom -> Atom) -> [Atom] -> [Atom]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Atom]
atoms')
(Map Text (Vector (Bond LocalID))
localBondsMap Map Text (Vector (Bond LocalID)) -> Text -> Vector (Bond LocalID)
forall k a.
(HasCallStack, Ord k, Show k, Show a) =>
Map k a -> k -> a
M.!?! Atom -> Text
residueID Atom
firstResidueAtom)
SecondaryStructure
Undefined
Text
""
where
firstResidueAtom :: Atom
firstResidueAtom = [Atom] -> Atom
forall a. [a] -> a
head [Atom]
atoms'
mkAtom :: PDB.Atom -> Atom
mkAtom :: Atom -> Atom
mkAtom atom :: Atom
atom@PDB.Atom{Char
Float
Int
Text
atomCharge :: Atom -> Text
atomElement :: Atom -> Text
atomTempFactor :: Atom -> Float
atomOccupancy :: Atom -> Float
atomZ :: Atom -> Float
atomY :: Atom -> Float
atomX :: Atom -> Float
atomAltLoc :: Atom -> Char
atomName :: Atom -> Text
atomSerial :: Atom -> Int
atomCharge :: Text
atomElement :: Text
atomTempFactor :: Float
atomOccupancy :: Float
atomZ :: Float
atomY :: Float
atomX :: Float
atomICode :: Char
atomResSeq :: Int
atomChainID :: Char
atomResName :: Text
atomAltLoc :: Char
atomName :: Text
atomSerial :: Int
atomICode :: Atom -> Char
atomResSeq :: Atom -> Int
atomResName :: Atom -> Text
atomChainID :: Atom -> Char
..} = GlobalID
-> Int -> Text -> Text -> V3 Float -> Int -> Float -> Float -> Atom
Atom (Int -> GlobalID
GlobalID (Int -> GlobalID) -> Int -> GlobalID
forall a b. (a -> b) -> a -> b
$ Map Atom Int
atomToNilBasedIndex Map Atom Int -> Atom -> Int
forall k a.
(HasCallStack, Ord k, Show k, Show a) =>
Map k a -> k -> a
M.!?! Atom
atom)
Int
atomSerial
(Text -> Text
T.strip Text
atomName)
Text
atomElement
(Float -> Float -> Float -> V3 Float
forall a. a -> a -> a -> V3 a
V3 Float
atomX Float
atomY Float
atomZ)
(Int -> RemarkCode -> Int
forall a. a -> Maybe a -> a
fromMaybe Int
0 (RemarkCode -> Int) -> ([Char] -> RemarkCode) -> [Char] -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> RemarkCode
forall a. Read a => [Char] -> Maybe a
readMaybe ([Char] -> Int) -> [Char] -> Int
forall a b. (a -> b) -> a -> b
$ Text -> [Char]
T.unpack Text
atomCharge)
Float
atomTempFactor
Float
atomOccupancy
modelsFromPDBFile :: (MonadIO m) => FilePath -> m (Either Text ([PDBWarnings], Vector Model))
modelsFromPDBFile :: [Char] -> m (Either Text ([PDBWarnings], Vector Model))
modelsFromPDBFile = IO (Either Text ([PDBWarnings], Vector Model))
-> m (Either Text ([PDBWarnings], Vector Model))
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Either Text ([PDBWarnings], Vector Model))
-> m (Either Text ([PDBWarnings], Vector Model)))
-> ([Char] -> IO (Either Text ([PDBWarnings], Vector Model)))
-> [Char]
-> m (Either Text ([PDBWarnings], Vector Model))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Either Text ([PDBWarnings], Vector Model))
-> IO Text -> IO (Either Text ([PDBWarnings], Vector Model))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> Either Text ([PDBWarnings], Vector Model)
modelsFromPDBText (IO Text -> IO (Either Text ([PDBWarnings], Vector Model)))
-> ([Char] -> IO Text)
-> [Char]
-> IO (Either Text ([PDBWarnings], Vector Model))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> IO Text
TIO.readFile
modelsFromPDBText :: Text -> Either Text ([PDBWarnings], Vector Model)
modelsFromPDBText :: Text -> Either Text ([PDBWarnings], Vector Model)
modelsFromPDBText Text
pdbText = do
([PDBWarnings]
warnings, PDB
parsedPDB) <- Text -> Either Text ([PDBWarnings], PDB)
fromTextPDB Text
pdbText
let models :: Vector Model
models = PDB -> Vector Model
forall a. StructureModels a => a -> Vector Model
modelsOf PDB
parsedPDB
([PDBWarnings], Vector Model)
-> Either Text ([PDBWarnings], Vector Model)
forall (f :: * -> *) a. Applicative f => a -> f a
pure ([PDBWarnings]
warnings, Vector Model
models)
instance StructureSerializable PDB.PDB where
serializeModels :: Vector Model -> PDB
serializeModels Vector Model
models = Text
-> Vector Model
-> Map RemarkCode RemarkData
-> Map FieldType RemarkData
-> PDB
PDB.PDB Text
"Serialized model" Vector Model
pdbModels Map RemarkCode RemarkData
forall a. Monoid a => a
mempty Map FieldType RemarkData
forall a. Monoid a => a
mempty
where
pdbModels :: Vector Model
pdbModels = (Model -> Model) -> Vector Model -> Vector Model
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Model -> Model
toPDBModel Vector Model
models
toPDBModel :: Model -> PDB.Model
toPDBModel :: Model -> Model
toPDBModel = (Chain -> Chain) -> Vector Chain -> Model
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Chain -> Chain
toPDBChain (Vector Chain -> Model)
-> (Model -> Vector Chain) -> Model -> Model
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Model -> Vector Chain
modelChains
toPDBChain :: Chain -> PDB.Chain
toPDBChain :: Chain -> Chain
toPDBChain Chain
ch = ((Chain, Residue, Atom) -> Atom)
-> Vector (Chain, Residue, Atom) -> Chain
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Chain, Residue, Atom) -> Atom
toPDBAtom (Vector (Chain, Residue, Atom) -> Chain)
-> (Vector (Vector (Chain, Residue, Atom))
-> Vector (Chain, Residue, Atom))
-> Vector (Vector (Chain, Residue, Atom))
-> Chain
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector (Vector (Chain, Residue, Atom))
-> Vector (Chain, Residue, Atom)
forall (m :: * -> *) a. Monad m => m (m a) -> m a
join (Vector (Vector (Chain, Residue, Atom)) -> Chain)
-> Vector (Vector (Chain, Residue, Atom)) -> Chain
forall a b. (a -> b) -> a -> b
$ (\Residue
r -> (Atom -> (Chain, Residue, Atom))
-> Vector Atom -> Vector (Chain, Residue, Atom)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((,,) Chain
ch Residue
r) (Vector Atom -> Vector (Chain, Residue, Atom))
-> Vector Atom -> Vector (Chain, Residue, Atom)
forall a b. (a -> b) -> a -> b
$ Residue -> Vector Atom
resAtoms Residue
r) (Residue -> Vector (Chain, Residue, Atom))
-> Vector Residue -> Vector (Vector (Chain, Residue, Atom))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Chain -> Vector Residue
chainResidues Chain
ch
toPDBAtom :: (Chain, Residue, Atom) -> PDB.Atom
toPDBAtom :: (Chain, Residue, Atom) -> Atom
toPDBAtom (Chain{Text
Vector Residue
chainName :: Chain -> Text
chainResidues :: Vector Residue
chainName :: Text
chainResidues :: Chain -> Vector Residue
..}, Residue{Char
Int
Text
Vector (Bond LocalID)
Vector Atom
SecondaryStructure
resChemCompType :: Residue -> Text
resSecondary :: Residue -> SecondaryStructure
resBonds :: Residue -> Vector (Bond LocalID)
resInsertionCode :: Residue -> Char
resNumber :: Residue -> Int
resName :: Residue -> Text
resChemCompType :: Text
resSecondary :: SecondaryStructure
resBonds :: Vector (Bond LocalID)
resAtoms :: Vector Atom
resInsertionCode :: Char
resNumber :: Int
resName :: Text
resAtoms :: Residue -> Vector Atom
..}, Atom{Float
Int
Text
V3 Float
GlobalID
occupancy :: Atom -> Float
bFactor :: Atom -> Float
formalCharge :: Atom -> Int
atomCoords :: Atom -> V3 Float
atomElement :: Atom -> Text
atomName :: Atom -> Text
atomInputIndex :: Atom -> Int
atomId :: Atom -> GlobalID
occupancy :: Float
bFactor :: Float
formalCharge :: Int
atomCoords :: V3 Float
atomElement :: Text
atomName :: Text
atomInputIndex :: Int
atomId :: GlobalID
..}) = Atom
res
where
res :: Atom
res =
Int
-> Text
-> Char
-> Text
-> Char
-> Int
-> Char
-> Float
-> Float
-> Float
-> Float
-> Float
-> Text
-> Text
-> Atom
PDB.Atom
(GlobalID -> Int
getGlobalID GlobalID
atomId Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
Text
atomName
Char
nullAltLoc
Text
resName
(Text -> Char
T.head Text
chainName)
Int
resNumber
Char
resInsertionCode
(V3 Float
atomCoords V3 Float -> Getting Float (V3 Float) Float -> Float
forall s a. s -> Getting a s a -> a
^. Getting Float (V3 Float) Float
forall (t :: * -> *) a. R1 t => Lens' (t a) a
_x)
(V3 Float
atomCoords V3 Float -> Getting Float (V3 Float) Float -> Float
forall s a. s -> Getting a s a -> a
^. Getting Float (V3 Float) Float
forall (t :: * -> *) a. R2 t => Lens' (t a) a
_y)
(V3 Float
atomCoords V3 Float -> Getting Float (V3 Float) Float -> Float
forall s a. s -> Getting a s a -> a
^. Getting Float (V3 Float) Float
forall (t :: * -> *) a. R3 t => Lens' (t a) a
_z)
Float
occupancy
Float
bFactor
Text
atomElement
([Char] -> Text
T.pack ([Char] -> Text) -> [Char] -> Text
forall a b. (a -> b) -> a -> b
$ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
formalCharge)
nullAltLoc :: Char
nullAltLoc :: Char
nullAltLoc = Char
' '
modelsToPDBFile :: MonadIO m => Vector Model -> FilePath -> m ()
modelsToPDBFile :: Vector Model -> [Char] -> m ()
modelsToPDBFile Vector Model
models = PDB -> [Char] -> m ()
forall (m :: * -> *). MonadIO m => PDB -> [Char] -> m ()
pdbToFile (PDB -> [Char] -> m ()) -> PDB -> [Char] -> m ()
forall a b. (a -> b) -> a -> b
$ Vector Model -> PDB
forall a. StructureSerializable a => Vector Model -> a
serializeModels Vector Model
models
modelsToPDBText :: Vector Model -> Text
modelsToPDBText :: Vector Model -> Text
modelsToPDBText = PDB -> Text
pdbToText (PDB -> Text) -> (Vector Model -> PDB) -> Vector Model -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector Model -> PDB
forall a. StructureSerializable a => Vector Model -> a
serializeModels