{-# OPTIONS_GHC -fno-warn-orphans #-}
module Bio.PDB
( modelsFromPDBText, modelsToPDBText
, modelsFromPDBFile, modelsToPDBFile
) where
import Bio.PDB.BondRestoring (residueID, restoreChainLocalBonds,
restoreModelGlobalBonds)
import Bio.PDB.Functions (groupChainByResidue)
import Bio.PDB.Reader (PDBWarnings, fromTextPDB)
import qualified Bio.PDB.Type as PDB
import Bio.PDB.Writer (pdbToFile, pdbToText)
import Bio.Structure
import Control.Arrow ((&&&))
import Control.Lens ((^.))
import Control.Monad (join)
import Control.Monad.IO.Class (MonadIO, liftIO)
import Data.List (sort)
import Data.Map (Map)
import qualified Data.Map as M (fromList, (!))
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import qualified Data.Text as T (head, pack, singleton, strip,
unpack)
import Data.Text.IO as TIO (readFile)
import Data.Vector (Vector)
import qualified Data.Vector as V
import Linear.V3 (V3 (..), _x, _y, _z)
import Text.Read (readMaybe)
instance StructureModels PDB.PDB where
modelsOf :: PDB -> Vector Model
modelsOf PDB.PDB {Text
Map RemarkCode RemarkData
Map FieldType RemarkData
Vector Model
otherFields :: PDB -> Map FieldType RemarkData
remarks :: PDB -> Map RemarkCode RemarkData
models :: PDB -> Vector Model
title :: PDB -> Text
otherFields :: Map FieldType RemarkData
remarks :: Map RemarkCode RemarkData
models :: Vector Model
title :: Text
..} = (Model -> Model) -> Vector Model -> Vector Model
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Model -> Model
mkModel Vector Model
models
where
mkModel :: PDB.Model -> Model
mkModel :: Model -> Model
mkModel Model
model = Vector Chain -> Vector (Bond GlobalID) -> Model
Model ((Chain -> Chain) -> Model -> Vector Chain
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Chain -> Chain
mkChain Model
model) (Map Int Int -> Model -> Vector (Bond GlobalID)
restoreModelGlobalBonds Map Int Int
atomSerialToNilBasedIndex Model
model)
where
atomSerialToNilBasedIndex :: Map Int Int
atomSerialToNilBasedIndex :: Map Int Int
atomSerialToNilBasedIndex = [(Int, Int)] -> Map Int Int
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList ([(Int, Int)] -> Map Int Int) -> [(Int, Int)] -> Map Int Int
forall a b. (a -> b) -> a -> b
$ [Int]
allModelAtomSerials [Int] -> [Int] -> [(Int, Int)]
forall a b. [a] -> [b] -> [(a, b)]
`zip` [Int
0..]
allModelAtomSerials :: [Int]
allModelAtomSerials :: [Int]
allModelAtomSerials = [Int] -> [Int]
forall a. Ord a => [a] -> [a]
sort ([Int] -> [Int]) -> ([Chain] -> [Int]) -> [Chain] -> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector Int -> [Int]
forall a. Vector a -> [a]
V.toList (Vector Int -> [Int])
-> ([Chain] -> Vector Int) -> [Chain] -> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Atom -> Int) -> Chain -> Vector Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Atom -> Int
PDB.atomSerial (Chain -> Vector Int)
-> ([Chain] -> Chain) -> [Chain] -> Vector Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Chain] -> Chain
forall a. [Vector a] -> Vector a
V.concat ([Chain] -> [Int]) -> [Chain] -> [Int]
forall a b. (a -> b) -> a -> b
$ Model -> [Chain]
forall a. Vector a -> [a]
V.toList Model
model
mkChain :: PDB.Chain -> Chain
mkChain :: Chain -> Chain
mkChain = (Text -> Vector Residue -> Chain)
-> (Text, Vector Residue) -> Chain
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Text -> Vector Residue -> Chain
Chain ((Text, Vector Residue) -> Chain)
-> (Chain -> (Text, Vector Residue)) -> Chain -> Chain
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Chain -> Text
mkChainName (Chain -> Text)
-> (Chain -> Vector Residue) -> Chain -> (Text, Vector Residue)
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& Chain -> Vector Residue
mkChainResidues)
mkChainName :: PDB.Chain -> Text
mkChainName :: Chain -> Text
mkChainName = Char -> Text
T.singleton (Char -> Text) -> (Chain -> Char) -> Chain -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Atom -> Char
PDB.atomChainID (Atom -> Char) -> (Chain -> Atom) -> Chain -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Chain -> Atom
safeFirstAtom
mkChainResidues :: PDB.Chain -> V.Vector Residue
mkChainResidues :: Chain -> Vector Residue
mkChainResidues Chain
chain = [Residue] -> Vector Residue
forall a. [a] -> Vector a
V.fromList ([Residue] -> Vector Residue)
-> ([[Atom]] -> [Residue]) -> [[Atom]] -> Vector Residue
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Atom] -> Residue) -> [[Atom]] -> [Residue]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Map Text (Vector (Bond LocalID)) -> [Atom] -> Residue
mkResidue (Chain -> Map Text (Vector (Bond LocalID))
restoreChainLocalBonds Chain
chain)) ([[Atom]] -> Vector Residue) -> [[Atom]] -> Vector Residue
forall a b. (a -> b) -> a -> b
$ Chain -> [[Atom]]
groupChainByResidue Chain
chain
safeFirstAtom :: V.Vector PDB.Atom -> PDB.Atom
safeFirstAtom :: Chain -> Atom
safeFirstAtom Chain
arr | Chain -> Int
forall a. Vector a -> Int
V.length Chain
arr Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0 = Chain
arr Chain -> Int -> Atom
forall a. Vector a -> Int -> a
V.! Int
0
| Bool
otherwise = [Char] -> Atom
forall a. HasCallStack => [Char] -> a
error [Char]
"Could not pick first atom"
mkResidue :: Map Text (V.Vector (Bond LocalID)) -> [PDB.Atom] -> Residue
mkResidue :: Map Text (Vector (Bond LocalID)) -> [Atom] -> Residue
mkResidue Map Text (Vector (Bond LocalID))
_ [] = [Char] -> Residue
forall a. HasCallStack => [Char] -> a
error [Char]
"Cound not make residue from empty list"
mkResidue Map Text (Vector (Bond LocalID))
localBondsMap [Atom]
atoms' = Text
-> Int
-> Char
-> Vector Atom
-> Vector (Bond LocalID)
-> SecondaryStructure
-> Text
-> Residue
Residue (Text -> Text
T.strip (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Atom -> Text
PDB.atomResName Atom
firstResidueAtom)
(Atom -> Int
PDB.atomResSeq Atom
firstResidueAtom)
(Atom -> Char
PDB.atomICode Atom
firstResidueAtom)
([Atom] -> Vector Atom
forall a. [a] -> Vector a
V.fromList ([Atom] -> Vector Atom) -> [Atom] -> Vector Atom
forall a b. (a -> b) -> a -> b
$ Atom -> Atom
mkAtom (Atom -> Atom) -> [Atom] -> [Atom]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Atom]
atoms')
(Map Text (Vector (Bond LocalID))
localBondsMap Map Text (Vector (Bond LocalID)) -> Text -> Vector (Bond LocalID)
forall k a. Ord k => Map k a -> k -> a
M.! Atom -> Text
residueID Atom
firstResidueAtom)
SecondaryStructure
Undefined
Text
""
where
firstResidueAtom :: Atom
firstResidueAtom = [Atom] -> Atom
forall a. [a] -> a
head [Atom]
atoms'
mkAtom :: PDB.Atom -> Atom
mkAtom :: Atom -> Atom
mkAtom PDB.Atom{Char
Float
Int
Text
atomCharge :: Atom -> Text
atomElement :: Atom -> Text
atomTempFactor :: Atom -> Float
atomOccupancy :: Atom -> Float
atomZ :: Atom -> Float
atomY :: Atom -> Float
atomX :: Atom -> Float
atomAltLoc :: Atom -> Char
atomName :: Atom -> Text
atomCharge :: Text
atomElement :: Text
atomTempFactor :: Float
atomOccupancy :: Float
atomZ :: Float
atomY :: Float
atomX :: Float
atomICode :: Char
atomResSeq :: Int
atomChainID :: Char
atomResName :: Text
atomAltLoc :: Char
atomName :: Text
atomSerial :: Int
atomICode :: Atom -> Char
atomResSeq :: Atom -> Int
atomResName :: Atom -> Text
atomChainID :: Atom -> Char
atomSerial :: Atom -> Int
..} = GlobalID
-> Int -> Text -> Text -> V3 Float -> Int -> Float -> Float -> Atom
Atom (Int -> GlobalID
GlobalID (Int -> GlobalID) -> Int -> GlobalID
forall a b. (a -> b) -> a -> b
$ Map Int Int
atomSerialToNilBasedIndex Map Int Int -> Int -> Int
forall k a. Ord k => Map k a -> k -> a
M.! Int
atomSerial)
Int
atomSerial
(Text -> Text
T.strip Text
atomName)
Text
atomElement
(Float -> Float -> Float -> V3 Float
forall a. a -> a -> a -> V3 a
V3 Float
atomX Float
atomY Float
atomZ)
(Int -> RemarkCode -> Int
forall a. a -> Maybe a -> a
fromMaybe Int
0 (RemarkCode -> Int) -> ([Char] -> RemarkCode) -> [Char] -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> RemarkCode
forall a. Read a => [Char] -> Maybe a
readMaybe ([Char] -> Int) -> [Char] -> Int
forall a b. (a -> b) -> a -> b
$ Text -> [Char]
T.unpack Text
atomCharge)
Float
atomTempFactor
Float
atomOccupancy
modelsFromPDBFile :: (MonadIO m) => FilePath -> m (Either Text ([PDBWarnings], V.Vector Model))
modelsFromPDBFile :: [Char] -> m (Either Text ([PDBWarnings], Vector Model))
modelsFromPDBFile = IO (Either Text ([PDBWarnings], Vector Model))
-> m (Either Text ([PDBWarnings], Vector Model))
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Either Text ([PDBWarnings], Vector Model))
-> m (Either Text ([PDBWarnings], Vector Model)))
-> ([Char] -> IO (Either Text ([PDBWarnings], Vector Model)))
-> [Char]
-> m (Either Text ([PDBWarnings], Vector Model))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Either Text ([PDBWarnings], Vector Model))
-> IO Text -> IO (Either Text ([PDBWarnings], Vector Model))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> Either Text ([PDBWarnings], Vector Model)
modelsFromPDBText (IO Text -> IO (Either Text ([PDBWarnings], Vector Model)))
-> ([Char] -> IO Text)
-> [Char]
-> IO (Either Text ([PDBWarnings], Vector Model))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> IO Text
TIO.readFile
modelsFromPDBText :: Text -> Either Text ([PDBWarnings], V.Vector Model)
modelsFromPDBText :: Text -> Either Text ([PDBWarnings], Vector Model)
modelsFromPDBText Text
pdbText = do
([PDBWarnings]
warnings, PDB
parsedPDB) <- Text -> Either Text ([PDBWarnings], PDB)
fromTextPDB Text
pdbText
let models :: Vector Model
models = PDB -> Vector Model
forall a. StructureModels a => a -> Vector Model
modelsOf PDB
parsedPDB
([PDBWarnings], Vector Model)
-> Either Text ([PDBWarnings], Vector Model)
forall (f :: * -> *) a. Applicative f => a -> f a
pure ([PDBWarnings]
warnings, Vector Model
models)
instance StructureSerializable PDB.PDB where
serializeModels :: Vector Model -> PDB
serializeModels Vector Model
models = Text
-> Vector Model
-> Map RemarkCode RemarkData
-> Map FieldType RemarkData
-> PDB
PDB.PDB Text
"Serialized model" Vector Model
pdbModels Map RemarkCode RemarkData
forall a. Monoid a => a
mempty Map FieldType RemarkData
forall a. Monoid a => a
mempty
where
pdbModels :: Vector Model
pdbModels = (Model -> Model) -> Vector Model -> Vector Model
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Model -> Model
toPDBModel Vector Model
models
toPDBModel :: Model -> PDB.Model
toPDBModel :: Model -> Model
toPDBModel = (Chain -> Chain) -> Vector Chain -> Model
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Chain -> Chain
toPDBChain (Vector Chain -> Model)
-> (Model -> Vector Chain) -> Model -> Model
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Model -> Vector Chain
modelChains
toPDBChain :: Chain -> PDB.Chain
toPDBChain :: Chain -> Chain
toPDBChain Chain
ch = ((Chain, Residue, Atom) -> Atom)
-> Vector (Chain, Residue, Atom) -> Chain
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Chain, Residue, Atom) -> Atom
toPDBAtom (Vector (Chain, Residue, Atom) -> Chain)
-> (Vector (Vector (Chain, Residue, Atom))
-> Vector (Chain, Residue, Atom))
-> Vector (Vector (Chain, Residue, Atom))
-> Chain
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector (Vector (Chain, Residue, Atom))
-> Vector (Chain, Residue, Atom)
forall (m :: * -> *) a. Monad m => m (m a) -> m a
join (Vector (Vector (Chain, Residue, Atom)) -> Chain)
-> Vector (Vector (Chain, Residue, Atom)) -> Chain
forall a b. (a -> b) -> a -> b
$ (\Residue
r -> (Atom -> (Chain, Residue, Atom))
-> Vector Atom -> Vector (Chain, Residue, Atom)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((,,) Chain
ch Residue
r) (Vector Atom -> Vector (Chain, Residue, Atom))
-> Vector Atom -> Vector (Chain, Residue, Atom)
forall a b. (a -> b) -> a -> b
$ Residue -> Vector Atom
resAtoms Residue
r) (Residue -> Vector (Chain, Residue, Atom))
-> Vector Residue -> Vector (Vector (Chain, Residue, Atom))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Chain -> Vector Residue
chainResidues Chain
ch
toPDBAtom :: (Chain, Residue, Atom) -> PDB.Atom
toPDBAtom :: (Chain, Residue, Atom) -> Atom
toPDBAtom (Chain{Text
Vector Residue
chainName :: Chain -> Text
chainResidues :: Vector Residue
chainName :: Text
chainResidues :: Chain -> Vector Residue
..}, Residue{Char
Int
Text
Vector (Bond LocalID)
Vector Atom
SecondaryStructure
resChemCompType :: Residue -> Text
resSecondary :: Residue -> SecondaryStructure
resBonds :: Residue -> Vector (Bond LocalID)
resInsertionCode :: Residue -> Char
resNumber :: Residue -> Int
resName :: Residue -> Text
resChemCompType :: Text
resSecondary :: SecondaryStructure
resBonds :: Vector (Bond LocalID)
resAtoms :: Vector Atom
resInsertionCode :: Char
resNumber :: Int
resName :: Text
resAtoms :: Residue -> Vector Atom
..}, Atom{Float
Int
Text
V3 Float
GlobalID
occupancy :: Atom -> Float
bFactor :: Atom -> Float
formalCharge :: Atom -> Int
atomCoords :: Atom -> V3 Float
atomElement :: Atom -> Text
atomName :: Atom -> Text
atomInputIndex :: Atom -> Int
atomId :: Atom -> GlobalID
occupancy :: Float
bFactor :: Float
formalCharge :: Int
atomCoords :: V3 Float
atomElement :: Text
atomName :: Text
atomInputIndex :: Int
atomId :: GlobalID
..}) = Atom
res
where
res :: Atom
res =
Int
-> Text
-> Char
-> Text
-> Char
-> Int
-> Char
-> Float
-> Float
-> Float
-> Float
-> Float
-> Text
-> Text
-> Atom
PDB.Atom
(GlobalID -> Int
getGlobalID GlobalID
atomId Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
Text
atomName
Char
nullAltLoc
Text
resName
(Text -> Char
T.head Text
chainName)
Int
resNumber
Char
resInsertionCode
(V3 Float
atomCoords V3 Float -> Getting Float (V3 Float) Float -> Float
forall s a. s -> Getting a s a -> a
^. Getting Float (V3 Float) Float
forall (t :: * -> *) a. R1 t => Lens' (t a) a
_x)
(V3 Float
atomCoords V3 Float -> Getting Float (V3 Float) Float -> Float
forall s a. s -> Getting a s a -> a
^. Getting Float (V3 Float) Float
forall (t :: * -> *) a. R2 t => Lens' (t a) a
_y)
(V3 Float
atomCoords V3 Float -> Getting Float (V3 Float) Float -> Float
forall s a. s -> Getting a s a -> a
^. Getting Float (V3 Float) Float
forall (t :: * -> *) a. R3 t => Lens' (t a) a
_z)
Float
occupancy
Float
bFactor
Text
atomElement
([Char] -> Text
T.pack ([Char] -> Text) -> [Char] -> Text
forall a b. (a -> b) -> a -> b
$ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
formalCharge)
nullAltLoc :: Char
nullAltLoc :: Char
nullAltLoc = Char
' '
modelsToPDBFile :: MonadIO m => Vector Model -> FilePath -> m ()
modelsToPDBFile :: Vector Model -> [Char] -> m ()
modelsToPDBFile Vector Model
models = PDB -> [Char] -> m ()
forall (m :: * -> *). MonadIO m => PDB -> [Char] -> m ()
pdbToFile (PDB -> [Char] -> m ()) -> PDB -> [Char] -> m ()
forall a b. (a -> b) -> a -> b
$ Vector Model -> PDB
forall a. StructureSerializable a => Vector Model -> a
serializeModels Vector Model
models
modelsToPDBText :: Vector Model -> Text
modelsToPDBText :: Vector Model -> Text
modelsToPDBText = PDB -> Text
pdbToText (PDB -> Text) -> (Vector Model -> PDB) -> Vector Model -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector Model -> PDB
forall a. StructureSerializable a => Vector Model -> a
serializeModels