{-# OPTIONS_GHC -fno-warn-orphans #-}
module Bio.PDB
  ( modelsFromPDBText, modelsToPDBText
  , modelsFromPDBFile, modelsToPDBFile
  ) where

import           Bio.PDB.BondRestoring  (residueID,
                                         restoreChainLocalBonds,
                                         restoreModelGlobalBonds)
import           Bio.PDB.Functions      (groupChainByResidue)
import           Bio.PDB.Reader         (PDBWarnings, fromTextPDB)
import qualified Bio.PDB.Type           as PDB
import           Bio.PDB.Writer         (pdbToFile, pdbToText)
import           Bio.Structure         (Residue(..),
                                        Bond,
                                        Atom(..),
                                        LocalID,
                                        GlobalID(GlobalID, getGlobalID),
                                        SecondaryStructure(Undefined),
                                        Chain(..),
                                        Model(Model, modelChains),
                                        StructureSerializable(..),
                                        StructureModels(..))
import qualified Bio.Utils.Map          as M ((!?!))
import qualified Bio.Utils.Vector       as V ((!?!))
import           Control.Arrow          ((&&&))
import           Control.Lens           ((^.))
import           Control.Monad          (join)
import           Control.Monad.IO.Class (MonadIO, liftIO)
import           Data.List              (sort)
import           Data.Map.Strict        (Map)
import qualified Data.Map.Strict        as M (fromList)
import           Data.Maybe             (fromMaybe)
import           Data.Text              (Text)
import qualified Data.Text              as T (head, pack, singleton, strip,
                                              unpack)
import           Data.Text.IO           as TIO (readFile)
import           Data.Vector            (Vector)
import qualified Data.Vector            as V (toList,
                                              concat,
                                              fromList,
                                              length)
import           Linear.V3              (V3 (..), _x, _y, _z)
import           Text.Read              (readMaybe)

instance StructureModels PDB.PDB where
    modelsOf :: PDB -> Vector Model
modelsOf PDB.PDB {Text
Map RemarkCode RemarkData
Map FieldType RemarkData
Vector Model
otherFields :: PDB -> Map FieldType RemarkData
remarks :: PDB -> Map RemarkCode RemarkData
models :: PDB -> Vector Model
title :: PDB -> Text
otherFields :: Map FieldType RemarkData
remarks :: Map RemarkCode RemarkData
models :: Vector Model
title :: Text
..} = (Model -> Model) -> Vector Model -> Vector Model
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Model -> Model
mkModel Vector Model
models
      where
        mkModel :: PDB.Model -> Model
        mkModel :: Model -> Model
mkModel Model
model = if Map Atom Int -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length Map Atom Int
atomToNilBasedIndex Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== [Atom] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [Atom]
allModelAtoms
                          then Vector Chain -> Vector (Bond GlobalID) -> Model
Model ((Chain -> Chain) -> Model -> Vector Chain
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Chain -> Chain
mkChain Model
model) (Map Atom Int -> Model -> Vector (Bond GlobalID)
restoreModelGlobalBonds Map Atom Int
atomToNilBasedIndex Model
model)
                          else [Char] -> Model
forall a. HasCallStack => [Char] -> a
error [Char]
"Mapping from PDB id to nil based index must be a bijection."
          where
            atomToNilBasedIndex :: Map PDB.Atom Int
            atomToNilBasedIndex :: Map Atom Int
atomToNilBasedIndex = [(Atom, Int)] -> Map Atom Int
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList ([(Atom, Int)] -> Map Atom Int) -> [(Atom, Int)] -> Map Atom Int
forall a b. (a -> b) -> a -> b
$ [Atom]
allModelAtoms [Atom] -> [Int] -> [(Atom, Int)]
forall a b. [a] -> [b] -> [(a, b)]
`zip` [Int
0..]

            allModelAtoms :: [PDB.Atom]
            allModelAtoms :: [Atom]
allModelAtoms = [Atom] -> [Atom]
forall a. Ord a => [a] -> [a]
sort ([Atom] -> [Atom]) -> ([Chain] -> [Atom]) -> [Chain] -> [Atom]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Chain -> [Atom]
forall a. Vector a -> [a]
V.toList (Chain -> [Atom]) -> ([Chain] -> Chain) -> [Chain] -> [Atom]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Chain] -> Chain
forall a. [Vector a] -> Vector a
V.concat ([Chain] -> [Atom]) -> [Chain] -> [Atom]
forall a b. (a -> b) -> a -> b
$ Model -> [Chain]
forall a. Vector a -> [a]
V.toList Model
model

            mkChain :: PDB.Chain -> Chain
            mkChain :: Chain -> Chain
mkChain = (Text -> Vector Residue -> Chain)
-> (Text, Vector Residue) -> Chain
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Text -> Vector Residue -> Chain
Chain ((Text, Vector Residue) -> Chain)
-> (Chain -> (Text, Vector Residue)) -> Chain -> Chain
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Chain -> Text
mkChainName (Chain -> Text)
-> (Chain -> Vector Residue) -> Chain -> (Text, Vector Residue)
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& Chain -> Vector Residue
mkChainResidues)

            mkChainName :: PDB.Chain -> Text
            mkChainName :: Chain -> Text
mkChainName = Char -> Text
T.singleton (Char -> Text) -> (Chain -> Char) -> Chain -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Atom -> Char
PDB.atomChainID (Atom -> Char) -> (Chain -> Atom) -> Chain -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Chain -> Atom
safeFirstAtom

            mkChainResidues :: PDB.Chain -> Vector Residue
            mkChainResidues :: Chain -> Vector Residue
mkChainResidues Chain
chain = [Residue] -> Vector Residue
forall a. [a] -> Vector a
V.fromList ([Residue] -> Vector Residue)
-> ([[Atom]] -> [Residue]) -> [[Atom]] -> Vector Residue
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Atom] -> Residue) -> [[Atom]] -> [Residue]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Map Text (Vector (Bond LocalID)) -> [Atom] -> Residue
mkResidue (Chain -> Map Text (Vector (Bond LocalID))
restoreChainLocalBonds Chain
chain)) ([[Atom]] -> Vector Residue) -> [[Atom]] -> Vector Residue
forall a b. (a -> b) -> a -> b
$ Chain -> [[Atom]]
groupChainByResidue Chain
chain

            safeFirstAtom :: Vector PDB.Atom -> PDB.Atom
            safeFirstAtom :: Chain -> Atom
safeFirstAtom Chain
arr | Chain -> Int
forall a. Vector a -> Int
V.length Chain
arr Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0 = Chain
arr Chain -> Int -> Atom
forall a. (HasCallStack, Show a) => Vector a -> Int -> a
V.!?! Int
0
                              | Bool
otherwise        = [Char] -> Atom
forall a. HasCallStack => [Char] -> a
error [Char]
"Could not pick first atom"

            mkResidue :: Map Text (Vector (Bond LocalID)) -> [PDB.Atom] -> Residue
            mkResidue :: Map Text (Vector (Bond LocalID)) -> [Atom] -> Residue
mkResidue Map Text (Vector (Bond LocalID))
_ []    = [Char] -> Residue
forall a. HasCallStack => [Char] -> a
error [Char]
"Cound not make residue from empty list"
            mkResidue Map Text (Vector (Bond LocalID))
localBondsMap [Atom]
atoms' = Text
-> Int
-> Char
-> Vector Atom
-> Vector (Bond LocalID)
-> SecondaryStructure
-> Text
-> Residue
Residue (Text -> Text
T.strip (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Atom -> Text
PDB.atomResName Atom
firstResidueAtom)
                                                     (Atom -> Int
PDB.atomResSeq Atom
firstResidueAtom)
                                                     (Atom -> Char
PDB.atomICode Atom
firstResidueAtom)
                                                     ([Atom] -> Vector Atom
forall a. [a] -> Vector a
V.fromList ([Atom] -> Vector Atom) -> [Atom] -> Vector Atom
forall a b. (a -> b) -> a -> b
$ Atom -> Atom
mkAtom (Atom -> Atom) -> [Atom] -> [Atom]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Atom]
atoms')
                                                     (Map Text (Vector (Bond LocalID))
localBondsMap Map Text (Vector (Bond LocalID)) -> Text -> Vector (Bond LocalID)
forall k a.
(HasCallStack, Ord k, Show k, Show a) =>
Map k a -> k -> a
M.!?! Atom -> Text
residueID Atom
firstResidueAtom)
                                                     SecondaryStructure
Undefined -- now we do not read secondary structure
                                                     Text
""        -- chemical component type?!
              where
                firstResidueAtom :: Atom
firstResidueAtom = [Atom] -> Atom
forall a. [a] -> a
head [Atom]
atoms'

            mkAtom :: PDB.Atom -> Atom
            mkAtom :: Atom -> Atom
mkAtom atom :: Atom
atom@PDB.Atom{Char
Float
Int
Text
atomCharge :: Atom -> Text
atomElement :: Atom -> Text
atomTempFactor :: Atom -> Float
atomOccupancy :: Atom -> Float
atomZ :: Atom -> Float
atomY :: Atom -> Float
atomX :: Atom -> Float
atomAltLoc :: Atom -> Char
atomName :: Atom -> Text
atomSerial :: Atom -> Int
atomCharge :: Text
atomElement :: Text
atomTempFactor :: Float
atomOccupancy :: Float
atomZ :: Float
atomY :: Float
atomX :: Float
atomICode :: Char
atomResSeq :: Int
atomChainID :: Char
atomResName :: Text
atomAltLoc :: Char
atomName :: Text
atomSerial :: Int
atomICode :: Atom -> Char
atomResSeq :: Atom -> Int
atomResName :: Atom -> Text
atomChainID :: Atom -> Char
..} = GlobalID
-> Int -> Text -> Text -> V3 Float -> Int -> Float -> Float -> Atom
Atom (Int -> GlobalID
GlobalID (Int -> GlobalID) -> Int -> GlobalID
forall a b. (a -> b) -> a -> b
$ Map Atom Int
atomToNilBasedIndex Map Atom Int -> Atom -> Int
forall k a.
(HasCallStack, Ord k, Show k, Show a) =>
Map k a -> k -> a
M.!?! Atom
atom)
                                       Int
atomSerial
                                       (Text -> Text
T.strip Text
atomName)
                                       Text
atomElement
                                       (Float -> Float -> Float -> V3 Float
forall a. a -> a -> a -> V3 a
V3 Float
atomX Float
atomY Float
atomZ)
                                       (Int -> RemarkCode -> Int
forall a. a -> Maybe a -> a
fromMaybe Int
0 (RemarkCode -> Int) -> ([Char] -> RemarkCode) -> [Char] -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> RemarkCode
forall a. Read a => [Char] -> Maybe a
readMaybe ([Char] -> Int) -> [Char] -> Int
forall a b. (a -> b) -> a -> b
$ Text -> [Char]
T.unpack Text
atomCharge)
                                       Float
atomTempFactor
                                       Float
atomOccupancy

modelsFromPDBFile :: (MonadIO m) => FilePath -> m (Either Text ([PDBWarnings], Vector Model))
modelsFromPDBFile :: [Char] -> m (Either Text ([PDBWarnings], Vector Model))
modelsFromPDBFile = IO (Either Text ([PDBWarnings], Vector Model))
-> m (Either Text ([PDBWarnings], Vector Model))
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Either Text ([PDBWarnings], Vector Model))
 -> m (Either Text ([PDBWarnings], Vector Model)))
-> ([Char] -> IO (Either Text ([PDBWarnings], Vector Model)))
-> [Char]
-> m (Either Text ([PDBWarnings], Vector Model))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Either Text ([PDBWarnings], Vector Model))
-> IO Text -> IO (Either Text ([PDBWarnings], Vector Model))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> Either Text ([PDBWarnings], Vector Model)
modelsFromPDBText (IO Text -> IO (Either Text ([PDBWarnings], Vector Model)))
-> ([Char] -> IO Text)
-> [Char]
-> IO (Either Text ([PDBWarnings], Vector Model))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> IO Text
TIO.readFile

modelsFromPDBText :: Text -> Either Text ([PDBWarnings], Vector Model)
modelsFromPDBText :: Text -> Either Text ([PDBWarnings], Vector Model)
modelsFromPDBText Text
pdbText = do
  ([PDBWarnings]
warnings, PDB
parsedPDB) <- Text -> Either Text ([PDBWarnings], PDB)
fromTextPDB Text
pdbText
  let models :: Vector Model
models = PDB -> Vector Model
forall a. StructureModels a => a -> Vector Model
modelsOf PDB
parsedPDB
  ([PDBWarnings], Vector Model)
-> Either Text ([PDBWarnings], Vector Model)
forall (f :: * -> *) a. Applicative f => a -> f a
pure ([PDBWarnings]
warnings, Vector Model
models)

instance StructureSerializable PDB.PDB where
  serializeModels :: Vector Model -> PDB
serializeModels Vector Model
models = Text
-> Vector Model
-> Map RemarkCode RemarkData
-> Map FieldType RemarkData
-> PDB
PDB.PDB Text
"Serialized model" Vector Model
pdbModels Map RemarkCode RemarkData
forall a. Monoid a => a
mempty Map FieldType RemarkData
forall a. Monoid a => a
mempty
    where
      pdbModels :: Vector Model
pdbModels = (Model -> Model) -> Vector Model -> Vector Model
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Model -> Model
toPDBModel Vector Model
models

      toPDBModel :: Model -> PDB.Model
      toPDBModel :: Model -> Model
toPDBModel = (Chain -> Chain) -> Vector Chain -> Model
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Chain -> Chain
toPDBChain (Vector Chain -> Model)
-> (Model -> Vector Chain) -> Model -> Model
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Model -> Vector Chain
modelChains

      toPDBChain :: Chain -> PDB.Chain
      toPDBChain :: Chain -> Chain
toPDBChain Chain
ch = ((Chain, Residue, Atom) -> Atom)
-> Vector (Chain, Residue, Atom) -> Chain
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Chain, Residue, Atom) -> Atom
toPDBAtom (Vector (Chain, Residue, Atom) -> Chain)
-> (Vector (Vector (Chain, Residue, Atom))
    -> Vector (Chain, Residue, Atom))
-> Vector (Vector (Chain, Residue, Atom))
-> Chain
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector (Vector (Chain, Residue, Atom))
-> Vector (Chain, Residue, Atom)
forall (m :: * -> *) a. Monad m => m (m a) -> m a
join (Vector (Vector (Chain, Residue, Atom)) -> Chain)
-> Vector (Vector (Chain, Residue, Atom)) -> Chain
forall a b. (a -> b) -> a -> b
$ (\Residue
r -> (Atom -> (Chain, Residue, Atom))
-> Vector Atom -> Vector (Chain, Residue, Atom)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((,,) Chain
ch Residue
r) (Vector Atom -> Vector (Chain, Residue, Atom))
-> Vector Atom -> Vector (Chain, Residue, Atom)
forall a b. (a -> b) -> a -> b
$ Residue -> Vector Atom
resAtoms Residue
r) (Residue -> Vector (Chain, Residue, Atom))
-> Vector Residue -> Vector (Vector (Chain, Residue, Atom))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Chain -> Vector Residue
chainResidues Chain
ch

      toPDBAtom :: (Chain, Residue, Atom) -> PDB.Atom
      toPDBAtom :: (Chain, Residue, Atom) -> Atom
toPDBAtom (Chain{Text
Vector Residue
chainName :: Chain -> Text
chainResidues :: Vector Residue
chainName :: Text
chainResidues :: Chain -> Vector Residue
..}, Residue{Char
Int
Text
Vector (Bond LocalID)
Vector Atom
SecondaryStructure
resChemCompType :: Residue -> Text
resSecondary :: Residue -> SecondaryStructure
resBonds :: Residue -> Vector (Bond LocalID)
resInsertionCode :: Residue -> Char
resNumber :: Residue -> Int
resName :: Residue -> Text
resChemCompType :: Text
resSecondary :: SecondaryStructure
resBonds :: Vector (Bond LocalID)
resAtoms :: Vector Atom
resInsertionCode :: Char
resNumber :: Int
resName :: Text
resAtoms :: Residue -> Vector Atom
..}, Atom{Float
Int
Text
V3 Float
GlobalID
occupancy :: Atom -> Float
bFactor :: Atom -> Float
formalCharge :: Atom -> Int
atomCoords :: Atom -> V3 Float
atomElement :: Atom -> Text
atomName :: Atom -> Text
atomInputIndex :: Atom -> Int
atomId :: Atom -> GlobalID
occupancy :: Float
bFactor :: Float
formalCharge :: Int
atomCoords :: V3 Float
atomElement :: Text
atomName :: Text
atomInputIndex :: Int
atomId :: GlobalID
..}) = Atom
res
        where
          res :: Atom
res =
            Int
-> Text
-> Char
-> Text
-> Char
-> Int
-> Char
-> Float
-> Float
-> Float
-> Float
-> Float
-> Text
-> Text
-> Atom
PDB.Atom
              (GlobalID -> Int
getGlobalID GlobalID
atomId Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
              Text
atomName
              Char
nullAltLoc
              Text
resName
              (Text -> Char
T.head Text
chainName)
              Int
resNumber
              Char
resInsertionCode
              (V3 Float
atomCoords V3 Float -> Getting Float (V3 Float) Float -> Float
forall s a. s -> Getting a s a -> a
^. Getting Float (V3 Float) Float
forall (t :: * -> *) a. R1 t => Lens' (t a) a
_x)
              (V3 Float
atomCoords V3 Float -> Getting Float (V3 Float) Float -> Float
forall s a. s -> Getting a s a -> a
^. Getting Float (V3 Float) Float
forall (t :: * -> *) a. R2 t => Lens' (t a) a
_y)
              (V3 Float
atomCoords V3 Float -> Getting Float (V3 Float) Float -> Float
forall s a. s -> Getting a s a -> a
^. Getting Float (V3 Float) Float
forall (t :: * -> *) a. R3 t => Lens' (t a) a
_z)
              Float
occupancy
              Float
bFactor
              Text
atomElement
              ([Char] -> Text
T.pack ([Char] -> Text) -> [Char] -> Text
forall a b. (a -> b) -> a -> b
$ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
formalCharge)

          nullAltLoc :: Char
          nullAltLoc :: Char
nullAltLoc = Char
' '

-- | Writes models to the given path as PDB.
--
modelsToPDBFile :: MonadIO m => Vector Model -> FilePath -> m ()
modelsToPDBFile :: Vector Model -> [Char] -> m ()
modelsToPDBFile Vector Model
models = PDB -> [Char] -> m ()
forall (m :: * -> *). MonadIO m => PDB -> [Char] -> m ()
pdbToFile (PDB -> [Char] -> m ()) -> PDB -> [Char] -> m ()
forall a b. (a -> b) -> a -> b
$ Vector Model -> PDB
forall a. StructureSerializable a => Vector Model -> a
serializeModels Vector Model
models

-- | Converts models to their 'Text' representation as PDB.
--
modelsToPDBText :: Vector Model -> Text
modelsToPDBText :: Vector Model -> Text
modelsToPDBText = PDB -> Text
pdbToText (PDB -> Text) -> (Vector Model -> PDB) -> Vector Model -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector Model -> PDB
forall a. StructureSerializable a => Vector Model -> a
serializeModels