{-# OPTIONS_GHC -fno-warn-orphans #-}
module Bio.PDB
  ( modelsFromPDBText, modelsToPDBText
  , modelsFromPDBFile, modelsToPDBFile
  ) where

import           Bio.PDB.BondRestoring  (residueID, restoreChainLocalBonds,
                                         restoreModelGlobalBonds)
import           Bio.PDB.Functions      (groupChainByResidue)
import           Bio.PDB.Reader         (PDBWarnings, fromTextPDB)
import qualified Bio.PDB.Type           as PDB
import           Bio.PDB.Writer         (pdbToFile, pdbToText)
import           Bio.Structure
import           Control.Arrow          ((&&&))
import           Control.Lens           ((^.))
import           Control.Monad          (join)
import           Control.Monad.IO.Class (MonadIO, liftIO)
import           Data.List              (sort)
import           Data.Map               (Map)
import qualified Data.Map               as M (fromList, (!))
import           Data.Maybe             (fromMaybe)
import           Data.Text              (Text)
import qualified Data.Text              as T (head, pack, singleton, strip,
                                              unpack)
import           Data.Text.IO           as TIO (readFile)
import           Data.Vector            (Vector)
import qualified Data.Vector            as V
import           Linear.V3              (V3 (..), _x, _y, _z)
import           Text.Read              (readMaybe)

instance StructureModels PDB.PDB where
    modelsOf :: PDB -> Vector Model
modelsOf PDB.PDB {Text
Map RemarkCode RemarkData
Map FieldType RemarkData
Vector Model
otherFields :: PDB -> Map FieldType RemarkData
remarks :: PDB -> Map RemarkCode RemarkData
models :: PDB -> Vector Model
title :: PDB -> Text
otherFields :: Map FieldType RemarkData
remarks :: Map RemarkCode RemarkData
models :: Vector Model
title :: Text
..} = (Model -> Model) -> Vector Model -> Vector Model
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Model -> Model
mkModel Vector Model
models
      where
        mkModel :: PDB.Model -> Model
        mkModel :: Model -> Model
mkModel Model
model = Vector Chain -> Vector (Bond GlobalID) -> Model
Model ((Chain -> Chain) -> Model -> Vector Chain
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Chain -> Chain
mkChain Model
model) (Map Int Int -> Model -> Vector (Bond GlobalID)
restoreModelGlobalBonds Map Int Int
atomSerialToNilBasedIndex Model
model)
          where
            atomSerialToNilBasedIndex :: Map Int Int
            atomSerialToNilBasedIndex :: Map Int Int
atomSerialToNilBasedIndex = [(Int, Int)] -> Map Int Int
forall k a. Ord k => [(k, a)] -> Map k a
M.fromList ([(Int, Int)] -> Map Int Int) -> [(Int, Int)] -> Map Int Int
forall a b. (a -> b) -> a -> b
$ [Int]
allModelAtomSerials [Int] -> [Int] -> [(Int, Int)]
forall a b. [a] -> [b] -> [(a, b)]
`zip` [Int
0..]

            allModelAtomSerials :: [Int]
            allModelAtomSerials :: [Int]
allModelAtomSerials = [Int] -> [Int]
forall a. Ord a => [a] -> [a]
sort ([Int] -> [Int]) -> ([Chain] -> [Int]) -> [Chain] -> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector Int -> [Int]
forall a. Vector a -> [a]
V.toList (Vector Int -> [Int])
-> ([Chain] -> Vector Int) -> [Chain] -> [Int]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Atom -> Int) -> Chain -> Vector Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Atom -> Int
PDB.atomSerial (Chain -> Vector Int)
-> ([Chain] -> Chain) -> [Chain] -> Vector Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Chain] -> Chain
forall a. [Vector a] -> Vector a
V.concat ([Chain] -> [Int]) -> [Chain] -> [Int]
forall a b. (a -> b) -> a -> b
$ Model -> [Chain]
forall a. Vector a -> [a]
V.toList Model
model

            mkChain :: PDB.Chain -> Chain
            mkChain :: Chain -> Chain
mkChain = (Text -> Vector Residue -> Chain)
-> (Text, Vector Residue) -> Chain
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Text -> Vector Residue -> Chain
Chain ((Text, Vector Residue) -> Chain)
-> (Chain -> (Text, Vector Residue)) -> Chain -> Chain
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Chain -> Text
mkChainName (Chain -> Text)
-> (Chain -> Vector Residue) -> Chain -> (Text, Vector Residue)
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& Chain -> Vector Residue
mkChainResidues)

            mkChainName :: PDB.Chain -> Text
            mkChainName :: Chain -> Text
mkChainName = Char -> Text
T.singleton (Char -> Text) -> (Chain -> Char) -> Chain -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Atom -> Char
PDB.atomChainID (Atom -> Char) -> (Chain -> Atom) -> Chain -> Char
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Chain -> Atom
safeFirstAtom

            mkChainResidues :: PDB.Chain -> V.Vector Residue
            mkChainResidues :: Chain -> Vector Residue
mkChainResidues Chain
chain = [Residue] -> Vector Residue
forall a. [a] -> Vector a
V.fromList ([Residue] -> Vector Residue)
-> ([[Atom]] -> [Residue]) -> [[Atom]] -> Vector Residue
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Atom] -> Residue) -> [[Atom]] -> [Residue]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Map Text (Vector (Bond LocalID)) -> [Atom] -> Residue
mkResidue (Chain -> Map Text (Vector (Bond LocalID))
restoreChainLocalBonds Chain
chain)) ([[Atom]] -> Vector Residue) -> [[Atom]] -> Vector Residue
forall a b. (a -> b) -> a -> b
$ Chain -> [[Atom]]
groupChainByResidue Chain
chain

            safeFirstAtom :: V.Vector PDB.Atom -> PDB.Atom
            safeFirstAtom :: Chain -> Atom
safeFirstAtom Chain
arr | Chain -> Int
forall a. Vector a -> Int
V.length Chain
arr Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0 = Chain
arr Chain -> Int -> Atom
forall a. Vector a -> Int -> a
V.! Int
0
                              | Bool
otherwise        = [Char] -> Atom
forall a. HasCallStack => [Char] -> a
error [Char]
"Could not pick first atom"

            mkResidue :: Map Text (V.Vector (Bond LocalID)) -> [PDB.Atom] -> Residue
            mkResidue :: Map Text (Vector (Bond LocalID)) -> [Atom] -> Residue
mkResidue Map Text (Vector (Bond LocalID))
_ []    = [Char] -> Residue
forall a. HasCallStack => [Char] -> a
error [Char]
"Cound not make residue from empty list"
            mkResidue Map Text (Vector (Bond LocalID))
localBondsMap [Atom]
atoms' = Text
-> Int
-> Char
-> Vector Atom
-> Vector (Bond LocalID)
-> SecondaryStructure
-> Text
-> Residue
Residue (Text -> Text
T.strip (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Atom -> Text
PDB.atomResName Atom
firstResidueAtom)
                                                     (Atom -> Int
PDB.atomResSeq Atom
firstResidueAtom)
                                                     (Atom -> Char
PDB.atomICode Atom
firstResidueAtom)
                                                     ([Atom] -> Vector Atom
forall a. [a] -> Vector a
V.fromList ([Atom] -> Vector Atom) -> [Atom] -> Vector Atom
forall a b. (a -> b) -> a -> b
$ Atom -> Atom
mkAtom (Atom -> Atom) -> [Atom] -> [Atom]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Atom]
atoms')
                                                     (Map Text (Vector (Bond LocalID))
localBondsMap Map Text (Vector (Bond LocalID)) -> Text -> Vector (Bond LocalID)
forall k a. Ord k => Map k a -> k -> a
M.! Atom -> Text
residueID Atom
firstResidueAtom)
                                                     SecondaryStructure
Undefined -- now we do not read secondary structure
                                                     Text
""        -- chemical component type?!
              where
                firstResidueAtom :: Atom
firstResidueAtom = [Atom] -> Atom
forall a. [a] -> a
head [Atom]
atoms'

            mkAtom :: PDB.Atom -> Atom
            mkAtom :: Atom -> Atom
mkAtom PDB.Atom{Char
Float
Int
Text
atomCharge :: Atom -> Text
atomElement :: Atom -> Text
atomTempFactor :: Atom -> Float
atomOccupancy :: Atom -> Float
atomZ :: Atom -> Float
atomY :: Atom -> Float
atomX :: Atom -> Float
atomAltLoc :: Atom -> Char
atomName :: Atom -> Text
atomCharge :: Text
atomElement :: Text
atomTempFactor :: Float
atomOccupancy :: Float
atomZ :: Float
atomY :: Float
atomX :: Float
atomICode :: Char
atomResSeq :: Int
atomChainID :: Char
atomResName :: Text
atomAltLoc :: Char
atomName :: Text
atomSerial :: Int
atomICode :: Atom -> Char
atomResSeq :: Atom -> Int
atomResName :: Atom -> Text
atomChainID :: Atom -> Char
atomSerial :: Atom -> Int
..} = GlobalID
-> Int -> Text -> Text -> V3 Float -> Int -> Float -> Float -> Atom
Atom (Int -> GlobalID
GlobalID (Int -> GlobalID) -> Int -> GlobalID
forall a b. (a -> b) -> a -> b
$ Map Int Int
atomSerialToNilBasedIndex Map Int Int -> Int -> Int
forall k a. Ord k => Map k a -> k -> a
M.! Int
atomSerial)
                                       Int
atomSerial
                                       (Text -> Text
T.strip Text
atomName)
                                       Text
atomElement
                                       (Float -> Float -> Float -> V3 Float
forall a. a -> a -> a -> V3 a
V3 Float
atomX Float
atomY Float
atomZ)
                                       (Int -> RemarkCode -> Int
forall a. a -> Maybe a -> a
fromMaybe Int
0 (RemarkCode -> Int) -> ([Char] -> RemarkCode) -> [Char] -> Int
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> RemarkCode
forall a. Read a => [Char] -> Maybe a
readMaybe ([Char] -> Int) -> [Char] -> Int
forall a b. (a -> b) -> a -> b
$ Text -> [Char]
T.unpack Text
atomCharge)
                                       Float
atomTempFactor
                                       Float
atomOccupancy

modelsFromPDBFile :: (MonadIO m) => FilePath -> m (Either Text ([PDBWarnings], V.Vector Model))
modelsFromPDBFile :: [Char] -> m (Either Text ([PDBWarnings], Vector Model))
modelsFromPDBFile = IO (Either Text ([PDBWarnings], Vector Model))
-> m (Either Text ([PDBWarnings], Vector Model))
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Either Text ([PDBWarnings], Vector Model))
 -> m (Either Text ([PDBWarnings], Vector Model)))
-> ([Char] -> IO (Either Text ([PDBWarnings], Vector Model)))
-> [Char]
-> m (Either Text ([PDBWarnings], Vector Model))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Either Text ([PDBWarnings], Vector Model))
-> IO Text -> IO (Either Text ([PDBWarnings], Vector Model))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Text -> Either Text ([PDBWarnings], Vector Model)
modelsFromPDBText (IO Text -> IO (Either Text ([PDBWarnings], Vector Model)))
-> ([Char] -> IO Text)
-> [Char]
-> IO (Either Text ([PDBWarnings], Vector Model))
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Char] -> IO Text
TIO.readFile

modelsFromPDBText :: Text -> Either Text ([PDBWarnings], V.Vector Model)
modelsFromPDBText :: Text -> Either Text ([PDBWarnings], Vector Model)
modelsFromPDBText Text
pdbText = do
  ([PDBWarnings]
warnings, PDB
parsedPDB) <- Text -> Either Text ([PDBWarnings], PDB)
fromTextPDB Text
pdbText
  let models :: Vector Model
models = PDB -> Vector Model
forall a. StructureModels a => a -> Vector Model
modelsOf PDB
parsedPDB
  ([PDBWarnings], Vector Model)
-> Either Text ([PDBWarnings], Vector Model)
forall (f :: * -> *) a. Applicative f => a -> f a
pure ([PDBWarnings]
warnings, Vector Model
models)

instance StructureSerializable PDB.PDB where
  serializeModels :: Vector Model -> PDB
serializeModels Vector Model
models = Text
-> Vector Model
-> Map RemarkCode RemarkData
-> Map FieldType RemarkData
-> PDB
PDB.PDB Text
"Serialized model" Vector Model
pdbModels Map RemarkCode RemarkData
forall a. Monoid a => a
mempty Map FieldType RemarkData
forall a. Monoid a => a
mempty
    where
      pdbModels :: Vector Model
pdbModels = (Model -> Model) -> Vector Model -> Vector Model
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Model -> Model
toPDBModel Vector Model
models

      toPDBModel :: Model -> PDB.Model
      toPDBModel :: Model -> Model
toPDBModel = (Chain -> Chain) -> Vector Chain -> Model
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap Chain -> Chain
toPDBChain (Vector Chain -> Model)
-> (Model -> Vector Chain) -> Model -> Model
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Model -> Vector Chain
modelChains

      toPDBChain :: Chain -> PDB.Chain
      toPDBChain :: Chain -> Chain
toPDBChain Chain
ch = ((Chain, Residue, Atom) -> Atom)
-> Vector (Chain, Residue, Atom) -> Chain
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Chain, Residue, Atom) -> Atom
toPDBAtom (Vector (Chain, Residue, Atom) -> Chain)
-> (Vector (Vector (Chain, Residue, Atom))
    -> Vector (Chain, Residue, Atom))
-> Vector (Vector (Chain, Residue, Atom))
-> Chain
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector (Vector (Chain, Residue, Atom))
-> Vector (Chain, Residue, Atom)
forall (m :: * -> *) a. Monad m => m (m a) -> m a
join (Vector (Vector (Chain, Residue, Atom)) -> Chain)
-> Vector (Vector (Chain, Residue, Atom)) -> Chain
forall a b. (a -> b) -> a -> b
$ (\Residue
r -> (Atom -> (Chain, Residue, Atom))
-> Vector Atom -> Vector (Chain, Residue, Atom)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((,,) Chain
ch Residue
r) (Vector Atom -> Vector (Chain, Residue, Atom))
-> Vector Atom -> Vector (Chain, Residue, Atom)
forall a b. (a -> b) -> a -> b
$ Residue -> Vector Atom
resAtoms Residue
r) (Residue -> Vector (Chain, Residue, Atom))
-> Vector Residue -> Vector (Vector (Chain, Residue, Atom))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Chain -> Vector Residue
chainResidues Chain
ch

      toPDBAtom :: (Chain, Residue, Atom) -> PDB.Atom
      toPDBAtom :: (Chain, Residue, Atom) -> Atom
toPDBAtom (Chain{Text
Vector Residue
chainName :: Chain -> Text
chainResidues :: Vector Residue
chainName :: Text
chainResidues :: Chain -> Vector Residue
..}, Residue{Char
Int
Text
Vector (Bond LocalID)
Vector Atom
SecondaryStructure
resChemCompType :: Residue -> Text
resSecondary :: Residue -> SecondaryStructure
resBonds :: Residue -> Vector (Bond LocalID)
resInsertionCode :: Residue -> Char
resNumber :: Residue -> Int
resName :: Residue -> Text
resChemCompType :: Text
resSecondary :: SecondaryStructure
resBonds :: Vector (Bond LocalID)
resAtoms :: Vector Atom
resInsertionCode :: Char
resNumber :: Int
resName :: Text
resAtoms :: Residue -> Vector Atom
..}, Atom{Float
Int
Text
V3 Float
GlobalID
occupancy :: Atom -> Float
bFactor :: Atom -> Float
formalCharge :: Atom -> Int
atomCoords :: Atom -> V3 Float
atomElement :: Atom -> Text
atomName :: Atom -> Text
atomInputIndex :: Atom -> Int
atomId :: Atom -> GlobalID
occupancy :: Float
bFactor :: Float
formalCharge :: Int
atomCoords :: V3 Float
atomElement :: Text
atomName :: Text
atomInputIndex :: Int
atomId :: GlobalID
..}) = Atom
res
        where
          res :: Atom
res =
            Int
-> Text
-> Char
-> Text
-> Char
-> Int
-> Char
-> Float
-> Float
-> Float
-> Float
-> Float
-> Text
-> Text
-> Atom
PDB.Atom
              (GlobalID -> Int
getGlobalID GlobalID
atomId Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Int
1)
              Text
atomName
              Char
nullAltLoc
              Text
resName
              (Text -> Char
T.head Text
chainName)
              Int
resNumber
              Char
resInsertionCode
              (V3 Float
atomCoords V3 Float -> Getting Float (V3 Float) Float -> Float
forall s a. s -> Getting a s a -> a
^. Getting Float (V3 Float) Float
forall (t :: * -> *) a. R1 t => Lens' (t a) a
_x)
              (V3 Float
atomCoords V3 Float -> Getting Float (V3 Float) Float -> Float
forall s a. s -> Getting a s a -> a
^. Getting Float (V3 Float) Float
forall (t :: * -> *) a. R2 t => Lens' (t a) a
_y)
              (V3 Float
atomCoords V3 Float -> Getting Float (V3 Float) Float -> Float
forall s a. s -> Getting a s a -> a
^. Getting Float (V3 Float) Float
forall (t :: * -> *) a. R3 t => Lens' (t a) a
_z)
              Float
occupancy
              Float
bFactor
              Text
atomElement
              ([Char] -> Text
T.pack ([Char] -> Text) -> [Char] -> Text
forall a b. (a -> b) -> a -> b
$ Int -> [Char]
forall a. Show a => a -> [Char]
show Int
formalCharge)

          nullAltLoc :: Char
          nullAltLoc :: Char
nullAltLoc = Char
' '

-- | Writes models to the given path as PDB.
--
modelsToPDBFile :: MonadIO m => Vector Model -> FilePath -> m ()
modelsToPDBFile :: Vector Model -> [Char] -> m ()
modelsToPDBFile Vector Model
models = PDB -> [Char] -> m ()
forall (m :: * -> *). MonadIO m => PDB -> [Char] -> m ()
pdbToFile (PDB -> [Char] -> m ()) -> PDB -> [Char] -> m ()
forall a b. (a -> b) -> a -> b
$ Vector Model -> PDB
forall a. StructureSerializable a => Vector Model -> a
serializeModels Vector Model
models

-- | Converts models to their 'Text' representation as PDB.
--
modelsToPDBText :: Vector Model -> Text
modelsToPDBText :: Vector Model -> Text
modelsToPDBText = PDB -> Text
pdbToText (PDB -> Text) -> (Vector Model -> PDB) -> Vector Model -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Vector Model -> PDB
forall a. StructureSerializable a => Vector Model -> a
serializeModels