{-# LANGUAGE AllowAmbiguousTypes #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Bio.MAE
( Block (..), FromMaeValue (..)
, Mae (..), MaeValue (..)
, Table (..)
, fromFile
, fromText
, maeP
) where
import Bio.MAE.Parser
import Bio.MAE.Type (Block (..), FromMaeValue (..),
Mae (..), MaeValue (..), Table (..))
import Bio.Structure (Atom (..), Bond (..), Chain (..),
GlobalID (..), LocalID (..),
Model (..), Residue (..),
SecondaryStructure (..),
StructureModels (..))
import Control.Monad (join)
import Control.Monad.IO.Class (MonadIO, liftIO)
import Data.Attoparsec.Text (parseOnly)
import Data.Bifunctor (bimap, first)
import Data.Function (on)
import qualified Data.List as L (find, groupBy, sortOn)
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M (fromList, lookup, (!))
import Data.Maybe (catMaybes, fromJust)
import Data.Text (Text)
import qualified Data.Text as T (head, init, last, null, pack,
strip, tail)
import qualified Data.Text.IO as TIO (readFile)
import Data.Vector (Vector)
import qualified Data.Vector as V (fromList)
import Linear.V3 (V3 (..))
fromFile :: MonadIO m => FilePath -> m Mae
fromFile f = liftIO (TIO.readFile f) >>= either fail pure . parseOnly maeP
fromText :: Text -> Either Text Mae
fromText = first T.pack . parseOnly maeP
instance StructureModels Mae where
modelsOf Mae{..} = V.fromList $ fmap blockToModel blocks
where
unsafeGetFromContentsMap :: FromMaeValue a => Map Text [MaeValue] -> Text -> Int -> a
unsafeGetFromContentsMap m name i = unsafeFromMaeValue $ (m M.! name) !! i
getFromContentsMap :: FromMaeValue a => Map Text [MaeValue] -> Text -> Int -> Maybe a
getFromContentsMap m name i = join $ fromMaeValue . (!! i) <$> name `M.lookup` m
blockToModel :: Block -> Model
blockToModel Block{..} = Model (atomsTableToChains atomsTable) bonds
where
atomsTable = findTable "m_atom"
numberOfAtoms = length $ atomsTable M.! "r_m_x_coord"
bondsTable = findTable "m_bond"
(bondGraph, bonds) = bondsTableToGlobalBonds bondsTable
findTable :: Text -> Map Text [MaeValue]
findTable name = contents $ fromJust $ L.find ((== name) . tableName) tables
stripQuotes :: Text -> Text
stripQuotes t | not (T.null t) && T.head t == T.last t, T.last t == '\"' = T.strip $ T.init $ T.tail t
| otherwise = T.strip t
toGroupsOn :: (Eq b, Ord b) => (a -> b) -> [a] -> [[a]]
toGroupsOn f = L.groupBy ((==) `on` f) . L.sortOn f
bondsTableToGlobalBonds :: Map Text [MaeValue] -> (Map Int [(Int, Int)], Vector (Bond GlobalID))
bondsTableToGlobalBonds m = bimap toMap V.fromList bonds'
where
numberOfBonds = length $ m M.! "i_m_from"
bonds' = unzip $ fmap indexToBond [0 .. numberOfBonds - 1]
toMap :: [(Int, (Int, Int))] -> Map Int [(Int, Int)]
toMap = M.fromList . fmap (\l@((k, _) : _) -> (k, fmap snd l)) . toGroupsOn fst
indexToBond :: Int -> ((Int, (Int, Int)), Bond GlobalID)
indexToBond i = ((x, (y, o)), Bond (GlobalID x) (GlobalID y) o)
where
x = getFromContentsI "i_m_from" - 1
y = getFromContentsI "i_m_to" - 1
o = getFromContentsI "i_m_order"
getFromContentsI :: FromMaeValue a => Text -> a
getFromContentsI = flip (unsafeGetFromContentsMap m) i
atomsTableToChains :: Map Text [MaeValue] -> Vector Chain
atomsTableToChains m = V.fromList $ fmap groupToChain groupedByChains
where
groupedByChains = toGroupsOn (getFromContents defaultChainName "s_m_chain_name") [0 .. numberOfAtoms - 1]
getFromContents :: FromMaeValue a => a -> Text -> Int -> a
getFromContents def name ind = maybe def id $ getFromContentsMap m name ind
unsafeGetFromContents :: FromMaeValue a => Text -> Int -> a
unsafeGetFromContents = unsafeGetFromContentsMap m
groupToChain :: [Int] -> Chain
groupToChain [] = error "Group that is result of List.groupBy can't be empty."
groupToChain group@(h : _) = Chain name residues
where
name = stripQuotes $ getFromContents defaultChainName "s_m_chain_name" h
groupedByResidues = toGroupsOn by group
residues = V.fromList $ fmap groupToResidue groupedByResidues
by :: Int -> (Int, Text)
by i = (unsafeGetFromContents "i_m_residue_number" i, unsafeGetFromContents "s_m_pdb_residue_name" i)
defaultChainName :: Text
defaultChainName = "A"
groupToResidue :: [Int] -> Residue
groupToResidue [] = error "Group that is result of List.groupBy can't be empty."
groupToResidue group@(h : _) = Residue name atoms (V.fromList localBonds) secondary chemCompType
where
name = stripQuotes $ unsafeGetFromContents "s_m_pdb_residue_name" h
atoms = V.fromList $ fmap indexToAtom group
localInds = [0 .. length group - 1]
globalToLocal = M.fromList $ zip group localInds
bondsParts = fmap (`M.lookup` bondGraph) group
localBonds = concat $ catMaybes $ zipWith (\l x -> fmap (concatMap (toLocalBond x)) l) bondsParts localInds
toLocalBond :: Int -> (Int, Int) -> [Bond LocalID]
toLocalBond x (y, o) | y `elem` group = pure $ Bond (LocalID x)
(LocalID $ globalToLocal M.! y)
o
| otherwise = []
secondary = Undefined
chemCompType = mempty
indexToAtom :: Int -> Atom
indexToAtom i = Atom (GlobalID i)
(stripQuotes $ getFromContentsI "s_m_pdb_atom_name")
(elIndToElement M.! getFromContentsI "i_m_atomic_number")
coords
(getFromContents 0 "i_m_formal_charge" i)
(getFromContents 0 "r_m_pdb_tfactor" i)
(getFromContents 0 "r_m_pdb_occupancy" i)
where
getFromContentsI :: FromMaeValue a => Text -> a
getFromContentsI = flip unsafeGetFromContents i
coords :: V3 Float
coords = V3 (getFromContentsI "r_m_x_coord")
(getFromContentsI "r_m_y_coord")
(getFromContentsI "r_m_z_coord")
elIndToElement :: Map Int Text
elIndToElement = M.fromList $ zip [1 .. 118] [ "H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne"
, "Na", "Mg", "Al", "Si", "P", "S", "Cl", "Ar", "K", "Ca"
, "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn"
, "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y", "Zr"
, "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", "In", "Sn"
, "Sb", "Te", "I", "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd"
, "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb"
, "Lu", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg"
, "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th"
, "Pa", "U", "Np", "Pu", "Am", "Cm", "Bk", "Cf", "Es", "Fm"
, "Md", "No", "Lr", "Rf", "Db", "Sg", "Bh", "Hs", "Mt", "Ds"
, "Rg", "Cn", "Nh", "Fl", "Mc", "Lv", "Ts", "Og"
]