{-# LANGUAGE AllowAmbiguousTypes #-}
{-# OPTIONS_GHC -fno-warn-orphans #-}
module Bio.MAE
( Block (..), FromMaeValue (..)
, Mae (..), MaeValue (..)
, Table (..)
, fromFile
, fromText
, maeP
) where
import Bio.MAE.Parser
import Bio.MAE.Type (Block (..), FromMaeValue (..),
Mae (..), MaeValue (..), Table (..))
import Bio.Structure (Atom (..), Bond (..), Chain (..),
GlobalID (..), LocalID (..),
Model (..), Residue (..),
SecondaryStructure (..),
StructureModels (..))
import Control.Monad.IO.Class (MonadIO, liftIO)
import Data.Attoparsec.Text (parseOnly)
import Data.Bifunctor (bimap, first)
import Data.Function (on)
import qualified Data.List as L (find, groupBy, sortOn)
import Data.Map.Strict (Map)
import qualified Data.Map.Strict as M (fromList, lookup, member, (!))
import Data.Maybe (catMaybes, fromJust)
import Data.Text (Text)
import qualified Data.Text as T (head, init, last, null, pack,
strip, tail)
import qualified Data.Text.IO as TIO (readFile)
import Data.Vector (Vector)
import qualified Data.Vector as V (fromList)
import Linear.V3 (V3 (..))
fromFile :: MonadIO m => FilePath -> m Mae
fromFile f = liftIO (TIO.readFile f) >>= either fail pure . parseOnly maeP
fromText :: Text -> Either Text Mae
fromText = first T.pack . parseOnly maeP
instance StructureModels Mae where
modelsOf Mae{..} = V.fromList $ fmap blockToModel blocks
where
unsafeGetFromContentsMap :: FromMaeValue a => Map Text [MaeValue] -> Text -> Int -> a
unsafeGetFromContentsMap m name i = unsafeFromMaeValue $ (m M.! name) !! i
getFromContentsMap :: FromMaeValue a => Map Text [MaeValue] -> Text -> Int -> Maybe a
getFromContentsMap m name i = fromMaeValue $ (m M.! name) !! i
blockToModel :: Block -> Model
blockToModel Block{..} = Model (atomsTableToChains atomsTable) bonds
where
atomsTable = findTable "m_atom"
numberOfAtoms = length $ atomsTable M.! "r_m_x_coord"
bondsTable = findTable "m_bond"
(bondGraph, bonds) = bondsTableToGlobalBonds bondsTable
findTable :: Text -> Map Text [MaeValue]
findTable name = contents $ fromJust $ L.find ((== name) . tableName) tables
stripQuotes :: Text -> Text
stripQuotes t | not (T.null t) && T.head t == T.last t, T.last t == '\"' = T.strip $ T.init $ T.tail t
| otherwise = T.strip t
toGroupsOn :: (Eq b, Ord b) => (a -> b) -> [a] -> [[a]]
toGroupsOn f = L.groupBy ((==) `on` f) . L.sortOn f
bondsTableToGlobalBonds :: Map Text [MaeValue] -> (Map Int [(Int, Int)], Vector (Bond GlobalID))
bondsTableToGlobalBonds m = bimap toMap V.fromList bonds'
where
numberOfBonds = length $ m M.! "i_m_from"
bonds' = unzip $ fmap indexToBond [0 .. numberOfBonds - 1]
toMap :: [(Int, (Int, Int))] -> Map Int [(Int, Int)]
toMap = M.fromList . fmap (\l@((k, _) : _) -> (k, fmap snd l)) . toGroupsOn fst
indexToBond :: Int -> ((Int, (Int, Int)), Bond GlobalID)
indexToBond i = ((x, (y, o)), Bond (GlobalID x) (GlobalID y) o)
where
x = getFromContentsI "i_m_from" - 1
y = getFromContentsI "i_m_to" - 1
o = getFromContentsI "i_m_order"
getFromContentsI :: FromMaeValue a => Text -> a
getFromContentsI = flip (unsafeGetFromContentsMap m) i
atomsTableToChains :: Map Text [MaeValue] -> Vector Chain
atomsTableToChains m = V.fromList $ fmap groupToChain groupedByChains
where
groupedByChains = toGroupsOn (unsafeGetFromContents @Text "s_m_chain_name") [0 .. numberOfAtoms - 1]
unsafeGetFromContents :: FromMaeValue a => Text -> Int -> a
unsafeGetFromContents = unsafeGetFromContentsMap m
getFromContents :: FromMaeValue a => Text -> Int -> Maybe a
getFromContents = getFromContentsMap m
groupToChain :: [Int] -> Chain
groupToChain [] = error "Group that is result of List.groupBy can't be empty."
groupToChain group@(h : _) = Chain name residues
where
name = stripQuotes $ unsafeGetFromContents "s_m_chain_name" h
groupedByResidues = toGroupsOn by group
residues = V.fromList $ fmap groupToResidue groupedByResidues
by :: Int -> (Int, Text)
by i = (unsafeGetFromContents "i_m_residue_number" i, unsafeGetFromContents "s_m_pdb_residue_name" i)
groupToResidue :: [Int] -> Residue
groupToResidue [] = error "Group that is result of List.groupBy can't be empty."
groupToResidue group@(h : _) = Residue name atoms (V.fromList localBonds) secondary chemCompType
where
name = stripQuotes $ unsafeGetFromContents "s_m_pdb_residue_name" h
atoms = V.fromList $ fmap indexToAtom group
localInds = [0 .. length group - 1]
globalToLocal = M.fromList $ zip group localInds
bondsParts = fmap (`M.lookup` bondGraph) group
localBonds = concat $ catMaybes $ zipWith (\l x -> fmap (concatMap (toLocalBond x)) l) bondsParts localInds
toLocalBond :: Int -> (Int, Int) -> [Bond LocalID]
toLocalBond x (y, o) | y `elem` group = pure $ Bond (LocalID x)
(LocalID $ globalToLocal M.! y)
o
| otherwise = []
secondary = Undefined
chemCompType = mempty
indexToAtom :: Int -> Atom
indexToAtom i = Atom (GlobalID i)
(stripQuotes $ getFromContentsI "s_m_pdb_atom_name")
(elIndToElement M.! getFromContentsI "i_m_atomic_number")
coords
(getFromContentsIWithDef 0 "i_m_formal_charge")
(getFromContentsIWithDef 0 "r_m_pdb_tfactor")
(getFromContentsIWithDef 0 "r_m_pdb_occupancy")
where
getFromContentsI :: FromMaeValue a => Text -> a
getFromContentsI = flip unsafeGetFromContents i
getFromContentsIWithDef :: FromMaeValue a => a -> Text -> a
getFromContentsIWithDef def n | n `M.member` m = maybe def id $ getFromContents n i
| otherwise = def
coords :: V3 Float
coords = V3 (getFromContentsI "r_m_x_coord")
(getFromContentsI "r_m_y_coord")
(getFromContentsI "r_m_z_coord")
elIndToElement :: Map Int Text
elIndToElement = M.fromList $ zip [1 .. 118] [ "H", "He", "Li", "Be", "B", "C", "N", "O", "F", "Ne"
, "Na", "Mg", "Al", "Si", "P", "S", "Cl", "Ar", "K", "Ca"
, "Sc", "Ti", "V", "Cr", "Mn", "Fe", "Co", "Ni", "Cu", "Zn"
, "Ga", "Ge", "As", "Se", "Br", "Kr", "Rb", "Sr", "Y", "Zr"
, "Nb", "Mo", "Tc", "Ru", "Rh", "Pd", "Ag", "Cd", "In", "Sn"
, "Sb", "Te", "I", "Xe", "Cs", "Ba", "La", "Ce", "Pr", "Nd"
, "Pm", "Sm", "Eu", "Gd", "Tb", "Dy", "Ho", "Er", "Tm", "Yb"
, "Lu", "Hf", "Ta", "W", "Re", "Os", "Ir", "Pt", "Au", "Hg"
, "Tl", "Pb", "Bi", "Po", "At", "Rn", "Fr", "Ra", "Ac", "Th"
, "Pa", "U", "Np", "Pu", "Am", "Cm", "Bk", "Cf", "Es", "Fm"
, "Md", "No", "Lr", "Rf", "Db", "Sg", "Bh", "Hs", "Mt", "Ds"
, "Rg", "Cn", "Nh", "Fl", "Mc", "Lv", "Ts", "Og"
]