module Bio.PDB.Reader
( fromTextPDB
, fromFilePDB
, PDBWarnings(..)
) where
import Bio.PDB.Parser (pdbP)
import Bio.PDB.Type (PDB (..))
import Control.Monad.IO.Class (MonadIO, liftIO)
import Data.Attoparsec.Text (parseOnly)
import Data.Bifunctor (first)
import Data.List as L (findIndices, length)
import Data.Maybe (catMaybes)
import Data.Text as T (Text, length, lines, pack,
replicate, take, unlines)
import qualified Data.Text.IO as TIO (readFile)
-- | Index of a line within the input text, as reported in 'PDBWarnings'.
type LineNumber = Int

-- | Non-fatal irregularities noticed while normalising the lines of a PDB
-- text. Each constructor carries the index of the offending line.
data PDBWarnings
  = LineTooLong LineNumber
    -- ^ The line was longer than the expected width.
  | LineTooShort LineNumber
    -- ^ The line was shorter than the expected width.
  deriving (Show, Eq)
-- | Bring every line of the input to a width of exactly 80 characters.
--
-- Lines shorter than 80 characters are right-padded with spaces and reported
-- as 'LineTooShort'; lines longer than 80 characters are cut down to their
-- first 80 characters and reported as 'LineTooLong'. Line numbers in the
-- warnings are zero-based and appear in input order. The adjusted lines are
-- joined back together with 'T.unlines'.
standardizeText :: Text -> ([PDBWarnings], Text)
standardizeText text = (catMaybes maybeWarnings, T.unlines fixedLines)
  where
    targetWidth :: Int
    targetWidth = 80

    -- Process all lines in one pass, then separate warnings from text.
    (maybeWarnings, fixedLines) = unzip (zipWith fitLine [0 ..] (T.lines text))

    -- Pad or truncate a single line, noting which adjustment (if any) was made.
    fitLine :: Int -> Text -> (Maybe PDBWarnings, Text)
    fitLine lineNumber row = case compare width targetWidth of
        LT -> (Just (LineTooShort lineNumber), row <> T.replicate (targetWidth - width) " ")
        GT -> (Just (LineTooLong lineNumber), T.take targetWidth row)
        EQ -> (Nothing, row)
      where
        width = T.length row
-- | Tell whether a line belongs to the model (coordinate) part of a PDB file,
-- i.e. whether it starts with one of the record tags listed in 'modelTags'.
--
-- Every tag is compared against a prefix of the line of the tag's own
-- length. Comparing only fixed 5- and 6-character prefixes would make any
-- shorter tag (such as the four-character @"TER "@) impossible to match.
isMdlLine :: Text -> Bool
isMdlLine line = any startsLine modelTags
  where
    -- True when the line begins with the given tag.
    startsLine :: Text -> Bool
    startsLine tag = T.take (T.length tag) line == tag

    modelTags :: [Text]
    modelTags = ["MODEL ", "ENDMDL", "ATOM ", "TER ", "HETATM", "ANISOU", "CONECT"]
-- | Check that a list of indices covers a gap-free range.
--
-- An empty list passes. Otherwise the distance between the first and the last
-- element (inclusive) must equal the number of elements. Only the two ends and
-- the length are inspected, so the result is meaningful as a contiguity check
-- when the input is strictly ascending.
checkRow :: [Int] -> Bool
checkRow indices = case indices of
  []            -> True
  (lowest : _)  -> L.length indices == last indices - lowest + 1
-- | Verify that all lines recognised by 'isMdlLine' sit next to each other,
-- with no other lines in between (as judged by 'checkRow').
--
-- Only the text component of the pair is examined; the warnings are ignored.
checkMdlLines :: ([PDBWarnings], Text) -> Bool
checkMdlLines = checkRow . findIndices isMdlLine . T.lines . snd
-- | Normalise the raw text with 'standardizeText' and make sure its model
-- lines form one uninterrupted run ('checkMdlLines').
--
-- Returns the collected warnings together with the normalised text on
-- success, or an error message when the model lines are interrupted.
preprocess :: Text -> Either Text ([PDBWarnings], Text)
preprocess text
  | checkMdlLines standardized = Right standardized
  | otherwise                  = Left "There are trash strings between model strings"
  where
    standardized = standardizeText text
-- | Read the file at the given path as text and parse it with 'fromTextPDB'.
--
-- The result has the same shape as that of 'fromTextPDB': an error message,
-- or the preprocessing warnings paired with the parsed 'PDB'.
fromFilePDB :: MonadIO m => FilePath -> m (Either Text ([PDBWarnings], PDB))
fromFilePDB path = liftIO (fromTextPDB <$> TIO.readFile path)
-- | Parse a PDB structure from text.
--
-- The text is first run through 'preprocess'; if that succeeds, the
-- normalised text is handed to the 'pdbP' parser. A failure of either step is
-- returned as a 'Left' message (the parser's message is converted with
-- 'pack'). On success the preprocessing warnings accompany the parsed 'PDB'.
fromTextPDB :: Text -> Either Text ([PDBWarnings], PDB)
fromTextPDB text =
  preprocess text >>= \(warnings, cleanedText) ->
    (,) warnings <$> first pack (parseOnly pdbP cleanedText)