module PDBtools.PDButil.PDBparse where
import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as B
import System.IO (FilePath)
-- | One coordinate record, as produced by 'parseAtom'.
data Atom = Atom
  { name    :: ByteString -- ^ atom name (record columns 13-16)
  , atid    :: Int        -- ^ atom serial number (columns 7-11)
  , chain   :: ByteString -- ^ chain identifier (column 22)
  , resid   :: Int        -- ^ residue sequence number (columns 23-26)
  , resname :: ByteString -- ^ residue name (columns 18-20)
  , coords  :: [Double]   -- ^ three values read from columns 31-38, 39-46, 47-54
  , aField  :: Double     -- ^ value read from columns 55-60
  , bField  :: Double     -- ^ value read from columns 61-66
  , atype   :: ByteString -- ^ text read from columns 77-78
  }
  deriving (Show, Eq)
-- | A group of atoms. 'splitChains' builds one 'Protein' for each
-- consecutive run of atoms that share the same 'chain' value.
data Protein = Protein
  { atoms :: [Atom] -- ^ the atoms in this group, in input order
  }
  deriving (Show)
-- | Parse one fixed-width coordinate record (an @ATOM@ or @HETATM@ line)
-- into an 'Atom'.
--
-- Every field is taken from a 1-based, inclusive column range of the
-- record, with all space characters removed. The ranges used here follow
-- the fixed-column ATOM/HETATM layout of the PDB format; in that layout
-- columns 55-60 and 61-66 hold the occupancy and temperature factor.
--
-- Numeric fields are converted with 'read'. A record that is too short, or
-- that has non-numeric text in a numeric column, therefore raises an error
-- when the affected field is evaluated. Text fields simply come back empty
-- (or truncated) when the record ends early.
parseAtom :: ByteString -> Atom
parseAtom record =
  Atom
    { name    = pull 13 16
    , atid    = rpull 7 11
    , chain   = pull 22 22
    , resid   = rpull 23 26
    , resname = pull 18 20
    , coords  = [rpull 31 38, rpull 39 46, rpull 47 54]
    , aField  = rpull 55 60
    , bField  = rpull 61 66
    , atype   = pull 77 78
    }
  where
    -- Columns m..n (1-based, inclusive) of the record, blanks stripped.
    -- Keeping the first n characters and then dropping the first (m - 1)
    -- leaves exactly that range.
    pull :: Int -> Int -> ByteString
    pull m n = cutspace $ B.drop (m - 1) $ B.take n record

    -- Same range, parsed as a number. The explicit signature keeps this
    -- helper polymorphic, since it is used for both Int and Double fields.
    rpull :: Read a => Int -> Int -> a
    rpull m n = read $ B.unpack $ pull m n

    -- Remove every space character (padding inside fixed-width fields).
    cutspace :: ByteString -> ByteString
    cutspace = B.filter (/= ' ')
-- | 'True' when the line begins with the four characters @ATOM@.
isAtom :: ByteString -> Bool
isAtom line = atomTag `B.isPrefixOf` line
  where
    atomTag = B.pack "ATOM"
-- | 'True' when the line begins with the six characters @HETATM@.
isHETATM :: ByteString -> Bool
isHETATM line = hetTag `B.isPrefixOf` line
  where
    hetTag = B.pack "HETATM"
-- | Read a file and parse both kinds of coordinate record from it.
--
-- Returns a pair: the @ATOM@ records grouped by 'splitChains', and the
-- @HETATM@ records as a flat list. Lines matching neither 'isAtom' nor
-- 'isHETATM' are ignored.
parse :: FilePath -> IO ([Protein], [Atom])
parse pdb = do
  contents <- B.readFile pdb
  let records = B.lines contents
      chains  = splitChains (map parseAtom (filter isAtom records))
      ligands = map parseAtom (filter isHETATM records)
  return (chains, ligands)
-- | Read a file and return only its @HETATM@ records, parsed with
-- 'parseAtom', in file order.
parseCofactorOnly :: FilePath -> IO [Atom]
parseCofactorOnly pdb = fmap hetRecords (B.readFile pdb)
  where
    hetRecords = map parseAtom . filter isHETATM . B.lines
-- | Read a file and return only its @ATOM@ records, parsed with
-- 'parseAtom' and grouped by 'splitChains'.
parseProteinOnly :: FilePath -> IO [Protein]
parseProteinOnly pdb = fmap atomChains (B.readFile pdb)
  where
    atomChains = splitChains . map parseAtom . filter isAtom . B.lines
-- | Group atoms into 'Protein' values by consecutive runs of equal 'chain'.
--
-- Only adjacent atoms are grouped: if a chain identifier reappears after
-- atoms of a different chain, it starts a new 'Protein'. Atom order is
-- preserved, and an empty input gives an empty result.
splitChains :: [Atom] -> [Protein]
splitChains [] = []
splitChains contents@(first : _) =
  Protein { atoms = sameChain } : splitChains rest
  where
    -- Leading run that shares the first atom's chain, and everything after.
    (sameChain, rest) = span inFirstChain contents
    inFirstChain atom = chain first == chain atom