{-# LANGUAGE ScopedTypeVariables, OverloadedStrings, BangPatterns, NoMonomorphismRestriction #-} -- | Low-level event-based parser interface. module Bio.PDB.EventParser.PDBEventParser(parsePDBRecords) where -- Parses PDB file format version 3.20 (dated Sept 15, 2008) import qualified Data.ByteString.Char8 as BS import Control.Monad(unless, foldM) import Bio.PDB.EventParser.PDBEvents import Bio.PDB.EventParser.PDBParsingAbstractions -- Methods parsing individual records: import Bio.PDB.EventParser.ParseATOM import Bio.PDB.EventParser.ParseHEADER import Bio.PDB.EventParser.ParseTITLE import Bio.PDB.EventParser.ParseREMARK import Bio.PDB.EventParser.ParseIntRecord import Bio.PDB.EventParser.ParseREVDAT import Bio.PDB.EventParser.ParseCONECT import Bio.PDB.EventParser.ParseSEQRES import Bio.PDB.EventParser.ParseCRYST1 import Bio.PDB.EventParser.ParseHELIX import Bio.PDB.EventParser.ParseSHEET import Bio.PDB.EventParser.ParseTER import Bio.PDB.EventParser.ParseMASTER import Bio.PDB.EventParser.ParseMODRES import Bio.PDB.EventParser.ParseSEQADV import Bio.PDB.EventParser.ParseCAVEAT import Bio.PDB.EventParser.ParseSPLIT import Bio.PDB.EventParser.ParseJRNL import Bio.PDB.EventParser.ParseDBREF import Bio.PDB.EventParser.ParseHETNAM import Bio.PDB.EventParser.ParseHET import Bio.PDB.EventParser.ParseFORMUL import Bio.PDB.EventParser.ParseCISPEP import Bio.PDB.EventParser.ParseSSBOND import Bio.PDB.EventParser.ParseLINK import Bio.PDB.EventParser.ParseSLTBRG import Bio.PDB.EventParser.ParseHYDBND import Bio.PDB.EventParser.ParseSITE import Bio.PDB.EventParser.ParseObsoleting import Bio.PDB.EventParser.ParseSpecListRecord import Bio.PDB.EventParser.ParseListRecord import Bio.PDB.EventParser.ParseMatrixRecord import Bio.PDB.EventParser.ParseTVECT import System.IO.Unsafe --debug --------------- {{{ Record parsers --------------- }}} Record parsers --------------- {{{ Main parser: putting it together --parsePDBLines :: (Monad m) => BS.ByteString -> BS.ByteString -> Int -> m 
-- [PDBEvent]   (tail of the commented-out legacy signature that starts on the previous line)

-- | Parses an input stream 'input' with name 'fname', starting at line number
-- 'line_no'. Each line is dispatched on its record-name prefix to the
-- matching record parser; the resulting events are folded into the
-- accumulator 'acc' with 'action', in input order. Returns the final
-- accumulator once the input is exhausted.
--
-- * Every record consumes one line, except @DBREF1@, which is parsed together
--   with the line that follows it and therefore advances by two lines.
-- * A line consisting of exactly @TER@ (rest of the record omitted) yields a
--   placeholder 'TER' event with sentinel field values.
-- * A line matching no known prefix yields a 'PDBIgnoredLine' event.
--
-- No explicit type signature is given: the types of the record parsers are
-- declared in other modules, so the signature is left to inference.
--
-- NOTE(review): the record-name literals below are reproduced exactly as
-- found; confirm that their trailing padding is what the record parsers
-- expect (record names presumably occupy a fixed-width column).
parsePDBLines !fname !input !line_no action acc =
  if BS.null input
    then return acc
    else case line of
      -- Most frequent records
      a | "ATOM " `BS.isPrefixOf` a -> cont1 $! parseATOM line line_no
      a | "HETATM" `BS.isPrefixOf` a -> cont1 $! parseATOM line line_no
      a | "ANISOU" `BS.isPrefixOf` a -> cont1 $! parseANISOU line line_no
      a | "REMARK" `BS.isPrefixOf` a -> cont1 $! parseREMARK line line_no
      a | "SEQRES" `BS.isPrefixOf` a -> cont1 $! parseSEQRES line line_no
      a | "CONECT" `BS.isPrefixOf` a -> cont1 $! parseCONECT line line_no
      a | "SIGATM" `BS.isPrefixOf` a -> cont1 $! parseATOM line line_no
      a | "SIGUIJ" `BS.isPrefixOf` a -> cont1 $! parseANISOU line line_no
      -- Delimiters (ENDMDL must be tested before its prefix END)
      a | "ENDMDL" `BS.isPrefixOf` a -> cont1 $! return [ENDMDL]
      a | "END" `BS.isPrefixOf` a -> cont1 $! return [END]
      -- common error in treatment of TER - omitting rest of the record
      "TER" ->
        cont1 $! return
          [ TER { num     = -1
                , resname = ""
                , chain   = ' '
                , resid   = -1
                , insCode = ' '
                }
          ]
      -- proper TER
      a | "TER" `BS.isPrefixOf` a -> cont1 $! parseTER line line_no
      a | "MASTER" `BS.isPrefixOf` a -> cont1 $! parseMASTER line line_no
      -- Secondary structure declarations
      a | "HELIX" `BS.isPrefixOf` a -> cont1 $! parseHELIX line line_no
      a | "SHEET" `BS.isPrefixOf` a -> cont1 $! parseSHEET line line_no
      -- Crystallographic information
      a | "SCALE" `BS.isPrefixOf` a -> cont1 $! parseSCALEn line line_no
      a | "ORIGX" `BS.isPrefixOf` a -> cont1 $! parseORIGXn line line_no
      a | "MTRIX" `BS.isPrefixOf` a -> cont1 $! parseMTRIXn line line_no
      a | "CRYST1" `BS.isPrefixOf` a -> cont1 $! parseCRYST1 line line_no
      a | "TVECT " `BS.isPrefixOf` a -> cont1 $! parseTVECT line line_no
      -- Singular metarecords
      a | "DBREF " `BS.isPrefixOf` a -> cont1 $! parseDBREF line line_no
      -- DBREF1 is a two-line record: it is parsed together with the next line.
      a | "DBREF1" `BS.isPrefixOf` a -> cont2 $! parseDBREF12 (line, line2) line_no
      a | "HETNAM" `BS.isPrefixOf` a -> cont1 $! parseHETNAM True line line_no
      a | "HETSYN" `BS.isPrefixOf` a -> cont1 $! parseHETNAM False line line_no
      a | "HET " `BS.isPrefixOf` a -> cont1 $! parseHET line line_no
      a | "FORMUL" `BS.isPrefixOf` a -> cont1 $! parseFORMUL line line_no
      a | "CISPEP" `BS.isPrefixOf` a -> cont1 $! parseCISPEP line line_no
      a | "SSBOND" `BS.isPrefixOf` a -> cont1 $! parseSSBOND line line_no
      a | "LINK " `BS.isPrefixOf` a -> cont1 $! parseLINK line line_no
      a | "SLTBRG" `BS.isPrefixOf` a -> cont1 $! parseSLTBRG line line_no
      a | "HYDBND" `BS.isPrefixOf` a -> cont1 $! parseHYDBND line line_no
      a | "SITE " `BS.isPrefixOf` a -> cont1 $! parseSITE line line_no
      a | "MODRES" `BS.isPrefixOf` a -> cont1 $! parseMODRES line line_no
      a | "SEQADV" `BS.isPrefixOf` a -> cont1 $! parseSEQADV line line_no
      a | "MDLTYP" `BS.isPrefixOf` a -> cont1 $! parseMDLTYP line line_no
      a | "EXPDTA" `BS.isPrefixOf` a -> cont1 $! parseEXPDTA line line_no
      a | "SOURCE" `BS.isPrefixOf` a -> cont1 $! parseSOURCE line line_no
      a | "COMPND" `BS.isPrefixOf` a -> cont1 $! parseCOMPND line line_no
      a | "NUMMDL" `BS.isPrefixOf` a -> cont1 $! parseNUMMDL line line_no
      a | "MODEL " `BS.isPrefixOf` a -> cont1 $! parseMODEL line line_no
      a | "REVDAT" `BS.isPrefixOf` a -> cont1 $! parseREVDAT line line_no
      a | "HEADER" `BS.isPrefixOf` a -> cont1 $! parseHEADER line line_no
      a | "TITLE " `BS.isPrefixOf` a -> cont1 $! parseTITLE line line_no
      a | "AUTHOR" `BS.isPrefixOf` a -> cont1 $! parseAUTHOR line line_no
      a | "KEYWDS" `BS.isPrefixOf` a -> cont1 $! parseKEYWDS line line_no
      a | "CAVEAT" `BS.isPrefixOf` a -> cont1 $! parseCAVEAT line line_no
      a | "OBSLTE" `BS.isPrefixOf` a -> cont1 $! parseOBSLTE line line_no
      a | "SPRSDE" `BS.isPrefixOf` a -> cont1 $! parseSPRSDE line line_no
      a | "SPLIT " `BS.isPrefixOf` a -> cont1 $! parseSPLIT line line_no
      a | "JRNL " `BS.isPrefixOf` a -> cont1 $! parseJRNL line line_no
      -- Anything else is reported verbatim as an ignored line.
      _ -> cont1 $! return [PDBIgnoredLine line]
  where
    -- Continue after a record that occupied a single line: run the record
    -- parser, fold its events into the accumulator, recurse on the rest.
    -- (Bang patterns force the events and the accumulator at every step; the
    -- earlier explicit `seq` chain is therefore not needed.)
    cont1 !a = do
      !evts    <- a
      !new_acc <- foldM action acc evts
      parsePDBLines fname nextLine1 line_no1 action new_acc

    -- Continue after a record that occupied two lines (DBREF1 + following
    -- line). The recursion must carry 'new_acc'; passing the old 'acc' would
    -- silently drop the events that were just folded in.
    cont2 a = do
      !evts    <- a
      !new_acc <- foldM action acc evts
      parsePDBLines fname nextLine2 line_no2 action new_acc

    -- Current line and the remainder (which still starts with the '\n').
    (!line, !rest1) = BS.break (== '\n') input
    -- Input following the current line.
    nextLine1       = BS.drop 1 rest1
    -- Second line (only demanded for two-line records) and what follows it.
    (line2, rest2)  = BS.break (== '\n') nextLine1
    nextLine2       = BS.drop 1 rest2
    !line_no1       = line_no + 1
    line_no2        = line_no1 + 1

--parsePDBRecords :: (Monad m) =>String -> BS.ByteString -> (a -> PDBEvent -> m a) -> a -> m a
-- | Parses a strict ByteString 'contents' named 'fname' and performs 'action'
-- on events given by parsing chunks, returning accumulated results. Accumulator
-- is primed by 'acc'.
--
-- The remaining two arguments ('action' and the initial accumulator) are taken
-- point-free. Line numbers handed to the record parsers are 1-based, i.e. the
-- first line of 'contents' is line 1.
parsePDBRecords fname contents = parsePDBLines fname contents 1

-- | Checks whether line was ignored as unknown record type:
-- 'True' for a 'PDBIgnoredLine' event, 'False' for every other event.
ignoreLine (PDBIgnoredLine _) = True
ignoreLine _                  = False

--------------- }}} Main parser: putting it together