module Bio.PDB.StructureBuilder(parse)
where
import Prelude hiding (String)
import qualified Data.ByteString.Char8 as BS hiding (reverse)
import qualified Control.Monad.ST as ST
import Control.Monad.State.Strict as State
import Control.Monad(when)
import Data.STRef as STRef
import Bio.PDB.EventParser.PDBEvents(PDBEvent(..), RESID(..))
import qualified Bio.PDB.EventParser.PDBEventParser(parsePDBRecords)
import Bio.PDB.Structure
import Bio.PDB.Structure.List as L
type ParsingMonad t a = State.StateT (BState t) (ST.ST t) a
parsePDBRec :: String -> String -> (() -> PDBEvent -> ParsingMonad t ()) -> () -> ParsingMonad t ()
parsePDBRec = Bio.PDB.EventParser.PDBEventParser.parsePDBRecords
parse fname contents = ST.runST $ do initial <- initializeState
(s, e) <- State.evalStateT parsing initial
return $! (s :: Structure, e :: L.List PDBEvent)
where parsing = do parsePDBRec fname contents (\() !ev -> parseStep ev) ()
closeStructure
s <- State.gets currentStructure
e <- State.gets errors
e' <- L.finalize e
return (s, e')
data BState s = BState { currentResidue :: Maybe Residue,
currentModel :: Maybe Model,
currentChain :: Maybe Chain,
currentStructure :: Structure,
residueContents :: L.TempList s Atom,
chainContents :: L.TempList s Residue,
modelContents :: L.TempList s Chain,
structureContents :: L.TempList s Model,
errors :: L.TempList s PDBEvent,
line_no :: STRef.STRef s Int
}
initializeState :: ST.ST t (BState t)
initializeState = do r <- L.initialNew L.residueVectorSize
c <- L.initialNew L.chainVectorSize
m <- L.initialNew 1
s <- L.initialNew 1
e <- L.initialNew 100
l <- STRef.newSTRef 1
return $ BState { currentResidue = Nothing,
currentModel = Nothing,
currentChain = Nothing,
currentStructure = Structure { models = L.empty },
residueContents = r,
chainContents = c,
modelContents = m,
structureContents = s,
errors = e,
line_no = l }
checkResidue :: Bio.PDB.EventParser.PDBEvents.RESID -> ParsingMonad t ()
checkResidue (RESID (newName, newChain, newResseq, newInsCode)) =
do checkChain newChain
res <- State.gets currentResidue
when (residueChanged res) $ do closeResidue
l <- L.new L.residueVectorSize
State.modify $! createResidue l
where
residueChanged Nothing = True
residueChanged (Just (Residue { resName = oldResName,
resSeq = oldResSeq,
insCode = oldInsCode,
atoms = _atoms })) =
(oldResName, oldResSeq, oldInsCode) /= (newName, newResseq, newInsCode)
createResidue l st = st { currentResidue = Just newResidue,
residueContents = l }
newResidue = Bio.PDB.Structure.Residue { resName = newName,
resSeq = newResseq,
insCode = newInsCode,
atoms = L.empty }
checkChain :: Char -> ParsingMonad t ()
checkChain name = do checkModel
curChain <- State.gets currentChain
when (chainChanged curChain) $ do closeChain
l <- L.new L.chainVectorSize
State.modify $ createChain l
where
chainChanged Nothing = True
chainChanged (Just (Chain { chainId = oldChain })) = oldChain /= name
createChain l state = state { currentChain = Just $! Chain { chainId = name,
residues = L.empty },
chainContents = l }
checkModel :: ParsingMonad t ()
checkModel = do curModel <- State.gets currentModel
when (curModel == Nothing) $ openModel 1
closeResidue :: ParsingMonad t ()
closeResidue = do r <- State.gets currentResidue
when (r /= Nothing) $ do let Just res = r
rc <- State.gets residueContents
rf <- L.finalize rc
cc <- State.gets chainContents
cc' <- L.add cc $ res { Bio.PDB.Structure.atoms = rf }
State.modify clearResidue
where
clearResidue st = st { currentResidue = Nothing }
closeChain :: ParsingMonad t ()
closeChain = do closeResidue
c <- State.gets currentChain
ac <- State.gets chainContents
when (c /= Nothing) $ do l <- State.gets chainContents
l' <- L.finalize l
let Just ch = c
ch' = ch { Bio.PDB.Structure.residues = l' }
m <- State.gets currentModel
when (m == Nothing) $ do mli <- State.gets structureContents
i <- L.tempLength mli
openModel i
addError ["Trying to close chain when currentChain is ",
BS.pack . show $ ch,
" and currentModel is ",
BS.pack . show $ m]
ml <- State.gets modelContents
ml' <- L.add ml ch'
State.modify clearChain
where
clearChain st = st { currentChain = Nothing }
addError :: [String] -> ParsingMonad t ()
addError msg = do e <- State.gets errors
lnref <- State.gets line_no
ln <- lift $ STRef.readSTRef lnref
lift $ STRef.modifySTRef lnref (+1)
L.add e $ anError ln
where anError ln = PDBParseError ln 0 $ BS.concat msg
closeModel :: ParsingMonad t ()
closeModel = do closeChain
cm <- State.gets currentModel
case cm of
Nothing -> return ()
Just m -> do mc <- State.gets modelContents
chs <- L.finalize mc
let m' = m { chains = chs }
sc <- State.gets structureContents
State.modify clearModel
L.add sc m'
where clearModel st = st { currentModel = Nothing }
closeStructure :: ParsingMonad t ()
closeStructure = do closeModel
sc <- State.gets structureContents
sc' <- L.finalize sc
State.modify (closeStructure' sc')
where
closeStructure' sc bstate@(BState { currentStructure = aStructure}) =
bstate { currentStructure = aStructure { models = sc },
structureContents = undefined }
nextLine :: ParsingMonad t ()
nextLine = do lnref <- State.gets line_no
lift $ STRef.modifySTRef lnref (+1)
parseStep pe@(PDBParseError l _ _) = do e <- State.gets errors
L.add e pe
lnref <- State.gets line_no
lift $ STRef.writeSTRef lnref l
parseStep (ATOM { no = atSer,
atomtype = atType,
restype = resName,
chain = chainName,
resid = resSeq,
resins = resInsCode,
altloc = altloc,
coords = atCoord,
occupancy = atOccupancy,
bfactor = atBFactor,
segid = atSegId,
elt = atElement,
charge = atCharge,
hetatm = isHet
}) =
do checkResidue $ RESID (resName, chainName, resSeq, resInsCode)
reslist <- State.gets residueContents
newAtom `seq` L.add reslist newAtom
nextLine
where newAtom = Atom { atName = atType,
atSerial = atSer,
coord = atCoord,
bFactor = atBFactor,
occupancy = atOccupancy,
element = atElement,
segid = atSegId,
charge = atCharge,
hetatm = isHet
}
parseStep (MODEL { num = n }) = do closeModel
openModel n
nextLine
parseStep ENDMDL = do closeModel
nextLine
parseStep END = do closeModel
nextLine
parseStep (TER {..}) = do closeChain
nextLine
parseStep (MASTER {..}) = do closeModel
nextLine
parseStep _ = nextLine
openModel :: Int -> ParsingMonad t ()
openModel n = do l <- L.new L.defaultSize
State.modify $ changeModel l
where changeModel l st = st { currentModel = Just newModel,
modelContents = l }
newModel = Bio.PDB.Structure.Model { modelId = n,
chains = empty }
parseFinish :: ParsingMonad t (Structure, L.List PDBEvent)
parseFinish = do closeStructure
st <- State.gets currentStructure
er <- State.gets errors
er' <- finalize er
st `seq` return (st, er')