-- Import / parse FR3D basepairs files. module Biobase.DataSource.FR3D.Import where import Control.Applicative import Control.Monad import Data.Either import Data.List import System.Directory import System.FilePath import System.IO.Unsafe import Text.Parsec.Char import Text.Parsec.Combinator hiding (optional) import Text.Parsec.Error import Text.Parsec hiding ((<|>), many, optional) import Text.Parsec.String import qualified Data.Map as M import Control.Arrow (first) import Biobase.DataSource.FR3D -- * New default importer -- | import a number of 'FR3D' structures. fromDir :: FilePath -> IO ([FR3D], [ParseError]) fromDir dirname = do (bps,errs) <- importFR3DBasepairsFiles dirname return (map mkFR3D bps,errs) fromFile :: FilePath -> IO (Either ParseError FR3D) fromFile fname = do bp <- importFR3DBasepairsFile fname return $ mkFR3D `fmap` bp -- * Older functions -- | Parse a complete FR3D basepairs file and return a 'FR3DBasepairs' structure. -- Does not set the 'pdbID', that one is encoded in the filename. (And in each -- individual 'Pair'...). parseFR3DBasepairs :: GenParser Char st FR3DBasepairs parseFR3DBasepairs = mk <$> comments <* pdbdesc <*> many (parsePair <* newline) where comments = many commentline commentline = char '#' *> manyTill anyChar newline pdbdesc = string "PDB_ID" *> manyTill anyChar newline mk cs bps = let -- TODO this is too fragile! (v:_:chns) = cs f (x:y:_) = (x,y) in FR3DBasepairs "" v (M.fromList $ map (f . words) chns) {-(map ((\[n,c] -> (read n,c)) . drop 1 . words) chns)-} bps cs -- | One line of connected nucleotides forms a 'Pair' parsePair :: GenParser Char st Pair parsePair = Pair <$ mtas <*> interact <*> parseNucleotide <* space <*> parseNucleotide where interact = mtas -- | One of the two 'Nucleotide's in a 'Pair' parseNucleotide :: GenParser Char st Nucleotide parseNucleotide = Nucleotide <$> anyChar <* space <*> mtas <*> mtas <*> number where number = read <$> many1 digit -- | Given a directory, try to import all files matching "_basepairs_FR3D.txt". -- In the result (as,bs), the as are the successful parses, while bs failed. -- For now (NOV 2010), bs is empty ;-) importFR3DBasepairsFiles :: FilePath -> IO ([FR3DBasepairs], [Text.Parsec.Error.ParseError]) importFR3DBasepairsFiles d = do de <- doesDirectoryExist d if de then do cnts <- getDirectoryContents d let cands = filter ("basepairs_FR3D.txt" `isSuffixOf`) cnts (psL,psR) <- partitionEithers `fmap` mapM (pFF d) cands return (psR, psL) else return ([], []) pFF d c = unsafeInterleaveIO . fmap (fmap (\x -> x{pdbID = take 4 c})) . parseFromFile parseFR3DBasepairs $ d c -- | Try to import just one basepair file. importFR3DBasepairsFile :: FilePath -> IO (Either Text.Parsec.Error.ParseError FR3DBasepairs) importFR3DBasepairsFile f = fmap (fmap (\x -> x{pdbID = take 4 f})) $ parseFromFile parseFR3DBasepairs f -- * helper functions mtas = manyTill anyChar space