{-# LANGUAGE PatternGuards #-} {-# LANGUAGE OverloadedStrings #-} -- | Imports an extended secondary structure from bytestring. Such a structure -- is defined as: -- -- # comment 1 -- comments -- # comment ... -- comments -- # comment n -- comments -- AUGACUACUAGC -- sequence part -- 3 9 cWW -- detailed pair information (indented) module Biobase.ExtSS.Import where import Control.Applicative (pure) import Data.ByteString.Char8 as BS import Data.Iteratee as I import Data.Iteratee.Char as I import Data.Iteratee.IO as I import Data.Iteratee.ListLike as I import Prelude as P hiding (sequence) import Data.Char (isSpace) import Biobase.ExtSS -- | Convert input bytestring into a list of extended secondary structures. eneeExtSS :: (Monad m) => Enumeratee ByteString [ExtSS] m a eneeExtSS = enumLinesBS ><> convStream f where f = do cs <- I.break (not . isPrefixOf "#") -- comments first seq <- I.head -- sequence line str <- I.head -- structure line xs <- I.break (not . isPrefixOf " ") -- extended pair information return . pure $ ExtSS { sequence = seq , structure = str , detailed = P.map convLine xs , comments = P.map (BS.drop 1) cs } -- | Convert a line into an extended pair. convLine :: ByteString -> (Int,Int,ByteString) convLine x | Just (i,s) <- readInt $ BS.dropWhile isSpace x , Just (j,t) <- readInt $ BS.dropWhile isSpace s = (i,j,BS.dropWhile isSpace t) | otherwise = error $ "can not parse line: " ++ unpack x -- | Convenience function fromFile :: FilePath -> IO [ExtSS] fromFile fp = run =<< ( enumFile 8192 fp . joinI $ eneeExtSS stream2list )