module Biobase.RNAstrand.Import where
import Control.Applicative (pure)
import Data.ByteString.Char8 as BS
import Data.Iteratee as I
import Data.Iteratee.Char as I
import Data.Iteratee.IO as I
import Data.Iteratee.ListLike as I
import Data.List as L
import Prelude as P hiding (sequence)
import Biobase.RNAstrand
eneeRNAstrand :: (Monad m) => Enumeratee ByteString [RNAstrand] m a
eneeRNAstrand = enumLinesBS ><> convStream f where
f = do
I.dropWhile BS.null
cs <- I.break (not . BS.isPrefixOf "#")
I.dropWhile BS.null
xs' <- I.break (BS.isPrefixOf "#")
let xs = L.filter (not . BS.null) xs'
let n = L.length xs
if n `mod` 2 == 1
then error $ "can't parse: \n" ++ (BS.unpack $ BS.unlines xs)
else let (se,st) = L.splitAt (n `div` 2) xs
in return . pure $ RNAstrand
{ sequence = BS.concat se
, structure = BS.concat st
, comments = L.map (BS.drop 1) cs
}
fromFile :: FilePath -> IO [RNAstrand]
fromFile fp = run =<< ( enumFile 8192 fp
. joinI
$ eneeRNAstrand stream2list
)