{-# LANGUAGE DoAndIfThenElse #-}
{-# LANGUAGE OverloadedStrings #-}

module Biobase.RNAstrand.Import where

import Control.Applicative (pure)
import Data.ByteString.Char8 as BS
import Data.Iteratee as I
import Data.Iteratee.Char as I
import Data.Iteratee.IO as I
import Data.Iteratee.ListLike as I
import Data.List as L
import Prelude as P hiding (sequence)

import Biobase.RNAstrand



-- | RNAstrand element creation.

eneeRNAstrand :: (Monad m) => Enumeratee ByteString [RNAstrand] m a
eneeRNAstrand = enumLinesBS ><> convStream f where
  f = do
    I.dropWhile BS.null
    cs <- I.break (not . BS.isPrefixOf "#") -- comments
    I.dropWhile BS.null
    xs' <- I.break (BS.isPrefixOf "#") -- sequence/structure lines
    let xs = L.filter (not . BS.null) xs'
    let n = L.length xs
    if n `mod` 2 == 1
    then error $ "can't parse: \n" ++ (BS.unpack $ BS.unlines xs)
    else let (se,st) = L.splitAt (n `div` 2) xs
         in return . pure $ RNAstrand
              { sequence  = BS.concat se
              , structure = BS.concat st
              , comments  = L.map (BS.drop 1) cs
              }

-- | Convenience function.

fromFile :: FilePath -> IO [RNAstrand]
fromFile fp = run =<< ( enumFile 8192 fp
                      . joinI
                      $ eneeRNAstrand stream2list
                      )