{-# LANGUAGE PatternGuards #-}
{-# LANGUAGE OverloadedStrings #-}

-- | Imports an extended secondary structure from bytestring. Such a structure
-- is defined as:
--
-- # comment 1      -- comments
-- # comment ...    -- comments
-- # comment n      -- comments
-- AUGACUACUAGC     -- sequence part
--    3  9  cWW     -- detailed pair information (indented)

module Biobase.ExtSS.Import where

import Control.Applicative (pure)
import Data.ByteString.Char8 as BS
import Data.Iteratee as I
import Data.Iteratee.Char as I
import Data.Iteratee.IO as I
import Data.Iteratee.ListLike as I
import Prelude as P hiding (sequence)
import Data.Char (isSpace)

import Biobase.ExtSS



-- | Convert input bytestring into a list of extended secondary structures.

eneeExtSS :: (Monad m) => Enumeratee ByteString [ExtSS] m a
eneeExtSS = enumLinesBS ><> convStream f where
  f = do
    cs <- I.break (not . isPrefixOf "#")  -- comments first
    seq <- I.head -- sequence line
    str <- I.head -- structure line
    xs <- I.break (not . isPrefixOf " ")  -- extended pair information
    return . pure $ ExtSS
      { sequence  = seq
      , structure = str
      , detailed  = P.map convLine xs
      , comments  = P.map (BS.drop 1) cs
      }

-- | Convert a line into an extended pair.

convLine :: ByteString -> (Int,Int,ByteString)
convLine x
  | Just (i,s) <- readInt $ BS.dropWhile isSpace x
  , Just (j,t) <- readInt $ BS.dropWhile isSpace s
  = (i,j,BS.dropWhile isSpace t)
  | otherwise = error $ "can not parse line: " ++ unpack x

-- | Convenience function

fromFile :: FilePath -> IO [ExtSS]
fromFile fp = run =<< ( enumFile 8192 fp
                      . joinI
                      $ eneeExtSS stream2list
                      )