{-# LANGUAGE OverloadedStrings #-}

-- Importing tabular hits is rather easy, as they are one entry per line.

module Biobase.Infernal.TabularHit.Import where

import Data.ByteString.Char8 as BS
import Data.Iteratee as I
import Data.Iteratee.Iteratee as I
import Data.Iteratee.ListLike as I
import Data.Iteratee.Char as I
import Data.Either.Unwrap
import Data.Attoparsec as A hiding (takeTill)
import Data.Attoparsec.Char8 as A
import Control.Applicative
import Data.Iteratee.IO as I

import Biobase.Infernal.TabularHit
import Biobase.Infernal.Types



-- | Transform a stream into tabular hits.

eneeTabularHit :: (Functor m, Monad m) => Enumeratee ByteString [TabularHit] m a
eneeTabularHit = enumLinesBS ><> I.filter (\x -> not $ BS.null x || isPrefixOf "#" x) ><> mapStream f where
  f = fromRight . parseOnly p
  mkTH mName tName tStart tStop qStart qStop bScore eValue gc = TabularHit
    (ModelIdentification tName)
    (Scaffold tName)
    tStart
    tStop
    qStart
    qStop
    (BitScore bScore)
    eValue
    gc
  p = mkTH <$> pString  -- model name
           <*> pString  -- target name
           <*> pDecimal -- target start
           <*> pDecimal -- target stop
           <*> pDecimal -- query start
           <*> pDecimal -- query stop
           <*> pDouble  -- bit score
           <*> pDouble  -- evalue
           <*> pDecimal -- gc content
  pString  = A.skipSpace *> A.takeTill A.isSpace
  pDecimal = A.skipSpace *> A.decimal
  pDouble  = A.skipSpace *> A.double

-- | Convenience function to load from file and return a big list of tabular
-- hits.

fromFile :: FilePath -> IO [TabularHit]
fromFile fp = do
  i <- enumFile 8192 fp . joinI $ eneeTabularHit stream2stream
  run i