{-# LANGUAGE OverloadedStrings #-} -- Importing tabular hits is rather easy, as they are one entry per line. module Biobase.Infernal.TabularHit.Import where import Data.ByteString.Char8 as BS import Data.Iteratee as I import Data.Iteratee.Iteratee as I import Data.Iteratee.ListLike as I import Data.Iteratee.Char as I import Data.Either.Unwrap import Data.Attoparsec as A hiding (takeTill) import Data.Attoparsec.Char8 as A import Control.Applicative import Data.Iteratee.IO as I import Biobase.Infernal.TabularHit -- | Transform a stream into tabular hits. eneeTabularHit :: (Functor m, Monad m) => Enumeratee ByteString [TabularHit] m a eneeTabularHit = enumLinesBS ><> I.filter (\x -> not $ BS.null x || isPrefixOf "#" x) ><> mapStream f where f = fromRight . parseOnly p p = TabularHit <$> pString -- model name <*> pString -- target name <*> pDecimal -- target start <*> pDecimal -- target stop <*> pDecimal -- query start <*> pDecimal -- query stop <*> pDouble -- bit score <*> pDouble -- evalue <*> pDecimal -- gc content pString = A.skipSpace *> A.takeTill A.isSpace pDecimal = A.skipSpace *> A.decimal pDouble = A.skipSpace *> A.double -- | Convenience function to load from file and return a big list of tabular -- hits. fromFile :: FilePath -> IO [TabularHit] fromFile fp = do i <- enumFile 8192 fp . joinI $ eneeTabularHit stream2stream run i