{-# LANGUAGE DoAndIfThenElse #-} {-# LANGUAGE ScopedTypeVariables #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RecordWildCards #-} -- | Enumeratee that transforms a stream of 'ByteString's into a stream of -- 'VerboseHit's. module Biobase.Infernal.VerboseHit.Import ( eneeVerboseHit , fromFile ) where import Control.Applicative import Control.Monad as M import Data.Attoparsec as A import Data.Attoparsec.Char8 as A8 import Data.Attoparsec.Iteratee as EAP import Data.ByteString.Char8 as BS import Data.Either.Unwrap import Data.Iteratee as I import Data.Iteratee.Char as I import Data.Iteratee.IO as I import Data.Iteratee.Iteratee as I import Data.Iteratee.ListLike as I import Data.Tuple.Select import Prelude as P import Biobase.Infernal.Types import Biobase.Infernal.VerboseHit import Biobase.Infernal.VerboseHit.Internal -- | Transforms a stream into verbose hits. We need to keep a state in the -- accumulator to keep track of the current CM, scaffold and strand. eneeVerboseHit :: (Functor m, Monad m) => Enumeratee BS.ByteString [VerboseHit] m a eneeVerboseHit = enumLinesBS ><> I.filter (not . BS.null) ><> unfoldConvStream f (AliGo BS.empty BS.empty '?' []) where f acc = do h' <- tryHead case h' of Nothing -> return (acc, []) (Just h) | "##" `isPrefixOf` h -> return (acc{aliAnnotation = aliAnnotation acc ++ [BS.drop 2 h]},[]) | "CM: " `isPrefixOf` h -> return (acc{aliCM = BS.copy $ BS.drop 4 h, aliAnnotation = []}, []) | ">" `isPrefixOf` h -> return (acc{aliScaffold = BS.copy $ BS.drop 1 h, aliAnnotation = []}, []) | "Plus strand results" `isInfixOf` h -> return (acc{aliStrand = '+', aliAnnotation = []}, []) | "Minus strand results" `isInfixOf` h -> return (acc{aliStrand = '-', aliAnnotation = []}, []) | " Query" `isInfixOf` h -> do x <- qs h (aliCM acc) (aliScaffold acc) (aliStrand acc) (aliAnnotation acc) return (acc{aliAnnotation = []},x) | otherwise -> return (acc,[]) -- | Parses one CM query result. qs :: Monad m => ByteString -> ByteString -> ByteString -> Char -> [ByteString] -> Iteratee [ByteString] m [VerboseHit] qs query cm scaf pm anno = do let q = fromRight . parseOnly qt $ query s <- I.head >>= return . fromRight . parseOnly sepg l <- fourLines $ sel4 q return . pure $ VerboseHit { vhTarget = Scaffold scaf , vhModel = ModelIdentification cm , vhStrand = pm , vhModelStart = sel1 q , vhModelStop = sel2 q , vhTargetStart = sel3 q , vhTargetStop = sel4 q , vhBitScore = BitScore $ sel1 s , vhEvalue = sel2 s , vhPvalue = sel3 s , vhGCpercent = sel4 s , vhWuss = cpy $ l!!0 , vhConsensus = cpy $ l!!1 , vhScoring = cpy $ l!!2 , vhSequence = cpy $ l!!3 , vhAnnotation = anno } where cpy = BS.copy . BS.concat qt = (,,,) <$ A.string " Query = " <*> A8.decimal <* A.string " - " <*> A8.decimal <* A.string ", Target = " <*> A8.decimal <* A.string " - " <*> A8.decimal "qt" sepg = (,,,) <$ A.string " Score = " <*> A8.double <* A.string ", E = " <*> A8.double <* A.string ", P = " <*> A8.double <* A.string ", GC = " <* A8.skipSpace <*> A8.decimal "sepg" -- | Parses multiple four-line elements. fourLinesOld to = do I.dropWhile BS.null ls <- joinI $ I.take 4 stream2stream let ws = BS.length . BS.takeWhile isSpace . P.head $ ls let cs = BS.length . BS.dropWhile isSpace . P.head $ ls let xs = P.map (BS.take cs . BS.drop ws) ls if (P.length ls /= 4) || ("-" == (P.last . BS.words . P.last $ ls)) || (to == (read . BS.unpack . P.last . BS.words . P.last $ ls)) then return . P.map (:[]) $ xs else fourLines to >>= return . (P.zipWith (:) xs) fourLines to = do I.dropWhile BS.null mp <- I.peek case mp of Nothing -> return $ P.replicate 4 [] Just p | "//" `isInfixOf` p || "CM" `isInfixOf` p || "Query" `isInfixOf` p || ">" `isPrefixOf` p || "strand" `isInfixOf` p || "#" `isPrefixOf` p -> return $ P.replicate 4 [] | otherwise -> do ls <- joinI $ I.take 4 stream2stream let ws = BS.length . BS.takeWhile isSpace . P.head $ ls let cs = BS.length . BS.dropWhile isSpace . P.head $ ls let xs = P.map (BS.take cs . BS.drop ws) ls fourLines to >>= return . (P.zipWith (:) xs) -- | Convenience function: read all results into a single list. fromFile :: FilePath -> IO [VerboseHit] fromFile fp = do i <- enumFile 8192 fp . joinI $ eneeVerboseHit stream2list run i -- How to use this enumeratee. {- test = do i <- enumFile 8192 "test.vh" $ joinI $ eneeVerboseHit stream2list xs <- run i P.mapM_ (\x -> print x >> P.putStrLn "\n\n\n") xs print $ P.length xs -}