{-# LANGUAGE RankNTypes #-} {-# LANGUAGE RecordWildCards #-} {-# LANGUAGE OverloadedStrings #-} -- | Exports VerboseHit results back into text. As a likely scenario is a -- pipeline where hits are to be filtered out, this provides enumeratee's that -- handle additional annotations as required by the file format for CMs, -- scaffolds, and strand information. If you just need a way to show the data, -- use printVerboseHit. module Biobase.Infernal.VerboseHit.Export where {- ( eeFromVerboseHit ) where -} import Control.Monad.Trans.Class (lift) import qualified Data.ByteString.Char8 as BS import qualified Data.Enumerator as E import qualified Data.Enumerator.List as EL import Text.Printf import Biobase.Infernal.VerboseHit import Biobase.Infernal.VerboseHit.Internal -- | Takes a list of 'VerboseHit's and produces a list of bytestrings. Unlining -- those bytestrings produces a file that is \in essence\ an Infernal -- verbose-hit output file and should be parse-able by ours and other -- importers. -- -- TODO block length (for the alignment of query/sequenc) ?! -- -- TODO is there a more elegant treatment of the eof condition than asking at -- every verbose hit that is created? {- eeFromVerboseHit :: Monad m => E.Enumeratee VerboseHit BS.ByteString m a eeFromVerboseHit = goS (AliGo "" "" '?') where goS s (E.Continue k) = EL.head >>= go s where go s Nothing = return $ E.Continue k go s@AliGo{..} (Just l@VerboseHit{..}) = do eof <- E.isEOF let res = [ "\n//\n" | vhCM /= aliCM && aliCM /= "" ] ++ [ "\nCM: " `BS.append` vhCM `BS.append` "\n" | vhCM /= aliCM] ++ [ "\n>" `BS.append` vhSeqName `BS.append` "\n" | vhSeqName /= aliScaffold] ++ [ strand vhStrand | vhStrand /= aliStrand] ++ [ BS.pack $ printf " Query = %d - %d, Target = %d - %d" (fst vhQuery) (snd vhQuery) (fst vhTarget) (snd vhTarget) , BS.pack $ printf " Score = %.2f, E = %f, P = %.4e, GC = %d" vhScore vhEvalue vhPvalue vhGC , "" , ws11 `BS.append` vhWuss , (BS.pack $ printf "%10d " (fst vhQuery)) `BS.append` vhConsensus `BS.append` (BS.pack $ printf " %d" (snd vhQuery)) , ws11 `BS.append` vhScoring , (BS.pack $ printf "%10d " (fst vhTarget)) `BS.append` vhSequence `BS.append` (BS.pack $ printf " %d" (snd vhTarget)) ] ++ [ "\n//" | eof] newStep <- lift $ E.runIteratee $ k $ E.Chunks res goS (AliGo vhCM vhSequence vhStrand) newStep strand '+' = " Plus strand results:\n" strand '-' = " Minus strand results:\n" strand _ = " Unknown strand results:\n" ws11 = BS.pack $ replicate 11 ' ' goS _ step = return step -} -- | Convert a 'VerboseHit' to a string, ready for printing as in the input -- file. showVerboseHit :: VerboseHit -> BS.ByteString showVerboseHit VerboseHit{..} = BS.unlines [ BS.pack $ printf " Query = %d - %d, Target = %d - %d" (fst vhQuery) (snd vhQuery) (fst vhTarget) (snd vhTarget) , BS.pack $ printf " Score = %.2f, E = %f, P = %.4e, GC = %d" vhScore vhEvalue vhPvalue vhGC , "" , ws11 `BS.append` vhWuss , (BS.pack $ printf "%10d " (fst vhQuery)) `BS.append` vhConsensus `BS.append` (BS.pack $ printf " %d" (snd vhQuery)) , ws11 `BS.append` vhScoring , (BS.pack $ printf "%10d " (fst vhTarget)) `BS.append` vhSequence `BS.append` (BS.pack $ printf " %d" (snd vhTarget)) ] where ws11 = BS.pack $ replicate 11 ' ' -- | CM information, ready for printing. showCM :: BS.ByteString -> BS.ByteString showCM cm = "CM: " `BS.append` cm -- | Scaffold information showScaffold :: BS.ByteString -> BS.ByteString showScaffold sc = ">" `BS.append` sc -- | Strand information showStrand :: Char -> BS.ByteString showStrand = f where f '+' = " Plus strand results:" f '-' = " Minus strand results:" f _ = " Unknown strand results:" -- | Turning a list of 'VerboseHit's back into lines of characters is, in -- principle, not too hard. But just before we actually stream out, we might -- want to inject arbitrary data into the stream. This is done via -- 'StreamInsertion'. The other constructors merely wrap certain data. -- -- One way to, say, tag verbose hits is like this (note the output type of -- 'eeHitToStream'): -- -- > tag [s@(StreamVerboseHit _)] = [StreamInsertion (), s] -- > tag xs = xs data HitStream a = StreamVerboseHit {streamVerboseHit :: VerboseHit} | StreamCM {streamCM :: BS.ByteString} | StreamScaffold {streamScaffold :: BS.ByteString} | StreamStrand {streamStrand :: Char} | StreamInsertion {streamInsertion :: a} deriving (Show) -- | This enumeratee turns 'VerboseHit's into a 'HitStream'. Each VerboseHit -- can emit one or more elements, depending on if the CM, scaffold, or strand -- changes. -- -- TODO try to rewrite use Control.Monad.State eeHitToStream :: Monad m => E.Enumeratee VerboseHit [HitStream z] m a eeHitToStream = EL.mapAccum go (AliGo "" "" '?') where go AliGo{..} vh@VerboseHit{..} = (AliGo vhCM vhScaffold vhStrand, [StreamCM vhCM | aliCM /= vhCM] ++ [StreamScaffold vhScaffold | aliCM /= vhCM || aliScaffold /= vhScaffold] ++ [StreamStrand vhStrand | aliCM /= vhCM || aliScaffold /= vhScaffold || aliStrand /= vhStrand] ++ [StreamVerboseHit vh] ) -- | Flattens a stream from a list of lists to a single list. After this point, -- you probably want to insert elements into the stream, then flatten again. eeFlattenStream :: Monad m => E.Enumeratee [HitStream z] (HitStream z) m a eeFlattenStream = EL.concatMap id -- | If the 'HitStream' contains 'StreamInsertion's that are an instance of -- 'Show', this provides a default method to turn the stream into a bytestring. -- -- TODO add some newline characters for good measure -- -- TODO on end-of-stream, we should print out "//" eeStreamToByteString :: (Monad m, Show z) => StreamToByteString m z eeStreamToByteString = EL.map f where f StreamVerboseHit{..} = showVerboseHit streamVerboseHit `BS.snoc` '\n' f StreamCM{..} = showCM streamCM `BS.append` "\n\n" f StreamScaffold{..} = showScaffold streamScaffold `BS.append` "\n\n" f StreamStrand{..} = showStrand streamStrand `BS.append` "\n\n" f StreamInsertion{..} = BS.pack . show $ streamInsertion eeStreamToByteString' :: (Monad m) => StreamToByteString m () eeStreamToByteString' = eeStreamToByteString type StreamToByteString m z = forall a . E.Enumeratee (HitStream z) BS.ByteString m a