module Biobase.Infernal.VerboseHit.Import
( eneeVerboseHit
, fromFile
) where
import Control.Applicative
import Control.Monad as M
import Data.Attoparsec as A
import Data.Attoparsec.Char8 as A8
import Data.Attoparsec.Iteratee as EAP
import Data.ByteString.Char8 as BS
import Data.Either.Unwrap
import Data.Iteratee as I
import Data.Iteratee.Char as I
import Data.Iteratee.IO as I
import Data.Iteratee.Iteratee as I
import Data.Iteratee.ListLike as I
import Data.Tuple.Select
import Prelude as P
import Biobase.Infernal.Types
import Biobase.Infernal.VerboseHit
import Biobase.Infernal.VerboseHit.Internal
-- | Enumeratee turning a raw 'BS.ByteString' stream into a stream of
-- 'VerboseHit's.
--
-- The input is split into lines, empty lines are discarded, and the remaining
-- lines are folded with a running state (the 'AliGo' record) that remembers the
-- current CM, scaffold, strand and pending annotation lines:
--
-- * @##...@ lines are appended (minus the @##@) to the pending annotations;
-- * @CM: ...@, @>...@ and the plus/minus strand banners update the matching
--   state field and clear the pending annotations;
-- * a line containing @" Query"@ starts a hit, which is parsed by 'qs' and
--   emitted; the pending annotations are handed to the hit and then cleared;
-- * every other line is skipped.
eneeVerboseHit :: (Functor m, Monad m) => Enumeratee BS.ByteString [VerboseHit] m a
eneeVerboseHit = enumLinesBS ><> I.filter (not . BS.null) ><> unfoldConvStream step start
  where
    -- Initial state: no CM, no scaffold, unknown strand, no annotations.
    start = AliGo BS.empty BS.empty '?' []

    -- Keep the given state and emit no hits for this step.
    skip st = return (st, [])

    -- Consume one line (if any) and update the state accordingly.
    step st = do
      next <- tryHead
      case next of
        Nothing -> skip st
        Just ln -> classify st ln

    -- The guards are tried top to bottom; the first matching one wins.
    classify st ln
      | "##" `isPrefixOf` ln =
          skip (st { aliAnnotation = aliAnnotation st ++ [BS.drop 2 ln] })
      | "CM: " `isPrefixOf` ln =
          skip (st { aliCM = BS.copy $ BS.drop 4 ln, aliAnnotation = [] })
      | ">" `isPrefixOf` ln =
          skip (st { aliScaffold = BS.copy $ BS.drop 1 ln, aliAnnotation = [] })
      | "Plus strand results" `isInfixOf` ln =
          skip (st { aliStrand = '+', aliAnnotation = [] })
      | "Minus strand results" `isInfixOf` ln =
          skip (st { aliStrand = '-', aliAnnotation = [] })
      | " Query" `isInfixOf` ln = do
          hits <- qs ln (aliCM st) (aliScaffold st) (aliStrand st) (aliAnnotation st)
          return (st { aliAnnotation = [] }, hits)
      | otherwise = skip st
-- | Parse one hit from the line stream and return it as a singleton list.
--
-- Arguments, in order: the already consumed @Query = a - b, Target = c - d@
-- line, the current CM name, the current scaffold name, the strand character
-- and the annotation lines collected for this hit.
--
-- The next line of the stream must be the @Score = .., E = .., P = .., GC = ..@
-- line. After it, alignment blocks of four lines each are consumed until a
-- line that starts another section is seen or the stream ends. The pieces of
-- each of the four rows are concatenated over all blocks.
--
-- A line that does not match its parser makes the corresponding fields of the
-- result raise an 'error' when forced; the message names the parser, the
-- offending line and the attoparsec failure.
qs :: Monad m => ByteString -> ByteString -> ByteString -> Char -> [ByteString] -> Iteratee [ByteString] m [VerboseHit]
qs query cm scaf pm anno = do
  let q = parseOrDie "Query/Target" qt query
  scoreLine <- I.head
  let s = parseOrDie "Score/E/P/GC" sepg scoreLine
  l <- fourLines
  -- 'fourLines' always yields exactly four rows; the fallback only keeps the
  -- match total.
  let (wuss, cons, scor, sqn) = case l of
        [a, b, c, d] -> (a, b, c, d)
        _            -> ([], [], [], [])
  return . pure $ VerboseHit
    { vhTarget = Scaffold scaf
    , vhModel = ModelIdentification cm
    , vhStrand = pm
    , vhModelStart = sel1 q
    , vhModelStop = sel2 q
    , vhTargetStart = sel3 q
    , vhTargetStop = sel4 q
    , vhBitScore = BitScore $ sel1 s
    , vhEvalue = sel2 s
    , vhPvalue = sel3 s
    , vhGCpercent = sel4 s
    , vhWuss = cpy wuss
    , vhConsensus = cpy cons
    , vhScoring = cpy scor
    , vhSequence = cpy sqn
    , vhAnnotation = anno
    }
  where
    -- Join the per-block pieces of one row and copy the result into a fresh
    -- buffer.
    cpy = BS.copy . BS.concat

    -- Run a parser on one line. On failure, produce a (lazy) error that says
    -- which line could not be parsed instead of a generic unwrap failure.
    parseOrDie what p line = case parseOnly p line of
      Right v  -> v
      Left msg -> P.error $
        "Biobase.Infernal.VerboseHit.Import.qs: cannot parse " ++ what
          ++ " line " ++ show (BS.unpack line) ++ ": " ++ msg

    -- " Query = a - b, Target = c - d"
    qt = (,,,) <$ A.string " Query = " <*> A8.decimal <* A.string " - " <*> A8.decimal
               <* A.string ", Target = " <*> A8.decimal <* A.string " - " <*> A8.decimal
               <?> "qt"

    -- " Score = x, E = y, P = z, GC = n"
    sepg = (,,,) <$ A.string " Score = " <*> A8.double
                 <* A.string ", E = " <*> A8.double
                 <* A.string ", P = " <*> A8.double
                 <* A.string ", GC = " <* A8.skipSpace <*> A8.decimal
                 <?> "sepg"

    -- True for a line that cannot be the first line of an alignment block
    -- because it belongs to the next section of the report.
    endsAlignment p =
         "//" `isInfixOf` p
      || "CM" `isInfixOf` p
      || "Query" `isInfixOf` p
      || ">" `isPrefixOf` p
      || "strand" `isInfixOf` p
      || "#" `isPrefixOf` p

    -- Collect alignment blocks. The result always has exactly four rows
    -- (in stream order; 'qs' stores them as wuss, consensus, scoring,
    -- sequence), each row being the list of its pieces, one piece per block.
    fourLines = do
      I.dropWhile BS.null
      mp <- I.peek
      case mp of
        Just p | not (endsAlignment p) -> do
          ls <- joinI $ I.take 4 stream2stream
          -- The peeked line is the first line of the block. Its leading
          -- whitespace gives the column offset and its remaining length gives
          -- the column width used to cut all four lines of the block.
          let ws = BS.length . BS.takeWhile isSpace $ p
              cs = BS.length . BS.dropWhile isSpace $ p
              -- Pad with empty pieces so that a block cut short by the end of
              -- the stream still contributes to all four rows.
              xs = P.take 4 $ P.map (BS.take cs . BS.drop ws) ls ++ P.repeat BS.empty
          rest <- fourLines
          return $ P.zipWith (:) xs rest
        _ -> return $ P.replicate 4 []
-- | Read the file at the given path in chunks of 8192 bytes, run the chunks
-- through 'eneeVerboseHit' and return all resulting hits as a list.
fromFile :: FilePath -> IO [VerboseHit]
fromFile fp = enumFile chunkSize fp (joinI (eneeVerboseHit stream2list)) >>= run
  where
    chunkSize = 8192