{-# LANGUAGE OverloadedStrings #-}

-- | Streaming access to the per-lane files of a sequencer run directory.
--
-- The module exposes a single 'Producer', 'nextSeqProducer', which walks the
-- lane directories of a run and emits one 'Cluster' per record read from the
-- lane's @.bgzf@, @.locs@, @.filter@ and @.bci@ files.
module Pipes.NextSeq where

import Control.Applicative
import Control.Monad
import System.IO
import qualified Data.ByteString as B
import Data.ByteString (ByteString)
import Data.ByteString.Unsafe
import Data.List
import Pipes
import qualified Pipes.Prelude as P
import Pipes.Illumina
import System.FilePath
import System.Directory

-- | One record of the zipped per-lane streams built by 'nextSeqProducer'.
data Cluster = Cluster
  { sq :: !ByteString
    -- ^ First component of the pair yielded by 'bclBgzfProducer'
    -- (presumably the base calls; confirm against "Pipes.Illumina").
  , qual :: !ByteString
    -- ^ Second component of the pair yielded by 'bclBgzfProducer'
    -- (presumably the quality string; confirm against "Pipes.Illumina").
  , xCoord :: !Float
    -- ^ First coordinate yielded by 'locsProducer'.
  , yCoord :: !Float
    -- ^ Second coordinate yielded by 'locsProducer'.
  , passedFilter :: !Bool
    -- ^ Value yielded by 'filterProducer' for this record.
  , lane :: !Int
    -- ^ Lane number (1 to 4) whose files this record was read from.
  , tile :: !Int
    -- ^ Tile number taken from 'bciProducer'.
  }

-- | Emit every 'Cluster' of a run directory, lane by lane (lanes 1 to 4).
--
-- For lane @n@ the following files are opened, in this order:
--
-- * @\<rundir\>\/Data\/Intensities\/L00n\/s_n.locs@
-- * @\<rundir\>\/Data\/Intensities\/BaseCalls\/L00n\/s_n.filter@
-- * @\<rundir\>\/Data\/Intensities\/BaseCalls\/L00n\/s_n.bci@
-- * every @.bgzf@ file in the @BaseCalls\/L00n@ directory, sorted by name
--
-- The four resulting streams are zipped, so a lane ends as soon as the
-- shortest of them is exhausted.
--
-- NOTE(review): the handles are closed only after the zipped stream of a lane
-- has run to completion. If the downstream consumer stops pulling early, or
-- an exception is thrown mid-stream, the close calls are presumably never
-- reached; a guaranteed release would need something like pipes-safe.
nextSeqProducer :: FilePath -> Producer Cluster IO ()
nextSeqProducer rundir = mapM_ laneClusters [1 .. 4]
  where
    -- Path components shared by every per-lane file.
    intensityDir :: [FilePath]
    intensityDir = [rundir, "Data", "Intensities"]

    -- Stream all clusters of a single lane.
    laneClusters :: Int -> Producer Cluster IO ()
    laneClusters ln = do
      let laneDir = "L00" ++ show ln
          -- Lane-wide file name for the given extension, e.g. "s_1.locs".
          laneFile ext = "s_" ++ show ln ++ ext
          bclDir = joinPath (intensityDir ++ ["BaseCalls", laneDir])
          openRead parts = openFile (joinPath parts) ReadMode

      entries <- liftIO (getDirectoryContents bclDir)
      -- Sorting by file name fixes the order in which the .bgzf files are
      -- handed to 'bclBgzfProducer'.
      let bclNames = sort (filter ((== ".bgzf") . takeExtension) entries)

      locsH <- liftIO $ openRead (intensityDir ++ [laneDir, laneFile ".locs"])
      filtH <- liftIO $ openRead [bclDir, laneFile ".filter"]
      bciH <- liftIO $ openRead [bclDir, laneFile ".bci"]
      bclHs <- liftIO $ forM bclNames (\name -> openRead [bclDir, name])

      let -- Repeat each tile number once per record counted for that tile, so
          -- the tile stream lines up one-to-one with the other streams.
          tiles =
            for (bciProducer bciH) $ \(tileNo, n) ->
              replicateM_ n (yield tileNo)
          zipped =
            P.zip tiles
              (P.zip (filterProducer filtH)
                 (P.zip (locsProducer locsH) (bclBgzfProducer bclHs)))
          toCluster (tileNo, (pf, ((x, y), (bases, quals)))) =
            Cluster bases quals x y pf ln tileNo

      for zipped (yield . toCluster)

      -- Release the handles in the same order in which they were opened.
      liftIO $ mapM_ hClose (locsH : filtH : bciH : bclHs)