-- | Parsers for BAM and SAM.

module Bio.Bam.Reader (
    decodeBam,
    decodeBamFile,
    decodeBamFiles,
    IncompatibleRefs(..),

    decodePlainBam,
    decodePlainSam,
    getBamMeta,
    getBamRaw,
    getSamRec,

    concatInputs,
    mergeInputsOn,
    guardRefCompat,
    coordinates,
    qnames
                      ) where

import Bio.Bam.Header
import Bio.Bam.Rec
import Bio.Bam.Writer               ( packBam )
import Bio.Streaming
import Bio.Streaming.Bgzf           ( getBgzfHdr, bgunzip )
import Bio.Prelude
import Data.Attoparsec.ByteString   ( anyWord8 )

import qualified Data.Attoparsec.ByteString.Char8   as P
import qualified Data.ByteString                    as B
import qualified Data.ByteString.Char8              as C
import qualified Data.HashMap.Strict                as M
import qualified Data.Vector.Generic                as V
import qualified Data.Vector.Storable               as W
import qualified Data.Vector.Unboxed                as U
import qualified Bio.Streaming.Bytes                as S
import qualified Bio.Streaming.Parse                as S
import qualified Streaming.Prelude                  as Q

{- | Decodes either BAM or SAM.

The input can be plain, gzip'ed or bgzf'd and either BAM or SAM.  BAM
is reliably recognized, anything else is treated as SAM.  The offsets
stored in BAM records make sense only for uncompressed or bgzf'd BAM.
-}
decodeBam :: (MonadIO m, MonadLog m)
          => S.ByteStream m r
          -> m (BamMeta, Stream (Of BamRaw) m r)
decodeBam :: ByteStream m r -> m (BamMeta, Stream (Of BamRaw) m r)
decodeBam = ByteStream m r -> m (Maybe Int, ByteString, ByteStream m r)
forall (m :: * -> *) r.
Monad m =>
ByteStream m r -> m (Maybe Int, ByteString, ByteStream m r)
getBgzfHdr (ByteStream m r -> m (Maybe Int, ByteString, ByteStream m r))
-> ((Maybe Int, ByteString, ByteStream m r)
    -> m (BamMeta, Stream (Of BamRaw) m r))
-> ByteStream m r
-> m (BamMeta, Stream (Of BamRaw) m r)
forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> Int -> ByteStream m r -> m (Of ByteString (ByteStream m r))
forall (m :: * -> *) r.
Monad m =>
Int -> ByteStream m r -> m (Of ByteString (ByteStream m r))
S.splitAt' 4 (ByteStream m r -> m (Of ByteString (ByteStream m r)))
-> ((Maybe Int, ByteString, ByteStream m r) -> ByteStream m r)
-> (Maybe Int, ByteString, ByteStream m r)
-> m (Of ByteString (ByteStream m r))
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Maybe Int, ByteString, ByteStream m r) -> ByteStream m r
forall (m :: * -> *) a r.
MonadIO m =>
(Maybe a, ByteString, ByteStream m r) -> ByteStream m r
pgunzip ((Maybe Int, ByteString, ByteStream m r)
 -> m (Of ByteString (ByteStream m r)))
-> (Of ByteString (ByteStream m r)
    -> m (BamMeta, Stream (Of BamRaw) m r))
-> (Maybe Int, ByteString, ByteStream m r)
-> m (BamMeta, Stream (Of BamRaw) m r)
forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> Of ByteString (ByteStream m r)
-> m (BamMeta, Stream (Of BamRaw) m r)
forall (m :: * -> *) r.
(MonadLog m, MonadIO m) =>
Of ByteString (ByteStream m r)
-> m (BamMeta, Stream (Of BamRaw) m r)
unbam
  where
    unbam :: Of ByteString (ByteStream m r)
-> m (BamMeta, Stream (Of BamRaw) m r)
unbam ("BAM\SOH" :> s :: ByteStream m r
s) = ByteStream m r -> m (BamMeta, Stream (Of BamRaw) m r)
forall (m :: * -> *) r.
MonadLog m =>
ByteStream m r -> m (BamMeta, Stream (Of BamRaw) m r)
decodePlainBam ByteStream m r
s
    unbam (magic :: ByteString
magic     :> s :: ByteStream m r
s) = ByteStream m r -> m (BamMeta, Stream (Of BamRaw) m r)
forall (m :: * -> *) r.
(MonadLog m, MonadIO m) =>
ByteStream m r -> m (BamMeta, Stream (Of BamRaw) m r)
decodePlainSam (ByteString -> ByteStream m r -> ByteStream m r
forall (m :: * -> *) r.
ByteString -> ByteStream m r -> ByteStream m r
S.consChunk ByteString
magic ByteStream m r
s)

    pgunzip :: (Maybe a, ByteString, ByteStream m r) -> ByteStream m r
pgunzip (Nothing, hdr :: ByteString
hdr, s :: ByteStream m r
s) = ByteStream m r -> ByteStream m r
forall (m :: * -> *) r.
MonadIO m =>
ByteStream m r -> ByteStream m r
S.gunzip (ByteString -> ByteStream m r -> ByteStream m r
forall (m :: * -> *) r.
ByteString -> ByteStream m r -> ByteStream m r
S.consChunk ByteString
hdr ByteStream m r
s)
    pgunzip (Just _,  hdr :: ByteString
hdr, s :: ByteStream m r
s) =  ByteStream m r -> ByteStream m r
forall (m :: * -> *) r.
MonadIO m =>
ByteStream m r -> ByteStream m r
bgunzip (ByteString -> ByteStream m r -> ByteStream m r
forall (m :: * -> *) r.
ByteString -> ByteStream m r -> ByteStream m r
S.consChunk ByteString
hdr ByteStream m r
s)
{-# INLINE decodeBam #-}

decodeBamFile :: (MonadIO m, MonadLog m, MonadMask m) => FilePath -> (BamMeta -> Stream (Of BamRaw) m () -> m r) -> m r
decodeBamFile :: FilePath -> (BamMeta -> Stream (Of BamRaw) m () -> m r) -> m r
decodeBamFile f :: FilePath
f k :: BamMeta -> Stream (Of BamRaw) m () -> m r
k = FilePath -> (ByteStream m () -> m r) -> m r
forall (m :: * -> *) r.
(MonadIO m, MonadMask m) =>
FilePath -> (ByteStream m () -> m r) -> m r
streamFile FilePath
f ((ByteStream m () -> m r) -> m r)
-> (ByteStream m () -> m r) -> m r
forall a b. (a -> b) -> a -> b
$ ByteStream m () -> m (BamMeta, Stream (Of BamRaw) m ())
forall (m :: * -> *) r.
(MonadIO m, MonadLog m) =>
ByteStream m r -> m (BamMeta, Stream (Of BamRaw) m r)
decodeBam (ByteStream m () -> m (BamMeta, Stream (Of BamRaw) m ()))
-> ((BamMeta, Stream (Of BamRaw) m ()) -> m r)
-> ByteStream m ()
-> m r
forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> (BamMeta -> Stream (Of BamRaw) m () -> m r)
-> (BamMeta, Stream (Of BamRaw) m ()) -> m r
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry BamMeta -> Stream (Of BamRaw) m () -> m r
k
{-# INLINE decodeBamFile #-}

{- | Reads multiple bam files.

A continuation is run on the list of headers and streams.  Since no
attempt is made to unify the headers, this will work for completely
unrelated bam files.  All files are opened at the same time, which might
run into the file descriptor limit given some ridiculous workflows.
-}
decodeBamFiles :: (MonadMask m, MonadLog m, MonadIO m) => [FilePath] -> ([(BamMeta, Stream (Of BamRaw) m ())] -> m r) -> m r
decodeBamFiles :: [FilePath] -> ([(BamMeta, Stream (Of BamRaw) m ())] -> m r) -> m r
decodeBamFiles [      ] k :: [(BamMeta, Stream (Of BamRaw) m ())] -> m r
k = [(BamMeta, Stream (Of BamRaw) m ())] -> m r
k []
decodeBamFiles ("-":fs :: [FilePath]
fs) k :: [(BamMeta, Stream (Of BamRaw) m ())] -> m r
k = ByteStream m () -> m (BamMeta, Stream (Of BamRaw) m ())
forall (m :: * -> *) r.
(MonadIO m, MonadLog m) =>
ByteStream m r -> m (BamMeta, Stream (Of BamRaw) m r)
decodeBam (Handle -> ByteStream m ()
forall (m :: * -> *). MonadIO m => Handle -> ByteStream m ()
streamHandle Handle
stdin)   m (BamMeta, Stream (Of BamRaw) m ())
-> ((BamMeta, Stream (Of BamRaw) m ()) -> m r) -> m r
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \b :: (BamMeta, Stream (Of BamRaw) m ())
b -> [FilePath] -> ([(BamMeta, Stream (Of BamRaw) m ())] -> m r) -> m r
forall (m :: * -> *) r.
(MonadMask m, MonadLog m, MonadIO m) =>
[FilePath] -> ([(BamMeta, Stream (Of BamRaw) m ())] -> m r) -> m r
decodeBamFiles [FilePath]
fs (([(BamMeta, Stream (Of BamRaw) m ())] -> m r) -> m r)
-> ([(BamMeta, Stream (Of BamRaw) m ())] -> m r) -> m r
forall a b. (a -> b) -> a -> b
$ \bs :: [(BamMeta, Stream (Of BamRaw) m ())]
bs -> [(BamMeta, Stream (Of BamRaw) m ())] -> m r
k ((BamMeta, Stream (Of BamRaw) m ())
b(BamMeta, Stream (Of BamRaw) m ())
-> [(BamMeta, Stream (Of BamRaw) m ())]
-> [(BamMeta, Stream (Of BamRaw) m ())]
forall a. a -> [a] -> [a]
:[(BamMeta, Stream (Of BamRaw) m ())]
bs)
decodeBamFiles ( f :: FilePath
f :fs :: [FilePath]
fs) k :: [(BamMeta, Stream (Of BamRaw) m ())] -> m r
k = FilePath -> (ByteStream m () -> m r) -> m r
forall (m :: * -> *) r.
(MonadIO m, MonadMask m) =>
FilePath -> (ByteStream m () -> m r) -> m r
streamFile FilePath
f ((ByteStream m () -> m r) -> m r)
-> (ByteStream m () -> m r) -> m r
forall a b. (a -> b) -> a -> b
$ \s :: ByteStream m ()
s -> ByteStream m () -> m (BamMeta, Stream (Of BamRaw) m ())
forall (m :: * -> *) r.
(MonadIO m, MonadLog m) =>
ByteStream m r -> m (BamMeta, Stream (Of BamRaw) m r)
decodeBam ByteStream m ()
s m (BamMeta, Stream (Of BamRaw) m ())
-> ((BamMeta, Stream (Of BamRaw) m ()) -> m r) -> m r
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \b :: (BamMeta, Stream (Of BamRaw) m ())
b -> [FilePath] -> ([(BamMeta, Stream (Of BamRaw) m ())] -> m r) -> m r
forall (m :: * -> *) r.
(MonadMask m, MonadLog m, MonadIO m) =>
[FilePath] -> ([(BamMeta, Stream (Of BamRaw) m ())] -> m r) -> m r
decodeBamFiles [FilePath]
fs (([(BamMeta, Stream (Of BamRaw) m ())] -> m r) -> m r)
-> ([(BamMeta, Stream (Of BamRaw) m ())] -> m r) -> m r
forall a b. (a -> b) -> a -> b
$ \bs :: [(BamMeta, Stream (Of BamRaw) m ())]
bs -> [(BamMeta, Stream (Of BamRaw) m ())] -> m r
k ((BamMeta, Stream (Of BamRaw) m ())
b(BamMeta, Stream (Of BamRaw) m ())
-> [(BamMeta, Stream (Of BamRaw) m ())]
-> [(BamMeta, Stream (Of BamRaw) m ())]
forall a. a -> [a] -> [a]
:[(BamMeta, Stream (Of BamRaw) m ())]
bs)
{-# INLINE decodeBamFiles #-}

decodePlainBam :: MonadLog m => S.ByteStream m r -> m (BamMeta, Stream (Of BamRaw) m r)
decodePlainBam :: ByteStream m r -> m (BamMeta, Stream (Of BamRaw) m r)
decodePlainBam =
    (Int64 -> Parser r m BamMeta)
-> ByteStream m r
-> m (Either
        (SomeException, ByteStream m r)
        (Either r (BamMeta, ByteStream m r)))
forall (m :: * -> *) r a.
Monad m =>
(Int64 -> Parser r m a)
-> ByteStream m r
-> m (Either
        (SomeException, ByteStream m r) (Either r (a, ByteStream m r)))
S.parse (Parser r m BamMeta -> Int64 -> Parser r m BamMeta
forall a b. a -> b -> a
const Parser r m BamMeta
forall (m :: * -> *) r. Monad m => Parser r m BamMeta
getBamMeta) (ByteStream m r
 -> m (Either
         (SomeException, ByteStream m r)
         (Either r (BamMeta, ByteStream m r))))
-> (Either
      (SomeException, ByteStream m r)
      (Either r (BamMeta, ByteStream m r))
    -> m (BamMeta, Stream (Of BamRaw) m r))
-> ByteStream m r
-> m (BamMeta, Stream (Of BamRaw) m r)
forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> \case
        Left (exception :: SomeException
exception, rest :: ByteStream m r
rest) -> Level -> SomeException -> m ()
forall (m :: * -> *) e.
(MonadLog m, Exception e) =>
Level -> e -> m ()
logMsg Level
Error SomeException
exception m () -> m r -> m r
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ByteStream m r -> m r
forall (m :: * -> *) r. Monad m => ByteStream m r -> m r
S.effects ByteStream m r
rest m r
-> (r -> m (BamMeta, Stream (Of BamRaw) m r))
-> m (BamMeta, Stream (Of BamRaw) m r)
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \r :: r
r -> (BamMeta, Stream (Of BamRaw) m r)
-> m (BamMeta, Stream (Of BamRaw) m r)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (BamMeta
forall a. Monoid a => a
mempty, r -> Stream (Of BamRaw) m r
forall (f :: * -> *) a. Applicative f => a -> f a
pure r
r)
        Right (Left r :: r
r)         -> Level -> EofException -> m ()
forall (m :: * -> *) e.
(MonadLog m, Exception e) =>
Level -> e -> m ()
logMsg Level
Error EofException
S.EofException m ()
-> m (BamMeta, Stream (Of BamRaw) m r)
-> m (BamMeta, Stream (Of BamRaw) m r)
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (BamMeta, Stream (Of BamRaw) m r)
-> m (BamMeta, Stream (Of BamRaw) m r)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (BamMeta
forall a. Monoid a => a
mempty, r -> Stream (Of BamRaw) m r
forall (f :: * -> *) a. Applicative f => a -> f a
pure r
r)
        Right (Right (h :: BamMeta
h,s :: ByteStream m r
s))    -> (BamMeta, Stream (Of BamRaw) m r)
-> m (BamMeta, Stream (Of BamRaw) m r)
forall (m :: * -> *) a. Monad m => a -> m a
return (BamMeta
h, (ByteStream m r -> m (Either r (BamRaw, ByteStream m r)))
-> ByteStream m r -> Stream (Of BamRaw) m r
forall (m :: * -> *) s r a.
Monad m =>
(s -> m (Either r (a, s))) -> s -> Stream (Of a) m r
Q.unfoldr (Level
-> (Int64 -> Parser r m BamRaw)
-> ByteStream m r
-> m (Either r (BamRaw, ByteStream m r))
forall (m :: * -> *) r a.
MonadLog m =>
Level
-> (Int64 -> Parser r m a)
-> ByteStream m r
-> m (Either r (a, ByteStream m r))
S.parseLog Level
Error Int64 -> Parser r m BamRaw
forall (m :: * -> *) r. Monad m => Int64 -> Parser r m BamRaw
getBamRaw) ByteStream m r
s)

getBamMeta :: Monad m => S.Parser r m BamMeta
getBamMeta :: Parser r m BamMeta
getBamMeta = (BamMeta -> Vector BamSQ -> BamMeta)
-> Parser r m BamMeta
-> Parser r m (Vector BamSQ)
-> Parser r m BamMeta
forall (m :: * -> *) a1 a2 r.
Monad m =>
(a1 -> a2 -> r) -> m a1 -> m a2 -> m r
liftM2 BamMeta -> Vector BamSQ -> BamMeta
mmerge Parser r m BamMeta
forall r. Parser r m BamMeta
get_bam_header Parser r m (Vector BamSQ)
forall r. Parser r m (Vector BamSQ)
get_ref_array
  where
    get_bam_header :: Parser r m BamMeta
get_bam_header  = do Word32
hdr_len <- Parser r m Word32
forall (m :: * -> *) r. Monad m => Parser r m Word32
S.getWord32
                         Int -> Parser (ByteStream m r) m BamMeta -> Parser r m BamMeta
forall (m :: * -> *) r a.
Monad m =>
Int -> Parser (ByteStream m r) m a -> Parser r m a
S.isolate (Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
hdr_len) (Parser BamMeta -> Parser (ByteStream m r) m BamMeta
forall (m :: * -> *) a r. Monad m => Parser a -> Parser r m a
S.atto Parser BamMeta
parseBamMeta)

    get_ref_array :: Parser r m (Vector BamSQ)
get_ref_array = do Word32
nref <- Parser r m Word32
forall (m :: * -> *) r. Monad m => Parser r m Word32
S.getWord32
                       [BamSQ] -> Vector BamSQ
forall (v :: * -> *) a. Vector v a => [a] -> v a
V.fromList ([BamSQ] -> Vector BamSQ)
-> Parser r m [BamSQ] -> Parser r m (Vector BamSQ)
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` Int -> Parser r m BamSQ -> Parser r m [BamSQ]
forall (m :: * -> *) a. Applicative m => Int -> m a -> m [a]
replicateM (Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
nref)
                            (do ByteString
nm <- Parser r m Word32
forall (m :: * -> *) r. Monad m => Parser r m Word32
S.getWord32 Parser r m Word32
-> (Word32 -> Parser r m ByteString) -> Parser r m ByteString
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Int -> Parser r m ByteString
forall (m :: * -> *) r. Monad m => Int -> Parser r m ByteString
S.getString (Int -> Parser r m ByteString)
-> (Word32 -> Int) -> Word32 -> Parser r m ByteString
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral
                                Word32
ln <- Parser r m Word32
forall (m :: * -> *) r. Monad m => Parser r m Word32
S.getWord32
                                BamSQ -> Parser r m BamSQ
forall (m :: * -> *) a. Monad m => a -> m a
return (BamSQ -> Parser r m BamSQ) -> BamSQ -> Parser r m BamSQ
forall a b. (a -> b) -> a -> b
$! ByteString -> Int -> BamOtherShit -> BamSQ
BamSQ (ByteString -> ByteString
C.init ByteString
nm) (Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral Word32
ln) [])

    -- Need to merge information from header into actual reference list.
    -- The latter is the authoritative source for the *order* of the
    -- sequences, so leftovers from the header are discarded.  Merging
    -- is by name.  So we merge information from the header into the
    -- list, then replace the header information.
    mmerge :: BamMeta -> Vector BamSQ -> BamMeta
mmerge meta :: BamMeta
meta refs :: Vector BamSQ
refs =
        let tbl :: HashMap ByteString BamSQ
tbl = [(ByteString, BamSQ)] -> HashMap ByteString BamSQ
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
M.fromList [ (BamSQ -> ByteString
sq_name BamSQ
sq, BamSQ
sq) | BamSQ
sq <- Vector BamSQ -> [BamSQ]
forall (v :: * -> *) a. Vector v a => v a -> [a]
V.toList (Refs -> Vector BamSQ
unRefs (BamMeta -> Refs
meta_refs BamMeta
meta)) ]
        in BamMeta
meta { meta_refs :: Refs
meta_refs = Vector BamSQ -> Refs
Refs (Vector BamSQ -> Refs) -> Vector BamSQ -> Refs
forall a b. (a -> b) -> a -> b
$ (BamSQ -> BamSQ) -> Vector BamSQ -> Vector BamSQ
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\s :: BamSQ
s -> BamSQ -> (BamSQ -> BamSQ) -> Maybe BamSQ -> BamSQ
forall b a. b -> (a -> b) -> Maybe a -> b
maybe BamSQ
s (BamSQ -> BamSQ -> BamSQ
mmerge' BamSQ
s) (ByteString -> HashMap ByteString BamSQ -> Maybe BamSQ
forall k v. (Eq k, Hashable k) => k -> HashMap k v -> Maybe v
M.lookup (BamSQ -> ByteString
sq_name BamSQ
s) HashMap ByteString BamSQ
tbl)) Vector BamSQ
refs }

    mmerge' :: BamSQ -> BamSQ -> BamSQ
mmerge' l :: BamSQ
l r :: BamSQ
r | BamSQ -> Int
sq_length BamSQ
l Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== BamSQ -> Int
sq_length BamSQ
r = BamSQ
l { sq_other_shit :: BamOtherShit
sq_other_shit = BamSQ -> BamOtherShit
sq_other_shit BamSQ
l BamOtherShit -> BamOtherShit -> BamOtherShit
forall a. [a] -> [a] -> [a]
++ BamSQ -> BamOtherShit
sq_other_shit BamSQ
r }
                | Bool
otherwise                  = BamSQ
l -- contradiction in header, but we'll just ignore it
{-# INLINABLE getBamMeta #-}

getBamRaw :: Monad m => Int64 -> S.Parser r m BamRaw
getBamRaw :: Int64 -> Parser r m BamRaw
getBamRaw o :: Int64
o = do
        Int
bsize <- Word32 -> Int
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Word32 -> Int) -> Parser r m Word32 -> Parser r m Int
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` Parser r m Word32
forall (m :: * -> *) r. Monad m => Parser r m Word32
S.getWord32
        ByteString
s <- Int -> Parser r m ByteString
forall (m :: * -> *) r. Monad m => Int -> Parser r m ByteString
S.getString Int
bsize
        Bool -> Parser r m () -> Parser r m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (ByteString -> Int
B.length ByteString
s Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Int
bsize) Parser r m ()
forall (m :: * -> *) r a. Monad m => Parser r m a
S.abortParse
        Int64 -> ByteString -> Parser r m BamRaw
forall (m :: * -> *).
MonadThrow m =>
Int64 -> ByteString -> m BamRaw
bamRaw Int64
o ByteString
s
{-# INLINABLE getBamRaw #-}

{- | Streaming parser for SAM files.

It parses plain uncompressed SAM and returns a result compatible with
'decodePlainBam'.  Since it is supposed to work the same way as the BAM
parser, it requires a symbol table for the reference names.  This is
extracted from the @SQ lines in the header.  Note that reading SAM tends
to be inefficient; if you care about performance at all, use BAM.  -}

decodePlainSam :: (MonadLog m, MonadIO m) => S.ByteStream m r -> m (BamMeta, Stream (Of BamRaw) m r)
decodePlainSam :: ByteStream m r -> m (BamMeta, Stream (Of BamRaw) m r)
decodePlainSam s :: ByteStream m r
s = do
    (hdr :: BamMeta
hdr,rest :: ByteStream m r
rest) <- (r -> (BamMeta, ByteStream m r))
-> ((BamMeta, ByteStream m r) -> (BamMeta, ByteStream m r))
-> Either r (BamMeta, ByteStream m r)
-> (BamMeta, ByteStream m r)
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (\r :: r
r -> (BamMeta
forall a. Monoid a => a
mempty, r -> ByteStream m r
forall (f :: * -> *) a. Applicative f => a -> f a
pure r
r)) (BamMeta, ByteStream m r) -> (BamMeta, ByteStream m r)
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id (Either r (BamMeta, ByteStream m r) -> (BamMeta, ByteStream m r))
-> m (Either r (BamMeta, ByteStream m r))
-> m (BamMeta, ByteStream m r)
forall (m :: * -> *) a1 r. Monad m => (a1 -> r) -> m a1 -> m r
`liftM` (Int64 -> Parser r m BamMeta)
-> ByteStream m r -> m (Either r (BamMeta, ByteStream m r))
forall (m :: * -> *) r a.
MonadIO m =>
(Int64 -> Parser r m a)
-> ByteStream m r -> m (Either r (a, ByteStream m r))
S.parseIO (Parser r m BamMeta -> Int64 -> Parser r m BamMeta
forall a b. a -> b -> a
const (Parser r m BamMeta -> Int64 -> Parser r m BamMeta)
-> Parser r m BamMeta -> Int64 -> Parser r m BamMeta
forall a b. (a -> b) -> a -> b
$ Parser BamMeta -> Parser r m BamMeta
forall (m :: * -> *) a r. Monad m => Parser a -> Parser r m a
S.atto Parser BamMeta
parseBamMeta) ByteStream m r
s
    let !refs :: HashMap ByteString Refseq
refs  = [(ByteString, Refseq)] -> HashMap ByteString Refseq
forall k v. (Eq k, Hashable k) => [(k, v)] -> HashMap k v
M.fromList ([(ByteString, Refseq)] -> HashMap ByteString Refseq)
-> [(ByteString, Refseq)] -> HashMap ByteString Refseq
forall a b. (a -> b) -> a -> b
$ [ByteString] -> [Refseq] -> [(ByteString, Refseq)]
forall a b. [a] -> [b] -> [(a, b)]
zip [ ByteString
nm | BamSQ { sq_name :: BamSQ -> ByteString
sq_name = ByteString
nm } <- Vector BamSQ -> [BamSQ]
forall (v :: * -> *) a. Vector v a => v a -> [a]
V.toList (Refs -> Vector BamSQ
unRefs (BamMeta -> Refs
meta_refs BamMeta
hdr))] [Int -> Refseq
forall a. Enum a => Int -> a
toEnum 0..]
        ref :: ByteString -> Refseq
ref  x :: ByteString
x = Refseq -> ByteString -> HashMap ByteString Refseq -> Refseq
forall k v. (Eq k, Hashable k) => v -> k -> HashMap k v -> v
M.lookupDefault Refseq
invalidRefseq ByteString
x HashMap ByteString Refseq
refs
        report :: ParseError -> m (Maybe a)
report = (() -> Maybe a) -> m () -> m (Maybe a)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Maybe a -> () -> Maybe a
forall a b. a -> b -> a
const Maybe a
forall a. Maybe a
Nothing) (m () -> m (Maybe a))
-> (ParseError -> m ()) -> ParseError -> m (Maybe a)
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Level -> ParseError -> m ()
forall (m :: * -> *) e.
(MonadLog m, Exception e) =>
Level -> e -> m ()
logMsg Level
Error
        use :: BamRec -> m (Maybe BamRaw)
use    = (BamRaw -> Maybe BamRaw) -> m BamRaw -> m (Maybe BamRaw)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap BamRaw -> Maybe BamRaw
forall a. a -> Maybe a
Just (m BamRaw -> m (Maybe BamRaw))
-> (BamRec -> m BamRaw) -> BamRec -> m (Maybe BamRaw)
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. IO BamRaw -> m BamRaw
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO BamRaw -> m BamRaw)
-> (BamRec -> IO BamRaw) -> BamRec -> m BamRaw
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. BamRec -> IO BamRaw
packBam
        strm :: Stream (Of BamRaw) m r
strm   = Stream (Of (Maybe BamRaw)) m r -> Stream (Of BamRaw) m r
forall (m :: * -> *) (f :: * -> *) a r.
(Monad m, Foldable f) =>
Stream (Of (f a)) m r -> Stream (Of a) m r
Q.concat (Stream (Of (Maybe BamRaw)) m r -> Stream (Of BamRaw) m r)
-> (Stream (Of ByteString) m r -> Stream (Of (Maybe BamRaw)) m r)
-> Stream (Of ByteString) m r
-> Stream (Of BamRaw) m r
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (ByteString -> m (Maybe BamRaw))
-> Stream (Of ByteString) m r -> Stream (Of (Maybe BamRaw)) m r
forall (m :: * -> *) a b r.
Monad m =>
(a -> m b) -> Stream (Of a) m r -> Stream (Of b) m r
Q.mapM ((ParseError -> m (Maybe BamRaw))
-> (BamRec -> m (Maybe BamRaw))
-> Either ParseError BamRec
-> m (Maybe BamRaw)
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either ParseError -> m (Maybe BamRaw)
forall a. ParseError -> m (Maybe a)
report BamRec -> m (Maybe BamRaw)
use (Either ParseError BamRec -> m (Maybe BamRaw))
-> (ByteString -> m (Either ParseError BamRec))
-> ByteString
-> m (Maybe BamRaw)
forall (m :: * -> *) b c a.
Monad m =>
(b -> m c) -> (a -> m b) -> a -> m c
<=< (ByteString -> Refseq)
-> ByteString -> m (Either ParseError BamRec)
forall (m :: * -> *).
MonadLog m =>
(ByteString -> Refseq)
-> ByteString -> m (Either ParseError BamRec)
getSamRec ByteString -> Refseq
ref) (Stream (Of ByteString) m r -> Stream (Of BamRaw) m r)
-> Stream (Of ByteString) m r -> Stream (Of BamRaw) m r
forall a b. (a -> b) -> a -> b
$ ByteStream m r -> Stream (Of ByteString) m r
forall (m :: * -> *) r.
Monad m =>
ByteStream m r -> Stream (Of ByteString) m r
S.lines' ByteStream m r
rest
    (BamMeta, Stream (Of BamRaw) m r)
-> m (BamMeta, Stream (Of BamRaw) m r)
forall (m :: * -> *) a. Monad m => a -> m a
return (BamMeta
hdr, Stream (Of BamRaw) m r
strm)


getSamRec :: MonadLog m => (Bytes -> Refseq) -> Bytes -> m (Either S.ParseError BamRec)
getSamRec :: (ByteString -> Refseq)
-> ByteString -> m (Either ParseError BamRec)
getSamRec ref :: ByteString -> Refseq
ref s :: ByteString
s = case Parser BamRec -> ByteString -> Either FilePath BamRec
forall a. Parser a -> ByteString -> Either FilePath a
P.parseOnly Parser BamRec
record ByteString
s of
    Left  e :: FilePath
e                                         -> Either ParseError BamRec -> m (Either ParseError BamRec)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Either ParseError BamRec -> m (Either ParseError BamRec))
-> (ParseError -> Either ParseError BamRec)
-> ParseError
-> m (Either ParseError BamRec)
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ParseError -> Either ParseError BamRec
forall a b. a -> Either a b
Left (ParseError -> m (Either ParseError BamRec))
-> ParseError -> m (Either ParseError BamRec)
forall a b. (a -> b) -> a -> b
$ [FilePath] -> FilePath -> ParseError
S.ParseError [ByteString -> FilePath
forall s. Unpack s => s -> FilePath
unpack ByteString
s] FilePath
e
    Right b :: BamRec
b -> case BamRec -> Maybe (Vector Qual)
b_qual BamRec
b of
        Nothing                                     -> Either ParseError BamRec -> m (Either ParseError BamRec)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Either ParseError BamRec -> m (Either ParseError BamRec))
-> Either ParseError BamRec -> m (Either ParseError BamRec)
forall a b. (a -> b) -> a -> b
$ BamRec -> Either ParseError BamRec
forall a b. b -> Either a b
Right BamRec
b
        Just qs :: Vector Qual
qs | Vector Qual -> Int
forall a. Storable a => Vector a -> Int
W.length Vector Qual
qs Int -> Int -> Bool
forall a. Eq a => a -> a -> Bool
== Vector_Nucs_half Nucleotides -> Int
forall (v :: * -> *) a. Vector v a => v a -> Int
V.length (BamRec -> Vector_Nucs_half Nucleotides
b_seq BamRec
b) -> Either ParseError BamRec -> m (Either ParseError BamRec)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Either ParseError BamRec -> m (Either ParseError BamRec))
-> Either ParseError BamRec -> m (Either ParseError BamRec)
forall a b. (a -> b) -> a -> b
$ BamRec -> Either ParseError BamRec
forall a b. b -> Either a b
Right BamRec
b
                | Bool
otherwise                         -> do Level -> LengthMismatch -> m ()
forall (m :: * -> *) e.
(MonadLog m, Exception e) =>
Level -> e -> m ()
logMsg Level
Warning (LengthMismatch -> m ()) -> LengthMismatch -> m ()
forall a b. (a -> b) -> a -> b
$ ByteString -> LengthMismatch
LengthMismatch (BamRec -> ByteString
b_qname BamRec
b)
                                                          Either ParseError BamRec -> m (Either ParseError BamRec)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Either ParseError BamRec -> m (Either ParseError BamRec))
-> (BamRec -> Either ParseError BamRec)
-> BamRec
-> m (Either ParseError BamRec)
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. BamRec -> Either ParseError BamRec
forall a b. b -> Either a b
Right (BamRec -> m (Either ParseError BamRec))
-> BamRec -> m (Either ParseError BamRec)
forall a b. (a -> b) -> a -> b
$ BamRec
b { b_qual :: Maybe (Vector Qual)
b_qual = Maybe (Vector Qual)
forall a. Maybe a
Nothing }
  where
    record :: Parser BamRec
record = do ByteString
b_qname <- Parser ByteString ByteString
word
                Int
b_flag  <- Parser ByteString Int
num
                Refseq
b_rname <- ByteString -> Refseq
ref (ByteString -> Refseq)
-> Parser ByteString ByteString -> Parser ByteString Refseq
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString ByteString
word
                Int
b_pos   <- Int -> Int -> Int
forall a. Num a => a -> a -> a
subtract 1 (Int -> Int) -> Parser ByteString Int -> Parser ByteString Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString Int
num
                Qual
b_mapq  <- Word8 -> Qual
Q (Word8 -> Qual)
-> Parser ByteString Word8 -> Parser ByteString Qual
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString Word8
num'
                Vector Cigar
b_cigar <- [Cigar] -> Vector Cigar
forall a. Storable a => [a] -> Vector a
W.fromList ([Cigar] -> Vector Cigar)
-> Parser ByteString [Cigar] -> Parser ByteString (Vector Cigar)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString [Cigar]
cigar
                Refseq
b_mrnm  <- Parser ByteString (Refseq -> Refseq)
rnext Parser ByteString (Refseq -> Refseq)
-> Parser ByteString Refseq -> Parser ByteString Refseq
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Refseq -> Parser ByteString Refseq
forall (f :: * -> *) a. Applicative f => a -> f a
pure Refseq
b_rname
                Int
b_mpos  <- Int -> Int -> Int
forall a. Num a => a -> a -> a
subtract 1 (Int -> Int) -> Parser ByteString Int -> Parser ByteString Int
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString Int
num
                Int
b_isize <- Parser ByteString Int
snum
                Vector_Nucs_half Nucleotides
b_seq   <- Parser ByteString (Vector_Nucs_half Nucleotides)
sequ
                Maybe (Vector Qual)
b_qual  <- Parser ByteString (Maybe (Vector Qual))
quals
                [(BamKey, Ext)]
b_exts  <- Parser ByteString [(BamKey, Ext)]
exts
                let b_virtual_offset :: Int64
b_virtual_offset = 0
                BamRec -> Parser BamRec
forall (m :: * -> *) a. Monad m => a -> m a
return BamRec :: ByteString
-> Int
-> Refseq
-> Int
-> Qual
-> Vector Cigar
-> Refseq
-> Int
-> Int
-> Vector_Nucs_half Nucleotides
-> Maybe (Vector Qual)
-> [(BamKey, Ext)]
-> Int64
-> BamRec
BamRec{..}

    sep :: Parser ByteString ()
sep      = Parser ByteString ()
forall t. Chunk t => Parser t ()
P.endOfInput Parser ByteString ()
-> Parser ByteString () -> Parser ByteString ()
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> () () -> Parser ByteString Char -> Parser ByteString ()
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser ByteString Char
P.char '\t'
    word :: Parser ByteString ByteString
word     = (Char -> Bool) -> Parser ByteString ByteString
P.takeTill ('\t' Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==) Parser ByteString ByteString
-> Parser ByteString () -> Parser ByteString ByteString
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ByteString ()
sep
    num :: Parser ByteString Int
num      = Parser ByteString Int
forall a. Integral a => Parser a
P.decimal Parser ByteString Int
-> Parser ByteString () -> Parser ByteString Int
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ByteString ()
sep
    num' :: Parser ByteString Word8
num'     = Parser ByteString Word8
forall a. Integral a => Parser a
P.decimal Parser ByteString Word8
-> Parser ByteString () -> Parser ByteString Word8
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ByteString ()
sep
    snum :: Parser ByteString Int
snum     = Parser ByteString Int -> Parser ByteString Int
forall a. Num a => Parser a -> Parser a
P.signed Parser ByteString Int
forall a. Integral a => Parser a
P.decimal Parser ByteString Int
-> Parser ByteString () -> Parser ByteString Int
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ByteString ()
sep

    rnext :: Parser ByteString (Refseq -> Refseq)
rnext    = Refseq -> Refseq
forall k (cat :: k -> k -> *) (a :: k). Category cat => cat a a
id (Refseq -> Refseq)
-> Parser ByteString Char -> Parser ByteString (Refseq -> Refseq)
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser ByteString Char
P.char '=' Parser ByteString (Refseq -> Refseq)
-> Parser ByteString () -> Parser ByteString (Refseq -> Refseq)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ByteString ()
sep Parser ByteString (Refseq -> Refseq)
-> Parser ByteString (Refseq -> Refseq)
-> Parser ByteString (Refseq -> Refseq)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Refseq -> Refseq -> Refseq
forall a b. a -> b -> a
const (Refseq -> Refseq -> Refseq)
-> (ByteString -> Refseq) -> ByteString -> Refseq -> Refseq
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ByteString -> Refseq
ref (ByteString -> Refseq -> Refseq)
-> Parser ByteString ByteString
-> Parser ByteString (Refseq -> Refseq)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString ByteString
word
    sequ :: Parser ByteString (Vector_Nucs_half Nucleotides)
sequ     = (Vector_Nucs_half Nucleotides
forall (v :: * -> *) a. Vector v a => v a
V.empty Vector_Nucs_half Nucleotides
-> Parser ByteString Char
-> Parser ByteString (Vector_Nucs_half Nucleotides)
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser ByteString Char
P.char '*' Parser ByteString (Vector_Nucs_half Nucleotides)
-> Parser ByteString (Vector_Nucs_half Nucleotides)
-> Parser ByteString (Vector_Nucs_half Nucleotides)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
               [Nucleotides] -> Vector_Nucs_half Nucleotides
forall (v :: * -> *) a. Vector v a => [a] -> v a
V.fromList ([Nucleotides] -> Vector_Nucs_half Nucleotides)
-> (ByteString -> [Nucleotides])
-> ByteString
-> Vector_Nucs_half Nucleotides
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Word8 -> Nucleotides) -> [Word8] -> [Nucleotides]
forall a b. (a -> b) -> [a] -> [b]
map Word8 -> Nucleotides
toNucleotides ([Word8] -> [Nucleotides])
-> (ByteString -> [Word8]) -> ByteString -> [Nucleotides]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ByteString -> [Word8]
B.unpack (ByteString -> Vector_Nucs_half Nucleotides)
-> Parser ByteString ByteString
-> Parser ByteString (Vector_Nucs_half Nucleotides)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString ByteString
P.takeWhile Char -> Bool
is_nuc) Parser ByteString (Vector_Nucs_half Nucleotides)
-> Parser ByteString ()
-> Parser ByteString (Vector_Nucs_half Nucleotides)
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ByteString ()
sep

    quals :: Parser ByteString (Maybe (Vector Qual))
quals    = Maybe (Vector Qual)
forall a. Maybe a
Nothing Maybe (Vector Qual)
-> Parser ByteString Char
-> Parser ByteString (Maybe (Vector Qual))
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser ByteString Char
P.char '*' Parser ByteString (Maybe (Vector Qual))
-> Parser ByteString () -> Parser ByteString (Maybe (Vector Qual))
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ByteString ()
sep Parser ByteString (Maybe (Vector Qual))
-> Parser ByteString (Maybe (Vector Qual))
-> Parser ByteString (Maybe (Vector Qual))
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> ByteString -> Maybe (Vector Qual)
bsToVec (ByteString -> Maybe (Vector Qual))
-> Parser ByteString ByteString
-> Parser ByteString (Maybe (Vector Qual))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString ByteString
word
        where
            bsToVec :: ByteString -> Maybe (Vector Qual)
bsToVec = Vector Qual -> Maybe (Vector Qual)
forall a. a -> Maybe a
Just (Vector Qual -> Maybe (Vector Qual))
-> (ByteString -> Vector Qual) -> ByteString -> Maybe (Vector Qual)
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. [Qual] -> Vector Qual
forall a. Storable a => [a] -> Vector a
W.fromList ([Qual] -> Vector Qual)
-> (ByteString -> [Qual]) -> ByteString -> Vector Qual
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. (Word8 -> Qual) -> [Word8] -> [Qual]
forall a b. (a -> b) -> [a] -> [b]
map (Word8 -> Qual
Q (Word8 -> Qual) -> (Word8 -> Word8) -> Word8 -> Qual
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Word8 -> Word8 -> Word8
forall a. Num a => a -> a -> a
subtract 33) ([Word8] -> [Qual])
-> (ByteString -> [Word8]) -> ByteString -> [Qual]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ByteString -> [Word8]
B.unpack

    cigar :: Parser ByteString [Cigar]
cigar    = [] [Cigar] -> Parser ByteString Char -> Parser ByteString [Cigar]
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser ByteString Char
P.char '*' Parser ByteString [Cigar]
-> Parser ByteString () -> Parser ByteString [Cigar]
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ByteString ()
sep Parser ByteString [Cigar]
-> Parser ByteString [Cigar] -> Parser ByteString [Cigar]
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
               Parser ByteString Cigar
-> Parser ByteString () -> Parser ByteString [Cigar]
forall (f :: * -> *) a b. Alternative f => f a -> f b -> f [a]
P.manyTill ((CigOp -> Int -> Cigar) -> Int -> CigOp -> Cigar
forall a b c. (a -> b -> c) -> b -> a -> c
flip CigOp -> Int -> Cigar
(:*) (Int -> CigOp -> Cigar)
-> Parser ByteString Int -> Parser ByteString (CigOp -> Cigar)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString Int
forall a. Integral a => Parser a
P.decimal Parser ByteString (CigOp -> Cigar)
-> Parser ByteString CigOp -> Parser ByteString Cigar
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString CigOp
cigop) Parser ByteString ()
sep

    cigop :: Parser ByteString CigOp
cigop    = [Parser ByteString CigOp] -> Parser ByteString CigOp
forall (f :: * -> *) a. Alternative f => [f a] -> f a
P.choice ([Parser ByteString CigOp] -> Parser ByteString CigOp)
-> [Parser ByteString CigOp] -> Parser ByteString CigOp
forall a b. (a -> b) -> a -> b
$ (Char -> CigOp -> Parser ByteString CigOp)
-> FilePath -> [CigOp] -> [Parser ByteString CigOp]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\c :: Char
c r :: CigOp
r -> CigOp
r CigOp -> Parser ByteString Char -> Parser ByteString CigOp
forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ Char -> Parser ByteString Char
P.char Char
c) "MIDNSHP" [CigOp
Mat,CigOp
Ins,CigOp
Del,CigOp
Nop,CigOp
SMa,CigOp
HMa,CigOp
Pad]
    exts :: Parser ByteString [(BamKey, Ext)]
exts     = Parser ByteString (BamKey, Ext)
ext Parser ByteString (BamKey, Ext)
-> Parser ByteString () -> Parser ByteString [(BamKey, Ext)]
forall (f :: * -> *) a b. Alternative f => f a -> f b -> f [a]
`P.sepBy` Parser ByteString ()
sep
    ext :: Parser ByteString (BamKey, Ext)
ext      = (\a :: Char
a b :: Char
b v :: Ext
v -> (FilePath -> BamKey
forall a. IsString a => FilePath -> a
fromString [Char
a,Char
b],Ext
v)) (Char -> Char -> Ext -> (BamKey, Ext))
-> Parser ByteString Char
-> Parser ByteString (Char -> Ext -> (BamKey, Ext))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString Char
P.anyChar Parser ByteString (Char -> Ext -> (BamKey, Ext))
-> Parser ByteString Char
-> Parser ByteString (Ext -> (BamKey, Ext))
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser ByteString Char
P.anyChar Parser ByteString (Ext -> (BamKey, Ext))
-> Parser ByteString Ext -> Parser ByteString (BamKey, Ext)
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> (Char -> Parser ByteString Char
P.char ':' Parser ByteString Char
-> Parser ByteString Ext -> Parser ByteString Ext
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ByteString Ext
value)

    value :: Parser ByteString Ext
value    = Char -> Parser ByteString Char
P.char 'A' Parser ByteString Char
-> Parser ByteString Char -> Parser ByteString Char
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Char -> Parser ByteString Char
P.char ':' Parser ByteString Char
-> Parser ByteString Ext -> Parser ByteString Ext
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Word8 -> Ext
Char (Word8 -> Ext) -> Parser ByteString Word8 -> Parser ByteString Ext
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>               Parser ByteString Word8
anyWord8) Parser ByteString Ext
-> Parser ByteString Ext -> Parser ByteString Ext
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
               Char -> Parser ByteString Char
P.char 'i' Parser ByteString Char
-> Parser ByteString Char -> Parser ByteString Char
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Char -> Parser ByteString Char
P.char ':' Parser ByteString Char
-> Parser ByteString Ext -> Parser ByteString Ext
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Int -> Ext
Int  (Int -> Ext) -> Parser ByteString Int -> Parser ByteString Ext
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>     Parser ByteString Int -> Parser ByteString Int
forall a. Num a => Parser a -> Parser a
P.signed Parser ByteString Int
forall a. Integral a => Parser a
P.decimal) Parser ByteString Ext
-> Parser ByteString Ext -> Parser ByteString Ext
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
               Char -> Parser ByteString Char
P.char 'Z' Parser ByteString Char
-> Parser ByteString Char -> Parser ByteString Char
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Char -> Parser ByteString Char
P.char ':' Parser ByteString Char
-> Parser ByteString Ext -> Parser ByteString Ext
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (ByteString -> Ext
Text (ByteString -> Ext)
-> Parser ByteString ByteString -> Parser ByteString Ext
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>   (Char -> Bool) -> Parser ByteString ByteString
P.takeTill ('\t' Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==)) Parser ByteString Ext
-> Parser ByteString Ext -> Parser ByteString Ext
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
               Char -> Parser ByteString Char
P.char 'H' Parser ByteString Char
-> Parser ByteString Char -> Parser ByteString Char
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Char -> Parser ByteString Char
P.char ':' Parser ByteString Char
-> Parser ByteString Ext -> Parser ByteString Ext
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (ByteString -> Ext
Bin  (ByteString -> Ext)
-> Parser ByteString ByteString -> Parser ByteString Ext
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>               Parser ByteString ByteString
hexarray) Parser ByteString Ext
-> Parser ByteString Ext -> Parser ByteString Ext
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
               Char -> Parser ByteString Char
P.char 'f' Parser ByteString Char
-> Parser ByteString Char -> Parser ByteString Char
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Char -> Parser ByteString Char
P.char ':' Parser ByteString Char
-> Parser ByteString Ext -> Parser ByteString Ext
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (Float -> Ext
Float (Float -> Ext) -> (Double -> Float) -> Double -> Ext
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Double -> Float
forall a b. (Real a, Fractional b) => a -> b
realToFrac (Double -> Ext)
-> Parser ByteString Double -> Parser ByteString Ext
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString Double
P.double) Parser ByteString Ext
-> Parser ByteString Ext -> Parser ByteString Ext
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
               Char -> Parser ByteString Char
P.char 'B' Parser ByteString Char
-> Parser ByteString Char -> Parser ByteString Char
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Char -> Parser ByteString Char
P.char ':' Parser ByteString Char
-> Parser ByteString Ext -> Parser ByteString Ext
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> (
                    (Char -> Bool) -> Parser ByteString Char
P.satisfy (FilePath -> Char -> Bool
P.inClass "cCsSiI") Parser ByteString Char
-> Parser ByteString Ext -> Parser ByteString Ext
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ([Int] -> Ext
intArr   ([Int] -> Ext) -> Parser ByteString [Int] -> Parser ByteString Ext
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString Int -> Parser ByteString [Int]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (Char -> Parser ByteString Char
P.char ',' Parser ByteString Char
-> Parser ByteString Int -> Parser ByteString Int
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ByteString Int -> Parser ByteString Int
forall a. Num a => Parser a -> Parser a
P.signed Parser ByteString Int
forall a. Integral a => Parser a
P.decimal)) Parser ByteString Ext
-> Parser ByteString Ext -> Parser ByteString Ext
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
                    Char -> Parser ByteString Char
P.char 'f'                     Parser ByteString Char
-> Parser ByteString Ext -> Parser ByteString Ext
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> ([Double] -> Ext
forall a. Real a => [a] -> Ext
floatArr ([Double] -> Ext)
-> Parser ByteString [Double] -> Parser ByteString Ext
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString Double -> Parser ByteString [Double]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (Char -> Parser ByteString Char
P.char ',' Parser ByteString Char
-> Parser ByteString Double -> Parser ByteString Double
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parser ByteString Double
P.double)))

    intArr :: [Int] -> Ext
intArr   is :: [Int]
is = Vector Int -> Ext
IntArr   (Vector Int -> Ext) -> Vector Int -> Ext
forall a b. (a -> b) -> a -> b
$ [Int] -> Vector Int
forall a. Unbox a => [a] -> Vector a
U.fromList [Int]
is
    floatArr :: [a] -> Ext
floatArr fs :: [a]
fs = Vector Float -> Ext
FloatArr (Vector Float -> Ext) -> Vector Float -> Ext
forall a b. (a -> b) -> a -> b
$ [Float] -> Vector Float
forall a. Unbox a => [a] -> Vector a
U.fromList ([Float] -> Vector Float) -> [Float] -> Vector Float
forall a b. (a -> b) -> a -> b
$ (a -> Float) -> [a] -> [Float]
forall a b. (a -> b) -> [a] -> [b]
map a -> Float
forall a b. (Real a, Fractional b) => a -> b
realToFrac [a]
fs
    hexarray :: Parser ByteString ByteString
hexarray    = [Word8] -> ByteString
B.pack ([Word8] -> ByteString)
-> (ByteString -> [Word8]) -> ByteString -> ByteString
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. FilePath -> [Word8]
forall a. Num a => FilePath -> [a]
repack (FilePath -> [Word8])
-> (ByteString -> FilePath) -> ByteString -> [Word8]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. ByteString -> FilePath
C.unpack (ByteString -> ByteString)
-> Parser ByteString ByteString -> Parser ByteString ByteString
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser ByteString ByteString
P.takeWhile (FilePath -> Char -> Bool
P.inClass "0-9A-Fa-f")
    repack :: FilePath -> [a]
repack (a :: Char
a:b :: Char
b:cs :: FilePath
cs) = Int -> a
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Char -> Int
digitToInt Char
a Int -> Int -> Int
forall a. Num a => a -> a -> a
* 16 Int -> Int -> Int
forall a. Num a => a -> a -> a
+ Char -> Int
digitToInt Char
b) a -> [a] -> [a]
forall a. a -> [a] -> [a]
: FilePath -> [a]
repack FilePath
cs ; repack _ = []
    is_nuc :: Char -> Bool
is_nuc = FilePath -> Char -> Bool
P.inClass "acgtswkmrybdhvnACGTSWKMRYBDHVN"


data IncompatibleRefs = IncompatibleRefs FilePath FilePath deriving (Typeable, Int -> IncompatibleRefs -> ShowS
[IncompatibleRefs] -> ShowS
IncompatibleRefs -> FilePath
(Int -> IncompatibleRefs -> ShowS)
-> (IncompatibleRefs -> FilePath)
-> ([IncompatibleRefs] -> ShowS)
-> Show IncompatibleRefs
forall a.
(Int -> a -> ShowS) -> (a -> FilePath) -> ([a] -> ShowS) -> Show a
showList :: [IncompatibleRefs] -> ShowS
$cshowList :: [IncompatibleRefs] -> ShowS
show :: IncompatibleRefs -> FilePath
$cshow :: IncompatibleRefs -> FilePath
showsPrec :: Int -> IncompatibleRefs -> ShowS
$cshowsPrec :: Int -> IncompatibleRefs -> ShowS
Show)

instance Exception IncompatibleRefs where
    displayException :: IncompatibleRefs -> FilePath
displayException (IncompatibleRefs a :: FilePath
a b :: FilePath
b) = "references in " FilePath -> ShowS
forall a. [a] -> [a] -> [a]
++ FilePath
a FilePath -> ShowS
forall a. [a] -> [a] -> [a]
++ " and " FilePath -> ShowS
forall a. [a] -> [a] -> [a]
++ FilePath
b FilePath -> ShowS
forall a. [a] -> [a] -> [a]
++ " are incompatible"

guardRefCompat :: MonadThrow m => (FilePath,BamMeta) -> (FilePath,BamMeta) -> m ()
guardRefCompat :: (FilePath, BamMeta) -> (FilePath, BamMeta) -> m ()
guardRefCompat (f0 :: FilePath
f0,hdr0 :: BamMeta
hdr0) (f1 :: FilePath
f1,hdr1 :: BamMeta
hdr1) =
    Bool -> m () -> m ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless (BamMeta -> [BamSQ]
p BamMeta
hdr1 [BamSQ] -> [BamSQ] -> Bool
forall a. Eq a => [a] -> [a] -> Bool
`isPrefixOf` BamMeta -> [BamSQ]
p BamMeta
hdr0) (m () -> m ()) -> m () -> m ()
forall a b. (a -> b) -> a -> b
$ IncompatibleRefs -> m ()
forall (m :: * -> *) e a. (MonadThrow m, Exception e) => e -> m a
throwM (IncompatibleRefs -> m ()) -> IncompatibleRefs -> m ()
forall a b. (a -> b) -> a -> b
$ FilePath -> FilePath -> IncompatibleRefs
IncompatibleRefs FilePath
f0 FilePath
f1
  where
    p :: BamMeta -> [BamSQ]
p = Vector BamSQ -> [BamSQ]
forall (v :: * -> *) a. Vector v a => v a -> [a]
V.toList (Vector BamSQ -> [BamSQ])
-> (BamMeta -> Vector BamSQ) -> BamMeta -> [BamSQ]
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Refs -> Vector BamSQ
unRefs (Refs -> Vector BamSQ)
-> (BamMeta -> Refs) -> BamMeta -> Vector BamSQ
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. BamMeta -> Refs
meta_refs


{- | Reads multiple bam inputs in sequence.

Only one file is opened at a time, so they must also be consumed in
sequence.  If you can afford to open all inputs simultaneously, you
probably want to use 'mergeInputsOn' instead.  The filename \"-\" refers
to stdin, if no filenames are given, stdin is read.  Since we can't look
ahead into further files, the header of the first input is used
for the result, and an exception is thrown if one of the subsequent
headers is incompatible with the first one.
-}
concatInputs :: (MonadIO m, MonadLog m, MonadMask m) => [FilePath] -> (BamMeta -> Stream (Of BamRaw) m () -> m r) -> m r
concatInputs :: [FilePath] -> (BamMeta -> Stream (Of BamRaw) m () -> m r) -> m r
concatInputs fs0 :: [FilePath]
fs0 k :: BamMeta -> Stream (Of BamRaw) m () -> m r
k = [FilePath] -> (Stream (ByteStream m) m () -> m r) -> m r
forall (m :: * -> *) r.
MonadIO m =>
[FilePath] -> (Stream (ByteStream m) m () -> r) -> r
streamInputs [FilePath]
fs0 ([FilePath] -> Stream (ByteStream m) m () -> m r
go1 ([FilePath] -> Stream (ByteStream m) m () -> m r)
-> [FilePath] -> Stream (ByteStream m) m () -> m r
forall a b. (a -> b) -> a -> b
$ [FilePath]
fs0 [FilePath] -> [FilePath] -> [FilePath]
forall a. [a] -> [a] -> [a]
++ FilePath -> [FilePath]
forall a. a -> [a]
repeat "-")
  where
    go1 :: [FilePath] -> Stream (ByteStream m) m () -> m r
go1 fs :: [FilePath]
fs = Stream (ByteStream m) m ()
-> m (Either () (ByteStream m (Stream (ByteStream m) m ())))
forall (m :: * -> *) (f :: * -> *) r.
Monad m =>
Stream f m r -> m (Either r (f (Stream f m r)))
inspect (Stream (ByteStream m) m ()
 -> m (Either () (ByteStream m (Stream (ByteStream m) m ()))))
-> (Either () (ByteStream m (Stream (ByteStream m) m ())) -> m r)
-> Stream (ByteStream m) m ()
-> m r
forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> \case
        Left () -> BamMeta -> Stream (Of BamRaw) m () -> m r
k BamMeta
forall a. Monoid a => a
mempty (() -> Stream (Of BamRaw) m ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ())
        Right s :: ByteStream m (Stream (ByteStream m) m ())
s -> do (hdr :: BamMeta
hdr,bs :: Stream (Of BamRaw) m (Stream (ByteStream m) m ())
bs) <- ByteStream m (Stream (ByteStream m) m ())
-> m (BamMeta, Stream (Of BamRaw) m (Stream (ByteStream m) m ()))
forall (m :: * -> *) r.
(MonadIO m, MonadLog m) =>
ByteStream m r -> m (BamMeta, Stream (Of BamRaw) m r)
decodeBam ByteStream m (Stream (ByteStream m) m ())
s
                      BamMeta -> Stream (Of BamRaw) m () -> m r
k BamMeta
hdr (Stream (Of BamRaw) m (Stream (ByteStream m) m ())
bs Stream (Of BamRaw) m (Stream (ByteStream m) m ())
-> (Stream (ByteStream m) m () -> Stream (Of BamRaw) m ())
-> Stream (Of BamRaw) m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= FilePath
-> BamMeta
-> [FilePath]
-> Stream (ByteStream m) m ()
-> Stream (Of BamRaw) m ()
forall (m :: * -> *).
(MonadIO m, MonadLog m, MonadThrow m) =>
FilePath
-> BamMeta
-> [FilePath]
-> Stream (ByteStream m) m ()
-> Stream (Of BamRaw) m ()
go ([FilePath] -> FilePath
forall a. [a] -> a
head [FilePath]
fs) BamMeta
hdr ([FilePath] -> [FilePath]
forall a. [a] -> [a]
tail [FilePath]
fs))

    go :: FilePath
-> BamMeta
-> [FilePath]
-> Stream (ByteStream m) m ()
-> Stream (Of BamRaw) m ()
go f0 :: FilePath
f0 hdr0 :: BamMeta
hdr0 fs :: [FilePath]
fs = m (Either () (ByteStream m (Stream (ByteStream m) m ())))
-> Stream
     (Of BamRaw)
     m
     (Either () (ByteStream m (Stream (ByteStream m) m ())))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (Either () (ByteStream m (Stream (ByteStream m) m ())))
 -> Stream
      (Of BamRaw)
      m
      (Either () (ByteStream m (Stream (ByteStream m) m ()))))
-> (Stream (ByteStream m) m ()
    -> m (Either () (ByteStream m (Stream (ByteStream m) m ()))))
-> Stream (ByteStream m) m ()
-> Stream
     (Of BamRaw)
     m
     (Either () (ByteStream m (Stream (ByteStream m) m ())))
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. Stream (ByteStream m) m ()
-> m (Either () (ByteStream m (Stream (ByteStream m) m ())))
forall (m :: * -> *) (f :: * -> *) r.
Monad m =>
Stream f m r -> m (Either r (f (Stream f m r)))
inspect (Stream (ByteStream m) m ()
 -> Stream
      (Of BamRaw)
      m
      (Either () (ByteStream m (Stream (ByteStream m) m ()))))
-> (Either () (ByteStream m (Stream (ByteStream m) m ()))
    -> Stream (Of BamRaw) m ())
-> Stream (ByteStream m) m ()
-> Stream (Of BamRaw) m ()
forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> \case
        Left () -> () -> Stream (Of BamRaw) m ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
        Right s :: ByteStream m (Stream (ByteStream m) m ())
s -> do (hdr :: BamMeta
hdr,bs :: Stream (Of BamRaw) m (Stream (ByteStream m) m ())
bs) <- m (BamMeta, Stream (Of BamRaw) m (Stream (ByteStream m) m ()))
-> Stream
     (Of BamRaw)
     m
     (BamMeta, Stream (Of BamRaw) m (Stream (ByteStream m) m ()))
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m (BamMeta, Stream (Of BamRaw) m (Stream (ByteStream m) m ()))
 -> Stream
      (Of BamRaw)
      m
      (BamMeta, Stream (Of BamRaw) m (Stream (ByteStream m) m ())))
-> m (BamMeta, Stream (Of BamRaw) m (Stream (ByteStream m) m ()))
-> Stream
     (Of BamRaw)
     m
     (BamMeta, Stream (Of BamRaw) m (Stream (ByteStream m) m ()))
forall a b. (a -> b) -> a -> b
$ ByteStream m (Stream (ByteStream m) m ())
-> m (BamMeta, Stream (Of BamRaw) m (Stream (ByteStream m) m ()))
forall (m :: * -> *) r.
(MonadIO m, MonadLog m) =>
ByteStream m r -> m (BamMeta, Stream (Of BamRaw) m r)
decodeBam ByteStream m (Stream (ByteStream m) m ())
s
                      m () -> Stream (Of BamRaw) m ()
forall (t :: (* -> *) -> * -> *) (m :: * -> *) a.
(MonadTrans t, Monad m) =>
m a -> t m a
lift (m () -> Stream (Of BamRaw) m ())
-> m () -> Stream (Of BamRaw) m ()
forall a b. (a -> b) -> a -> b
$ (FilePath, BamMeta) -> (FilePath, BamMeta) -> m ()
forall (m :: * -> *).
MonadThrow m =>
(FilePath, BamMeta) -> (FilePath, BamMeta) -> m ()
guardRefCompat (FilePath
f0,BamMeta
hdr0) ([FilePath] -> FilePath
forall a. [a] -> a
head [FilePath]
fs,BamMeta
hdr)
                      Stream (Of BamRaw) m (Stream (ByteStream m) m ())
bs Stream (Of BamRaw) m (Stream (ByteStream m) m ())
-> (Stream (ByteStream m) m () -> Stream (Of BamRaw) m ())
-> Stream (Of BamRaw) m ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= FilePath
-> BamMeta
-> [FilePath]
-> Stream (ByteStream m) m ()
-> Stream (Of BamRaw) m ()
go FilePath
f0 BamMeta
hdr0 ([FilePath] -> [FilePath]
forall a. [a] -> [a]
tail [FilePath]
fs)
{-# INLINABLE concatInputs #-}

{- | Reads multiple bam files and merges them.

If the inputs are all sorted by the thing being merged on, the output
will be sorted, too.  The headers are all merged sensibly, even if their
reference lists differ.  However, for performance reasons, we don't want
to change the rname and mrnm fields in potentially all records.  So
instead of allowing arbitrary reference lists to be merged, we throw an
exception unless every input is compatible with the effective reference
list.
-}
mergeInputsOn :: (Ord x, MonadIO m, MonadLog m, MonadMask m)
              => (BamRaw -> x) -> [FilePath]
              -> (BamMeta -> Stream (Of BamRaw) m () -> m r) -> m r
mergeInputsOn :: (BamRaw -> x)
-> [FilePath] -> (BamMeta -> Stream (Of BamRaw) m () -> m r) -> m r
mergeInputsOn _ [] k :: BamMeta -> Stream (Of BamRaw) m () -> m r
k = ByteStream m () -> m (BamMeta, Stream (Of BamRaw) m ())
forall (m :: * -> *) r.
(MonadIO m, MonadLog m) =>
ByteStream m r -> m (BamMeta, Stream (Of BamRaw) m r)
decodeBam (Handle -> ByteStream m ()
forall (m :: * -> *). MonadIO m => Handle -> ByteStream m ()
streamHandle Handle
stdin) m (BamMeta, Stream (Of BamRaw) m ())
-> ((BamMeta, Stream (Of BamRaw) m ()) -> m r) -> m r
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= (BamMeta -> Stream (Of BamRaw) m () -> m r)
-> (BamMeta, Stream (Of BamRaw) m ()) -> m r
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry BamMeta -> Stream (Of BamRaw) m () -> m r
k
mergeInputsOn p :: BamRaw -> x
p fs :: [FilePath]
fs k :: BamMeta -> Stream (Of BamRaw) m () -> m r
k = [FilePath] -> ([(BamMeta, Stream (Of BamRaw) m ())] -> m r) -> m r
forall (m :: * -> *) r.
(MonadMask m, MonadLog m, MonadIO m) =>
[FilePath] -> ([(BamMeta, Stream (Of BamRaw) m ())] -> m r) -> m r
decodeBamFiles [FilePath]
fs (([(BamMeta, Stream (Of BamRaw) m ())] -> m r) -> m r)
-> ([(BamMeta, Stream (Of BamRaw) m ())] -> m r) -> m r
forall a b. (a -> b) -> a -> b
$ \bs :: [(BamMeta, Stream (Of BamRaw) m ())]
bs -> do
    let hdr :: BamMeta
hdr = ((BamMeta, Stream (Of BamRaw) m ()) -> BamMeta)
-> [(BamMeta, Stream (Of BamRaw) m ())] -> BamMeta
forall (t :: * -> *) m a.
(Foldable t, Monoid m) =>
(a -> m) -> t a -> m
foldMap (BamMeta, Stream (Of BamRaw) m ()) -> BamMeta
forall a b. (a, b) -> a
fst [(BamMeta, Stream (Of BamRaw) m ())]
bs
    [m ()] -> m ()
forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, Monad m) =>
t (m a) -> m ()
sequence_ ([m ()] -> m ()) -> [m ()] -> m ()
forall a b. (a -> b) -> a -> b
$ (FilePath -> (BamMeta, Stream (Of BamRaw) m ()) -> m ())
-> [FilePath] -> [(BamMeta, Stream (Of BamRaw) m ())] -> [m ()]
forall a b c. (a -> b -> c) -> [a] -> [b] -> [c]
zipWith (\f :: FilePath
f (h :: BamMeta
h,_) -> (FilePath, BamMeta) -> (FilePath, BamMeta) -> m ()
forall (m :: * -> *).
MonadThrow m =>
(FilePath, BamMeta) -> (FilePath, BamMeta) -> m ()
guardRefCompat ("*",BamMeta
hdr) (FilePath
f,BamMeta
h)) [FilePath]
fs [(BamMeta, Stream (Of BamRaw) m ())]
bs
    BamMeta -> Stream (Of BamRaw) m () -> m r
k BamMeta
hdr (((BamMeta, Stream (Of BamRaw) m ())
 -> Stream (Of BamRaw) m () -> Stream (Of BamRaw) m ())
-> Stream (Of BamRaw) m ()
-> [(BamMeta, Stream (Of BamRaw) m ())]
-> Stream (Of BamRaw) m ()
forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\a :: (BamMeta, Stream (Of BamRaw) m ())
a b :: Stream (Of BamRaw) m ()
b -> Stream (Of BamRaw) m ((), ()) -> Stream (Of BamRaw) m ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Stream (Of BamRaw) m ((), ()) -> Stream (Of BamRaw) m ())
-> Stream (Of BamRaw) m ((), ()) -> Stream (Of BamRaw) m ()
forall a b. (a -> b) -> a -> b
$ (BamRaw -> x)
-> Stream (Of BamRaw) m ()
-> Stream (Of BamRaw) m ()
-> Stream (Of BamRaw) m ((), ())
forall (m :: * -> *) b a r s.
(Monad m, Ord b) =>
(a -> b)
-> Stream (Of a) m r -> Stream (Of a) m s -> Stream (Of a) m (r, s)
mergeStreamsOn BamRaw -> x
p ((BamMeta, Stream (Of BamRaw) m ()) -> Stream (Of BamRaw) m ()
forall a b. (a, b) -> b
snd (BamMeta, Stream (Of BamRaw) m ())
a) Stream (Of BamRaw) m ()
b) (() -> Stream (Of BamRaw) m ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()) [(BamMeta, Stream (Of BamRaw) m ())]
bs)
{-# INLINABLE mergeInputsOn #-}

coordinates :: BamRaw -> (Refseq, Int)
coordinates :: BamRaw -> (Refseq, Int)
coordinates = (BamRec -> Refseq
b_rname (BamRec -> Refseq) -> (BamRec -> Int) -> BamRec -> (Refseq, Int)
forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& BamRec -> Int
b_pos) (BamRec -> (Refseq, Int))
-> (BamRaw -> BamRec) -> BamRaw -> (Refseq, Int)
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. BamRaw -> BamRec
unpackBam
{-# INLINE coordinates #-}

qnames :: BamRaw -> Bytes
qnames :: BamRaw -> ByteString
qnames = BamRec -> ByteString
b_qname (BamRec -> ByteString)
-> (BamRaw -> BamRec) -> BamRaw -> ByteString
forall k (cat :: k -> k -> *) (b :: k) (c :: k) (a :: k).
Category cat =>
cat b c -> cat a b -> cat a c
. BamRaw -> BamRec
unpackBam
{-# INLINE qnames #-}