{-# LANGUAGE FlexibleContexts      #-}
{-# LANGUAGE FlexibleInstances     #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE TypeFamilies          #-}

-- |
-- Module      :  Data.BAM.Version1_6.Read.Base
-- Copyright   :  (c) Matthew Mosior 2024
-- License     :  BSD-style
-- Maintainer  :  mattm.github@gmail.com
-- Portability :  portable
--
-- = Description
--
-- This library enables the decoding/encoding of SAM, BAM and CRAM file formats.

module Data.BAM.Version1_6.Read.Base ( -- * Reading
                                       readBAM_V1_6
                                     ) where

import Data.BAM.Version1_6.Base
import Data.BAM.Version1_6.Read.Error 
import Data.BAM.Version1_6.Read.Parser.BGZFBlock

import           Data.Attoparsec.ByteString.Lazy   as DABL
import           Data.ByteString                   as DB
import           Data.ByteString.Lazy              as DBL
import           Data.Sequence                     as DSeq
import qualified Streamly.Data.Stream              as S
import           Streamly.External.ByteString.Lazy as StreamlyLByteString  (fromChunksIO)
import           Streamly.Internal.FileSystem.File as StreamlyInternalFile (chunkReader)
import qualified System.IO                         as SIO

-- | Define the @"BAM_V1_6"@ parser.
--
-- Applies 'parse_BAM_V1_6_BGZFBlock' zero or more times and collects
-- the resulting blocks, in input order, into the @bam_v1_6@ field.
--
-- Note that @many'@ stops at the first position where the block parser
-- fails, and no end-of-input check follows it, so this parser succeeds
-- (possibly with zero blocks) even when unparsed input remains.
parse_BAM_V1_6 :: Parser BAM_V1_6
parse_BAM_V1_6 =
  wrapblocks <$> DABL.many' parse_BAM_V1_6_BGZFBlock
  where
    -- Convert the parsed list into the sequence stored in the record.
    wrapblocks bgzfblocks =
      BAM_V1_6
        { bam_v1_6 = DSeq.fromList bgzfblocks
        }

-- | Run the @"BAM_V1_6"@ parser over a lazy bytestring.
--
-- On success the parsed @"BAM_V1_6"@ is returned.
-- On failure the parser's error message is raised via 'error'.
readBAM_V1_6_LBS :: DBL.ByteString -- ^ Complete contents of a BAM file.
                 -> IO BAM_V1_6
readBAM_V1_6_LBS lbs =
  either error
         return
         (DABL.parseOnly parse_BAM_V1_6 lbs)

-- | Read a @"BAM_V1_6"@ from a file.
--
-- Before any parsing happens, the trailing bytes of the file are compared
-- against the 28-byte BGZF end-of-file marker. A file that is shorter than
-- the marker, or that does not end with it, is rejected by raising (via 'error')
-- the shown 'BAM_V1_6_Read_Error_End_Of_File_Marker_Incorrect_Format'.
-- Otherwise the file is streamed into a lazy bytestring and handed to the parser.
--
-- See the [SAM v1.6](http://samtools.github.io/hts-specs/SAMv1.pdf) specification documentation.
readBAM_V1_6 :: FilePath -- ^ Input path to BAM file.
             -> IO BAM_V1_6
readBAM_V1_6 fp = do
  -- Ensure that BAM file is intact
  -- by checking for end-of-file
  -- marker.
  -- withBinaryFile guarantees the handle is closed
  -- even if seeking or reading throws.
  endoffilemarker <-
    SIO.withBinaryFile fp SIO.ReadMode $ \bamfileh -> do
      bamfilesize <- SIO.hFileSize bamfileh
      if bamfilesize < toInteger endoffilemarkerlength
        -- Too short to hold the marker: seeking before the start of the
        -- file would throw, so yield a value that cannot match instead.
        then return DB.empty
        else do
          SIO.hSeek bamfileh
                    SIO.SeekFromEnd
                    (negate (toInteger endoffilemarkerlength))
          DB.hGet bamfileh
                  endoffilemarkerlength
  if endoffilemarker == endoffilemarkerbytes
    then do
      -- Stream the file chunk-wise into a lazy bytestring and parse it.
      lazybamfilef <-
        StreamlyLByteString.fromChunksIO
          (S.unfold StreamlyInternalFile.chunkReader fp)
      readBAM_V1_6_LBS lazybamfilef
    else
      error $
        show BAM_V1_6_Read_Error_End_Of_File_Marker_Incorrect_Format
  where
    -- Number of bytes in the end-of-file marker (28).
    endoffilemarkerlength :: Int
    endoffilemarkerlength = DB.length endoffilemarkerbytes
    -- The BGZF end-of-file marker: an empty BGZF block.
    endoffilemarkerbytes :: DB.ByteString
    endoffilemarkerbytes = DB.pack
                             [ 0x1f
                             , 0x8b
                             , 0x08
                             , 0x04
                             , 0x00
                             , 0x00
                             , 0x00
                             , 0x00
                             , 0x00
                             , 0xff
                             , 0x06
                             , 0x00
                             , 0x42
                             , 0x43
                             , 0x02
                             , 0x00
                             , 0x1b
                             , 0x00
                             , 0x03
                             , 0x00
                             , 0x00
                             , 0x00
                             , 0x00
                             , 0x00
                             , 0x00
                             , 0x00
                             , 0x00
                             , 0x00
                             ]