Safe Haskell | None |
---|---|
Language | Haskell98 |
This module provides a fairly direct representation of the SAM/BAM alignment format, along with an interface to read and write alignments in this format.
The package is based on the C SamTools library available at
http://samtools.sourceforge.net/
and the SAM/BAM file format is described here
http://samtools.sourceforge.net/SAM-1.3.pdf
This package only reads existing alignment files generated by other tools. The meaning of the various flags is actually determined by the program that produced the alignment file.
- data HeaderSeq = HeaderSeq { name :: !ByteString, len :: !Int64 }
- data Header
- nTargets :: Header -> Int
- targetSeqList :: Header -> [HeaderSeq]
- targetSeq :: Header -> Int -> HeaderSeq
- targetSeqName :: Header -> Int -> ByteString
- targetSeqLen :: Header -> Int -> Int64
- lookupTarget :: Header -> ByteString -> Maybe Int
- data Bam1
- header :: Bam1 -> Header
- targetID :: Bam1 -> Maybe Int
- targetName :: Bam1 -> Maybe ByteString
- targetLen :: Bam1 -> Maybe Int64
- position :: Bam1 -> Maybe Int64
- isPaired :: Bam1 -> Bool
- isProperPair :: Bam1 -> Bool
- isUnmap :: Bam1 -> Bool
- isMateUnmap :: Bam1 -> Bool
- isReverse :: Bam1 -> Bool
- isMateReverse :: Bam1 -> Bool
- isRead1 :: Bam1 -> Bool
- isRead2 :: Bam1 -> Bool
- isSecondary :: Bam1 -> Bool
- isQCFail :: Bam1 -> Bool
- isDup :: Bam1 -> Bool
- isSupplementary :: Bam1 -> Bool
- cigars :: Bam1 -> [Cigar]
- queryName :: Bam1 -> ByteString
- queryLength :: Bam1 -> Maybe Int64
- querySeq :: Bam1 -> Maybe ByteString
- queryQual :: Bam1 -> Maybe ByteString
- mateTargetID :: Bam1 -> Maybe Int
- mateTargetName :: Bam1 -> Maybe ByteString
- mateTargetLen :: Bam1 -> Maybe Int64
- matePosition :: Bam1 -> Maybe Int64
- insertSize :: Bam1 -> Maybe Int64
- nMismatch :: Bam1 -> Maybe Int
- nHits :: Bam1 -> Maybe Int
- matchDesc :: Bam1 -> Maybe ByteString
- auxGeti :: Bam1 -> String -> Maybe Int
- auxGetf :: Bam1 -> String -> Maybe Float
- auxGetd :: Bam1 -> String -> Maybe Double
- auxGetA :: Bam1 -> String -> Maybe Char
- auxGetZ :: Bam1 -> String -> Maybe ByteString
- auxGet :: AuxGet a => Bam1 -> String -> Maybe a
- addAuxA :: Bam1 -> String -> Char -> IO Bam1
- addAuxi :: Bam1 -> String -> Int -> IO Bam1
- addAuxZ :: Bam1 -> String -> String -> IO Bam1
- refSpLoc :: Bam1 -> Maybe SpliceLoc
- refSeqLoc :: Bam1 -> Maybe SpliceSeqLoc
- data InHandle
- inHeader :: InHandle -> Header
- openTamInFile :: FilePath -> IO InHandle
- openTamInFileWithIndex :: FilePath -> FilePath -> IO InHandle
- openBamInFile :: FilePath -> IO InHandle
- closeInHandle :: InHandle -> IO ()
- withTamInFile :: FilePath -> (InHandle -> IO a) -> IO a
- withTamInFileWithIndex :: FilePath -> FilePath -> (InHandle -> IO a) -> IO a
- withBamInFile :: FilePath -> (InHandle -> IO a) -> IO a
- get1 :: InHandle -> IO (Maybe Bam1)
- readBams :: FilePath -> IO [Bam1]
- data OutHandle
- outHeader :: OutHandle -> Header
- openTamOutFile :: FilePath -> Header -> IO OutHandle
- openBamOutFile :: FilePath -> Header -> IO OutHandle
- closeOutHandle :: OutHandle -> IO ()
- withTamOutFile :: FilePath -> Header -> (OutHandle -> IO a) -> IO a
- withBamOutFile :: FilePath -> Header -> (OutHandle -> IO a) -> IO a
- put1 :: OutHandle -> Bam1 -> IO ()
Target sequence sets
Information about one target sequence in a SAM alignment set
HeaderSeq | |
|
targetSeqList :: Header -> [HeaderSeq] Source #
Returns the list of target sequences
targetSeq :: Header -> Int -> HeaderSeq Source #
Returns a target sequence by ID, which is a 0-based index
targetSeqName :: Header -> Int -> ByteString Source #
Returns a target sequence name by ID
lookupTarget :: Header -> ByteString -> Maybe Int Source #
SAM/BAM format alignments
targetName :: Bam1 -> Maybe ByteString Source #
isProperPair :: Bam1 -> Bool Source #
Is the pair properly aligned (usually based on relative orientation and distance)
isMateUnmap :: Bam1 -> Bool Source #
Is the read paired and the mate unmapped
isMateReverse :: Bam1 -> Bool Source #
Is the read paired and the mate's reverse complement aligned to the target
isSecondary :: Bam1 -> Bool Source #
Is the fragment alignment secondary
isSupplementary :: Bam1 -> Bool Source #
Is the read a supplementary read
queryName :: Bam1 -> ByteString Source #
Name of the query sequence
mateTargetName :: Bam1 -> Maybe ByteString Source #
matePosition :: Bam1 -> Maybe Int64 Source #
`Just` the 0-based coordinate of the left-most position in the mate alignment on the target, or `Nothing` when the read is unpaired or the mate is unmapped.
refSpLoc :: Bam1 -> Maybe SpliceLoc Source #
`Just` the reference sequence location covered by the alignment. This includes nucleotide positions that are reported to be deleted in the read, but not skipped nucleotide positions (typically intronic positions in a spliced alignment). If the reference location is unavailable, e.g. for an unmapped read or for a read with no CIGAR format alignment information, then `Nothing`.
refSeqLoc :: Bam1 -> Maybe SpliceSeqLoc Source #
`Just` the reference sequence location (as per `refSpLoc`) on the target reference (as per `targetName`)
Reading SAM/BAM format files
openTamInFile :: FilePath -> IO InHandle Source #
Open a TAM (tab-delimited text) format file with `@SQ` headers for the target sequence set.
openTamInFileWithIndex :: FilePath -> FilePath -> IO InHandle Source #
Open a TAM format file with a separate target sequence set index
closeInHandle :: InHandle -> IO () Source #
Close a SAM/BAM format alignment input handle
Target sequence set data is still available after the file input has been closed.
withTamInFile :: FilePath -> (InHandle -> IO a) -> IO a Source #
Run an IO action using a handle to a TAM format file that will be opened (see `openTamInFile`) and closed for the action.
withTamInFileWithIndex :: FilePath -> FilePath -> (InHandle -> IO a) -> IO a Source #
As `withTamInFile` with a separate target sequence index set (see `openTamInFileWithIndex`)
withBamInFile :: FilePath -> (InHandle -> IO a) -> IO a Source #
As `withTamInFile` for BAM (binary) format files
get1 :: InHandle -> IO (Maybe Bam1) Source #
Reads one alignment from an input handle, or returns `Nothing` for end-of-file
Writing SAM/BAM format files
openTamOutFile :: FilePath -> Header -> IO OutHandle Source #
Open a TAM format file with `@SQ` headers for writing alignments
openBamOutFile :: FilePath -> Header -> IO OutHandle Source #
Open a BAM format file for writing alignments
closeOutHandle :: OutHandle -> IO () Source #
Close an alignment output handle