Safe Haskell | Safe-Inferred |
---|
This module provides a fairly direct representation of the SAM/BAM alignment format, along with an interface to read and write alignments in this format.
The package is based on the C SamTools library available at
http://samtools.sourceforge.net/
and the SAM/BAM file format is described here
http://samtools.sourceforge.net/SAM-1.3.pdf
This package only reads existing alignment files generated by other tools. The meaning of the various flags is actually determined by the program that produced the alignment file.
- data HeaderSeq = HeaderSeq { name :: !ByteString, len :: !Int64 }
- data Header
- nTargets :: Header -> Int
- targetSeqList :: Header -> [HeaderSeq]
- targetSeq :: Header -> Int -> HeaderSeq
- targetSeqName :: Header -> Int -> ByteString
- targetSeqLen :: Header -> Int -> Int64
- lookupTarget :: Header -> ByteString -> Maybe Int
- data Bam1
- targetID :: Bam1 -> Maybe Int
- targetName :: Bam1 -> Maybe ByteString
- targetLen :: Bam1 -> Maybe Int64
- position :: Bam1 -> Maybe Int64
- isPaired :: Bam1 -> Bool
- isProperPair :: Bam1 -> Bool
- isUnmap :: Bam1 -> Bool
- isMateUnmap :: Bam1 -> Bool
- isReverse :: Bam1 -> Bool
- isMateReverse :: Bam1 -> Bool
- isRead1 :: Bam1 -> Bool
- isRead2 :: Bam1 -> Bool
- isSecondary :: Bam1 -> Bool
- isQCFail :: Bam1 -> Bool
- isDup :: Bam1 -> Bool
- cigars :: Bam1 -> [Cigar]
- queryName :: Bam1 -> ByteString
- queryLength :: Bam1 -> Maybe Int64
- querySeq :: Bam1 -> Maybe ByteString
- queryQual :: Bam1 -> Maybe ByteString
- mateTargetID :: Bam1 -> Maybe Int
- mateTargetName :: Bam1 -> Maybe ByteString
- mateTargetLen :: Bam1 -> Maybe Int64
- matePosition :: Bam1 -> Maybe Int64
- insertSize :: Bam1 -> Maybe Int64
- nMismatch :: Bam1 -> Maybe Int
- nHits :: Bam1 -> Maybe Int
- matchDesc :: Bam1 -> Maybe ByteString
- refSpLoc :: Bam1 -> Maybe SpliceLoc
- refSeqLoc :: Bam1 -> Maybe SpliceSeqLoc
- data InHandle
- inHeader :: InHandle -> Header
- openTamInFile :: FilePath -> IO InHandle
- openTamInFileWithIndex :: FilePath -> FilePath -> IO InHandle
- openBamInFile :: FilePath -> IO InHandle
- closeInHandle :: InHandle -> IO ()
- withTamInFile :: FilePath -> (InHandle -> IO a) -> IO a
- withTamInFileWithIndex :: FilePath -> FilePath -> (InHandle -> IO a) -> IO a
- withBamInFile :: FilePath -> (InHandle -> IO a) -> IO a
- get1 :: InHandle -> IO (Maybe Bam1)
- readBams :: FilePath -> IO [Bam1]
- data OutHandle
- outHeader :: OutHandle -> Header
- openTamOutFile :: FilePath -> Header -> IO OutHandle
- openBamOutFile :: FilePath -> Header -> IO OutHandle
- closeOutHandle :: OutHandle -> IO ()
- withTamOutFile :: FilePath -> Header -> (OutHandle -> IO a) -> IO a
- withBamOutFile :: FilePath -> Header -> (OutHandle -> IO a) -> IO a
- put1 :: OutHandle -> Bam1 -> IO ()
Target sequence sets
Information about one target sequence in a SAM alignment set
Constructors
HeaderSeq { name :: !ByteString, len :: !Int64 }
targetSeqList :: Header -> [HeaderSeq]
Returns the list of target sequences
targetSeq :: Header -> Int -> HeaderSeq
Returns a target sequence by ID, which is a 0-based index
targetSeqName :: Header -> Int -> ByteString
Returns a target sequence name by ID
targetSeqLen :: Header -> Int -> Int64
lookupTarget :: Header -> ByteString -> Maybe Int
SAM/BAM format alignments
targetName :: Bam1 -> Maybe ByteString
isProperPair :: Bam1 -> Bool
Is the pair properly aligned (usually based on relative orientation and distance)
isMateUnmap :: Bam1 -> Bool
Is the read paired and the mate unmapped
isMateReverse :: Bam1 -> Bool
Is the read paired and the mate's reverse complement aligned to the target
isSecondary :: Bam1 -> Bool
Is the fragment alignment secondary
queryName :: Bam1 -> ByteString
Name of the query sequence
queryLength :: Bam1 -> Maybe Int64
querySeq :: Bam1 -> Maybe ByteString
queryQual :: Bam1 -> Maybe ByteString
mateTargetID :: Bam1 -> Maybe Int
mateTargetLen :: Bam1 -> Maybe Int64
matePosition :: Bam1 -> Maybe Int64
Just the 0-based coordinate of the left-most position in the
mate alignment on the target, or Nothing when the read is
unpaired or the mate is unmapped.
insertSize :: Bam1 -> Maybe Int64
matchDesc :: Bam1 -> Maybe ByteString
refSpLoc :: Bam1 -> Maybe SpliceLoc
Just the reference sequence location covered by the
alignment. This includes nucleotide positions that are reported to
be deleted in the read, but not skipped nucleotide positions
(typically intronic positions in a spliced alignment). If the
reference location is unavailable, e.g. for an unmapped read or for
a read with no CIGAR format alignment information, then Nothing.
refSeqLoc :: Bam1 -> Maybe SpliceSeqLoc
Just the reference sequence location (as per refSpLoc) on
the target reference (as per targetName)
Reading SAM/BAM format files
openTamInFile :: FilePath -> IO InHandle
Open a TAM (tab-delimited text) format file with @SQ headers
for the target sequence set.
openTamInFileWithIndex :: FilePath -> FilePath -> IO InHandle
Open a TAM format file with a separate target sequence set index
openBamInFile :: FilePath -> IO InHandle
Open a BAM (binary) format file
closeInHandle :: InHandle -> IO ()
Close a SAM/BAM format alignment input handle
Target sequence set data is still available after the file input has been closed.
withTamInFile :: FilePath -> (InHandle -> IO a) -> IO a
Run an IO action using a handle to a TAM format file that will be
opened (see openTamInFile) and closed for the action.
withTamInFileWithIndex :: FilePath -> FilePath -> (InHandle -> IO a) -> IO a
As withTamInFile with a separate target sequence set index (see
openTamInFileWithIndex)
withBamInFile :: FilePath -> (InHandle -> IO a) -> IO a
As withTamInFile for BAM (binary) format files
get1 :: InHandle -> IO (Maybe Bam1)
Reads one alignment from an input handle, or returns Nothing for end-of-file
Writing SAM/BAM format files
openTamOutFile :: FilePath -> Header -> IO OutHandle
Open a TAM format file with @SQ headers for writing alignments
openBamOutFile :: FilePath -> Header -> IO OutHandle
Open a BAM format file for writing alignments
closeOutHandle :: OutHandle -> IO ()
Close an alignment output handle