Safe Haskell | None |
---|---|
Language | Haskell98 |
This module provides a fairly direct representation of the SAM/BAM alignment format, along with an interface to read and write alignments in this format.
The package is based on the C SamTools library available at
http://samtools.sourceforge.net/
and the SAM/BAM file format is described here
http://samtools.sourceforge.net/SAM-1.3.pdf
This package does not perform alignment itself: it reads alignment files generated by other tools (and can write alignments back out in the same formats). The meaning of the various flags is therefore determined by the program that produced the alignment file.
- data HeaderSeq = HeaderSeq {
- name :: !ByteString
- len :: !Int64
- data Header
- nTargets :: Header -> Int
- targetSeqList :: Header -> [HeaderSeq]
- targetSeq :: Header -> Int -> HeaderSeq
- targetSeqName :: Header -> Int -> ByteString
- targetSeqLen :: Header -> Int -> Int64
- lookupTarget :: Header -> ByteString -> Maybe Int
- data Bam1
- header :: Bam1 -> Header
- targetID :: Bam1 -> Maybe Int
- targetName :: Bam1 -> Maybe ByteString
- targetLen :: Bam1 -> Maybe Int64
- position :: Bam1 -> Maybe Int64
- isPaired :: Bam1 -> Bool
- isProperPair :: Bam1 -> Bool
- isUnmap :: Bam1 -> Bool
- isMateUnmap :: Bam1 -> Bool
- isReverse :: Bam1 -> Bool
- isMateReverse :: Bam1 -> Bool
- isRead1 :: Bam1 -> Bool
- isRead2 :: Bam1 -> Bool
- isSecondary :: Bam1 -> Bool
- isQCFail :: Bam1 -> Bool
- isDup :: Bam1 -> Bool
- cigars :: Bam1 -> [Cigar]
- queryName :: Bam1 -> ByteString
- queryLength :: Bam1 -> Maybe Int64
- querySeq :: Bam1 -> Maybe ByteString
- queryQual :: Bam1 -> Maybe ByteString
- mateTargetID :: Bam1 -> Maybe Int
- mateTargetName :: Bam1 -> Maybe ByteString
- mateTargetLen :: Bam1 -> Maybe Int64
- matePosition :: Bam1 -> Maybe Int64
- insertSize :: Bam1 -> Maybe Int64
- nMismatch :: Bam1 -> Maybe Int
- nHits :: Bam1 -> Maybe Int
- matchDesc :: Bam1 -> Maybe ByteString
- auxGeti :: Bam1 -> String -> Maybe Int
- auxGetf :: Bam1 -> String -> Maybe Float
- auxGetd :: Bam1 -> String -> Maybe Double
- auxGetA :: Bam1 -> String -> Maybe Char
- auxGetZ :: Bam1 -> String -> Maybe ByteString
- auxGet :: AuxGet a => Bam1 -> String -> Maybe a
- addAuxA :: Bam1 -> String -> Char -> IO Bam1
- addAuxi :: Bam1 -> String -> Int -> IO Bam1
- addAuxZ :: Bam1 -> String -> String -> IO Bam1
- refSpLoc :: Bam1 -> Maybe SpliceLoc
- refSeqLoc :: Bam1 -> Maybe SpliceSeqLoc
- data InHandle
- inHeader :: InHandle -> Header
- openTamInFile :: FilePath -> IO InHandle
- openTamInFileWithIndex :: FilePath -> FilePath -> IO InHandle
- openBamInFile :: FilePath -> IO InHandle
- closeInHandle :: InHandle -> IO ()
- withTamInFile :: FilePath -> (InHandle -> IO a) -> IO a
- withTamInFileWithIndex :: FilePath -> FilePath -> (InHandle -> IO a) -> IO a
- withBamInFile :: FilePath -> (InHandle -> IO a) -> IO a
- get1 :: InHandle -> IO (Maybe Bam1)
- readBams :: FilePath -> IO [Bam1]
- data OutHandle
- outHeader :: OutHandle -> Header
- openTamOutFile :: FilePath -> Header -> IO OutHandle
- openBamOutFile :: FilePath -> Header -> IO OutHandle
- closeOutHandle :: OutHandle -> IO ()
- withTamOutFile :: FilePath -> Header -> (OutHandle -> IO a) -> IO a
- withBamOutFile :: FilePath -> Header -> (OutHandle -> IO a) -> IO a
- put1 :: OutHandle -> Bam1 -> IO ()
Target sequence sets
Information about one target sequence in a SAM alignment set
HeaderSeq | |
|
targetSeqList :: Header -> [HeaderSeq] Source
Returns the list of target sequences
targetSeq :: Header -> Int -> HeaderSeq Source
Returns a target sequence by ID, which is a 0-based index
targetSeqName :: Header -> Int -> ByteString Source
Returns a target sequence name by ID
targetSeqLen :: Header -> Int -> Int64 Source
lookupTarget :: Header -> ByteString -> Maybe Int Source
SAM/BAM format alignments
targetName :: Bam1 -> Maybe ByteString Source
isProperPair :: Bam1 -> Bool Source
Is the pair properly aligned (usually based on relative orientation and distance)
isMateUnmap :: Bam1 -> Bool Source
Is the read paired and the mate unmapped
isMateReverse :: Bam1 -> Bool Source
Is the read paired and the mate's reverse complement aligned to the target
isSecondary :: Bam1 -> Bool Source
Is the fragment alignment secondary
queryName :: Bam1 -> ByteString Source
Name of the query sequence
queryLength :: Bam1 -> Maybe Int64 Source
querySeq :: Bam1 -> Maybe ByteString Source
queryQual :: Bam1 -> Maybe ByteString Source
mateTargetID :: Bam1 -> Maybe Int Source
mateTargetName :: Bam1 -> Maybe ByteString Source
mateTargetLen :: Bam1 -> Maybe Int64 Source
matePosition :: Bam1 -> Maybe Int64 Source
Just the 0-based coordinate of the left-most position in the mate alignment on the target, or Nothing when the read is unpaired or the mate is unmapped.
insertSize :: Bam1 -> Maybe Int64 Source
matchDesc :: Bam1 -> Maybe ByteString Source
refSpLoc :: Bam1 -> Maybe SpliceLoc Source
Just the reference sequence location covered by the alignment. This includes nucleotide positions that are reported to be deleted in the read, but not skipped nucleotide positions (typically intronic positions in a spliced alignment). If the reference location is unavailable, e.g. for an unmapped read or for a read with no CIGAR format alignment information, then Nothing.
refSeqLoc :: Bam1 -> Maybe SpliceSeqLoc Source
Just the reference sequence location (as per refSpLoc) on the target reference (as per targetName).
Reading SAM/BAM format files
openTamInFile :: FilePath -> IO InHandle Source
Open a TAM (tab-delimited text) format file with @SQ headers for the target sequence set.
openTamInFileWithIndex :: FilePath -> FilePath -> IO InHandle Source
Open a TAM format file with a separate target sequence set index
openBamInFile :: FilePath -> IO InHandle Source
Open a BAM (binary) format file
closeInHandle :: InHandle -> IO () Source
Close a SAM/BAM format alignment input handle
Target sequence set data is still available after the file input has been closed.
withTamInFile :: FilePath -> (InHandle -> IO a) -> IO a Source
Run an IO action using a handle to a TAM format file that will be opened (see openTamInFile) and closed for the action.
withTamInFileWithIndex :: FilePath -> FilePath -> (InHandle -> IO a) -> IO a Source
As withTamInFile, with a separate target sequence set index (see openTamInFileWithIndex).
withBamInFile :: FilePath -> (InHandle -> IO a) -> IO a Source
As withTamInFile, for BAM (binary) format files.
get1 :: InHandle -> IO (Maybe Bam1) Source
Reads one alignment from an input handle, or returns Nothing for end-of-file.
Writing SAM/BAM format files
openTamOutFile :: FilePath -> Header -> IO OutHandle Source
Open a TAM format file with @SQ headers for writing alignments.
openBamOutFile :: FilePath -> Header -> IO OutHandle Source
Open a BAM format file for writing alignments
closeOutHandle :: OutHandle -> IO () Source
Close an alignment output handle