samtools-0.2.4.1: Binding to the C samtools library

Safe HaskellNone
LanguageHaskell98

Bio.SamTools.Bam

Contents

Description

This module provides a fairly direct representation of the SAM/BAM alignment format, along with an interface to read and write alignments in this format.

The package is based on the C SamTools library available at

http://samtools.sourceforge.net/

and the SAM/BAM file format is described here

http://samtools.sourceforge.net/SAM-1.3.pdf

This package only reads existing alignment files generated by other tools. The meaning of the various flags is actually determined by the program that produced the alignment file.

Synopsis

Target sequence sets

data HeaderSeq Source

Information about one target sequence in a SAM alignment set

Constructors

HeaderSeq 

Fields

name :: !ByteString

Target sequence name

len :: !Int64

Target sequence length

data Header Source

Target sequences from a SAM alignment set

nTargets :: Header -> Int Source

Number of target sequences

targetSeqList :: Header -> [HeaderSeq] Source

Returns the list of target sequences

targetSeq :: Header -> Int -> HeaderSeq Source

Returns a target sequence by ID, which is a 0-based index

targetSeqName :: Header -> Int -> ByteString Source

Returns a target sequence name by ID

SAM/BAM format alignments

data Bam1 Source

SAM/BAM format alignment

Instances

targetID :: Bam1 -> Maybe Int Source

Just the reference target sequence ID in the target set, or Nothing for an unmapped read

targetName :: Bam1 -> Maybe ByteString Source

Just the target sequence name, or Nothing for an unmapped read

targetLen :: Bam1 -> Maybe Int64 Source

Just the total length of the target sequence, or Nothing for an unmapped read

position :: Bam1 -> Maybe Int64 Source

Just the 0-based index of the leftmost aligned position on the target sequence, or Nothing for an unmapped read

isPaired :: Bam1 -> Bool Source

Is the read paired

isProperPair :: Bam1 -> Bool Source

Is the pair properly aligned (usually based on relative orientation and distance)

isUnmap :: Bam1 -> Bool Source

Is the read unmapped

isMateUnmap :: Bam1 -> Bool Source

Is the read paired and the mate unmapped

isReverse :: Bam1 -> Bool Source

Is the fragment's reverse complement aligned to the target

isMateReverse :: Bam1 -> Bool Source

Is the read paired and the mate's reverse complement aligned to the target

isRead1 :: Bam1 -> Bool Source

Is the fragment from the first read in the template

isRead2 :: Bam1 -> Bool Source

Is the fragment from the second read in the template

isSecondary :: Bam1 -> Bool Source

Is the fragment alignment secondary

isQCFail :: Bam1 -> Bool Source

Did the read fail quality controls

isDup :: Bam1 -> Bool Source

Is the read a technical duplicate

cigars :: Bam1 -> [Cigar] Source

CIGAR description of the alignment

queryName :: Bam1 -> ByteString Source

Name of the query sequence

queryLength :: Bam1 -> Maybe Int64 Source

Just the length of the query sequence, or Nothing when it is unavailable.

querySeq :: Bam1 -> Maybe ByteString Source

Just the query sequence, or Nothing when it is unavailable

queryQual :: Bam1 -> Maybe ByteString Source

Just the query qualities, or Nothing when it is unavailable. These are returned in ASCII format, i.e., q + 33.

mateTargetID :: Bam1 -> Maybe Int Source

Just the target ID of the mate alignment target reference sequence, or Nothing when the mate is unmapped or the read is unpaired.

mateTargetName :: Bam1 -> Maybe ByteString Source

Just the name of the mate alignment target reference sequence, or Nothing when the mate is unmapped or the read is unpaired.

mateTargetLen :: Bam1 -> Maybe Int64 Source

Just the length of the mate alignment target reference sequence, or Nothing when the mate is unmapped or the read is unpaired.

matePosition :: Bam1 -> Maybe Int64 Source

Just the 0-based coordinate of the left-most position in the mate alignment on the target, or Nothing when the read is unpaired or the mate is unmapped.

insertSize :: Bam1 -> Maybe Int64 Source

Just the total insert length, or Nothing when the length is unavailable, e.g. because the read is unpaired or the mated read pair do not align in the proper relative orientation on the same strand.

nMismatch :: Bam1 -> Maybe Int Source

Just the number of mismatches in the alignment, or Nothing when this information is not present

nHits :: Bam1 -> Maybe Int Source

Just the number of reported alignments, or Nothing when this information is not present.

matchDesc :: Bam1 -> Maybe ByteString Source

Just the match descriptor alignment field, or Nothing when it is absent

auxGeti :: Bam1 -> String -> Maybe Int Source

Just the requested integer auxiliary field, or Nothing when it is absent

auxGetf :: Bam1 -> String -> Maybe Float Source

Just the requested single-precision float auxiliary field, or Nothing when it is absent

auxGetd :: Bam1 -> String -> Maybe Double Source

Just the requested double-precision float auxiliary field, or Nothing when it is absent

auxGetA :: Bam1 -> String -> Maybe Char Source

Just the requested character auxiliary field, or Nothing when it is absent

auxGetZ :: Bam1 -> String -> Maybe ByteString Source

Just the requested string auxiliary field, or Nothing when it is absent

auxGet :: AuxGet a => Bam1 -> String -> Maybe a Source

refSpLoc :: Bam1 -> Maybe SpliceLoc Source

Just the reference sequence location covered by the alignment. This includes nucleotide positions that are reported to be deleted in the read, but not skipped nucleotide positions (typically intronic positions in a spliced alignment). If the reference location is unavailable, e.g. for an unmapped read or for a read with no CIGAR format alignment information, then Nothing.

refSeqLoc :: Bam1 -> Maybe SpliceSeqLoc Source

Just the reference sequence location (as per refSpLoc) on the target reference (as per targetName)

Reading SAM/BAM format files

data InHandle Source

Handle for reading SAM/BAM format alignments

inHeader :: InHandle -> Header Source

Target sequence set for the alignments

openTamInFile :: FilePath -> IO InHandle Source

Open a TAM (tab-delimited text) format file with @SQ headers for the target sequence set.

openTamInFileWithIndex :: FilePath -> FilePath -> IO InHandle Source

Open a TAM format file with a separate target sequence set index

openBamInFile :: FilePath -> IO InHandle Source

Open a BAM (binary) format file

closeInHandle :: InHandle -> IO () Source

Close a SAM/BAM format alignment input handle

Target sequence set data is still available after the file input has been closed.

withTamInFile :: FilePath -> (InHandle -> IO a) -> IO a Source

Run an IO action using a handle to a TAM format file that will be opened (see openTamInFile) and closed for the action.

withTamInFileWithIndex :: FilePath -> FilePath -> (InHandle -> IO a) -> IO a Source

As withTamInFile with a separate target sequence set index (see openTamInFileWithIndex)

withBamInFile :: FilePath -> (InHandle -> IO a) -> IO a Source

As withTamInFile for BAM (binary) format files

get1 :: InHandle -> IO (Maybe Bam1) Source

Reads one alignment from an input handle, or returns Nothing for end-of-file

readBams :: FilePath -> IO [Bam1] Source

Read a BAM file as a lazy stream of Bam1 records.

Writing SAM/BAM format files

data OutHandle Source

Handle for writing SAM/BAM format alignments

outHeader :: OutHandle -> Header Source

Target sequence set for the alignments

openTamOutFile :: FilePath -> Header -> IO OutHandle Source

Open a TAM format file with @SQ headers for writing alignments

openBamOutFile :: FilePath -> Header -> IO OutHandle Source

Open a BAM format file for writing alignments

closeOutHandle :: OutHandle -> IO () Source

Close an alignment output handle

put1 :: OutHandle -> Bam1 -> IO () Source

Writes one alignment to an output handle.

There is no validation that the target sequence set of the output handle matches the target sequence set of the alignment.