samtools-0.2.1.2: Binding to the C samtools library

Safe Haskell: Safe-Inferred

Bio.SamTools.Bam

Contents

Description

This module provides a fairly direct representation of the SAM/BAM alignment format, along with an interface to read and write alignments in this format.

The package is based on the C SamTools library available at

http://samtools.sourceforge.net/

and the SAM/BAM file format is described here

http://samtools.sourceforge.net/SAM-1.3.pdf

This package only reads existing alignment files generated by other tools. The meaning of the various flags is actually determined by the program that produced the alignment file.

Synopsis

Target sequence sets

data HeaderSeq Source

Information about one target sequence in a SAM alignment set

Constructors

HeaderSeq 

Fields

name :: !ByteString

Target sequence name

len :: !Int64

Target sequence length

data Header Source

Target sequences from a SAM alignment set

nTargets :: Header -> IntSource

Number of target sequences

targetSeqList :: Header -> [HeaderSeq]Source

Returns the list of target sequences

targetSeq :: Header -> Int -> HeaderSeqSource

Returns a target sequence by ID, which is a 0-based index

targetSeqName :: Header -> Int -> ByteStringSource

Returns a target sequence name by ID

SAM/BAM format alignments

data Bam1 Source

SAM/BAM format alignment

Instances

targetID :: Bam1 -> Maybe IntSource

Just the reference target sequence ID in the target set, or Nothing for an unmapped read

targetName :: Bam1 -> Maybe ByteStringSource

Just the target sequence name, or Nothing for an unmapped read

targetLen :: Bam1 -> Maybe Int64Source

Just the total length of the target sequence, or Nothing for an unmapped read

position :: Bam1 -> Maybe Int64Source

Just the 0-based index of the leftmost aligned position on the target sequence, or Nothing for an unmapped read

isPaired :: Bam1 -> BoolSource

Is the read paired

isProperPair :: Bam1 -> BoolSource

Is the pair properly aligned (usually based on relative orientation and distance)

isUnmap :: Bam1 -> BoolSource

Is the read unmapped

isMateUnmap :: Bam1 -> BoolSource

Is the read paired and the mate unmapped

isReverse :: Bam1 -> BoolSource

Is the fragment's reverse complement aligned to the target

isMateReverse :: Bam1 -> BoolSource

Is the read paired and the mate's reverse complement aligned to the target

isRead1 :: Bam1 -> BoolSource

Is the fragment from the first read in the template

isRead2 :: Bam1 -> BoolSource

Is the fragment from the second read in the template

isSecondary :: Bam1 -> BoolSource

Is the fragment alignment secondary

isQCFail :: Bam1 -> BoolSource

Did the read fail quality controls

isDup :: Bam1 -> BoolSource

Is the read a technical duplicate

cigars :: Bam1 -> [Cigar]Source

CIGAR description of the alignment

queryName :: Bam1 -> ByteStringSource

Name of the query sequence

queryLength :: Bam1 -> Maybe Int64Source

Just the length of the query sequence, or Nothing when it is unavailable.

querySeq :: Bam1 -> Maybe ByteStringSource

Just the query sequence, or Nothing when it is unavailable

queryQual :: Bam1 -> Maybe ByteStringSource

Just the query qualities, or Nothing when it is unavailable. These are returned in ASCII format, i.e., q + 33.

mateTargetID :: Bam1 -> Maybe IntSource

Just the target ID of the mate alignment target reference sequence, or Nothing when the mate is unmapped or the read is unpaired.

mateTargetName :: Bam1 -> Maybe ByteStringSource

Just the name of the mate alignment target reference sequence, or Nothing when the mate is unmapped or the read is unpaired.

mateTargetLen :: Bam1 -> Maybe Int64Source

Just the length of the mate alignment target reference sequence, or Nothing when the mate is unmapped or the read is unpaired.

matePosition :: Bam1 -> Maybe Int64Source

Just the 0-based coordinate of the left-most position in the mate alignment on the target, or Nothing when the read is unpaired or the mate is unmapped.

insertSize :: Bam1 -> Maybe Int64Source

Just the total insert length, or Nothing when the length is unavailable, e.g. because the read is unpaired or the mated read pair do not align in the proper relative orientation on the same strand.

nMismatch :: Bam1 -> Maybe IntSource

Just the number of mismatches in the alignment, or Nothing when this information is not present

nHits :: Bam1 -> Maybe IntSource

Just the number of reported alignments, or Nothing when this information is not present.

matchDesc :: Bam1 -> Maybe ByteStringSource

Just the match descriptor alignment field, or Nothing when it is absent

refSpLoc :: Bam1 -> Maybe SpliceLocSource

Just the reference sequence location covered by the alignment. This includes nucleotide positions that are reported to be deleted in the read, but not skipped nucleotide positions (typically intronic positions in a spliced alignment). If the reference location is unavailable, e.g. for an unmapped read or for a read with no CIGAR format alignment information, then Nothing.

refSeqLoc :: Bam1 -> Maybe SpliceSeqLocSource

Just the reference sequence location (as per refSpLoc) on the target reference (as per targetName)

Reading SAM/BAM format files

data InHandle Source

Handle for reading SAM/BAM format alignments

inHeader :: InHandle -> HeaderSource

Target sequence set for the alignments

openTamInFile :: FilePath -> IO InHandleSource

Open a TAM (tab-delimited text) format file with @SQ headers for the target sequence set.

openTamInFileWithIndex :: FilePath -> FilePath -> IO InHandleSource

Open a TAM format file with a separate target sequence set index

openBamInFile :: FilePath -> IO InHandleSource

Open a BAM (binary) format file

closeInHandle :: InHandle -> IO ()Source

Close a SAM/BAM format alignment input handle

Target sequence set data is still available after the file input has been closed.

withTamInFile :: FilePath -> (InHandle -> IO a) -> IO aSource

Run an IO action using a handle to a TAM format file that will be opened (see openTamInFile) and closed for the action.

withTamInFileWithIndex :: FilePath -> FilePath -> (InHandle -> IO a) -> IO aSource

As withTamInFile with a separate target sequence set index (see openTamInFileWithIndex)

withBamInFile :: FilePath -> (InHandle -> IO a) -> IO aSource

As withTamInFile for BAM (binary) format files

get1 :: InHandle -> IO (Maybe Bam1)Source

Reads one alignment from an input handle, or returns Nothing for end-of-file

readBams :: FilePath -> IO [Bam1]Source

Read a BAM file as a lazy stream of Bam1 records.

Writing SAM/BAM format files

data OutHandle Source

Handle for writing SAM/BAM format alignments

outHeader :: OutHandle -> HeaderSource

Target sequence set for the alignments

openTamOutFile :: FilePath -> Header -> IO OutHandleSource

Open a TAM format file with @SQ headers for writing alignments

openBamOutFile :: FilePath -> Header -> IO OutHandleSource

Open a BAM format file for writing alignments

closeOutHandle :: OutHandle -> IO ()Source

Close an alignment output handle

put1 :: OutHandle -> Bam1 -> IO ()Source

Writes one alignment to an output handle.

There is no validation that the target sequence set of the output handle matches the target sequence set of the alignment.