-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Binding to the C samtools library -- -- Binding to the C samtools library, which reads and writes SAM format -- alignments, both binary and tab-delimited text formats. It also -- supports rapid access to specific alignments in a sorted BAM file and -- access to sequences from indexed Fasta files. @package samtools @version 0.2.1 -- | Fetch sequences from an indexed fasta file module Bio.SamTools.FaIdx -- | Input handle for an indexed fasta file data InHandle -- | Name of the fasta file filename :: InHandle -> FilePath -- | Open an indexed fasta file open :: FilePath -> IO InHandle close :: InHandle -> IO () withFastaIndex :: FilePath -> (InHandle -> IO a) -> IO a -- | Fetch a region specified by sequence name and coordinates, or the -- empty string when the sequence is not found. fetch :: InHandle -> ByteString -> (Int64, Int64) -> IO ByteString fetchLoc :: Location l => InHandle -> OnSeq l -> IO (Maybe ByteString) readLoc :: Location l => FilePath -> OnSeq l -> IO (Maybe ByteString) -- | Handling of the extended CIGAR pair-wise alignment descriptors module Bio.SamTools.Cigar -- | Cigar entry types data CigarType -- | Aligned nucleotide, may be a match or mismatch Match :: CigarType -- | Insertion in read relative to reference Ins :: CigarType -- | Deletion from read relative to reference Del :: CigarType -- | Skipped reference bases, i.e., splice RefSkip :: CigarType -- | Trimmed nucleotides, still present in read SoftClip :: CigarType -- | Trimmed nucleotides, removed from read HardClip :: CigarType -- | Deletion from padded reference Pad :: CigarType -- | Cigar entry including length data Cigar Cigar :: !CigarType -> !Int64 -> Cigar cigar :: Cigar -> !CigarType length :: Cigar -> !Int64 -- | Convert a BAM binary cigar integer to a Cigar toCigar :: CUInt -> Cigar cigarToSpLoc :: Int64 -> [Cigar] -> SpliceLoc cigarToAlignment :: Int64 -> [Cigar] -> [(Maybe Pos, Maybe 
Pos)] instance Show CigarType instance Ord CigarType instance Eq CigarType instance Enum CigarType instance Bounded CigarType instance Show Cigar instance Ord Cigar instance Eq Cigar -- | This module provides a fairly direct representation of the SAM/BAM -- alignment format, along with an interface to read and write alignments -- in this format. -- -- The package is based on the C SamTools library available at -- -- http://samtools.sourceforge.net/ -- -- and the SAM/BAM file format is described here -- -- http://samtools.sourceforge.net/SAM-1.3.pdf -- -- This package only reads existing alignment files generated by other -- tools. The meaning of the various flags is actually determined by the -- program that produced the alignment file. module Bio.SamTools.Bam -- | Information about one target sequence in a SAM alignment set data HeaderSeq HeaderSeq :: !ByteString -> !Int64 -> HeaderSeq -- | Target sequence name name :: HeaderSeq -> !ByteString -- | Target sequence length len :: HeaderSeq -> !Int64 -- | Target sequences from a SAM alignment set data Header -- | Number of target sequences nTargets :: Header -> Int -- | Returns the list of target sequences targetSeqList :: Header -> [HeaderSeq] -- | Returns a target sequence by ID, which is a 0-based index targetSeq :: Header -> Int -> HeaderSeq -- | Returns a target sequence name by ID targetSeqName :: Header -> Int -> ByteString targetSeqLen :: Header -> Int -> Int64 lookupTarget :: Header -> ByteString -> Maybe Int -- | SAM/BAM format alignment data Bam1 -- | Just the reference target sequence ID in the target set, or -- Nothing for an unmapped read targetID :: Bam1 -> Maybe Int -- | Just the target sequence name, or Nothing for an -- unmapped read targetName :: Bam1 -> Maybe ByteString -- | Just the total length of the target sequence, or Nothing -- for an unmapped read targetLen :: Bam1 -> Maybe Int64 -- | Just the 0-based index of the leftmost aligned position on the -- target sequence, or Nothing for an 
unmapped read position :: Bam1 -> Maybe Int64 -- | Is the read paired isPaired :: Bam1 -> Bool -- | Is the pair properly aligned (usually based on relative orientation -- and distance) isProperPair :: Bam1 -> Bool -- | Is the read unmapped isUnmap :: Bam1 -> Bool -- | Is the read paired and the mate unmapped isMateUnmap :: Bam1 -> Bool -- | Is the fragment's reverse complement aligned to the target isReverse :: Bam1 -> Bool -- | Is the read paired and the mate's reverse complement aligned to the -- target isMateReverse :: Bam1 -> Bool -- | Is the fragment from the first read in the template isRead1 :: Bam1 -> Bool -- | Is the fragment from the second read in the template isRead2 :: Bam1 -> Bool -- | Is the fragment alignment secondary isSecondary :: Bam1 -> Bool -- | Did the read fail quality controls isQCFail :: Bam1 -> Bool -- | Is the read a technical duplicate isDup :: Bam1 -> Bool -- | CIGAR description of the alignment cigars :: Bam1 -> [Cigar] -- | Name of the query sequence queryName :: Bam1 -> ByteString -- | Just the length of the query sequence, or Nothing when -- it is unavailable. queryLength :: Bam1 -> Maybe Int64 -- | Just the query sequence, or Nothing when it is -- unavailable querySeq :: Bam1 -> Maybe ByteString -- | Just the query qualities, or Nothing when it is -- unavailable. These are returned in ASCII format, i.e., q + 33. queryQual :: Bam1 -> Maybe ByteString -- | Just the target ID of the mate alignment target reference -- sequence, or Nothing when the mate is unmapped or the read is -- unpaired. mateTargetID :: Bam1 -> Maybe Int -- | Just the name of the mate alignment target reference sequence, -- or Nothing when the mate is unmapped or the read is unpaired. mateTargetName :: Bam1 -> Maybe ByteString -- | Just the length of the mate alignment target reference -- sequence, or Nothing when the mate is unmapped or the read is -- unpaired. 
mateTargetLen :: Bam1 -> Maybe Int64 -- | Just the 0-based coordinate of the left-most position in the mate -- alignment on the target, or Nothing when the read is unpaired -- or the mate is unmapped. matePosition :: Bam1 -> Maybe Int64 -- | Just the total insert length, or Nothing when the length -- is unavailable, e.g. because the read is unpaired or the mated read -- pair do not align in the proper relative orientation on the same -- strand. insertSize :: Bam1 -> Maybe Int64 -- | Just the number of mismatches in the alignment, or -- Nothing when this information is not present nMismatch :: Bam1 -> Maybe Int -- | Just the number of reported alignments, or Nothing when -- this information is not present. nHits :: Bam1 -> Maybe Int -- | Just the match descriptor alignment field, or Nothing -- when it is absent matchDesc :: Bam1 -> Maybe ByteString -- | Just the reference sequence location covered by the alignment. -- This includes nucleotide positions that are reported to be deleted in -- the read, but not skipped nucleotide positions (typically intronic -- positions in a spliced alignment). If the reference location is -- unavailable, e.g. for an unmapped read or for a read with no CIGAR -- format alignment information, then Nothing. refSpLoc :: Bam1 -> Maybe SpliceLoc -- | Just the reference sequence location (as per refSpLoc) -- on the target reference (as per targetName) refSeqLoc :: Bam1 -> Maybe SpliceSeqLoc -- | Handle for reading SAM/BAM format alignments data InHandle -- | Target sequence set for the alignments inHeader :: InHandle -> Header -- | Open a TAM (tab-delimited text) format file with @SQ headers -- for the target sequence set. 
openTamInFile :: FilePath -> IO InHandle -- | Open a TAM format file with a separate target sequence set index openTamInFileWithIndex :: FilePath -> FilePath -> IO InHandle -- | Open a BAM (binary) format file openBamInFile :: FilePath -> IO InHandle -- | Close a SAM/BAM format alignment input handle -- -- Target sequence set data is still available after the file input has -- been closed. closeInHandle :: InHandle -> IO () -- | Run an IO action using a handle to a TAM format file that will be -- opened (see openTamInFile) and closed for the action. withTamInFile :: FilePath -> (InHandle -> IO a) -> IO a -- | As withTamInFile with a separate target sequence index set (see -- openTamInFileWithIndex) withTamInFileWithIndex :: FilePath -> FilePath -> (InHandle -> IO a) -> IO a -- | As withTamInFile for BAM (binary) format files withBamInFile :: FilePath -> (InHandle -> IO a) -> IO a -- | Reads one alignment from an input handle, or returns Nothing -- for end-of-file get1 :: InHandle -> IO (Maybe Bam1) -- | Read a BAM file as a lazy stream of Bam1 records. readBams :: FilePath -> IO [Bam1] -- | Handle for writing SAM/BAM format alignments data OutHandle -- | Target sequence set for the alignments outHeader :: OutHandle -> Header -- | Open a TAM format file with @SQ headers for writing -- alignments openTamOutFile :: FilePath -> Header -> IO OutHandle -- | Open a BAM format file for writing alignments openBamOutFile :: FilePath -> Header -> IO OutHandle -- | Close an alignment output handle closeOutHandle :: OutHandle -> IO () withTamOutFile :: FilePath -> Header -> (OutHandle -> IO a) -> IO a withBamOutFile :: FilePath -> Header -> (OutHandle -> IO a) -> IO a -- | Writes one alignment to an output handle. -- -- There is no validation that the target sequence set of the output -- handle matches the target sequence set of the alignment. 
put1 :: OutHandle -> Bam1 -> IO () -- | This module provides an interface to sorted, indexed BAM alignment -- files, which allow rapid extraction of alignments that lie within one -- specific region of one sequence. module Bio.SamTools.BamIndex -- | Handle for fetching alignments by region from a sorted, indexed BAM -- file. data IdxHandle -- | Filename of sorted, indexed BAM file idxFilename :: IdxHandle -> FilePath -- | Target sequences idxHeader :: IdxHandle -> Header -- | Open a sorted, indexed BAM file. open :: FilePath -> IO IdxHandle close :: IdxHandle -> IO () withIndex :: FilePath -> (IdxHandle -> IO a) -> IO a data Query qyHandle :: Query -> IdxHandle query :: IdxHandle -> Int -> (Int64, Int64) -> IO Query next :: Query -> IO (Maybe Bam1) -- | Use a BAM index file to extract Bam1 records aligned to a -- specific target sequence (chromosome) number and region. readBamRegion :: IdxHandle -> Int -> (Int64, Int64) -> IO [Bam1]