-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Library for reading Blast XML output
--
-- This library contains a data structure and functions for reading
-- output from the BLAST sequence alignment program.
@package blastxml
@version 0.3
-- | This module implements a hierarchical data structure for BLAST
-- results.
--
-- BLAST is a tool for searching in (biological) sequences for
-- similarity. This library is tested against NCBI-blast version 2.2.14.
-- There exist several independent versions of BLAST, so expect some
-- incompatibilities if you're using a different BLAST version.
--
-- For parsing BLAST results, the XML format (blastall -m 7) is by far
-- the most robust choice, and is implemented in the
-- Bio.BlastXML module.
--
-- The format is straightforward (and non-recursive). For more
-- information on BLAST, check
-- http://www.ncbi.nlm.nih.gov/Education/BLASTinfo/information3.html
module Bio.BlastData
-- | The Aux field in the BLAST output includes match information that
-- depends on the BLAST flavor (blastn, blastx, or blastp). This data
-- structure captures those variations.
--
-- NOTE(review): only constructors for blastn and blastx are listed
-- here; confirm how blastp matches are represented.
data Aux
-- | blastn: carries two strict 'Strand' values (presumably the query
-- and subject strands, in that order -- TODO confirm).
Strands :: !Strand -> !Strand -> Aux
-- | blastx: carries a strict 'Strand' and a strict 'Int' (presumably
-- the reading frame -- TODO confirm).
Frame :: !Strand -> !Int -> Aux
-- | A BlastResult is the root of the hierarchy. It holds several
-- strict header fields plus the list of 'BlastRecord's.
data BlastResult
-- | Constructor. Its arguments correspond, in order, to the record
-- fields listed below ('blastprogram' through 'results').
BlastResult :: !ByteString -> !ByteString -> !ByteString -> !ByteString -> !ByteString -> !Integer -> !Integer -> [BlastRecord] -> BlastResult
-- | Strict 'ByteString' field; presumably the name of the BLAST
-- program that produced the output -- TODO confirm.
blastprogram :: BlastResult -> !ByteString
-- | Strict 'ByteString' field; presumably the BLAST version string --
-- TODO confirm.
blastversion :: BlastResult -> !ByteString
-- | Strict 'ByteString' field; presumably a date reported in the
-- BLAST output -- TODO confirm which date.
blastdate :: BlastResult -> !ByteString
-- | Strict 'ByteString' field; presumably the literature reference(s)
-- reported in the BLAST output -- TODO confirm.
blastreferences :: BlastResult -> !ByteString
-- | Strict 'ByteString' field; presumably the name of the database
-- that was searched -- TODO confirm.
database :: BlastResult -> !ByteString
-- | Strict 'Integer' field; presumably the number of sequences in the
-- database -- TODO confirm.
dbsequences :: BlastResult -> !Integer
-- | Strict 'Integer' field; presumably the number of characters
-- (residues) in the database -- TODO confirm.
dbchars :: BlastResult -> !Integer
-- | The 'BlastRecord's of this result. Unlike the other fields, this
-- list is not strict.
results :: BlastResult -> [BlastRecord]
-- | Each query sequence generates a BlastRecord
data BlastRecord
-- | Constructor. Its arguments correspond, in order, to the fields
-- 'query', 'qlength' and 'hits'.
BlastRecord :: !SeqLabel -> !Int -> [BlastHit] -> BlastRecord
-- | Strict 'SeqLabel' field; presumably the label of the query
-- sequence -- TODO confirm.
query :: BlastRecord -> !SeqLabel
-- | Strict 'Int' field; presumably the length of the query sequence
-- -- TODO confirm.
qlength :: BlastRecord -> !Int
-- | The 'BlastHit's belonging to this record (non-strict list).
hits :: BlastRecord -> [BlastHit]
-- | Each match between a query and a target sequence (or subject) is a
-- BlastHit.
data BlastHit
-- | Constructor. Its arguments correspond, in order, to the fields
-- 'subject', 'slength' and 'matches'.
BlastHit :: !SeqLabel -> !Int -> [BlastMatch] -> BlastHit
-- | Strict 'SeqLabel' field; presumably the label of the target
-- (subject) sequence -- TODO confirm.
subject :: BlastHit -> !SeqLabel
-- | Strict 'Int' field; presumably the length of the subject sequence
-- -- TODO confirm.
slength :: BlastHit -> !Int
-- | The separate 'BlastMatch'es that make up this hit (non-strict
-- list).
matches :: BlastHit -> [BlastMatch]
-- | A BlastHit may contain multiple separate matches (typically
-- when an indel causes a frameshift that blastx is unable to bridge).
data BlastMatch
-- | Constructor. Its arguments correspond, in order, to the fields
-- 'bits', 'e_val', 'identity', 'q_from', 'q_to', 'h_from', 'h_to',
-- 'qseq', 'hseq' and 'aux'.
BlastMatch :: !Double -> !Double -> (Int, Int) -> !Int -> !Int -> !Int -> !Int -> !ByteString -> !ByteString -> !Aux -> BlastMatch
-- | Strict 'Double' field; presumably the bit score of the match --
-- TODO confirm.
bits :: BlastMatch -> !Double
-- | Strict 'Double' field; presumably the E-value of the match --
-- TODO confirm.
e_val :: BlastMatch -> !Double
-- | Pair of 'Int's; this is the only non-strict field of the record.
-- NOTE(review): the meaning of the two components (e.g. identical
-- positions vs. alignment length) is not visible here -- confirm.
identity :: BlastMatch -> (Int, Int)
-- | Strict 'Int' field; presumably the start position of the match in
-- the query -- TODO confirm.
q_from :: BlastMatch -> !Int
-- | Strict 'Int' field; presumably the end position of the match in
-- the query -- TODO confirm.
q_to :: BlastMatch -> !Int
-- | Strict 'Int' field; presumably the start position of the match in
-- the hit (subject) -- TODO confirm.
h_from :: BlastMatch -> !Int
-- | Strict 'Int' field; presumably the end position of the match in
-- the hit (subject) -- TODO confirm.
h_to :: BlastMatch -> !Int
-- | Strict 'ByteString' field; presumably the aligned query sequence
-- -- TODO confirm.
qseq :: BlastMatch -> !ByteString
-- | Strict 'ByteString' field; presumably the aligned hit (subject)
-- sequence -- TODO confirm.
hseq :: BlastMatch -> !ByteString
-- | Strict 'Aux' field holding the flavor-dependent match information.
aux :: BlastMatch -> !Aux
-- Class instances for the data types above. Every type has a Show
-- instance; Aux is the only one listed with an Eq instance.
instance Show Aux
instance Eq Aux
instance Show BlastMatch
instance Show BlastHit
instance Show BlastRecord
instance Show BlastResult
-- | Parse blast XML output.
--
-- If you use a recent version of NCBI BLAST and specify XML output
-- (blastall -m 7), this module should be able to parse the result into a
-- hierarchical BlastResult structure.
--
-- While the process may consume a bit of memory, the parsing is lazy,
-- and file sizes of several gigabytes can be parsed (see e.g. the xml2x
-- tool for an example). To parse XML, we use TagSoup.
module Bio.BlastXML
-- | Parse BLAST results in XML format.
--
-- Takes a 'FilePath' and returns, in 'IO', the 'BlastResult' that is
-- the root of the parsed hierarchy.
readXML :: FilePath -> IO BlastResult