-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Library for reading Blast XML output
--
-- This library contains a data structure and functions for reading
-- output from the BLAST sequence alignment program.
@package blastxml
@version 0.2
-- | Parse blast XML output.
--
-- If you use a recent version of NCBI BLAST and specify XML output
-- (blastall -m 7), this module should be able to parse the result into a
-- hierarchical BlastResult structure.
--
-- While the process may consume a bit of memory, the parsing is lazy,
-- and files of several gigabytes can be parsed (see the xml2x tool for
-- an example). To parse XML, we use TagSoup.
module Bio.BlastXML
-- | Parse BLAST results in XML format.
--
-- Takes the path of the file to read and returns, in 'IO', the
-- 'BlastResult' that forms the root of the parsed hierarchy. The module
-- description states that the parsing is lazy.
readXML :: FilePath -> IO BlastResult
-- | The sequence id, i.e. the first word of the header field.
--
-- This is a plain type alias, so a 'SeqId' is interchangeable with any
-- other 'ByteString'. It is the type of the 'query' and 'subject' fields.
-- NOTE(review): whether this is the strict or the lazy 'ByteString' is not
-- visible in this listing; confirm against the module's imports.
type SeqId = ByteString
-- | The Strand indicates the direction of the match, i.e. the plain
-- sequence or its reverse complement.
--
-- NOTE(review): presumably 'Plus' is the plain sequence and 'Minus' the
-- reverse complement; confirm against the parser.
data Strand
Plus :: Strand
Minus :: Strand
-- | The Aux field in the BLAST output includes match information that
-- depends on the BLAST flavor (blastn, blastx, or blastp). This data
-- structure captures those variations.
--
-- NOTE(review): only two constructors are listed, labelled blastn and
-- blastx; which one (if any) is used for blastp results is not visible
-- here and should be confirmed.
data Aux
-- | blastn: carries two strict 'Strand' values.
-- NOTE(review): presumably the query strand followed by the hit strand;
-- confirm.
Strands :: !Strand -> !Strand -> Aux
-- | blastx: carries a strict 'Strand' and a strict 'Int'.
-- NOTE(review): presumably the 'Int' is the reading-frame number; confirm.
Frame :: !Strand -> !Int -> Aux
-- | A BlastResult is the root of the hierarchy.
--
-- This is the type returned by 'readXML'. Every field carries a strictness
-- annotation except 'results', the list of 'BlastRecord's.
data BlastResult
-- | Constructor. Its argument types line up, in order, with the field
-- accessors listed below.
BlastResult :: !ByteString -> !ByteString -> !ByteString -> !ByteString -> !ByteString -> !Integer -> !Integer -> [BlastRecord] -> BlastResult
-- | NOTE(review): presumably the name of the BLAST program that produced
-- the output; confirm.
blastprogram :: BlastResult -> !ByteString
-- | NOTE(review): presumably the version string of that program; confirm.
blastversion :: BlastResult -> !ByteString
-- | Kept as an unparsed 'ByteString' rather than a date type.
blastdate :: BlastResult -> !ByteString
-- | NOTE(review): presumably the literature reference text reported by
-- BLAST; confirm.
blastreferences :: BlastResult -> !ByteString
-- | NOTE(review): presumably the name of the database searched; confirm.
database :: BlastResult -> !ByteString
-- | NOTE(review): presumably the number of sequences in the database;
-- confirm.
dbsequences :: BlastResult -> !Integer
-- | NOTE(review): presumably the total number of letters in the database;
-- confirm.
dbchars :: BlastResult -> !Integer
-- | The 'BlastRecord's below the root. This is the only field without a
-- strictness annotation.
results :: BlastResult -> [BlastRecord]
-- | Each query sequence generates a BlastRecord
--
-- A record holds a strict 'SeqId', a strict 'Int', and a list of
-- 'BlastHit's that carries no strictness annotation.
data BlastRecord
-- | Constructor. Its argument types line up, in order, with the field
-- accessors listed below.
BlastRecord :: !SeqId -> !Int -> [BlastHit] -> BlastRecord
-- | The 'SeqId' of the query sequence.
query :: BlastRecord -> !SeqId
-- | NOTE(review): presumably the length of the query sequence; confirm.
qlength :: BlastRecord -> !Int
-- | The 'BlastHit's recorded for this query.
hits :: BlastRecord -> [BlastHit]
-- | Each match between a query and a target sequence (or subject) is a
-- BlastHit.
--
-- A hit holds a strict 'SeqId', a strict 'Int', and a list of
-- 'BlastMatch'es that carries no strictness annotation.
data BlastHit
-- | Constructor. Its argument types line up, in order, with the field
-- accessors listed below.
BlastHit :: !SeqId -> !Int -> [BlastMatch] -> BlastHit
-- | The 'SeqId' of the target (subject) sequence.
subject :: BlastHit -> !SeqId
-- | NOTE(review): presumably the length of the subject sequence; confirm.
slength :: BlastHit -> !Int
-- | The individual 'BlastMatch'es that make up this hit.
matches :: BlastHit -> [BlastMatch]
-- | A BlastHit may contain multiple separate matches (typically
-- when an indel causes a frameshift that blastx is unable to bridge).
data BlastMatch
-- | Constructor. Its argument types line up, in order, with the field
-- accessors listed below. Every field is strict except the 'identity'
-- pair.
BlastMatch :: !Double -> !Double -> (Int, Int) -> !Int -> !Int -> !Int -> !Int -> !ByteString -> !ByteString -> !Aux -> BlastMatch
-- | NOTE(review): presumably the bit score of the match; confirm.
bits :: BlastMatch -> !Double
-- | NOTE(review): presumably the expectation value (E-value); confirm.
e_val :: BlastMatch -> !Double
-- | A pair of 'Int's; the only field without a strictness annotation.
-- NOTE(review): presumably (identical positions, alignment length);
-- confirm.
identity :: BlastMatch -> (Int, Int)
-- | NOTE(review): presumably the start coordinate on the query; confirm.
q_from :: BlastMatch -> !Int
-- | NOTE(review): presumably the end coordinate on the query; confirm.
q_to :: BlastMatch -> !Int
-- | NOTE(review): presumably the start coordinate on the hit; confirm.
h_from :: BlastMatch -> !Int
-- | NOTE(review): presumably the end coordinate on the hit; confirm.
h_to :: BlastMatch -> !Int
-- | NOTE(review): presumably the aligned query sequence; confirm.
qseq :: BlastMatch -> !ByteString
-- | NOTE(review): presumably the aligned hit sequence; confirm.
hseq :: BlastMatch -> !ByteString
-- | The 'Aux' value attached to this match.
aux :: BlastMatch -> !Aux