-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Library for reading Blast XML output -- -- This library contains a data structure and functions for reading -- output from the BLAST sequence alignment program. @package blastxml @version 0.3 -- | This module implements a hierarchical data structure for BLAST -- results. -- -- BLAST is a tool for searching in (biological) sequences for -- similarity. This library is tested against NCBI-blast version 2.2.14. -- There exist several independent versions of BLAST, so expect some -- incompatibilities if you're using a different BLAST version. -- -- For parsing BLAST results, the XML format (blastall -m 7) is by far -- the most robust choice, and is implemented in the -- Bio.BlastXML module. -- -- The format is straightforward (and non-recursive). For more -- information on BLAST, check -- http://www.ncbi.nlm.nih.gov/Education/BLASTinfo/information3.html module Bio.BlastData -- | The Aux field in the BLAST output includes match information that -- depends on the BLAST flavor (blastn, blastx, or blastp). This data -- structure captures those variations. data Aux -- | blastn Strands :: !Strand -> !Strand -> Aux -- | blastx Frame :: !Strand -> !Int -> Aux -- | A BlastResult is the root of the hierarchy. 
data BlastResult BlastResult :: !ByteString -> !ByteString -> !ByteString -> !ByteString -> !ByteString -> !Integer -> !Integer -> [BlastRecord] -> BlastResult blastprogram :: BlastResult -> !ByteString blastversion :: BlastResult -> !ByteString blastdate :: BlastResult -> !ByteString blastreferences :: BlastResult -> !ByteString database :: BlastResult -> !ByteString dbsequences :: BlastResult -> !Integer dbchars :: BlastResult -> !Integer results :: BlastResult -> [BlastRecord] -- | Each query sequence generates a BlastRecord data BlastRecord BlastRecord :: !SeqLabel -> !Int -> [BlastHit] -> BlastRecord query :: BlastRecord -> !SeqLabel qlength :: BlastRecord -> !Int hits :: BlastRecord -> [BlastHit] -- | Each match between a query and a target sequence (or subject) is a -- BlastHit. data BlastHit BlastHit :: !SeqLabel -> !Int -> [BlastMatch] -> BlastHit subject :: BlastHit -> !SeqLabel slength :: BlastHit -> !Int matches :: BlastHit -> [BlastMatch] -- | A BlastHit may contain multiple separate matches (typically -- when an indel causes a frameshift that blastx is unable to bridge). data BlastMatch BlastMatch :: !Double -> !Double -> (Int, Int) -> !Int -> !Int -> !Int -> !Int -> !ByteString -> !ByteString -> !Aux -> BlastMatch bits :: BlastMatch -> !Double e_val :: BlastMatch -> !Double identity :: BlastMatch -> (Int, Int) q_from :: BlastMatch -> !Int q_to :: BlastMatch -> !Int h_from :: BlastMatch -> !Int h_to :: BlastMatch -> !Int qseq :: BlastMatch -> !ByteString hseq :: BlastMatch -> !ByteString aux :: BlastMatch -> !Aux instance Show Aux instance Eq Aux instance Show BlastMatch instance Show BlastHit instance Show BlastRecord instance Show BlastResult -- | Parse blast XML output. -- -- If you use a recent version of NCBI BLAST and specify XML output -- (blastall -m 7), this module should be able to parse the result into a -- hierarchical BlastResult structure. 
-- -- While the process may consume a bit of memory, the parsing is lazy, -- and file sizes of several gigabytes can be parsed (see e.g. the xml2x -- tool for an example). To parse XML, we use TagSoup. module Bio.BlastXML -- | Parse BLAST results in XML format readXML :: FilePath -> IO BlastResult