-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Library for reading Blast XML output -- -- This library contains a data structure and functions for reading -- output from the BLAST sequence alignment program. @package blastxml @version 0.2 -- | Parse blast XML output. -- -- If you use a recent version of NCBI BLAST and specify XML output -- (blastall -m 7), this module should be able to parse the result into a -- hierarchical BlastResult structure. -- -- While the process may consume a bit of memory, the parsing is lazy, -- and file sizes of several gigabytes can be parsed (see e.g. the xml2x -- tool for an example). To parse XML, we use TagSoup. module Bio.BlastXML -- | Parse BLAST results in XML format readXML :: FilePath -> IO BlastResult -- | The sequence id, i.e. the first word of the header field. type SeqId = ByteString -- | The Strand indicates the direction of the match, i.e. the plain -- sequence or its reverse complement. data Strand Plus :: Strand Minus :: Strand -- | The Aux field in the BLAST output includes match information that -- depends on the BLAST flavor (blastn, blastx, or blastp). This data -- structure captures those variations. data Aux -- | blastn Strands :: !Strand -> !Strand -> Aux -- | blastx Frame :: !Strand -> !Int -> Aux -- | A BlastResult is the root of the hierarchy. 
data BlastResult BlastResult :: !ByteString -> !ByteString -> !ByteString -> !ByteString -> !ByteString -> !Integer -> !Integer -> [BlastRecord] -> BlastResult blastprogram :: BlastResult -> !ByteString blastversion :: BlastResult -> !ByteString blastdate :: BlastResult -> !ByteString blastreferences :: BlastResult -> !ByteString database :: BlastResult -> !ByteString dbsequences :: BlastResult -> !Integer dbchars :: BlastResult -> !Integer results :: BlastResult -> [BlastRecord] -- | Each query sequence generates a BlastRecord data BlastRecord BlastRecord :: !SeqId -> !Int -> [BlastHit] -> BlastRecord query :: BlastRecord -> !SeqId qlength :: BlastRecord -> !Int hits :: BlastRecord -> [BlastHit] -- | Each match between a query and a target sequence (or subject) is a -- BlastHit. data BlastHit BlastHit :: !SeqId -> !Int -> [BlastMatch] -> BlastHit subject :: BlastHit -> !SeqId slength :: BlastHit -> !Int matches :: BlastHit -> [BlastMatch] -- | A BlastHit may contain multiple separate matches (typically -- when an indel causes a frameshift that blastx is unable to bridge). data BlastMatch BlastMatch :: !Double -> !Double -> (Int, Int) -> !Int -> !Int -> !Int -> !Int -> !ByteString -> !ByteString -> !Aux -> BlastMatch bits :: BlastMatch -> !Double e_val :: BlastMatch -> !Double identity :: BlastMatch -> (Int, Int) q_from :: BlastMatch -> !Int q_to :: BlastMatch -> !Int h_from :: BlastMatch -> !Int h_to :: BlastMatch -> !Int qseq :: BlastMatch -> !ByteString hseq :: BlastMatch -> !ByteString aux :: BlastMatch -> !Aux