{- | This module implements a \"flattened\" data structure for Blast hits, as opposed to the hierarchical structure in "Bio.Alignment.BlastData". The flat data type is useful in many cases where it is more natural to see the result as a set of rows (e.g. for insertaion in a database). It would probably be more (memory-) efficient to go the other way (i.e. from flat to hierarchical), as passing the current, partially built "BlastFlat" object down the stream of results and stamping out a stream of completed ones. (See "Bio.Alignment.BlastXML.breaks" for this week's most cumbersome use of parallelism to avoid the memory issue.) -} module Bio.Alignment.BlastFlat ( -- * The BlastFlat data type BlastFlat(..) -- * Read XML format , readXML -- * Convert from hierarchical to flat structure , flatten -- * Re-exports from the hierarchical module ("Bio.Alignment.BlastData") , B.BlastRecord , B.blastprogram, B.blastversion, B.blastdate, B.blastreferences , B.database, B.dbsequences, B.dbchars, B.results , B.Aux(..), B.Strand(..) )where import qualified Bio.Alignment.BlastData as B import qualified Bio.Alignment.BlastXML as X import Data.ByteString.Lazy.Char8 (empty) -- | The BlastFlat data structure contains information about a single match data BlastFlat = BlastFlat { query :: !B.SeqId, qlength :: !Int -- BlastRecord , subject :: !B.SeqId, slength :: !Int -- BlastHit , bits :: !Double, e_val :: !Double -- BlastMatch , identity :: (Int,Int) , q_from, q_to, h_from, h_to :: !Int , aux :: !B.Aux } readXML :: FilePath -> IO [BlastFlat] readXML f = return . concatMap (flatten . B.results) =<< X.readXML f -- | Convert BlastRecords into BlastFlats (representing a depth-first traversal of the -- BlastRecord structure.) flatten :: [B.BlastRecord] -> [BlastFlat] flatten = concatMap frecord where frecord r = concatMap (fhit (bf0 { query = B.query r, qlength = B.qlength r })) $ B.hits r fhit f h = map (fmatch f { subject = B.subject h, slength = B.slength h }) $ B.matches h fmatch f m = f { bits = B.bits m, e_val = B.e_val m, identity = B.identity m , q_from = B.q_from m, q_to = B.q_to m , h_from = B.h_from m, h_to = B.h_to m, aux = B.aux m} bf0 = BlastFlat e 0 e 0 0 0 (0,0) 0 0 0 0 (B.Frame B.Plus 0) e = empty