{- | 
   This module implements a \"flattened\" data structure for Blast hits,
   as opposed to the hierarchical structure in "Bio.Alignment.BlastData".

   The flat data type is useful in many cases where it is more natural
   to see the result as a set of rows (e.g. for insertaion in a database).

   It would probably be more (memory-) efficient to go the other way
   (i.e. from flat to hierarchical), as passing the current, partially
   built "BlastFlat" object down the stream of results and stamping
   out a stream of completed ones.  (See "Bio.Alignment.BlastXML.breaks"
   for this week's most cumbersome use of parallelism to avoid the
   memory issue.)  
-} 

module Bio.Alignment.BlastFlat 
    ( 
    -- * The BlastFlat data type
      BlastFlat(..)
    -- * Read XML format
    , readXML
    -- * Convert from hierarchical to flat structure
    , flatten
    -- * Re-exports from the hierarchical module ("Bio.Alignment.BlastData")
    , B.BlastRecord
    , B.blastprogram, B.blastversion, B.blastdate, B.blastreferences
    , B.database, B.dbsequences, B.dbchars, B.results
    , B.Aux(..), B.Strand(..)
    )where

import qualified Bio.Alignment.BlastData as B
import qualified Bio.Alignment.BlastXML as X
import Data.ByteString.Lazy.Char8 (empty)

-- | The BlastFlat data structure contains information about a single match
data BlastFlat = BlastFlat { 
      query :: !B.SeqId, qlength :: !Int    -- BlastRecord
    , subject :: !B.SeqId, slength :: !Int  -- BlastHit
    , bits :: !Double, e_val :: !Double     -- BlastMatch
    , identity :: (Int,Int)
    , q_from, q_to, h_from, h_to :: !Int
    , aux :: !B.Aux    
    }

readXML :: FilePath -> IO [BlastFlat]
readXML f = return . concatMap (flatten . B.results) =<< X.readXML f 

-- | Convert BlastRecords into BlastFlats (representing a depth-first traversal of the 
--   BlastRecord structure.)
flatten :: [B.BlastRecord] -> [BlastFlat]
flatten = concatMap frecord
    where frecord r =
              concatMap (fhit (bf0 { query = B.query r, qlength = B.qlength r })) $ B.hits r
          fhit f h = 
              map (fmatch f { subject = B.subject h, slength = B.slength h }) $ B.matches h
          fmatch f m = 
              f { bits = B.bits m, e_val = B.e_val m, identity = B.identity m
                , q_from = B.q_from m, q_to = B.q_to m
                , h_from = B.h_from m, h_to = B.h_to m, aux = B.aux m}
          bf0 = BlastFlat e 0 e 0 0 0 (0,0) 0 0 0 0 (B.Frame B.Plus 0)
          e = empty