module Bio.Alignment.Blast
(parse)
where
import Prelude hiding (lines,words)
import qualified Data.ByteString.Lazy.Char8 as B
import Data.ByteString.Lazy.Char8 (ByteString,isPrefixOf,lines,words)
import Data.List (groupBy)
import Data.Char (isDigit)
import Bio.Alignment.BlastData
import Bio.Util (splitWhen)
str_query, str_gt, str_score, str_refer, str_datab, str_search :: ByteString
str_query = B.pack "Query= "
str_gt = B.pack ">"
str_score = B.pack " Score = "
str_refer = B.pack "Reference: "
str_datab = B.pack "Database: "
str_search= B.pack "Searching"
#define BUG(C_,M_) (error ("Program error - '"++C_++"' failed: "++M_++". Location: "++__FILE__++" line: "++ show (__LINE__::Int)))
#define readX (\s -> case [ x | (x,t) <- reads (B.unpack s), ("","") <- lex t] of { [x] -> x ; [] -> BUG("read",("no parse for "++B.unpack s)); _ -> BUG("read","ambigous parse")})
queries :: [ByteString] -> [[ByteString]]
queries = splitWhen (isPrefixOf str_query)
qhits :: [ByteString] -> [[ByteString]]
qhits = splitWhen (isPrefixOf str_gt)
hmatches :: [ByteString] -> [[ByteString]]
hmatches = splitWhen (isPrefixOf str_score)
parse :: ByteString -> BlastResult
parse s = let (p:qs) = queries $ lines s
br = parse_preamble p
rs = map parse_query qs
in br { results = rs }
parse_preamble :: [ByteString] -> BlastResult
parse_preamble [] = error "Blast: can't parse preamble - empty input"
parse_preamble (l:ls) = BlastResult { blastprogram = w1, blastversion = w2, blastdate = w3
, blastreferences = B.concat $ map (B.drop (B.length str_refer)) rs
, database = B.drop (B.length str_datab) d
, dbsequences = 1, dbchars = 1, results = [] }
where [w1,w2,w3] = words l
records = map B.unlines $ splitWhen (\p -> or $ map (\x -> isPrefixOf x p) [str_refer,str_datab,str_search]) ls
rs = filter (isPrefixOf str_refer) records
d = case filter (isPrefixOf str_datab) records of [d1] -> d1; _ -> B.pack "<unknown>"
parse_query :: [ByteString] -> BlastRecord
parse_query ls = let ((s1:s2:_):hs) = qhits ls
in BlastRecord { query = B.drop (B.length str_query) s1
, qlength = readX $ B.takeWhile isDigit $ (!!1) $ B.split '(' s2
, hits = map parse_hit hs }
parse_hit :: [ByteString] -> BlastHit
parse_hit ls = let (h:ms) = hmatches ls
in BlastHit { subject = B.concat h, slength = 1, matches = map parse_match ms}
parse_match :: [ByteString] -> BlastMatch
parse_match (l1:l2:l3:_) = let
hdr = words $ B.concat [l1,l2,l3]
[bs,ev,ident,str1,str2] = map (hdr!!) [2,7,10] ++ map (reverse hdr!!) [2,0]
identX = let [nom,_:denom] = groupBy (const isDigit) $ B.unpack ident in (read nom, read denom)
in BlastMatch { bits = readX bs,
e_val = readX ev,
identity = identX,
aux = Strands (readX str1) (readX str2),
q_from = ee, q_to = ee, h_from = ee, h_to = ee
}
where ee = error "hit coordinates is not supported - use XML format"
parse_match _ = error "Blast: parse_match: pattern match failed"