-- blat DB QRY OUT -t= -d= -minScore= -extendThroughN -- blat P_2010_10_11_10_15_26_runAssembly/454AllContigs.fna ../EST/lakselus.fasta -t=dna -q=rna tmp.psl -minScore=50 module Blat (module Bio.Alignment.PSL, runBlat, gen_result, interleave) where import Bio.Alignment.PSL import Control.Arrow (second) import Data.List (sortBy) import System.Directory (doesFileExist, findExecutable) import Control.Monad (when) import System.Process (runCommand, waitForProcess) import System.Exit (ExitCode(..)) blat_cmd :: String blat_cmd = "blat -t=dna -q=rna -minScore=50 -extendThroughN " -- run blat runBlat :: FilePath -> FilePath -> FilePath -> IO [PSL] runBlat tmpdir asm ests = do let pslfile = tmpdir++"/"++basename asm++"_vs_"++basename ests++".psl" basename = reverse . takeWhile (/='/') . reverse quote str = "\""++str++"\"" dfe <- doesFileExist pslfile when (not dfe) $ do fe <- findExecutable "blat" when (fe == Nothing) $ error "Couldn't find the 'blat' executable - aborting" blat <- runCommand $ unwords (blat_cmd : map quote [asm,ests,pslfile]) e <- waitForProcess blat case e of ExitSuccess -> return () _ -> error $ show e readPSL pslfile -- this will only get you the sequences that have a match! gen_result :: [PSL] -> [(Int,Int)] gen_result = coverage . order . pslbest interleave :: (Integral i, Show i) => [i] -> [(i,i)] -> [i] interleave sz [] = map (const 0) sz interleave (sz:szs) covs@((s1,c1):cs) | sz > s1 = 0 : interleave szs covs | sz == s1 = c1 : interleave szs cs -- if this fails, something is wrong with my assumptions! | otherwise = error ("interleave failed: "++show (take 10 (sz:szs)) ++" "++show (take 10 covs)) interleave [] rs = error ("interleave failed - leftover contigs: "++show (take 10 rs)++"...") coverage :: [PSL] -> [(Int,Int)] coverage = map (second (sum . map (sum . blocksizes))) . tgroup where tgroup [] = [] tgroup (p1:ps) = let (this,rest) = span ((tname p1 ==) . tname) ps in (tsize p1, p1:this) : tgroup rest order :: [PSL] -> [PSL] order = sortBy (comparing (\c -> (negate $ tsize c,tname c))) pslbest :: [PSL] -> [PSL] pslbest = map (last . sortBy (comparing match)) . qgroup where qgroup [] = [] qgroup (p1:ps) = let (this,rest) = span ((qname p1 ==) . qname) ps in (p1:this) : qgroup rest comparing :: (Ord a) => (t -> a) -> t -> t -> Ordering comparing f a b = compare (f a) (f b)