module Main where import qualified Data.ByteString.Lazy as BB import qualified Data.ByteString.Lazy.Char8 as B import Data.Char (toUpper) import Bio.Sequence import System.SimpleArgs main = do (fs,qs) <- getArgs trimPolyA fs qs trimPolyA fs qs = writeFastaQual (fs++".trim") (qs++".trim") =<< map doTrim `fmap` map castToNuc `fmap` readFastaQual fs qs doTrim s = case findPolyA s of Just (x,y) -> slice (x,y) s Nothing -> s slice :: (Int,Int) -> Sequence Nuc -> Sequence Nuc slice (x,y) (Seq h d (Just q)) = (Seq (B.concat [h,B.pack (" trim: "++show (y+1)++" poly-A: "++show (y-x+1))]) (B.take (fromIntegral y) d) (Just $ B.take (fromIntegral y) q)) -- Ripped from Dephd findPolyA :: Sequence Nuc -> Maybe (Int,Int) findPolyA (Seq _ d mq) = let qd = zip (B.unpack d) (maybe (repeat 15) BB.unpack mq) scores = map (\(c,q) -> if toUpper c=='A' then match q else mismatch q) qd match x' = let x = fromIntegral x' in log (4*(1-1/10**(x/10))) mismatch x' = let x = fromIntegral x' in log 4 - log 3 - x/10*log 10 cumulative = scanl (\a b -> let r = a + b in max 0 r) 0 (zi,mi,maxscore) = findmax $ cumulative scores in if maxscore > 12 then Just (zi+1,mi) else Nothing -- arbitrary constant alert! findmax :: [Double] -> (Int,Int,Double) findmax = go 0 (0,0,0) . zip [0..] where go _ cm [] = cm go _ cm ((i,0):rest) = go i cm rest go last_z (cmz,cmi,cmx) ((i,x):rest) = if x > cmx then go last_z (last_z,i,x) rest else go last_z (cmz,cmi,cmx) rest