{-| Rename reads in .SFF files to avoid name clashes.

Reads that share a name apparently crash Newbler, and duplicate names are a
bad idea in any case.  This tool ensures uniqueness by appending a serial
number to each read name, with the numbering continuing across all the files
given on the command line.  For every input file the renamed reads are
written to a file with the same directory and an @r_@ prefix on the file name.
-}
module Main where

import Bio.Sequence.SFF
import System.Environment (getArgs)
-- 'System.FilePath' comes from the @filepath@ package, which ships with GHC.
import System.FilePath (replaceFileName, takeFileName)
import qualified Data.ByteString.Char8 as B

-- | Entry point: print a usage line when no files are given, otherwise
-- rename the reads of every file named on the command line.
main :: IO ()
main = do
  fs <- getArgs
  if null fs
    then putStrLn "Usage: frename file1.sff [file2.sff ...]"
    else renameSFFs fs

-- | Process the given SFF files in order.  Each file is read with 'readSFF',
-- every read name gets @_\<serial\>@ appended, and the result is written with
-- 'writeSFF' to the path produced by @outputPath@.  The serial counter starts
-- at 0 and is carried from one file to the next, so names stay unique across
-- the whole set.
renameSFFs :: [FilePath] -> IO ()
renameSFFs = go 0
  where
    go _ [] = return ()
    go current (f:fs) = do
      (SFF h rs) <- readSFF f
      writeSFF (outputPath f) (SFF h $ renameFrom current rs)
      -- The next file continues numbering after this one; the offset is the
      -- read count recorded in this file's common header.
      go (current + num_reads h) fs

    -- Prefix only the file-name component with "r_".  Prepending to the whole
    -- path would turn "dir/x.sff" into "r_dir/x.sff", which points into a
    -- different (usually non-existent) directory.  A bare file name still
    -- maps to "r_" ++ name.
    outputPath f = replaceFileName f ("r_" ++ takeFileName f)

    -- Pair the reads with consecutive serial numbers starting at @i@.
    renameFrom i rs = zipWith update [i ..] rs
      where
        update j r =
          let h = read_header r
              rn = B.concat [read_name h, B.pack "_", B.pack (show j)]
           in -- Keep the stored name length in step with the new name.
              r { read_header = h { name_length = fromIntegral $ B.length rn
                                  , read_name = rn
                                  }
                }