{-# LANGUAGE NamedFieldPuns, RecordWildCards, BangPatterns, OverloadedStrings, ParallelListComp #-} import Bio.Adna import Bio.Bam import Bio.TwoBit import Control.Applicative import Control.Monad ( when, unless ) import Data.Char ( isSpace, toLower ) import Data.List ( nub ) import Data.Version ( showVersion ) import Paths_damage_patterns ( version ) import System.Console.GetOpt import System.Directory ( createDirectoryIfMissing ) import System.Environment ( getArgs, getProgName ) import System.Exit ( exitSuccess, exitFailure ) import System.FilePath import System.IO import qualified Control.Exception as CE import qualified Data.Vector.Unboxed as U import StatUtil #ifdef CHART import GraphUtil #endif read'IO :: Read a => String -> String -> IO a read'IO t s = case reads s of [(a,rest)] | all isSpace rest -> return a _ -> do hPutStrLn stderr $ "Error: could not parse " ++ show s ++ " as " ++ t exitFailure progress :: MonadIO m => Enumeratee [Alignment] [Alignment] m a progress = eneeCheckIfDone (liftI . go 0) where go !n k (EOF mx) = do liftIO $ hPutStr stderr $ "\27[Kreading, " ++ shows n " alignments.\n" idone (liftI k) (EOF mx) go !n k (Chunk as) = do let !n' = n + length as when (n `div` 4096 /= n' `div` 4096) $ liftIO $ hPutStr stderr $ "\27[Kreading, " ++ shows n' " alignments.\r" eneeCheckIfDone (liftI . go n') . k $ Chunk as tryAnyway :: IO a -> IO () tryAnyway io = (io >> return ()) `CE.catch` ignore where ignore :: CE.SomeException -> IO () ; ignore _ = return () #ifndef CHART data Format = Txt #endif options :: [OptDescr ((Conf -> IO a) -> Conf -> IO a)] options = [ Option "o" ["output"] (ReqArg (\fn k c -> k $ c { opath = fn }) "FILE") "Set output path and prefix to FILE" , Option "x" ["xrange"] (readIArg (\n k c -> k $ c { xrange = max 0 n }) "NUM") "Set range of X axis to [0..NUM]" , Option "C" ["context"] (readIArg (\n k c -> k $ c { context = max 0 n }) "NUM") "Extract NUM nt context from reference" , Option "G" ["genome"] (ReqArg (\fn k c -> openTwoBit fn >>= \tbf -> k $ c { genome = Just tbf }) "FILE") "Genome in 2bit format to get context/ref from" , Option "F" ["format"] (ReqArg readFormat "FMT") #ifdef CHART ("Set output format to FMT (svg,ps,pdf,png,txt)") , Option "y" ["yrange"] (readFArg (\n k c -> k $ c { yrange = Just $ min 1 (max 0 n) }) "NUM") "Set range of Y axis for substitution graphs to [0..NUM]" , Option "Y" ["yrange1"] (readFArg (\n k c -> k $ c { yrange1 = Just $ min 1 (max 0 n) }) "NUM") "Set range of Y axis for composition graphs to [0.25-NUM..0.25+NUM]" , Option "t" ["title"] (ReqArg (\t k c -> k $ c { titled = \s -> s ++ " (" ++ t ++ ")" }) "TEXT") "Set title of this dataset to TEXT" , Option "W" ["width"] (readIArg (\w k c -> k $ c { width = w }) "NUM") "Set width of graphs to NUM pixels" , Option "H" ["height"] (readIArg (\h k c -> k $ c { height = h }) "NUM") "Set height of graphs to NUM pixels" , Option "S" ["stats"] (NoArg do_aux_output) "Also output text files" #else "Set output format to FMT (txt)" #endif , Option "f" ["fractions"] (NoArg (\k c -> k $ c { fractions = True })) "Show numbers as common fractions" , Option [ ] ["leeHom"] (NoArg (\k c -> k $ c { leeHom = True })) "Pretend everything is adaptor-trimmed" , Option "V" ["version"] (NoArg print_version) "Output version number and exit" , Option "h" ["help","usage"] (NoArg print_help) "Print this usage information and exit" ] where readIArg f = ReqArg (\a k c -> read'IO "a whole number" a >>= \a' -> f (a'::Int) k c) #ifdef CHART readFArg f = ReqArg (\a k c -> read'IO "a floating point number" a >>= \a' -> f (a'::Double) k c) do_aux_output k c = k $ c { output_aux = True } readFormat a k c = case filter (not . isSpace) $ map toLower a of "pdf" -> k $ c { format = Pdf } "png" -> k $ c { format = Png } "txt" -> k $ c { format = Txt } "svg" -> k $ c { format = Svg } _ -> error $ "unrecognized format " ++ a #else readFormat a k c = case filter (not . isSpace) $ map toLower a of "txt" -> k $ c { format = Txt } _ -> error $ "unrecognized format " ++ a #endif print_version _k _c = do pn <- getProgName putStrLn $ pn ++ ", version " ++ showVersion version ++ #ifdef CHART "-Chart" #else "-NoChart" #endif exitSuccess print_help _k _c = do pn <- getProgName putStrLn $ usageInfo ("usage: " ++ pn ++ usage_info) options exitSuccess usage_info :: String usage_info = " [OPTIONS] [FILES]\n" ++ "where FILES can be any combination of BAM and SAM files and OPTIONS may be" data Conf = Conf { opath :: FilePath , xrange :: Int , context :: Int , genome :: Maybe TwoBitFile , yrange :: Maybe Double , yrange1 :: Maybe Double , titled :: String -> String , width :: Int , height :: Int , format :: Format , fractions :: Bool , output_aux :: Bool , leeHom :: Bool } defaultConf :: Conf defaultConf = Conf { opath = "./" , xrange = 80 , context = 10 , genome = Nothing , yrange = Nothing , yrange1 = Nothing , titled = id , width = defaultWidth , height = defaultHeight #ifdef CHART , format = Svg #else , format = Txt #endif , fractions = False , output_aux = False , leeHom = False } type OutputFn = String -> [Char] -> (Int, Int) -> [Char] -> [Curve Ratio] -> IO () stats_main :: OutputFn -- output of basecompo -> OutputFn -- output of others -> [FilePath] -> Conf -> IO () stats_main output1 output2 files Conf{..} = do files' <- if null files then ["-"] <$ hPutStrLn stderr "Info: reading from stdin" else return files DmgStats{..} <- let diter (Just f) ctx h = damagePatternsIter2Bit (meta_refs h) f ctx diter Nothing 0 _ = damagePatternsIterMD diter Nothing x _ = error $ "context (" ++ shows x ") needs a 2bit file" in concatInputs files' >=> run $ \hdr -> diter genome context hdr xrange leeHom $ progress =$ skipToEof tryAnyway . createDirectoryIfMissing True $ takeDirectory opath output1 "5' base composition" (opath ++ "basecomp-5") (-context,xrange) "Reference Base Composition Around 5' End" (basecompos context basecompo5) output1 "3' base composition" (opath ++ "basecomp-3") (-xrange,context) "Reference Base Composition Around 3' End" (basecompos (xrange-1) basecompo3) output2 "5' substitutions" (opath ++ "substitutions-5") (0, xrange) "Substitutions Seen From 5' End" (substitutes 0 substs5) output2 "3' substitutions" (opath ++ "substitutions-3") (-xrange,0) "Substitutions Seen Towards 3' End" (substitutes (xrange-1) substs3) -- 5' conditional on 3' output2 "5' conditioned substitutions" (opath ++ "cond-substitutions-5") (0, xrange) "Conditioned Substitutions Seen From 5' End" (substitutes 0 substs5d3) -- 3' conditional on 5' output2 "3' conditioned substitutions" (opath ++ "cond-substitutions-3") (-xrange,0) "Conditioned Substitutions Seen Towards 3' End" (substitutes (xrange-1) substs3d5) output2 "5' CpG substitutions" (opath ++ "substitutionsCpG-5") (0, xrange) "Substitutions At CpG Sites Seen From 5' End" (substitutesCpG 0 substs5cpg) output2 "3' CpG substitutions" (opath ++ "substitutionsCpG-3") (-xrange,0) "Substitutions At CpG Sites Seen Towards 3' End" (substitutesCpG (xrange-1) substs3cpg) where basecompos off accs = Curve "#" (Just Hidden) (SP (-off) $ U.map (\x -> (x,1)) sums) : [ Curve [showNucleotide nuc] (Just style) (SP (-off) $ U.zip v sums) | (Just nuc, v) <- accs | style <- [ solid_green, solid_blue, solid_black, solid_red ] ] where sums = foldl1 (U.zipWith (+)) [ v | (Just _, v) <- accs ] substitutes off accs = [ Curve [showNucleotide x, '\x2192', showNucleotide y] (Just style) (SP (-off) $ U.zip v sv) | (x :-> y, v) <- accs, x /= y, (z, sv) <- sums, x == z | style <- [ dashed int int 1, dashed int int int, dashed int 1 1 , Solid 1 int 1, Solid int int int, Solid 1 0 0 , Solid 0 1 0, dotted 1 0.7 int, dotted int 1 1 , dashdot 1 int 1, dashdot 1 0.7 int, dashdot int int int ] ] where sums = [ (x, foldl1 (U.zipWith (+)) [ v | (y :-> _, v) <- accs, x == y ]) | x <- nub [ y | (y :-> _, _) <- accs ] ] substitutesCpG off accs = [ Curve [showNucleotide x, '\x2192', showNucleotide y] (Just style) (SP (-off) $ U.zip v sv) | (x :-> y, v) <- accs, x /= y, (z, sv) <- sums, x == z | style <- [ Solid 1 int 1, Solid int int int, Solid 1 0 0 , Solid 0 1 0, dashed 1 0.7 int, dashed int 1 1 ] ] where sums = [ (x, foldl1 (U.zipWith (+)) [ v | (y :-> _, v) <- accs, x == y ]) | x <- nub [ y | (y :-> _, _) <- accs ] ] int = 0.3 dashed = Dashed [3,3] dotted = Dashed [1,2] dashdot = Dashed [3,2,1,2] main :: IO () main = do ( opts, files, errors ) <- getOpt Permute options <$> getArgs unless (null errors) $ mapM_ (hPutStrLn stderr) errors >> exitFailure (\kk -> foldr ($) kk opts defaultConf) $ \conf@(Conf{..}) -> do let showD = if fractions then show_common else show_decimal let output_stats _ fn rng _t crv = writeFile (fn ++ ".txt") $ to_table showD rng crv stats_only = stats_main output_stats output_stats files conf #ifdef CHART output axis msg fn rng t curves = case plot axis rng (titled t) curves of Nothing -> hPutStrLn stderr $ "Warning: " ++ msg ++ " not enough data to create plot" Just p -> renderToFile fn format width height p output1 = output (maybe myAutoAxis theOtherAxis yrange1) output2 = output (maybe myAutoAxis theAxis yrange ) outputAux | output_aux = \k msg fn rng t cs -> k msg fn rng t cs >> output_stats msg fn rng t cs | otherwise = id case format of Txt -> stats_only _ -> stats_main (outputAux output1) (outputAux output2) files conf #else stats_only #endif