{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE ScopedTypeVariables #-}

-- | Command-line importer: reads FR3D or RNAstrand input, converts every
-- entry with 'mkTrainingData' and prints the results to stdout.
module Main where

import System.Console.CmdArgs
import Data.List as L
import Data.Either
import Data.Either.Unwrap
import Control.Monad (when)
import Data.Maybe (isJust, fromJust)

import qualified Biobase.FR3D as F
import qualified Biobase.FR3D.Import as F
import qualified Biobase.RNAstrand as R
import qualified Biobase.RNAstrand.Import as R
import Biobase.TrainingData
import Biobase.TrainingData.Filter
import Biobase.TrainingData.Manip

-- | Command-line options, one constructor per program mode.
data Options
  -- | FR3D parses all files within a directory and all sub-directories.
  = FR3D
    { removepk      :: Bool
    , fromdir       :: FilePath
    , errorFile     :: Maybe FilePath
    , relativePairs :: Maybe Double
    }
  -- | RNAstrand reads from one file.
  | RNAstrand
    { removepk      :: Bool
    , fromfile      :: FilePath
    , errorFile     :: Maybe FilePath
    , relativePairs :: Maybe Double
    }
  deriving (Show,Data,Typeable)

-- | Default values and cmdargs annotations for the FR3D mode.
fr3d :: Options
fr3d = FR3D
  { removepk      = False &= help "removes pseudoknots from the entry using a heuristic algorithm"
  , fromdir       = "./"  &= args
  , errorFile     = def   &= help "put TrainingData which falls through the filter in this file (default: disabled)"
  , relativePairs = def   &= help "Keep only TrainingData with that fraction of basepairs."
  }

-- | Default values and cmdargs annotations for the RNAstrand mode.
--
-- Only 'fromfile' is initialised here; 'removepk', 'errorFile' and
-- 'relativePairs' are deliberately left out of the record expression.
-- NOTE(review): this relies on cmdargs filling omitted fields that are shared
-- between modes from the earlier mode ('fr3d') -- confirm against the cmdargs
-- version in use.
rnastrand :: Options
rnastrand = RNAstrand
  { fromfile = "" &= args
  }

-- | Parse the command line into one of the two modes and dispatch to 'run'.
main :: IO ()
main = cmdArgs (modes [fr3d, rnastrand]) >>= run

-- | Run the importer selected on the command line.
run :: Options -> IO ()

-- FR3D importer: convert every entry found under 'fromdir', run it through
-- the filter chain, optionally write the rejected entries ('Left' values) to
-- 'errorFile', and print the accepted entries ('Right' values).
run FR3D{..} = do
  xs <- F.fromDir fromdir
  let ys :: [TDmanip]
      ys = map
             ( fErrorCheck                    -- basic error-checking
             . fMinRelPairs relativePairs     -- filtering out trainingdata with too few pairs
             . Right                          -- every entry starts out as accepted
             . removePK removepk              -- remove pseudoknots from trainingdata
             . mkTrainingData                 -- basic conversions
             . removeBIF
             . F.linearizeFR3D
             )
             xs
      -- 'partitionEithers' keeps the relative order of both halves and needs
      -- no partial projection such as fromLeft / fromRight / fromJust.
      (rejected, accepted) = partitionEithers ys
  case errorFile of
    Just path -> writeFile path . unlines . map show $ rejected
    Nothing   -> return ()
  mapM_ print accepted
  -- Unfiltered variant kept for reference:
  -- mapM_ print $ map (removePK removepk . mkTrainingData . removeBIF . F.linearizeFR3D) xs

-- RNAstrand importer: convert and print every entry read from 'fromfile'.
-- Unlike the FR3D branch, 'errorFile' and 'relativePairs' are not consulted.
run RNAstrand{..} = do
  xs <- R.fromFile fromfile
  mapM_ (print . removePK removepk . mkTrainingData) xs

-- | Removes bifurcated pairs: every pair whose third component equals
-- @"bif"@ is dropped, and the remaining pairs are sorted.
--
-- TODO we should really handle bifurcated pairs better...
removeBIF l@F.LinFR3D{..} = l { F.pairs = sort (filter notBif pairs) }
  where
    notBif (_, _, tag) = tag /= "bif"