-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Scientific workflow management system -- -- SciFlow is a DSL for building scientific workflows. Workflows built -- with SciFlow can be run either on desktop computers or in grid -- computing environments that support DRMAA. @package SciFlow @version 0.6.0 module Scientific.Workflow.Main.Options data CMD Run :: GlobalOpts -> Int -> Bool -> (Maybe [String]) -> (Maybe String) -> CMD View :: Bool -> CMD Cat :: GlobalOpts -> String -> CMD Write :: GlobalOpts -> String -> FilePath -> CMD Delete :: GlobalOpts -> String -> CMD Recover :: GlobalOpts -> FilePath -> CMD DumpDB :: GlobalOpts -> FilePath -> CMD Call :: GlobalOpts -> String -> String -> String -> CMD data GlobalOpts GlobalOpts :: FilePath -> Maybe [FilePath] -> GlobalOpts [dbPath] :: GlobalOpts -> FilePath [configFile] :: GlobalOpts -> Maybe [FilePath] argsParser :: String -> ParserInfo CMD module Scientific.Workflow.Internal.DB openDB :: FilePath -> IO WorkflowDB closeDB :: WorkflowDB -> IO () readData :: DBData r => PID -> WorkflowDB -> IO r readDataByteString :: PID -> WorkflowDB -> IO ByteString saveDataByteString :: PID -> ByteString -> WorkflowDB -> IO () saveData :: DBData r => PID -> r -> WorkflowDB -> IO () updateData :: DBData r => PID -> r -> WorkflowDB -> IO () delRecord :: PID -> WorkflowDB -> IO () isFinished :: PID -> WorkflowDB -> IO Bool getKeys :: WorkflowDB -> IO [PID] -- | An abstract type representing the database used to store states of -- workflow newtype WorkflowDB WorkflowDB :: Connection -> WorkflowDB -- | DBData constraint is used for data serialization. 
type DBData a = (FromJSON a, ToJSON a, Serialize a) serialize :: DBData a => a -> ByteString deserialize :: DBData a => ByteString -> a readYaml :: DBData a => ByteString -> a showYaml :: DBData a => a -> ByteString module Scientific.Workflow.Internal.Utils data RemoteOpts config RemoteOpts :: String -> config -> RemoteOpts config [extraParams] :: RemoteOpts config -> String [environment] :: RemoteOpts config -> config data Log Running :: Text -> Log Complete :: Text -> Log Warn :: Text -> String -> Log Error :: String -> Log Exit :: Log runRemote :: (DBData a, DBData b, ToJSON config) => RemoteOpts config -> Text -> a -> IO b sendLog :: Maybe Socket -> Log -> IO () instance GHC.Show.Show Scientific.Workflow.Internal.Utils.Log instance GHC.Generics.Generic Scientific.Workflow.Internal.Utils.Log instance Data.Serialize.Serialize Scientific.Workflow.Internal.Utils.Log module Scientific.Workflow.Internal.Builder.Types -- | A computation node. data Node Node :: Text -> ExpQ -> Attribute -> Node [_nodePid] :: Node -> Text [_nodeFunction] :: Node -> ExpQ [_nodeAttr] :: Node -> Attribute -- | Links between computational nodes data Edge Edge :: Text -> Text -> EdgeOrd -> Edge [_edgeFrom] :: Edge -> Text [_edgeTo] :: Edge -> Text -- | Order of the edge [_edgeOrd] :: Edge -> EdgeOrd type EdgeOrd = Int type Builder = State ([Node], [Edge]) -- | Node attributes. data Attribute Attribute :: Text -> Text -> Maybe Bool -> String -> FunctionConfig -> Attribute -- | Short description [_label] :: Attribute -> Text -- | Long description [_note] :: Attribute -> Text -- | Overwrite the global option [_submitToRemote] :: Attribute -> Maybe Bool -- | Parameters for remote execution [_remoteParam] :: Attribute -> String -- | Usually not being used directly [_functionConfig] :: Attribute -> FunctionConfig -- | The type of node function data FunctionConfig FunctionConfig :: ParallelMode -> FunctionType -> FunctionConfig data ParallelMode -- | No parallelism. 
None :: ParallelMode -- | Turn input a into [a] and process them in parallel. Standard :: Int -> ParallelMode -- | Assume the input is ContextData d a, where d is -- shared and a becomes [a]. ShareData :: Int -> ParallelMode data FunctionType -- | The function is pure, i.e., a -> b. Pure :: FunctionType -- | A IO function, i.e., a -> IO b. IOAction :: FunctionType -- | A function that has access to configuration, i.e., a -> -- WorkflowConfig config b. Stateful :: FunctionType submitToRemote :: Lens' Attribute (Maybe Bool) remoteParam :: Lens' Attribute String note :: Lens' Attribute Text label :: Lens' Attribute Text functionConfig :: Lens' Attribute FunctionConfig defaultAttribute :: Attribute type AttributeSetter = State Attribute () type DAG = Gr Node EdgeOrd -- | Objects that can be converted to ExpQ class ToExpQ a toExpQ :: ToExpQ a => a -> ExpQ -- | Data and its environment. data ContextData context dat ContextData :: context -> dat -> ContextData context dat [_context] :: ContextData context dat -> context [_data] :: ContextData context dat -> dat instance GHC.Generics.Generic (Scientific.Workflow.Internal.Builder.Types.ContextData context dat) instance Scientific.Workflow.Internal.Builder.Types.ToExpQ Language.Haskell.TH.Syntax.Name instance Scientific.Workflow.Internal.Builder.Types.ToExpQ Language.Haskell.TH.Lib.ExpQ instance (Data.Aeson.Types.FromJSON.FromJSON c, Data.Aeson.Types.FromJSON.FromJSON d) => Data.Aeson.Types.FromJSON.FromJSON (Scientific.Workflow.Internal.Builder.Types.ContextData c d) instance (Data.Aeson.Types.ToJSON.ToJSON c, Data.Aeson.Types.ToJSON.ToJSON d) => Data.Aeson.Types.ToJSON.ToJSON (Scientific.Workflow.Internal.Builder.Types.ContextData c d) instance (Data.Serialize.Serialize c, Data.Serialize.Serialize d) => Data.Serialize.Serialize (Scientific.Workflow.Internal.Builder.Types.ContextData c d) instance Language.Haskell.TH.Syntax.Lift Scientific.Workflow.Internal.Builder.Types.Attribute instance Language.Haskell.TH.Syntax.Lift 
Scientific.Workflow.Internal.Builder.Types.FunctionType instance Language.Haskell.TH.Syntax.Lift Scientific.Workflow.Internal.Builder.Types.ParallelMode instance Language.Haskell.TH.Syntax.Lift Scientific.Workflow.Internal.Builder.Types.FunctionConfig instance GHC.Generics.Generic Scientific.Workflow.Internal.Builder.Types.Attribute instance GHC.Generics.Generic Scientific.Workflow.Internal.Builder.Types.FunctionConfig instance GHC.Generics.Generic Scientific.Workflow.Internal.Builder.Types.FunctionType instance GHC.Generics.Generic Scientific.Workflow.Internal.Builder.Types.ParallelMode instance Data.Serialize.Serialize Scientific.Workflow.Internal.Builder.Types.Attribute instance Data.Serialize.Serialize Scientific.Workflow.Internal.Builder.Types.FunctionConfig instance Data.Serialize.Serialize Scientific.Workflow.Internal.Builder.Types.ParallelMode instance Data.Serialize.Serialize Scientific.Workflow.Internal.Builder.Types.FunctionType module Scientific.Workflow.Types -- | A Workflow is a stateful function data Workflow config Workflow :: Gr PID Int -> Map Text Attribute -> Processor config () () -> Workflow config [_worflow_dag] :: Workflow config -> Gr PID Int [_worflow_pidToAttr] :: Workflow config -> Map Text Attribute [_workflow] :: Workflow config -> Processor config () () -- | The id of a node type PID = Text -- | The result of a computation node data NodeState -- | The node has been executed Success :: NodeState -- | The node failed to finish Fail :: SomeException -> NodeState -- | The node will be executed Scheduled :: NodeState -- | Indicate the workflow is currently running under special mode Special :: SpecialMode -> NodeState data SpecialMode -- | The node will not be executed Skip :: SpecialMode -- | Simply read the saved data from database FetchData :: SpecialMode -- | Read the result from the input file and save it to database. WriteData :: FilePath -> SpecialMode -- | Read input from the input file and save results to the output file. 
-- This is used in remote mode. EXE :: FilePath -> FilePath -> SpecialMode type ProcState config = ReaderT WorkflowState (ExceptT (PID, SomeException) (WorkflowConfig config)) data WorkflowState WorkflowState :: WorkflowDB -> Map PID (MVar NodeState, Attribute) -> MVar () -> Bool -> Maybe Socket -> WorkflowState [_database] :: WorkflowState -> WorkflowDB [_procStatus] :: WorkflowState -> Map PID (MVar NodeState, Attribute) -- | Concurrency controller [_procParaControl] :: WorkflowState -> MVar () -- | Global remote switch [_remote] :: WorkflowState -> Bool -- | Server for logging [_logServer] :: WorkflowState -> Maybe Socket database :: Lens' WorkflowState WorkflowDB procStatus :: Lens' WorkflowState (Map PID (MVar NodeState, Attribute)) procParaControl :: Lens' WorkflowState (MVar ()) remote :: Lens' WorkflowState Bool logServer :: Lens' WorkflowState (Maybe Socket) type Processor config a b = a -> (ProcState config) b data RunMode -- | Run as the master process Master :: RunMode -- | Run as a slave process Slave :: PID -> FilePath -> FilePath -> RunMode -- | Review the info stored in a node Review :: PID -> RunMode -- | Replace the info stored in a node Replace :: PID -> FilePath -> RunMode -- | Options data RunOpt RunOpt :: FilePath -> Int -> Bool -> RunMode -> [FilePath] -> Maybe [PID] -> Maybe String -> RunOpt [dbFile] :: RunOpt -> FilePath -- | number of concurrent processes [nThread] :: RunOpt -> Int [runOnRemote] :: RunOpt -> Bool [runMode] :: RunOpt -> RunMode [configuration] :: RunOpt -> [FilePath] -- | Should run only selected nodes [selected] :: RunOpt -> Maybe [PID] [logServerAddr] :: RunOpt -> Maybe String defaultRunOpt :: RunOpt -- | Auxiliary type for concurrency support. 
newtype Parallel config r Parallel :: (ProcState config) r -> Parallel config r [runParallel] :: Parallel config r -> (ProcState config) r type WorkflowConfig config = ReaderT config IO instance (Language.Haskell.TH.Syntax.Lift a, Language.Haskell.TH.Syntax.Lift b) => Language.Haskell.TH.Syntax.Lift (Data.Graph.Inductive.PatriciaTree.Gr a b) instance GHC.Base.Functor (Scientific.Workflow.Types.Parallel config) instance GHC.Base.Applicative (Scientific.Workflow.Types.Parallel config) instance Data.Serialize.Serialize (Data.Graph.Inductive.PatriciaTree.Gr (Scientific.Workflow.Types.PID, Scientific.Workflow.Internal.Builder.Types.Attribute) GHC.Types.Int) module Scientific.Workflow.Visualize -- | Output the computation graph in dot code which can be visualized by -- Graphviz. drawWorkflow :: Gr (PID, Attribute) Int -> Text module Scientific.Workflow.Internal.Builder -- | Declare an IO computational step. node :: ToExpQ fun => PID -> fun -> State Attribute () -> Builder () -- | Declare a pure computational step. node' :: ToExpQ fun => PID -> fun -> State Attribute () -> Builder () -- | Declare a stateful computational step. nodeS :: ToExpQ fun => PID -> fun -> State Attribute () -> Builder () -- | Declare an IO and parallel computational step. This will turn -- functions with type "a -> IO b" into functions with type -- "[a] -> IO [b]". And [a] will be processed in -- parallel with provided batch size. Note: Currently, parallelism is -- available only when "--remote" flag is on. nodeP :: ToExpQ fun => Int -> PID -> fun -> State Attribute () -> Builder () -- | Same as nodeP but works with pure functions. nodeP' :: ToExpQ fun => Int -> PID -> fun -> State Attribute () -> Builder () -- | Same as nodeP but works with stateful functions. nodePS :: ToExpQ fun => Int -> PID -> fun -> State Attribute () -> Builder () -- | Similar to nodeP but works with inputs that are -- associated with a shared context. 
Turn ContextData context -- a -> IO b into ContextData context [a] -> -- IO [b]. nodeSharedP :: ToExpQ fun => Int -> PID -> fun -> State Attribute () -> Builder () nodeSharedP' :: ToExpQ fun => Int -> PID -> fun -> State Attribute () -> Builder () nodeSharedPS :: ToExpQ fun => Int -> PID -> fun -> State Attribute () -> Builder () -- | Declare the dependency between nodes. Example: -- --
--   node' "step1" [| \() -> 1 :: Int |] $ return ()
--   node' "step2" [| \() -> 2 :: Int |] $ return ()
--   node' "step3" [| \(x, y) -> x * y |] $ return ()
--   link ["step1", "step2"] "step3"
--   
link :: [PID] -> PID -> Builder () -- | (~>) = link. (~>) :: [PID] -> PID -> Builder () -- | "path [a, b, c]" is equivalent to "link a b -- >> link b c" path :: [PID] -> Builder () -- | Add a prefix to IDs of nodes for a given builder, i.e., id -- becomes prefix_id. namespace :: Text -> Builder () -> Builder () -- | Build the workflow. This function will first create functions defined -- in the builder. These pieces will then be assembled to form a function -- that will execute each individual function in a correct order, named -- $name$. buildWorkflow :: String -> Builder () -> Q [Dec] -- | Build only a part of the workflow that has not been executed. This is -- used during development for fast compilation. buildWorkflowPart :: FilePath -> String -> Builder () -> Q [Dec] -- | Construct a DAG representing the workflow mkDAG :: Builder () -> DAG mkProc :: (DBData a, DBData b, ToJSON config) => PID -> (a -> (ProcState config) b) -> (Processor config a b) module Scientific.Workflow.Main defaultMain :: Builder () -> Q [Dec] defaultMainOpts :: MainOpts mainWith :: MainOpts -> Builder () -> Q [Dec] data MainOpts MainOpts :: Name -> String -> MainOpts -- | An action to be executed before the workflow. The action should have -- type: IO () -> IO (). e.g., some -- initialization processes. [preAction] :: MainOpts -> Name [programHeader] :: MainOpts -> String runWorkflow :: (Default config, FromJSON config) => Workflow config -> RunOpt -> IO () instance Language.Haskell.TH.Syntax.Lift Scientific.Workflow.Main.MainOpts -- | SciFlow is a DSL for building scientific workflows. Workflows built -- with SciFlow can be run either on desktop computers or in grid -- computing environments that support DRMAA. -- -- Features: -- --
-- <ul>
-- <li>Easy to use and safe: Provide a simple and flexible way to design
-- type safe computational pipelines in Haskell.</li>
-- <li>Automatic Checkpointing: The states of intermediate steps are
-- automatically logged, allowing easy restart upon failures.</li>
-- <li>Parallelism and grid computing support.</li>
-- </ul>
-- -- Example: -- --
--   import           Control.Lens             ((.=))
--   import           Scientific.Workflow
--   
--   f :: Int -> Int
--   f = (+1)
--   
--   defaultMain $ do
--       nodeS "step0" [| return . const [1..10] :: () -> WorkflowConfig () [Int] |] $ return ()
--       nodeP' 2 "step1" 'f $ note .= "run in parallel with batch size 2"
--       nodeP' 4 "step2" 'f $ note .= "run in parallel with batch size 4"
--       node' "step3" [| \(x, y) -> x ++ y |] $ return ()
--   
--       ["step0"] ~> "step1"
--       ["step0"] ~> "step2"
--       ["step1", "step2"] ~> "step3"
--   
module Scientific.Workflow defaultMain :: Builder () -> Q [Dec] mainWith :: MainOpts -> Builder () -> Q [Dec] defaultMainOpts :: MainOpts data MainOpts MainOpts :: Name -> String -> MainOpts -- | An action to be executed before the workflow. The action should have -- type: IO () -> IO (). e.g., some -- initialization processes. [preAction] :: MainOpts -> Name [programHeader] :: MainOpts -> String type Builder = State ([Node], [Edge]) -- | Add a prefix to IDs of nodes for a given builder, i.e., id -- becomes prefix_id. namespace :: Text -> Builder () -> Builder () -- | Declare an IO computational step. node :: ToExpQ fun => PID -> fun -> State Attribute () -> Builder () -- | Declare a pure computational step. node' :: ToExpQ fun => PID -> fun -> State Attribute () -> Builder () -- | Declare a stateful computational step. nodeS :: ToExpQ fun => PID -> fun -> State Attribute () -> Builder () -- | Declare an IO and parallel computational step. This will turn -- functions with type "a -> IO b" into functions with type -- "[a] -> IO [b]". And [a] will be processed in -- parallel with provided batch size. Note: Currently, parallelism is -- available only when "--remote" flag is on. nodeP :: ToExpQ fun => Int -> PID -> fun -> State Attribute () -> Builder () -- | Same as nodeP but works with pure functions. nodeP' :: ToExpQ fun => Int -> PID -> fun -> State Attribute () -> Builder () -- | Same as nodeP but works with stateful functions. nodePS :: ToExpQ fun => Int -> PID -> fun -> State Attribute () -> Builder () -- | Similar to nodeP but works with inputs that are -- associated with a shared context. Turn ContextData context -- a -> IO b into ContextData context [a] -> -- IO [b]. nodeSharedP :: ToExpQ fun => Int -> PID -> fun -> State Attribute () -> Builder () nodeSharedP' :: ToExpQ fun => Int -> PID -> fun -> State Attribute () -> Builder () nodeSharedPS :: ToExpQ fun => Int -> PID -> fun -> State Attribute () -> Builder () -- | Declare the dependency between nodes. 
Example: -- --
--   node' "step1" [| \() -> 1 :: Int |] $ return ()
--   node' "step2" [| \() -> 2 :: Int |] $ return ()
--   node' "step3" [| \(x, y) -> x * y |] $ return ()
--   link ["step1", "step2"] "step3"
--   
link :: [PID] -> PID -> Builder () -- | (~>) = link. (~>) :: [PID] -> PID -> Builder () -- | "path [a, b, c]" is equivalent to "link a b -- >> link b c" path :: [PID] -> Builder () label :: Lens' Attribute Text note :: Lens' Attribute Text submitToRemote :: Lens' Attribute (Maybe Bool) remoteParam :: Lens' Attribute String -- | Data and its environment. data ContextData context dat ContextData :: context -> dat -> ContextData context dat [_context] :: ContextData context dat -> context [_data] :: ContextData context dat -> dat type WorkflowConfig config = ReaderT config IO