-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Distributed processing of changing tasks -- -- A framework for distributing tasks running on HDFS data using Cloud -- Haskell. The goal is speedup through distribution on clusters using -- regular hardware. This framework provides different, simple -- workarounds to transport new code to other cluster nodes. See project -- home / README.md for more information. @package task-distribution @version 0.1.0.2 module Control.Distributed.Task.Distribution.LogConfiguration -- | Sets up hslogger. initLogging :: Priority -> Priority -> FilePath -> IO () -- | Sets up hslogger with a part logfile, part stdout configuration. initDefaultLogging :: String -> IO () module Control.Distributed.Task.Types.HdfsConfigTypes type HdfsConfig = (String, Int) type HdfsPath = String type HdfsLocation = (HdfsConfig, HdfsPath) module Control.Distributed.Task.Types.TaskTypes type TaskInput = [ByteString] type TaskResult = [ByteString] type Task = TaskInput -> TaskResult -- | Contains all node communication (using Cloud Haskell). This includes -- distribution logic. module Control.Distributed.Task.Distribution.TaskDistribution -- | Start a slave listening on given hostname, port. startSlaveNode :: NodeConfig -> IO () -- | Run a calculation on all accessible slaves. This is a low-level -- method, look at the RunComputation module for a nicer interface. executeDistributed :: NodeConfig -> TaskDef -> [DataDef] -> ResultDef -> ([TaskResult] -> IO ()) -> IO () -- | List all accessible slaves. showSlaveNodes :: NodeConfig -> IO () -- | List all accessible slaves that have at least a single block of the -- specified path stored physically. showSlaveNodesWithData :: NodeConfig -> String -> IO () -- | Convenience method to stop all accessible slaves remotely. shutdownSlaveNodes :: NodeConfig -> IO () -- | Defines a higher level interface to running calculations. Resolves -- HDFS input paths. 
module Control.Distributed.Task.Distribution.RunComputation -- | The definition of a distributed calculation. data MasterOptions MasterOptions :: String -> Int -> TaskSpec -> DataSpec -> ResultSpec -> MasterOptions -- | the master hostname _host :: MasterOptions -> String -- | the master port _port :: MasterOptions -> Int -- | the task logic _taskSpec :: MasterOptions -> TaskSpec -- | which data to process _dataSpecs :: MasterOptions -> DataSpec -- | how to process the result _resultSpec :: MasterOptions -> ResultSpec -- | Task logic definition, most modes expect task mode support, see -- RemoteExecutionSupport. data TaskSpec -- | build the given string as module remotely (restrictions apply) SourceCodeSpec :: String -> TaskSpec -- | run this binary as task FullBinaryDeployment :: TaskSpec -- | serialize the given function in the context of the given program, run -- both as task (restrictions apply) SerializedThunk :: (TaskInput -> TaskResult) -> TaskSpec -- | only transport some of the generated object code and relink remotely -- (restrictions apply) - the function here is ignored, it only forces -- the compilation of the contained module ObjectCodeModuleDeployment :: (TaskInput -> TaskResult) -> TaskSpec -- | definition of input data data DataSpec -- | simple test data, the path is configured, amount of files can be -- limited SimpleDataSpec :: Int -> DataSpec -- | use given HDFS as starting directory, descend a number of directories -- from there and take all files starting with the filter prefix (if any -- given) HdfsDataSpec :: HdfsPath -> Int -> (Maybe String) -> DataSpec -- | what to do with the result data ResultSpec -- | process all results with the given method CollectOnMaster :: ([TaskResult] -> IO ()) -> ResultSpec -- | store the results in HDFS, in the given directory (1), with the given -- suffix (2), based on the input path. 
StoreInHdfs :: String -> String -> ResultSpec -- | do nothing, for testing purposes only Discard :: ResultSpec -- | Run a computation. runMaster :: MasterOptions -> IO () -- | Catches expected entry points for full binary deployment / thunk -- serialization. These modes deploy the program itself and are -- called remotely with different arguments, which is handled here. module Control.Distributed.Task.TaskSpawning.RemoteExecutionSupport -- | Combines all defined task mode hooks. withRemoteExecutionSupport :: (TaskInput -> TaskResult) -> IO () -> IO () -- | Provides support for full binary task mode. withFullBinaryRemoteExecutionSupport :: (TaskInput -> TaskResult) -> IO () -> IO () -- | Provides support for serialized thunk task mode. withSerializedThunkRemoteExecutionSupport :: IO () -> IO ()