Holumbus-MapReduce-0.0.1: a distributed MapReduce frameworkSource codeContentsIndex
Holumbus.MapReduce.Types
Portability: portable
Stability: experimental
Maintainer: Stefan Schmidt (stefanschmidt@web.de)
Contents
TaskData
JobData
TaskAction
MapAction
Combine-Reduce-Action
Description
Version : 0.1
Synopsis
data FunctionData
= TupleFunctionData ByteString
| FileFunctionData FileId
type TaskId = Integer
data TaskType
= TTSplit
| TTMap
| TTCombine
| TTReduce
| TTError
data TaskState
= TSIdle
| TSSending
| TSInProgress
| TSCompleted
| TSFinished
| TSError
getNextTaskState :: TaskState -> TaskState
data TaskOutputType
= TOTRawTuple
| TOTFile
data TaskData = TaskData {
td_JobId :: !JobId
td_TaskId :: !TaskId
td_Type :: !TaskType
td_State :: !TaskState
td_Option :: !ByteString
td_PartValue :: !(Maybe Int)
td_Input :: !(Int, [FunctionData])
td_Output :: ![(Int, [FunctionData])]
td_OutputType :: !TaskOutputType
td_Action :: !ActionName
}
type JobId = Integer
data JobState
= JSPlanned
| JSIdle
| JSSplit
| JSMap
| JSCombine
| JSReduce
| JSCompleted
| JSFinished
| JSError
getNextJobState :: JobState -> JobState
getPrevJobState :: JobState -> JobState
fromJobStatetoTaskType :: JobState -> Maybe TaskType
type OutputMap = Map JobState (AccuMap Int FunctionData)
data JobAction = JobAction {
ja_Name :: !ActionName
ja_Output :: !TaskOutputType
ja_Count :: !Int
}
data JobInfo = JobInfo {
ji_Description :: !String
ji_Option :: !ByteString
ji_SplitAction :: !(Maybe JobAction)
ji_MapAction :: !(Maybe JobAction)
ji_CombineAction :: !(Maybe JobAction)
ji_ReduceAction :: !(Maybe JobAction)
ji_NumOfResults :: !(Maybe Int)
ji_Input :: ![FunctionData]
}
data JobData = JobData {
jd_JobId :: JobId
jd_State :: JobState
jd_OutputMap :: OutputMap
jd_Info :: JobInfo
jd_startTime :: UTCTime
jd_endTime :: UTCTime
jd_Result :: JobResultContainer
}
data JobResultContainer = JobResultContainer (MVar JobResult)
data JobResult = JobResult {
jr_Output :: [FunctionData]
}
type ActionName = String
type ActionInfo = String
data ActionEnvironment = ActionEnvironment {
ae_TaskData :: TaskData
ae_FileSystem :: FileSystem
}
mkActionEnvironment :: TaskData -> FileSystem -> ActionEnvironment
type InputReader k1 v1 = ByteString -> IO [(k1, v1)]
type OutputWriter k2 v2 = [(k2, v2)] -> IO ByteString
type OptionsDecoder a = ByteString -> a
data ActionConfiguration a k1 v1 k2 v2 v3 v4 = ActionConfiguration {
ac_Name :: ActionName
ac_Info :: ActionInfo
ac_OptEncoder :: OptionsEncoder a
ac_OptDecoder :: OptionsDecoder a
ac_InputEncoder :: InputEncoder k1 v1
ac_OutputDecoder :: OutputDecoder k2 v4
ac_Split :: Maybe (SplitConfiguration a k1 v1)
ac_Map :: Maybe (MapConfiguration a k1 v1 k2 v2)
ac_Combine :: Maybe (ReduceConfiguration a k2 v2 v3)
ac_Reduce :: Maybe (ReduceConfiguration a k2 v3 v4)
}
data SplitConfiguration a k1 v1 = SplitConfiguration {
sc_Function :: SplitFunction a k1 v1
sc_Reader :: InputReader k1 v1
sc_Writer :: OutputWriter k1 v1
}
data MapConfiguration a k1 v1 k2 v2 = MapConfiguration {
mc_Function :: MapFunction a k1 v1 k2 v2
mc_Partition :: MapPartition a k2 v2
mc_Reader :: InputReader k1 v1
mc_Writer :: OutputWriter k2 v2
}
data ReduceConfiguration a k2 v2 v3 = ReduceConfiguration {
rc_Merge :: ReduceMerge a k2 v2
rc_Function :: ReduceFunction a k2 v2 v3
rc_Partition :: ReducePartition a k2 v3
rc_Reader :: InputReader k2 v2
rc_Writer :: OutputWriter k2 v3
}
defaultActionConfiguration :: (Binary a, Binary k1, Binary v1, Binary k2, Binary v4) => ActionName -> ActionConfiguration a k1 v1 k2 v2 v3 v4
defaultSplitConfiguration :: (Binary a, Binary k1, Binary v1) => SplitConfiguration a k1 v1
defaultMapConfiguration :: (Ord k2, Binary a, Binary k1, Binary v1, Binary k2, Binary v2) => MapFunction a k1 v1 k2 v2 -> MapConfiguration a k1 v1 k2 v2
defaultReduceConfiguration :: (Ord k2, Binary a, Binary k2, Binary v2, Binary v3) => ReduceFunction a k2 v2 v3 -> ReduceConfiguration a k2 v2 v3
readActionConfiguration :: (Ord k2, Binary a, Binary k1, Binary v1, Binary k2, Binary v2, Binary v3, Binary v4) => ActionConfiguration a k1 v1 k2 v2 v3 v4 -> ActionData
createJobInfoFromConfiguration :: ActionConfiguration a k1 v1 k2 v2 v3 v4 -> a -> [(k1, v1)] -> [FileId] -> Int -> Int -> Int -> Int -> TaskOutputType -> JobInfo
createListsFromJobResult :: ActionConfiguration a k1 v1 k2 v2 v3 v4 -> JobResult -> ([(k2, v4)], [FileId])
data ActionData = ActionData {
ad_Name :: ActionName
ad_Info :: ActionInfo
ad_Split :: Maybe BinarySplitAction
ad_Map :: Maybe BinaryMapAction
ad_Combine :: Maybe BinaryReduceAction
ad_Reduce :: Maybe BinaryReduceAction
}
getActionForTaskType :: TaskType -> ActionData -> Maybe BinaryReduceAction
type ActionMap = KeyMap ActionData
type MapAction a k1 v1 k2 v2 = ActionEnvironment -> a -> Int -> [(k1, v1)] -> IO [(Int, [(k2, v2)])]
type MapFunction a k1 v1 k2 v2 = ActionEnvironment -> a -> k1 -> v1 -> IO [(k2, v2)]
type MapPartition a k2 v2 = ActionEnvironment -> a -> Int -> [(k2, v2)] -> IO [(Int, [(k2, v2)])]
type ReduceAction a k2 v2 v3 = ActionEnvironment -> a -> Int -> [(k2, v2)] -> IO [(Int, [(k2, v3)])]
type ReduceMerge a k2 v2 = ActionEnvironment -> a -> [(k2, v2)] -> IO [(k2, [v2])]
type ReduceFunction a k2 v2 v3 = ActionEnvironment -> a -> k2 -> [v2] -> IO (Maybe v3)
type ReducePartition a k2 v3 = ActionEnvironment -> a -> Int -> [(k2, v3)] -> IO [(Int, [(k2, v3)])]
Documentation
data FunctionData Source
Constructors
TupleFunctionData ByteString
FileFunctionData FileId
show/hide Instances
TaskData
type TaskId = IntegerSource
the task id (should be unique in the system)
data TaskType Source
the type of the task (split, map, combine, reduce, or error)
Constructors
TTSplit
TTMap
TTCombine
TTReduce
TTError
show/hide Instances
data TaskState Source
the task state
Constructors
TSIdle
TSSending
TSInProgress
TSCompleted
TSFinished
TSError
show/hide Instances
getNextTaskState :: TaskState -> TaskStateSource
data TaskOutputType Source
Constructors
TOTRawTuple
TOTFile
show/hide Instances
data TaskData Source
the TaskData, contains all information to do the task
Constructors
TaskData
td_JobId :: !JobId
td_TaskId :: !TaskId
td_Type :: !TaskType
td_State :: !TaskState
td_Option :: !ByteString
td_PartValue :: !(Maybe Int)
td_Input :: !(Int, [FunctionData])
td_Output :: ![(Int, [FunctionData])]
td_OutputType :: !TaskOutputType
td_Action :: !ActionName
show/hide Instances
JobData
type JobId = IntegerSource
the job id (should be unique in the system)
data JobState Source
the job state
Constructors
JSPlanned
JSIdle
JSSplit
JSMap
JSCombine
JSReduce
JSCompleted
JSFinished
JSError
show/hide Instances
getNextJobState :: JobState -> JobStateSource
getPrevJobState :: JobState -> JobStateSource
fromJobStatetoTaskType :: JobState -> Maybe TaskTypeSource
type OutputMap = Map JobState (AccuMap Int FunctionData)Source
data JobAction Source
Constructors
JobAction
ja_Name :: !ActionName
ja_Output :: !TaskOutputType
ja_Count :: !Int
show/hide Instances
data JobInfo Source
defines a job, this is all data the user has to give to run a job
Constructors
JobInfo
ji_Description :: !String
ji_Option :: !ByteString
ji_SplitAction :: !(Maybe JobAction)
ji_MapAction :: !(Maybe JobAction)
ji_CombineAction :: !(Maybe JobAction)
ji_ReduceAction :: !(Maybe JobAction)
ji_NumOfResults :: !(Maybe Int)
ji_Input :: ![FunctionData]
show/hide Instances
data JobData Source
the job data, includes the user input and some additional control data
Constructors
JobData
jd_JobId :: JobId
jd_State :: JobState
jd_OutputMap :: OutputMap
jd_Info :: JobInfo
jd_startTime :: UTCTime
jd_endTime :: UTCTime
jd_Result :: JobResultContainer
show/hide Instances
data JobResultContainer Source
Constructors
JobResultContainer (MVar JobResult)
show/hide Instances
data JobResult Source
the result of the job, given by the master
Constructors
JobResult
jr_Output :: [FunctionData]
show/hide Instances
TaskAction
type ActionName = StringSource
type ActionInfo = StringSource
data ActionEnvironment Source
the ActionEnvironment contains all data that might be needed during an action. So far, it only keeps the current task data and a reference to the global filesystem and the options. This is a good place to implement counters for the map-reduce-system or other stuff...
Constructors
ActionEnvironment
ae_TaskData :: TaskData
ae_FileSystem :: FileSystem
mkActionEnvironment :: TaskData -> FileSystem -> ActionEnvironmentSource
type InputReader k1 v1 = ByteString -> IO [(k1, v1)]Source
type OutputWriter k2 v2 = [(k2, v2)] -> IO ByteStringSource
type OptionsDecoder a = ByteString -> aSource
data ActionConfiguration a k1 v1 k2 v2 v3 v4 Source
Constructors
ActionConfiguration
ac_Name :: ActionName
ac_Info :: ActionInfo
ac_OptEncoder :: OptionsEncoder a
ac_OptDecoder :: OptionsDecoder a
ac_InputEncoder :: InputEncoder k1 v1
ac_OutputDecoder :: OutputDecoder k2 v4
ac_Split :: Maybe (SplitConfiguration a k1 v1)
ac_Map :: Maybe (MapConfiguration a k1 v1 k2 v2)
ac_Combine :: Maybe (ReduceConfiguration a k2 v2 v3)
ac_Reduce :: Maybe (ReduceConfiguration a k2 v3 v4)
data SplitConfiguration a k1 v1 Source
Constructors
SplitConfiguration
sc_Function :: SplitFunction a k1 v1
sc_Reader :: InputReader k1 v1
sc_Writer :: OutputWriter k1 v1
data MapConfiguration a k1 v1 k2 v2 Source
Constructors
MapConfiguration
mc_Function :: MapFunction a k1 v1 k2 v2
mc_Partition :: MapPartition a k2 v2
mc_Reader :: InputReader k1 v1
mc_Writer :: OutputWriter k2 v2
data ReduceConfiguration a k2 v2 v3 Source
Constructors
ReduceConfiguration
rc_Merge :: ReduceMerge a k2 v2
rc_Function :: ReduceFunction a k2 v2 v3
rc_Partition :: ReducePartition a k2 v3
rc_Reader :: InputReader k2 v2
rc_Writer :: OutputWriter k2 v3
defaultActionConfiguration :: (Binary a, Binary k1, Binary v1, Binary k2, Binary v4) => ActionName -> ActionConfiguration a k1 v1 k2 v2 v3 v4Source
defaultSplitConfiguration :: (Binary a, Binary k1, Binary v1) => SplitConfiguration a k1 v1Source
defaultMapConfiguration :: (Ord k2, Binary a, Binary k1, Binary v1, Binary k2, Binary v2) => MapFunction a k1 v1 k2 v2 -> MapConfiguration a k1 v1 k2 v2Source
defaultReduceConfiguration :: (Ord k2, Binary a, Binary k2, Binary v2, Binary v3) => ReduceFunction a k2 v2 v3 -> ReduceConfiguration a k2 v2 v3Source
readActionConfiguration :: (Ord k2, Binary a, Binary k1, Binary v1, Binary k2, Binary v2, Binary v3, Binary v4) => ActionConfiguration a k1 v1 k2 v2 v3 v4 -> ActionDataSource
createJobInfoFromConfiguration Source
:: ActionConfiguration a k1 v1 k2 v2 v3 v4
-> a -- options
-> [(k1, v1)] -- input (Tuples)
-> [FileId] -- input (Files)
-> Int -- number of splitters
-> Int -- number of mappers
-> Int -- number of reducers
-> Int -- number of results
-> TaskOutputType -- type of the result (file or raw)
-> JobInfo
createListsFromJobResult :: ActionConfiguration a k1 v1 k2 v2 v3 v4 -> JobResult -> ([(k2, v4)], [FileId])Source
data ActionData Source
Constructors
ActionData
ad_Name :: ActionName
ad_Info :: ActionInfo
ad_Split :: Maybe BinarySplitAction
ad_Map :: Maybe BinaryMapAction
ad_Combine :: Maybe BinaryReduceAction
ad_Reduce :: Maybe BinaryReduceAction
show/hide Instances
getActionForTaskType :: TaskType -> ActionData -> Maybe BinaryReduceActionSource
type ActionMap = KeyMap ActionDataSource
MapAction
type MapAction a k1 v1 k2 v2 = ActionEnvironment -> a -> Int -> [(k1, v1)] -> IO [(Int, [(k2, v2)])]Source
general MapAction
type MapFunction a k1 v1 k2 v2 = ActionEnvironment -> a -> k1 -> v1 -> IO [(k2, v2)]Source
type MapPartition a k2 v2 = ActionEnvironment -> a -> Int -> [(k2, v2)] -> IO [(Int, [(k2, v2)])]Source
Combine-Reduce-Action
type ReduceAction a k2 v2 v3 = ActionEnvironment -> a -> Int -> [(k2, v2)] -> IO [(Int, [(k2, v3)])]Source
general ReduceAction
type ReduceMerge a k2 v2 = ActionEnvironment -> a -> [(k2, v2)] -> IO [(k2, [v2])]Source
type ReduceFunction a k2 v2 v3 = ActionEnvironment -> a -> k2 -> [v2] -> IO (Maybe v3)Source
type ReducePartition a k2 v3 = ActionEnvironment -> a -> Int -> [(k2, v3)] -> IO [(Int, [(k2, v3)])]Source
Produced by Haddock version 2.4.2