Holumbus-MapReduce-0.1.0: a distributed MapReduce frameworkSource codeContentsIndex
Holumbus.MapReduce.Types
Portability: portable
Stability: experimental
Maintainer: Stefan Schmidt (stefanschmidt@web.de)
Contents
TaskData
JobData
TaskAction
MapAction
Combine-Reduce-Action
Description
Version : 0.1
Synopsis
class Hash a where
hash :: Int -> a -> Int
hashedPartition :: (Hash k2, Binary k2, Binary v2, NFData k2, NFData v2) => MapPartition a k2 v2
data FunctionData
= TupleFunctionData ByteString
| FileFunctionData FileId
type TaskId = Integer
data TaskType
= TTSplit
| TTMap
| TTCombine
| TTReduce
| TTError
data TaskState
= TSIdle
| TSSending
| TSInProgress
| TSCompleted
| TSFinished
| TSError
getNextTaskState :: TaskState -> TaskState
data TaskOutputType
= TOTRawTuple
| TOTFile
data TaskData = TaskData {
td_JobId :: !JobId
td_TaskId :: !TaskId
td_Type :: !TaskType
td_State :: !TaskState
td_Option :: !ByteString
td_PartValue :: !(Maybe Int)
td_Input :: !(Int, [FunctionData])
td_Output :: ![(Int, [FunctionData])]
td_OutputType :: !TaskOutputType
td_Action :: !ActionName
}
type JobId = Integer
data JobState
= JSPlanned
| JSIdle
| JSSplit
| JSMap
| JSCombine
| JSReduce
| JSCompleted
| JSFinished
| JSError
getNextJobState :: JobState -> JobState
getPrevJobState :: JobState -> JobState
fromJobStatetoTaskType :: JobState -> Maybe TaskType
type OutputMap = Map JobState (AccuMap Int FunctionData)
data JobAction = JobAction {
ja_Name :: !ActionName
ja_Output :: !TaskOutputType
ja_Count :: !Int
}
data JobInfo = JobInfo {
ji_Description :: !String
ji_Option :: !ByteString
ji_SplitAction :: !(Maybe JobAction)
ji_MapAction :: !(Maybe JobAction)
ji_CombineAction :: !(Maybe JobAction)
ji_ReduceAction :: !(Maybe JobAction)
ji_NumOfResults :: !(Maybe Int)
ji_Input :: ![FunctionData]
}
data JobData = JobData {
jd_JobId :: JobId
jd_State :: JobState
jd_OutputMap :: OutputMap
jd_Info :: JobInfo
jd_startTime :: UTCTime
jd_endTime :: UTCTime
jd_Result :: JobResultContainer
}
data JobResultContainer = JobResultContainer (MVar JobResult)
data JobResult = JobResult {
jr_Output :: [FunctionData]
}
type ActionName = String
type ActionInfo = String
data ActionEnvironment = ActionEnvironment {
ae_TaskData :: TaskData
ae_FileSystem :: FileSystem
}
mkActionEnvironment :: TaskData -> FileSystem -> ActionEnvironment
type InputReader k1 v1 = ByteString -> IO [(k1, v1)]
type OutputWriter k2 v2 = [(k2, v2)] -> IO ByteString
type OptionsDecoder a = ByteString -> a
defaultInputReader :: (NFData v1, NFData k1, Binary k1, Binary v1) => InputReader k1 v1
defaultOutputWriter :: (NFData v2, NFData k2, Binary k2, Binary v2) => OutputWriter k2 v2
defaultSplit :: (NFData k1, NFData v1) => SplitFunction a k1 v1
defaultPartition :: (Binary k2, Binary v2) => MapPartition a k2 v2
defaultMerge :: (Ord k2, Binary k2, Binary v2) => ReduceMerge a k2 v2
readConnector :: (NFData k1, NFData v1, Binary k1, Binary v1) => InputReader k1 v1 -> ActionEnvironment -> [FunctionData] -> IO [(k1, v1)]
writeConnector :: (Binary k2, Binary v2) => OutputWriter k2 v2 -> ActionEnvironment -> [(Int, [(k2, v2)])] -> IO [(Int, [FunctionData])]
data ActionConfiguration a k1 v1 k2 v2 v3 v4 = ActionConfiguration {
ac_Name :: ActionName
ac_Info :: ActionInfo
ac_OptEncoder :: OptionsEncoder a
ac_OptDecoder :: OptionsDecoder a
ac_InputEncoder :: InputEncoder k1 v1
ac_OutputDecoder :: OutputDecoder k2 v4
ac_Split :: Maybe (SplitConfiguration a k1 v1)
ac_Map :: Maybe (MapConfiguration a k1 v1 k2 v2)
ac_Combine :: Maybe (ReduceConfiguration a k2 v2 v3)
ac_Reduce :: Maybe (ReduceConfiguration a k2 v3 v4)
}
data SplitConfiguration a k1 v1 = SplitConfiguration {
sc_Function :: SplitFunction a k1 v1
sc_Reader :: InputReader k1 v1
sc_Writer :: OutputWriter k1 v1
}
data MapConfiguration a k1 v1 k2 v2 = MapConfiguration {
mc_Function :: MapFunction a k1 v1 k2 v2
mc_Partition :: MapPartition a k2 v2
mc_Reader :: InputReader k1 v1
mc_Writer :: OutputWriter k2 v2
}
data ReduceConfiguration a k2 v3 v4 = ReduceConfiguration {
rc_Merge :: ReduceMerge a k2 v3
rc_Function :: ReduceFunction a k2 v3 v4
rc_Partition :: ReducePartition a k2 v4
rc_Reader :: InputReader k2 v3
rc_Writer :: OutputWriter k2 v4
}
defaultActionConfiguration :: (NFData v1, NFData k1, Binary a, Binary k1, Binary v1, Binary k2, Binary v4) => ActionName -> ActionConfiguration a k1 v1 k2 v2 v3 v4
defaultSplitConfiguration :: (NFData v1, NFData k1, Binary a, Binary k1, Binary v1) => SplitConfiguration a k1 v1
defaultMapConfiguration :: (NFData v1, NFData k1, NFData v2, NFData k2, Ord k2, Binary a, Binary k1, Binary v1, Binary k2, Binary v2) => MapFunction a k1 v1 k2 v2 -> MapConfiguration a k1 v1 k2 v2
defaultReduceConfiguration :: (NFData v2, NFData k2, NFData v3, Ord k2, Binary a, Binary k2, Binary v2, Binary v3) => ReduceFunction a k2 v2 v3 -> ReduceConfiguration a k2 v2 v3
readActionConfiguration :: (Ord k2, Binary a, Show k1, Show v1, Show k2, Show v2, Show v3, Show v4, NFData k1, NFData v1, NFData k2, NFData v2, NFData v3, Binary k1, Binary v1, Binary k2, Binary v2, Binary v3, Binary v4) => ActionConfiguration a k1 v1 k2 v2 v3 v4 -> ActionData
createJobInfoFromConfiguration :: ActionConfiguration a k1 v1 k2 v2 v3 v4 -> a -> [(k1, v1)] -> [FileId] -> Int -> Int -> Int -> Int -> TaskOutputType -> JobInfo
createListsFromJobResult :: ActionConfiguration a k1 v1 k2 v2 v3 v4 -> JobResult -> ([(k2, v4)], [FileId])
data ActionData = ActionData {
ad_Name :: ActionName
ad_Info :: ActionInfo
ad_Split :: Maybe BinarySplitAction
ad_Map :: Maybe BinaryMapAction
ad_Combine :: Maybe BinaryReduceAction
ad_Reduce :: Maybe BinaryReduceAction
}
getActionForTaskType :: TaskType -> ActionData -> Maybe BinaryReduceAction
type ActionMap = KeyMap ActionData
type MapAction a k1 v1 k2 v2 = ActionEnvironment -> a -> Int -> [(k1, v1)] -> IO [(Int, [(k2, v2)])]
type MapFunction a k1 v1 k2 v2 = ActionEnvironment -> a -> k1 -> v1 -> IO [(k2, v2)]
type MapPartition a k2 v2 = ActionEnvironment -> a -> Int -> [(k2, v2)] -> IO [(Int, [(k2, v2)])]
type ReduceAction a k2 v2 v3 = ActionEnvironment -> a -> Int -> [(k2, v2)] -> IO [(Int, [(k2, v3)])]
type ReduceMerge a k2 v2 = ActionEnvironment -> a -> [(k2, v2)] -> IO [(k2, [v2])]
type ReduceFunction a k2 v2 v3 = ActionEnvironment -> a -> k2 -> [v2] -> IO (Maybe v3)
type ReducePartition a k2 v3 = ActionEnvironment -> a -> Int -> [(k2, v3)] -> IO [(Int, [(k2, v3)])]
type SplitFunction a k1 v1 = SplitAction a k1 v1
type SplitAction a k1 v1 = ActionEnvironment -> a -> Int -> [(k1, v1)] -> IO [(Int, [(k1, v1)])]
Documentation
class Hash a whereSource
Methods
hash :: Int -> a -> IntSource
show/hide Instances
hashedPartition :: (Hash k2, Binary k2, Binary v2, NFData k2, NFData v2) => MapPartition a k2 v2Source
data FunctionData Source
Constructors
TupleFunctionData ByteString
FileFunctionData FileId
show/hide Instances
TaskData
type TaskId = IntegerSource
the task id (should be unique in the system)
data TaskType Source
the task type (split, map, combine, reduce, or error)
Constructors
TTSplit
TTMap
TTCombine
TTReduce
TTError
show/hide Instances
data TaskState Source
the task state
Constructors
TSIdle
TSSending
TSInProgress
TSCompleted
TSFinished
TSError
show/hide Instances
getNextTaskState :: TaskState -> TaskStateSource
data TaskOutputType Source
Constructors
TOTRawTuple
TOTFile
show/hide Instances
data TaskData Source
the TaskData, contains all information to do the task
Constructors
TaskData
td_JobId :: !JobId
td_TaskId :: !TaskId
td_Type :: !TaskType
td_State :: !TaskState
td_Option :: !ByteString
td_PartValue :: !(Maybe Int)
td_Input :: !(Int, [FunctionData])
td_Output :: ![(Int, [FunctionData])]
td_OutputType :: !TaskOutputType
td_Action :: !ActionName
show/hide Instances
JobData
type JobId = IntegerSource
the job id (should be unique in the system)
data JobState Source
the job state
Constructors
JSPlanned
JSIdle
JSSplit
JSMap
JSCombine
JSReduce
JSCompleted
JSFinished
JSError
show/hide Instances
getNextJobState :: JobState -> JobStateSource
getPrevJobState :: JobState -> JobStateSource
fromJobStatetoTaskType :: JobState -> Maybe TaskTypeSource
type OutputMap = Map JobState (AccuMap Int FunctionData)Source
data JobAction Source
Constructors
JobAction
ja_Name :: !ActionName
ja_Output :: !TaskOutputType
ja_Count :: !Int
show/hide Instances
data JobInfo Source
defines a job; this is all the data the user has to provide to run a job
Constructors
JobInfo
ji_Description :: !String
ji_Option :: !ByteString
ji_SplitAction :: !(Maybe JobAction)
ji_MapAction :: !(Maybe JobAction)
ji_CombineAction :: !(Maybe JobAction)
ji_ReduceAction :: !(Maybe JobAction)
ji_NumOfResults :: !(Maybe Int)
ji_Input :: ![FunctionData]
show/hide Instances
data JobData Source
the job data; includes the user input and some additional control data
Constructors
JobData
jd_JobId :: JobId
jd_State :: JobState
jd_OutputMap :: OutputMap
jd_Info :: JobInfo
jd_startTime :: UTCTime
jd_endTime :: UTCTime
jd_Result :: JobResultContainer
show/hide Instances
data JobResultContainer Source
Constructors
JobResultContainer (MVar JobResult)
show/hide Instances
data JobResult Source
the result of the job, given by the master
Constructors
JobResult
jr_Output :: [FunctionData]
show/hide Instances
TaskAction
type ActionName = StringSource
type ActionInfo = StringSource
data ActionEnvironment Source
the ActionEnvironment contains all data that might be needed during an action. So far, it only keeps the current task data and a reference to the global filesystem and the options. This is a good place to implement counters for the map-reduce-system or other stuff...
Constructors
ActionEnvironment
ae_TaskData :: TaskData
ae_FileSystem :: FileSystem
mkActionEnvironment :: TaskData -> FileSystem -> ActionEnvironmentSource
type InputReader k1 v1 = ByteString -> IO [(k1, v1)]Source
type OutputWriter k2 v2 = [(k2, v2)] -> IO ByteStringSource
type OptionsDecoder a = ByteString -> aSource
defaultInputReader :: (NFData v1, NFData k1, Binary k1, Binary v1) => InputReader k1 v1Source
defaultOutputWriter :: (NFData v2, NFData k2, Binary k2, Binary v2) => OutputWriter k2 v2Source
defaultSplit :: (NFData k1, NFData v1) => SplitFunction a k1 v1Source
defaultPartition :: (Binary k2, Binary v2) => MapPartition a k2 v2Source
defaultMerge :: (Ord k2, Binary k2, Binary v2) => ReduceMerge a k2 v2Source
readConnector :: (NFData k1, NFData v1, Binary k1, Binary v1) => InputReader k1 v1 -> ActionEnvironment -> [FunctionData] -> IO [(k1, v1)]Source
writeConnector :: (Binary k2, Binary v2) => OutputWriter k2 v2 -> ActionEnvironment -> [(Int, [(k2, v2)])] -> IO [(Int, [FunctionData])]Source
data ActionConfiguration a k1 v1 k2 v2 v3 v4 Source
Constructors
ActionConfiguration
ac_Name :: ActionName
ac_Info :: ActionInfo
ac_OptEncoder :: OptionsEncoder a
ac_OptDecoder :: OptionsDecoder a
ac_InputEncoder :: InputEncoder k1 v1
ac_OutputDecoder :: OutputDecoder k2 v4
ac_Split :: Maybe (SplitConfiguration a k1 v1)
ac_Map :: Maybe (MapConfiguration a k1 v1 k2 v2)
ac_Combine :: Maybe (ReduceConfiguration a k2 v2 v3)
ac_Reduce :: Maybe (ReduceConfiguration a k2 v3 v4)
data SplitConfiguration a k1 v1 Source
Constructors
SplitConfiguration
sc_Function :: SplitFunction a k1 v1
sc_Reader :: InputReader k1 v1
sc_Writer :: OutputWriter k1 v1
data MapConfiguration a k1 v1 k2 v2 Source
Constructors
MapConfiguration
mc_Function :: MapFunction a k1 v1 k2 v2
mc_Partition :: MapPartition a k2 v2
mc_Reader :: InputReader k1 v1
mc_Writer :: OutputWriter k2 v2
data ReduceConfiguration a k2 v3 v4 Source
Constructors
ReduceConfiguration
rc_Merge :: ReduceMerge a k2 v3
rc_Function :: ReduceFunction a k2 v3 v4
rc_Partition :: ReducePartition a k2 v4
rc_Reader :: InputReader k2 v3
rc_Writer :: OutputWriter k2 v4
defaultActionConfiguration :: (NFData v1, NFData k1, Binary a, Binary k1, Binary v1, Binary k2, Binary v4) => ActionName -> ActionConfiguration a k1 v1 k2 v2 v3 v4Source
defaultSplitConfiguration :: (NFData v1, NFData k1, Binary a, Binary k1, Binary v1) => SplitConfiguration a k1 v1Source
defaultMapConfiguration :: (NFData v1, NFData k1, NFData v2, NFData k2, Ord k2, Binary a, Binary k1, Binary v1, Binary k2, Binary v2) => MapFunction a k1 v1 k2 v2 -> MapConfiguration a k1 v1 k2 v2Source
defaultReduceConfiguration :: (NFData v2, NFData k2, NFData v3, Ord k2, Binary a, Binary k2, Binary v2, Binary v3) => ReduceFunction a k2 v2 v3 -> ReduceConfiguration a k2 v2 v3Source
readActionConfiguration :: (Ord k2, Binary a, Show k1, Show v1, Show k2, Show v2, Show v3, Show v4, NFData k1, NFData v1, NFData k2, NFData v2, NFData v3, Binary k1, Binary v1, Binary k2, Binary v2, Binary v3, Binary v4) => ActionConfiguration a k1 v1 k2 v2 v3 v4 -> ActionDataSource
createJobInfoFromConfigurationSource
:: ActionConfiguration a k1 v1 k2 v2 v3 v4
-> a -- ^ options
-> [(k1, v1)] -- ^ input (Tuples)
-> [FileId] -- ^ input (Files)
-> Int -- ^ number of splitters
-> Int -- ^ number of mappers
-> Int -- ^ number of reducers
-> Int -- ^ number of results
-> TaskOutputType -- ^ type of the result (file or raw)
-> JobInfo
createListsFromJobResult :: ActionConfiguration a k1 v1 k2 v2 v3 v4 -> JobResult -> ([(k2, v4)], [FileId])Source
data ActionData Source
Constructors
ActionData
ad_Name :: ActionName
ad_Info :: ActionInfo
ad_Split :: Maybe BinarySplitAction
ad_Map :: Maybe BinaryMapAction
ad_Combine :: Maybe BinaryReduceAction
ad_Reduce :: Maybe BinaryReduceAction
show/hide Instances
getActionForTaskType :: TaskType -> ActionData -> Maybe BinaryReduceActionSource
type ActionMap = KeyMap ActionDataSource
MapAction
type MapAction a k1 v1 k2 v2 = ActionEnvironment -> a -> Int -> [(k1, v1)] -> IO [(Int, [(k2, v2)])]Source
general MapAction
type MapFunction a k1 v1 k2 v2 = ActionEnvironment -> a -> k1 -> v1 -> IO [(k2, v2)]Source
type MapPartition a k2 v2 = ActionEnvironment -> a -> Int -> [(k2, v2)] -> IO [(Int, [(k2, v2)])]Source
Combine-Reduce-Action
type ReduceAction a k2 v2 v3 = ActionEnvironment -> a -> Int -> [(k2, v2)] -> IO [(Int, [(k2, v3)])]Source
general ReduceAction
type ReduceMerge a k2 v2 = ActionEnvironment -> a -> [(k2, v2)] -> IO [(k2, [v2])]Source
type ReduceFunction a k2 v2 v3 = ActionEnvironment -> a -> k2 -> [v2] -> IO (Maybe v3)Source
type ReducePartition a k2 v3 = ActionEnvironment -> a -> Int -> [(k2, v3)] -> IO [(Int, [(k2, v3)])]Source
type SplitFunction a k1 v1 = SplitAction a k1 v1Source
type SplitAction a k1 v1 = ActionEnvironment -> a -> Int -> [(k1, v1)] -> IO [(Int, [(k1, v1)])]Source
general SplitAction
Produced by Haddock version 2.6.1