Copyright	(c) Adam Conner-Sax 2019
License	BSD-3-Clause
Maintainer	adam_conner_sax@yahoo.com
Stability	experimental
Safe Haskell	Safe
Language	Haskell2010

Control.MapReduce.Core

Contents

Basic Types for map reduce
Foldl helpers
Re-Exports

Description

MapReduce as folds This is all just wrapping around Control.Foldl so that it's easier to see the map-reduce structure The Mapping step is broken into 2 parts:

unpacking, which could include "melting" or filtering,
assigning, which assigns a group to each unpacked item. Could just be choosing a key column(s)

The items are then grouped by key and "reduced"

The reduce step is conceptually simpler, just requiring a function from the (key, grouped data) pair to the result.

Reduce could be as simple as combining the key with a single data row or some very complex function of the grouped data. E.g., reduce could itself be a map-reduce on the grouped data. Since these are folds, we can share work by using the Applicative instance of MapStep (just the Applicative instance of Control.Foldl.Fold) and we will loop over the data only once. The Reduce type is also Applicative so there could be work sharing there as well, especially if you specify your reduce as a Fold. e.g., if your reduce :: (k -> h c -> d) has the form reduce :: k -> FL.Fold c d

We combine these steps with an Engine, resulting in a fold from a container of x to some container of d. The Engine amounts to a choice of grouping algorithm (usually using Data.Map or Data.HashMap) and a choice of result container type. The result container type is used for the intermediate steps as well.

The goal is to make assembling a large family of common map/reduce patterns in a straightforward way. At some level of complication, you may as well write them by hand. An in-between case would be writing the unpack function as a complex hand written filter

Synopsis

data Unpack x y where
- Filter :: (x -> Bool) -> Unpack x x
- Unpack :: Traversable g => (x -> g y) -> Unpack x y
data Assign k y c where
- Assign :: (y -> (k, c)) -> Assign k y c
data Reduce k x d where
- Reduce :: (k -> forall h. (Foldable h, Functor h) => h x -> d) -> Reduce k x d
- ReduceFold :: (k -> Fold x d) -> Reduce k x d
data UnpackM m x y where
- FilterM :: Monad m => (x -> m Bool) -> UnpackM m x x
- UnpackM :: (Monad m, Traversable g) => (x -> m (g y)) -> UnpackM m x y
data AssignM m k y c where
- AssignM :: Monad m => (y -> m (k, c)) -> AssignM m k y c
data ReduceM m k x d where
- ReduceM :: Monad m => (k -> forall h. (Foldable h, Functor h) => h x -> m d) -> ReduceM m k x d
- ReduceFoldM :: Monad m => (k -> FoldM m x d) -> ReduceM m k x d
generalizeUnpack :: Monad m => Unpack x y -> UnpackM m x y
generalizeAssign :: Monad m => Assign k y c -> AssignM m k y c
generalizeReduce :: Monad m => Reduce k x d -> ReduceM m k x d
functionToFold :: (forall h. Foldable h => h x -> a) -> Fold x a
functionToFoldM :: Monad m => (forall h. Foldable h => h x -> m a) -> FoldM m x a
postMapM :: Monad m => (a -> m b) -> FoldM m x a -> FoldM m x b
data Fold a b
data FoldM (m :: Type -> Type) a b
fold :: Foldable f => Fold a b -> f a -> b
foldM :: (Foldable f, Monad m) => FoldM m a b -> f a -> m b

Basic Types for map reduce

Non-Monadic

data Unpack x y where Source #

Unpack is for filtering rows or "melting" them, that is doing something which turns each x into several y. Filter is a special case because it can often be done faster directly than via folding over Maybe

Constructors

Filter :: (x -> Bool) -> Unpack x x
Unpack :: Traversable g => (x -> g y) -> Unpack x y

Instances

Profunctor Unpack Source #
Instance details Defined in Control.MapReduce.Core Methods dimap :: (a -> b) -> (c -> d) -> Unpack b c -> Unpack a d # lmap :: (a -> b) -> Unpack b c -> Unpack a c # rmap :: (b -> c) -> Unpack a b -> Unpack a c # (#.) :: Coercible c b => q b c -> Unpack a b -> Unpack a c # (.#) :: Coercible b a => Unpack b c -> q a b -> Unpack a c #
Functor (Unpack x) Source #
Instance details Defined in Control.MapReduce.Core Methods fmap :: (a -> b) -> Unpack x a -> Unpack x b # (<$) :: a -> Unpack x b -> Unpack x a #

data Assign k y c where Source #

map y into a (k,c) pair for grouping

Constructors

Assign :: (y -> (k, c)) -> Assign k y c

Instances

Profunctor (Assign k) Source #
Instance details Defined in Control.MapReduce.Core Methods dimap :: (a -> b) -> (c -> d) -> Assign k b c -> Assign k a d # lmap :: (a -> b) -> Assign k b c -> Assign k a c # rmap :: (b -> c) -> Assign k a b -> Assign k a c # (#.) :: Coercible c b => q b c -> Assign k a b -> Assign k a c # (.#) :: Coercible b a => Assign k b c -> q a b -> Assign k a c #
Functor (Assign k y) Source #
Instance details Defined in Control.MapReduce.Core Methods fmap :: (a -> b) -> Assign k y a -> Assign k y b # (<$) :: a -> Assign k y b -> Assign k y a #

data Reduce k x d where Source #

Wrapper for functions to reduce keyed and grouped data to the result type. It is *strongly* suggested that you use Folds for this step. Type-inference and applicative optimization are more straightforward that way. The non-Fold contructors are there in order to retro-fit existing functions.

Reduce step for non-effectful reductions

Constructors

Reduce :: (k -> forall h. (Foldable h, Functor h) => h x -> d) -> Reduce k x d
ReduceFold :: (k -> Fold x d) -> Reduce k x d

Instances

Profunctor (Reduce k) Source #
Instance details Defined in Control.MapReduce.Core Methods dimap :: (a -> b) -> (c -> d) -> Reduce k b c -> Reduce k a d # lmap :: (a -> b) -> Reduce k b c -> Reduce k a c # rmap :: (b -> c) -> Reduce k a b -> Reduce k a c # (#.) :: Coercible c b => q b c -> Reduce k a b -> Reduce k a c # (.#) :: Coercible b a => Reduce k b c -> q a b -> Reduce k a c #
Functor (Reduce k x) Source #
Instance details Defined in Control.MapReduce.Core Methods fmap :: (a -> b) -> Reduce k x a -> Reduce k x b # (<$) :: a -> Reduce k x b -> Reduce k x a #
Applicative (Reduce k x) Source #
Instance details Defined in Control.MapReduce.Core Methods pure :: a -> Reduce k x a # (<>) :: Reduce k x (a -> b) -> Reduce k x a -> Reduce k x b # liftA2 :: (a -> b -> c) -> Reduce k x a -> Reduce k x b -> Reduce k x c # (>) :: Reduce k x a -> Reduce k x b -> Reduce k x b # (<*) :: Reduce k x a -> Reduce k x b -> Reduce k x a #

Monadic

data UnpackM m x y where Source #

UnpackM is for filtering or melting the input type. This version has a monadic result type to accomodate unpacking that might require randomness or logging during unpacking. filter is a special case since filtering can be often be done faster. So we single it out.

Constructors

FilterM :: Monad m => (x -> m Bool) -> UnpackM m x x
UnpackM :: (Monad m, Traversable g) => (x -> m (g y)) -> UnpackM m x y

Instances

Profunctor (UnpackM m) Source #
Instance details Defined in Control.MapReduce.Core Methods dimap :: (a -> b) -> (c -> d) -> UnpackM m b c -> UnpackM m a d # lmap :: (a -> b) -> UnpackM m b c -> UnpackM m a c # rmap :: (b -> c) -> UnpackM m a b -> UnpackM m a c # (#.) :: Coercible c b => q b c -> UnpackM m a b -> UnpackM m a c # (.#) :: Coercible b a => UnpackM m b c -> q a b -> UnpackM m a c #
Functor (UnpackM m x) Source #
Instance details Defined in Control.MapReduce.Core Methods fmap :: (a -> b) -> UnpackM m x a -> UnpackM m x b # (<$) :: a -> UnpackM m x b -> UnpackM m x a #

data AssignM m k y c where Source #

Effectfully map y into a (k,c) pair for grouping

Constructors

AssignM :: Monad m => (y -> m (k, c)) -> AssignM m k y c

Instances

Profunctor (AssignM m k) Source #
Instance details Defined in Control.MapReduce.Core Methods dimap :: (a -> b) -> (c -> d) -> AssignM m k b c -> AssignM m k a d # lmap :: (a -> b) -> AssignM m k b c -> AssignM m k a c # rmap :: (b -> c) -> AssignM m k a b -> AssignM m k a c # (#.) :: Coercible c b => q b c -> AssignM m k a b -> AssignM m k a c # (.#) :: Coercible b a => AssignM m k b c -> q a b -> AssignM m k a c #
Functor (AssignM m k y) Source #
Instance details Defined in Control.MapReduce.Core Methods fmap :: (a -> b) -> AssignM m k y a -> AssignM m k y b # (<$) :: a -> AssignM m k y b -> AssignM m k y a #

data ReduceM m k x d where Source #

Reduce step for effectful reductions. It is *strongly* suggested that you use Folds for this step. Type-inference and applicative optimization are more straightforward that way. The non-Fold contructors are there in order to retro-fit existing functions.

Constructors

ReduceM :: Monad m => (k -> forall h. (Foldable h, Functor h) => h x -> m d) -> ReduceM m k x d
ReduceFoldM :: Monad m => (k -> FoldM m x d) -> ReduceM m k x d

Instances

Profunctor (ReduceM m k) Source #
Instance details Defined in Control.MapReduce.Core Methods dimap :: (a -> b) -> (c -> d) -> ReduceM m k b c -> ReduceM m k a d # lmap :: (a -> b) -> ReduceM m k b c -> ReduceM m k a c # rmap :: (b -> c) -> ReduceM m k a b -> ReduceM m k a c # (#.) :: Coercible c b => q b c -> ReduceM m k a b -> ReduceM m k a c # (.#) :: Coercible b a => ReduceM m k b c -> q a b -> ReduceM m k a c #
Functor (ReduceM m k x) Source #
Instance details Defined in Control.MapReduce.Core Methods fmap :: (a -> b) -> ReduceM m k x a -> ReduceM m k x b # (<$) :: a -> ReduceM m k x b -> ReduceM m k x a #
Monad m => Applicative (ReduceM m k x) Source #
Instance details Defined in Control.MapReduce.Core Methods pure :: a -> ReduceM m k x a # (<>) :: ReduceM m k x (a -> b) -> ReduceM m k x a -> ReduceM m k x b # liftA2 :: (a -> b -> c) -> ReduceM m k x a -> ReduceM m k x b -> ReduceM m k x c # (>) :: ReduceM m k x a -> ReduceM m k x b -> ReduceM m k x b # (<*) :: ReduceM m k x a -> ReduceM m k x b -> ReduceM m k x a #

Non-Monadic -> Monadic

generalizeUnpack :: Monad m => Unpack x y -> UnpackM m x y Source #

lift a non-monadic Unpack to a monadic one for any monad m

generalizeAssign :: Monad m => Assign k y c -> AssignM m k y c Source #

lift a non-monadic Assign to a monadic one for any monad m

generalizeReduce :: Monad m => Reduce k x d -> ReduceM m k x d Source #

Make a non-monadic reduce monadic.

Foldl helpers

functionToFold :: (forall h. Foldable h => h x -> a) -> Fold x a Source #

convert a function which takes a foldable container of x and produces an a into a Fold x a. uses Data.Sequence.Seq as an intermediate type because it should behave well whether f is a left or right fold. This can be helpful in putting an existing function into a Reduce.

functionToFoldM :: Monad m => (forall h. Foldable h => h x -> m a) -> FoldM m x a Source #

convert a function which takes a foldable container of x and produces an (m a) into a FoldM m x a. uses Data.Sequence.Seq as an intermediate type because it should behave well whether f is a left or right fold. This can be helpful in putting an existing function into a ReduceM.

postMapM :: Monad m => (a -> m b) -> FoldM m x a -> FoldM m x b Source #

Given an effectful (monadic) Control.Foldl fold, a FoldM m x a, we can use its Functor instance to apply a non-effectful (a -> b) to its result type. To apply (a -> m b), we need this combinator.

Re-Exports

data Fold a b #

Efficient representation of a left fold that preserves the fold's step function, initial accumulator, and extraction function

This allows the Applicative instance to assemble derived folds that traverse the container only once

A 'Fold a b' processes elements of type a and results in a value of type b.

Instances

Choice Fold
Instance details Defined in Control.Foldl Methods left' :: Fold a b -> Fold (Either a c) (Either b c) # right' :: Fold a b -> Fold (Either c a) (Either c b) #
Profunctor Fold
Instance details Defined in Control.Foldl Methods dimap :: (a -> b) -> (c -> d) -> Fold b c -> Fold a d # lmap :: (a -> b) -> Fold b c -> Fold a c # rmap :: (b -> c) -> Fold a b -> Fold a c # (#.) :: Coercible c b => q b c -> Fold a b -> Fold a c # (.#) :: Coercible b a => Fold b c -> q a b -> Fold a c #
Functor (Fold a)
Instance details Defined in Control.Foldl Methods fmap :: (a0 -> b) -> Fold a a0 -> Fold a b # (<$) :: a0 -> Fold a b -> Fold a a0 #
Applicative (Fold a)
Instance details Defined in Control.Foldl Methods pure :: a0 -> Fold a a0 # (<>) :: Fold a (a0 -> b) -> Fold a a0 -> Fold a b # liftA2 :: (a0 -> b -> c) -> Fold a a0 -> Fold a b -> Fold a c # (>) :: Fold a a0 -> Fold a b -> Fold a b # (<*) :: Fold a a0 -> Fold a b -> Fold a a0 #
Comonad (Fold a)
Instance details Defined in Control.Foldl Methods extract :: Fold a a0 -> a0 # duplicate :: Fold a a0 -> Fold a (Fold a a0) # extend :: (Fold a a0 -> b) -> Fold a a0 -> Fold a b #
Semigroupoid Fold
Instance details Defined in Control.Foldl Methods o :: Fold j k1 -> Fold i j -> Fold i k1 #
Floating b => Floating (Fold a b)
Instance details Defined in Control.Foldl Methods pi :: Fold a b # exp :: Fold a b -> Fold a b # log :: Fold a b -> Fold a b # sqrt :: Fold a b -> Fold a b # (**) :: Fold a b -> Fold a b -> Fold a b # logBase :: Fold a b -> Fold a b -> Fold a b # sin :: Fold a b -> Fold a b # cos :: Fold a b -> Fold a b # tan :: Fold a b -> Fold a b # asin :: Fold a b -> Fold a b # acos :: Fold a b -> Fold a b # atan :: Fold a b -> Fold a b # sinh :: Fold a b -> Fold a b # cosh :: Fold a b -> Fold a b # tanh :: Fold a b -> Fold a b # asinh :: Fold a b -> Fold a b # acosh :: Fold a b -> Fold a b # atanh :: Fold a b -> Fold a b # log1p :: Fold a b -> Fold a b # expm1 :: Fold a b -> Fold a b # log1pexp :: Fold a b -> Fold a b # log1mexp :: Fold a b -> Fold a b #
Fractional b => Fractional (Fold a b)
Instance details Defined in Control.Foldl Methods (/) :: Fold a b -> Fold a b -> Fold a b # recip :: Fold a b -> Fold a b # fromRational :: Rational -> Fold a b #
Num b => Num (Fold a b)
Instance details Defined in Control.Foldl Methods (+) :: Fold a b -> Fold a b -> Fold a b # (-) :: Fold a b -> Fold a b -> Fold a b # (*) :: Fold a b -> Fold a b -> Fold a b # negate :: Fold a b -> Fold a b # abs :: Fold a b -> Fold a b # signum :: Fold a b -> Fold a b # fromInteger :: Integer -> Fold a b #
Semigroup b => Semigroup (Fold a b)
Instance details Defined in Control.Foldl Methods (<>) :: Fold a b -> Fold a b -> Fold a b # sconcat :: NonEmpty (Fold a b) -> Fold a b # stimes :: Integral b0 => b0 -> Fold a b -> Fold a b #
Monoid b => Monoid (Fold a b)
Instance details Defined in Control.Foldl Methods mempty :: Fold a b # mappend :: Fold a b -> Fold a b -> Fold a b # mconcat :: [Fold a b] -> Fold a b #

data FoldM (m :: Type -> Type) a b #

Like Fold, but monadic.

A 'FoldM m a b' processes elements of type a and results in a monadic value of type m b.

Instances

Functor m => Profunctor (FoldM m)
Instance details Defined in Control.Foldl Methods dimap :: (a -> b) -> (c -> d) -> FoldM m b c -> FoldM m a d # lmap :: (a -> b) -> FoldM m b c -> FoldM m a c # rmap :: (b -> c) -> FoldM m a b -> FoldM m a c # (#.) :: Coercible c b => q b c -> FoldM m a b -> FoldM m a c # (.#) :: Coercible b a => FoldM m b c -> q a b -> FoldM m a c #
Functor m => Functor (FoldM m a)
Instance details Defined in Control.Foldl Methods fmap :: (a0 -> b) -> FoldM m a a0 -> FoldM m a b # (<$) :: a0 -> FoldM m a b -> FoldM m a a0 #
Applicative m => Applicative (FoldM m a)
Instance details Defined in Control.Foldl Methods pure :: a0 -> FoldM m a a0 # (<>) :: FoldM m a (a0 -> b) -> FoldM m a a0 -> FoldM m a b # liftA2 :: (a0 -> b -> c) -> FoldM m a a0 -> FoldM m a b -> FoldM m a c # (>) :: FoldM m a a0 -> FoldM m a b -> FoldM m a b # (<*) :: FoldM m a a0 -> FoldM m a b -> FoldM m a a0 #
(Monad m, Floating b) => Floating (FoldM m a b)
Instance details Defined in Control.Foldl Methods pi :: FoldM m a b # exp :: FoldM m a b -> FoldM m a b # log :: FoldM m a b -> FoldM m a b # sqrt :: FoldM m a b -> FoldM m a b # (**) :: FoldM m a b -> FoldM m a b -> FoldM m a b # logBase :: FoldM m a b -> FoldM m a b -> FoldM m a b # sin :: FoldM m a b -> FoldM m a b # cos :: FoldM m a b -> FoldM m a b # tan :: FoldM m a b -> FoldM m a b # asin :: FoldM m a b -> FoldM m a b # acos :: FoldM m a b -> FoldM m a b # atan :: FoldM m a b -> FoldM m a b # sinh :: FoldM m a b -> FoldM m a b # cosh :: FoldM m a b -> FoldM m a b # tanh :: FoldM m a b -> FoldM m a b # asinh :: FoldM m a b -> FoldM m a b # acosh :: FoldM m a b -> FoldM m a b # atanh :: FoldM m a b -> FoldM m a b # log1p :: FoldM m a b -> FoldM m a b # expm1 :: FoldM m a b -> FoldM m a b # log1pexp :: FoldM m a b -> FoldM m a b # log1mexp :: FoldM m a b -> FoldM m a b #
(Monad m, Fractional b) => Fractional (FoldM m a b)
Instance details Defined in Control.Foldl Methods (/) :: FoldM m a b -> FoldM m a b -> FoldM m a b # recip :: FoldM m a b -> FoldM m a b # fromRational :: Rational -> FoldM m a b #
(Monad m, Num b) => Num (FoldM m a b)
Instance details Defined in Control.Foldl Methods (+) :: FoldM m a b -> FoldM m a b -> FoldM m a b # (-) :: FoldM m a b -> FoldM m a b -> FoldM m a b # (*) :: FoldM m a b -> FoldM m a b -> FoldM m a b # negate :: FoldM m a b -> FoldM m a b # abs :: FoldM m a b -> FoldM m a b # signum :: FoldM m a b -> FoldM m a b # fromInteger :: Integer -> FoldM m a b #
(Semigroup b, Monad m) => Semigroup (FoldM m a b)
Instance details Defined in Control.Foldl Methods (<>) :: FoldM m a b -> FoldM m a b -> FoldM m a b # sconcat :: NonEmpty (FoldM m a b) -> FoldM m a b # stimes :: Integral b0 => b0 -> FoldM m a b -> FoldM m a b #
(Monoid b, Monad m) => Monoid (FoldM m a b)
Instance details Defined in Control.Foldl Methods mempty :: FoldM m a b # mappend :: FoldM m a b -> FoldM m a b -> FoldM m a b # mconcat :: [FoldM m a b] -> FoldM m a b #

fold :: Foldable f => Fold a b -> f a -> b #

Apply a strict left Fold to a Foldable container

foldM :: (Foldable f, Monad m) => FoldM m a b -> f a -> m b #

Like fold, but monadic