{-# LANGUAGE MultiParamTypeClasses,FlexibleInstances,FlexibleContexts,FunctionalDependencies,UndecidableInstances,RankNTypes,ExplicitForAll,ScopedTypeVariables,NoMonomorphismRestriction,OverlappingInstances,EmptyDataDecls,RecordWildCards,TypeFamilies,TemplateHaskell #-} -- | -- This module provides the core functionalities of the GTA (Generate, Test, and Aggregate) programming framework on Haskell (c.f., Kento Emoto, Sebastian Fischer, Zhenjiang Hu: Generate, Test, and Aggregate - A Calculation-based Framework for Systematic Parallel Programming with MapReduce. ESOP 2012: 254-273. The authors' version is available at ). -- -- /Example of GTA program/ -- -- The following code is a GTA program to solve the 0-1 Knapsack problem (). It /appears to be an exponential cost/ proram in the number of input items, because it appears to generate all item selections by @subsP items@ (/Generate/), discard those with total weight heavier than the knapsack's capacity by @`filterBy` weightlimit capacity@ (/Test/), and take the most valuable selection by @`aggregateBy` maxsumsolutionWith getValue@ (/Aggregate/). However, it /actually runs in a linear time/ owing to our proposed program transformation 'Filter-embedding Semiring Fusion' implemented in the library. In addition, it runs in /parallel/ so that you can get linear speedup. The predicate @weightlimit@ is defined based on the join list algebra given in "GTA.Data.JoinList" module. -- -- > knapsack capacity items = -- > subsP items -- > `filterBy` weightlimit capacity -- > `aggregateBy` maxsumsolutionWith getValue -- > -- > getValue (_, v) = v -- > getWeight (w, _) = w -- > -- > weightlimit w = (<=w) <.> weightsum where -- > weightsum = homJ' times single nil -- > x1 `times` x2 = ( x1 + x2) `min` (w+1) -- > single i = getWeight i `min` (w+1) -- > nil = 0 -- -- Several example GTA programs are found in /examples/ directory at . -- -- This module provides generic functionalities in the GTA programming framework. Data-strructure-dependent definitions are found in GTA.Data.* modules. -- -- module GTA.Core (Bag(Bag), CommutativeMonoid (CommutativeMonoid, oplus, identity), GenericSemiring (GenericSemiring, monoid, algebra), GenericSemiringStructure (freeSemiring, liftedSemiring, pairSemiring, shom, hom, makeAlgebra, freeAlgebra, pairAlgebra, foldingAlgebra), bag, (>==), (>=>), (>=<), (>##), (>#>), (<.>), items, revOrd, RevOrd(RevOrd), maxsumBy, maxsumKBy, maxsumsolutionXKBy, maxsumsolutionXBy, maxsumsolutionBy, maxsumsolutionKBy, maxprodBy, maxprodKBy, maxprodsolutionXKBy, maxprodsolutionXBy, maxprodsolutionBy, maxprodsolutionKBy, maxMonoSumBy, maxMonoSumsolutionXBy, maxMonoSumKBy, maxMonoSumsolutionXKBy, addIdentity, AddIdentity (AddIdentity, Identity), sumproductBy, result, filterBy, aggregateBy, transformBy) where import Data.List import Data.Map (Map,empty, singleton, unionWith,assocs) import Control.DeepSeq -- The bag -- | A bag is a multiset, i.e., a set in which members are allowed to appear more than one. The order of memebrs is ignored: e.g., @Bag [1,2] == Bag [2,1]@ is True. -- data Bag a = Bag [a] deriving (Show,Ord,Read) instance (NFData a) => (NFData (Bag a)) where rnf (Bag x) = rnf x instance (Eq a, Ord a) => Eq (Bag a) where (==) (Bag a) (Bag b) = sort a == sort b -- | Extracts members from a bag. The order of members is undecidable. -- items :: Bag a -> [a] items (Bag t) = t -- | Makes a bag that contains the given memebrs. -- bag :: forall a. [a] -> Bag a bag t = Bag t --Bag filter filterB :: forall a. (a -> Bool) -> Bag a -> Bag a filterB p (Bag b) = Bag (filter p b) -- | Commutative monoid is an algebra of an associative, commutative binary operator with its identity. -- data CommutativeMonoid a = CommutativeMonoid { -- | Commutative, associative binary operator: -- -- > (a `oplus` b) `oplus` c == a `oplus` (b `oplus` c) -- > a `oplus` b == b `oplus` a -- -- oplus :: a -> a -> a, -- | The identity of `oplus`: -- -- > a `oplus` identity == identity `oplus` a == a -- -- identity::a } -- bag is commutative monoid bagMonoid :: forall a. CommutativeMonoid (Bag a) bagMonoid = CommutativeMonoid { .. } where oplus (Bag a) (Bag b) = Bag (a ++ b) identity = Bag [] -- finite map is commutative monoid mapMonoid :: forall k a. Ord k => CommutativeMonoid a -> CommutativeMonoid (Map k a) mapMonoid m = CommutativeMonoid { .. } where oplus x y = let CommutativeMonoid {..} = m in unionWith oplus x y identity = empty --singleton bag singletonBag :: forall a. a -> Bag a singletonBag b = Bag [b] --tupled monoid pairMonoid :: forall t t1.CommutativeMonoid t -> CommutativeMonoid t1 -> CommutativeMonoid (t, t1) pairMonoid m1 m2 = CommutativeMonoid {..} where identity = (identity1, identity2) oplus (l1, l2) (r1, r2) = (oplus1 l1 r1, oplus2 l2 r2) (oplus1, identity1) = let CommutativeMonoid {..} = m1 in (oplus, identity) (oplus2, identity2) = let CommutativeMonoid {..} = m2 in (oplus, identity) -- Generic Semiring -- | A generic semiring is a combination of a commutative monoid and an algebra such that operators of the algebra distributes over `oplus` and `identity` is the zero of the operators. -- -- For example, the usual semiring is a combination of a commutative monoid and a 'GTA.Data.JoinList.JoinListAlgebra', in which we have the distributivity and the zeroness: -- -- > a `times` (b `oplus` c) == (a `times` b) `oplus` (a `times` c) -- > (a `oplus` b) `times` c == (a `times` c) `oplus` (b `times` c) -- > a `times` identity == identity `times` a == identity -- -- data GenericSemiring alg a = GenericSemiring {monoid :: CommutativeMonoid a, algebra :: alg a} -- | -- Collection of data-structure-dependent definitions necessary for the GTA framework, including the free algebra, lifting of a generic semirig with an algebra, construction of useful algebras, etc. -- class GenericSemiringStructure alg free uniformer | alg -> free, alg -> uniformer where -- | The free algebra (i.e., an algebra whose operators are the constructors). -- freeAlgebra :: alg free -- | This simply tuples two algebras. -- pairAlgebra :: alg a -> alg b -> alg (a,b) -- | This is used to lift a given algebra to the same level as a given monoid so that the combination of the lifted algebra and the monoid is a generic semiring. -- makeAlgebra :: (CommutativeMonoid m) -> (alg a) -> (m->[a]) -> (a -> m) -> alg m -- | This is used to make an algebra from a usual binary operator; every operator in the algebra simply combines its operand by the given binary operator. -- foldingAlgebra :: (a -> a -> a) -> a -> uniformer a -> alg a -- | The homomorphism from the free algrba, i.e., the catamorphism (used in inefficient impl.). -- hom :: alg a -> free -> a {- for inefficient impl. -} -- | Free generic semiring to build a bag of given data structures (such as lists, binary trees, etc.). This is a combination of the bag monoid and the lifted free algebra. -- freeSemiring :: GenericSemiring alg (Bag free) -- | The most important function to build lifted generic semiring from another generic semiring and an algebra, used in the filter-embedding transformation. -- liftedSemiring :: (Ord c) => GenericSemiring alg a -> alg c -> GenericSemiring alg (Map c a) -- | This simply tuples two generic semirings. -- pairSemiring :: GenericSemiring alg a -> GenericSemiring alg b -> GenericSemiring alg (a,b) -- | Homomorphism of a generic semiring (used in inefficient impl.). -- shom :: GenericSemiring alg a -> Bag free -> a {- for inefficient impl. -} freeSemiring = GenericSemiring {..} where monoid = bagMonoid algebra = makeAlgebra bagMonoid freeAlgebra items singletonBag liftedSemiring s a = GenericSemiring {monoid=monoid', algebra=algebra'} where monoid' = let GenericSemiring {..} = s in mapMonoid monoid algebra' = makeAlgebra (mapMonoid (monoid s)) (pairAlgebra a (algebra s)) assocs (uncurry singleton) shom (GenericSemiring {..}) = sh where CommutativeMonoid {..} = monoid sh (Bag b) = foldr oplus identity (map (hom algebra) b) pairSemiring s1 s2 = GenericSemiring {monoid=monoid', algebra=algebra'} where monoid' = pairMonoid (monoid s1) (monoid s2) algebra' = pairAlgebra (algebra s1) (algebra s2) -- combinators with optimizations -- Generator + Filter = Generator -- | Combinator for connecting a generator and a filter to build another generator. A fitler is represented by a pair of a judgement function and an algebra. -- infixl 5 >== (>==) :: forall (alg :: * -> *) free (uniformer :: * -> *) c b k. (GenericSemiringStructure alg free uniformer, Ord c) => (GenericSemiring alg (Map c b) -> Map k b) -> (k -> Bool, alg c) -> GenericSemiring alg b -> b (>==) pgen (ok, bt) bts = let res = pgen (liftedSemiring bts bt) CommutativeMonoid {..} = monoid bts in foldr oplus identity [ v | (k, v) <- assocs res, ok k ] -- Generator + Aggregator = Result -- | Combinator for connecting a generator and an aggregator to get the result. An aggregator is represented by a generic semiring. -- infixl 5 >=> (>=>) :: forall (alg :: * -> *) free (uniformer :: * -> *) b k. (GenericSemiringStructure alg free uniformer) => (GenericSemiring alg b -> b) -> GenericSemiring alg b -> b (>=>) pgen bts = pgen bts -- Generator_A + Transfomer_{A->B} = Generator_B -- | Combinator for transforming a generator by a transformer. A transformer is an aggregator polymorphic over another generic semiring. -- infixl 5 >=< (>=<) :: forall (alg :: * -> *) free (uniformer :: * -> *) (alg' :: * -> *) free' (uniformer' :: * -> *) c. (GenericSemiringStructure alg free uniformer, GenericSemiringStructure alg' free' uniformer') => (GenericSemiring alg' c -> c) -> (GenericSemiring alg c -> GenericSemiring alg' c) -> GenericSemiring alg c -> c (>=<) pgen trans = pgen . trans -- aliaces -- | The same as `>==` -- filterBy :: forall (alg :: * -> *) free (uniformer :: * -> *) c b k. (GenericSemiringStructure alg free uniformer, Ord c) => (GenericSemiring alg (Map c b) -> Map k b) -> (k -> Bool, alg c) -> GenericSemiring alg b -> b filterBy = (>==) -- | The same as `>=>` -- aggregateBy :: forall (alg :: * -> *) free (uniformer :: * -> *) b k. (GenericSemiringStructure alg free uniformer) => (GenericSemiring alg b -> b) -> GenericSemiring alg b -> b aggregateBy = (>=>) -- | The same as `>=<` -- transformBy :: forall (alg :: * -> *) free (uniformer :: * -> *) (alg' :: * -> *) free' (uniformer' :: * -> *) c. (GenericSemiringStructure alg free uniformer, GenericSemiringStructure alg' free' uniformer') => (GenericSemiring alg' c -> c) -> (GenericSemiring alg c -> GenericSemiring alg' c) -> GenericSemiring alg c -> c transformBy = (>=<) -- combinators without optimizations -- | Inefficient version of `>==` (i.e., it does not do optimziation at all). -- infixl 5 >## (>##) :: (GenericSemiringStructure alg free uniformer) => (GenericSemiring alg (Bag free) -> Bag free) -> (b -> Bool, alg b) -> GenericSemiring alg (Bag free) -> Bag free (>##) pgen (ok, bt) _ = filterB (ok.hom bt) bag where bag = pgen freeSemiring {-the given semiring will be neglected by the result of this operator -} -- | Inefficient version of `>=>` (i.e., it does not do optimziation at all). -- infixl 5 >#> (>#>) :: (GenericSemiringStructure alg free uniformer) => (GenericSemiring alg (Bag free) -> Bag free) -> GenericSemiring alg a -> a (>#>) pgen bts = shom bts (pgen freeSemiring) -- operator to replace 'ok . hom' by 'ok <.> alg' -- | Operator to build a pair of a judgement function and an algebra, which represents a Tester. -- infix 6 <.> (<.>) :: forall (alg :: * -> *) a b. (b -> Bool) -> alg b -> ((b -> Bool), alg b) (<.>) ok alg = (ok, alg) -- aggregator for generating all candidates passing tests -- | The aggregator to extract all items generated by a generator. -- result :: forall (alg :: * -> *) free (uniformer :: * -> *). GenericSemiringStructure alg free uniformer => GenericSemiring alg (Bag free) result = freeSemiring -- aggregator based on the usual semirings genAlgebraFromSemiring :: forall free (uniformer :: * -> *) (alg :: * -> *) a. GenericSemiringStructure alg free uniformer => (a -> a -> a) -> a -> (a -> a -> a) -> a -> uniformer a -> GenericSemiring alg a genAlgebraFromSemiring op iop ot iot mf = GenericSemiring {..} where monoid = CommutativeMonoid {..} where oplus a b = a `op` b identity = iop algebra = foldingAlgebra ot iot mf -- | The aggregator to compute a sum of products. Each product is of all values in the data structure after /map/. -- sumproductBy :: forall free (uniformer :: * -> *) (alg :: * -> *) a. (GenericSemiringStructure alg free uniformer, Num a) => uniformer a -> GenericSemiring alg a sumproductBy = genAlgebraFromSemiring (+) 0 (*) 1 -- | Introduces an identity 'Identity' to a given type. -- data AddIdentity a = AddIdentity a | Identity deriving (Show, Eq, Read) instance (Ord a) => Ord (AddIdentity a) where compare Identity Identity = EQ compare Identity (AddIdentity _) = LT compare (AddIdentity _) Identity = GT compare (AddIdentity a) (AddIdentity b) = compare a b instance (NFData a) => (NFData (AddIdentity a)) where rnf (AddIdentity a) = rnf a rnf Identity = () -- | Introduces an identity. -- addIdentity :: forall a. a -> AddIdentity a addIdentity a = AddIdentity a -- max-sum semiring -- | The aggregator to take the maximum items under a given monotonic sum `mplus` with its identity `mid` after /map/. -- -- > c == a `max` b => d `mplus` (a `max` b) == (d `mplus` a) `max` (d `mplus` b) -- -- maxMonoSumBy :: forall free (uniformer :: * -> *) (alg :: * -> *) a. (GenericSemiringStructure alg free uniformer, Ord a) => (a -> a -> a) -> a -> uniformer (AddIdentity a) -> GenericSemiring alg (AddIdentity a) maxMonoSumBy mplus mid mf = genAlgebraFromSemiring max Identity plus (AddIdentity mid) mf where plus Identity _ = Identity plus _ Identity = Identity plus (AddIdentity a) (AddIdentity b) = AddIdentity (a `mplus` b) -- max-MonoSum with computation -- | The tupling of maxMonoSumBy and a given generic semiring. The second component of the result is the aggregation of the maximum items by the given generaic semiring. -- -- maxMonoSumsolutionXBy :: forall free (uniformer :: * -> *) a t (alg :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a) => (a -> a -> a) -> a -> GenericSemiring alg t -> uniformer (AddIdentity a) -> GenericSemiring alg (AddIdentity a, t) maxMonoSumsolutionXBy mplus mid s mf = GenericSemiring {..} where monoid = CommutativeMonoid {..} where oplus (a, x) (b, y) = case compare a b of EQ -> (a, x `oplus'` y) LT -> (b, y) GT -> (a, x) identity = (Identity, identity') algebra = pairAlgebra maxMonoSumAlgebra algebra' maxMonoSumAlgebra = let GenericSemiring {..} = maxMonoSumBy mplus mid mf in algebra (monoid', algebra') = let GenericSemiring {..} = s in (monoid, algebra) (oplus', identity') = let CommutativeMonoid {..} = monoid' in(oplus, identity) -- max-k -- | The aggregator to find the best k maximum items under a given monotonic sum. An extension of `maxMonoSumBy`. -- maxMonoSumKBy :: forall a free (uniformer :: * -> *) (alg :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a) => (a -> a -> a) -> a -> Int -> uniformer (AddIdentity a) -> GenericSemiring alg [AddIdentity a] maxMonoSumKBy mplus mid k mf = GenericSemiring {..} where monoid = CommutativeMonoid {..} where oplus x y = take k (map head (group (reverse (sort (x ++ y))))) identity = [] algebra = makeAlgebra monoid maxMonoSumAlgebra id sing sing a = [a] maxMonoSumAlgebra = let GenericSemiring {..} = maxMonoSumBy mplus mid mf in algebra -- max-solution-k -- | The /best-k/ extension of `maxMonoSumsolutionXBy`. -- maxMonoSumsolutionXKBy :: forall a free (uniformer :: * -> *) b (alg :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a) => (a -> a -> a) -> a -> GenericSemiring alg b -> Int -> uniformer (AddIdentity a) -> GenericSemiring alg [(AddIdentity a, b)] maxMonoSumsolutionXKBy mplus mid s k mf = GenericSemiring {..} where monoid = CommutativeMonoid {..} where oplus x y = let std = reverse (sortBy fstCmp (x ++ y)) grpd = groupBy (\a b -> fstCmp a b == EQ) std fstCmp a b = compare (fst a) (fst b) op (a, x) (_, y) = (a, x `oplus'` y) in take k (map (foldr1 op) grpd) identity = [] (oplus', identity') = let CommutativeMonoid {..} = monoid' in (oplus, identity) algebra = makeAlgebra monoid (pairAlgebra maxMonoSumAlgebra algebra') id sing sing a = [a] maxMonoSumAlgebra = let GenericSemiring {..} = maxMonoSumBy mplus mid mf in algebra (monoid', algebra') = let GenericSemiring {..} = s in (monoid, algebra) -- max-sum -- | The aggregator to the maximum sum after /map/. -- maxsumBy :: forall free (uniformer :: * -> *) (alg :: * -> *) a. (GenericSemiringStructure alg free uniformer, Ord a, Num a) => uniformer (AddIdentity a) -> GenericSemiring alg (AddIdentity a) maxsumBy = maxMonoSumBy (+) 0 -- | The /best-k/ extension of `maxsumBy`. -- maxsumKBy :: forall a free (uniformer :: * -> *) (alg :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a, Num a) => Int -> uniformer (AddIdentity a) -> GenericSemiring alg [AddIdentity a] maxsumKBy = maxMonoSumKBy (+) 0 -- | The /best-k/ extension of `maxsumsolutionXBy`. -- maxsumsolutionXKBy :: forall a free (uniformer :: * -> *) b (alg :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a, Num a) => GenericSemiring alg b -> Int -> uniformer (AddIdentity a) -> GenericSemiring alg [(AddIdentity a, b)] maxsumsolutionXKBy = maxMonoSumsolutionXKBy (+) 0 -- | An instance of `maxMonoSumsolutionXBy` with the usual summation. -- maxsumsolutionXBy :: forall free (uniformer :: * -> *) a t (alg :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a, Num a) => GenericSemiring alg t -> uniformer (AddIdentity a) -> GenericSemiring alg (AddIdentity a, t) maxsumsolutionXBy = maxMonoSumsolutionXBy (+) 0 -- | An instance of `maxMonoSumsolutionBy` with the usual summation. -- maxsumsolutionBy :: forall a (alg :: * -> *) free (uniformer :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a, Num a) => uniformer (AddIdentity a) -> GenericSemiring alg (AddIdentity a, Bag free) maxsumsolutionBy = maxsumsolutionXBy freeSemiring -- | The /best-k/ extension of `maxsumsolutionBy`. -- maxsumsolutionKBy :: forall a (alg :: * -> *) free (uniformer :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a, Num a) => Int -> uniformer (AddIdentity a) -> GenericSemiring alg [(AddIdentity a, Bag free)] maxsumsolutionKBy = maxsumsolutionXKBy freeSemiring --max prod (on positive numbers) -- | The aggregator to take the maximum product on /non-negative/ numbers. -- maxprodBy :: forall free (uniformer :: * -> *) (alg :: * -> *) a. (GenericSemiringStructure alg free uniformer, Ord a, Num a) => uniformer (AddIdentity a) -> GenericSemiring alg (AddIdentity a) maxprodBy = maxMonoSumBy (*) 1 -- | The /best-k/ extension of 'maxprodBy' -- maxprodKBy :: forall a free (uniformer :: * -> *) (alg :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a, Num a) => Int -> uniformer (AddIdentity a) -> GenericSemiring alg [AddIdentity a] maxprodKBy = maxMonoSumKBy (*) 1 -- | The /best-k/ extension of 'maxprodsolutionXBy' -- maxprodsolutionXKBy :: forall a free (uniformer :: * -> *) b (alg :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a, Num a) => GenericSemiring alg b -> Int -> uniformer (AddIdentity a) -> GenericSemiring alg [(AddIdentity a, b)] maxprodsolutionXKBy = maxMonoSumsolutionXKBy (*) 1 -- | The tupling of 'maxprodsolutionBy' and a given generic semiring. The second component of the result is the aggregation of the best items by the given generic emiring. -- maxprodsolutionXBy :: forall free (uniformer :: * -> *) a t (alg :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a, Num a) => GenericSemiring alg t -> uniformer (AddIdentity a) -> GenericSemiring alg (AddIdentity a, t) maxprodsolutionXBy = maxMonoSumsolutionXBy (*) 1 -- | The aggregator to find the items with the maximum product on /non-negative/ numbers. -- maxprodsolutionBy :: forall a (alg :: * -> *) free (uniformer :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a, Num a) => uniformer (AddIdentity a) -> GenericSemiring alg (AddIdentity a, Bag free) maxprodsolutionBy = maxprodsolutionXBy freeSemiring -- | The /best-k/ extension of 'maxprodsolutionBy' -- maxprodsolutionKBy :: forall a (alg :: * -> *) free (uniformer :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a, Num a) => Int -> uniformer (AddIdentity a) -> GenericSemiring alg [(AddIdentity a, Bag free)] maxprodsolutionKBy = maxprodsolutionXKBy freeSemiring -- reverse order to make `max` `min` -- | Reverses the order of the argument, so that we can use aggregators maxXXX to take the minimum XXX. -- revOrd :: forall a. a -> RevOrd a revOrd a = RevOrd a -- | Reverses the order of a given type. -- data RevOrd a = RevOrd a deriving (Eq, Show, Read) instance (Num a) => (Num (RevOrd a)) where (+) (RevOrd a) (RevOrd b) = RevOrd (a + b) (*) (RevOrd a) (RevOrd b) = RevOrd (a * b) (-) (RevOrd a) (RevOrd b) = RevOrd (a - b) negate (RevOrd a) = RevOrd (negate a) abs (RevOrd a) = RevOrd (abs a) signum (RevOrd a) = RevOrd (signum a) fromInteger a = RevOrd (fromInteger a) instance (Ord a) => (Ord (RevOrd a)) where compare (RevOrd a) (RevOrd b) = case compare a b of EQ -> EQ LT -> GT GT -> LT instance (NFData a) => (NFData (RevOrd a)) where rnf (RevOrd a) = rnf a