{-# LANGUAGE MultiParamTypeClasses,FlexibleInstances,FlexibleContexts,FunctionalDependencies,UndecidableInstances,RankNTypes,ExplicitForAll,ScopedTypeVariables,NoMonomorphismRestriction,OverlappingInstances,EmptyDataDecls,RecordWildCards,TypeFamilies,TemplateHaskell #-} {-| This module provides the core functionalities of the GTA (Generate, Test, and Aggregate) programming framework on Haskell (c.f., Kento Emoto, Sebastian Fischer, Zhenjiang Hu: Generate, Test, and Aggregate - A Calculation-based Framework for Systematic Parallel Programming with MapReduce. ESOP 2012: 254-273. The authors' version is available at <http://www.ipl-lab.org/~emoto/ESOP2012.pdf>). /Example of GTA program/ The following code is a GTA program to solve the 0-1 Knapsack problem (<http://en.wikipedia.org/wiki/Knapsack_problem>). It /appears to be an exponential cost/ proram in the number of input items, because it appears to generate all item selections by @subsP items@ (/Generate/), discard those with total weight heavier than the knapsack's capacity by @`filterBy` weightlimit capacity@ (/Test/), and take the most valuable selection by @`aggregateBy` maxsumsolutionWith getValue@ (/Aggregate/). However, it /actually runs in a linear time/ owing to our proposed program transformation 'Filter-embedding Semiring Fusion' implemented in the library. In addition, it runs in /parallel/ so that you can get linear speedup. The predicate @weightlimit@ is defined based on the join list algebra given in "GTA.Data.JoinList" module. > knapsack capacity items = > subsP items > `filterBy` weightlimit capacity > `aggregateBy` maxsumsolutionWith getValue > > getValue (_, v) = v > getWeight (w, _) = w > > weightlimit w = (<=w) <.> weightsum where > weightsum = JoinListAlgebra{..} > x1 `times` x2 = ( x1 + x2) `min` (w+1) > single i = getWeight i `min` (w+1) > nil = 0 Several example GTA programs are found in /examples/ directory at <https://bitbucket.org/emoto/gtalib/src>. This module provides generic functionalities in the GTA programming framework. Data-strructure-dependent definitions are found in GTA.Data.* modules. -} module GTA.Core (Bag(Bag), CommutativeMonoid (CommutativeMonoid, oplus, identity), GenericSemiring (GenericSemiring, monoid, algebra), GenericSemiringStructure (freeSemiring, liftedSemiring, pairSemiring, shom, hom, makeAlgebra, freeAlgebra, pairAlgebra, foldingAlgebra), bag, (>==), (>=>), (>=<), (>##), (>#>), (<.>), items, revOrd, RevOrd(RevOrd), maxsumBy, maxsumKBy, maxsumsolutionXKBy, maxsumsolutionXBy, maxsumsolutionBy, maxsumsolutionKBy, maxprodBy, maxprodKBy, maxprodsolutionXKBy, maxprodsolutionXBy, maxprodsolutionBy, maxprodsolutionKBy, maxMonoSumBy, maxMonoSumsolutionXBy, maxMonoSumKBy, maxMonoSumsolutionXKBy, addIdentity, AddIdentity (AddIdentity, Identity), sumproductBy, result, filterBy, aggregateBy, transformBy) where import Data.List import Data.Map (Map,empty, singleton, unionWith,assocs) import Control.DeepSeq -- The bag {-| A bag is a multiset, i.e., a set in which members are allowed to appear more than one. The order of memebrs is ignored: e.g., @Bag [1,2] == Bag [2,1]@ is True. -} data Bag a = Bag [a] deriving (Show,Ord,Read) instance (NFData a) => (NFData (Bag a)) where rnf (Bag x) = rnf x instance (Eq a, Ord a) => Eq (Bag a) where (==) (Bag a) (Bag b) = sort a == sort b {-| Extracts members from a bag. The order of members is undecidable. -} items :: Bag a -> [a] items (Bag t) = t {-| Makes a bag that contains the given memebrs. -} bag :: forall a. [a] -> Bag a bag t = Bag t --Bag filter filterB :: forall a. (a -> Bool) -> Bag a -> Bag a filterB p (Bag b) = Bag (filter p b) {-| Commutative monoid is an algebra of an associative, commutative binary operator with its identity. -} data CommutativeMonoid a = CommutativeMonoid { {-| Commutative, associative binary operator: > (a `oplus` b) `oplus` c == a `oplus` (b `oplus` c) > a `oplus` b == b `oplus` a -} oplus :: a -> a -> a, {-| The identity of `oplus`: > a `oplus` identity == identity `oplus` a == a -} identity::a } -- bag is commutative monoid bagMonoid :: forall a. CommutativeMonoid (Bag a) bagMonoid = CommutativeMonoid { .. } where oplus (Bag a) (Bag b) = Bag (a ++ b) identity = Bag [] -- finite map is commutative monoid mapMonoid :: forall k a. Ord k => CommutativeMonoid a -> CommutativeMonoid (Map k a) mapMonoid m = CommutativeMonoid { .. } where oplus x y = let CommutativeMonoid {..} = m in unionWith oplus x y identity = empty --singleton bag singletonBag :: forall a. a -> Bag a singletonBag b = Bag [b] --tupled monoid pairMonoid :: forall t t1.CommutativeMonoid t -> CommutativeMonoid t1 -> CommutativeMonoid (t, t1) pairMonoid m1 m2 = CommutativeMonoid {..} where identity = (identity1, identity2) oplus (l1, l2) (r1, r2) = (oplus1 l1 r1, oplus2 l2 r2) (oplus1, identity1) = let CommutativeMonoid {..} = m1 in (oplus, identity) (oplus2, identity2) = let CommutativeMonoid {..} = m2 in (oplus, identity) -- Generic Semiring {-| A generic semiring is a combination of a commutative monoid and an algebra such that operators of the algebra distributes over `oplus` and `identity` is the zero of the operators. For example, the usual semiring is a combination of a commutative monoid and a 'GTA.Data.JoinList.JoinListAlgebra', in which we have the distributivity and the zeroness: > a `times` (b `oplus` c) == (a `times` b) `oplus` (a `times` c) > (a `oplus` b) `times` c == (a `times` c) `oplus` (b `times` c) > a `times` identity == identity `times` a == identity -} data GenericSemiring alg a = GenericSemiring {monoid :: CommutativeMonoid a, algebra :: alg a} {-| Collection of data-structure-dependent definitions necessary for the GTA framework, including the free algebra, lifting of a generic semirig with an algebra, construction of useful algebras, etc. -} class GenericSemiringStructure alg free uniformer | alg -> free, alg -> uniformer where {-| The free algebra (i.e., an algebra whose operators are the constructors). -} freeAlgebra :: alg free {-| This simply tuples two algebras. -} pairAlgebra :: alg a -> alg b -> alg (a,b) {-| This is used to lift a given algebra to the same level as a given monoid so that the combination of the lifted algebra and the monoid is a generic semiring. -} makeAlgebra :: (CommutativeMonoid m) -> (alg a) -> (m->[a]) -> (a -> m) -> alg m {-| This is used to make an algebra from a usual binary operator; every operator in the algebra simply combines its operand by the given binary operator. -} foldingAlgebra :: (a -> a -> a) -> a -> uniformer a -> alg a {-| The homomorphism from the free algrba, i.e., the catamorphism (used in inefficient impl.). -} hom :: alg a -> free -> a {- for inefficient impl. -} {-| Free generic semiring to build a bag of given data structures (such as lists, binary trees, etc.). This is a combination of the bag monoid and the lifted free algebra. -} freeSemiring :: GenericSemiring alg (Bag free) {-| The most important function to build lifted generic semiring from another generic semiring and an algebra, used in the filter-embedding transformation. -} liftedSemiring :: (Ord c) => GenericSemiring alg a -> alg c -> GenericSemiring alg (Map c a) {-| This simply tuples two generic semirings. -} pairSemiring :: GenericSemiring alg a -> GenericSemiring alg b -> GenericSemiring alg (a,b) {-| Homomorphism of a generic semiring (used in inefficient impl.). -} shom :: GenericSemiring alg a -> Bag free -> a {- for inefficient impl. -} freeSemiring = GenericSemiring {..} where monoid = bagMonoid algebra = makeAlgebra bagMonoid freeAlgebra items singletonBag liftedSemiring s a = GenericSemiring {monoid=monoid', algebra=algebra'} where monoid' = let GenericSemiring {..} = s in mapMonoid monoid algebra' = makeAlgebra (mapMonoid (monoid s)) (pairAlgebra a (algebra s)) assocs (uncurry singleton) shom (GenericSemiring {..}) = sh where CommutativeMonoid {..} = monoid sh (Bag b) = foldr oplus identity (map (hom algebra) b) pairSemiring s1 s2 = GenericSemiring {monoid=monoid', algebra=algebra'} where monoid' = pairMonoid (monoid s1) (monoid s2) algebra' = pairAlgebra (algebra s1) (algebra s2) -- combinators with optimizations -- Generator + Filter = Generator {-| Combinator for connecting a generator and a filter to build another generator. A fitler is represented by a pair of a judgement function and an algebra. -} infixl 5 >== (>==) :: forall (alg :: * -> *) free (uniformer :: * -> *) c b k. (GenericSemiringStructure alg free uniformer, Ord c) => (GenericSemiring alg (Map c b) -> Map k b) -> (k -> Bool, alg c) -> GenericSemiring alg b -> b (>==) pgen (ok, bt) bts = let res = pgen (liftedSemiring bts bt) CommutativeMonoid {..} = monoid bts in foldr oplus identity [ v | (k, v) <- assocs res, ok k ] -- Generator + Aggregator = Result {-| Combinator for connecting a generator and an aggregator to get the result. An aggregator is represented by a generic semiring. -} infixl 5 >=> (>=>) :: forall (alg :: * -> *) free (uniformer :: * -> *) b k. (GenericSemiringStructure alg free uniformer) => (GenericSemiring alg b -> b) -> GenericSemiring alg b -> b (>=>) pgen bts = pgen bts -- Generator_A + Transfomer_{A->B} = Generator_B {-| Combinator for transforming a generator by a transformer. A transformer is an aggregator polymorphic over another generic semiring. -} infixl 5 >=< (>=<) :: forall (alg :: * -> *) free (uniformer :: * -> *) (alg' :: * -> *) free' (uniformer' :: * -> *) c. (GenericSemiringStructure alg free uniformer, GenericSemiringStructure alg' free' uniformer') => (GenericSemiring alg' c -> c) -> (GenericSemiring alg c -> GenericSemiring alg' c) -> GenericSemiring alg c -> c (>=<) pgen trans = pgen . trans -- aliaces {-| The same as `>==` -} filterBy :: forall (alg :: * -> *) free (uniformer :: * -> *) c b k. (GenericSemiringStructure alg free uniformer, Ord c) => (GenericSemiring alg (Map c b) -> Map k b) -> (k -> Bool, alg c) -> GenericSemiring alg b -> b filterBy = (>==) {-| The same as `>=>` -} aggregateBy :: forall (alg :: * -> *) free (uniformer :: * -> *) b k. (GenericSemiringStructure alg free uniformer) => (GenericSemiring alg b -> b) -> GenericSemiring alg b -> b aggregateBy = (>=>) {-| The same as `>=<` -} transformBy :: forall (alg :: * -> *) free (uniformer :: * -> *) (alg' :: * -> *) free' (uniformer' :: * -> *) c. (GenericSemiringStructure alg free uniformer, GenericSemiringStructure alg' free' uniformer') => (GenericSemiring alg' c -> c) -> (GenericSemiring alg c -> GenericSemiring alg' c) -> GenericSemiring alg c -> c transformBy = (>=<) -- combinators without optimizations {-| Inefficient version of `>==` (i.e., it does not do optimziation at all). -} infixl 5 >## (>##) :: (GenericSemiringStructure alg free uniformer) => (GenericSemiring alg (Bag free) -> Bag free) -> (b -> Bool, alg b) -> GenericSemiring alg (Bag free) -> Bag free (>##) pgen (ok, bt) _ = filterB (ok.hom bt) bag where bag = pgen freeSemiring {-the given semiring will be neglected by the result of this operator -} {-| Inefficient version of `>=>` (i.e., it does not do optimziation at all). -} infixl 5 >#> (>#>) :: (GenericSemiringStructure alg free uniformer) => (GenericSemiring alg (Bag free) -> Bag free) -> GenericSemiring alg a -> a (>#>) pgen bts = shom bts (pgen freeSemiring) -- operator to replace 'ok . hom' by 'ok <.> alg' {-| Operator to build a pair of a judgement function and an algebra, which represents a Tester. -} infix 6 <.> (<.>) :: forall (alg :: * -> *) a b. (b -> Bool) -> alg b -> ((b -> Bool), alg b) (<.>) ok alg = (ok, alg) -- aggregator for generating all candidates passing tests {-| The aggregator to extract all items generated by a generator. -} result :: forall (alg :: * -> *) free (uniformer :: * -> *). GenericSemiringStructure alg free uniformer => GenericSemiring alg (Bag free) result = freeSemiring -- aggregator based on the usual semirings genAlgebraFromSemiring :: forall free (uniformer :: * -> *) (alg :: * -> *) a. GenericSemiringStructure alg free uniformer => (a -> a -> a) -> a -> (a -> a -> a) -> a -> uniformer a -> GenericSemiring alg a genAlgebraFromSemiring op iop ot iot mf = GenericSemiring {..} where monoid = CommutativeMonoid {..} where oplus a b = a `op` b identity = iop algebra = foldingAlgebra ot iot mf {-| The aggregator to compute a sum of products. Each product is of all values in the data structure after /map/. -} sumproductBy :: forall free (uniformer :: * -> *) (alg :: * -> *) a. (GenericSemiringStructure alg free uniformer, Num a) => uniformer a -> GenericSemiring alg a sumproductBy = genAlgebraFromSemiring (+) 0 (*) 1 {-| Introduces an identity 'Identity' to a given type. -} data AddIdentity a = AddIdentity a | Identity deriving (Show, Eq, Read) instance (Ord a) => Ord (AddIdentity a) where compare Identity Identity = EQ compare Identity (AddIdentity _) = LT compare (AddIdentity _) Identity = GT compare (AddIdentity a) (AddIdentity b) = compare a b instance (NFData a) => (NFData (AddIdentity a)) where rnf (AddIdentity a) = rnf a rnf Identity = () {-| Introduces an identity. -} addIdentity :: forall a. a -> AddIdentity a addIdentity a = AddIdentity a -- max-sum semiring {-| The aggregator to take the maximum items under a given monotonic sum `mplus` with its identity `mid` after /map/. > c == a `max` b => d `mplus` (a `max` b) == (d `mplus` a) `max` (d `mplus` b) -} maxMonoSumBy :: forall free (uniformer :: * -> *) (alg :: * -> *) a. (GenericSemiringStructure alg free uniformer, Ord a) => (a -> a -> a) -> a -> uniformer (AddIdentity a) -> GenericSemiring alg (AddIdentity a) maxMonoSumBy mplus mid mf = genAlgebraFromSemiring max Identity plus (AddIdentity mid) mf where plus Identity _ = Identity plus _ Identity = Identity plus (AddIdentity a) (AddIdentity b) = AddIdentity (a `mplus` b) -- max-MonoSum with computation {-| The tupling of maxMonoSumBy and a given generic semiring. The second component of the result is the aggregation of the maximum items by the given generaic semiring. -} maxMonoSumsolutionXBy :: forall free (uniformer :: * -> *) a t (alg :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a) => (a -> a -> a) -> a -> GenericSemiring alg t -> uniformer (AddIdentity a) -> GenericSemiring alg (AddIdentity a, t) maxMonoSumsolutionXBy mplus mid s mf = GenericSemiring {..} where monoid = CommutativeMonoid {..} where oplus (a, x) (b, y) = case compare a b of EQ -> (a, x `oplus'` y) LT -> (b, y) GT -> (a, x) identity = (Identity, identity') algebra = pairAlgebra maxMonoSumAlgebra algebra' maxMonoSumAlgebra = let GenericSemiring {..} = maxMonoSumBy mplus mid mf in algebra (monoid', algebra') = let GenericSemiring {..} = s in (monoid, algebra) (oplus', identity') = let CommutativeMonoid {..} = monoid' in(oplus, identity) -- max-k {-| The aggregator to find the best k maximum items under a given monotonic sum. An extension of `maxMonoSumBy`. -} maxMonoSumKBy :: forall a free (uniformer :: * -> *) (alg :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a) => (a -> a -> a) -> a -> Int -> uniformer (AddIdentity a) -> GenericSemiring alg [AddIdentity a] maxMonoSumKBy mplus mid k mf = GenericSemiring {..} where monoid = CommutativeMonoid {..} where oplus x y = take k (map head (group (reverse (sort (x ++ y))))) identity = [] algebra = makeAlgebra monoid maxMonoSumAlgebra id sing sing a = [a] maxMonoSumAlgebra = let GenericSemiring {..} = maxMonoSumBy mplus mid mf in algebra -- max-solution-k {-| The /best-k/ extension of `maxMonoSumsolutionXBy`. -} maxMonoSumsolutionXKBy :: forall a free (uniformer :: * -> *) b (alg :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a) => (a -> a -> a) -> a -> GenericSemiring alg b -> Int -> uniformer (AddIdentity a) -> GenericSemiring alg [(AddIdentity a, b)] maxMonoSumsolutionXKBy mplus mid s k mf = GenericSemiring {..} where monoid = CommutativeMonoid {..} where oplus x y = let std = reverse (sortBy fstCmp (x ++ y)) grpd = groupBy (\a b -> fstCmp a b == EQ) std fstCmp a b = compare (fst a) (fst b) op (a, x) (_, y) = (a, x `oplus'` y) in take k (map (foldr1 op) grpd) identity = [] (oplus', identity') = let CommutativeMonoid {..} = monoid' in (oplus, identity) algebra = makeAlgebra monoid (pairAlgebra maxMonoSumAlgebra algebra') id sing sing a = [a] maxMonoSumAlgebra = let GenericSemiring {..} = maxMonoSumBy mplus mid mf in algebra (monoid', algebra') = let GenericSemiring {..} = s in (monoid, algebra) -- max-sum {-| The aggregator to the maximum sum after /map/. -} maxsumBy :: forall free (uniformer :: * -> *) (alg :: * -> *) a. (GenericSemiringStructure alg free uniformer, Ord a, Num a) => uniformer (AddIdentity a) -> GenericSemiring alg (AddIdentity a) maxsumBy = maxMonoSumBy (+) 0 {-| The /best-k/ extension of `maxsumBy`. -} maxsumKBy :: forall a free (uniformer :: * -> *) (alg :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a, Num a) => Int -> uniformer (AddIdentity a) -> GenericSemiring alg [AddIdentity a] maxsumKBy = maxMonoSumKBy (+) 0 {-| The /best-k/ extension of `maxsumsolutionXBy`. -} maxsumsolutionXKBy :: forall a free (uniformer :: * -> *) b (alg :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a, Num a) => GenericSemiring alg b -> Int -> uniformer (AddIdentity a) -> GenericSemiring alg [(AddIdentity a, b)] maxsumsolutionXKBy = maxMonoSumsolutionXKBy (+) 0 {-| An instance of `maxMonoSumsolutionXBy` with the usual summation. -} maxsumsolutionXBy :: forall free (uniformer :: * -> *) a t (alg :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a, Num a) => GenericSemiring alg t -> uniformer (AddIdentity a) -> GenericSemiring alg (AddIdentity a, t) maxsumsolutionXBy = maxMonoSumsolutionXBy (+) 0 {-| An instance of `maxMonoSumsolutionBy` with the usual summation. -} maxsumsolutionBy :: forall a (alg :: * -> *) free (uniformer :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a, Num a) => uniformer (AddIdentity a) -> GenericSemiring alg (AddIdentity a, Bag free) maxsumsolutionBy = maxsumsolutionXBy freeSemiring {-| The /best-k/ extension of `maxsumsolutionBy`. -} maxsumsolutionKBy :: forall a (alg :: * -> *) free (uniformer :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a, Num a) => Int -> uniformer (AddIdentity a) -> GenericSemiring alg [(AddIdentity a, Bag free)] maxsumsolutionKBy = maxsumsolutionXKBy freeSemiring --max prod (on positive numbers) {-| The aggregator to take the maximum product on /non-negative/ numbers. -} maxprodBy :: forall free (uniformer :: * -> *) (alg :: * -> *) a. (GenericSemiringStructure alg free uniformer, Ord a, Num a) => uniformer (AddIdentity a) -> GenericSemiring alg (AddIdentity a) maxprodBy = maxMonoSumBy (*) 1 {-| The /best-k/ extension of 'maxprodBy' -} maxprodKBy :: forall a free (uniformer :: * -> *) (alg :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a, Num a) => Int -> uniformer (AddIdentity a) -> GenericSemiring alg [AddIdentity a] maxprodKBy = maxMonoSumKBy (*) 1 {-| The /best-k/ extension of 'maxprodsolutionXBy' -} maxprodsolutionXKBy :: forall a free (uniformer :: * -> *) b (alg :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a, Num a) => GenericSemiring alg b -> Int -> uniformer (AddIdentity a) -> GenericSemiring alg [(AddIdentity a, b)] maxprodsolutionXKBy = maxMonoSumsolutionXKBy (*) 1 {-| The tupling of 'maxprodsolutionBy' and a given generic semiring. The second component of the result is the aggregation of the best items by the given generic emiring. -} maxprodsolutionXBy :: forall free (uniformer :: * -> *) a t (alg :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a, Num a) => GenericSemiring alg t -> uniformer (AddIdentity a) -> GenericSemiring alg (AddIdentity a, t) maxprodsolutionXBy = maxMonoSumsolutionXBy (*) 1 {-| The aggregator to find the items with the maximum product on /non-negative/ numbers. -} maxprodsolutionBy :: forall a (alg :: * -> *) free (uniformer :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a, Num a) => uniformer (AddIdentity a) -> GenericSemiring alg (AddIdentity a, Bag free) maxprodsolutionBy = maxprodsolutionXBy freeSemiring {-| The /best-k/ extension of 'maxprodsolutionBy' -} maxprodsolutionKBy :: forall a (alg :: * -> *) free (uniformer :: * -> *). (GenericSemiringStructure alg free uniformer, Ord a, Num a) => Int -> uniformer (AddIdentity a) -> GenericSemiring alg [(AddIdentity a, Bag free)] maxprodsolutionKBy = maxprodsolutionXKBy freeSemiring -- reverse order to make `max` `min` {-| Reverses the order of the argument, so that we can use aggregators maxXXX to take the minimum XXX. -} revOrd :: forall a. a -> RevOrd a revOrd a = RevOrd a {-| Reverses the order of a given type. -} data RevOrd a = RevOrd a deriving (Eq, Show, Read) instance (Num a) => (Num (RevOrd a)) where (+) (RevOrd a) (RevOrd b) = RevOrd (a + b) (*) (RevOrd a) (RevOrd b) = RevOrd (a * b) (-) (RevOrd a) (RevOrd b) = RevOrd (a - b) negate (RevOrd a) = RevOrd (negate a) abs (RevOrd a) = RevOrd (abs a) signum (RevOrd a) = RevOrd (signum a) fromInteger a = RevOrd (fromInteger a) instance (Ord a) => (Ord (RevOrd a)) where compare (RevOrd a) (RevOrd b) = case compare a b of EQ -> EQ LT -> GT GT -> LT