{-# LANGUAGE TemplateHaskell, FlexibleContexts #-} module NoSlow.Mini.Kernels ( kernels ) where import qualified NoSlow.Backend.Interface as I import NoSlow.Util.Base import NoSlow.Util.Computation ( Len(..), WithSqrtLen(..), Sorted(..), ParenTree(..), Edges(..) ) import Data.Bits kernels = [d| -- Wyllie's list ranking based on pointer jumping. Based on -- http://www.cs.cmu.edu/~scandal/nesl/algorithms.html list_rank :: I.Vector v a => Ty (v a) -> Len -> v Int list_rank _ (Len n) = pointer_jump list val where list = 0 `I.cons` I.enumFromTo_Int 0 (n-2) val = I.zipWith (\i j -> if i == j then 0 else 1) list (I.enumFromTo_Int 0 (n-1)) pointer_jump pt val | npt == pt = val | otherwise = pointer_jump npt nval where npt = I.backpermute pt pt; nval = I.zipWith (+) val (I.backpermute val pt) -- Tree rootfix based on -- http://www.cse.unsw.edu.au/~rl/publications/recycling.html rootfix_depth :: I.Vector v a => Ty (v a) -> ParenTree v Int -> v Int rootfix_depth _ (ParenTree ls rs) = rootfix (I.replicate (I.length ls) 1) ls rs where rootfix xs ls rs = let zs = I.replicate (I.length ls * 2) 0 vs = I.update_ (I.update_ zs ls xs) rs (I.map negate xs) sums = I.prescanl' (+) 0 vs in I.backpermute sums ls -- Tree leaffix based on -- http://www.cse.unsw.edu.au/~rl/publications/recycling.html leaffix_size :: I.Vector v a => Ty (v a) -> ParenTree v Int -> v Int leaffix_size _ (ParenTree ls rs) = leaffix (I.replicate (I.length ls) 1) ls rs where leaffix xs ls rs = let zs = I.replicate (I.length ls * 2) 0 vs = I.update_ zs ls xs sums = I.prescanl' (+) 0 vs in I.zipWith (-) (I.backpermute sums ls) (I.backpermute sums rs) -- Awerbuch-Shiloach connected components. Based on -- http://www.cs.cmu.edu/~scandal/nesl/algorithms.html awsh_cc :: I.Vector v a => Ty (v a) -> Edges v Int -> v Int awsh_cc _ (Edges n es1 es2) = concomp ds es1' es2' where ds = I.enumFromTo_Int 0 (n-1) `I.append` I.enumFromTo_Int 0 (n-1) es1' = es1 `I.append` es2 es2' = es2 `I.append` es1 starCheck ds = I.backpermute st' gs where gs = I.backpermute ds ds st = I.zipWith (==) ds gs st' = I.update st . I.filter (not . I.snd) $ I.zip gs st concomp ds es1 es2 | I.and (starCheck ds'') = ds'' | otherwise = concomp (I.backpermute ds'' ds'') es1 es2 where ds' = I.update ds . I.map (\t -> case I.from3 t of (di, dj, gi) -> di `I.pair` dj) . I.filter (\t -> case I.from3 t of (di, dj, gi) -> gi == di && di > dj) $ I.zip3 (I.backpermute ds es1) (I.backpermute ds es2) (I.backpermute ds (I.backpermute ds es1)) ds'' = I.update ds' . I.map (\t -> case I.from3 t of (di, dj, st) -> di `I.pair` dj) . I.filter (\t -> case I.from3 t of (di, dj, st) -> st && di /= dj) $ I.zip3 (I.backpermute ds' es1) (I.backpermute ds' es2) (I.backpermute (starCheck ds') es1) -- Awerbuch-Shiloach/random mate hybrid connected components. Based on -- http://www.cs.cmu.edu/~scandal/nesl/algorithms.html hyb_cc :: I.Vector v a => Ty (v a) -> Edges v Int -> v Int hyb_cc _ (Edges n e1 e2) = concomp (I.zip e1 e2) n where concomp es n | I.null es = I.enumFromTo_Int 0 (n-1) | otherwise = I.backpermute ins ins where p = shortcut_all $ I.update (I.enumFromTo_Int 0 (n-1)) es (es',i) = compress p es r = concomp es' (I.length i) ins = I.update_ p i $ I.backpermute i r enumerate bs = I.prescanl' (+) 0 $ I.map (\b -> if b then 1 else 0) bs pack_index bs = I.map I.fst . I.filter I.snd $ I.zip (I.enumFromTo_Int 0 (I.length bs - 1)) bs shortcut_all p | p == pp = pp | otherwise = shortcut_all pp where pp = I.backpermute p p compress p es = (new_es, pack_index roots) where (e1,e2) = I.from2 $ I.unzip es es' = I.map (\t -> if I.fst t > I.snd t then I.snd t `I.pair` I.fst t else I.fst t `I.pair` I.snd t) . I.filter (\t -> I.fst t /= I.snd t) $ I.zip (I.backpermute p e1) (I.backpermute p e2) roots = I.zipWith (==) p (I.enumFromTo_Int 0 (I.length p - 1)) labels = enumerate roots (e1',e2') = I.from2 $ I.unzip es' new_es = I.zip (I.backpermute labels e1') (I.backpermute labels e2') -- Quickhull. Based on -- http://www.cs.cmu.edu/~scandal/nesl/algorithms.html quickhull :: I.Vector v a => Ty (v a) -> v Double -> v Double -> (v Double, v Double) quickhull _ xs ys = I.from2 $ I.unzip $ hsplit points pmin pmax `I.append` hsplit points pmax pmin where imin = I.minIndex xs imax = I.maxIndex xs points = I.zip xs ys pmin = I.index points imin pmax = I.index points imax hsplit points p1 p2 | I.length packed < 2 = p1 `I.cons` packed | otherwise = hsplit packed p1 pm `I.append` hsplit packed pm p2 where cs = I.map (\p -> cross p p1 p2) points packed = I.map I.fst $ I.filter (\t -> I.snd t > 0) $ I.zip points cs pm = I.index points (I.maxIndex cs) cross p p1 p2 = case I.from2 p of { (x,y) -> case I.from2 p1 of { (x1,y1) -> case I.from2 p2 of { (x2,y2) -> (x1-x)*(y2-y) - (y1-y)*(x2-x) }}} -- Inner loop from the spectral-norm benchmark from -- http://shootout.alioth.debian.org spectral_norm :: I.Vector v a => Ty (v a) -> WithSqrtLen (v Double) -> v Double spectral_norm _ (WithSqrtLen us) = n `seq` us `seq` I.map row (I.enumFromTo_Int 0 (n-1)) where n = I.length us row i = i `seq` I.sum (I.imap (\j u -> eval_A i j * u) us) eval_A i j = 1 / fromIntegral r where r = u + (i+1) u = t `shiftR` 1 t = n * (n+1) n = i+j -- Tridiagonal matrix solver tridiagonal :: I.Vector v a => Ty (v a) -> v Double -> v Double -> v Double -> v Double -> v Double tridiagonal _ as bs cs ds = xs where (cs',ds') = I.from2 $ I.unzip $ I.prescanl' modify (I.pair 0 0) $ I.zip (I.zip as bs) (I.zip cs ds) modify p q = case I.from2 p of { (c',d') -> case I.from2 q of { (q1,q2) -> case I.from2 q1 of { (a,b) -> case I.from2 q2 of { (c,d) -> let id = 1 / (b - c'*a) in id `seq` I.pair (c*id) ((d-d'*a)*id) }}}} xs = I.prescanr' (\p x' -> case I.from2 p of (c,d) -> d - c*x') 0 (I.zip cs' ds') |]