{-# LANGUAGE ScopedTypeVariables, FlexibleContexts, Trustworthy #-}
module Control.CUtils.AssociativeFold (assocFold, assocFold_pattern) where
import Control.CUtils.CPUMultiThreading
import Data.List.Extra
import Data.Array.Unsafe
import Data.Array.IO
import Data.Array.Base
import Data.Array
import Control.Monad
import Control.Exception
import System.IO.Unsafe
chunkSize::Int
chunkSize = 10000
assocFold :: forall t. Pool -> (t->t->IO t) -> Int -> (Int -> t) -> IO t
-- | Concurrent evaluation of an associative fold. The caller is responsible
-- for introducing appropriate strictness for the results in the second argument.
assocFold _ _ n _ | n <= 0 = throwIO$ErrorCall"assocFold: list is empty"
assocFold _ _ n f2 | n == 1 = return(f2 0)
assocFold pool f n f2 = do
        let mx = pred n `div` chunkSize
        let ls2 =[0.. mx]
        let l = succ mx
        -- Using unsafe interleaving with this line seems to help the tasks start faster.

        ar::IOArray Int t <- unsafeInterleaveIO(newArray_(0,mx))
        let ls3 = fmap(f3 ar) ls2
        simpleConc_ pool ls3()
        ar2 :: Array Int t <- unsafeFreeze ar
        -- Each recursive call reduces the size of the problem by a factor of 'chunkSize'.
        assocFold pool f l(unsafeAt ar2)
        where
        f3 ar ii = do
                let x:xs= [chunkSize*ii..pred(min n(chunkSize*ii+chunkSize))]
                let x2 = f2 x
                x3 <- foldM((.f2).f) x2 xs
                writeArray ar ii x3
assocFold_pattern :: Pool -> (t->t->IO t) -> Int -> (Int -> IO t) -> IO t
-- | Pattern which is a simple wrapper around 'assocFold'.
assocFold_pattern pool f = (join.).assocFold pool( \x x2 -> do
        x_res <- x
        x2_res <- x2
        return$!f x_res x2_res)