{-# LANGUAGE BangPatterns #-} {-# LANGUAGE FlexibleContexts #-} {- gmndl -- Mandelbrot Set explorer Copyright (C) 2010,2011,2014,2017 Claude Heiland-Allen This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -} module Calculate (convert, renderer) where -- simple helpers import Control.Monad (when) -- concurrent renderer with capability-specific scheduling import Control.Concurrent (MVar, newEmptyMVar, takeMVar, tryTakeMVar, tryPutMVar, threadDelay, forkIO, killThread) import GHC.Conc (forkOn, numCapabilities) -- each worker uses a mutable unboxed array of Bool to know which pixels -- it has already started to render, to avoid pointless work duplication import Data.Array.IO (IOUArray, newArray, readArray, writeArray, inRange) -- each worker thread keeps a queue of pixels that it needs to render or -- to continue rendering later import Data.PriorityQueue (PriorityQueue, newPriorityQueue, enqueue, enqueueBatch, dequeue) -- poking bytes into memory is dirty, but it's quick and allows use of -- other fast functions like memset and easy integration with OpenGL import Foreign (Word8) -- higher precision arithmetic using libqd import Numeric.QD.DoubleDouble (DoubleDouble()) import Numeric.QD.QuadDouble (QuadDouble()) import Complex (Complex((:+)), Turbo(sqr, twice), convert) -- some type aliases to shorten things type B = Word8 type N = Int type R = Double {- -- colour space conversion from HSV [0..1] to RGB [0..1] -- HSV looks quite 'chemical' to my eyes, need to investigate something -- better to make it feel more 'natural' hsv2rgb :: R -> R -> R -> (R, R, R) hsv2rgb !h !s !v | s == 0 = (v, v, v) | h == 1 = hsv2rgb 0 s v | otherwise = let !i = floor (h * 6) `mod` 6 :: N !f = (h * 6) - fromIntegral i !p = v * (1 - s) !q = v * (1 - s * f) !t = v * (1 - s * (1 - f)) in case i of 0 -> (v, t, p) 1 -> (q, v, p) 2 -> (p, v, t) 3 -> (p, q, v) 4 -> (t, p, v) 5 -> (v, p, q) _ -> (0, 0, 0) -} hsv2rgb' :: R -> R -> R -> (R, R, R) hsv2rgb' !h !s !l = let !a = 2 * pi * h !ca = cos a !sa = sin a !y = l / 2 !u = s / 2 * ca !v = s / 2 * sa !r = y + 1.407 * u !g = y - 0.677 * u - 0.236 * v !b = y + 1.848 * v in (r, g, b) -- compute RGB [0..255] bytes from the results of the complex iterations -- don't need very high precision for this, as spatial aliasing will be -- much more of a problem in intricate regions of the fractal colour :: Complex Double -> Complex Double -> N -> (B, B, B) colour !(zr:+zi) !(dzr:+dzi) !n = let -- micro-optimization - there is no log2 function !il2 = 1 / log 2 !zd2 = sqr zr + sqr zi !dzd2 = sqr dzr + sqr dzi -- normalized escape time !d = (fromIntegral n :: R) - log (log zd2 / log escapeR2) * il2 !dwell = fromIntegral (floor d :: N) -- final angle of the iterate !finala = atan2 zi zr -- distance estimate !de = (log zd2 * il2) * sqrt (zd2 / dzd2) !dscale = -log de * il2 -- HSV is based on escape time, distance estimate, and angle !hue = log ((- log de * il2) `max` 1) * il2 / 16 + log d * il2 !saturation = 0 `max` (log d * il2 / 8) `min` 1 !value = 0 `max` (1 - dscale / 256) `min` 1 !h = hue - fromIntegral (floor hue :: N) -- adjust saturation to give concentric striped pattern !k = dwell / 2 !satf = if k - fromIntegral (floor k :: N) >= (0.5 :: R) then 0.9 else 1 -- adjust value to give tiled pattern !valf = if finala < 0 then 0.9 else 1 -- convert to RGB (!r, !g, !b) = hsv2rgb' h (satf * saturation) (valf * value) -- convert to bytes !rr = floor $ 0 `max` (255 * r) `min` 255 !gg = floor $ 0 `max` (255 * g) `min` 255 !bb = floor $ 0 `max` (255 * b) `min` 255 in (rr, gg, bb) -- a Job stores a pixel undergoing iterations data Job c = Job !N !N !(Complex c) !(Complex c) !(Complex c) !N -- the priority of a Job is how many iterations have been computed: -- so 'fresher' pixels drop to the front of the queue in the hope of -- avoiding too much work iterating pixels that will never escape priority :: Job c -> N priority !(Job _ _ _ _ _ n) = n -- add a job to a work queue, taking care not to duplicate work -- there is no race condition here as each worker has its own queue addJob :: (Real c, Floating c, Turbo c) => N -> N -> Complex c -> c -> PriorityQueue IO (Job c) -> IOUArray (N,N) Bool -> N -> N -> IO () addJob !w !h !c !zradius' todo sync !i !j = do already <- readArray sync (j, i) when (not already) $ do writeArray sync (j, i) True enqueue todo $! Job i j (coords w h c zradius' i j) 0 0 0 -- spawns a new batch of workers to render an image -- returns an action that stops the rendering renderer' :: (Turbo c, Real c, Floating c) => MVar () -> ((N,N),(N,N)) -> (N -> N -> B -> B -> B -> IO ()) -> Complex c -> c -> IO (IO ()) renderer' done rng output !c !zradius' = do wdog <- newEmptyMVar workerts <- mapM (\w -> forkOn w $ worker wdog rng c zradius' output w) [ 0 .. workers - 1 ] watcher <- forkIO $ do () <- takeMVar wdog let loop = do threadDelay 10000000 -- 10 seconds m <- tryTakeMVar wdog case m of Nothing -> mapM_ killThread workerts >> tryPutMVar done () >> return () Just () -> loop loop return $ killThread watcher >> mapM_ killThread workerts -- compute the Complex 'c' coordinate for a pixel in the image coords :: (Real c, Floating c, Turbo c) => N -> N -> Complex c -> c -> N -> N -> Complex c coords !w !h !c !zradius' !i !j = c + ( (fromIntegral (i - w`div`2) * k) :+(fromIntegral (h`div`2 - j) * k)) where !k = zradius' / (fromIntegral $ (w `div` 2) `min` (h `div` 2)) -- the worker thread enqueues its border and starts computing iterations worker :: (Turbo c, Real c, Floating c) => MVar () -> ((N,N),(N,N)) -> Complex c -> c -> (N -> N -> B -> B -> B -> IO ()) -> N -> IO () worker wdog rng@((y0,x0),(y1,x1)) !c !zradius' output !me = do sync <- newArray rng False queue <- newPriorityQueue priority let addJ = addJob w h c zradius' queue sync js = filter mine (border w h) w = x1 - x0 + 1 h = y1 - y0 + 1 mapM_ (flip (writeArray sync) True) js enqueueBatch queue (map (\(j,i) -> Job i j (coords w h c zradius' i j) 0 0 0) js) compute wdog rng addJ output queue where mine (_, i) = i `mod` workers == me -- another dependency on spread -- the compute engine pulls pixels from the queue until there are no -- more, and calculates a batch of iterations for each compute :: (Turbo c, Real c, Floating c) => MVar () -> ((N,N),(N,N)) -> (N -> N -> IO ()) -> (N -> N -> B -> B -> B -> IO ()) -> PriorityQueue IO (Job c) -> IO () compute wdog rng addJ output queue = do mjob <- dequeue queue case mjob of Just (Job i j c z dz n) -> do let -- called when the pixel escapes done' !(zr:+zi) !(dzr:+dzi) !n' = {-# SCC "done" #-} do _ <- tryPutMVar wdog () let (r, g, b) = colour (convert zr :+ convert zi) (convert dzr :+ convert dzi) n' output i j r g b -- a wavefront of computation spreads to neighbouring pixels sequence_ [ addJ x y | u <- spreadX , v <- spreadY , let x = i + u , let y = j + v , inRange rng (y, x) ] -- called when the pixel doesn't escape yet todo' !z' !dz' !n' = {-# SCC "todo" #-} {- output i j 255 0 0 >> -} enqueue queue $! Job i j c z' dz' n' calculate c limit z dz n done' todo' compute wdog rng addJ output queue Nothing -> return () -- no pixels left to render, so finish quietly -- the raw z->z^2+c calculation engine -- also computes the derivative for distance estimation calculations -- this function is crucial for speed, too much allocation will slooow -- everything down severely calculate :: (Turbo c, Real c, Floating c) => Complex c -> N -> Complex c -> Complex c -> N -> (Complex c -> Complex c -> N -> IO ()) -> (Complex c -> Complex c -> N -> IO ()) -> IO () calculate !c !m0 !z0 !dz0 !n0 done todo = go m0 z0 dz0 n0 where go !m !z@(zr:+zi) !dz !n | not (sqr zr + sqr zi < er2) = done z dz n | m <= 0 = todo z dz n | otherwise = go (m - 1) (sqr z + c) (let !zdz = z * dz in twice zdz + 1) (n + 1) !er2 = convert escapeR2 -- dispatch to different instances of renderer depending on required precision -- if zoom is low, single precision Float is ok, but as soon as pixel spacing -- gets really small, it's necessary to increase it -- it's probably not even worth using Float - worth benchmarking this and -- also the DD and QD types (which cause a massively noticeable slowdown) renderer :: (Real c, Floating c) => MVar () -> ((N,N),(N,N)) -> (N -> N -> B -> B -> B -> IO ()) -> Complex c -> c -> IO (IO ()) renderer done rng output !c !zradius' | zoom' < 20 = {-# SCC "rF" #-} renderer' done rng output (f c :: Complex Float ) (g zradius') | zoom' < 50 = {-# SCC "rD" #-} renderer' done rng output (f c :: Complex Double ) (g zradius') | zoom' < 100 = {-# SCC "rDD" #-} renderer' done rng output (f c :: Complex DoubleDouble) (g zradius') | otherwise = {-# SCC "rQD" #-} renderer' done rng output (f c :: Complex QuadDouble ) (g zradius') where f !(cr :+ ci) = convert cr :+ convert ci g !x = convert x zoom' = - logBase 2 (zradius' / (fromIntegral $ w `min` h)) ((x0,y0), (x1, y1)) = rng w = x1 - x0 + 1 h = y1 - y0 + 1 -- start rendering pixels from the edge of the image -- the Mandelbrot Set and its complement are both simply-connected -- discounting spatial aliasing any point inside the boundary that is -- in the complement is 'reachable' from a point on the boundary that -- is also in the complement - probably some heavy math involved to -- prove this though -- note: this implicitly depends on the spread values below - it's -- necessary for each interlaced subimage (one per worker) to have -- at least a one pixel deep border border :: N -> N -> [(N, N)] border !w !h = concat $ [ [ (j, i) | i <- [ 0 .. w - 1 ], j <- [ 0 ] ] , [ (j, i) | j <- [ 0 .. h - 1 ], i <- [ 0 .. workers - 1 ] ] , [ (j, i) | j <- [ 0 .. h - 1 ], i <- [ w - workers .. w - 1 ] ] , [ (j, i) | i <- [ 0 .. w - 1 ], j <- [ h - 1 ] ] ] -- which neighbours to activate once a pixel has escaped -- there are essentially two choices, with x<->y swapped -- choose greater X spread because images are often wider than tall -- other schemes wherein the spread is split in both directions -- might benefit appearance with large worker count, but too complicated spreadX, spreadY :: [ N ] spreadX = [ -workers, 0, workers ] spreadY = [ -1, 0, 1 ] -- number of worker threads -- use as many worker threads as capabilities, with the workers -- distributed 1-1 onto capabilities to maximize CPU utilization workers :: N workers = numCapabilities -- iteration limit per pixel -- at most this many iterations are performed on each pixel before it -- is shunted to the back of the work queue -- this should be tuneable to balance display updates against overheads limit :: N limit = (2^(13::N)-1) -- escape radius for fractal iteration calculations -- once the complex iterate exceeds this, it's never coming back -- theoretically escapeR = 2 would work -- but higher values like this give a significantly smoother picture escapeR, escapeR2 :: R escapeR = 65536 escapeR2 = escapeR * escapeR