{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE FlexibleContexts #-}
{-
gmndl -- Mandelbrot Set explorer
Copyright (C) 2010,2011,2014,2017 Claude Heiland-Allen
This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License along
with this program; if not, write to the Free Software Foundation, Inc.,
51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
-}
module Calculate (convert, renderer) where
-- simple helpers
import Control.Monad (when)
-- concurrent renderer with capability-specific scheduling
import Control.Concurrent (MVar, newEmptyMVar, takeMVar, tryTakeMVar, tryPutMVar, threadDelay, forkIO, killThread)
import GHC.Conc (forkOn, numCapabilities)
-- each worker uses a mutable unboxed array of Bool to know which pixels
-- it has already started to render, to avoid pointless work duplication
import Data.Array.IO (IOUArray, newArray, readArray, writeArray, inRange)
-- each worker thread keeps a queue of pixels that it needs to render or
-- to continue rendering later
import Data.PriorityQueue (PriorityQueue, newPriorityQueue, enqueue, enqueueBatch, dequeue)
-- poking bytes into memory is dirty, but it's quick and allows use of
-- other fast functions like memset and easy integration with OpenGL
import Foreign (Word8)
-- higher precision arithmetic using libqd
import Numeric.QD.DoubleDouble (DoubleDouble())
import Numeric.QD.QuadDouble (QuadDouble())
import Complex (Complex((:+)), Turbo(sqr, twice), convert)
-- some type aliases to shorten things
type B = Word8
type N = Int
type R = Double
{-
-- colour space conversion from HSV [0..1] to RGB [0..1]
-- HSV looks quite 'chemical' to my eyes, need to investigate something
-- better to make it feel more 'natural'
hsv2rgb :: R -> R -> R -> (R, R, R)
hsv2rgb !h !s !v
| s == 0 = (v, v, v)
| h == 1 = hsv2rgb 0 s v
| otherwise =
let !i = floor (h * 6) `mod` 6 :: N
!f = (h * 6) - fromIntegral i
!p = v * (1 - s)
!q = v * (1 - s * f)
!t = v * (1 - s * (1 - f))
in case i of
0 -> (v, t, p)
1 -> (q, v, p)
2 -> (p, v, t)
3 -> (p, q, v)
4 -> (t, p, v)
5 -> (v, p, q)
_ -> (0, 0, 0)
-}
hsv2rgb' :: R -> R -> R -> (R, R, R)
hsv2rgb' !h !s !l =
let !a = 2 * pi * h
!ca = cos a
!sa = sin a
!y = l / 2
!u = s / 2 * ca
!v = s / 2 * sa
!r = y + 1.407 * u
!g = y - 0.677 * u - 0.236 * v
!b = y + 1.848 * v
in (r, g, b)
-- compute RGB [0..255] bytes from the results of the complex iterations
-- don't need very high precision for this, as spatial aliasing will be
-- much more of a problem in intricate regions of the fractal
colour :: Complex Double -> Complex Double -> N -> (B, B, B)
colour !(zr:+zi) !(dzr:+dzi) !n =
let -- micro-optimization - there is no log2 function
!il2 = 1 / log 2
!zd2 = sqr zr + sqr zi
!dzd2 = sqr dzr + sqr dzi
-- normalized escape time
!d = (fromIntegral n :: R) - log (log zd2 / log escapeR2) * il2
!dwell = fromIntegral (floor d :: N)
-- final angle of the iterate
!finala = atan2 zi zr
-- distance estimate
!de = (log zd2 * il2) * sqrt (zd2 / dzd2)
!dscale = -log de * il2
-- HSV is based on escape time, distance estimate, and angle
!hue = log ((- log de * il2) `max` 1) * il2 / 16 + log d * il2
!saturation = 0 `max` (log d * il2 / 8) `min` 1
!value = 0 `max` (1 - dscale / 256) `min` 1
!h = hue - fromIntegral (floor hue :: N)
-- adjust saturation to give concentric striped pattern
!k = dwell / 2
!satf = if k - fromIntegral (floor k :: N) >= (0.5 :: R) then 0.9 else 1
-- adjust value to give tiled pattern
!valf = if finala < 0 then 0.9 else 1
-- convert to RGB
(!r, !g, !b) = hsv2rgb' h (satf * saturation) (valf * value)
-- convert to bytes
!rr = floor $ 0 `max` (255 * r) `min` 255
!gg = floor $ 0 `max` (255 * g) `min` 255
!bb = floor $ 0 `max` (255 * b) `min` 255
in (rr, gg, bb)
-- a Job stores a pixel undergoing iterations
data Job c = Job !N !N !(Complex c) !(Complex c) !(Complex c) !N
-- the priority of a Job is how many iterations have been computed:
-- so 'fresher' pixels drop to the front of the queue in the hope of
-- avoiding too much work iterating pixels that will never escape
priority :: Job c -> N
priority !(Job _ _ _ _ _ n) = n
-- add a job to a work queue, taking care not to duplicate work
-- there is no race condition here as each worker has its own queue
addJob :: (Real c, Floating c, Turbo c) => N -> N -> Complex c -> c -> PriorityQueue IO (Job c) -> IOUArray (N,N) Bool -> N -> N -> IO ()
addJob !w !h !c !zradius' todo sync !i !j = do
already <- readArray sync (j, i)
when (not already) $ do
writeArray sync (j, i) True
enqueue todo $! Job i j (coords w h c zradius' i j) 0 0 0
-- spawns a new batch of workers to render an image
-- returns an action that stops the rendering
renderer' :: (Turbo c, Real c, Floating c) => MVar () -> ((N,N),(N,N)) -> (N -> N -> B -> B -> B -> IO ()) -> Complex c -> c -> IO (IO ())
renderer' done rng output !c !zradius' = do
wdog <- newEmptyMVar
workerts <- mapM (\w -> forkOn w $ worker wdog rng c zradius' output w) [ 0 .. workers - 1 ]
watcher <- forkIO $ do
() <- takeMVar wdog
let loop = do
threadDelay 10000000 -- 10 seconds
m <- tryTakeMVar wdog
case m of
Nothing -> mapM_ killThread workerts >> tryPutMVar done () >> return ()
Just () -> loop
loop
return $ killThread watcher >> mapM_ killThread workerts
-- compute the Complex 'c' coordinate for a pixel in the image
coords :: (Real c, Floating c, Turbo c) => N -> N -> Complex c -> c -> N -> N -> Complex c
coords !w !h !c !zradius' !i !j = c + ( (fromIntegral (i - w`div`2) * k)
:+(fromIntegral (h`div`2 - j) * k))
where !k = zradius' / (fromIntegral $ (w `div` 2) `min` (h `div` 2))
-- the worker thread enqueues its border and starts computing iterations
worker :: (Turbo c, Real c, Floating c) => MVar () -> ((N,N),(N,N)) -> Complex c -> c -> (N -> N -> B -> B -> B -> IO ()) -> N -> IO ()
worker wdog rng@((y0,x0),(y1,x1)) !c !zradius' output !me = do
sync <- newArray rng False
queue <- newPriorityQueue priority
let addJ = addJob w h c zradius' queue sync
js = filter mine (border w h)
w = x1 - x0 + 1
h = y1 - y0 + 1
mapM_ (flip (writeArray sync) True) js
enqueueBatch queue (map (\(j,i) -> Job i j (coords w h c zradius' i j) 0 0 0) js)
compute wdog rng addJ output queue
where mine (_, i) = i `mod` workers == me -- another dependency on spread
-- the compute engine pulls pixels from the queue until there are no
-- more, and calculates a batch of iterations for each
compute :: (Turbo c, Real c, Floating c) => MVar () -> ((N,N),(N,N)) -> (N -> N -> IO ()) -> (N -> N -> B -> B -> B -> IO ()) -> PriorityQueue IO (Job c) -> IO ()
compute wdog rng addJ output queue = do
mjob <- dequeue queue
case mjob of
Just (Job i j c z dz n) -> do
let -- called when the pixel escapes
done' !(zr:+zi) !(dzr:+dzi) !n' = {-# SCC "done" #-} do
_ <- tryPutMVar wdog ()
let (r, g, b) = colour (convert zr :+ convert zi) (convert dzr :+ convert dzi) n'
output i j r g b
-- a wavefront of computation spreads to neighbouring pixels
sequence_
[ addJ x y
| u <- spreadX
, v <- spreadY
, let x = i + u
, let y = j + v
, inRange rng (y, x)
]
-- called when the pixel doesn't escape yet
todo' !z' !dz' !n' = {-# SCC "todo" #-} {- output i j 255 0 0 >> -} enqueue queue $! Job i j c z' dz' n'
calculate c limit z dz n done' todo'
compute wdog rng addJ output queue
Nothing -> return () -- no pixels left to render, so finish quietly
-- the raw z->z^2+c calculation engine
-- also computes the derivative for distance estimation calculations
-- this function is crucial for speed, too much allocation will slooow
-- everything down severely
calculate :: (Turbo c, Real c, Floating c) => Complex c -> N -> Complex c -> Complex c -> N -> (Complex c -> Complex c -> N -> IO ()) -> (Complex c -> Complex c -> N -> IO ()) -> IO ()
calculate !c !m0 !z0 !dz0 !n0 done todo = go m0 z0 dz0 n0
where
go !m !z@(zr:+zi) !dz !n
| not (sqr zr + sqr zi < er2) = done z dz n
| m <= 0 = todo z dz n
| otherwise = go (m - 1) (sqr z + c) (let !zdz = z * dz in twice zdz + 1) (n + 1)
!er2 = convert escapeR2
-- dispatch to different instances of renderer depending on required precision
-- if zoom is low, single precision Float is ok, but as soon as pixel spacing
-- gets really small, it's necessary to increase it
-- it's probably not even worth using Float - worth benchmarking this and
-- also the DD and QD types (which cause a massively noticeable slowdown)
renderer :: (Real c, Floating c) => MVar () -> ((N,N),(N,N)) -> (N -> N -> B -> B -> B -> IO ()) -> Complex c -> c -> IO (IO ())
renderer done rng output !c !zradius'
| zoom' < 20 = {-# SCC "rF" #-} renderer' done rng output (f c :: Complex Float ) (g zradius')
| zoom' < 50 = {-# SCC "rD" #-} renderer' done rng output (f c :: Complex Double ) (g zradius')
| zoom' < 100 = {-# SCC "rDD" #-} renderer' done rng output (f c :: Complex DoubleDouble) (g zradius')
| otherwise = {-# SCC "rQD" #-} renderer' done rng output (f c :: Complex QuadDouble ) (g zradius')
where f !(cr :+ ci) = convert cr :+ convert ci
g !x = convert x
zoom' = - logBase 2 (zradius' / (fromIntegral $ w `min` h))
((x0,y0), (x1, y1)) = rng
w = x1 - x0 + 1
h = y1 - y0 + 1
-- start rendering pixels from the edge of the image
-- the Mandelbrot Set and its complement are both simply-connected
-- discounting spatial aliasing any point inside the boundary that is
-- in the complement is 'reachable' from a point on the boundary that
-- is also in the complement - probably some heavy math involved to
-- prove this though
-- note: this implicitly depends on the spread values below - it's
-- necessary for each interlaced subimage (one per worker) to have
-- at least a one pixel deep border
border :: N -> N -> [(N, N)]
border !w !h = concat $
[ [ (j, i) | i <- [ 0 .. w - 1 ], j <- [ 0 ] ]
, [ (j, i) | j <- [ 0 .. h - 1 ], i <- [ 0 .. workers - 1 ] ]
, [ (j, i) | j <- [ 0 .. h - 1 ], i <- [ w - workers .. w - 1 ] ]
, [ (j, i) | i <- [ 0 .. w - 1 ], j <- [ h - 1 ] ]
]
-- which neighbours to activate once a pixel has escaped
-- there are essentially two choices, with x<->y swapped
-- choose greater X spread because images are often wider than tall
-- other schemes wherein the spread is split in both directions
-- might benefit appearance with large worker count, but too complicated
spreadX, spreadY :: [ N ]
spreadX = [ -workers, 0, workers ]
spreadY = [ -1, 0, 1 ]
-- number of worker threads
-- use as many worker threads as capabilities, with the workers
-- distributed 1-1 onto capabilities to maximize CPU utilization
workers :: N
workers = numCapabilities
-- iteration limit per pixel
-- at most this many iterations are performed on each pixel before it
-- is shunted to the back of the work queue
-- this should be tuneable to balance display updates against overheads
limit :: N
limit = (2^(13::N)-1)
-- escape radius for fractal iteration calculations
-- once the complex iterate exceeds this, it's never coming back
-- theoretically escapeR = 2 would work
-- but higher values like this give a significantly smoother picture
escapeR, escapeR2 :: R
escapeR = 65536
escapeR2 = escapeR * escapeR