{-# OPTIONS_GHC -frewrite-rules #-}
{-# LANGUAGE MultiParamTypeClasses, FunctionalDependencies, MagicHash,
  FlexibleInstances, FlexibleContexts, UnboxedTuples, DeriveDataTypeable, CPP #-}
-----------------------------------------------------------------------------
-- |
-- Module      : Data.Array.CArray.Base
-- Copyright   : (c) 2001 The University of Glasgow
--               (c) 2008 Jed Brown
-- License     : BSD-style
-- 
-- Maintainer  : jed@59A2.org
-- Stability   : experimental
-- Portability : non-portable
--
-- This module provides both the immutable 'CArray' and mutable 'IOCArray'.  The
-- underlying storage is exactly the same - pinned memory on the GC'd heap.
-- Elements are stored according to the class 'Storable'.  You can obtain a
-- pointer to the array contents to manipulate elements from languages like C.
--
-- 'CArray' is 16-byte aligned by default.  If you create a 'CArray' with
-- 'unsafeForeignPtrToCArray' then it may not be aligned.  This will be an issue
-- if you intend to use SIMD instructions.
--
-- 'CArray' is similar to 'Data.Array.Unboxed.UArray' but slower if you stay
-- within Haskell.  'CArray' can handle more types and can be used by external
-- libraries.
--
-- 'IOCArray' is equivalent to 'Data.Array.Storable.StorableArray' and similar
-- to 'Data.Array.IO.IOUArray' but slower.  'IOCArray' has O(1) versions of
-- 'unsafeFreeze' and 'unsafeThaw' when converting to/from 'CArray'.
-----------------------------------------------------------------------------

module Data.Array.CArray.Base where

import Control.Applicative
import Control.Monad
import Data.Array.Base
import Data.Array.MArray
import Data.Array.IArray
import Data.Complex
import Data.List
import System.IO.Unsafe         (unsafePerformIO)
import Foreign.Storable
import Foreign.ForeignPtr
import Foreign.Ptr
import Foreign.Marshal.Alloc
import Foreign.Marshal.Array    (copyArray,peekArray,pokeArray)
import Data.Word                (Word8,Word)

import Data.Generics            (Data(..), Typeable(..))
import GHC.Base                 (realWorld#, Addr#)
import GHC.IOBase               (IO(..))
import GHC.Ptr                  (Ptr(..))
import GHC.ForeignPtr           (ForeignPtr(..), mallocPlainForeignPtrBytes)

-- | The immutable array type.
data CArray i e = CArray !i !i Int !(ForeignPtr e)
    deriving (Data, Typeable)

-- | Absolutely equivalent representation, but used for the mutable interface.
data IOCArray i e = IOCArray !i !i Int !(ForeignPtr e)
    deriving (Data, Typeable)

instance Storable e => MArray IOCArray e IO where
    getBounds (IOCArray l u _ _) = return (l,u)

    getNumElements (IOCArray l u n _) = return n

    newArray (l,u) init = do
        fp <- mallocForeignPtrArrayAligned size
        withForeignPtr fp $ \a ->
            sequence_ [pokeElemOff a i init | i <- [0..size-1]]
        return (IOCArray l u size fp)
        where size = rangeSize (l,u)

    unsafeNewArray_ (l,u) = do
        let n = rangeSize (l,u)
        fp <- mallocForeignPtrArrayAligned n
        return (IOCArray l u n fp)

    newArray_ = unsafeNewArray_

    unsafeRead (IOCArray _ _ _ fp) i =
        withForeignPtr fp $ \a -> peekElemOff a i

    unsafeWrite (IOCArray _ _ _ fp) i e =
        withForeignPtr fp $ \a -> pokeElemOff a i e

-- |The pointer to the array contents is obtained by 'withCArray'.
-- The idea is similar to 'ForeignPtr' (used internally here).
-- The pointer should be used only during execution of the 'IO' action
-- retured by the function passed as argument to 'withCArray'.
withCArray :: CArray i e -> (Ptr e -> IO a) -> IO a
withCArray (CArray _ _ _ fp) f = withForeignPtr fp f

withIOCArray :: IOCArray i e -> (Ptr e -> IO a) -> IO a
withIOCArray (IOCArray _ _ _ fp) f = withForeignPtr fp f

-- |If you want to use it afterwards, ensure that you
-- 'touchCArray' after the last use of the pointer,
-- so the array is not freed too early.
touchIOCArray :: IOCArray i e -> IO ()
touchIOCArray (IOCArray _ _ _ fp) = touchForeignPtr fp

-- |Construct a 'CArray' from an arbitrary 'ForeignPtr'.  It is
-- the caller's responsibility to ensure that the 'ForeignPtr' points to
-- an area of memory sufficient for the specified bounds.
unsafeForeignPtrToCArray
   :: Ix i => ForeignPtr e -> (i,i) -> IO (CArray i e)
unsafeForeignPtrToCArray p (l,u) =
   return (CArray l u (rangeSize (l,u)) p)

unsafeForeignPtrToIOCArray
   :: Ix i => ForeignPtr e -> (i,i) -> IO (IOCArray i e)
unsafeForeignPtrToIOCArray p (l,u) =
   return (IOCArray l u (rangeSize (l,u)) p)

copy :: (Ix i, Storable e) => CArray i e -> IO (CArray i e)
copy ain@(CArray l u n fp) =
    createCArray (l,u) $ \op ->
        withCArray ain $ \ip ->
            copyArray op ip n

freezeIOCArray :: (Ix i, Storable e) => IOCArray i e -> IO (CArray i e)
freezeIOCArray = unsafeFreezeIOCArray >=> copy

unsafeFreezeIOCArray :: (Ix i) => IOCArray i e -> IO (CArray i e)
unsafeFreezeIOCArray (IOCArray l u n fp) = return (CArray l u n fp)

thawIOCArray :: (Ix i, Storable e) => CArray i e -> IO (IOCArray i e)
thawIOCArray = copy >=> unsafeThawIOCArray

unsafeThawIOCArray :: (Ix i) => CArray i e -> IO (IOCArray i e)
unsafeThawIOCArray (CArray l u n fp) = return (IOCArray l u n fp)

-- Since we can remove the (Storable e) restriction for these, the rules are
-- compact and general.
{-# RULES
"unsafeFreeze/IOCArray" unsafeFreeze = unsafeFreezeIOCArray
"unsafeThaw/IOCArray" unsafeThaw = unsafeThawIOCArray
  #-}

-- Since we can't parameterize the rules with the (Storable e) constraint, we
-- have to specialize manually.  This is unfortunate since it is less general.
{-# RULES
"freeze/IOCArray/Int"    freeze      = freezeIOCArray :: (Ix i) => IOCArray i Int -> IO (CArray i Int)
"freeze/IOCArray/Float"  freeze      = freezeIOCArray :: (Ix i) => IOCArray i Float -> IO (CArray i Float)
"freeze/IOCArray/Double" freeze      = freezeIOCArray :: (Ix i) => IOCArray i Double -> IO (CArray i Double)
"thaw/IOCArray/Int"      thaw        = thawIOCArray   :: (Ix i) => CArray i Int -> IO (IOCArray i Int)
"thaw/IOCArray/Float"    thaw        = thawIOCArray   :: (Ix i) => CArray i Float -> IO (IOCArray i Float)
"thaw/IOCArray/Double"   thaw        = thawIOCArray   :: (Ix i) => CArray i Double -> IO (IOCArray i Double)
  #-}


instance Storable e => IArray CArray e where
    {-# INLINE bounds #-}
    bounds (CArray l u _ _) = (l,u)
    {-# INLINE numElements #-}
    numElements (CArray _ _ n _) = n
    {-# NOINLINE unsafeArray #-}
    unsafeArray lu ies = unsafePerformIO $ unsafeArrayCArray lu ies (zeroElem (undefined :: e))
    {-# INLINE unsafeAt #-}
    unsafeAt (CArray _ _ _ fp) i = unsafeInlinePerformIO $
                                   withForeignPtr fp $ \a -> peekElemOff a i
    {-# NOINLINE unsafeReplace #-}
    unsafeReplace arr ies = unsafePerformIO $ unsafeReplaceCArray arr ies
    {-# NOINLINE unsafeAccum #-}
    unsafeAccum f arr ies = unsafePerformIO $ unsafeAccumCArray f arr ies
    {-# NOINLINE unsafeAccumArray #-}
    unsafeAccumArray f init lu ies = unsafePerformIO $ unsafeAccumArrayCArray f init lu ies


-- | Hackish way to get the zero element for a Storable type.
{-# NOINLINE zeroElem #-}
zeroElem :: Storable a => a -> a
zeroElem u = unsafePerformIO $ do
    allocaBytes size $ \p -> do
        sequence_ [pokeByteOff p off (0 :: Word8) | off <- [0 .. (size - 1)] ]
        peek (castPtr p)
    where size = sizeOf u

{-# INLINE unsafeArrayCArray #-}
unsafeArrayCArray :: (MArray IOCArray e IO, Storable e, Ix i)
                  => (i,i) -> [(Int, e)] -> e -> IO (CArray i e)
unsafeArrayCArray lu ies default_elem = do
        marr <- newArray lu default_elem
        sequence_ [unsafeWrite marr i e | (i, e) <- ies]
        unsafeFreezeIOCArray marr

{-# INLINE unsafeReplaceCArray #-}
unsafeReplaceCArray :: (MArray IOCArray e IO, Storable e, Ix i)
                       => CArray i e -> [(Int, e)] -> IO (CArray i e)
unsafeReplaceCArray arr ies = do
    marr <- thawIOCArray arr
    sequence_ [unsafeWrite marr i e | (i, e) <- ies]
    unsafeFreezeIOCArray marr

{-# INLINE unsafeAccumCArray #-}
unsafeAccumCArray :: (MArray IOCArray e IO, Storable e, Ix i)
                            => (e -> e' -> e) -> CArray i e -> [(Int, e')]
                                              -> IO (CArray i e)
unsafeAccumCArray f arr ies = do
    marr <- thawIOCArray arr
    sequence_ [do
        old <- unsafeRead marr i
        unsafeWrite marr i (f old new)
        | (i, new) <- ies]
    unsafeFreezeIOCArray marr

{-# INLINE unsafeAccumArrayCArray #-}
unsafeAccumArrayCArray :: (MArray IOCArray e IO, Storable e, Ix i)
                          => (e -> e' -> e) -> e -> (i,i) -> [(Int, e')]
                                            -> IO (CArray i e)
unsafeAccumArrayCArray f init lu ies = do
    marr <- newArray lu init
    sequence_ [do
        old <- unsafeRead marr i
        unsafeWrite marr i (f old new)
        | (i, new) <- ies]
    unsafeFreezeIOCArray marr

{-# INLINE eqCArray #-}
eqCArray :: (IArray CArray e, Ix i, Eq e)
                   => CArray i e -> CArray i e -> Bool
eqCArray arr1@(CArray l1 u1 n1 _) arr2@(CArray l2 u2 n2 _) =
    if n1 == 0 then n2 == 0 else
        l1 == l2 && u1 == u2 &&
           and [unsafeAt arr1 i == unsafeAt arr2 i | i <- [0 .. n1 - 1]]

{-# INLINE cmpCArray #-}
cmpCArray :: (IArray CArray e, Ix i, Ord e)
                    => CArray i e -> CArray i e -> Ordering
cmpCArray arr1 arr2 = compare (assocs arr1) (assocs arr2)

{-# INLINE cmpIntCArray #-}
cmpIntCArray :: (IArray CArray e, Ord e)
                       => CArray Int e -> CArray Int e -> Ordering
cmpIntCArray arr1@(CArray l1 u1 n1 _) arr2@(CArray l2 u2 n2 _) =
    if n1 == 0 then if n2 == 0 then EQ else LT else
    if n2 == 0 then GT else
    case compare l1 l2 of
        EQ    -> foldr cmp (compare u1 u2) [0 .. (n1 `min` n2) - 1]
        other -> other
    where
    cmp i rest = case compare (unsafeAt arr1 i) (unsafeAt arr2 i) of
        EQ    -> rest
        other -> other

{-# RULES "cmpCArray/Int" cmpCArray = cmpIntCArray #-}

instance (Ix ix, Eq e, IArray CArray e) => Eq (CArray ix e) where
    (==) = eqCArray

instance (Ix ix, Ord e, IArray CArray e) => Ord (CArray ix e) where
    compare = cmpCArray

instance (Ix ix, Show ix, Show e, IArray CArray e) => Show (CArray ix e) where
    showsPrec = showsIArray

--
-- General purpose array operations which happen to be very fast for CArray.
--

-- | O(1) reshape an array.  The number of elements in the new shape must not
-- exceed the number in the old shape.  The elements are in C-style ordering.
reshape :: (Ix i, Ix j) => (j,j) -> CArray i e -> CArray j e
reshape (l',u') (CArray l u n fp) | n' > n = error "reshape: new size too large"
                                  |otherwise = CArray l' u' n' fp
    where n' = rangeSize (l', u')

-- | O(1) make a rank 1 array from an arbitrary shape.
-- It has the property that 'reshape (0, size a - 1) a == flatten a'.
flatten :: Ix i => CArray i e -> CArray Int e
flatten (CArray l u n fp) = CArray 0 (n - 1) n fp

-- | Determine the rank of an array.
rank :: (Shapable i, Ix i, IArray a e) => a i e -> Int
rank = sRank . fst . bounds

--
-- Useful for multi-dimensional arrays.  Not specific to CArray.

-- | Canonical representation of the shape.
-- The following properties hold:
--     'length . shape = rank'
--     'product . shape = size'
shape :: (Shapable i, Ix i, IArray a e) => a i e -> [Int]
shape = uncurry sShape . bounds

-- | How much the offset changes when you move one element in the given
-- direction.  Since arrays are in row-major order, 'last . shapeToStride = const 1'
shapeToStride :: [Int] -> [Int]
shapeToStride = scanr (*) 1 . tail

-- | Number of elements in the Array.
size :: (Ix i, IArray a e) => a i e -> Int
size = numElements

--
-- None of the following are specific to CArray.  Some could have slightly
-- faster versions specialized to CArray.  In general, slicing is expensive
-- because the slice is not contiguous in memory, so must be copied.  There are
-- many specialized versions.
--

-- | Generic slice and map.  This takes the new range, the inverse map on
-- indices, and function to produce the next element.  It is the most general
-- operation in its class.
ixmapWithIndP :: (Ix i, Ix i', IArray a e, IArray a' e')
                 => (i',i') -> (i' -> i) -> (i -> e -> i' -> e') -> a i e -> a' i' e'
ixmapWithIndP lu f g arr = listArray lu
                           [ let i = f i' in g i (arr ! i) i' | i' <- range lu ]

-- | Less polymorphic version.
ixmapWithInd :: (Ix i, Ix i', IArray a e, IArray a e')
                => (i',i') -> (i' -> i) -> (i -> e -> i' -> e') -> a i e -> a i' e'
ixmapWithInd = ixmapWithIndP

-- | Perform an operation on the elements, independent of their location.
ixmapWithP  :: (Ix i, Ix i', IArray a e, IArray a' e')
               => (i',i') -> (i' -> i) -> (e -> e') -> a i e -> a' i' e'
ixmapWithP lu f g arr = listArray lu
                        [ g (arr ! f i') | i' <- range lu ]

-- | Less polymorphic version.
ixmapWith  :: (Ix i, Ix i', IArray a e, IArray a e')
              => (i',i') -> (i' -> i) -> (e -> e') -> a i e -> a i' e'
ixmapWith = ixmapWithP

-- | More polymorphic version of 'ixmap'.
ixmapP :: (Ix i, Ix i', IArray a e, IArray a' e)
          => (i',i') -> (i' -> i) -> a i e -> a' i' e
ixmapP lu f arr = ixmapWithP lu f id arr

-- | More friendly sub-arrays with element mapping.
sliceStrideWithP :: (Ix i, Shapable i, Ix i', IArray a e, IArray a' e')
                    => (i',i') -> (i,i,i) -> (e -> e') -> a i e -> a' i' e'
sliceStrideWithP lu (start,next,end) f arr
    | all (inRange (bounds arr)) [start,next,end] = listArray lu elems
    | otherwise = error "sliceStrideWith: out of bounds"
    where is = offsetShapeFromThenTo (shape arr) (index' start) (index' next) (index' end)
          elems = map (f . (unsafeAt arr)) is
          index' = indexes arr

-- | Less polymorphic version.
sliceStrideWith :: (Ix i, Shapable i, Ix i', IArray a e, IArray a e')
                   => (i',i') -> (i,i,i) -> (e -> e') -> a i e -> a i' e'
sliceStrideWith = sliceStrideWithP

-- | Strided sub-array without element mapping.
sliceStrideP :: (Ix i, Shapable i, Ix i', IArray a e, IArray a' e)
                => (i',i') -> (i,i,i) -> a i e -> a' i' e
sliceStrideP lu sne = sliceStrideWithP lu sne id

-- | Less polymorphic version.
sliceStride :: (Ix i, Shapable i, Ix i', IArray a e)
               => (i',i') -> (i,i,i) -> a i e -> a i' e
sliceStride = sliceStrideP

-- | Contiguous sub-array with element mapping.
sliceWithP :: (Ix i, Shapable i, Ix i', IArray a e, IArray a' e')
              => (i',i') -> (i,i) -> (e -> e') -> a i e -> a' i' e'
sliceWithP lu (start,end) f arr
    | all (inRange (bounds arr)) [start,end] = listArray lu elems
    | otherwise = error "sliceWith: out of bounds"
    where is = offsetShapeFromTo (shape arr) (index' start) (index' end)
          elems = map (f . (unsafeAt arr)) is
          index' = indexes arr

-- | Less polymorphic version.
sliceWith :: (Ix i, Shapable i, Ix i', IArray a e, IArray a e')
             => (i',i') -> (i,i) -> (e -> e') -> a i e -> a i' e'
sliceWith = sliceWithP

-- | Contiguous sub-array without element mapping.
sliceP :: (Ix i, Shapable i, Ix i', IArray a e, IArray a' e)
          => (i',i') -> (i,i) -> a i e -> a' i' e
sliceP lu se = sliceWithP lu se id

-- | Less polymorphic version.
slice ::  (Ix i, Shapable i, Ix i', IArray a e)
          => (i',i') -> (i,i) -> a i e -> a i' e
slice = sliceP

-- | In-place map on CArray.  Note that this is /IN PLACE/ so you should not
-- retain any reference to the original.  It flagrantly breaks referential
-- transparency!
{-# INLINE mapCArrayInPlace #-}
mapCArrayInPlace :: (Ix i, IArray CArray e, Storable e) => (e -> e) -> CArray i e -> CArray i e
mapCArrayInPlace f a = unsafeInlinePerformIO $ do
    withCArray a $ \p ->
        forM_ [0 .. size a - 1] $ \i ->
            peekElemOff p i >>= pokeElemOff p i . f
    return a

-----------------------------------------
-- These are meant to be internal only
indexes :: (Ix i, Shapable i, IArray a e) => a i e -> i -> [Int]
indexes a i = map pred $ (sShape . fst . bounds) a i

offsetShapeFromThenTo s a b c = foldr (liftA2 (+)) [0] (ilists stride a b c)
    where ilists = zipWith4 (\s a b c -> map (*s) $ enumFromThenTo a b c)
          stride = shapeToStride s

offsetShapeFromTo = offsetShapeFromTo' id

offsetShapeFromTo' f s a b = foldr (liftA2 (+)) [0] (f $ ilists stride a b)
    where ilists = zipWith3 (\s a b -> map (*s) $ enumFromTo a b)
          stride = shapeToStride s

offsets :: (Ix a, Shapable a) => (a, a) -> a -> [Int]
offsets lu i = reverse . osets (index lu i) . reverse . scanl1 (*) . uncurry sShape $ lu
    where osets 0 [] = []
          osets i (b:bs) = r : osets d bs
              where (d,r) = i `divMod` b
          osets _ _ = error "osets"
-----------------------------------------

-- | p-norm on the array taken as a vector
normp :: (Ix i, RealFloat e', Abs e e', IArray a e) => e' -> a i e -> e'
normp p a | 1 <= p && not (isInfinite p) = (** (1/p)) $ foldl' (\z e -> z + (abs_ e) ** p) 0 (elems a)
          | otherwise = error "normp: p < 1"

-- | 2-norm on the array taken as a vector (Frobenius norm for matrices)
norm2 :: (Ix i, Floating e', Abs e e', IArray a e) => a i e -> e'
norm2 a = sqrt $ foldl' (\z e -> z + abs_ e ^ 2) 0 (elems a)

-- | Sup norm on the array taken as a vector
normSup :: (Ix i, Num e', Ord e', Abs e e', IArray a e) => a i e -> e'
normSup a = foldl' (\z e -> z `max` abs_ e) 0 (elems a)

-- | Polymorphic version of amap.
liftArrayP :: (Ix i, IArray a e, IArray a1 e1)
              => (e -> e1) -> a i e -> a1 i e1
liftArrayP f a = listArray (bounds a) (map f (elems a))

-- | Equivalent to amap.  Here for consistency only.
liftArray :: (Ix i, IArray a e, IArray a e1)
              => (e -> e1) -> a i e -> a i e1
liftArray = liftArrayP

-- | Polymorphic 2-array lift.
liftArray2P :: (Ix i, IArray a e, IArray a1 e1, IArray a2 e2)
              => (e -> e1 -> e2) -> a i e -> a1 i e1 -> a2 i e2
liftArray2P f a b | aBounds == bounds b =
                     listArray aBounds (zipWith f (elems a) (elems b))
                 | otherwise = error "liftArray2: array bounds must match"
    where aBounds = bounds a

-- | Less polymorphic version.
liftArray2 :: (Ix i, IArray a e, IArray a e1, IArray a e2)
              => (e -> e1 -> e2) -> a i e -> a i e1 -> a i e2
liftArray2 = liftArray2P

-- | Polymorphic 3-array lift.
liftArray3P :: (Ix i, IArray a e, IArray a1 e1, IArray a2 e2, IArray a3 e3)
               => (e -> e1 -> e2 -> e3) -> a i e -> a1 i e1 -> a2 i e2 -> a3 i e3
liftArray3P f a b c | aBounds == bounds b && aBounds == bounds c =
                       listArray aBounds (zipWith3 f (elems a) (elems b) (elems c))
                   | otherwise = error "liftArray2: array bounds must match"
    where aBounds = bounds a

-- | Less polymorphic version.
liftArray3 :: (Ix i, IArray a e, IArray a e1, IArray a e2, IArray a e3)
              => (e -> e1 -> e2 -> e3) -> a i e -> a i e1 -> a i e2 -> a i e3
liftArray3 = liftArray3P


-- | We need this type class to distinguish between different tuples of Ix.
-- There are Shapable instances for homogenous Int tuples, but may Haddock
-- doesn't see them.
class Shapable i where
    sRank :: i -> Int
    sShape :: i -> i -> [Int]
    sBounds :: [Int] -> (i,i)

instance Shapable Int where
    sRank _ = 1
    sShape a a' = [rangeSize (a,a')]
    sBounds [a] = (0,a-1)

instance Shapable (Int,Int) where
    sRank _ = 2
    sShape (a,b) (a',b') = [rangeSize (a,a'), rangeSize (b,b')]
    sBounds [a,b] = ((0,0),(a-1,b-1))

instance Shapable (Int,Int,Int) where
    sRank _ = 3
    sShape (a,b,c) (a',b',c') =
        [rangeSize (a,a'), rangeSize (b,b'), rangeSize (c,c')]
    sBounds [a,b,c] = ((0,0,0),(a-1,b-1,c-1))

instance Shapable (Int,Int,Int,Int) where
    sRank _ = 4
    sShape (a,b,c,d) (a',b',c',d') =
        [rangeSize (a,a'), rangeSize (b,b'), rangeSize (c,c'), rangeSize (d,d')]
    sBounds [a,b,c,d] = ((0,0,0,0),(a-1,b-1,c-1,d-1))

instance Shapable (Int,Int,Int,Int,Int) where
    sRank _ = 5
    sShape (a,b,c,d,e) (a',b',c',d',e') =
        [rangeSize (a,a'), rangeSize (b,b'), rangeSize (c,c'), rangeSize (d,d')
        , rangeSize (e,e')]
    sBounds [a,b,c,d,e] = ((0,0,0,0,0),(a-1,b-1,c-1,d-1,e-1))

instance Shapable (Int,Int,Int,Int,Int,Int) where
    sRank _ = 6
    sShape (a,b,c,d,e,f) (a',b',c',d',e',f') =
        [rangeSize (a,a'), rangeSize (b,b'), rangeSize (c,c'), rangeSize (d,d')
        , rangeSize (e,e'), rangeSize (f,f')]
    sBounds [a,b,c,d,e,f] = ((0,0,0,0,0,0),(a-1,b-1,c-1,d-1,e-1,f-1))

instance Shapable (Int,Int,Int,Int,Int,Int,Int) where
    sRank _ = 7
    sShape (a,b,c,d,e,f,g) (a',b',c',d',e',f',g') =
        [rangeSize (a,a'), rangeSize (b,b'), rangeSize (c,c'), rangeSize (d,d')
        , rangeSize (e,e'), rangeSize (f,f'), rangeSize (g,g')]
    sBounds [a,b,c,d,e,f,g] = ((0,0,0,0,0,0,0),(a-1,b-1,c-1,d-1,e-1,f-1,g-1))

instance Shapable (Int,Int,Int,Int,Int,Int,Int,Int) where
    sRank _ = 8
    sShape (a,b,c,d,e,f,g,h) (a',b',c',d',e',f',g',h') =
        [rangeSize (a,a'), rangeSize (b,b'), rangeSize (c,c'), rangeSize (d,d')
        , rangeSize (e,e'), rangeSize (f,f'), rangeSize (g,g'), rangeSize (h,h')]
    sBounds [a,b,c,d,e,f,g,h] = ((0,0,0,0,0,0,0,0),(a-1,b-1,c-1,d-1,e-1,f-1,g-1,h-1))

instance Shapable (Int,Int,Int,Int,Int,Int,Int,Int,Int) where
    sRank _ = 9
    sShape (a,b,c,d,e,f,g,h,i) (a',b',c',d',e',f',g',h',i') =
        [rangeSize (a,a'), rangeSize (b,b'), rangeSize (c,c'), rangeSize (d,d')
        , rangeSize (e,e'), rangeSize (f,f'), rangeSize (g,g'), rangeSize (h,h')
        , rangeSize (i,i')]
    sBounds [a,b,c,d,e,f,g,h,i] = ((0,0,0,0,0,0,0,0,0)
                                  ,(a-1,b-1,c-1,d-1,e-1,f-1,g-1,h-1,i-1))


-- | Hack so that norms have a sensible type.
class Abs a b | a -> b where
    abs_ :: a -> b
instance Abs (Complex Double) Double where
    abs_ = magnitude
instance Abs (Complex Float) Float where
    abs_ = magnitude
instance Abs Double Double where
    abs_ = abs
instance Abs Float Float where
    abs_ = abs


-- | This variant of 'unsafePerformIO' is quite /mind-bogglingly unsafe/. It
-- unstitches the dependency chain that holds the IO monad together and breaks
-- all your ordinary intuitions about IO, sequencing and side effects. Avoid
-- it unless you really know what you are doing.
--
-- It is only safe for operations which are genuinely pure (not just
-- externally pure) for example reading from an immutable foreign data
-- structure. In particular, you should do no memory allocation inside an
-- 'unsafeInlinePerformIO' block. This is because an allocation is a constant
-- and is likely to be floated out and shared. More generally, any part of any
-- IO action that does not depend on a function argument is likely to be
-- floated to the top level and have its result shared.
--
-- It is more efficient because in addition to the checks that
-- 'unsafeDupablePerformIO' omits, we also inline. Additionally we do not
-- pretend that the body is lazy which allows the strictness analyser to see
-- the strictness in the body. In turn this allows some re-ordering of
-- operations and any corresponding side-effects.
--
-- With GHC it compiles to essentially no code and it exposes the body to
-- further inlining.
--
{-# INLINE unsafeInlinePerformIO #-}
unsafeInlinePerformIO :: IO a -> a
#ifdef __GLASGOW_HASKELL__
unsafeInlinePerformIO (IO m) = case m realWorld# of (# _, r #) -> r
#else
unsafeInlinePerformIO = unsafePerformIO
#endif

-- | Allocate an array which is 16-byte aligned.  Essential for SIMD instructions.
mallocForeignPtrArrayAligned :: Storable a => Int -> IO (ForeignPtr a)
mallocForeignPtrArrayAligned n = doMalloc undefined n
  where
    doMalloc :: Storable b => b -> Int -> IO (ForeignPtr b)
    doMalloc dummy size = mallocForeignPtrBytesAligned (size * sizeOf dummy)

-- | Allocate memory which is 16-byte aligned.  This is essential for SIMD
-- instructions.  We know that mallocPlainForeignPtrBytes will give word-aligned
-- memory, so we pad enough to be able to return the desired amount of memory
-- after aligning our pointer.
mallocForeignPtrBytesAligned :: Int -> IO (ForeignPtr a)
mallocForeignPtrBytesAligned n = do
    (ForeignPtr addr contents) <- mallocPlainForeignPtrBytes (n + pad)
    let (Ptr addr') = alignPtr (Ptr addr) 16
    return (ForeignPtr addr' contents)
    where pad = 16 - sizeOf (undefined :: Word)

-- | Make a new CArray with an IO action.
createCArray :: (Ix i, Storable e) => (i,i) -> (Ptr e -> IO ()) -> IO (CArray i e)
createCArray lu f = do
    fp <- mallocForeignPtrArrayAligned (rangeSize lu)
    withForeignPtr fp f
    unsafeForeignPtrToCArray fp lu

unsafeCreateCArray :: (Ix i, Storable e) => (i,i) -> (Ptr e -> IO ()) -> CArray i e
unsafeCreateCArray lu =  unsafePerformIO . createCArray lu