{-# LANGUAGE FlexibleContexts #-} -- | Module providing a 'fast' implementation of IDCT -- -- inverse two dimensional DCT, Chen-Wang algorithm -- (cf. IEEE ASSP-32, pp. 803-816, Aug. 1984) -- 32-bit integer arithmetic (8 bit coefficients) -- 11 mults, 29 adds per DCT -- sE, 18.8.91 -- -- coefficients extended to 12 bit for IEEE1180-1990 -- compliance sE, 2.1.94 -- -- this code assumes >> to be a two's-complement arithmetic -- right shift: (-2)>>1 == -1 , (-3)>>1 == -2 module Codec.Picture.Jpg.FastIdct( MutableMacroBlock , fastIdct , mutableLevelShift , createEmptyMutableMacroBlock ) where import qualified Data.Vector.Storable as V import qualified Data.Vector.Storable.Mutable as M import Control.Monad( forM_ ) import Control.Monad.ST( ST ) import Control.Monad.Primitive ( PrimMonad, PrimState) import Data.Bits( shiftL, shiftR ) import Data.Int( Int16 ) import Foreign.Storable ( Storable ) {- {-# INLINE iclip #-} iclip :: Int -> Int16 iclip i | fromIntegral i < (-256) = -256 | fromIntegral i > 256 = 256 | otherwise = fromIntegral i -} iclip :: V.Vector Int16 iclip = V.fromListN 1024 [ val i| i <- [(-512) .. 511] ] where val i | i < (-256) = -256 | i > 255 = 255 | otherwise = i {-# INLINE (.<<.) #-} {-# INLINE (.>>.) #-} (.<<.), (.>>.) :: Int -> Int -> Int (.<<.) = shiftL (.>>.) = shiftR data IDctStage = IDctStage { x0 :: {-# UNPACK #-} !Int, x1 :: {-# UNPACK #-} !Int, x2 :: {-# UNPACK #-} !Int, x3 :: {-# UNPACK #-} !Int, x4 :: {-# UNPACK #-} !Int, x5 :: {-# UNPACK #-} !Int, x6 :: {-# UNPACK #-} !Int, x7 :: {-# UNPACK #-} !Int, x8 :: {-# UNPACK #-} !Int } w1, w2, w3, w5, w6, w7 :: Int w1 = 2841 -- 2048*sqrt(2)*cos(1*pi/16) w2 = 2676 -- 2048*sqrt(2)*cos(2*pi/16) w3 = 2408 -- 2048*sqrt(2)*cos(3*pi/16) w5 = 1609 -- 2048*sqrt(2)*cos(5*pi/16) w6 = 1108 -- 2048*sqrt(2)*cos(6*pi/16) w7 = 565 -- 2048*sqrt(2)*cos(7*pi/16) {-# INLINE (!!!) #-} (!!!) :: (Storable e) => V.Vector e -> Int -> e (!!!) a i = V.unsafeIndex a (i + 512) {-# INLINE (.!!!.) #-} (.!!!.) :: (PrimMonad m, Storable a) => M.STVector (PrimState m) a -> Int -> m a (.!!!.) = M.unsafeRead {-# INLINE (.<-.) #-} (.<-.) :: (PrimMonad m, Storable a) => M.STVector (PrimState m) a -> Int -> a -> m () (.<-.) = M.unsafeWrite -- | Macroblock that can be transformed. type MutableMacroBlock s a = M.STVector s a {-# INLINE createEmptyMutableMacroBlock #-} -- | Create a new macroblock with the good array size createEmptyMutableMacroBlock :: ST s (MutableMacroBlock s Int16) createEmptyMutableMacroBlock = M.replicate 64 0 -- row (horizontal) IDCT -- -- 7 pi 1 -- dst[k] = sum c[l] * src[l] * cos( -- * ( k + - ) * l ) -- l=0 8 2 -- -- where: c[0] = 128 -- c[1..7] = 128*sqrt(2) idctRow :: MutableMacroBlock s Int16 -> Int -> ST s () idctRow blk idx = do xx0 <- blk .!!!. (0 + idx) xx1 <- blk .!!!. (4 + idx) xx2 <- blk .!!!. (6 + idx) xx3 <- blk .!!!. (2 + idx) xx4 <- blk .!!!. (1 + idx) xx5 <- blk .!!!. (7 + idx) xx6 <- blk .!!!. (5 + idx) xx7 <- blk .!!!. (3 + idx) let initialState = IDctStage { x0 = (fromIntegral xx0 .<<. 11) + 128 , x1 = fromIntegral xx1 .<<. 11 , x2 = fromIntegral xx2 , x3 = fromIntegral xx3 , x4 = fromIntegral xx4 , x5 = fromIntegral xx5 , x6 = fromIntegral xx6 , x7 = fromIntegral xx7 , x8 = 0 } firstStage c = c { x4 = x8' + (w1 - w7) * x4 c , x5 = x8' - (w1 + w7) * x5 c , x6 = x8'' - (w3 - w5) * x6 c , x7 = x8'' - (w3 + w5) * x7 c , x8 = x8'' } where x8' = w7 * (x4 c + x5 c) x8'' = w3 * (x6 c + x7 c) secondStage c = c { x0 = x0 c - x1 c , x8 = x0 c + x1 c , x1 = x1'' , x2 = x1' - (w2 + w6) * x2 c , x3 = x1' + (w2 - w6) * x3 c , x4 = x4 c - x6 c , x6 = x5 c + x7 c , x5 = x5 c - x7 c } where x1' = w6 * (x3 c + x2 c) x1'' = x4 c + x6 c thirdStage c = c { x7 = x8 c + x3 c , x8 = x8 c - x3 c , x3 = x0 c + x2 c , x0 = x0 c - x2 c , x2 = (181 * (x4 c + x5 c) + 128) .>>. 8 , x4 = (181 * (x4 c - x5 c) + 128) .>>. 8 } scaled c = c { x0 = (x7 c + x1 c) .>>. 8 , x1 = (x3 c + x2 c) .>>. 8 , x2 = (x0 c + x4 c) .>>. 8 , x3 = (x8 c + x6 c) .>>. 8 , x4 = (x8 c - x6 c) .>>. 8 , x5 = (x0 c - x4 c) .>>. 8 , x6 = (x3 c - x2 c) .>>. 8 , x7 = (x7 c - x1 c) .>>. 8 } transformed = scaled . thirdStage . secondStage $ firstStage initialState (blk .<-. (0 + idx)) . fromIntegral $ x0 transformed (blk .<-. (1 + idx)) . fromIntegral $ x1 transformed (blk .<-. (2 + idx)) . fromIntegral $ x2 transformed (blk .<-. (3 + idx)) . fromIntegral $ x3 transformed (blk .<-. (4 + idx)) . fromIntegral $ x4 transformed (blk .<-. (5 + idx)) . fromIntegral $ x5 transformed (blk .<-. (6 + idx)) . fromIntegral $ x6 transformed (blk .<-. (7 + idx)) . fromIntegral $ x7 transformed -- column (vertical) IDCT -- -- 7 pi 1 -- dst[8*k] = sum c[l] * src[8*l] * cos( -- * ( k + - ) * l ) -- l=0 8 2 -- -- where: c[0] = 1/1024 -- c[1..7] = (1/1024)*sqrt(2) -- idctCol :: MutableMacroBlock s Int16 -> Int -> ST s () idctCol blk idx = do xx0 <- blk .!!!. ( 0 + idx) xx1 <- blk .!!!. (8 * 4 + idx) xx2 <- blk .!!!. (8 * 6 + idx) xx3 <- blk .!!!. (8 * 2 + idx) xx4 <- blk .!!!. (8 + idx) xx5 <- blk .!!!. (8 * 7 + idx) xx6 <- blk .!!!. (8 * 5 + idx) xx7 <- blk .!!!. (8 * 3 + idx) let initialState = IDctStage { x0 = (fromIntegral xx0 .<<. 8) + 8192 , x1 = fromIntegral xx1 .<<. 8 , x2 = fromIntegral xx2 , x3 = fromIntegral xx3 , x4 = fromIntegral xx4 , x5 = fromIntegral xx5 , x6 = fromIntegral xx6 , x7 = fromIntegral xx7 , x8 = 0 } firstStage c = c { x4 = (x8' + (w1 - w7) * x4 c) .>>. 3 , x5 = (x8' - (w1 + w7) * x5 c) .>>. 3 , x6 = (x8'' - (w3 - w5) * x6 c) .>>. 3 , x7 = (x8'' - (w3 + w5) * x7 c) .>>. 3 , x8 = x8'' } where x8' = w7 * (x4 c + x5 c) + 4 x8'' = w3 * (x6 c + x7 c) + 4 secondStage c = c { x8 = x0 c + x1 c , x0 = x0 c - x1 c , x2 = (x1' - (w2 + w6) * x2 c) .>>. 3 , x3 = (x1' + (w2 - w6) * x3 c) .>>. 3 , x4 = x4 c - x6 c , x1 = x1'' , x6 = x5 c + x7 c , x5 = x5 c - x7 c } where x1' = w6 * (x3 c + x2 c) + 4 x1'' = x4 c + x6 c thirdStage c = c { x7 = x8 c + x3 c , x8 = x8 c - x3 c , x3 = x0 c + x2 c , x0 = x0 c - x2 c , x2 = (181 * (x4 c + x5 c) + 128) .>>. 8 , x4 = (181 * (x4 c - x5 c) + 128) .>>. 8 } f = thirdStage . secondStage $ firstStage initialState (blk .<-. (idx + 8*0)) $ iclip !!! ((x7 f + x1 f) .>>. 14) (blk .<-. (idx + 8 )) $ iclip !!! ((x3 f + x2 f) .>>. 14) (blk .<-. (idx + 8*2)) $ iclip !!! ((x0 f + x4 f) .>>. 14) (blk .<-. (idx + 8*3)) $ iclip !!! ((x8 f + x6 f) .>>. 14) (blk .<-. (idx + 8*4)) $ iclip !!! ((x8 f - x6 f) .>>. 14) (blk .<-. (idx + 8*5)) $ iclip !!! ((x0 f - x4 f) .>>. 14) (blk .<-. (idx + 8*6)) $ iclip !!! ((x3 f - x2 f) .>>. 14) (blk .<-. (idx + 8*7)) $ iclip !!! ((x7 f - x1 f) .>>. 14) {-# INLINE fastIdct #-} -- | Algorithm to call to perform an IDCT, return the same -- block that the one given as input. fastIdct :: MutableMacroBlock s Int16 -> ST s (MutableMacroBlock s Int16) fastIdct block = do forM_ [0..7] (\i -> idctRow block (8 * i)) forM_ [0..7] (idctCol block) return block {-# INLINE mutableLevelShift #-} -- | Perform a Jpeg level shift in a mutable fashion. mutableLevelShift :: MutableMacroBlock s Int16 -> ST s (MutableMacroBlock s Int16) mutableLevelShift block = do forM_ [0..63] (\i -> do v <- block .!!!. i (block .<-. i) $ v + 128) return block