src/Graphics/Netpbm.hs

{-# LANGUAGE OverloadedStrings, NamedFieldPuns #-}
{-# LANGUAGE GeneralizedNewtypeDeriving, TypeFamilies, MultiParamTypeClasses, TemplateHaskell #-}


-- | Parsing the netpbm image formates (PBM, PGM and PPM, both ASCII and binary) from 'ByteString's.
--
-- All netpbm image formats are implemented (P1 - P6).
--
-- To parse one of these formats, use `parsePPM`.
--
-- See also: <http://www.imagemagick.org/Usage/formats/#netpbm>
module Graphics.Netpbm (
  PPMType (..)
, PPM (..)
, PpmPixelRGB8 (..)
, PpmPixelRGB16 (..)
, PbmPixel (..)
, PgmPixel8 (..)
, PgmPixel16 (..)
, PPMHeader (..)
, PpmPixelData (..)
, pixelVectorToList
, pixelDataToIntList
, parsePPM
, PpmParseResult
-- TODO expose attoparsec functions in .Internal package
) where

import           Control.Monad
import           Control.Applicative
import           Data.Attoparsec.ByteString as A
import           Data.Attoparsec.ByteString.Char8 as A8
import           Data.Attoparsec.Binary (anyWord16be)
import           Data.Bits (testBit)
import           Data.ByteString (ByteString)
import           Data.Char (chr, ord)
import           Data.List (foldl')
import           Data.Word (Word8, Word16)
import           Foreign.Storable.Record as Store
import           Foreign.Storable (Storable (..))

import qualified Data.Vector.Unboxed as U
import           Data.Vector.Unboxed ((!))
import qualified Data.Vector.Generic
import qualified Data.Vector.Generic.Mutable as VGM

import Data.Vector.Unboxed.Deriving


-- | The netpbm image type of an image.
data PPMType = P1 -- ^ ASCII bitmap
             | P2 -- ^ ASCII greymap
             | P3 -- ^ ASCII pixmap (color)
             | P4 -- ^ binary bitmap
             | P5 -- ^ binary greymap
             | P6 -- ^ binary pixmap (color)
             deriving (Eq, Show, Enum, Ord)


-- | A PPM file with type, dimensions, and image data.
data PPM = PPM {
  ppmHeader :: PPMHeader
, ppmData   :: PpmPixelData
}

data PPMHeader = PPMHeader {
  ppmType   :: PPMType
, ppmWidth  :: Int
, ppmHeight :: Int
} deriving (Eq, Show)

instance Show PPM where
  show PPM { ppmHeader = PPMHeader { ppmType, ppmWidth, ppmHeight } } = "PPM " ++ show ppmType ++ " image " ++ dim
    where
      dim = show (ppmWidth, ppmHeight)


-- | A pixel containing three 8-bit color components, RGB.
data PpmPixelRGB8 = PpmPixelRGB8 {-# UNPACK #-} !Word8 -- Red
                                 {-# UNPACK #-} !Word8 -- Green
                                 {-# UNPACK #-} !Word8 -- Blue
                                 deriving (Eq, Show)

-- | A pixel containing three 16-bit color components, RGB.
data PpmPixelRGB16 = PpmPixelRGB16 {-# UNPACK #-} !Word16 -- Red
                                   {-# UNPACK #-} !Word16 -- Green
                                   {-# UNPACK #-} !Word16 -- Blue
                                   deriving (Eq, Show)

-- | A pixel containing black or white.
newtype PbmPixel = PbmPixel Bool -- False = black, True = white
                 deriving (Eq, Show)

-- | A pixel containing an 8-bit greyscale value.
data PgmPixel8 = PgmPixel8 {-# UNPACK #-} !Word8
                           deriving (Eq, Show)

-- | A pixel containing a 16-bit greyscale value.
data PgmPixel16 = PgmPixel16 {-# UNPACK #-} !Word16
                             deriving (Eq, Show)

-- | Image data, either 8 or 16 bits.
-- TODO rename to PNM
data PpmPixelData = PpmPixelDataRGB8 (U.Vector PpmPixelRGB8)   -- ^ For 8-bit PPMs.
                  | PpmPixelDataRGB16 (U.Vector PpmPixelRGB16) -- ^ For 16-bit PPMs.
                  | PbmPixelData (U.Vector PbmPixel)           -- ^ For 1-bit PBMs.
                  | PgmPixelData8 (U.Vector PgmPixel8)         -- ^ For 8-bit PGMs.
                  | PgmPixelData16 (U.Vector PgmPixel16)       -- ^ For 16-bit PGMs.


pixelVectorToList :: (U.Unbox a) => U.Vector a -> [a]
pixelVectorToList = U.toList


pixelDataToIntList :: PpmPixelData -> [Int]
pixelDataToIntList d = case d of
  PpmPixelDataRGB8 v  -> concat [ map fromIntegral [r, g, b] | PpmPixelRGB8 r g b  <- U.toList v ]
  PpmPixelDataRGB16 v -> concat [ map fromIntegral [r, g, b] | PpmPixelRGB16 r g b <- U.toList v ]
  PbmPixelData v      ->        [ if b then 1 else 0         | PbmPixel b          <- U.toList v ]
  PgmPixelData8 v     ->        [ fromIntegral x             | PgmPixel8 x         <- U.toList v ]
  PgmPixelData16 v    ->        [ fromIntegral x             | PgmPixel16 x        <- U.toList v ]


-- * Unbox instance for pixels

derivingUnbox "PpmPixelRGB8"
    [t| PpmPixelRGB8 -> (Word8, Word8, Word8) |]
    [| \ (PpmPixelRGB8 a b c) -> (a, b, c) |]
    [| \ (a, b, c) -> PpmPixelRGB8 a b c |]

derivingUnbox "PpmPixelRGB16"
    [t| PpmPixelRGB16 -> (Word16, Word16, Word16) |]
    [| \ (PpmPixelRGB16 a b c) -> (a, b, c) |]
    [| \ (a, b, c) -> PpmPixelRGB16 a b c |]

derivingUnbox "PbmPixel"
    [t| PbmPixel -> Bool |]
    [| \ (PbmPixel b) -> b |]
    [| \ b -> PbmPixel b |]

derivingUnbox "PgmPixel8"
    [t| PgmPixel8 -> Word8 |]
    [| \ (PgmPixel8 x) -> x |]
    [| \ x -> PgmPixel8 x |]

derivingUnbox "PgmPixel16"
    [t| PgmPixel16 -> Word16 |]
    [| \ (PgmPixel16 x) -> x |]
    [| \ x -> PgmPixel16 x |]


-- * Storable instance for pixels

storePpmPixel8 :: Store.Dictionary PpmPixelRGB8
storePpmPixel8 =
  Store.run $ liftA3 PpmPixelRGB8
    (Store.element (\(PpmPixelRGB8 x _ _) -> x))
    (Store.element (\(PpmPixelRGB8 _ y _) -> y))
    (Store.element (\(PpmPixelRGB8 _ _ z) -> z))

storePpmPixel16 :: Store.Dictionary PpmPixelRGB16
storePpmPixel16 =
  Store.run $ liftA3 PpmPixelRGB16
    (Store.element (\(PpmPixelRGB16 x _ _) -> x))
    (Store.element (\(PpmPixelRGB16 _ y _) -> y))
    (Store.element (\(PpmPixelRGB16 _ _ z) -> z))

storePbmPixel :: Store.Dictionary PbmPixel
storePbmPixel =
  Store.run $ liftA PbmPixel
    (Store.element (\(PbmPixel x) -> x))

storePgmPixel8 :: Store.Dictionary PgmPixel8
storePgmPixel8 =
  Store.run $ liftA PgmPixel8
    (Store.element (\(PgmPixel8 x) -> x))

storePgmPixel16 :: Store.Dictionary PgmPixel16
storePgmPixel16 =
  Store.run $ liftA PgmPixel16
    (Store.element (\(PgmPixel16 x) -> x))

instance Storable PpmPixelRGB8 where
  sizeOf = Store.sizeOf storePpmPixel8
  alignment = Store.alignment storePpmPixel8
  peek = Store.peek storePpmPixel8
  poke = Store.poke storePpmPixel8

instance Storable PpmPixelRGB16 where
  sizeOf = Store.sizeOf storePpmPixel16
  alignment = Store.alignment storePpmPixel16
  peek = Store.peek storePpmPixel16
  poke = Store.poke storePpmPixel16

instance Storable PbmPixel where
  sizeOf = Store.sizeOf storePbmPixel
  alignment = Store.alignment storePbmPixel
  peek = Store.peek storePbmPixel
  poke = Store.poke storePbmPixel

instance Storable PgmPixel8 where
  sizeOf = Store.sizeOf storePgmPixel8
  alignment = Store.alignment storePgmPixel8
  peek = Store.peek storePgmPixel8
  poke = Store.poke storePgmPixel8

instance Storable PgmPixel16 where
  sizeOf = Store.sizeOf storePgmPixel16
  alignment = Store.alignment storePgmPixel16
  peek = Store.peek storePgmPixel16
  poke = Store.poke storePgmPixel16


-- | Parses a netpbm magic number.
-- One of P1, P2, P3, P4, P5, P6.
magicNumberParser :: Parser PPMType
magicNumberParser = do
  magic <- choice ["P1", "P2", "P3", "P4", "P5", "P6"]
  case magic of
    "P1" -> return P1
    "P2" -> return P2
    "P3" -> return P3
    "P4" -> return P4
    "P5" -> return P5
    "P6" -> return P6
    _    -> fail $ "PPM: uknown PPM format " ++ show magic



-- Not writing this as @comments = skipMany comment@ because that would allow this parser
-- to consume no input, which makes it loop forever when stuck into something like `many`.
{-# INLINE comment #-}
comment :: Parser ByteString
comment = "#" .*> A.takeWhile isNotNewline <* endOfLine
  where
    isNotNewline w = w /= 10 && w /= 13


{-# INLINE sep #-}
sep :: Parser ()
-- At least one space, optionally with more space or comments around
sep = do skipMany comment
         singleWhitespace
         skipMany (singleWhitespace <|> void comment)


-- | Decimal, possibly with comments interleaved,
-- but starting and ending with a digit.
-- See the notes about comments.
{-# INLINE decimalC #-}
decimalC :: Parser Int
decimalC = foldl' shiftDecimalChar 0 <$> (digit `sepBy1` skipMany comment)
  where
    shiftDecimalChar a d = a * 10 + ord d - (48 :: Int)


headerParser :: Parser PPMHeader
headerParser = do
  ppmType <- magicNumberParser
  sep
  width <- decimalC
  sep
  height <- decimalC
  skipMany comment -- Don't allow whitespace here since after the next whitespace there must not be any more comments
  return $ PPMHeader ppmType width height


{-# INLINE word8max #-}
-- Parsing words not bigger than given maxval
word8max :: Word8 -> Parser Word8
word8max m = A.satisfy (<= m) <?> "pixel data must be smaller than maxval"

{-# INLINE word16max #-}
word16max :: Word16 -> Parser Word16
word16max m = do w16 <- anyWord16be
                 when (not $ w16 <= m) $ fail "pixel data must be smaller than maxval"
                 return w16


{-# INLINE isValidMaxval #-}
isValidMaxval :: Int -> Bool
isValidMaxval v = v > 0 && v < 65536


{-# INLINE singleWhitespace #-}
singleWhitespace :: Parser ()
singleWhitespace = void $ A.satisfy isSpace_w8


-- | Parses a SINGLE PPM file.
--
-- Specification: http://netpbm.sourceforge.net/doc/ppm.html
--
-- There can be multiple images in one file, each starting with
-- a "Pn" magic number.
--
-- Comments starting with '#' can only be
-- "before the whitespace character that delimits the raster"
-- (see http://netpbm.sourceforge.net/doc/pbm.html).
-- Nevertheless, I interpret that as "comments cannot be
-- inside the magic number".
--
-- See also the notes for `imagesParser`.
ppmBodyParser :: PPMHeader -> Parser PPM
ppmBodyParser header@PPMHeader { ppmWidth = width, ppmHeight = height } = do

  sep

  maxColorVal <- decimalC
  when (not $ isValidMaxval maxColorVal) $
    fail $ "PPM: invalid color maxval " ++ show maxColorVal
  skipMany comment

  singleWhitespace -- obligatory SINGLE whitespace; starting from here, comments are not allowed any more

  raster <- case maxColorVal of -- 1 or 2 bytes per pixel
    -- Parse pixel data into vector, making sure that words don't exceed maxColorVal
    m | m < 256   -> let v = word8max (fromIntegral m)
                      in PpmPixelDataRGB8  <$> U.replicateM (height * width) (PpmPixelRGB8  <$> v <*> v <*> v)
    m | otherwise -> let v = word16max (fromIntegral m)
                      in PpmPixelDataRGB16 <$> U.replicateM (height * width) (PpmPixelRGB16 <$> v <*> v <*> v)

  return $ PPM header raster


pgmBodyParser :: PPMHeader -> Parser PPM
pgmBodyParser header@PPMHeader { ppmWidth = width, ppmHeight = height } = do

  sep

  maxGreyVal <- decimalC
  when (not $ isValidMaxval maxGreyVal) $
    fail $ "PGM: invalid grey maxval " ++ show maxGreyVal
  skipMany comment

  singleWhitespace -- obligatory SINGLE whitespace; starting from here, comments are not allowed any more

  raster <- case maxGreyVal of -- 1 or 2 bytes per pixel
    -- Parse pixel data into vector, making sure that words don't exceed maxGreyVal
    m | m < 256   -> let v = word8max (fromIntegral m)
                      in PgmPixelData8  <$> U.replicateM (height * width) (PgmPixel8  <$> v)
    m | otherwise -> let v = word16max (fromIntegral m)
                      in PgmPixelData16 <$> U.replicateM (height * width) (PgmPixel16 <$> v)

  return $ PPM header raster


pbmBodyParser :: PPMHeader -> Parser PPM
pbmBodyParser header@PPMHeader { ppmWidth = width, ppmHeight = height } = do

  singleWhitespace -- obligatory SINGLE whitespace; starting from here, comments are not allowed any more

  -- From: http://netpbm.sourceforge.net/doc/pbm.html
  --   "Each row is Width bits, packed 8 to a byte, with don't care bits to fill out the last byte in the row."
  let widthBytes = (width + 7) // 8

  -- Parse pixel data first in into a Word8 vector, then translate to a Bool vector, leaving the don't-cares at the end out.
  word8Vector <- U.replicateM (height * widthBytes) anyWord8

  let bits = U.create $ do
        v <- VGM.replicate (width * height) (PbmPixel False)
        forM_ [0..height-1] $ \row ->
          forM_ [0..width-1] $ \col ->
            let i            = row * width + col
                (col8, bitN) = col /% 8
                i8           = row * widthBytes + col8
             -- We negate (see "not"), because:
             --   "1 is black, 0 is white."
             -- Also, `testBit` indexes from the right (LSB).
             in VGM.write v i (PbmPixel . not $ (word8Vector ! i8) `testBit` (7 - bitN))
        return v

  return $ PPM header (PbmPixelData bits)
  where
    (//) = quot
    (/%) = quotRem


-- | See http://netpbm.sourceforge.net/doc/pbm.html
--
-- We ignore the "No line should be longer than 70 characters" here due to "should".
pbmAsciiBodyParser :: PPMHeader -> Parser PPM
pbmAsciiBodyParser header@PPMHeader { ppmWidth = width, ppmHeight = height } = do

  singleWhitespace -- obligatory SINGLE whitespace; starting from here, comments are not allowed any more

  -- Parse pixel data into Bool vector.
  let n = height * width
  -- There must be whitespace *between* the values.
  -- There can be whitespace *before* the first value since:
  --   "White space in the raster section is ignored."
  -- Don't allow it *after* so that we can check if there is a whitespace between raster and optional junk.
  -- I use `generateM` here instead of fromList . (`sepBy` [whitespace]) because I believe it's faster.
  bits <- U.replicateM n (A.takeWhile isSpace_w8 *> asciiBit)

  -- From the spec (who the heck can even come up with this):
  --   "You can put any junk you want after the raster, if it starts with a white space character."
  -- Note that it says *can*, i.e. the junk can also be empty, so trailing whitespace is allowed.
  -- So let's eat all remaining input:
  option () (A.takeWhile1 isSpace_w8 *> takeLazyByteString *> pure ())

  -- Now we should be at the end of file.
  endOfInput <?> "there is junk after the ASCII raster that is not separated by whitespace"

  return $ PPM header (PbmPixelData bits)
  where
    asciiBit = PbmPixel <$> (anyWord8 >>= toBool)
    -- We flip True/False because "1" means black == False.
    toBool 48 = return True
    toBool 49 = return False
    toBool w  = fail $ "ASCII bit must be '0' or '1', not " ++ show (chr $ fromIntegral w)


pgmAsciiBodyParser :: PPMHeader -> Parser PPM
pgmAsciiBodyParser header@PPMHeader { ppmWidth = width, ppmHeight = height } = do

  sep

  maxGreyVal <- decimalC
  when (not $ isValidMaxval maxGreyVal) $
    fail $ "PGM: invalid grey maxval " ++ show maxGreyVal
  skipMany comment

  singleWhitespace -- obligatory SINGLE whitespace; starting from here, comments are not allowed any more

  let n = height * width

  -- TODO size-check the int by first putting it in Word64 and limiting decimal length
  raster <- case maxGreyVal of -- 1 or 2 bytes per pixel
    -- Parse pixel data into vector, making sure that words don't exceed maxGreyVal
    m | m < 256 -> PgmPixelData8  <$> U.replicateM n (A.takeWhile isSpace_w8 *> (PgmPixel8  <$> decimal))
    _           -> PgmPixelData16 <$> U.replicateM n (A.takeWhile isSpace_w8 *> (PgmPixel16 <$> decimal))

  option () (A.takeWhile1 isSpace_w8 *> takeLazyByteString *> pure ())

  -- Now we should be at the end of file.
  endOfInput <?> "there is junk after the ASCII raster that is not separated by whitespace"

  return $ PPM header raster


ppmAsciiBodyParser :: PPMHeader -> Parser PPM
ppmAsciiBodyParser header@PPMHeader { ppmWidth = width, ppmHeight = height } = do

  sep

  maxColorVal <- decimalC
  when (not $ isValidMaxval maxColorVal) $
    fail $ "PGM: invalid color maxval " ++ show maxColorVal
  skipMany comment

  singleWhitespace -- obligatory SINGLE whitespace; starting from here, comments are not allowed any more

  let n = height * width
      d8  = A.takeWhile isSpace_w8 *> decimal :: Parser Word8
      d16 = A.takeWhile isSpace_w8 *> decimal :: Parser Word16

  -- TODO size-check the int by first putting it in Word64 and limiting decimal length
  raster <- case maxColorVal of -- 1 or 2 bytes per pixel
    -- Parse pixel data into vector, making sure that words don't exceed maxColorVal
    m | m < 256 -> PpmPixelDataRGB8  <$> U.replicateM n (PpmPixelRGB8  <$> d8  <*> d8  <*> d8 )
    _           -> PpmPixelDataRGB16 <$> U.replicateM n (PpmPixelRGB16 <$> d16 <*> d16 <*> d16)

  option () (A.takeWhile1 isSpace_w8 *> takeLazyByteString *> pure ())

  -- Now we should be at the end of file.
  endOfInput <?> "there is junk after the ASCII raster that is not separated by whitespace"

  return $ PPM header raster


imageParserOfType :: Maybe PPMType -> Parser PPM
imageParserOfType mpN = do
  header@PPMHeader { ppmType } <- headerParser

  case mpN of
    Just pN | pN /= ppmType -> fail "an image in a multi-image file is not of the same type as the first image in the file"
    _                       -> return ()

  case ppmType of
    P1 -> pbmAsciiBodyParser header
    P2 -> pgmAsciiBodyParser header
    P3 -> ppmAsciiBodyParser header
    P4 -> pbmBodyParser header
    P5 -> pgmBodyParser header
    P6 -> ppmBodyParser header


imageParser :: Parser PPM
imageParser = imageParserOfType Nothing


-- | Parses a full PPM file, containing one or more images.
--
-- From the spec:
--
-- >"A PPM file consists of a sequence of one or more PPM images.
-- > There are no data, delimiters, or padding before, after, or between images."
--
-- However, you can find PPM files that have trailing whitespace, especially a '\n',
-- so we allow this.
imagesParser :: Parser [PPM]
imagesParser = do
  -- Parse the first image.
  firstImage@PPM { ppmHeader = PPMHeader { ppmType } } <- imageParser <* skipSpace

  -- Force the following images, if any, to be of the same type.
  otherImages <- many (imageParserOfType (Just ppmType) <* skipSpace)

  -- TODO Restructure so that this cannot happen. There is no point of returning [PPM] for ASCII images.
  when (ppmType `elem` [P1, P2, P3] && not (null otherImages)) $
    error "haskell-netpbm bug: ASCII formats should never contain more than one image (they treat remaining data as junk)"

  return $ firstImage:otherImages



-- | The result of a PPM parse.
--
-- See `parsePPM`.
type PpmParseResult = Either String ([PPM], Maybe ByteString)


-- | Parses a PPM file from the given 'ByteString'.
-- On failure, @Left error@ contains the error message.
-- On success, @Right (images, Maybe rest)@ contains the parsed images
-- and potentially an unparsable rest input.
parsePPM :: ByteString -> PpmParseResult
parsePPM bs = case parse imagesParser bs of
  -- The image file ByteStrings are not terminated by '\0',
  -- so Attoparsec will issue a Partial result when it
  -- parses to EOF. Passing in "" signalizes EOF.
  Partial cont -> resultToEither (cont "")
  r            -> resultToEither r
  where
    -- Assumes a Partial result has already been fed with "" (another Partial cannot happen)
    resultToEither r = case r of
      Done ""   images -> Right (images, Nothing)
      Done rest images -> Right (images, Just rest)
      Partial _        -> error "parsePPM bug: Got a partial result after end of input"
      Fail _ cs e      -> Left $ e ++ "; contexts: " ++ show cs