{-# LANGUAGE OverloadedStrings, NamedFieldPuns #-} {-# LANGUAGE GeneralizedNewtypeDeriving, TypeFamilies, MultiParamTypeClasses, TemplateHaskell #-} -- | Parsing the netpbm image formates (PBM, PGM and PPM, both ASCII and binary) from 'ByteString's. -- -- To parse one of these formats, use `parsePPM`. -- -- Currently, only P6 images are implemented. -- Implementing the other types should be straighforward. module Graphics.Netpbm ( PPMType (..) , PPM (..) , PpmPixelRGB8 , PpmPixelRGB16 , parsePPM , PpmParseResult -- TODO expose attoparsec functions in .Internal package ) where import Control.Monad import Control.Applicative import Data.Attoparsec.ByteString as A import Data.Attoparsec.ByteString.Char8 as A8 import Data.Attoparsec.Binary (anyWord16be) import Data.ByteString (ByteString) import Data.Char (ord) import Data.List (foldl') import Data.Word (Word8, Word16) import qualified Data.Vector.Unboxed as U import qualified Data.Vector.Generic import qualified Data.Vector.Generic.Mutable import Data.Vector.Unboxed.Deriving -- | The netpbm image type of an image. data PPMType = P1 -- ^ ASCII bitmap | P2 -- ^ ASCII greymap | P3 -- ^ ASCII pixmap (color) | P4 -- ^ binary bitmap | P5 -- ^ binary greymap | P6 -- ^ binary pixmap (color) deriving (Eq, Show, Enum, Ord) -- | A PPM file with type, dimensions, and image data. data PPM = PPM { ppmType :: PPMType , ppmWidth :: {-# UNPACK #-} !Int , ppmHeight :: {-# UNPACK #-} !Int , ppmData :: PpmPixelData } instance Show PPM where show PPM { ppmType, ppmWidth, ppmHeight } = "PPM " ++ show ppmType ++ " image " ++ dim where dim = show (ppmWidth, ppmHeight) -- | A pixel containing three 8-bit color components, RGB. data PpmPixelRGB8 = PpmPixelRGB8 {-# UNPACK #-} !Word8 -- Red {-# UNPACK #-} !Word8 -- Green {-# UNPACK #-} !Word8 -- Blue deriving (Eq, Show) -- | A pixel containing three 16-bit color components, RGB. data PpmPixelRGB16 = PpmPixelRGB16 {-# UNPACK #-} !Word16 -- Red {-# UNPACK #-} !Word16 -- Green {-# UNPACK #-} !Word16 -- Blue deriving (Eq, Show) -- | Image data, either 8 or 16 bits. data PpmPixelData = PpmPixelDataRGB8 (U.Vector PpmPixelRGB8) -- ^ For 8-bit PPMs. | PpmPixelDataRGB16 (U.Vector PpmPixelRGB16) -- ^ For 16-bit PPMs. derivingUnbox "PpmPixelRGB8" [t| PpmPixelRGB8 -> (Word8, Word8, Word8) |] [| \ (PpmPixelRGB8 a b c) -> (a, b, c) |] [| \ (a, b, c) -> PpmPixelRGB8 a b c |] derivingUnbox "PpmPixelRGB16" [t| PpmPixelRGB16 -> (Word16, Word16, Word16) |] [| \ (PpmPixelRGB16 a b c) -> (a, b, c) |] [| \ (a, b, c) -> PpmPixelRGB16 a b c |] -- | Parses a netpbm magic number. -- One of P1, P2, P3, P4, P5, P6. magicNumberParser :: Parser PPMType magicNumberParser = do magic <- choice ["P1", "P2", "P3", "P4", "P5", "P6"] case magic of "P1" -> return P1 "P2" -> return P2 "P3" -> return P3 "P4" -> return P4 "P5" -> return P5 "P6" -> return P6 _ -> fail $ "PPM: uknown PPM format " ++ show magic -- | Parses a SINGLE PPM file. -- -- Specification: http://netpbm.sourceforge.net/doc/ppm.html -- -- There can be multiple images in one file, each starting with -- a "Pn" magic number. -- -- Comments starting with '#' can only be -- "before the whitespace character that delimits the raster" -- (see http://netpbm.sourceforge.net/doc/pbm.html). -- Nevertheless, I interpret that as "comments cannot be -- inside the magic number". -- -- See also the notes for `imagesParser`. ppmParser :: Parser PPM ppmParser = do ppmType <- magicNumberParser -- TODO Implement the other netpbm image types when (ppmType /= P6) $ error "haskell-netpbm currently only supports PPM P6" comments skipSpace comments width <- decimalC skipSpace comments height <- decimalC skipSpace comments maxColorVal <- decimalC when (not $ isValidColorVal maxColorVal) $ fail $ "PPM: invalid color maxval " ++ show maxColorVal comments _ <- A8.satisfy isSpace -- obligatory SINGLE whitespace -- Starting from here, comments are not allowed any more raster <- if maxColorVal < 256 -- 1 or 2 bytes per pixel then PpmPixelDataRGB8 <$> (U.replicateM (height * width) $ PpmPixelRGB8 <$> anyWord8 <*> anyWord8 <*> anyWord8) else PpmPixelDataRGB16 <$> (U.replicateM (height * width) $ PpmPixelRGB16 <$> anyWord16be <*> anyWord16be <*> anyWord16be) return $ PPM ppmType width height raster where isValidColorVal v = v > 0 && v < 65536 comments = void $ many comment comment = "#" .*> A.takeWhile isNotNewline <* endOfLine isNotNewline w = w /= 10 && w /= 13 -- Decimal, possibly with comments interleaved, -- but starting with a digit. -- See the notes about comments above. decimalC :: Parser Int decimalC = foldl' shiftDecimalChar 0 <$> many1' (digit <* comments) shiftDecimalChar a d = a * 10 + ord d - (48 :: Int) -- | Parses a full PPM file, containing one or more images. -- -- "A PPM file consists of a sequence of one or more PPM images." -- We allow trailing whitespace after images, which is AGAINST THE SPEC: -- -- >"A PPM file consists of a sequence of one or more PPM images. -- > There are no data, delimiters, or padding before, after, or between images." -- -- However, you can find PPM files that have trailing whitespace, especially a '\n'. imagesParser :: Parser [PPM] imagesParser = many1 (ppmParser <* skipSpace) -- | The result of a PPM parse. -- -- See `parsePPM`. type PpmParseResult = Either String ([PPM], Maybe ByteString) -- | Parses a PPM file from the given 'ByteString'. -- On failure, @Left error@ contains the error message. -- On success, @Right (images, Maybe rest)@ contains the parsed images -- and potentially an unparsable rest input. parsePPM :: ByteString -> PpmParseResult parsePPM bs = case parse imagesParser bs of Done "" images -> Right (images, Nothing) Done rest images -> Right (images, Just rest) Fail _ _ e -> Left e -- The image file ByteStrings are not terminated by '\0', -- so Attoparsec will issue a Partial result when it -- parses to EOF. Passing in "" signalizes EOF. Partial cont -> case cont "" of Done "" images -> Right (images, Nothing) Done rest images -> Right (images, Just rest) Partial _ -> error "parsePPM bug: Got a partial result after end of input"