Codec/PNM/Parse.hs

{- |
Module      :  Codec.PNM.Parse
Copyright   :  (c) Claude Heiland-Allen 2012
License     :  BSD3

Maintainer  :  claude@mathr.co.uk
Stability   :  provisional
Portability :  portable

Lower-level functions for parsing PNM image format headers.  Most users
shouldn't need to import this module directly.

-}
module Codec.PNM.Parse where

import Prelude hiding ((++), dropWhile)
import Data.ByteString.Lazy (ByteString, append, empty, singleton, uncons, pack, unpack)
import Data.Word (Word8)
import Data.Char (chr, ord)

-- |  The result of parsing.
data Parse p
  = Wrong
      { parseMalformed :: ByteString
      }
  | Empty
  | Parse
      { parseResult :: p
      , parseRawResult :: ByteString
      , parseRemainder :: ByteString
      }
  deriving (Eq, Ord, Show)

-- |  The next character.
rawChar :: ByteString -> Parse Word8
rawChar s = case uncons s of
  Nothing -> Empty
  Just (c, cs) -> Parse c (singleton c) cs

-- |  The next non-comment character.  Comments can occur in the middle
--    of what might be considered tokens.
char :: ByteString -> Parse Word8
char s = case uncons s of
  Nothing -> Empty
  Just (c, cs)
    -- handle comments: "# ... EOL" can occur anywhere, including mid-token.
    | isStartComment c -> case dropWhile (not . isEndComment) rawChar s of
        Wrong _ -> Wrong s
        Empty -> Wrong s
        Parse () ds es -> case uncons es of
            Nothing -> Wrong s
            Just (f, fs)
              | isEndComment f -> case char fs of
                  Wrong _ -> Wrong s
                  Empty -> Wrong s
                  Parse g gs hs -> Parse g (ds ++ singleton f ++ gs) hs
              | otherwise -> Wrong s  -- should never happen
    | otherwise -> Parse c (singleton c) cs

-- |  Drop input while a predicate holds.
dropWhile :: (Word8 -> Bool) -> (ByteString -> Parse Word8) -> ByteString -> Parse ()
dropWhile p get s = case get s of
  Wrong _ -> Wrong s
  Empty -> Empty
  Parse c cs ds
    | p c -> case dropWhile p get ds of
        Wrong _ -> Wrong s
        Empty -> Parse () cs ds
        Parse () es fs -> Parse () (cs ++ es) fs
    | otherwise -> Parse () empty s

-- |  Take input until a predicate holds.
takeUntil :: (Word8 -> Bool) -> (ByteString -> Parse Word8) -> ByteString -> Parse ByteString
takeUntil p get s = case get s of
  Wrong _ -> Wrong s
  Empty -> Empty
  Parse c cs ds
    | p c -> Parse empty empty s
    | otherwise -> case takeUntil p get ds of
        Wrong _ -> Wrong s
        Empty -> Parse (singleton c) cs ds
        Parse e es fs -> Parse (singleton c ++ e) (cs ++ es) fs

-- |  Parse a token.
token :: ByteString -> Parse ByteString
token s = case dropWhile isSpace char s of
  Wrong _ -> Wrong s
  Empty -> Empty
  Parse () cs ds -> case takeUntil isSpace char ds of
    Wrong _ -> Wrong s
    Empty -> Wrong s
    Parse t ts rs -> Parse t (cs ++ ts) rs

-- |  Parse a fixed string.
string :: ByteString -> ByteString -> Parse ()
string match s = case token s of
  Empty -> Empty
  Parse t ts rs | t == match -> Parse () ts rs
  _ -> Wrong s

-- |  Parse a positive decimal number.
number :: ByteString -> Parse Integer
number s = case token s of
  Parse t ts rs -> case decimal t of
    Just n -> Parse n ts rs
    _ -> Wrong s
  _ -> Wrong s

-- |  Number conversion.
decimal :: ByteString -> Maybe Integer
decimal s = case unpack s of
  ts@(t:_)
    | all isDigit ts && t /= d0 -> case reads (map (chr . fromEnum) ts) of
        [(n, "")] -> Just n
        _ -> Nothing  -- should never happen?
  _ -> Nothing

-- |  Parse a single space.
oneSpace :: ByteString -> Parse ()
oneSpace s = case char s of
  Parse c cs rs | isSpace c -> Parse () cs rs
  _ -> Wrong s

-- |  Convert from a string.  Crashes hard on non-ASCII input.
str :: String -> ByteString
str = pack . map (toEnum . ord) -- toEnum crashes on out of bounds

-- | Character classes.
isSpace, isDigit, isStartComment, isEndComment :: Word8 -> Bool
isSpace c = c `elem` [ht, lf, vt, ff, cr, space]
isDigit c = c `elem` [d0, d1, d2, d3, d4, d5, d6, d7, d8, d9]
isStartComment c = c == hash
isEndComment c = c == lf || c == cr

-- |  White space characters.
ht, lf, vt, ff, cr, space :: Word8
ht    =  9
lf    = 10
vt    = 11
ff    = 12
cr    = 13
space = 32

-- |  Comment start character.
hash :: Word8
hash  = 35

-- |  Decimal digit characters.
d0, d1, d2, d3, d4, d5, d6, d7, d8, d9 :: Word8
d0 = 48
d1 = 49
d2 = 50
d3 = 51
d4 = 52
d5 = 53
d6 = 54
d7 = 55
d8 = 56
d9 = 57

-- |  Alias for 'append'.
(++) :: ByteString -> ByteString -> ByteString
(++) = append