{- | Module : Codec.PNM.Parse Copyright : (c) Claude Heiland-Allen 2012 License : BSD3 Maintainer : claude@mathr.co.uk Stability : provisional Portability : portable Lower-level functions for parsing PNM image format headers. Most users shouldn't need to import this module directly. -} module Codec.PNM.Parse where import Prelude hiding ((++), dropWhile) import Data.ByteString.Lazy (ByteString, append, empty, singleton, uncons, pack, unpack) import Data.Word (Word8) import Data.Char (chr, ord) -- | The result of parsing. data Parse p = Wrong { parseMalformed :: ByteString } | Empty | Parse { parseResult :: p , parseRawResult :: ByteString , parseRemainder :: ByteString } deriving (Eq, Ord, Show) -- | The next character. rawChar :: ByteString -> Parse Word8 rawChar s = case uncons s of Nothing -> Empty Just (c, cs) -> Parse c (singleton c) cs -- | The next non-comment character. Comments can occur in the middle -- of what might be considered tokens. char :: ByteString -> Parse Word8 char s = case uncons s of Nothing -> Empty Just (c, cs) -- handle comments: "# ... EOL" can occur anywhere, including mid-token. | isStartComment c -> case dropWhile (not . isEndComment) rawChar s of Wrong _ -> Wrong s Empty -> Wrong s Parse () ds es -> case uncons es of Nothing -> Wrong s Just (f, fs) | isEndComment f -> case char fs of Wrong _ -> Wrong s Empty -> Wrong s Parse g gs hs -> Parse g (ds ++ singleton f ++ gs) hs | otherwise -> Wrong s -- should never happen | otherwise -> Parse c (singleton c) cs -- | Drop input while a predicate holds. dropWhile :: (Word8 -> Bool) -> (ByteString -> Parse Word8) -> ByteString -> Parse () dropWhile p get s = case get s of Wrong _ -> Wrong s Empty -> Empty Parse c cs ds | p c -> case dropWhile p get ds of Wrong _ -> Wrong s Empty -> Parse () cs ds Parse () es fs -> Parse () (cs ++ es) fs | otherwise -> Parse () empty s -- | Take input until a predicate holds. takeUntil :: (Word8 -> Bool) -> (ByteString -> Parse Word8) -> ByteString -> Parse ByteString takeUntil p get s = case get s of Wrong _ -> Wrong s Empty -> Empty Parse c cs ds | p c -> Parse empty empty s | otherwise -> case takeUntil p get ds of Wrong _ -> Wrong s Empty -> Parse (singleton c) cs ds Parse e es fs -> Parse (singleton c ++ e) (cs ++ es) fs -- | Parse a token. token :: ByteString -> Parse ByteString token s = case dropWhile isSpace char s of Wrong _ -> Wrong s Empty -> Empty Parse () cs ds -> case takeUntil isSpace char ds of Wrong _ -> Wrong s Empty -> Wrong s Parse t ts rs -> Parse t (cs ++ ts) rs -- | Parse a fixed string. string :: ByteString -> ByteString -> Parse () string match s = case token s of Empty -> Empty Parse t ts rs | t == match -> Parse () ts rs _ -> Wrong s -- | Parse a positive decimal number. number :: ByteString -> Parse Integer number s = case token s of Parse t ts rs -> case decimal t of Just n -> Parse n ts rs _ -> Wrong s _ -> Wrong s -- | Number conversion. decimal :: ByteString -> Maybe Integer decimal s = case unpack s of ts@(t:_) | all isDigit ts && t /= d0 -> case reads (map (chr . fromEnum) ts) of [(n, "")] -> Just n _ -> Nothing -- should never happen? _ -> Nothing -- | Parse a single space. oneSpace :: ByteString -> Parse () oneSpace s = case char s of Parse c cs rs | isSpace c -> Parse () cs rs _ -> Wrong s -- | Convert from a string. Crashes hard on non-ASCII input. str :: String -> ByteString str = pack . map (toEnum . ord) -- toEnum crashes on out of bounds -- | Character classes. isSpace, isDigit, isStartComment, isEndComment :: Word8 -> Bool isSpace c = c `elem` [ht, lf, vt, ff, cr, space] isDigit c = c `elem` [d0, d1, d2, d3, d4, d5, d6, d7, d8, d9] isStartComment c = c == hash isEndComment c = c == lf || c == cr -- | White space characters. ht, lf, vt, ff, cr, space :: Word8 ht = 9 lf = 10 vt = 11 ff = 12 cr = 13 space = 32 -- | Comment start character. hash :: Word8 hash = 35 -- | Decimal digit characters. d0, d1, d2, d3, d4, d5, d6, d7, d8, d9 :: Word8 d0 = 48 d1 = 49 d2 = 50 d3 = 51 d4 = 52 d5 = 53 d6 = 54 d7 = 55 d8 = 56 d9 = 57 -- | Alias for 'append'. (++) :: ByteString -> ByteString -> ByteString (++) = append