src/Text/ParserCombinators/ReadP/ByteString.hs

{-# OPTIONS_GHC -fglasgow-exts -cpp -fno-implicit-prelude #-}
-----------------------------------------------------------------------------
-- |
-- Module      :  Text.ParserCombinators.ReadP.ByteString
-- Copyright   :  (c) The University of Glasgow 2002
--             :  (c) Gracjan Polak 2007
-- License     :  BSD-style (see the file libraries/base/LICENSE)
--
-- Maintainer  :  gracjanpolak@gmail.com
-- Stability   :  provisional
-- Portability :  non-portable (local universal quantification)
--
-- This is a library of parser combinators, originally written by Koen Claessen.
-- It parses all alternatives in parallel, so it never keeps hold of
-- the beginning of the input string, a common source of space leaks with
-- other parsers.  The '('+++')' choice combinator is genuinely commutative;
-- it makes no difference which branch is \"shorter\".
--
-- Adapted to use 'Data.ByteString' by Gracjan Polak. Designed as a drop-in
-- replacement for 'Text.ParserCombinators.ReadP'.
--
-----------------------------------------------------------------------------

module Text.ParserCombinators.ReadP.ByteString
  (
  -- * The 'ReadP' type
  ReadP,      -- :: * -> *; instance Functor, Monad, MonadPlus

  -- * Primitive operations
  skip,       -- :: ReadP Word8
  look,       -- :: ReadP ByteString
  (+++),      -- :: ReadP a -> ReadP a -> ReadP a
  (<++),      -- :: ReadP a -> ReadP a -> ReadP a
  countsym,   -- :: ReadP a -> ReadP (Int, a)

  -- * Other operations
  get,        -- :: ReadP Word8
  pfail,      -- :: ReadP a
  satisfy,    -- :: (Word8 -> Bool) -> ReadP Word8
  char,       -- :: Word8 -> ReadP Word8
  string,     -- :: ByteString -> ReadP ByteString
  gather,     -- :: ReadP a -> ReadP (ByteString, a)
  munch,      -- :: (Word8 -> Bool) -> ReadP ByteString
  munch1,     -- :: (Word8 -> Bool) -> ReadP ByteString
  skipSpaces, -- :: ReadP ()
  choice,     -- :: [ReadP a] -> ReadP a
  count,      -- :: Int -> ReadP a -> ReadP [a]
  between,    -- :: ReadP open -> ReadP close -> ReadP a -> ReadP a
  option,     -- :: a -> ReadP a -> ReadP a
  optional,   -- :: ReadP a -> ReadP ()
  many,       -- :: ReadP a -> ReadP [a]
  many1,      -- :: ReadP a -> ReadP [a]
  skipMany,   -- :: ReadP a -> ReadP ()
  skipMany1,  -- :: ReadP a -> ReadP ()
  sepBy,      -- :: ReadP a -> ReadP sep -> ReadP [a]
  sepBy1,     -- :: ReadP a -> ReadP sep -> ReadP [a]
  endBy,      -- :: ReadP a -> ReadP sep -> ReadP [a]
  endBy1,     -- :: ReadP a -> ReadP sep -> ReadP [a]
  chainr,     -- :: ReadP a -> ReadP (a -> a -> a) -> a -> ReadP a
  chainl,     -- :: ReadP a -> ReadP (a -> a -> a) -> a -> ReadP a
  chainl1,    -- :: ReadP a -> ReadP (a -> a -> a) -> ReadP a
  chainr1,    -- :: ReadP a -> ReadP (a -> a -> a) -> ReadP a
  manyTill,   -- :: ReadP a -> ReadP end -> ReadP [a]

  -- * Running a parser
  ReadS,      -- :: *; = ByteString -> [(a,ByteString)]
  readP_to_S, -- :: ReadP a -> ReadS a
  readS_to_P, -- :: ReadS a -> ReadP a
  )
 where

import Control.Monad( MonadPlus(..), sequence, liftM2, Monad, (>>), (>>=), return, fail, sequence_ )
import Prelude ((+),fromInteger,(++),Int,Bool(..),(==),error,seq,id,fromIntegral, (-))

import Data.Word
import Data.ByteString hiding (count,foldl')

#ifndef __HADDOCK__
#if defined(__GLASGOW_HASKELL__) && __GLASGOW_HASKELL__<608
import Data.ByteString.Base
#else
import Data.ByteString.Unsafe
#endif
#endif

import Control.Monad ( replicateM )
import Data.List (foldl')
import GHC.Base
import GHC.Exts
import Data.Char ( isSpace )

infixr 5 +++, <++

------------------------------------------------------------------------
-- ReadS

-- | A parser for a type @a@, represented as a function that takes a
-- 'ByteString' and returns a list of possible parses as @(a,'ByteString')@ pairs.
--
-- Note that this kind of backtracking parser is very inefficient;
-- reading a large structure may be quite slow (cf 'ReadP').
type ReadS a = ByteString -> [(a,ByteString)]

-- ---------------------------------------------------------------------------
-- The P type
-- is representation type -- should be kept abstract

data P a
  = Skip {-# UNPACK #-} !Int (P a)
  | Look (ByteString -> P a)
  | Fail
  | Result a (P a)
  | Final [(a,ByteString)] -- invariant: list is non-empty!

-- Monad, MonadPlus

instance Monad (P) where
  return x = Result x Fail

  (Skip n f)   >>= k = Skip n (f >>= k)
  (Look f)     >>= k = Look (\s -> f s >>= k)
  Fail         >>= k = Fail
  (Result x p) >>= k = k x `mplus` (p >>= k)
  (Final r)    >>= k = final [ys' | (x,s) <- r, ys' <- run (k x) s]

  fail _ = Fail

instance MonadPlus (P) where
  mzero = Fail

  -- most common case: two skips are combined
  Skip n1 f1 `mplus` Skip n2 f2 =
      case compare n1 n2 of
          LT -> Skip n1 (f1 `mplus` Skip (n2-n1) f2)
          EQ -> Skip n1 (f1 `mplus` f2)
          GT -> Skip n2 (Skip (n1-n2) f1 `mplus` f2)

  -- results are delivered as soon as possible
  Result x p `mplus` q          = Result x (p `mplus` q)
  p          `mplus` Result x q = Result x (p `mplus` q)

  -- fail disappears
  Fail       `mplus` p          = p
  p          `mplus` Fail       = p

  -- two finals are combined
  -- final + look becomes one look and one final (=optimization)
  -- final + sthg else becomes one look and one final
  Final r    `mplus` Final t    = Final (r ++ t)
  Final r    `mplus` Look f     = Look (\s -> Final (r ++ run (f s) s))
  Final r    `mplus` p          = Look (\s -> Final (r ++ run p s))
  Look f     `mplus` Final r    = Look (\s -> Final (run (f s) s ++ r))
  p          `mplus` Final r    = Look (\s -> Final (run p s ++ r))

  -- two looks are combined (=optimization)
  -- look + sthg else floats upwards
  Look f     `mplus` Look g     = Look (\s -> f s `mplus` g s)
  Look f     `mplus` p          = Look (\s -> f s `mplus` p)
  p          `mplus` Look f     = Look (\s -> p `mplus` f s)

-- ---------------------------------------------------------------------------
-- The ReadP type

newtype ReadP a = R (forall b . (a -> P b) -> P b)

-- Functor, Monad, MonadPlus

instance Functor (ReadP) where
  fmap h (R f) = R (\k -> f (k . h))

instance Monad (ReadP) where
  return x  = R (\k -> k x)
  fail _    = R (\_ -> Fail)
  R m >>= f = R (\k -> m (\a -> let R m' = f a in m' k))

instance MonadPlus (ReadP) where
  mzero = pfail
  mplus = (+++)

-- ---------------------------------------------------------------------------
-- Operations over P

final :: [(a,ByteString)] -> P a
-- Maintains invariant for Final constructor
final [] = Fail
final r  = Final r

run :: P a -> ReadS a
run (Skip n f)  cs | length cs >=n =
         run f (unsafeDrop n cs)
run (Look f)     s     = run (f s) s
run (Result x p) s     = (x,s) : run p s
run (Final r)    _     = r
run _            _     = []

-- ---------------------------------------------------------------------------
-- Operations over ReadP


skip :: Int -> ReadP ()
skip 0 = R (\f -> f ())
skip n = R (\f -> Skip n (f ()))

get :: ReadP Word8
-- ^ Consumes and returns the next character.
--   Fails if there is no input left.
get = do
    s <- look
    skip 1
    return (unsafeHead s)

look :: ReadP ByteString
-- ^ Look-ahead: returns the part of the input that is left, without
--   consuming it.
look = R Look

pfail :: ReadP a
-- ^ Always fails.
pfail = R (\_ -> Fail)

(+++) :: ReadP a -> ReadP a -> ReadP a
-- ^ Symmetric choice.
R f1 +++ R f2 = R (\k -> f1 k `mplus` f2 k)

(<++) :: ReadP a -> ReadP a -> ReadP a
-- ^ Local, exclusive, left-biased choice: If left parser
--   locally produces any result at all, then right parser is
--   not used.
R f <++ q =
  do s <- look
     probe (f return) s 0
 where
  probe (Skip m f)     cs    n  | length cs >= m = probe f (unsafeDrop m cs) (n+m)
  probe (Look f)       s     n = probe (f s) s n
  probe p@(Result _ _) _     n = skip n >> R (p >>=)
  probe (Final r)      _     _ = R (Final r >>=)
  probe _              _     _ = q

gather :: ReadP a -> ReadP (ByteString, a)
-- ^ Transforms a parser into one that does the same, but
--   in addition returns the exact characters read.
--   IMPORTANT NOTE: 'gather' gives a runtime error if its first argument
--   is built using any occurrences of readS_to_P.
gather p = do
    s <- look
    (l,r) <- countsym p
    return (unsafeDrop l s,r)

countsym :: ReadP a -> ReadP (Int, a)
-- ^ Transforms a parser into one that does the same, but
--   in addition returns the exact number of characters read.
--   IMPORTANT NOTE: 'countsym' gives a runtime error if its first argument
--   is built using any occurrences of readS_to_P.
countsym (R m) =
  R (\k -> gath 0 (m (\a -> return (\s -> k (s,a)))))
 where
  gath 0 _   | False  = Fail
  gath l (Skip n f)   = Skip n (gath (l+n) f)
  gath l Fail         = Fail
  gath l (Look f)     = Look (\s -> gath l (f s))
  gath l (Result k p) = k (l) `mplus` gath l p
  gath l (Final r)    = error "do not use readS_to_P in gather or countsym!"

-- ---------------------------------------------------------------------------
-- Derived operations

satisfy :: (Word8 -> Bool) -> ReadP Word8
-- ^ Consumes and returns the next character, if it satisfies the
--   specified predicate.
satisfy p = do
    c <- get
    if p c
        then return c
        else pfail

char :: Word8 -> ReadP Word8
-- ^ Parses and returns the specified character.
char c = satisfy (c ==)

string :: ByteString -> ReadP ByteString
-- ^ Parses and returns the specified string.
string this = do
    s <- look
    let l = length this
    let w = take l s
    if this == w
        then skip (length this) >> return this
        else pfail

munch :: (Word8 -> Bool) -> ReadP ByteString
-- ^ Parses the first zero or more characters satisfying the predicate.
munch p =
  do s <- look
     let k = takeWhile p s
     skip (length k)
     return k

munch1 :: (Word8 -> Bool) -> ReadP ByteString
-- ^ Parses the first one or more characters satisfying the predicate.
munch1 p =
  do s <- look
     let k = takeWhile p s
     if null k
         then pfail
         else skip (length k) >> return k

choice :: [ReadP a] -> ReadP a
-- ^ Combines all parsers in the specified list.
choice []     = pfail
choice [p]    = p
choice (p:ps) = p +++ choice ps

skipSpaces :: ReadP ()
-- ^ Skips all whitespace.
skipSpaces = munch (isSpace . chr . fromIntegral) >> return ()

count :: Int -> ReadP a -> ReadP [a]
-- ^ @count n p@ parses @n@ occurrences of @p@ in sequence. A list of
--   results is returned.
count n p = replicateM n p

between :: ReadP open -> ReadP close -> ReadP a -> ReadP a
-- ^ @between open close p@ parses @open@, followed by @p@ and finally
--   @close@. Only the value of @p@ is returned.
between open close p = do open
                          x <- p
                          close
                          return x

option :: a -> ReadP a -> ReadP a
-- ^ @option x p@ will either parse @p@ or return @x@ without consuming
--   any input.
option x p = p +++ return x

optional :: ReadP a -> ReadP ()
-- ^ @optional p@ optionally parses @p@ and always returns @()@.
optional p = (p >> return ()) +++ return ()

many :: ReadP a -> ReadP [a]
-- ^ Parses zero or more occurrences of the given parser.
many p = return [] +++ many1 p

many1 :: ReadP a -> ReadP [a]
-- ^ Parses one or more occurrences of the given parser.
many1 p = liftM2 (:) p (many p)

skipMany :: ReadP a -> ReadP ()
-- ^ Like 'many', but discards the result.
skipMany p = many p >> return ()

skipMany1 :: ReadP a -> ReadP ()
-- ^ Like 'many1', but discards the result.
skipMany1 p = p >> skipMany p

sepBy :: ReadP a -> ReadP sep -> ReadP [a]
-- ^ @sepBy p sep@ parses zero or more occurrences of @p@, separated by @sep@.
--   Returns a list of values returned by @p@.
sepBy p sep = sepBy1 p sep +++ return []

sepBy1 :: ReadP a -> ReadP sep -> ReadP [a]
-- ^ @sepBy1 p sep@ parses one or more occurrences of @p@, separated by @sep@.
--   Returns a list of values returned by @p@.
sepBy1 p sep = liftM2 (:) p (many (sep >> p))

endBy :: ReadP a -> ReadP sep -> ReadP [a]
-- ^ @endBy p sep@ parses zero or more occurrences of @p@, separated and ended
--   by @sep@.
endBy p sep = many (do x <- p ; sep ; return x)

endBy1 :: ReadP a -> ReadP sep -> ReadP [a]
-- ^ @endBy p sep@ parses one or more occurrences of @p@, separated and ended
--   by @sep@.
endBy1 p sep = many1 (do x <- p ; sep ; return x)

chainr :: ReadP a -> ReadP (a -> a -> a) -> a -> ReadP a
-- ^ @chainr p op x@ parses zero or more occurrences of @p@, separated by @op@.
--   Returns a value produced by a /right/ associative application of all
--   functions returned by @op@. If there are no occurrences of @p@, @x@ is
--   returned.
chainr p op x = chainr1 p op +++ return x

chainl :: ReadP a -> ReadP (a -> a -> a) -> a -> ReadP a
-- ^ @chainl p op x@ parses zero or more occurrences of @p@, separated by @op@.
--   Returns a value produced by a /left/ associative application of all
--   functions returned by @op@. If there are no occurrences of @p@, @x@ is
--   returned.
chainl p op x = chainl1 p op +++ return x

chainr1 :: ReadP a -> ReadP (a -> a -> a) -> ReadP a
-- ^ Like 'chainr', but parses one or more occurrences of @p@.
chainr1 p op = scan
  where scan   = p >>= rest
        rest x = do f <- op
                    y <- scan
                    return (f x y)
                 +++ return x

chainl1 :: ReadP a -> ReadP (a -> a -> a) -> ReadP a
-- ^ Like 'chainl', but parses one or more occurrences of @p@.
chainl1 p op = p >>= rest
  where rest x = do f <- op
                    y <- p
                    rest (f x y)
                 +++ return x

manyTill :: ReadP a -> ReadP end -> ReadP [a]
-- ^ @manyTill p end@ parses zero or more occurrences of @p@, until @end@
--   succeeds. Returns a list of values returned by @p@.
manyTill p end = scan
  where scan = (end >> return []) <++ (liftM2 (:) p scan)

-- ---------------------------------------------------------------------------
-- Converting between ReadP and Read

readP_to_S :: ReadP a -> ReadS a
-- ^ Converts a parser into a Haskell ReadS-style function.
--   This is the main way in which you can \"run\" a 'ReadP' parser:
--   the expanded type is
-- @ readP_to_S :: ReadP a -> ByteString -> [(a,ByteString)] @
readP_to_S (R f) = run (f return)

readS_to_P :: ReadS a -> ReadP a
-- ^ Converts a Haskell ReadS-style function into a parser.
--   Warning: This introduces local backtracking in the resulting
--   parser, and therefore a possible inefficiency.
readS_to_P r =
  R (\k -> Look (\s -> final [bs'' | (a,s') <- r s, bs'' <- run (k a) s']))