------------------------------------------------------------------------
-- Parser combinators with support for left recursion, following
-- Johnson's "Memoization in Top-Down Parsing"
------------------------------------------------------------------------

-- This implementation is based on an implementation due to Atkey
-- (attached to an edlambda-members mailing list message from
-- 2011-02-15 titled 'Slides for "Introduction to Parser
-- Combinators"').

-- Note that non-memoised left recursion is not guaranteed to work.

-- The code contains an important deviation from Johnson's paper: the
-- check for subsumed results is not included. This means that one can
-- get the same result multiple times when parsing using ambiguous
-- grammars. As an example, parsing the empty string using S ∷= ε | ε
-- succeeds twice. This change also means that parsing fails to
-- terminate for some cyclic grammars that would otherwise be handled
-- successfully, such as S ∷= S | ε. However, the library is not
-- intended to handle infinitely ambiguous grammars. (It is unclear to
-- me whether the change leads to more non-termination for grammars
-- which are not cyclic.)

-- Another change has also been made: the user does not have to insert
-- memoisation annotations manually. Instead all grammar non-terminals
-- are memoised.

{-# LANGUAGE FlexibleInstances, MultiParamTypeClasses, RankNTypes #-}

module MemoisedCPS (Parser, parse) where

import Control.Applicative
import Control.Monad.State.Lazy
import Data.Array
import Data.List
import qualified Data.IntMap as IntMap
import Data.IntMap (IntMap)

import qualified IndexedMap as IMap
import IndexedOrd
import qualified Parser

-- | Positions.

type Pos = Int

-- | State monad used by the parser.

type M nt tok b = State (IntMap (IMap.Map nt (Value nt tok b)))

-- | Continuations.

type Cont nt tok b a = Pos -> a -> M nt tok b [b]

-- | Memoised values.

data Value nt tok b a = Value
  { results       :: IntMap [a]
  , continuations :: [Cont nt tok b a]
  }

-- | The parser type.

newtype Parser nt tok a =
  P { unP :: forall b.
             Parser.Grammar (Parser nt tok) nt ->
             Array Pos tok ->
             Pos ->
             Cont nt tok b a ->
             M nt tok b [b]
    }

instance Monad (Parser nt tok) where
  return x  = P $ \_ _ i k -> k i x
  P p >>= f = P $ \g input i k ->
    p g input i $ \j x -> unP (f x) g input j k

instance Functor (Parser nt tok) where
  fmap f p = p >>= return . f

instance Applicative (Parser nt tok) where
  pure  = return
  (<*>) = ap

instance Alternative (Parser nt tok) where
  empty         = P $ \_ _ _ _ -> return []
  P p1 <|> P p2 = P $ \g input i k ->
    liftM2 (++) (p1 g input i k) (p2 g input i k)

-- | Runs the parser.

parse :: Parser.Grammar (Parser nt tok) nt ->
         Parser nt tok r -> [tok] -> [r]
parse g p toks =
  flip evalState IntMap.empty $
  unP p g (listArray (0, n - 1) toks) 0 $ \j x ->
    if j == n then return [x] else return []
  where n = genericLength toks

instance Parser.Parser (Parser nt tok) tok where
  symbol = P $ \_ input i k ->
    if inRange (bounds input) i then
      k (i + 1) (input ! i)
     else
      return []

  parse = parse (const empty)

-- | Non-terminals are memoised.

instance Parser.NTParser (Parser nt tok) nt tok where
  nonTerm nt = P $ \grammar input i k -> do

    let alter j zero f m =
          IntMap.alter (Just . f . maybe zero id) j m

        lookupTable   = fmap (\m -> IMap.lookup nt =<<
                                    IntMap.lookup i m) get
        insertTable v = modify $ alter i IMap.empty (IMap.insert nt v)

    v <- lookupTable
    case v of
      Nothing -> do
        insertTable (Value IntMap.empty [k])
        unP (grammar nt) grammar input i $ \j r -> do
          Just (Value rs ks) <- lookupTable
          insertTable (Value (alter j [] (r :) rs) ks)
          concat <$> mapM (\k -> k j r) ks  -- See note [Reverse ks?].
      Just (Value rs ks) -> do
        insertTable (Value rs (k : ks))
        concat . concat <$>
          mapM (\(i, rs) -> mapM (k i) rs) (IntMap.toList rs)

  parseWithGrammar = parse

-- [Reverse ks?]
--
-- If ks were reversed, then the code would be productive for some
-- infinitely ambiguous grammars, including S ∷= S | ε. However, in
-- some cases the results would not be fair (some valid results would
-- never be returned).