{-# LANGUAGE StrictData #-}

module Little.Earley.Internal.Core where

import Control.Monad.State
import Data.Foldable (for_)
import Data.Maybe (fromMaybe)

import Data.Sequence (Seq)
import qualified Data.Sequence as Seq
import Data.Set (Set)
import qualified Data.Set as Set

-- | Grammars with non-terminal symbols @n@, terminal symbols @t@, and tokens @c@.
--
-- A grammar defines a language, which is a set of sequences of tokens @c@.
--
-- Two basic choices for @t@ and @c@ are:
--
-- - @t = 'Little.Earley.Examples.CharT'@ and @c = Char@,
--   with @'match' = 'Little.Earley.Examples.matchCharT'@: then the input @[c]@ is a @String@.
--
-- - @t = String@ and @c = String@, with @'match' = (==)@:
--   then the input @[c]@ is a @[String]@, which can be produced using 'words';
--   just remember to put spaces around operators and parentheses.
--
-- See also examples in "Little.Earley.Examples".
data Grammar n t c = Grammar
  { rules :: n -> [Rule n t]  -- ^ Production rules associated with each non-terminal symbol.
  , match :: t -> c -> Bool   -- ^ Match a token @c@ with a terminal symbol @t@.
  , isNullable :: n -> Bool
    -- ^ Predicate for non-terminal symbols which may expand to the empty string.
    -- This function MUST be correct for the library to work.
    -- It can be populated automatically using 'mkGrammar'.
  }

-- | Construct a grammar given the fields 'rules' and 'match',
-- implicitly populating 'isNullable'.
mkGrammar :: (Ord n, Bounded n, Enum n) => (n -> [Rule n t]) -> (t -> c -> Bool) -> Grammar n t c
mkGrammar rs mc = Grammar
  { rules = rs
  , match = mc
  , isNullable = (`Set.member` vs)
  } where vs = nullableSymbols rs [minBound .. maxBound]

-- | Compute the set of non-terminal symbols which may expand to the empty string,
-- given an enumeration of all non-terminal symbols.
nullableSymbols :: Ord n => (n -> [Rule n t]) -> [n] -> Set n
nullableSymbols g ns = loop Set.empty where
  loop vs =
    let vs' = Set.fromList (filter (isNullable_ vs) ns) in
    if vs == vs' then vs else loop vs'
  isNullable_ vs n = any (all (isNullableS vs)) (g n)
  isNullableS vs (N n) = n `Set.member` vs
  isNullableS _vs (T _) = False

-- | A production rule is a sequence of atoms.
type Rule n t = [Atom n t]

-- | An atom is either a non-terminal or a terminal.
data Atom n t = N n | T t
  deriving (Eq, Ord, Show)

-- | A rule can be identified by a non-terminal and an index into all
-- of the associated rules of that non-terminal.
data RuleId n = RuleId n Int
  deriving (Eq, Ord, Show)

data Item n t = Item (RuleId n) [Atom n t] Int
  deriving (Eq, Ord, Show)

data S n t = S
  { itemSets :: Seq (Set (Item n t))
  , currItemSet :: Set (Item n t)
  , nextItemSet :: Set (Item n t)
  , currIndex :: Int
  , todo :: [Item n t]
  } deriving (Eq, Show)

emptyS :: S n t
emptyS = S
  { itemSets = Seq.empty
  , currItemSet = Set.empty
  , nextItemSet = Set.empty
  , currIndex = 0
  , todo = []
  }

initialS :: (Ord n, Ord t) => Grammar n t c -> n -> S n t
initialS g n =
  emptyS { currItemSet = Set.fromList (newItems g 0 n) }

newItems :: Ord n => Grammar n t c -> Int -> n -> [Item n t]
newItems g i n =
  let rs = rules g n in
  map (\(ri_, r) -> Item (RuleId n ri_) r i) (zip [0 ..] rs)

allItemSets :: S n t -> Seq (Set (Item n t))
allItemSets s = itemSets s Seq.|> currItemSet s Seq.|> nextItemSet s

type Parser n t = State (S n t)

next :: Parser n t ()
next = modify' (\s ->
  let items | currIndex s == -1 = Seq.empty
            | otherwise = itemSets s Seq.|> currItemSet s in
  s { itemSets = items
    , currItemSet = nextItemSet s
    , nextItemSet = Set.empty
    , currIndex = currIndex s + 1
    , todo = []
    })

addItemCurr :: (Ord n, Ord t) => Item n t -> Parser n t ()
addItemCurr item = do
  s <- get
  if Set.member item (currItemSet s) then
    pure ()
  else do
    put (s
      { currItemSet = Set.insert item (currItemSet s)
      , todo = item : todo s })

-- Invariant: @i <= currIndex s@
-- In particular, never look up @nextItemSet s@.
lookupItemSet :: Int -> Parser n t (Set (Item n t))
lookupItemSet i = gets (\s ->
  if i == currIndex s then
    currItemSet s
  else
    fromMaybe (error "Should exist") (Seq.lookup i (itemSets s)))

addItemNext :: (Ord n, Ord t) => Item n t -> Parser n t ()
addItemNext item = do
  modify' (\s -> s { nextItemSet = Set.insert item (nextItemSet s) })

stepItem :: (Ord n, Ord t) => Grammar n t c -> Maybe c -> Item n t -> Parser n t ()
stepItem g _c (Item ri (N n : xs) iStart) = do
  i <- gets currIndex
  for_ (newItems g i n) addItemCurr
  if isNullable g n then addItemCurr (Item ri xs iStart) else pure ()
stepItem _ _ (Item (RuleId n _) [] iStart) = do
  sset <- lookupItemSet iStart
  for_ sset (\(Item ri' xs' iStart') ->
    case xs' of
      N n' : xs1' | n == n' -> addItemCurr (Item ri' xs1' iStart')
      _ -> pure ())
stepItem g (Just c) (Item ri (T t : xs) iStart) | match g t c = addItemNext (Item ri xs iStart)
stepItem _ _ (Item _ (T _ : _) _) = pure ()

step :: (Ord n, Ord t) => Grammar n t c -> Maybe c -> Parser n t ()
step g c = gets (Set.toList . currItemSet) >>= go where
  go [] = pure ()
  go td = do
    modify' (\s -> s { todo = [] })
    for_ td (stepItem g c)
    gets todo >>= go

steps :: (Ord n, Ord t) => Grammar n t c -> [c] -> Parser n t ()
steps g = go where
  go (c : cs) = step g (Just c) >> next >> go cs
  go [] = step g Nothing

data Seq1 a = Seq a ::> a
  deriving (Eq, Show)

-- Returns nonempty sequence.
preparse :: (Ord n, Ord t) => Grammar n t c -> n -> [c] -> Seq1 (Set (Item n t))
preparse g n cs = itemSets' (execState (steps g cs) (initialS g n))
  where
    itemSets' s = itemSets s ::> currItemSet s
    -- nextItemSet should be empty at the end after (step g Nothing)

-- | Check whether a grammar matches a chain of character @[c]@ from a starting symbol @n@.
accepts :: (Ord n, Ord t) => Grammar n t c -> n -> [c] -> Bool
accepts g n cs =
  case preparse g n cs of
    _ ::> items ->
      (not . null) [() | Item (RuleId n' _) [] 0 <- Set.toList items, n == n']