{-# OPTIONS_HADDOCK not-home #-}
{-# LANGUAGE TemplateHaskell #-}
-- | Creating Earley parsers.

module Pinchot.Earley where

import Pinchot.Names
import Pinchot.RecursiveDo
import Pinchot.Rules
import Pinchot.Types

import Control.Applicative ((<|>), liftA2)
import Data.Data (Data)
import Data.Foldable (foldlM)
import Data.List.NonEmpty (NonEmpty((:|)))
import qualified Language.Haskell.TH as T
import qualified Language.Haskell.TH.Syntax as Syntax
import qualified Text.Earley

earleyTerm
  :: Eq t
  => NonEmpty t
  -> Text.Earley.Prod r e (t, a) (NonEmpty (t, a))
earleyTerm (fore :| aft) = (:|) <$> parseHead <*> parseRest
  where
    parseHead = parse fore
    parseRest = foldr (liftA2 (:) . parse) (pure []) aft
    parse t = Text.Earley.satisfy ((== t) . fst)

branchToParser
  :: Syntax.Lift t
  => String
  -- ^ Module prefix
  -> Branch t
  -> Namer T.ExpQ
branchToParser prefix (Branch name rules) = do
  case rules of
    [] -> return [| pure $constructor |]
    (Rule rule1 _ _) : xs -> do
      rule1Name <- getName rule1
      let z = [| $constructor <$> $(T.varE rule1Name) |]
          f soFar (Rule rule2 _ _) = do
            rule2Name <- getName rule2
            return [| $soFar <*> $(T.varE rule2Name) |]
      foldlM f z xs
  where
    constructor = do
      ctorName <- lookupValueName (quald prefix name)
      T.conE ctorName

-- | Creates a list of pairs.  Each pair represents a statement in
-- @do@ notation.  The first element of the pair is the name of the
-- variable to which to bind the result of the expression, which is
-- the second element of the pair.

ruleToParser
  :: (Syntax.Lift t, Data t)
  => String
  -- ^ Module prefix
  -> Rule t
  -> Namer [(T.Name, T.ExpQ)]
ruleToParser prefix (Rule nm mayDescription rt) = do
  bindName <- getName nm
  let desc = maybe nm id mayDescription
      makeRule expression = (bindName,
        [|Text.Earley.rule ($expression Text.Earley.<?> desc)|])
      constructor = do
        ctorName <- lookupValueName (quald prefix nm)
        T.conE ctorName
      wrapper wrapRule = [|fmap $constructor $(T.varE wrapRule) |]
  case rt of
    Terminal (Predicate pdct) -> return [makeRule expression]
      where
        expression = do
          ctorName <- lookupValueName (quald prefix nm)
          [| let f (c, a)
                  | $(fmap T.unType pdct) c = Just
                      ($(T.conE ctorName) (c, a))
                  | otherwise = Nothing
            in Text.Earley.terminal f |]

    NonTerminal (b1 :| bs) -> do
      let addBranch tree branch = do
            branchParserExpn <- branchToParser prefix branch
            return [| $tree <|> $branchParserExpn |]
      branch1 <- branchToParser prefix b1
      expression <- foldlM addBranch branch1 bs
      return [makeRule expression]

    Wrap (Rule innerNm _ _) -> do
      innerName <- getName innerNm
      let expression = [| fmap $constructor $(T.varE innerName) |]
      return [makeRule expression]

    Record sq -> do
      expression <- case sq of
        [] -> return [| pure $constructor |]
        Rule r1 _ _ : restFields -> do
          r1Name <- getName r1
          let fstField = [| $constructor <$> $(T.varE r1Name) |]
              addField soFar (Rule r _ _) = do
                rName <- getName r
                return [| $soFar <*> $(T.varE rName) |]
          foldlM addField fstField restFields
      return [makeRule expression]

    Opt (Rule innerNm _ _) -> do
      innerName <- getName innerNm
      let just = [| fmap Just $(T.varE innerName) |]
          expression = [| fmap $constructor (pure Nothing <|> $(just)) |]
      return [makeRule expression]

    Star (Rule innerNm _ _) -> do
      innerName <- getName innerNm
      helperName <- namerNewName
      let pList = [| liftA2 (:) $(T.varE innerName) $(T.varE helperName) |]
          pChoose = T.uInfixE [|pure []|] [|(<|>)|] pList
          nestRule = (helperName, ([|Text.Earley.rule|] `T.appE` pChoose))
      return [nestRule, makeRule (wrapper helperName)]

    Plus (Rule innerNm _ _) -> do
      innerName <- getName innerNm
      helperName <- namerNewName
      let pList = [| (:) <$> $(T.varE innerName) <*> $(T.varE helperName) |]
          pChoose = [| pure [] <|> $pList |]
          nestRule = (helperName, [|Text.Earley.rule $pChoose |])
          topExpn = [| $constructor <$>
            ( (:|) <$> $(T.varE innerName) <*> $(T.varE helperName)) |]
      return [nestRule, makeRule topExpn]

    Series neSeq -> do
      let expn = [| fmap $constructor $ earleyTerm $(Syntax.liftData neSeq) |]
      return [makeRule expn]


-- | Creates an expression that has type
--
-- 'Text.Earley.Grammar' r (Prod r String (c, a) (p c a))
--
-- where @r@ is left universally quantified; @c@ is the terminal
-- type (often 'Char'), @a@ is arbitrary metadata about each token
-- (often 'Loc') and @p@ is the data type corresponding to
-- the given 'Rule'.
--
-- Example:  'Pinchot.Examples.Earley.addressGrammar'.
earleyGrammarFromRule
  :: (Data t, Syntax.Lift t)
  => Qualifier
  -- ^ Module prefix holding the data types created with
  -- 'Pinchot.syntaxTrees'
  -> Rule t
  -- ^ Create a grammar for this 'Rule'
  -> T.Q T.Exp
earleyGrammarFromRule prefix r@(Rule top _ _) = do
  (binds, topName) <- runNamer $ do
    bnds <- fmap concat . sequence . fmap (ruleToParser prefix) . family $ r
    topN <- getName top
    return (bnds, topN)
  let final = [| return $(T.varE topName) |]
  recursiveDo binds final

-- | Creates a record data type that holds a value of type
--
-- @'Text.Earley.Prod' r 'String' (t, a) (p t a)@
--
-- where
--
-- * @r@ is left universally quantified
--
-- * @t@ is the token type (often 'Char')
--
-- * @a@ is any additional information about each token (often
-- 'Pinchot.Loc')
--
-- * @p@ is the type of the particular production
--
-- This always creates a single product type whose name is
-- @Productions@; currently the name cannot be configured.
--
-- Example: "Pinchot.Examples.AllRulesRecord".
allRulesRecord
  :: Qualifier
  -- ^ Qualifier for data types corresponding to those created from
  -- the 'Rule's
  -> [Rule t]
  -- ^ A record is created that holds a value for each 'Rule'
  -- in the 'Seq', as well as for every ancestor of these 'Rule's.
  -> T.DecsQ
  -- ^ When spliced, this will create a single declaration that is a
  -- record with the name @Productions@.
  --
  -- @a'NAME@
  --
  -- where @NAME@ is the name of the type.  Don't count on these
  -- records being in any particular order.
allRulesRecord prefix ruleSeq
  = sequence [T.dataD (return []) productions
                      tys Nothing [con] (return [])]
  where
    tys = [tyVarBndrR, tyVarBndrT, tyVarBndrA]
    con = T.recC productions
        (fmap mkRecord . families $ ruleSeq)
    mkRecord (Rule ruleNm _ _) = T.varBangType (recordName ruleNm) st
      where
        st = T.bangType (T.bang T.noSourceUnpackedness T.noSourceStrictness) ty
          where
            ty = do
              ctorName <- lookupTypeName (quald prefix ruleNm)
              [t| Text.Earley.Prod $typeR String ($typeT, $typeA)
                      ( $(T.conT ctorName) $typeT $typeA) |]

-- | Creates a 'Text.Earley.Grammar' that contains a
-- 'Text.Earley.Prod' for every given 'Rule' and its ancestors.
-- Example: 'Pinchot.Examples.Earley.addressAllProductions'.
earleyProduct
  :: (Data t, Syntax.Lift t)

  => Qualifier
  -- ^ Qualifier for data types corresponding to those created from
  -- the 'Rule's

  -> Qualifier
  -- ^ Qualifier for the type created with 'allRulesRecord'

  -> [Rule t]
  -- ^ Creates an Earley grammar that contains a 'Text.Earley.Prod'
  -- for each 'Rule' in this list, as well as all the ancestors of
  -- these 'Rule's.

  -> T.ExpQ
  -- ^ When spliced, 'earleyProduct' creates an expression whose
  -- type is @'Text.Earley.Grammar' r (Productions r t a)@, where
  -- @Productions@ is
  -- the type created by 'allRulesRecord'; @r@ is left universally
  -- quantified; @t@ is the token type (often 'Char'), and @a@ is
  -- any additional information about each token (often
  -- 'Pinchot.Loc').
earleyProduct pfxRule pfxRec ruleSeq = do
  (binds, topName) <- runNamer $ do
    let fams = families ruleSeq
    bnds <- fmap concat . sequence . fmap (ruleToParser pfxRule) $ fams
    let allRuleNames = fmap _ruleName fams
    allRuleBindNames <- traverse getName allRuleNames
    let mkRec ruleName bindName = (qualRecordName pfxRec ruleName, T.VarE bindName)
        ruleBindNamePairs = zipWith mkRec allRuleNames allRuleBindNames
        convertPair (str, expn) = do
          nm <- lookupValueName str
          return (nm, expn)
        final = do
          recName <- lookupValueName (quald pfxRec productionsStr)
          [| return $(T.recConE recName (fmap convertPair ruleBindNamePairs)) |]
    return (bnds, final)
  recursiveDo binds topName