module FormalLanguage.GrammarProduct.Op.Chomsky where

import Control.Applicative
import Control.Lens
import Control.Lens.Fold
import Control.Newtype ()
import Data.Function (on)
import Data.List (genericReplicate,replicate,groupBy)
import Data.Maybe
import Data.Monoid hiding ((<>))
import Data.Semigroup
import qualified Data.Set as S
import Text.Printf
import System.IO.Unsafe

import FormalLanguage.CFG.Grammar
import FormalLanguage.CFG.Parser
import FormalLanguage.CFG.PrettyPrint.ANSI

import FormalLanguage.GrammarProduct.Op.Common



{-

newtype CNF = CNF { runCNF :: Grammar }

instance Semigroup CNF where
  (CNF g) <> (CNF h) = CNF $ Grammar ts ns es rs s (g^.name ++ h^.name) where
    ts = S.fromList $ g^..tsyms.folded ++ h^..tsyms.folded
    ns = collectNonTerminals rs -- this is needed since we generate completely new non-terminal symbols
    es = S.fromList $ g^..epsis.folded ++ h^..epsis.folded
    rs = S.fromList
       . concat
       $ [ chomskyCombine l r | l <- g^..rules.folded, r <- h^..rules.folded ]
    s  = liftA2 (\l r -> Symb $ l^.symb ++ r^.symb) (g^.start) (h^.start)

instance Monoid CNF where
  mempty = CNF $ Grammar S.empty S.empty S.empty (S.singleton undefined) (Just $ Symb []) ""
  mappend = (<>)

-- | Combine production rules a la Chomsky normal form.
--
-- TODO We need to be able to generate fresh rule name, as we are splitting
-- rules here! (this means that we need to lift this stuff into a
-- name-generating monad)

chomskyCombine :: Rule -> Rule -> [Rule]
chomskyCombine (Rule l f rs) (Rule a g bs)
  | [r] <- rs, [b] <- bs, isSymbT r, isSymbT b
  = [Rule (Symb $ l^.symb ++ a^.symb) [] {- (f++g) -} [Symb $ r^.symb ++ b^.symb]]
  | [r1,r2] <- rs, [b1,b2] <- bs, isSymbN r1, isSymbN r2, isSymbN b1, isSymbN b2
  = [Rule (Symb $ l^.symb ++ a^.symb) [] {- (f++g) -} [Symb $ r1^.symb ++ b1^.symb, Symb $ r2^.symb ++ b2^.symb]]
  | [r] <- rs, [b1,b2] <- bs, isSymbT r, isSymbN b1, isSymbN b2
  = let (z1,zs1) = symbToRules r b1
        (z2,zs2) = symbToRules r b2
    in  zs1 ++ zs2 ++ {-concatMap (extendRederive (length $ l^.symb) (length $ a^.symb))-}
        [ Rule (Symb $ l^.symb ++ a^.symb) [] {- (f++g) -} [ {- Symb $ r^.symb  ++ b1^.symb -} z1 , Symb $ genEps r ++ b2^.symb]
        , Rule (Symb $ l^.symb ++ a^.symb) [] {- (f++g) -} [Symb $ genEps r ++ b1^.symb, z2 {- Symb $ r^.symb  ++ b2^.symb -} ]
        ]
  | [r1,r2] <- rs, [b] <- bs, isSymbN r1, isSymbN r2, isSymbT b
  = let (z1,zs1) = symbToRules r1 b
        (z2,zs2) = symbToRules r2 b
    in  zs1 ++ zs2 ++ {-concatMap (extendRederive (length $ l^.symb) (length $ a^.symb))-}
        [ Rule (Symb $ l^.symb ++ a^.symb) [] {- (f++g) -} [{- Symb $ r1^.symb ++ b^.symb -} z1 , Symb $ r2^.symb ++ genEps b]
        , Rule (Symb $ l^.symb ++ a^.symb) [] {- (f++g) -} [Symb $ r1^.symb ++ genEps b, z2 {- Symb $ r2^.symb ++ b^.symb -} ]
        ]
  --
  -- extended Chomsky: Non-terminal -> Non-terminal
  --
  {-
  | [r] <- rs, [b] <- bs, nSymb r, nSymb b
  = [ Rule (Symb $ l^.symb ++ a^.symb) [] [ Symb $ r^.symb ++ b^.symb ] ]
  | [r] <- rs, [b1,b2] <- bs, nSymb r, nSymb b1, nSymb b2
  = []
  | [r1,r2] <- rs, [b] <- bs, nSymb r1, nSymb r2, nSymb b
  = []
  | [r] <- rs, [b] <- bs
  = []
  -}
  {-
  = [ Rule (Symb $ l^.symb ++ a^.symb) [] [ Symb $ r^.symb  ++ b1^.symb, Symb $ genEps r ++ b2^.symb ]
    , Rule (Symb $ l^.symb ++ a^.symb) [] [ Symb $ genEps r ++ b1^.symb, Symb $ r^.symb  ++ b2^.symb ]
    ]
  -}
  --
  -- extended Chomsky above
  --
  | otherwise = unsafePerformIO $ do
      print "======"
      printDoc $ rulesDoc $ S.singleton $ Rule l f rs
      printDoc $ rulesDoc $ S.singleton $ Rule a g bs
      fail "cannot handle (rule is not CNF):"
  -- \| otherwise = error $ "cannot handle (rule is not CNF): " ++ show (printDoc $ rulesDoc $ S.singleton $ Rule l f rs, Rule a g bs)

{-
-- | Extend mixed rules and rederive CNF

extendRederive :: Int -> Int -> Rule -> [Rule]
extendRederive α β (Rule l f [r1,r2])
  | not (tSymb r1) && not (nSymb r1) && nSymb r2
  = let (newN,epsN,trmN,epsT) = genNewSymbols α β r1
    in  [ Rule l    f           [newN,r2]
        , Rule newN ( {- "nwNL_": -} f) [epsN,trmN]
        , Rule newN ( {- "nwNR_": -} f) [trmN,epsN]
        , Rule trmN ( {- "trmN_": -} f) [epsT]
        ]
  | nSymb r1 && not (tSymb r2) && not (nSymb r2)
  = let (newN,epsN,trmN,epsT) = genNewSymbols α β r2
    in  [ Rule l    f           [r1,newN]
        , Rule newN ( {- "nwNL_": -} f) [epsN,trmN]
        , Rule newN ( {- "nwNR_": -} f) [trmN,epsN]
        , Rule trmN ( {- "trmN_": -} f) [epsT]
        ]
extendRederive _ _ r = error $ "cannot handle (rule not in extendRederive form for CNF): " ++ show r

genNewSymbols :: Int -> Int -> Symb -> (Symb,Symb,Symb,Symb)
genNewSymbols α β x = (newN, epsN, trmN, epsT) where
  -- the new non-terminal, with term TN's replaced by non-term TN with same name (plus extension)
  newN = Symb . map (\case (T s) -> N ("N"++s) Singular ; z -> z) $ x^.symb
  -- the new non-terminal, with terms replaced by epsilons
  epsN = Symb . map (\case (T s) -> eps                 ; z -> z) $ x^.symb
  -- the new non-terminal for the terminal symbol, with terms replaced by non-term symbols
  -- TODO we can't just replace all N here with eps, tome could have been created from other prods.
  trmN = Symb . map (\case (T s) -> N ("T"++s) Singular ; N _ _ -> eps; z -> z) $ x^.symb
  -- finally the terminal
  epsT = Symb . map (\case (N _ _) -> eps               ; z -> z) $ x^.symb
-}

-- |

symbToRules :: Symb -> Symb -> (Symb, [Rule])
symbToRules u' l'
  | isSymbN u' && isSymbT l' = go u' l'
  | isSymbT u' && isSymbN l' = let (s,rs) = go (over symb reverse l') (over symb reverse u')
                             in  ( over symb reverse s
                                 , map (\(Rule l [] rs) -> Rule (over symb reverse l) [] (map (over symb reverse) rs)) rs
                                 )
  | otherwise            = error $ "incompatible upper/lower: " ++ show (u',l')
  where
    -- in 'n' we have the partial non-terminal, in 't' the partial terminal
    go n t =
      let t' = Symb $ map (\case (T s) -> (N ("T"++s) Singular) ; z -> z) $ t^.symb
      in  ( Symb $ n^.symb ++ t'^.symb
          , [ Rule (Symb $ n^.symb ++ t'^.symb) [] [ Symb $ n^.symb ++ genEps t, Symb $ genEps n ++ genTermStar t ]
            , Rule (Symb $ n^.symb ++ t'^.symb) [] [ Symb $ genEps n ++ genTermStar t, Symb $ n^.symb ++ genEps t ]
            , Rule (Symb $ genEps n ++ genTermStar t)  [] [ Symb $ genEps n ++ t^.symb ]
            ]
          )

-- | Generate a certain number of epsilons

genTermStar :: Symb -> [TN]
genTermStar s = map (\case (T s) -> N ("S"++s) Singular ; z -> z) $ s^.symb

-}