{-# OPTIONS_GHC -fno-warn-name-shadowing #-}

{-# LANGUAGE PatternGuards       #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE TupleSections       #-}

module DFAMin (minimizeDFA) where

import AbsSyn

import Data.Map (Map)
import qualified Data.Map as Map
import Data.IntSet (IntSet)
import qualified Data.IntSet as IS
import Data.IntMap (IntMap)
import qualified Data.IntMap as IM
import qualified Data.List as List

-- % Hopcroft's Algorithm for DFA minimization (cut/pasted from Wikipedia):
-- % X refines Y into Y1 and Y2 means
-- %  Y1 := Y ∩ X
-- %  Y2 := Y \ X
-- %  where both Y1 and Y2 are nonempty
--
-- P := {{all accepting states}, {all nonaccepting states}};
-- Q := {{all accepting states}};
-- while (Q is not empty) do
--      choose and remove a set A from Q
--      for each c in ∑ do
--           let X be the set of states for which a transition on c leads to a state in A
--           for each set Y in P for which X refines Y into Y1 and Y2 do
--                replace Y in P by the two sets Y1 and Y2
--                if Y is in Q
--                     replace Y in Q by the same two sets
--                else
--                     add the smaller of the two sets to Q
--           end;
--      end;
-- end;
--
-- % X is a preimage of A under transition function.

-- % observation : Q is always subset of P
-- % let R = P \ Q. then following algorithm is the equivalent of the Hopcroft's Algorithm
--
-- R := {{all nonaccepting states}};
-- Q := {{all accepting states}};
-- while (Q is not empty) do
--      choose a set A from Q
--      remove A from Q and add it to R
--      for each c in ∑ do
--           let X be the set of states for which a transition on c leads to a state in A
--           for each set Y in R for which X refines Y into Y1 and Y2 do
--                replace Y in R by the greater of the two sets Y1 and Y2
--                add the smaller of the two sets to Q
--           end;
--           for each set Y in Q for which X refines Y into Y1 and Y2 do
--                replace Y in Q by the two sets Y1 and Y2
--           end;
--      end;
-- end;
--
-- % The second for loop that iterates over R mutates Q,
-- % but it does not affect the third for loop that iterates over Q.
-- % Because once X refines Y into Y1 and Y2, Y1 and Y2 can't be more refined by X. 

minimizeDFA :: forall a. Ord a => DFA Int a -> DFA Int a
minimizeDFA  dfa@(DFA { dfa_start_states = starts,
                        dfa_states       = statemap
                      })
  = DFA { dfa_start_states = starts,
          dfa_states       = Map.fromList states }
  where
      equiv_classes   :: [EquivalenceClass]
      equiv_classes   = groupEquivStates dfa

      numbered_states :: [(Int, EquivalenceClass)]
      numbered_states = number (length starts) equiv_classes

      -- assign each state in the minimized DFA a number, making
      -- sure that we assign the numbers [0..] to the start states.
      number :: Int -> [EquivalenceClass] -> [(Int, EquivalenceClass)]
      number _ [] = []
      number n (ss:sss) =
        case filter (`IS.member` ss) starts of
          []      -> (n,ss) : number (n+1) sss
          starts' -> map (,ss) starts' ++ number n sss
          -- if one of the states of the minimized DFA corresponds
          -- to multiple starts states, we just have to duplicate
          -- that state.

      states :: [(Int, State Int a)]
      states = [
                let old_states = map (lookup statemap) (IS.toList equiv)
                    accs = map fix_acc (state_acc (head old_states))
                           -- accepts should all be the same
                    out  = IM.fromList [ (b, get_new old)
                                           | State _ out <- old_states,
                                             (b,old) <- IM.toList out ]
                in (n, State accs out)
               | (n, equiv) <- numbered_states
               ]

      fix_acc :: Accept a -> Accept a
      fix_acc acc = acc { accRightCtx = fix_rctxt (accRightCtx acc) }

      fix_rctxt :: RightContext SNum -> RightContext SNum
      fix_rctxt (RightContextRExp s) = RightContextRExp (get_new s)
      fix_rctxt other = other

      lookup :: Ord k => Map k v -> k -> v
      lookup m k = Map.findWithDefault (error "minimizeDFA") k m

      get_new :: Int -> Int
      get_new = lookup old_to_new

      old_to_new :: Map Int Int
      old_to_new = Map.fromList [ (s,n) | (n,ss) <- numbered_states,
                                          s <- IS.toList ss ]

type EquivalenceClass = IntSet

groupEquivStates :: forall a. Ord a => DFA Int a -> [EquivalenceClass]
groupEquivStates DFA { dfa_states = statemap }
  = go init_r init_q
  where
    accepting, nonaccepting :: Map Int (State Int a)
    (accepting, nonaccepting) = Map.partition acc statemap
       where acc (State as _) = not (List.null as)

    nonaccepting_states :: EquivalenceClass
    nonaccepting_states = IS.fromList (Map.keys nonaccepting)

    -- group the accepting states into equivalence classes
    accept_map :: Map [Accept a] [Int]
    accept_map = {-# SCC "accept_map" #-}
      List.foldl' (\m (n,s) -> Map.insertWith (++) (state_acc s) [n] m)
             Map.empty
             (Map.toList accepting)

    accept_groups :: [EquivalenceClass]
    accept_groups = map IS.fromList (Map.elems accept_map)

    init_r, init_q :: [EquivalenceClass]
    init_r  -- Issue #71: each EquivalenceClass needs to be a non-empty set
      | IS.null nonaccepting_states = []
      | otherwise                   = [nonaccepting_states]
    init_q = accept_groups

    -- a map from token T to
    --   a map from state S to the set of states that transition to
    --   S on token T
    -- bigmap is an inversed transition function classified by each input token.
    -- the codomain of each inversed function is a set of states rather than single state
    -- since a transition function might not be an injective.
    -- This is a cache of the information needed to compute xs below
    bigmap :: IntMap (IntMap EquivalenceClass)
    bigmap = IM.fromListWith (IM.unionWith IS.union)
                [ (i, IM.singleton to (IS.singleton from))
                | (from, state) <- Map.toList statemap,
                  (i,to) <- IM.toList (state_out state) ]

    -- The outer loop: recurse on each set in R and Q
    go :: [EquivalenceClass] -> [EquivalenceClass] -> [EquivalenceClass]
    go r [] = r
    go r (a:q) = uncurry go $ List.foldl' go0 (a:r,q) xs
      where
        preimage :: IntMap EquivalenceClass -- inversed transition function
                 -> EquivalenceClass        -- subset of codomain of original transition function
                 -> EquivalenceClass        -- preimage of given subset
#if MIN_VERSION_containers(0, 6, 0)
        preimage invMap a = IS.unions (IM.restrictKeys invMap a)
#else
        preimage invMap a = IS.unions [IM.findWithDefault IS.empty s invMap | s <- IS.toList a]
#endif

        xs :: [EquivalenceClass]
        xs =
          [ x
          | invMap <- IM.elems bigmap
          , let x = preimage invMap a
          , not (IS.null x)
          ]

        refineWith
          :: EquivalenceClass -- preimage set that bisects the input equivalence class
          -> EquivalenceClass -- input equivalence class
          -> Maybe (EquivalenceClass, EquivalenceClass) -- refined equivalence class
        refineWith x y =
          if IS.null y1 || IS.null y2
            then Nothing
            else Just (y1, y2)
          where
            y1 = IS.intersection y x
            y2 = IS.difference   y x

        go0 (r,q) x = go1 r [] []
          where
            -- iterates over R
            go1 []    r' q' = (r', go2 q q')
            go1 (y:r) r' q' = case refineWith x y of
              Nothing                       -> go1 r (y:r') q'
              Just (y1, y2)
                | IS.size y1 <= IS.size y2  -> go1 r (y2:r') (y1:q')
                | otherwise                 -> go1 r (y1:r') (y2:q')

            -- iterates over Q
            go2 []    q' = q'
            go2 (y:q) q' = case refineWith x y of
              Nothing       -> go2 q (y:q')
              Just (y1, y2) -> go2 q (y1:y2:q')