-- -----------------------------------------------------------------------------
-- 
-- NFA.hs, part of Alex
--
-- (c) Chris Dornan 1995-2000, Simon Marlow 2003
--
-- The `scanner2nfa' takes a `Scanner' (see the `RExp' module) and
-- generates its equivelent nondeterministic finite automaton.  NFAs
-- are turned into DFAs in the DFA module.
-- 
-- See the chapter on `Finite Automata and Lexical Analysis' in the
-- dragon book for an excellent overview of the algorithms in this
-- module.
--
-- ----------------------------------------------------------------------------}

module NFA where

import AbsSyn
import CharSet ( CharSet, charSetToArray )
import DFS ( t_close, out )
import Map ( Map )
import qualified Map hiding ( Map )
import Util ( str, space )

import Control.Monad ( zipWithM, zipWithM_ )
import Data.Array ( Array, (!), array, listArray, assocs, bounds )
--import Debug.Trace

-- Each state of a nondeterministic automaton contains a list of `Accept'
-- values, a list of epsilon transitions (an epsilon transition represents a
-- transition to another state that can be made without reading a character)
-- and a list of transitions qualified with a character predicate (the
-- transition can only be made to the given state on input of a character
-- permitted by the predicate).  Although a list of `Accept' values is provided
-- for, in actual fact each state will have zero or one of them (the `Maybe'
-- type is not used because the flexibility offered by the list representation
-- is useful).

type NFA = Array SNum NState

data NState = NSt {
 nst_accs :: [Accept Code],
 nst_cl   :: [SNum],
 nst_outs :: [(CharSet,SNum)]
 }

-- Debug stuff
instance Show NState where
  showsPrec _ (NSt accs cl outs) =
    str "NSt " . shows accs . space . shows cl . space .
	shows [ (charSetToArray c, s) | (c,s) <- outs ]

{- 			     From the Scan Module

-- The `Accept' structure contains the priority of the token being accepted
-- (lower numbers => higher priorities), the name of the token, a place holder
-- that can be used for storing the `action' function, a list of start codes
-- (listing the start codes that the scanner must be in for the token to be
-- accepted; empty => no restriction), the leading and trailing context (both
-- `Nothing' if there is none).
--  
-- The leading context consists simply of a character predicate that will
-- return true if the last character read is acceptable.  The trailing context
-- consists of an alternative starting state within the DFA; if this `sub-dfa'
-- turns up any accepting state when applied to the residual input then the
-- trailing context is acceptable.
-}


-- `scanner2nfa' takes a scanner (see the AbsSyn module) and converts it to an
-- NFA, using the NFA creation monad (see below).
--
-- We generate a start state for each startcode, with the same number
-- as that startcode, and epsilon transitions from this state to each
-- of the sub-NFAs for each of the tokens acceptable in that startcode.

scanner2nfa:: Scanner -> [StartCode] -> NFA
scanner2nfa Scanner{scannerTokens = toks} startcodes
   = runNFA $
        do
	  -- make a start state for each start code (these will be
	  -- numbered from zero).
	  start_states <- sequence (replicate (length startcodes) newState)
	  
	  -- construct the NFA for each token
	  tok_states <- zipWithM do_token toks [0..]

	  -- make an epsilon edge from each state state to each
	  -- token that is acceptable in that state
	  zipWithM_ (tok_transitions (zip toks tok_states)) 
		startcodes start_states

	where
	  do_token (RECtx _scs lctx re rctx code) prio = do
		b <- newState
		e <- newState
		rexp2nfa b e re

		rctx_e <- case rctx of
				  NoRightContext ->
					return NoRightContext
				  RightContextCode code' ->
					return (RightContextCode code')
				  RightContextRExp re' -> do 
					r_b <- newState
					r_e <- newState
		 			rexp2nfa r_b r_e re'
					accept r_e rctxt_accept
					return (RightContextRExp r_b)

		accept e (Acc prio code lctx rctx_e)
		return b

	  tok_transitions toks_with_states start_code start_state = do
		let states = [ s | (RECtx scs _ _ _ _, s) <- toks_with_states,
			           null scs || start_code `elem` map snd scs ]
		mapM_ (epsilonEdge start_state) states

-- -----------------------------------------------------------------------------
-- NFA creation from a regular expression

-- rexp2nfa B E R generates an NFA that begins in state B, recognises
-- R, and ends in state E only if R has been recognised. 

rexp2nfa :: SNum -> SNum -> RExp -> NFAM ()
rexp2nfa b e Eps    = epsilonEdge b e
rexp2nfa b e (Ch p) = charEdge b p e
rexp2nfa b e (re1 :%% re2) = do
  s <- newState
  rexp2nfa b s re1
  rexp2nfa s e re2
rexp2nfa b e (re1 :| re2) = do
  rexp2nfa b e re1
  rexp2nfa b e re2
rexp2nfa b e (Star re) = do
  s <- newState
  epsilonEdge b s
  rexp2nfa s s re
  epsilonEdge s e
rexp2nfa b e (Plus re) = do
  s1 <- newState
  s2 <- newState
  rexp2nfa s1 s2 re
  epsilonEdge b s1
  epsilonEdge s2 s1
  epsilonEdge s2 e
rexp2nfa b e (Ques re) = do
  rexp2nfa b e re
  epsilonEdge b e

-- -----------------------------------------------------------------------------
-- NFA creation monad.

-- Partial credit to Thomas Hallgren for this code, as I adapted it from
-- his "Lexing Haskell in Haskell" lexer generator.

type MapNFA = Map SNum NState

newtype NFAM a = N {unN :: SNum -> MapNFA -> (SNum, MapNFA, a)}

instance Monad NFAM where
  return a = N $ \s n -> (s,n,a)

  m >>= k  = N $ \s n -> case unN m s n of
				 (s', n', a) -> unN (k a) s' n'

runNFA :: NFAM () -> NFA
runNFA m = case unN m 0 Map.empty of
		(s, nfa_map, ()) -> -- trace (show (Map.toAscList nfa_map)) $ 
				    e_close (array (0,s-1) (Map.toAscList nfa_map))

e_close:: Array Int NState -> NFA
e_close ar = listArray bds
		[NSt accs (out gr v) outs|(v,NSt accs _ outs)<-assocs ar]
	where
	gr = t_close (hi+1,\v->nst_cl (ar!v))
	bds@(_,hi) = bounds ar

newState :: NFAM SNum
newState = N $ \s n -> (s+1,n,s)

charEdge :: SNum -> CharSet -> SNum -> NFAM ()
charEdge from charset to = N $ \s n -> (s, addEdge n, ())
 where
   addEdge n =
     case Map.lookup from n of
       Nothing -> 
	   Map.insert from (NSt [] [] [(charset,to)]) n
       Just (NSt acc eps trans) ->
	   Map.insert from (NSt acc eps ((charset,to):trans)) n

epsilonEdge :: SNum -> SNum -> NFAM ()
epsilonEdge from to 
 | from == to = return ()
 | otherwise  = N $ \s n -> (s, addEdge n, ())
 where
   addEdge n =
     case Map.lookup from n of
       Nothing 			-> Map.insert from (NSt [] [to] []) n
       Just (NSt acc eps trans) -> Map.insert from (NSt acc (to:eps) trans) n

accept :: SNum -> Accept Code -> NFAM ()
accept state new_acc = N $ \s n -> (s, addAccept n, ())
 where
   addAccept n = 
     case Map.lookup state n of
       Nothing ->
	   Map.insert state (NSt [new_acc] [] []) n
       Just (NSt acc eps trans) ->
	   Map.insert state (NSt (new_acc:acc) eps trans) n


rctxt_accept :: Accept Code
rctxt_accept = Acc 0 Nothing Nothing NoRightContext