-- GenI surface realiser
-- Copyright (C) 2005 Carlos Areces and Eric Kow
--
-- This program is free software; you can redistribute it and/or
-- modify it under the terms of the GNU General Public License
-- as published by the Free Software Foundation; either version 2
-- of the License, or (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE DeriveDataTypeable #-}

{-|
The heavy lifting of GenI, the whole chart/agenda mechanism, can be
implemented in many ways.  To make it easier to write different
algorithms for GenI and compare them, we provide a single interface
for what we call Builders.

This interface is then used called by the Geni module and by the
graphical interface.  Note that each builder has its own graphical
interface and that we do a similar thing in the graphical interface
code to make it possible to use these GUIs.
-}

module NLP.GenI.Builder (
 TagDerivation, Builder(..), GenStatus(..),
 lexicalSelection, FilterStatus(..),incrCounter, num_iterations,
 (>-->),
 num_comparisons, chart_size,
 SemBitMap, defineSemanticBits, semToBitVector, bitVectorToSem, DispatchFilter, condFilter,
 defaultStepAll,
 BuilderState, UninflectedDisjunction(..), Input(..), unlessEmptySem,
 initStats, Output, SentenceAut, run, queryCounter, defaultMetricNames, preInit
)
where

import Control.Monad.State.Strict
import Data.Bits ( (.&.), (.|.), bit )
import Data.List ( delete, sort, nub )
import Data.Maybe ( mapMaybe, fromMaybe, maybeToList )
import Data.Tree ( flatten )
import Prelude hiding ( init )
import Data.Text ( Text )
import qualified Data.Map as Map
import qualified Data.Text as T

import Control.DeepSeq

import Data.Generics ( Data )
import Data.Typeable ( Typeable )

import NLP.GenI.Automaton (NFA, automatonPaths, automatonPathSets, numStates, numTransitions)
import NLP.GenI.Configuration
  ( getListFlagP, getFlagP, modifyFlagP, Params,
    DetectPolaritiesFlg(..),
    MetricsFlg(..),
    RootFeatureFlg(..),
    Optimisation(..), hasOpt,
  )
import NLP.GenI.FeatureStructure ( Flist, sortFlist, mkFeatStruct )
import NLP.GenI.General ( BitVector, snd3, thd3, geniBug )
import NLP.GenI.GeniVal ( GeniVal, DescendGeniVal(..), Collectable(collect), finaliseVarsById )
import NLP.GenI.Lexicon ( LexEntry )
import NLP.GenI.Morphology.Types
import NLP.GenI.Polarity  (PolResult(..), buildAutomaton, detectPolPaths)
import NLP.GenI.Pretty
import NLP.GenI.Semantics ( SemInput, Sem, Literal )
import NLP.GenI.Statistics (Statistics, incrIntMetric,
                   Metric(IntMetric), updateMetrics,
                   queryMetrics, queryIntMetric,
                   addMetric, emptyStats,
                   )
import NLP.GenI.Tag
    ( TagElem(idname,tsemantics,ttree), setTidnums
    , TagDerivation, dsChild, dsParent
    )
import NLP.GenI.TreeSchema ( GNode(..), GType(Subs, Foot) )

data GenStatus = Finished
               | Active
               | Error Text

data Builder st it pa = Builder
  { init     :: Input -> pa -> (st, Statistics)
             -- ^ initialise the machine from the semantics and lexical selection 
  , step     :: BuilderState st () -- ^ run a realisation step
  , stepAll  :: BuilderState st () -- ^ run all realisations steps until completion
  --
  , finished :: st -> GenStatus    -- ^ determine if realisation is finished
  , unpack   :: st -> [Output]     -- ^ unpack chart results into a list of sentences
  , partial  :: st -> [Output]
  }

type Output = (Integer, LemmaPlusSentence, TagDerivation)

-- | To simplify interaction with the backend, we provide a single data
--   structure which represents all the inputs a backend could take.

data Input = 
  Input { inSemInput :: SemInput
        , inLex      :: [LexEntry] -- ^ for the debugger
        , inCands    :: [(TagElem, BitVector)]   -- ^ tag tree
        }

-- Uninflected words and sentences

-- | A SentenceAut represents a set of sentences in the form of an automaton.
--   The labels of the automaton are the words of the sentence.  But note! 
--   “word“ in the sentence is in fact a tuple (lemma, inflectional feature
--   structures).  Normally, the states are defined as integers, with the
--   only requirement being that each one, naturally enough, is unique.
type SentenceAut            = NFA Int LemmaPlus

data UninflectedDisjunction = UninflectedDisjunction [Text] (Flist GeniVal) deriving (Data, Typeable)

instance DescendGeniVal UninflectedDisjunction where
  descendGeniVal s (UninflectedDisjunction a v) = {-# SCC "descendGeniVal" #-} UninflectedDisjunction a (descendGeniVal s v)

instance Collectable UninflectedDisjunction where
  collect (UninflectedDisjunction _ b) = collect b

-- BuilderState

-- To cleanly seperate the tracking of statistics from the core functionality of a
-- builder, we use a State transformer to thread a Statistics state monad inside of
-- our main monad.
type BuilderState s a = StateT s (State Statistics) a

-- ----------------------------------------------------------------------
-- Helper functions for Builders
-- ----------------------------------------------------------------------

-- Initialisation
--
-- There's a few things that need to be run before even initialising the builder.
-- One of these is running some of the optimisations (namely the polarity stuff),
-- which is made complicated by the fact that they are optional.  Another of these
-- to assign each of the trees with a unique ID.  Note that this has to be done
-- after the polarity optimisation because this optimisation may introduce new
-- items into the lexical selection.  Finally, we must also make sure we perform
-- alpha conversion so that unification does not do the wrong thing when two trees
-- have the same variables.

preInit :: Input -> Params -> (Input, PolResult)
preInit input config =
 let (cand,_) = unzip $ inCands input
     seminput = inSemInput input
     --
     extraPol = Map.empty
     polsToDetect = fromMaybe (error "there should be a default for --detect-pols")
                  $ getFlagP DetectPolaritiesFlg config
     rootFeat = mkFeatStruct $ getListFlagP RootFeatureFlg config
     -- do any optimisations
     isPol = hasOpt Polarised config
     -- polarity optimisation (if enabled)
     autstuff = buildAutomaton polsToDetect rootFeat extraPol seminput cand
     autpaths = map concat . automatonPathSets . prFinal $ autstuff
     combosPol = if isPol then autpaths else [considerHasSem cand]
     considerHasSem = filter (not . null . tsemantics)
     -- polarity automaton construction uses the zero literal semantic
     -- items, but it may be safer to filter them out now if we are not
     -- using it
     -- chart sharing optimisation
     (cands2, pathIds) = unzip $ detectPolPaths combosPol
     --
     fixate ts ps = zip (map finaliseVarsById $ setTidnums ts) ps
     input2 = input { inCands    = fixate cands2 pathIds
                    , inSemInput = (prSem autstuff, snd3 seminput, thd3 seminput) }
     -- note: autstuff is only useful for the graphical debugger
  in (input2, autstuff)

-- | Equivalent to 'id' unless the input contains an empty or uninstatiated
--   semantics
unlessEmptySem :: Input -> Params -> a -> a
unlessEmptySem input _
    | null semanticsErr = id
    | otherwise         = error semanticsErr
  where
    (cands,_) = unzip $ inCands input
    nullSemCands   = [ idname t | t <- cands, (null.tsemantics) t ]
    unInstSemCands = [ idname t | t <- cands, not $ Map.null $ collect (tsemantics t) Map.empty ]
    nullSemErr     =
        "The following trees have a null semantics: " ++
         T.unpack (T.unwords nullSemCands)
    unInstSemErr   =
        "The following trees have an uninstantiated semantics: " ++
        T.unpack (T.unwords unInstSemCands)
    semanticsErr =
        (if null nullSemCands   then "" else nullSemErr ++ "\n") ++
        (if null unInstSemCands then "" else unInstSemErr)

-- ----------------------------------------------------------------------
-- Running a surface realiser
-- ----------------------------------------------------------------------

-- | Performs surface realisation from an input semantics and a lexical selection.
--
--   Statistics tracked
--
--    * pol_used_bundles - number of bundled paths through the polarity automaton.
--                         see 'NLP.GenI.Automaton.automatonPathSets'
--
--    * pol_used_paths - number of paths through the final automaton
--
--    * pol_seed_paths - number of paths through the seed automaton (i.e. with no polarities).
--                       This is normally just 1, unless you have multi-literal semantics
--
--    * pol_total_states - combined number of states in the all the polarity automata
--
--    * pol_total_tras - combined number of transitions in all polarity automata
--
--    * pol_max_states - number of states in the polarity automaton with the most states
--
--    * pol_total_tras - number of transitions in the polarity automata with the most transitions
--
--    * sem_literals    - number of literals in the input semantics
--
--    * lex_trees       - total number of lexically selected trees

--    * lex_foot_nodes  - total number of nodes of any sort in lexically selected trees
--
--    * lex_subst_nodes - total number of sustitution nodes in lexically selected trees
--
--    * lex_foot_nodes  - total number of foot nodes in lexically selected trees
--
--    * plex_...        - same as the lex_ equivalent, but after polarity filtering
run :: Builder st it Params -> Input -> Params -> (st, Statistics)
run builder input config_ =
  let -- 0 normalise the config
      config = modifyFlagP RootFeatureFlg sortFlist config_
      -- 1 run the setup stuff
      (input2, autstuff) = preInit input config
      auts = map snd3 (prIntermediate autstuff)
      -- 2 call the init stuff
      (iSt, iStats) = init builder input2 config
      -- 2b extra statistics
      autpaths = map concat . automatonPathSets . prFinal $ autstuff
      countsFor ts = (length ts, length nodes, length sn, length an)
        where nodes = concatMap (flatten.ttree) ts
              sn = [ n | n <- nodes, gtype n == Subs  ]
              an = [ n | n <- nodes, gtype n == Foot  ]
      (tsem,_,_) = inSemInput input
      cands = nub . map fst $ inCands input
      cands2 = nub . concatMap concat . automatonPathSets . prFinal $ autstuff
      countUp = do incrCounter "sem_literals"  $ length tsem
                   --
                   incrCounter "lex_subst_nodes" snl
                   incrCounter "lex_foot_nodes"  anl
                   incrCounter "lex_nodes"        nl
                   incrCounter "lex_trees"        tl
                   -- node count after polarities are taken into account
                   incrCounter "plex_subst_nodes" snl2
                   incrCounter "plex_foot_nodes"  anl2
                   incrCounter "plex_nodes"        nl2
                   incrCounter "plex_trees"        tl2
                where (tl , nl , snl , anl ) = countsFor cands
                      (tl2, nl2, snl2, anl2) = countsFor cands2
      -- 3 step through the whole thing
      stepAll_ = do countUp
                    incrCounter "pol_used_bundles" $ length autpaths
                    incrCounter "pol_used_paths"   $ length . automatonPaths . prFinal   $ autstuff
                    incrCounter "pol_seed_paths"   $ length . automatonPaths . prInitial $ autstuff
                    incrCounter "pol_total_states" $ sum $ map numStates auts
                    incrCounter "pol_total_trans"  $ sum $ map numTransitions auts
                    incrCounter "pol_max_states"   $ maximum $ map numStates auts
                    incrCounter "pol_max_trans"    $ maximum $ map numTransitions auts
                    stepAll builder
  in runState (execStateT stepAll_ iSt) iStats

-- ----------------------------------------------------------------------
-- Semantics and bit vectors
-- ----------------------------------------------------------------------

type SemBitMap = Map.Map (Literal GeniVal) BitVector

-- | assign a bit vector value to each literal in the semantics
-- the resulting map can then be used to construct a bit vector
-- representation of the semantics
defineSemanticBits :: Sem -> SemBitMap
defineSemanticBits sem = Map.fromList $ zip sem bits
  where
   bits = map bit [0..] -- 0001, 0010, 0100...

semToBitVector :: SemBitMap -> Sem -> BitVector
semToBitVector bmap sem = foldr (.|.) 0 $ map doLookup sem
  where doLookup p =
         case Map.lookup p bmap of
         Nothing -> geniBug $ "predicate " ++ prettyStr p ++ " not found in semanticBit map"
         Just b  -> b

bitVectorToSem :: SemBitMap -> BitVector -> Sem
bitVectorToSem bmap vector =
  mapMaybe tryKey $ Map.toList bmap
  where tryKey (p,k) = if (k .&. vector == k) then Just p else Nothing

-- ----------------------------------------------------------------------
-- Generate step
-- ----------------------------------------------------------------------

-- | Default implementation for the 'stepAll' function in 'Builder'
defaultStepAll :: Builder st it pa -> BuilderState st ()
defaultStepAll b =
 do s <- get
    case finished b s of
      Active -> step b >> defaultStepAll b
      _      -> return ()

-- | Dispatching consists of assigning a chart item to the right part of the
--   chart (agenda, trash, results list, etc).  This is implemented as a
--   series of filters which can either fail or succeed.  If a filter fails,
--   it may modify the item before passing it on to future filters.
type DispatchFilter s a = a -> s (FilterStatus a)

data FilterStatus a = Filtered | NotFiltered a

-- | Sequence two dispatch filters.
(>-->) :: (Monad s) => DispatchFilter s a -> DispatchFilter s a -> DispatchFilter s a
f >--> f2 = \x -> f x >>= next
 where
  next y@Filtered = return y
  next (NotFiltered x2) = f2 x2

-- | If the item meets some condition, use the first filter, otherwise
--   use the second one.
condFilter :: (Monad s) => (a -> Bool)
           -> DispatchFilter s a -> DispatchFilter s a
           -> DispatchFilter s a
condFilter cond f1 f2 = \x -> if cond x then f1 x else f2 x

-- ----------------------------------------------------------------------
-- Statistics
-- ----------------------------------------------------------------------

modifyStats :: (Metric -> Metric) -> BuilderState st ()
modifyStats fn = lift $ modify $ updateMetrics fn

incrCounter :: String -> Int -> BuilderState st ()
incrCounter key n = modifyStats (incrIntMetric key n)

queryCounter :: String -> Statistics -> Maybe Int
queryCounter key s =
  case queryMetrics (queryIntMetric key) s of
  []  -> Nothing
  [c] -> Just c
  _   -> geniBug $ "More than one instance of the metric: " ++ key

-- Command line configuration

initStats :: Params -> Statistics
initStats pa =
 let mdefault ms = if "default" `elem` ms then defaultMetricNames else []
     identifyMs :: [String] -> [Metric]
     identifyMs ms = map namedMetric $ mdefault ms ++ delete "default" ms
     metrics = identifyMs $ fromMaybe [] $ getFlagP MetricsFlg pa
 in execState (mapM_ addMetric metrics) emptyStats

namedMetric :: String -> Metric
-- the default case is that it's an int metric
namedMetric n = IntMetric n 0

-- Note that the strings here are command-line strings, not metric names!
defaultMetricNames :: [ String ]
defaultMetricNames = [ num_iterations, chart_size, num_comparisons, gen_time ]

-- Common counters
--
-- These numbers allow us to keep track of how efficient our generator is
-- and where we are in the process (how many steps we've taken, etc)

num_iterations, chart_size, num_comparisons, gen_time :: String

num_iterations  = "iterations"
chart_size      = "chart_size"
num_comparisons = "comparisons"
gen_time = "gen_time"

-- ----------------------------------------------------------------------
-- strictly API-ish bits
-- ----------------------------------------------------------------------

-- | The names of lexically selected chart items used in a derivation
lexicalSelection :: TagDerivation -> [Text]
lexicalSelection = sort . nub
                 . concatMap (\d -> dsChild d : maybeToList (dsParent d))

{-!
deriving instance NFData Input
!-}

-- GENERATED START

 
instance NFData Input where
        rnf (Input x1 x2 x3) = rnf x1 `seq` rnf x2 `seq` rnf x3 `seq` ()

-- GENERATED STOP