src/NLP/GenI/Simple/SimpleBuilder.lhs

% GenI surface realiser % Copyright (C) 2005 Carlos Areces and Eric Kow % % This program is free software; you can redistribute it and/or % modify it under the terms of the GNU General Public License % as published by the Free Software Foundation; either version 2 % of the License, or (at your option) any later version. % % This program is distributed in the hope that it will be useful, % but WITHOUT ANY WARRANTY; without even the implied warranty of % MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the % GNU General Public License for more details. % % You should have received a copy of the GNU General Public License % along with this program; if not, write to the Free Software % Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. \chapter{SimpleBuilder} \label{cha:SimpleBuilder} A SimpleBuilder is a Builder which constructs derived trees using a simple agenda control mechanism and two-phase realisation (substitution before adjunction). There is no packing strategy whatsoever; each chart item is a derived tree. \begin{code}

{-# OPTIONS_GHC -fglasgow-exts #-}
-- {-# LANGUAGE LiberalTypeSynonyms #-}
module NLP.GenI.Simple.SimpleBuilder (
   -- Types
   Agenda, AuxAgenda, Chart, SimpleStatus, SimpleState,
   SimpleItem(..),

   -- From SimpleStatus
   simpleBuilder_1p, simpleBuilder_2p, simpleBuilder,
   theAgenda, theAuxAgenda, theChart, theResults,
   initSimpleBuilder,
   addToAgenda, addToChart,
   genconfig,

#ifndef DISABLE_GUI
   SimpleGuiItem(..),
   theTrash, unpackResult,
#endif
   )
where

\end{code} \ignore{ \begin{code}

import Control.Monad (when, liftM2)
import Control.Monad.State
  (get, put, modify, gets, runState, execStateT)

import Data.List
  (partition, delete, foldl', unfoldr, sortBy)
import Data.Maybe (isJust, isNothing)
import Data.Ord (comparing)
import Data.Bits
import qualified Data.Map as Map
import Data.Tree

import NLP.GenI.Statistics (Statistics)

import NLP.GenI.Automaton ( automatonPaths, NFA(..), addTrans )
import NLP.GenI.Btypes
  ( Ptype(Initial,Auxiliar)
  , Replacable(..), replaceOneAsMap
  , GNode(..), NodeName
  , root, foot
  , plugTree, spliceTree
  , unifyFeat, Flist, Subst, mergeSubst
  )
import NLP.GenI.Builder (
    incrCounter, num_iterations, num_comparisons, chart_size,
    SemBitMap, defineSemanticBits, semToBitVector, bitVectorToSem,
    DispatchFilter, (>-->), condFilter, nullFilter,
    semToIafMap, IafAble(..), IafMap, fromUniConst, getIdx,
    recalculateAccesibility, iafBadSem, ts_iafFailure,
    )
import qualified NLP.GenI.Builder as B

import NLP.GenI.Tags (TagElem, TagSite(TagSite),
             tagLeaves, tidnum,
             ttree, ttype, tsemantics,
             detectSites,
             TagDerivation,
             ts_rootFeatureMismatch,
            )
import NLP.GenI.Configuration
import NLP.GenI.General
 ( BitVector, mapMaybeM, mapTree', geniBug, preTerminals, )

#ifndef DISABLE_GUI
import NLP.GenI.Btypes ( GType(Other), sortSem, Sem, gnnameIs )
import NLP.GenI.General ( repList, )
import NLP.GenI.Tags ( idname,
    ts_synIncomplete, ts_semIncomplete, ts_tbUnificationFailure,
    )
#endif

\end{code} } % -------------------------------------------------------------------- \section{The Builder interface} % -------------------------------------------------------------------- Here is our implementation of Builder. \begin{code}

type SimpleBuilder = B.Builder SimpleStatus SimpleItem Params
simpleBuilder_2p, simpleBuilder_1p :: SimpleBuilder
simpleBuilder_2p = simpleBuilder True
simpleBuilder_1p = simpleBuilder False

simpleBuilder :: Bool -> SimpleBuilder
simpleBuilder twophase = B.Builder
  { B.init     = initSimpleBuilder twophase
  , B.step     = if twophase then generateStep_2p else generateStep_1p
  , B.stepAll  = B.defaultStepAll (simpleBuilder twophase)
  , B.finished = \s -> (null.theAgenda) s && (not twophase || step s == Auxiliar)
  , B.unpack   = unpackResults.theResults
  , B.partial  = unpackResults.partialResults
  }

\end{code} % -------------------------------------------------------------------- \section{Key types} % -------------------------------------------------------------------- \begin{code}

type Agenda = [SimpleItem]
type AuxAgenda  = [SimpleItem]
type Chart  = [SimpleItem]
#ifndef DISABLE_GUI
type Trash = [SimpleItem]
#endif

\end{code} \subsection{SimpleState and SimpleStatus} The \fnreflite{SimpleState} is a state monad where the state being thread through is a \fnreflite{SimpleStatus}. The two are named deliberately alike to indicate their close relationship. To prevent confusion, we ought to keep a somewhat consistent naming scheme across the builders: FooState for the state monad, FooStatus for the state monad's ``contents'', and FooItem for the chart items manipulated. Note the theTrash is not actually essential to the operation of the generator; it is for pratical debugging of grammars. Instead of trees dissapearing off the face of the debugger; they go into the trash where the user can inspect them and try to figure out why they went wrong. \begin{code}

type SimpleState a = B.BuilderState SimpleStatus a

data SimpleStatus = S
  { theAgenda    :: Agenda
  , theAuxAgenda :: AuxAgenda
  , theChart     :: Chart
#ifndef DISABLE_GUI
  , theTrash   :: Trash
#endif
  , theResults :: [SimpleItem]
  , theIafMap  :: IafMap -- for index accessibility filtering
  , tsem       :: BitVector
  , step       :: Ptype
  , gencounter :: Integer
  , genconfig  :: Params
  -- we keep a SemBitMap strictly to help display the semantics
  , semBitMap  :: SemBitMap
  }
  deriving Show

\end{code} \subsubsection{SimpleStatus updaters} \begin{code}

addToAgenda :: SimpleItem -> SimpleState ()
addToAgenda te = do
  modify $ \s -> s{theAgenda = te:(theAgenda s) }

updateAgenda :: Agenda -> SimpleState ()
updateAgenda a = do
  modify $ \s -> s{theAgenda = a}

addToAuxAgenda :: SimpleItem -> SimpleState ()
addToAuxAgenda te = do
  s <- get
  -- each new tree gets a unique id... this makes comparisons faster
  let counter = (gencounter s) + 1
      te2 = te { siId = counter }
  put s{gencounter = counter,
        theAuxAgenda = te2:(theAuxAgenda s) }

addToChart :: SimpleItem -> SimpleState ()
addToChart te = do
  modify $ \s -> s { theChart = te:(theChart s) }
  incrCounter chart_size 1

#ifndef DISABLE_GUI
addToTrash :: SimpleItem -> String -> SimpleState ()
addToTrash te err = do
  let te2 = modifyGuiStuff (\g -> g { siDiagnostic = err:(siDiagnostic g) }) te
  modify $ \s -> s { theTrash = te2 : (theTrash s) }
#endif

addToResults :: SimpleItem -> SimpleState ()
addToResults te = do
  modify $ \s -> s { theResults = te : (theResults s) }

\end{code} \subsection{SimpleItem} \begin{code}

data SimpleItem = SimpleItem
 { siId        :: ChartId
 --
 , siSubstnodes :: ![TagSite]
 , siAdjnodes   :: ![TagSite]
 --
 , siSemantics :: !BitVector
 , siPolpaths  :: !BitVector
 -- for generation sans semantics
 -- , siAdjlist :: [(String,Integer)] -- (node name, auxiliary tree id)
 -- for index accesibility filtering (one-phase only)
 , siAccesible    :: [ String ] -- it's acc/inacc/undetermined
 , siInaccessible :: [ String ] -- that's why you want both
 --
 -- | actually: a set of pre-terminals and their leaves
 , siLeaves  :: [(String, B.UninflectedDisjunction)]
 , siDerived :: Tree String
 , siRoot    :: TagSite
 , siFoot    :: Maybe TagSite
 --
 , siPendingTb :: [ TagSite ] -- only for one-phase
 -- how was this item produced?
 , siDerivation :: TagDerivation
#ifndef DISABLE_GUI
 -- for the debugger only
 , siGuiStuff :: SimpleGuiItem
#endif
 } deriving Show

#ifndef DISABLE_GUI
-- | Things whose only use is within the graphical debugger
data SimpleGuiItem = SimpleGuiItem
 { siHighlight :: [String] -- ^ nodes to highlight
 , siNodes :: [GNode]    -- ^ actually a set
 -- if there are things wrong with this item, what?
 , siDiagnostic :: [String]
 , siFullSem :: Sem
 , siIdname  :: String
 } deriving Show

modifyGuiStuff :: (SimpleGuiItem -> SimpleGuiItem) -> SimpleItem -> SimpleItem
modifyGuiStuff fn i = i { siGuiStuff = fn . siGuiStuff $ i }
#endif

type ChartId = Integer

instance Replacable SimpleItem where
  replaceMap s i =
    i { siSubstnodes = replaceMap s (siSubstnodes i)
      , siAdjnodes   = replaceMap s (siAdjnodes i)
      , siLeaves  = replaceMap s (siLeaves i)
      , siRoot    = replaceMap s (siRoot i)
      , siFoot    = replaceMap s (siFoot i)
      , siPendingTb = replaceMap s (siPendingTb i)
#ifndef DISABLE_GUI
      , siGuiStuff = replaceMap s (siGuiStuff i)
#endif
     }
  replaceOne = replaceOneAsMap

#ifndef DISABLE_GUI
instance Replacable SimpleGuiItem where
 replaceMap s i = i { siNodes = replaceMap s (siNodes i) }
 replaceOne = replaceOneAsMap
#endif

\end{code} \begin{code}

{-# INLINE closedAux #-}

-- | True if the chart item has no open substitution nodes
closed :: SimpleItem -> Bool
closed = null.siSubstnodes

-- | True if the chart item is an auxiliary tree
aux :: SimpleItem -> Bool
aux = isJust . siFoot

-- | True if both 'closed' and 'aux' are True
closedAux :: SimpleItem -> Bool
closedAux x = (aux x) && (closed x)

adjdone :: SimpleItem -> Bool
adjdone = null.siAdjnodes

siInitial :: SimpleItem -> Bool
siInitial =  isNothing . siFoot

\end{code} % -------------------------------------------------------------------- \section{Initialisation} % -------------------------------------------------------------------- \begin{code}

-- | Creates an initial SimpleStatus.
initSimpleBuilder ::  Bool -> B.Input -> Params -> (SimpleStatus, Statistics)
initSimpleBuilder twophase input config =
  let cands   = map (initSimpleItem bmap) $ B.inCands input
      (sem,_,_) = B.inSemInput input
      bmap    = defineSemanticBits sem
      -- FIXME: I don't know if this matters for one-phase
      -- because of on-the-fly tb unification (in 2p), we
      -- need an initial tb step that only addresses the
      -- nodes with null adjunction constraints
      simpleDp = if twophase then simpleDispatch_2p
                 else simpleDispatch_1p (isIaf config)
      initialDp = dpTbFailure >--> simpleDp
      --
      initS = S{ theAgenda    = []
               , theAuxAgenda = []
               , theChart     = []
#ifndef DISABLE_GUI
               , theTrash     = []
#endif
               , theResults   = []
               , semBitMap = bmap
               , tsem      = semToBitVector bmap sem
               , theIafMap = semToIafMap sem
               , step     = Initial
               , gencounter = toInteger $ length cands
               , genconfig  = config }
      --
  in B.unlessEmptySem input config $
     runState (execStateT (mapM initialDp cands) initS) (B.initStats config)


initSimpleItem :: SemBitMap -> (TagElem, BitVector) -> SimpleItem
initSimpleItem bmap (teRaw,pp) =
 let (te,tlite) = renameNodesWithTidnum teRaw in
 case detectSites (ttree te) of
 (snodes,anodes,nullAdjNodes) -> setIaf $ SimpleItem
  { siId        = tidnum te
  , siSemantics = semToBitVector bmap (tsemantics te)
  , siSubstnodes = snodes
  , siAdjnodes   = anodes
  , siPolpaths  = pp
  -- for index accesibility filtering
  , siAccesible    = [] -- see below
  , siInaccessible = []
  -- for generation sans semantics
  -- , siAdjlist = []
  , siLeaves  = tagLeaves te
  , siDerived = tlite
  , siRoot = ncopy.root $ theTree
  , siFoot = if ttype te == Initial then Nothing
             else Just . ncopy.foot $ theTree
  , siDerivation = []
  -- note: see comment in initSimpleBuilder re: tb unification
  , siPendingTb = nullAdjNodes
  --
#ifndef DISABLE_GUI
  , siGuiStuff = initSimpleGuiItem te
#endif
  }
  where setIaf i = i { siAccesible = iafNewAcc i }
        theTree = ttree te

#ifndef DISABLE_GUI
initSimpleGuiItem :: TagElem -> SimpleGuiItem
initSimpleGuiItem te = SimpleGuiItem
 { siHighlight = []
 , siNodes = flatten.ttree $ te
 , siDiagnostic = []
 , siFullSem = tsemantics te
 , siIdname = idname te }
#endif

renameNodesWithTidnum :: TagElem -> (TagElem, Tree NodeName)
renameNodesWithTidnum te =
  ( te { ttree = mapTree' renameNode theTree }
  , mapTree' newName theTree )
  where theTree = ttree te
        renameNode n = n { gnname = newName n }
        newName n = gnname n ++ "-" ++ tidstr
        tidstr = show . tidnum $ te

\end{code} % -------------------------------------------------------------------- \section{Generate} % -------------------------------------------------------------------- \subsection{One-phase generation} This is a standard chart-and-agenda mechanism, where each iteration consists of picking an item off the agenda and combining it with elements from the chart. \begin{code}

generateStep_1p :: SimpleState ()
generateStep_1p =
 do isDone <- gets (null.theAgenda)
    iaf <- gets (isIaf.genconfig)
    let dispatch = mapM (simpleDispatch_1p iaf)
    if isDone
       then return ()
       else do incrCounter num_iterations 1
               given <- selectGiven
               -- do both substitution and adjunction
               applySubstitution1p given >>= dispatch
               passiveAdjunction1p given >>= dispatch
               activeAdjunction1p  given >>= dispatch
               sansAdjunction1p    given >>= dispatch
               -- determine which of the res should go in the agenda
               -- (monadic state) and which should go in the result (res')
               addToChart given

\end{code} \subsection{Two-phase generation} Following \cite{carroll1999ecg}, we could also separate realisation into two distinct phases. This requires that we maintain two seperate agendas and process them sequentially, one loop after the other. See \fnref{switchToAux} for details. \begin{itemize} \item If both Agenda and AuxAgenda are empty then there is nothing to do, otherwise, if Agenda is empty then we switch to the application of the Adjunction rule. \item After the rule is applied we classify solutions into those that are complete and cover the semantics and those that don't. The first ones are returned and added to the result, while the others are sent back to Agenda. \item Notice that if we are applying the Substitution rule then the current agenda item is added to the chart, otherwise it is deleted. \end{itemize} \begin{code}

generateStep_2p :: SimpleState ()
generateStep_2p = do
  nir     <- gets (null.theAgenda)
  curStep <- gets step
  -- this check may seem redundant with generate, but it's needed
  -- to protect against a user who calls generateStep_2p on a finished
  -- state
  if (nir && curStep == Auxiliar)
    then return ()
    else do incrCounter num_iterations 1
            -- this triggers exactly once in the whole process
            if nir
               then switchToAux
               else generateStep_2p'

generateStep_2p' :: SimpleState ()
generateStep_2p' =
  do -- choose an item from the agenda
     given <- selectGiven
     -- have we triggered the switch to aux yet?
     curStep <- gets step
     -- do either substitution or adjunction
     res <- if (curStep == Initial)
            then applySubstitution given
            else liftM2 (++) (sansAdjunction2p given) (applyAdjunction2p given)

     -- determine which of the res should go in the agenda
     -- (monadic state) and which should go in the result (res')
     mapM simpleDispatch_2p res
     -- put the given into the chart untouched
     if (curStep == Initial)
        then addToChart given
        else when (adjdone given) $ trashIt given

\end{code} \subsection{Helpers for the generateSteps} \begin{code}

trashIt :: SimpleItem -> SimpleState ()
#ifdef DISABLE_GUI
trashIt _ = return ()
#else
trashIt item =
 do s <- get
    let bmap = semBitMap s
        itemSem = siSemantics item
        inputSem = tsem s
        reason = if inputSem == itemSem
                    then "unknown reason!"
                    else ts_semIncomplete $ bitVectorToSem bmap $ inputSem `xor` itemSem
    addToTrash item reason
#endif

-- | Arbitrarily selects and removes an element from the agenda and
--   returns it.
selectGiven :: SimpleState SimpleItem
selectGiven = do
  agenda <- gets theAgenda
  case agenda of
   []        -> geniBug "null agenda in selectGiven"
   (a:atail) -> updateAgenda atail >> return a

\end{code} \subsection{Switching phases} \fnlabel{switchToAux} When all substitutions has been done, tags with substitution nodes still open are deleted, then the auxiliars tags are put in Chart and the (initial) tags in the repository are moved into the Agenda. The step is then changed to Auxiliary \begin{code}

switchToAux :: SimpleState ()
switchToAux = do
  st <- get
  let chart  = theChart st
      config = genconfig st
      -- You might be wondering why we ignore the auxiliary trees in the
      -- chart; this is because all the syntactically complete auxiliary
      -- trees have already been filtered away by calls to classifyNew
      initialT  = filter siInitial chart
      res1@(compT1, incompT1) =
         partition (null.siSubstnodes) initialT
      --
      auxAgenda = theAuxAgenda st
      (compT2, incompT2) =
        if semfiltered config
        then semfilter (tsem st) auxAgenda compT1
        else res1
      --
      compT = compT2
  put st{ theAgenda = []
        , theAuxAgenda = []
        , theChart = auxAgenda
        , step = Auxiliar}
  -- the root cat filter by Claire
  let switchFilter =
        if rootcatfiltered config
        then dpRootFeatFailure2 >--> dpToAgenda
        else dpToAgenda
  mapM switchFilter compT
  -- toss the syntactically incomplete stuff in the trash
#ifndef DISABLE_GUI
  mapM (\t -> addToTrash t ts_synIncomplete) incompT1
  mapM (\t -> addToTrash t "sem-filtered") incompT2
#endif
  return ()

\end{code} \subsubsection{SemFilter Optimisation} \label{sec:semfilter} The purpose of the semantic filter optimisation is to take full advantage of Carroll's delayed adjunction. Consider the semantics \semexpr{def(m), poor(m), brokenhearted(m), man(m), def(w), woman(w), beautiful(w), heartless(w), rejects(w,m)}. At the switchToAux step, we are left with the initial trees \natlang{man}, \natlang{woman}, \natlang{the woman rejects the man}. It would be nice to filter out the structures \natlang{man} and \natlang{woman} since we know that they are not going to be semantically complete even with adjunction. More precisely, on the switch to adjunction, we do the following: \begin{itemize} \item Take the union of the semantics of all auxiliary trees; which we call $\phi^*$ \item Delete any initial tree with semantics $\phi^s$ such that $\phi^s \cup \phi^*$ is not the target semantics \end{itemize} In other words, we delete all initial trees that cannot produce a semantically complete result even with the help of auxiliary trees. FIXME: comment 2006-04-18: sem filter each polarity path separately (this is more aggressive; it gives us much more filtering) \begin{code}

semfilter :: BitVector -> [SimpleItem] -> [SimpleItem] -> ([SimpleItem], [SimpleItem])
semfilter inputsem auxs initial =
  let auxsem x = foldl' (.|.) 0 [ siSemantics a | a <- auxs, siPolpaths a .&. siPolpaths x /= 0 ]
      -- lite, here, means sans auxiliary semantics
      notjunk x = (siSemantics x) .&. inputsemLite == inputsemLite
                  where inputsemLite = inputsem `xor` (auxsem x)
      -- note that we can't just compare against siSemantics because
      -- that would exclude trees that have stuff in the aux semantics
      -- which would be overzealous
  in partition notjunk initial

\end{code} % -------------------------------------------------------------------- \section{Operations} % -------------------------------------------------------------------- We implement the two TAG operations, substitution and adjunction, below. These are the only two operations we have, because we're working with a very simple builder that constructs derived trees. % -------------------------------------------------------------------- \subsection{Substitution} \label{sec:substitution} % -------------------------------------------------------------------- \paragraph{applySubstitution} Given a SimpleItem it returns the list of all possible substitutions between it and the elements in Chart \begin{code}

applySubstitution :: SimpleItem -> SimpleState ([SimpleItem])
applySubstitution item =
 do gr <- lookupChart item
    active  <- mapM (\x -> iapplySubst True item x) gr
    passive <- mapM (\x -> iapplySubst True x item) gr
    let res = concat $ active ++ passive
    incrCounter num_comparisons (2 * (length gr))
    return res

applySubstitution1p :: SimpleItem -> SimpleState ([SimpleItem])
applySubstitution1p item =
 do gr <- lookupChart item
    active  <- if adjdone item then return []
               else mapM (\x -> iapplySubst False item x) gr
    passive <- mapM (\x -> iapplySubst False x item) $ filter adjdone gr
    let res = concat $ active ++ passive
    incrCounter num_comparisons (2 * (length gr))
    return res

-- | Note: returns ONE possible substitution (the head node)
--   of the first in the second.  As all substitutions nodes should
--   be substituted we force substitution in order.
iapplySubst :: Bool -> SimpleItem -> SimpleItem -> SimpleState [SimpleItem]
iapplySubst twophase item1 item2 | siInitial item1 && closed item1 = {-# SCC "applySubstitution" #-}
 case siSubstnodes item2 of
 [] -> return []
 ((TagSite n fu fd nOrigin) : stail) ->
  let doIt =
       do -- Maybe monad
          let r@(TagSite rn ru rd rOrigin) = siRoot item1
          (newU, subst1) <- unifyFeat ru fu
          (newD, subst2) <- unifyFeat (replace subst1 rd)
                                      (replace subst1 fd)
          let subst = mergeSubst subst1 subst2
              nr    = TagSite rn newU newD rOrigin
              adj1  = nr : (delete r $ siAdjnodes item1)
              adj2  = siAdjnodes item2
#ifdef DISABLE_GUI
              item1g = item1
#else
              item1g = item1 { siGuiStuff = g2 }
                where g2 = g { siNodes = repList (gnnameIs rn) newRoot (siNodes g) }
                      g  = siGuiStuff item1
              -- gui stuff
              newRoot g = g { gup = newU, gdown = newD, gtype = Other }
#endif
          let pending = if twophase then []
                        else nr : ((siPendingTb item1) ++ (siPendingTb item2))
          return $! replace subst $ combineSimpleItems [rn] item1g $
                     item2 { siSubstnodes = stail ++ (siSubstnodes item1)
                           , siAdjnodes   = adj2 ++ adj1
                           , siDerived    = plugTree (siDerived item1) n (siDerived item2)
                           , siDerivation = addToDerivation 's' (item1g,rOrigin) (item2,nOrigin,n)
                           , siLeaves     = (siLeaves item1) ++ (siLeaves item2)
                           , siPendingTb  = pending
                           }
  in case doIt of
     Nothing -> return []
     Just x  -> do incrCounter "substitutions" 1
                   return [x]
iapplySubst _ _ _ = return []

\end{code} % -------------------------------------------------------------------- \subsection{Adjunction} \label{sec:adjunction} \label{sec:ordered_adjunction} \label{sec:foot_constraint} % --------------------------------------------------------------- \paragraph{applyAdjunction2p} Given a SimpleItem, it returns the list of all possible adjunctions between it and the elements in Chart. The Chart contains Auxiliars, while SimpleItem is an Initial Note: as of 13 april 2005 - only uses ordered adjunction as described in \cite{kow04a} \begin{code}

applyAdjunction2p :: SimpleItem -> SimpleState ([SimpleItem])
applyAdjunction2p item = {-# SCC "applyAdjunction2p" #-}
 do gr <-lookupChart item
    incrCounter num_comparisons (length gr)
    mapMaybeM (\a -> tryAdj True a item) gr

passiveAdjunction1p :: SimpleItem -> SimpleState [SimpleItem]
passiveAdjunction1p item | closed item && siInitial item =
  do gr <- lookupChart item
     mapMaybeM (\a -> tryAdj False a item) $ filter validAux gr
passiveAdjunction1p _ = return []

activeAdjunction1p :: SimpleItem -> SimpleState [SimpleItem]
activeAdjunction1p item | validAux item =
  do gr <- lookupChart item
     mapMaybeM (\p -> tryAdj False item p) $ filter (\x -> siInitial x && closed x) gr
activeAdjunction1p _ = return []

validAux :: SimpleItem -> Bool
validAux t = closedAux t && adjdone t

tryAdj :: Bool -> SimpleItem -> SimpleItem -> SimpleState (Maybe SimpleItem)
tryAdj twophase aItem pItem =
 do case iapplyAdjNode twophase aItem pItem of
     Just x  -> do incrCounter "adjunctions" 1
                   return $ Just x
     Nothing -> return Nothing

\end{code} Note that in the one-phase variant of non-adjunction, we can't do top/bot unification on the fly, because afaik we can't tell that a node will never be revisited. One example of this is if you try to adjoin into the root \begin{code}

-- | Ignore the next adjunction node
sansAdjunction1p, sansAdjunction2p :: SimpleItem -> SimpleState [SimpleItem]
sansAdjunction1p item | closed item =
 case siAdjnodes item of
 [] -> return []
 (ahead : atail) ->
   return $ [item { siAdjnodes = atail
                  , siPendingTb = ahead : (siPendingTb item) } ]
sansAdjunction1p _ = return []

-- | Ignore the next adjunction node
sansAdjunction2p item | closed item =
 case siAdjnodes item of
 [] -> return []
 (TagSite gn t b o: atail) -> do
  -- do top/bottom unification on the node
  case unifyFeat t b of
   Nothing ->
#ifndef DISABLE_GUI
     do addToTrash (modifyGuiStuff (\g -> g { siHighlight = [gn] }) item)
                   ts_tbUnificationFailure
#endif
        return []
   Just (tb,s) ->
     let item1 = if isRootOf item gn
                 then item { siRoot = TagSite gn tb [] o }
                 else item
#ifdef DISABLE_GUI
         item2 = item1
#else
         item2 = modifyGuiStuff (constrainAdj gn tb) item1
#endif
     in return $! [replace s $! item2 { siAdjnodes = atail }]
sansAdjunction2p _ = return []

\end{code} The main work for adjunction is done in the helper function below (see also figure \ref{fig:adjunction}). Auxiliary tree \texttt{te1} has a root node \texttt{r} and a foot node \texttt{f}. Main tree \texttt{te2} has an adjunction site \texttt{an}. The resulting tree \texttt{res} is a result of splicing \texttt{te1} into \texttt{te2}. We replace \texttt{s} with the nodes \texttt{anr} and \texttt{anf} (which are the results of unifying \texttt{an} with \texttt{r} and \texttt{f} respectively). \begin{figure} \begin{center} \includegraphics[scale=0.5]{images/adjunction.pdf} \label{fig:adjunction} \caption{iapplyAdjNode} \end{center} \end{figure} In addition to the trees proper, we have to consider that each tree has a list with a copy of its adjunction sites. The adjunction list of the result (\texttt{adjnodes res}) should then contain \texttt{adjnodes te1} and \texttt{adjnodes te2}, but replacing \texttt{r} and \texttt{an} with \texttt{anr}. \begin{code}

iapplyAdjNode :: Bool -> SimpleItem -> SimpleItem -> Maybe SimpleItem
iapplyAdjNode twophase aItem pItem = {-# SCC "iapplyAdjNode" #-}
 case siAdjnodes pItem of
 [] -> Nothing
 (TagSite an_name an_up an_down nOrigin : atail) -> do
  -- block repeated adjunctions of the same SimpleItem (for ignore semantics mode)
  -- guard $ not $ (an_name, siId aItem) `elem` (siAdjlist pItem)
  -- let's go!
  let r@(TagSite r_name r_up r_down rOrigin) = siRoot aItem -- auxiliary tree, eh?
  (TagSite f_name f_up f_down _) <- siFoot aItem -- should really be an error if fails
  (anr_up',  subst1)  <- unifyFeat r_up an_up
  (anf_down, subst2)  <- unifyFeat (replace subst1 f_down) (replace subst1 an_down)
  let -- combined substitution list and success condition
      subst12 = mergeSubst subst1 subst2
      -- the result of unifying the t1 root and the t2 an
      anr = TagSite r_name (replace subst2 anr_up') r_down rOrigin
  let anf_up = replace subst12 f_up
      -- the new adjunction nodes
      auxlite = delete r $ siAdjnodes aItem
      newadjnodes = anr : (atail ++ auxlite)
      --
#ifdef DISABLE_GUI
      aItem2 = aItem
#else
      -- Ugh, this is horrible: this is just to make sure the GUI gets
      -- updated accordingly.  The code used to be a lot simpler, but
      -- I started trying to move stuff out of the way in the interests
      -- of efficiency, and to pack as much gui-related stuff as possible
      -- into a single tuple.
      aItem2 = aItem { siGuiStuff = fixNodes $ siGuiStuff aItem }
        where fixNodes g = g { siNodes = map (setSites anr) (siNodes g) }
              setSites (TagSite n u d _) gn =
                if gnname gn == n then gn { gup = u, gdown = d }
                                  else gn
#endif
      rawCombined =
        combineSimpleItems [r_name, an_name] aItem2 $ pItem
               { siAdjnodes = newadjnodes
               , siLeaves  = siLeaves aItem ++ siLeaves pItem
               , siDerived = spliceTree f_name (siDerived aItem) an_name (siDerived pItem)
               , siDerivation = addToDerivation 'a' (aItem,rOrigin) (pItem,nOrigin,an_name)
               -- , siAdjlist = (n, (tidnum te1)):(siAdjlist item2)
               -- if we adjoin into the root, the new root is that of the aux
               -- tree (affects 1p only)
               , siRoot = if isRootOf pItem an_name then r else siRoot pItem
               , siPendingTb =
                  if twophase then []
                  else (TagSite an_name anf_up anf_down nOrigin) : (siPendingTb pItem) ++ (siPendingTb aItem)
               }
      -- one phase = postpone tb unification
      -- two phase = do tb unification on the fly
      finalRes1p = return $ replace subst12 rawCombined
      finalRes2p =
       do -- tb on the former foot
          tbRes <- unifyFeat anf_up anf_down
#ifdef DISABLE_GUI
          let (_, subst3) = tbRes
              myRes = res'
#else
          let (anf_tb, subst3) = tbRes
              myRes = modifyGuiStuff (constrainAdj an_name anf_tb) res'
#endif
          -- apply the substitutions
              res' = replace (mergeSubst subst12 subst3) rawCombined
          return myRes
  -- ---------------
  if twophase then finalRes2p else finalRes1p

\end{code} % -------------------------------------------------------------------- \subsection{Helper functions for operations} % -------------------------------------------------------------------- \begin{code}

ncopy :: GNode -> TagSite
ncopy x = TagSite (gnname x) (gup x) (gdown x) (gorigin x)

isRootOf :: SimpleItem -> String -> Bool
isRootOf item n = n == rname
  where (TagSite rname _ _ _) = siRoot item

-- | Retrieves a list of trees from the chart which could be combined with the given agenda tree.
-- The current implementation searches for trees which
--  * do not have overlapping semantics with the given
--  * are on the some of the same polarity automaton paths as the
--    current agenda item
lookupChart :: SimpleItem -> SimpleState [SimpleItem]
lookupChart given = do
  chart <- gets theChart
  let gpaths = siPolpaths given
      gsem   = siSemantics given
  return [ i | i <- chart
             -- should be on the same polarity path (chart sharing)
             , (siPolpaths i) .&. gpaths /= 0
             -- semantics should not be overlapping
             && (siSemantics i .&. gsem ) == 0
         ]

-- | Helper function for when chart operations succeed.
combineSimpleItems :: [NodeName] -- ^ nodes to highlight
                   -> SimpleItem -> SimpleItem -> SimpleItem
combineSimpleItems hi item1 item2 = {-# SCC "combineSimpleItems" #-}
  item2 { siSemantics = (siSemantics item1) .|. (siSemantics item2)
        , siPolpaths  = (siPolpaths  item1) .&. (siPolpaths  item2)
#ifndef DISABLE_GUI
        , siGuiStuff  = combineSimpleGuiItems hi (siGuiStuff item1) (siGuiStuff item2)
#endif
        }

#ifndef DISABLE_GUI
combineSimpleGuiItems :: [NodeName]
                      -> SimpleGuiItem -> SimpleGuiItem -> SimpleGuiItem
combineSimpleGuiItems hi item1 item2 =
 item2 { siFullSem = sortSem $ (siFullSem item1) ++ (siFullSem item2)
       , siNodes = (siNodes item1) ++ (siNodes item2)
       , siDiagnostic = (siDiagnostic item1) ++ (siDiagnostic item2)
       , siHighlight = hi
       }

constrainAdj :: String -> Flist -> SimpleGuiItem -> SimpleGuiItem
constrainAdj gn newT g =
  g { siNodes = repList (gnnameIs gn) fixIt (siNodes g) }
  where fixIt n = n { gup = newT, gdown = [], gaconstr = True }
#endif

\end{code} \subsubsection{Derivation trees} We make the simplifying assumption that each chart item is only used once. This is clearly wrong if we allow for items with an empty semantics, but since we do not actually allow such a thing, we're ok. \begin{code}

addToDerivation :: Char
                -> (SimpleItem,String)
                -> (SimpleItem,String,String)
                -> TagDerivation
addToDerivation op (tc,tcOrigin) (tp,tpOrigin,tpSite) =
  let hp = siDerivation tp
      hc = siDerivation tc
      newnode = (op, tcOrigin, (tpOrigin, tpSite))
  in newnode:hp++hc

\end{code} % -------------------------------------------------------------------- \section{Dispatching new results} % -------------------------------------------------------------------- Dispatching is the process where new chart items are assigned to one of the trash, agenda, auxiliary agenda or chart. The item could be modified during dispatch-time; for example, we might do top/bottom unification on it. See \ref{sec:dispatching} for more details. \begin{code}

type SimpleDispatchFilter = DispatchFilter SimpleState SimpleItem

simpleDispatch_2p :: SimpleDispatchFilter
simpleDispatch_2p =
 simpleDispatch (dpRootFeatFailure >--> dpToResults)
                (dpAux >--> dpToAgenda)

simpleDispatch_1p :: Bool -> SimpleDispatchFilter
simpleDispatch_1p iaf =
 simpleDispatch (dpRootFeatFailure >--> dpTbFailure >--> dpToResults)
                (maybeDpIaf >--> dpToAgenda)
 where maybeDpIaf = if iaf then dpIafFailure else nullFilter

simpleDispatch :: SimpleDispatchFilter -> SimpleDispatchFilter -> SimpleDispatchFilter
simpleDispatch resFilter nonResFilter item =
 do inputsem <- gets tsem
    let synComplete x = siInitial x && closed x && adjdone x
        semComplete x = inputsem == siSemantics x
        isResult x = synComplete x && semComplete x
    condFilter isResult resFilter nonResFilter item

dpAux, dpToAgenda :: SimpleDispatchFilter
dpTbFailure, dpRootFeatFailure, dpRootFeatFailure2, dpToResults :: SimpleDispatchFilter
dpToTrash :: String -> SimpleDispatchFilter

dpToAgenda x  = addToAgenda x  >> return Nothing
dpToResults x = addToResults x >> return Nothing
#ifdef DISABLE_GUI
dpToTrash _ _ = return Nothing
#else
dpToTrash m x = addToTrash x m >> return Nothing
#endif

dpAux item =
  if closedAux item
  then addToAuxAgenda item >> return Nothing
  else return $ Just item

{-
-- | Dispatches to the trash and returns Nothing if there is a tree
--   size limit in effect and the item is over that limit.  The
--   tree size limit is used in 'IgnoreSemantics' mode.
dpTreeLimit item =
 do config <- gets genconfig
    case maxTrees config of
     Nothing  -> return $ Just item
     Just lim -> if (length.snd.siDerivation) item > lim
                 then do addToTrash item (ts_overnumTrees lim)
                         return Nothing
                 else return $ Just item
   where ts_overnumTrees l = "Over derivation size of " ++ (show l)
-}

-- | This is only used for the one-phase algorithm
dpTbFailure item =
 return $ if tbUnifyTree item then Just item else Nothing

-- | If the item (ostensibly a result) does not have the correct root
--   category, return Nothing; otherwise return Just item
dpRootFeatFailure  = dpRootFeatFailure_ False
dpRootFeatFailure2 = dpRootFeatFailure_ True

dpRootFeatFailure_ :: Bool -> SimpleDispatchFilter
dpRootFeatFailure_ count item =
 do config <- gets genconfig
    let rootFeat = getListFlagP RootFeatureFlg config
        (TagSite _ top _ _) = siRoot item
    case unifyFeat rootFeat top of
      Nothing ->
        do when count $ incrCounter "root_feat_discards" 1
           dpToTrash (ts_rootFeatureMismatch rootFeat) item
      Just (_, s) ->
        return . Just $ replace s item

\end{code} % -------------------------------------------------------------------- \subsection{Top and bottom unification} % -------------------------------------------------------------------- \paragraph{tbUnifyTree} unifies the top and bottom feature structures of each node on each tree. Note: this only determines if it is possible to do so. Actually returning the results is possible and even easy (you'll have to look back into the darcs repository and unpull the patch from 2006-05-21T15:40:51 ``Remove top/bot unification standalone code.'') but since it is only used in the one-phase algorithm and for the graphical interface, I decided not to bother. \begin{code}

type TbEither = Either String Subst
tbUnifyTree :: SimpleItem -> Bool
tbUnifyTree item = {-# SCC "tbUnifyTree" #-}
  case foldl tbUnifyNode (Right Map.empty) (siPendingTb item) of
    Left  _ -> False
    Right _ -> True

\end{code} Our helper function corresponds to the first unification step. It is meant to be called from a fold. The node argument represents the current node being explored. The Either argument holds a list of pending substitutions and a copy of the entire tree. There are two things going on in here: \begin{enumerate} \item check if unification is possible - first we apply the pending substitutions on the node and then we check if unification of the top and bottom feature structures of that node succeeds \item keep track of the substitutions that need to be performed - any new substitutions that result from unification are added to the pending list \end{enumerate} Note that we wrap the second argument in a Maybe; this is used to indicate that if unification suceeds or fails. We also use it to prevent the function from doing any work if a unification failure from a previous call has already occured. Getting this right was a big pain in the butt, so don't go trying to simplify this over-complicated code unless you know what you're doing. \begin{code}

tbUnifyNode :: TbEither -> TagSite -> TbEither
tbUnifyNode (Right pending) rawSite =
  -- apply pending substitutions
  case replace pending rawSite of
  (TagSite name up down _) ->
    -- check top/bottom unification on this node
    case unifyFeat up down of
    -- stop all future iterations
    Nothing -> Left name
    -- apply any new substutions to the whole tree
    Just (_,sb) -> Right (mergeSubst pending sb)

-- if earlier we had a failure, don't even bother
tbUnifyNode (Left n) _ = Left n

\end{code} % -------------------------------------------------------------------- \subsection{Index accesibility filtering} \label{sec:simple:iaf} % -------------------------------------------------------------------- Note that index accesibility filtering only makes sense for the one-phase algorithm. See also \ref{sec:iaf} for more details about what this is. \begin{code}

instance IafAble SimpleItem where
  iafAcc   = siAccesible
  iafInacc = siInaccessible
  iafSetAcc   a i = i { siAccesible = a }
  iafSetInacc a i = i { siInaccessible = a }
  iafNewAcc i =
    concatMap fromUniConst $
    concat [ getIdx up | (TagSite _ up _ _) <- siSubstnodes i ]

dpIafFailure :: SimpleDispatchFilter
dpIafFailure item | aux item = return $ Just item
dpIafFailure itemRaw =
 do s <- get
    let bmap = semBitMap s
        item = recalculateAccesibility itemRaw
        badSem = iafBadSem (theIafMap s) bmap (tsem s) siSemantics item
        inAcc = iafInacc item
    if badSem == 0
      then -- can't dispatch, but that's good!
           -- (note that we return the item with its iaf criteria updated)
           return $ Just item
      else dpToTrash (ts_iafFailure inAcc $ bitVectorToSem bmap badSem) item

\end{code} % -------------------------------------------------------------------- \section{Unpacking the results} % -------------------------------------------------------------------- Unpacking the results consists of converting each result into a sentence automaton (to take care of atomic disjunction) and reading the paths of each automaton. \begin{code}

unpackResults :: [SimpleItem] ->  [B.Output]
unpackResults = concatMap unpackResult

unpackResult :: SimpleItem -> [B.Output]
unpackResult item =
  let leafMap :: Map.Map String B.UninflectedDisjunction
      leafMap = Map.fromList . siLeaves $ item
      lookupOrBug :: NodeName -> B.UninflectedDisjunction
      lookupOrBug k = case Map.lookup k leafMap of
                      Nothing -> geniBug $ "unpackResult : could not find node " ++ k
                      Just w  -> w
      derivation = siDerivation item
      paths = automatonPaths . listToSentenceAut $
              [ lookupOrBug k | (k,_) <- (preTerminals . siDerived) item ]
 in zip paths (repeat derivation)

\end{code} \subsection{Sentence automata} \fnlabel{listToSentenceAut} converts a list of GNodes into a sentence automaton. It's a actually pretty stupid conversion in fact. We pretty much make a straight path through the automaton, with the only cleverness being that we provide a different transition for each atomic disjunction. \begin{code}

listToSentenceAut :: [ B.UninflectedDisjunction ] -> B.SentenceAut
listToSentenceAut nodes =
  let theStart  = 0
      theEnd = (length nodes) - 1
      theStates = [theStart..theEnd]
      --
      emptyAut = NFA
        { startSt     = theStart
        , isFinalSt   = Nothing
        , finalStList = [theEnd]
        , states      = [theStates]
        , transitions = Map.empty }
      -- create a transition for each lexeme in the node to the
      -- next state...
      helper :: (Int, B.UninflectedDisjunction) -> B.SentenceAut -> B.SentenceAut
      helper (current, (lemmas, features)) aut =
        foldl' addT aut lemmas
        where
          addT a t = addTrans a current (Just (t, features)) next
          next = current + 1
      --
  in foldr helper emptyAut (zip theStates nodes)

\end{code} % -------------------------------------------------------------------- \section{Partial results} % -------------------------------------------------------------------- The user may ask for partial results when realisation fails. We implement this using a greedy, full-commitment algorithm. Find the discarded result that matches the largest part of the semantics and output that fragment. If there are parts of the input semantics not covered by that fragment, search for the largest chunk that covers the missing semantics. Recurse until there are no more eligible items. \begin{code}

partialResults :: SimpleStatus -> [SimpleItem]
#ifndef DISABLE_GUI
partialResults st = unfoldr getNext 0
 where
  inputsem = tsem st
  trash  = theTrash st
  trashC = sortBy (comparing $ negate . fst) $
           map (\t -> (coverage inputsem t, t)) trash
  getNext sem = case getItems sem of
                     []     -> Nothing
                     (it:_) -> Just (it, siSemantics it .|. sem)
  getItems sem = [ i | (_,i) <- trashC, siSemantics i .&. sem == 0 ]

coverage :: BitVector -> SimpleItem -> Int
coverage sem it = countBits (sem .&. siSemantics it)

countBits :: Bits a => a -> Int
countBits 0  = 0
countBits bs = if testBit bs 0 then 1 + next else next
  where next = countBits (shiftR bs 1)
#else
partialResults = return []
#endif

\end{code}