-- GenI surface realiser
-- Copyright (C) 2005 Carlos Areces and Eric Kow
--
-- This program is free software; you can redistribute it and/or
-- modify it under the terms of the GNU General Public License
-- as published by the Free Software Foundation; either version 2
-- of the License, or (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

{-# LANGUAGE ScopedTypeVariables, ExistentialQuantification, TypeSynonymInstances #-}
{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE OverloadedStrings #-}

-- | This is the interface between the front and backends of the generator. The GUI
--   and the console interface both talk to this module, and in turn, this module
--   talks to the input file parsers and the surface realisation engine.
module NLP.GenI (
             -- * Main interface

             -- ** Program state and configuration
             ProgState(..), ProgStateRef, emptyProgState,
             LexicalSelector,

             -- ** Running GenI
             runGeni,
             GeniResults(..),
             GeniResult(..), isSuccess, GeniError(..), GeniSuccess(..),
             GeniLexSel(..),
             ResultType(..),

             -- * Helpers
             initGeni, extractResults,
             lemmaSentenceString, prettyResult,
             showRealisations, histogram,
             getTraces,

             -- ** Loading things
             loadEverything,
             Loadable(..),
             loadLexicon,
             loadGeniMacros,
             loadTestSuite, parseSemInput,
             loadRanking, BadInputException(..),
             loadFromString,
             )
where


import Control.Applicative ((<$>),(<*>))
import Control.DeepSeq
import Control.Exception
import Control.Monad.Error
import Data.Binary (Binary, decodeFile)
import Data.IORef (IORef, readIORef, modifyIORef)
import Data.List
import Data.Maybe (fromMaybe)
import Data.Monoid ( mappend, mempty )
import Data.Text ( Text )
import Data.Typeable (Typeable)
import System.CPUTime( getCPUTime )
import System.IO ( stderr )
import qualified Data.Map as Map
import qualified Data.Text as T
import qualified Data.Text.IO as T

import Data.FullList ( fromFL )
import Text.JSON
import qualified System.IO.UTF8 as UTF8

import NLP.GenI.Configuration
    ( Params, customMorph, customSelector
    , getFlagP, hasFlagP, hasOpt, Optimisation(NoConstraints)
    , MacrosFlg(..), LexiconFlg(..), TestSuiteFlg(..)
    , MorphInfoFlg(..), MorphCmdFlg(..)
    , RankingConstraintsFlg(..)
    , PartialFlg(..)
    , FromStdinFlg(..), VerboseModeFlg(..)
    , NoLoadTestSuiteFlg(..)
    , RootFeatureFlg(..)
    , TracesFlg(..)
    , grammarType
    , GrammarType(..)
    )
import NLP.GenI.General
    ( histogram, geniBug, snd3, first3, ePutStr, ePutStrLn, eFlush,
    -- mkLogname,
    )
import NLP.GenI.GeniVal
import NLP.GenI.LexicalSelection ( LexicalSelector, LexicalSelection(..), defaultLexicalSelector )
import NLP.GenI.Lexicon
import NLP.GenI.Morphology
import NLP.GenI.OptimalityTheory
import NLP.GenI.Parser (geniMacros, geniTagElems,
                    geniLexicon, geniTestSuite,
                    geniTestSuiteString, geniSemanticInput,
                    geniMorphInfo,
                    runParser,
                    ParseError,
                    )
import NLP.GenI.Pretty
import NLP.GenI.Semantics
import NLP.GenI.Statistics
import NLP.GenI.Tag ( TagElem, idname, tsemantics, ttrace, setTidnums )
import NLP.GenI.TestSuite ( TestCase(..) )
import NLP.GenI.TreeSchema
import NLP.GenI.Warning
import qualified NLP.GenI.Builder as B

-- -- DEBUG
-- import Control.Monad.Writer
-- import NLP.GenI.Lexicon
-- import NLP.GenI.LexicalSelection
-- import NLP.GenI.FeatureStructures

-- --------------------------------------------------------------------
-- ProgState
-- --------------------------------------------------------------------

-- | The program state consists of its configuration options and abstract,
--   cleaned up representations of all the data it's had to load into memory
--   (tree schemata files, lexicon files, etc).  The intention is for the
--   state to stay static until the next time something triggers some file
--   loading.
data ProgState = ProgState
    { pa       :: Params  -- ^ the current configuration
    , gr       :: Macros  -- ^ tree schemata
    , le       :: Lexicon -- ^ lexical entries
    , morphinf :: MorphInputFn -- ^ function to extract morphological
                               -- information from the semantics (you may
                               -- instead be looking for
                               -- 'NLP.GenI.Configuration.customMorph')
    , ranking  :: OtRanking -- ^ OT constraints    (optional)
    , traces   :: [Text]    -- ^ simplified traces (optional)
    }

type ProgStateRef = IORef ProgState

-- | The program state when you start GenI for the very first time
emptyProgState :: Params -> ProgState
emptyProgState args = ProgState
    { pa = args
    , gr = []
    , le = []
    , morphinf = const Nothing
    , traces = []
    , ranking = []
    }

-- --------------------------------------------------------------------
-- Interface
-- Loading and parsing
-- --------------------------------------------------------------------

-- | We have one master function that loads all the files GenI is expected to
--   use.  This just calls the sub-loaders below, some of which are exported
--   for use by the graphical interface.  The master function also makes sure
--   to complain intelligently if some of the required files are missing.
loadEverything :: ProgStateRef -> IO() 
loadEverything pstRef =
  do pst <- readIORef pstRef
     --
     let config   = pa pst
         isMissing f = not $ hasFlagP f config
     -- grammar type
         isNotPreanchored = grammarType config /= PreAnchored
         isNotPrecompiled = grammarType config /= PreCompiled
         useTestSuite =  isMissing FromStdinFlg
                      && isMissing NoLoadTestSuiteFlg
     -- display 
     let errormsg =
           concat $ intersperse ", " [ msg | (con, msg) <- errorlst, con ]
         errorlst =
              [ (isMissing RootFeatureFlg,
                "a root feature [empty feature is fine if you are not using polarity filtering]")
              , (isNotPrecompiled && isMissing MacrosFlg,
                "a tree file")
              , (isNotPreanchored && isMissing LexiconFlg,
                "a lexicon file")
              , (useTestSuite && isMissing TestSuiteFlg,
                "a test suite") ]
     unless (null errormsg) $ fail ("Please specify: " ++ errormsg)
     -- we only have to read in grammars from the simple format
     case grammarType config of 
        PreAnchored -> return ()
        PreCompiled -> return ()
        _        -> loadGeniMacros pstRef >> return ()
     -- we don't have to read in the lexicon if it's already pre-anchored
     when isNotPreanchored $ loadLexicon pstRef >> return ()
     -- in any case, we have to...
     loadMorphInfo pstRef
     when useTestSuite $ loadTestSuite pstRef >> return ()
     -- the trace filter file
     loadTraces pstRef
     -- OT ranking
     loadRanking pstRef


-- | The file loading functions all work the same way: we load the file,
--   and try to parse it.  If this doesn't work, we just fail in IO, and
--   GenI dies.  If we succeed, we update the program state passed in as
--   an IORef.
class Loadable x where
  lParse       :: FilePath -- ^ source (optional)
               -> String -> Either ParseError x
  lSet         :: x -> ProgState -> ProgState
  lSummarise   :: x -> String

-- | Note that here we assume the input consists of UTF-8 encoded file
lParseFromFile :: Loadable x => FilePath -> IO (Either ParseError x)
lParseFromFile f = lParse f `fmap` UTF8.readFile f

-- | Returns the input too (convenient for type checking)
lSetState :: Loadable x => ProgStateRef -> x -> IO x
lSetState pstRef x = modifyIORef pstRef (lSet x) >> return x

-- to be phased out
throwOnParseError :: String -> Either ParseError x -> IO x
throwOnParseError descr (Left err) = throwIO (BadInputException descr err)
throwOnParseError _ (Right p)  = return p

data BadInputException = BadInputException String ParseError
  deriving (Show, Typeable)

instance Exception BadInputException

data L a = Loadable a => L

-- | Load something, exiting GenI if we have not been given the
--   appropriate flag
loadOrDie :: forall f a . (Eq f, Typeable f, Loadable a)
          => L a
          -> (FilePath -> f) -- ^ flag
          -> String
          -> ProgStateRef
          -> IO a
loadOrDie L flg descr pstRef =
  withFlagOrDie flg pstRef descr $ \f -> do
   v <- verbosity pstRef
   x <- withLoadStatus v f descr lParseFromFile
     >>= throwOnParseError descr
     >>= lSetState pstRef
   return x

-- | Load something from a string rather than a file
loadFromString :: Loadable a => ProgStateRef
               -> String -- ^ description
               -> String -- ^ string to load
               -> IO a
loadFromString pstRef descr s =
  throwOnParseError descr (lParse "" s) >>= lSetState pstRef

instance Loadable Lexicon where
  lParse f = fmap toLexicon . runParser geniLexicon () f
    where
     fixEntry  = finaliseVars "" -- anonymise singletons for performance
               . sorter
     toLexicon = map fixEntry
     sorter l  = l { isemantics = (sortByAmbiguity . isemantics) l }
  lSet x p = p { le = x }
  lSummarise x = show (length x) ++ " lemmas"

instance Loadable Macros where
  lParse f = runParser geniMacros () f
  lSet x p = p { gr = x }
  lSummarise x = show (length x) ++ " schemata"

loadLexicon :: ProgStateRef -> IO Lexicon
loadLexicon = loadOrDie (L :: L Lexicon) LexiconFlg "lexicon"

-- | The macros are stored as a hashing function in the monad.
loadGeniMacros :: ProgStateRef -> IO Macros
loadGeniMacros pstRef =
  withFlagOrDie MacrosFlg pstRef descr $ \f -> do
     v <- verbosity pstRef
     withLoadStatus v f descr (parseFromFileMaybeBinary lParseFromFile)
     >>= throwOnParseError "tree schemata"
     >>= lSetState pstRef
  where
   descr = "trees"

-- | Load something, but only if we are configured to do so
loadOptional :: forall f a . (Eq f, Typeable f, Loadable a)
             => L a
             -> (FilePath -> f) -- ^ flag
             -> String
             -> ProgStateRef
             -> IO ()
loadOptional L flg descr pstRef =
  withFlagOrIgnore flg pstRef $ \f -> do
   v <- verbosity pstRef
   x <- withLoadStatus v f descr lParseFromFile
     >>= throwOnParseError descr
     >>= lSetState pstRef
   let _ = x :: a
   return () -- ignore

newtype MorphFnL = MorphFnL MorphInputFn

instance Loadable MorphFnL where
  lParse f = fmap (MorphFnL . readMorph) . runParser geniMorphInfo () f
  lSet (MorphFnL x) p = p { morphinf = x }
  lSummarise _ = "morphinfo"

newtype TracesL = TracesL [Text]

instance Loadable TracesL where
    lParse _ = Right . TracesL . T.lines . T.pack
    lSet (TracesL xs) p = p { traces = xs }
    lSummarise (TracesL xs) = show (length xs) ++ " traces"

instance Loadable OtRanking where
  lParse _ = resultToEither2 . decode
  lSet r p = p { ranking = r }
  lSummarise _ = "ranking"

loadMorphInfo :: ProgStateRef -> IO ()
loadMorphInfo = loadOptional (L :: L MorphFnL) MorphInfoFlg "morphological info"

loadTraces :: ProgStateRef -> IO ()
loadTraces = loadOptional (L :: L TracesL) TracesFlg "traces"

loadRanking :: ProgStateRef -> IO ()
loadRanking = loadOptional (L :: L OtRanking) RankingConstraintsFlg "OT constraints"

resultToEither2 :: Result a -> Either ParseError a
resultToEither2 r =
  case resultToEither r of
    Left e  -> runParser (fail e) () "" [] -- convoluted way to generate a Parsec error
    Right x -> Right x

-- Target semantics
--
-- Reading in the target semantics (or test suite) is a little more
-- complicated.  It follows the same general schema as above, except
-- that we parse the file twice: once for our internal representation,
-- and once to get a string representation of each test case.  The
-- string representation is for the graphical interface; it avoids us
-- figuring out how to pretty-print things because we can assume the
-- user will format it the way s/he wants.

newtype TestSuiteL = TestSuiteL [TestCase]

instance Loadable TestSuiteL where
 lParse f s =
   case runParser geniTestSuite () f s of
     Left e     -> Left e
     Right sem  -> case runParser geniTestSuiteString () f s of
        Left e      -> Left e
        Right mStrs -> Right (TestSuiteL (zipWith cleanup sem mStrs))
   where
    cleanup tc str =
        tc { tcSem = first3 sortSem (tcSem tc)
           , tcSemString = str }
 --
 lSet (TestSuiteL _) p = p
 lSummarise (TestSuiteL x) = show (length x) ++ " cases"

-- |
loadTestSuite :: ProgStateRef -> IO [TestCase]
loadTestSuite pstRef = do
  TestSuiteL xs <- loadOrDie (L :: L TestSuiteL) TestSuiteFlg "test suite" pstRef
  return xs

parseSemInput :: String -> Either ParseError SemInput
parseSemInput = fmap smooth . runParser geniSemanticInput () "semantics"
  where
    smooth (s,r,l) = (sortSem s, sort r, l)

-- Helpers for loading files

withFlag :: forall f a . (Eq f, Typeable f)
         => (FilePath -> f) -- ^ flag
         -> ProgStateRef
         -> IO a               -- ^ null action
         -> (FilePath -> IO a) -- ^ job
         -> IO a
withFlag flag pstRef z job =
 do config <- pa `fmap` readIORef pstRef
    case getFlagP flag config of
      Nothing -> z
      Just  x -> job x

withFlagOrIgnore :: forall f . (Eq f, Typeable f)
                 => (FilePath -> f) -- ^ flag
                 -> ProgStateRef
                 -> (FilePath -> IO ())
                 -> IO ()
withFlagOrIgnore flag pstRef = withFlag flag pstRef (return ())

withFlagOrDie :: forall f a . (Eq f, Typeable f)
              => (FilePath -> f) -- ^ flag
              -> ProgStateRef
              -> String
              -> (FilePath -> IO a)
              -> IO a
withFlagOrDie flag pstRef description =
    withFlag flag pstRef (fail msg)
  where
    msg = "Please specify a " ++ description ++ " file!"

withLoadStatus :: Loadable a
               => Bool                    -- ^ verbose
               -> FilePath             -- ^ file to load
               -> String               -- ^ description
               -> (FilePath -> IO (Either ParseError a)) -- ^ parsing cmd
               -> IO (Either ParseError a)
withLoadStatus False f _ p = p f
withLoadStatus True  f d p = do
  ePutStr $ unwords [ "Loading",  d, f ++ "... " ]
  eFlush
  mx <- p f
  ePutStrLn $ either (const "ERROR") (\x -> lSummarise x ++ " loaded") mx
  return mx

parseFromFileMaybeBinary :: Binary a
                         => (FilePath -> IO (Either ParseError a))
                         -> FilePath
                         -> IO (Either ParseError a)
parseFromFileMaybeBinary p f =
 if (".genib" `isSuffixOf` f)
    then Right `fmap` decodeFile f
    else p f

-- --------------------------------------------------------------------
-- Surface realisation - entry point
-- --------------------------------------------------------------------

-- | 'GeniResults' is the outcome of running GenI on a single input semantics.
--   Each distinct result is returned as a single 'GeniResult' (NB: a single
--   result may expand into multiple strings through morphological
--   post-processing),
data GeniResults = GeniResults
    { grResults        :: [GeniResult] -- ^ one per chart item
    , grGlobalWarnings :: [Text]       -- ^ usually from lexical selection
    , grStatistics     :: Statistics   -- ^ things like number of chart items
                                       --   to help study efficiency
    }

data GeniResult = GError   GeniError
                | GSuccess GeniSuccess
  deriving (Ord, Eq)

isSuccess :: GeniResult -> Bool
isSuccess (GSuccess _) = True
isSuccess (GError _)   = False

data GeniError = GeniError [Text]
  deriving (Ord, Eq)

data GeniSuccess = GeniSuccess
    { grLemmaSentence :: LemmaPlusSentence -- ^ “original” uninflected result 
    , grRealisations  :: [Text]            -- ^ results after morphology
    , grResultType    :: ResultType
    , grWarnings      :: [Text]            -- ^ warnings “local” to this particular
                                           --   item, cf. 'grGlobalWarnings'
    , grDerivation    :: B.TagDerivation   -- ^ derivation tree behind the result
    , grOrigin        :: Integer           -- ^ normally a chart item id
    , grLexSelection  :: [GeniLexSel]      -- ^ the lexical selection behind
                                           --   this result (info only)
    , grRanking       :: Int               -- ^ see 'NLP.GenI.OptimalityTheory'
    , grViolations    :: [OtViolation]     -- ^ which OT constraints were violated
    } deriving (Ord, Eq)

data GeniLexSel = GeniLexSel
    { nlTree  :: Text
    , nlTrace :: [Text]
    } deriving (Ord, Eq)

data ResultType = CompleteResult | PartialResult deriving (Ord, Eq)

instance Pretty GeniError where
    pretty (GeniError xs) = T.intercalate "\n" $ map ("Error:" <+>) xs

-- | Entry point! (the most useful function to know here)
-- 
--   * Initialises the realiser (lexical selection, among other things),
--
--   * Runs the builder (the surface realisation engine proper)
--
--   * Unpacks the builder results 
--
--   * Finalises the results (morphological generation)
--
--   In addition to the results, this returns a generator state.  The latter is
--   is mostly useful for debugging via the graphical interface.
--   Note that we assumes that you have already loaded in your grammar and
--   parsed your input semantics.
runGeni :: ProgStateRef -> SemInput -> B.Builder st it Params -> IO (GeniResults,st)
runGeni pstRef semInput builder = do
     pst <- readIORef pstRef
     let config = pa pst
         run    = B.run builder
     -- step 1: lexical selection
     (initStuff, initWarns) <- initGeni pstRef semInput
     --force evaluation before measuring start time to avoid including grammar/lexicon parsing.
     start <- rnf initStuff `seq` getCPUTime
     -- step 2: chart generation
     let (finalSt, stats) = run initStuff config
     -- step 3: unpacking and
     -- step 4: post-processing
     results <- extractResults pstRef builder finalSt
     --force evaluation before measuring end time to account for all the work that should be done.
     end <- rnf results `seq` getCPUTime
     let elapsedTime = picosToMillis $! end - start
         diff = round (elapsedTime :: Double) :: Int
         stats2 = updateMetrics (incrIntMetric "gen_time"  (fromIntegral diff) ) stats
         gresults = GeniResults { grResults        = results
                                , grStatistics     = stats2
                                , grGlobalWarnings = map showWarnings (fromGeniWarnings initWarns)
                                }
     return (gresults, finalSt)
  where
    showWarnings = T.intercalate "\n" . showGeniWarning

-- | This is a helper to 'runGenI'. It's mainly useful if you are building
--   interactive GenI debugging tools.
--
--   Given a builder state,
--
--   * Unpacks the builder results
--
--   * Finalises the results (morphological generation)
extractResults :: ProgStateRef ->  B.Builder st it Params -> st -> IO [GeniResult]
extractResults pstRef builder finalSt = do
    config  <- pa <$> readIORef pstRef
    -- step 3: unpacking
    let uninflected = B.unpack builder finalSt
        (rawResults, resultTy) =
            if null uninflected && hasFlagP PartialFlg config
               then (B.partial builder finalSt, PartialResult)
               else (uninflected              , CompleteResult)
        status = B.finished builder finalSt
    -- step 4: post-processing
    finaliseResults pstRef (resultTy, status, rawResults)

-- --------------------------------------------------------------------
-- Surface realisation - sub steps
-- --------------------------------------------------------------------

-- | 'initGeni' performs lexical selection and strips the input semantics of
--   any morpohological literals
initGeni :: ProgStateRef -> SemInput -> IO (B.Input, GeniWarnings)
initGeni pstRef semInput_ = do
    pst <- readIORef pstRef
    let semInput = stripMorphStuff pst
                 . maybeRemoveConstraints pst
                 $ semInput_
    -- lexical selection
    selection <- runLexSelection pstRef semInput
    -- strip morphological predicates
    let initStuff = B.Input 
          { B.inSemInput = semInput
          , B.inLex   = lsLexEntries selection
          , B.inCands = map (\c -> (c,-1)) (lsAnchored selection)
          }
    return (initStuff, lsWarnings selection)
  where
    stripMorphStuff pst = first3 (stripMorphSem (morphinf pst))
    -- disable constraints if the NoConstraintsFlg pessimisation is active
    maybeRemoveConstraints pst =
         if hasOpt NoConstraints (pa pst) then removeConstraints else id

-- | 'finaliseResults' does any post-processing steps that we want to integrate
--   into mainline GenI.  So far, this consists of morphological realisation and
--   OT ranking
finaliseResults :: ProgStateRef -> (ResultType, B.GenStatus, [B.Output]) -> IO [GeniResult]
finaliseResults pstRef (ty, status, os) = do
    pst <- readIORef pstRef
    -- morph TODO: make this a bit safer
    mss <- case getFlagP MorphCmdFlg (pa pst) of
             Nothing  -> let morph = fromMaybe (map sansMorph) (customMorph (pa pst))
                         in  return (morph sentences)
             Just cmd -> map snd `fmap` inflectSentencesUsingCmd cmd sentences
    -- OT ranking
    let unranked = zipWith (sansRanking pst) os mss
        rank = rankResults (getTraces pst) grDerivation (ranking pst)
        successes = map addRanking (rank unranked)
        failures  = case status of
                      B.Error str -> [GeniError [str]]
                      B.Finished  -> []
                      B.Active    -> []
    return (map GError failures ++ map GSuccess successes)
 where
  sentences = map snd3 os
  sansRanking pst (i,l,d) rs = GeniSuccess
               { grLemmaSentence = l
               , grRealisations = moRealisations rs
               , grWarnings     = moWarnings rs
               , grDerivation   = d
               , grLexSelection = map (\x -> GeniLexSel x (getTraces pst x)) (B.lexicalSelection d)
               , grRanking = -1
               , grViolations = []
               , grResultType = ty
               , grOrigin     = i
               }
  addRanking (i,res,vs) = res { grViolations = vs, grRanking = i }

-- --------------------------------------------------------------------
-- Displaying results
-- --------------------------------------------------------------------

-- | Show the sentences produced by the generator, in a relatively compact form
showRealisations :: [String] -> String
showRealisations [] = "(none)"
showRealisations xs = unlines . map sho . Map.toList . histogram $ xs
  where
   sho (x,1) = x
   sho (x,c) = x ++ " (" ++ show c ++ " instances)"

-- | No morphology! Pretend the lemma string is a sentence
lemmaSentenceString :: GeniSuccess -> Text
lemmaSentenceString = T.unwords . map lpLemma . grLemmaSentence

prettyResult :: ProgState -> GeniSuccess -> Text
prettyResult pst nr =
    T.intercalate "\n" . map showOne . grRealisations $ nr
 where
    showOne str = pretty theRanking <> ". " <> str <> "\n" <> violations
    violations  = prettyViolations tracesFn verbose (grViolations nr)
    theRanking  = grRanking nr
    verbose  = hasFlagP VerboseModeFlg (pa pst)
    tracesFn = getTraces pst

-- | 'getTraces' is most likely useful for grammars produced by a
--   metagrammar system.  Given a tree name, we retrieve the ``trace''
--   information from the grammar for all trees that have this name.  We
--   assume the tree name was constructed by GenI; see the source code for
--   details.
getTraces :: ProgState -> Text -> [Text]
getTraces pst tname =
  filt $ concat [ ptrace t | t <- gr pst, pidname t == readPidname tname ]
  where
   filt = case traces pst of
          []    -> id
          theTs -> filter (`elem` theTs)

-- | We assume the name was constructed by 'combineName'
readPidname :: Text -> Text
readPidname n =
    case T.splitOn ":" n of
        (_:_:p:_) -> p
        _         -> geniBug "NLP.GenI.readPidname or combineName are broken"

-- --------------------------------------------------------------------
-- Lexical selection
-- --------------------------------------------------------------------

-- | Runs the lexical selection (be it the standard GenI version or
--   a custom function supplied by a user) and runs the results
--   through the universal 'finaliseLexSelection'.
--
--   Also hunts for some warning conditions
runLexSelection :: ProgStateRef -> SemInput -> IO LexicalSelection
runLexSelection pstRef (tsem,_,litConstrs) = do
    pst <- readIORef pstRef
    let config   = pa pst
        verbose  = hasFlagP VerboseModeFlg config
    -- perform lexical selection
    selector  <- getLexicalSelector pstRef
    selection <- selector (gr pst) (le pst) tsem
    let lexCand   = lsLexEntries selection
        candFinal = finaliseLexSelection (morphinf pst) tsem litConstrs (lsAnchored selection)
    -- status
    when verbose $ T.hPutStrLn stderr . T.unlines $
        "Lexical items selected:"
        :  map (indent . showLexeme . fromFL . iword) lexCand
        ++ ["Trees anchored (family) :"]
        ++ map (indent . idname) candFinal
    -- warnings
    let semWarnings = case missingLiterals candFinal tsem of
                       [] -> []
                       xs -> [NoLexSelection xs]
    return $ selection { lsAnchored = candFinal
                       , lsWarnings = mkGeniWarnings semWarnings `mappend` lsWarnings selection
                       }
  where
    indent  x = ' ' `T.cons` x

-- | Grab the lexical selector from the config, or return the standard GenI
--   version if none is supplied
getLexicalSelector :: ProgStateRef -> IO LexicalSelector
getLexicalSelector pstRef = do
  config <- pa <$> readIORef pstRef
  case (customSelector config, grammarType config) of
    (Just s, _)            -> return s
    (Nothing, PreAnchored) -> mkPreAnchoredLexicalSelector pstRef
    (Nothing, _)           -> return defaultLexicalSelector

-- | @missingLiterals ts sem@ returns any literals in @sem@ that do not
--   appear in any of the @ts@ trees
missingLiterals :: [TagElem] -> [Literal GeniVal] -> [Literal GeniVal]
missingLiterals cands tsem =
   tsem \\ (nub $ concatMap tsemantics cands)

-- | Post-processes lexical selection results to things which
--   GenI considers applicable to all situations:
--
--   * attaches morphological information to trees
--
--   * throws out elementary trees that violate trace constraints
--     given by the user
--
--   * filters out any elementary tree whose semantics contains
--     things that are not in the input semantics
finaliseLexSelection :: MorphInputFn -> Sem -> [LitConstr] -> [TagElem] -> [TagElem]
finaliseLexSelection morph tsem litConstrs =
  setTidnums . considerCoherency . considerLc . considerMorph
 where
   -- attach any morphological information to the candidates
   considerMorph = attachMorph morph tsem
   -- filter out candidates which do not fulfill the trace constraints
   matchesLc t = all (`elem` myTrace) constrs
         where constrs = concat [ cs | (l,cs) <- litConstrs, l `elem` mySem ]
               mySem   = tsemantics t
               myTrace = ttrace t
   considerLc = filter matchesLc
   -- filter out candidates whose semantics has bonus stuff which does
   -- not occur in the input semantics
   considerCoherency = filter (all (`elem` tsem) . tsemantics)

-- --------------------------------------------------------------------
-- Pre-selection and pre-anchoring
-- --------------------------------------------------------------------

newtype PreAnchoredL = PreAnchoredL [TagElem]

instance Loadable PreAnchoredL where
  lParse f = fmap PreAnchoredL
           . runParser geniTagElems () f
  lSet _ p = p -- this does not update prog state at all
  lSummarise (PreAnchoredL xs) = show (length xs) ++ " trees"

readPreAnchored :: ProgStateRef -> IO [TagElem]
readPreAnchored pstRef = do
  PreAnchoredL xs <- loadOrDie (L :: L PreAnchoredL)
                        MacrosFlg "preanchored trees" pstRef
  return xs

mkPreAnchoredLexicalSelector :: ProgStateRef -> IO LexicalSelector
mkPreAnchoredLexicalSelector pstRef = do
  xs <- readPreAnchored pstRef
  return (\_ _ _ -> return (LexicalSelection xs [] mempty))

-- --------------------------------------------------------------------
-- Boring utility code
-- --------------------------------------------------------------------

verbosity :: ProgStateRef -> IO Bool
verbosity = fmap (hasFlagP VerboseModeFlg . pa)
          . readIORef

instance JSON GeniResult where
 readJSON j =
    case readJSON j of
      Ok s    -> Ok (GSuccess s)
      Error _ -> GError `fmap` readJSON j
 showJSON (GSuccess x) = showJSON x
 showJSON (GError   x) = showJSON x

instance JSON GeniSuccess where
 readJSON j = do
   jo <- fromJSObject `fmap` readJSON j
   let field x = maybe (fail $ "Could not find: " ++ x) readJSON
               $ lookup x jo
   GeniSuccess <$> field "raw"
               <*> field "realisations"
               <*> field "result-type"
               <*> field "warnings"
               <*> field "derivation"
               <*> field "chart-item"
               <*> field "lexical-selection"
               <*> field "ranking"
               <*> field "violations"
 showJSON nr =
     JSObject . toJSObject $ [ ("raw", showJSON $ grLemmaSentence nr)
                             , ("realisations", showJSONs $ grRealisations nr)
                             , ("derivation", showJSONs $ grDerivation nr)
                             , ("lexical-selection", showJSONs $ grLexSelection nr)
                             , ("ranking", showJSON $ grRanking nr)
                             , ("violations", showJSONs $ grViolations nr)
                             , ("result-type", showJSON $ grResultType nr)
                             , ("chart-item", showJSON $ grOrigin nr)
                             , ("warnings",   showJSONs $ grWarnings nr)
                             ]

instance JSON GeniError where
 readJSON j =
    do jo <- fromJSObject `fmap` readJSON j
       let field x = maybe (fail $ "Could not find: " ++ x) readJSON
                   $ lookup x jo
       GeniError  <$> field "errors"
 showJSON (GeniError xs) =
     JSObject . toJSObject $ [ ("errors", showJSON xs) ]

instance JSON ResultType where
  readJSON j =
    do js <- fromJSString `fmap` readJSON j
       case js of
         "partial"   -> return PartialResult
         "complete"  -> return CompleteResult
         ty          -> fail $ "unknown result type: " ++ ty
  showJSON CompleteResult = JSString $ toJSString "complete"
  showJSON PartialResult  = JSString $ toJSString "partial"

instance JSON GeniLexSel where
 readJSON j =
    do jo <- fromJSObject `fmap` readJSON j
       let field x = maybe (fail $ "Could not find: " ++ x) readJSON
                   $ lookup x jo
       GeniLexSel <$> field "lex-item"
                  <*> field "trace"
 showJSON x =
     JSObject . toJSObject $ [ ("lex-item", showJSON  $ nlTree x)
                             , ("trace",    showJSONs $ nlTrace x)
                             ]

-- Converts picoseconds to milliseconds.
picosToMillis :: Integer -> Double
picosToMillis t = realToFrac t / (10^(9 :: Int))

{-
data MNAME = MNAME deriving Typeable
logname :: String
logname = mkLogname MNAME
-}

{-!
deriving instance NFData GeniResult
deriving instance NFData GeniSuccess
deriving instance NFData GeniError
deriving instance NFData ResultType
deriving instance NFData GeniLexSel
!-}

-- GENERATED START

 
instance NFData GeniResult where
        rnf (GError x1) = rnf x1 `seq` ()
        rnf (GSuccess x1) = rnf x1 `seq` ()

 
instance NFData GeniSuccess where
        rnf (GeniSuccess x1 x2 x3 x4 x5 x6 x7 x8 x9)
          = rnf x1 `seq`
              rnf x2 `seq`
                rnf x3 `seq`
                  rnf x4 `seq`
                    rnf x5 `seq` rnf x6 `seq` rnf x7 `seq` rnf x8 `seq` rnf x9 `seq` ()

 
instance NFData GeniError where
        rnf (GeniError x1) = rnf x1 `seq` ()

 
instance NFData ResultType where
        rnf CompleteResult = ()
        rnf PartialResult = ()

 
instance NFData GeniLexSel where
        rnf (GeniLexSel x1 x2) = rnf x1 `seq` rnf x2 `seq` ()
-- GENERATED STOP