-- GenI surface realiser
-- Copyright (C) 2005 Carlos Areces and Eric Kow
--
-- This program is free software; you can redistribute it and/or
-- modify it under the terms of the GNU General Public License
-- as published by the Free Software Foundation; either version 2
-- of the License, or (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

-- | This module provides basic datatypes specific to Tree Adjoining Grammar
-- (TAG) elementary trees and some low-level operations.

{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE MultiParamTypeClasses, FlexibleInstances #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE OverloadedStrings #-}

module NLP.GenI.Tag (
   -- Main Datatypes
   Tags, TagElem(..), TagItem(..), TagSite(..),
   TagDerivation, DerivationStep(..), dsChild, dsParent, dsParentSite,
   ts_synIncomplete, ts_semIncomplete, ts_tbUnificationFailure,
   ts_rootFeatureMismatch,

   -- Functions from Tags
   addToTags, tagLeaves, getLexeme, toTagSite,

   -- Functions from TagElem
   setTidnums, plugTree, spliceTree,

   -- General functions
   mapBySem,
   collect, detectSites,
) where

import Control.Applicative ( (<$>), (<*>) )
import qualified Data.Map as Map
import Data.Maybe (fromMaybe, listToMaybe, mapMaybe, catMaybes)
import Data.Tree
import Data.Text ( Text )
import qualified Data.Text as T

import Control.DeepSeq
import Data.Generics (Data)
import Data.Typeable (Typeable)
import Data.FullList hiding ( (++) )
import Text.JSON

import NLP.GenI.FeatureStructure ( AvPair(..), Flist )
import NLP.GenI.General (listRepNode, groupByFM, preTerminals, geniBug)
import NLP.GenI.GeniShow
import NLP.GenI.GeniVal ( GeniVal(..), DescendGeniVal(..), Collectable(..), Idable(..),
                          isConst,
                        )
import NLP.GenI.Polarity.Types ( PolarityKey(..), SemPols )
import NLP.GenI.Pretty
import NLP.GenI.Semantics ( Sem, Literal(..), emptyLiteral )
import NLP.GenI.TreeSchema
    ( Ptype(..), GNode(..), GType(..), NodeName, lexemeAttributes )

-- ----------------------------------------------------------------------
-- Tags
-- ----------------------------------------------------------------------

-- | An anchored grammar.
--   The grammar associates a set of semantic predicates to a list of trees each.
type Tags = Map.Map String [TagElem]                            

-- | 'addTags' @tags key elem@ adds @elem@ to the the list of elements associated
--   to the key
addToTags :: Tags -> String -> TagElem -> Tags
addToTags t k e = Map.insertWith (++) k [e] t

-- ----------------------------------------------------------------------
-- TagElem
-- ----------------------------------------------------------------------

data TagSite = TagSite
    { tsName :: Text
    , tsUp   :: Flist GeniVal
    , tsDown :: Flist GeniVal
    , tsOrigin :: Text
    }
  deriving (Eq, Ord, Data, Typeable)

data TagElem = TE
    { idname       :: Text
    , ttreename    :: Text
    , tidnum       :: Integer
    , ttype        :: Ptype
    , ttree        :: Tree (GNode GeniVal)
    , tsemantics   :: Sem
     -- optimisation stuff
     -- (polarity key to charge interval)
    , tpolarities  :: Map.Map PolarityKey (Int,Int)
    , tinterface   :: Flist GeniVal  -- for idxconstraints (pol)
    , ttrace       :: [Text]
    , tsempols     :: [SemPols] -- ^ can be empty
    }
  deriving (Eq, Data, Typeable)

-- | Given a tree(GNode) returns a list of substitution or adjunction
--   nodes, as well as remaining nodes with a null adjunction constraint.
detectSites :: Tree (GNode GeniVal) -> ([NodeName], [NodeName], [NodeName])
detectSites t =
  ( sites isSub           -- for substitution
  , sites (not.gaconstr)  -- for adjunction
  , sites constrButNotSub -- for neither
  )
 where
 ns = flatten t
 sites match = map gnname . filter match $ ns
 isSub n = gtype n == Subs
 constrButNotSub n = gaconstr n && (not $ isSub n)

toTagSite :: GNode GeniVal -> TagSite
toTagSite n = TagSite (gnname n) (gup n) (gdown n) (gorigin n)

type TagDerivation = [ DerivationStep ]

data DerivationStep = SubstitutionStep Text Text Text
                    | AdjunctionStep   Text Text Text
                    | InitStep         Text
 deriving (Show, Ord, Eq)

dsOp :: DerivationStep -> Char
dsOp (SubstitutionStep {}) = 's'
dsOp (AdjunctionStep {})   = 'a'
dsOp (InitStep {})         = 'i'

dsChild :: DerivationStep -> Text
dsChild (SubstitutionStep c _ _) = c
dsChild (AdjunctionStep c _ _ )  = c
dsChild (InitStep c)             = c

dsParent :: DerivationStep -> Maybe Text
dsParent (SubstitutionStep _ p _) = Just p
dsParent (AdjunctionStep _ p _)   = Just p
dsParent (InitStep _)             = Nothing

dsParentSite :: DerivationStep -> Maybe Text
dsParentSite (SubstitutionStep _ _ s) = Just s
dsParentSite (AdjunctionStep _ _ s)   = Just s
dsParentSite (InitStep _)             = Nothing

instance JSON DerivationStep where
 readJSON j = do
   jo <- fromJSObject `fmap` readJSON j
   let field x = maybe (fail $ "Could not find: " ++ x) readJSON
               $ lookup x jo
   op    <- field "op"
   child <- field "child"
   case op of
    "s" -> AdjunctionStep   child <$> field "parent" <*> field "parent-node"
    "a" -> SubstitutionStep child <$> field "parent" <*> field "parent-node"
    "i" -> return (InitStep child)
    x   -> fail $ "Don't know about derivation operation '" ++ x ++ "'"
 showJSON x =
     JSObject . toJSObject $ [ ("op",     showJSON $ dsOp x)
                             , ("child",  showJSON $ dsChild x)
                             ] ++ catMaybes
                             [ (\v -> ("parent", showJSON v))      <$> dsParent x
                             , (\v -> ("parent-node", showJSON v)) <$> dsParentSite x
                             ]

instance Ord TagElem where
  compare t1 t2 = 
    case (ttype t1, ttype t2) of
         (Initial, Initial)   -> compareId 
         (Initial, Auxiliar)  -> LT
         (Auxiliar, Initial)  -> GT
         (Auxiliar, Auxiliar) -> compareId 
    where compareId  = compare (tidnum t1) (tidnum t2)

instance DescendGeniVal TagElem where
  descendGeniVal s te =
    te { tinterface = descendGeniVal s (tinterface te)
       , ttree      = descendGeniVal s (ttree te)
       , tsemantics = descendGeniVal s (tsemantics te) }

instance DescendGeniVal TagSite where
  descendGeniVal s (TagSite n fu fd o) = TagSite n (descendGeniVal s fu) (descendGeniVal s fd) o

instance Collectable TagElem where
  collect t = (collect $ tinterface t) . (collect $ ttree t) 
            . (collect $ tsemantics t)

instance Idable TagElem where
  idOf = tidnum

-- Substitution and Adjunction

-- | Plug the first tree into the second tree at the specified node.
--   Anything below the second node is silently discarded.
--   We assume the trees are pluggable; it is treated as a bug if
--   they are not!
plugTree :: Tree NodeName
         -> NodeName
         -> Tree NodeName
         -> Tree NodeName
plugTree male n female =
    case listRepNode (const male) (nmatch n) [female] of
         ([r], True) -> r
         _           -> geniBug oops
  where
    oops = "plugTree: unexpected plug failure at node " ++ T.unpack n

-- | Given two trees 'auxt' and 't', splice the tree 'auxt' into
--   't' via the TAG adjunction rule.
spliceTree :: NodeName      -- ^ foot node of the aux tree
           -> Tree NodeName -- ^ aux tree
           -> NodeName      -- ^ place to adjoin in target tree
           -> Tree NodeName -- ^ target tree
           -> Tree NodeName
spliceTree f auxT n top =
    plugTree middle n top 
  where
    bottom = fromMaybe (geniBug oops) (findSubTree n top)
    middle = plugTree bottom f auxT
    oops = unwords
        [ "NLP.GenI.Tag.spliceTree:"
        , "Unexpected adjunction failure."
        , "Could not find node " ++ T.unpack n ++ " of target tree."
        ]

nmatch :: NodeName -> Tree NodeName -> Bool
nmatch n (Node a _) = a == n

findSubTree :: NodeName -> Tree NodeName -> Maybe (Tree NodeName)
findSubTree n n2@(Node x ks)
  | x == n    = Just n2
  | otherwise = case mapMaybe (findSubTree n) ks of
                []    -> Nothing
                (h:_) -> Just h

-- Unique ID

-- | Assigns a unique id to each element of this list, that is, an integer
--   between 1 and the size of the list.
setTidnums :: [TagElem] -> [TagElem]
setTidnums xs = zipWith (\c i -> setOrigin $ c {tidnum = i}) xs [1..]

setOrigin :: TagElem -> TagElem
setOrigin te =
    te { ttree = fmap setLabel . ttree $ te }
  where
    setLabel g = g { gorigin = nameNumber }
    nameNumber = idname te
        `T.append` ":"
        `T.append` (T.pack . show . tidnum) te

-- ----------------------------------------------------------------------
-- TAG Item
-- ----------------------------------------------------------------------

-- | 'TagItem' is a generalisation of 'TagElem'.
class TagItem t where 
    tgIdName    :: t -> Text
    tgIdNum     :: t -> Integer
    tgSemantics :: t -> Sem
    tgTree      :: t -> Tree (GNode GeniVal)

instance TagItem TagElem where
    tgIdName = idname
    tgIdNum  = tidnum
    tgSemantics = tsemantics
    tgTree = ttree

-- | Sorts trees into a Map.Map organised by the first literal of their
--   semantics.  This is useful in at least three places: the polarity
--   optimisation, the gui display code, and code for measuring the efficiency
--   of GenI.  Note: trees with a null semantics are filed under an empty
--   predicate, if any.
mapBySem :: (TagItem t) => [t] -> Map.Map (Literal GeniVal) [t]
mapBySem ts = 
  let gfn t = case tgSemantics t of
              []    -> emptyLiteral
              (x:_) -> x
  in groupByFM gfn ts

-- ----------------------------------------------------------------------
-- Extracting sentences
-- ----------------------------------------------------------------------

type UninflectedDisjunction = ([Text], Flist GeniVal)

-- | Normally, extracting the sentences from a TAG tree would just
--   consist of reading its leaves.  But if you want the generator to
--   return inflected forms instead of just lemmas, you also need to
--   return the relevant features for each leaf.  In TAG, or at least our
--   use of it, the features come from the *pre-terminal* nodes, that is,
--   not the leaves themselves but their parents.  Another bit of
--   trickiness: because of atomic disjunction, leaves might have more
--   than one value, so we can't just return a String lemma but a list of
--   String, one for each possibility.
tagLeaves :: TagElem -> [ (NodeName, UninflectedDisjunction) ]
tagLeaves te = [ (gnname pt, (getLexeme t, gup pt)) | (pt,t) <- preTerminals . ttree $ te ]

-- | Try in order: lexeme, lexeme attributes, node name
getLexeme :: GNode GeniVal -> [Text]
getLexeme node =
    case glexeme node of
        []   -> fromMaybe [gnname node] $ firstMaybe grab lexemeAttributes
        lexs -> lexs
  where
    grab la = firstMaybe match guppy
      where
        match (AvPair a v) | isConst v && a == la =
             fromFL <$> gConstraints v
        match _ = Nothing
    guppy = gup node

firstMaybe :: (a -> Maybe b) -> [a] -> Maybe b
firstMaybe fn = listToMaybe . mapMaybe fn

-- ----------------------------------------------------------------------
-- Conversion to text
-- ----------------------------------------------------------------------

instance GeniShow TagElem where
    geniShowText te = T.concat
        [ "% ------------------------- ", idname te
        , "\n", ttreename te, ":" , idname te
        , " " , (geniShowText $ tinterface te)
        , " " , (geniShowText $ ttype te)
        , "\n", (geniShowText $ ttree te)
        , "\n", geniKeyword "semantics" (geniShowText $ tsemantics te)
        ]

instance GeniShow [TagElem] where
    geniShowText = T.intercalate "\n\n" . map geniShowText

-- Useful for debugging adjunction and substitution nodes
instance Pretty [TagSite] where
    pretty =
        T.intercalate "\n  " . map fn
      where
        fn (TagSite n t b o) = T.intercalate "/"
            [ n, pretty t, pretty b, o ]

-- ----------------------------------------------------------------------
-- Diagnostic messages
--
-- Diagnostic messages let us know why a TAG tree is not returned as a result.
-- Whenever GenI decides to discard a tree, it sets the tdiagnostic field of 
-- the TagElem so that the person using a debugger can find out what went wrong.
-- ----------------------------------------------------------------------

ts_synIncomplete, ts_tbUnificationFailure :: String
ts_synIncomplete = "syntactically incomplete"
ts_tbUnificationFailure = "top/bot unification failure"

ts_rootFeatureMismatch :: Flist GeniVal -> String
ts_rootFeatureMismatch good = "root feature does not unify with " ++ prettyStr good

ts_semIncomplete :: [Literal GeniVal] -> String
ts_semIncomplete sem = "semantically incomplete - missing:  " ++ prettyStr sem

-- ----------------------------------------------------------------------
-- Performance
-- ----------------------------------------------------------------------

{-!
deriving instance NFData TagElem
deriving instance NFData DerivationStep
!-}

-- GENERATED START

 
instance NFData TagElem where
        rnf (TE x1 x2 x3 x4 x5 x6 x7 x8 x9 x10)
          = rnf x1 `seq`
              rnf x2 `seq`
                rnf x3 `seq`
                  rnf x4 `seq`
                    rnf x5 `seq`
                      rnf x6 `seq`
                        rnf x7 `seq` rnf x8 `seq` rnf x9 `seq` rnf x10 `seq` ()

 
instance NFData DerivationStep where
        rnf (SubstitutionStep x1 x2 x3)
          = rnf x1 `seq` rnf x2 `seq` rnf x3 `seq` ()
        rnf (AdjunctionStep x1 x2 x3)
          = rnf x1 `seq` rnf x2 `seq` rnf x3 `seq` ()
        rnf (InitStep x1) = rnf x1 `seq` ()
-- GENERATED STOP