-- GenI surface realiser
-- Copyright (C) 2005-2009 Carlos Areces and Eric Kow
--
-- This program is free software; you can redistribute it and/or
-- modify it under the terms of the GNU General Public License
-- as published by the Free Software Foundation; either version 2
-- of the License, or (at your option) any later version.
--
-- This program is distributed in the hope that it will be useful,
-- but WITHOUT ANY WARRANTY; without even the implied warranty of
-- MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-- GNU General Public License for more details.
--
-- You should have received a copy of the GNU General Public License
-- along with this program; if not, write to the Free Software
-- Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.

{-# OPTIONS_GHC -fno-warn-orphans #-}
{-# LANGUAGE OverlappingInstances, FlexibleInstances #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE OverloadedStrings  #-}

-- | This module provides basic datatypes specific to Tree Adjoining Grammar
--   tree schemata.
module NLP.GenI.TreeSchema (
   Macros,
   SchemaTree, SchemaNode, Ttree(..), Ptype(..),

   -- Functions from Tree GNode
   root, rootUpd, foot, setLexeme, setAnchor, lexemeAttributes,
   crushTreeGNode,

   -- GNode
   GNode(..), gnnameIs, NodeName,
   GType(..), gCategory, showLexeme,
   crushGNode,
 ) where

import qualified Data.Map as Map
import Data.Binary
import Data.Tree
import Data.Text ( Text )
import qualified Data.Text as T

import Control.DeepSeq
import Data.FullList hiding (head, tail, (++))
import Data.Generics (Data)
import Data.Typeable (Typeable)

import NLP.GenI.General (filterTree, listRepNode, geniBug, quoteText)
import NLP.GenI.GeniShow
import NLP.GenI.GeniVal ( GeniVal(..), DescendGeniVal(..), Collectable(..),
                        )
import NLP.GenI.FeatureStructure ( AvPair(..), Flist, crushFlist )
import NLP.GenI.Pretty
import NLP.GenI.Semantics ( Sem )

-- ----------------------------------------------------------------------
-- Tree schemata

-- In GenI, the tree schemata are called `macros' for historical reasons.
-- We are working to phase out this name in favour of the more standard
-- `tree schema(ta)'.
-- ----------------------------------------------------------------------

type SchemaTree = Ttree SchemaNode
type SchemaNode = GNode [GeniVal]
type Macros = [SchemaTree]

data Ttree a = TT
    { params  :: [GeniVal]
    , pfamily :: Text
    , pidname :: Text
    , pinterface :: Flist GeniVal
    , ptype :: Ptype
    , psemantics :: Maybe Sem
    , ptrace :: [Text]
    , tree :: Tree a
    }
  deriving (Data, Typeable, Eq)

data Ptype = Initial | Auxiliar
  deriving (Show, Eq, Data, Typeable)

instance DescendGeniVal v => DescendGeniVal (Ttree v) where
  descendGeniVal s mt =
    mt { params = descendGeniVal s (params mt)
       , tree   = descendGeniVal s (tree mt)
       , pinterface  = descendGeniVal s (pinterface mt)
       , psemantics = descendGeniVal s (psemantics mt) }

instance (Collectable a) => Collectable (Ttree a) where
  collect mt = (collect $ params mt) . (collect $ tree mt) .
               (collect $ psemantics mt) . (collect $ pinterface mt)

-- ----------------------------------------------------------------------
-- Tree manipulation
-- ----------------------------------------------------------------------

-- Traversal

instance DescendGeniVal a => DescendGeniVal (Map.Map k a) where
  descendGeniVal s = {-# SCC "descendGeniVal" #-} Map.map (descendGeniVal s)

instance (Collectable a => Collectable (Tree a)) where
  collect = collect.flatten

-- Utility functions

root :: Tree a -> a
root (Node a _) = a

rootUpd :: Tree a -> a -> Tree a
rootUpd (Node _ l) b = (Node b l)

foot :: Tree (GNode a) -> GNode a
foot t = case filterTree (\n -> gtype n == Foot) t of
         [x] -> x
         _   -> geniBug $ "foot returned weird result"

-- | Given a lexical item @s@ and a Tree GNode t, returns the tree t'
--   where l has been assigned to the anchor node in t'
setAnchor :: FullList Text -> Tree (GNode a) -> Tree (GNode a)
setAnchor s t =
  let filt (Node a []) = (gtype a == Lex && ganchor a)
      filt _ = False
  in case listRepNode (setLexeme (fromFL s)) filt [t] of
     ([r],True) -> r
     _ -> geniBug $ "setLexeme " ++ show s ++ " returned weird result"

-- | Given a lexical item @l@ and a tree node @n@ (actually a subtree
--   with no children), return the same node with the lexical item as
--   its unique child.  The idea is that it converts terminal lexeme nodes
--   into preterminal nodes where the actual terminal is the given lexical
--   item
setLexeme :: [Text] -> Tree (GNode a) -> Tree (GNode a)
setLexeme l (Node a []) = Node a [ Node subanc [] ]
  where
    subanc = GN
        { gnname = T.concat $ "_" : gnname a : "." : l
        , gup    = []
        , gdown  = []
        , gaconstr = True
        , ganchor  = False
        , glexeme = l
        , gtype   = Other
        , gorigin = ""
        }
setLexeme _ _ = geniBug "impossible case in setLexeme - subtree with kids"

-- ----------------------------------------------------------------------
-- TAG nodes (GNode)
-- ----------------------------------------------------------------------

-- | A single node of a TAG tree.
data GNode gv = GN
    { gnname :: NodeName
    , gup    :: Flist gv   -- ^ top feature structure
    , gdown  :: Flist gv   -- ^ bottom feature structure
    , ganchor  :: Bool     -- ^ @False@ for na nodes
    , glexeme  :: [Text]   -- ^ @[]@ for na nodes
    , gtype    :: GType
    , gaconstr :: Bool
    , gorigin  :: Text -- ^ for TAG, this would be the elementary tree
                       --   that this node originally came from
    }
  deriving (Eq, Data, Typeable)

-- Node type used during parsing of the grammar
data GType = Subs | Foot | Lex | Other
  deriving (Show, Eq, Data, Typeable)

type NodeName = Text

-- Traversal

instance Collectable gv => Collectable (GNode gv) where
  collect n = (collect $ gdown n) . (collect $ gup n)

instance DescendGeniVal v => DescendGeniVal (GNode v) where
  descendGeniVal s gn =
    gn { gup = descendGeniVal s (gup gn)
       , gdown = descendGeniVal s (gdown gn) }

-- Utilities

gnnameIs :: NodeName -> GNode gv -> Bool
gnnameIs n = (== n) . gnname

-- | Return the value of the "cat" attribute, if available
gCategory :: Flist GeniVal -> Maybe GeniVal
gCategory top =
  case [ v | AvPair "cat" v <- top ] of
  []  -> Nothing
  [c] -> Just c
  _   -> geniBug $ "Impossible case: node with more than one category"

-- | Attributes recognised as lexemes, in order of preference
lexemeAttributes :: [Text]
lexemeAttributes = [ "lex", "phon", "cat" ]

-- ----------------------------------------------------------------------
-- Pretty printing and other text conversions
-- ----------------------------------------------------------------------

instance GeniShow Ptype where
    geniShow Initial  = "initial"
    geniShow Auxiliar = "auxiliary"

instance (GeniShow a) => GeniShow (Ttree a) where
    geniShowText tt = T.intercalate "\n" . filter (not . T.null) $
        [ "% ------------------------- ", pidname tt
        , T.unwords [ pfamily tt <> ":" <> pidname tt
                    , plist
                    , geniShowText (ptype  tt)
                    ]
        , geniShowText (tree   tt)
        , maybe "" showSem (psemantics tt)
        , showTr (ptrace tt)
        ]
      where
        plist = parens . T.unwords . concat $
            [ map geniShowText (params tt)
            , ["!"]
            , map geniShowText (pinterface tt)
            ]
        showSem = geniKeyword "semantics" . geniShowText
        showTr  = geniKeyword "trace" . squares . T.unwords

-- | The default show for GNode tries to be very compact; it only shows the value
--   for cat attribute and any flags which are marked on that node.
--
--   This is one the places where the pretty representation of a GenI object is
--   different from its GenI-format one
instance Pretty (GNode GeniVal) where
    pretty gn =
        stub `T.append` extra
      where
        cat_ = maybe "" pretty . gCategory $ gup gn
        lex_ = showLexeme (glexeme gn)
        --
        stub = T.intercalate ":" $ filter (not . T.null) [ cat_, lex_ ]
        extra = case gtype gn of
                    Subs -> " !"
                    Foot -> " *"
                    _    -> if gaconstr gn then " #"   else ""

instance GeniShow (GNode GeniVal) where
    geniShowText x =
        T.unwords . filter (not . T.null) $
            [ gnname x, gaconstrstr, gtypestr x, glexstr x, tbFeats x ]
      where
        gaconstrstr = case (gaconstr x, gtype x) of
                          (True, Other) -> "aconstr:noadj"
                          _             ->  ""
        gtypestr n  = case gtype n of
                          Subs -> "type:subst"
                          Foot -> "type:foot"
                          Lex  -> if ganchor n && (null.glexeme) n
                                     then "type:anchor" else "type:lex"
                          _    -> ""
        glexstr n =
            if null ls
               then ""
               else T.intercalate "|" (map quoteText ls)
          where
            ls = glexeme n
        tbFeats n =
            geniShowText (gup n)
            `T.append` "!"
            `T.append` geniShowText (gdown n)


-- FIXME: will have to think of nicer way - one which involves
-- unpacking the trees :-(
showLexeme :: [Text] -> Text
showLexeme []   = ""
showLexeme [l]  = l
showLexeme xs   = T.intercalate "|" xs

-- Fancy disjunction

crushTreeGNode :: Tree (GNode [GeniVal]) -> Maybe (Tree (GNode GeniVal))
crushTreeGNode (Node x xs) =
 do x2  <- crushGNode x
    xs2 <- mapM crushTreeGNode xs
    return $ Node x2 xs2

crushGNode :: GNode [GeniVal] -> Maybe (GNode GeniVal)
crushGNode gn =
  do gup2   <- crushFlist (gup gn)
     gdown2 <- crushFlist (gdown gn)
     return $ GN { gnname = gnname gn
                 , gup = gup2
                 , gdown = gdown2
                 , ganchor = ganchor gn
                 , glexeme = glexeme gn
                 , gtype = gtype gn
                 , gaconstr = gaconstr gn
                 , gorigin = gorigin gn}


instance Binary Ptype where
  put Initial = putWord8 0
  put Auxiliar = putWord8 1
  get = do
    tag_ <- getWord8
    case tag_ of
      0 -> return Initial
      1 -> return Auxiliar
      _ -> fail "no parse"

instance Binary gv => Binary (GNode gv) where
  put (GN a b c d e f g h) = put a >> put b >> put c >> put d >> put e >> put f >> put g >> put h
  get = get >>= \a -> get >>= \b -> get >>= \c -> get >>= \d -> get >>= \e -> get >>= \f -> get >>= \g -> get >>= \h -> return (GN a b c d e f g h)

instance Binary GType where
  put Subs = putWord8 0
  put Foot = putWord8 1
  put Lex = putWord8 2
  put Other = putWord8 3
  get = do
    tag_ <- getWord8
    case tag_ of
      0 -> return Subs
      1 -> return Foot
      2 -> return Lex
      3 -> return Other
      _ -> fail "no parse"

instance (Binary a) => Binary (Ttree a) where
  put (TT a b c d e f g h) = put a >> put b >> put c >> put d >> put e >> put f >> put g >> put h
  get = get >>= \a -> get >>= \b -> get >>= \c -> get >>= \d -> get >>= \e -> get >>= \f -> get >>= \g -> get >>= \h -> return (TT a b c d e f g h)

-- Node type used during parsing of the grammar
instance NFData GType where
  rnf x = x `seq` ()

instance NFData Ptype where
  rnf x = x `seq` ()

-- | A single node of a TAG tree.
instance NFData gv => NFData (GNode gv) where
  rnf (GN x1 x2 x3 x4 x5 x6 x7 x8)
          = rnf x1 `seq`
              rnf x2 `seq`
                rnf x3 `seq`
                  rnf x4 `seq`
                    rnf x5 `seq`
                      rnf x6 `seq`
                        rnf x7 `seq` rnf x8 `seq` ()