-----------------------------------------------------------------------------
-- |
-- Module      :  NLP.WordNet
-- Copyright   :  (c) Hal Daume III 2003-2004
-- License     :  BSD-style
--
-- Maintainer  :  hdaume@isi.edu
-- Stability   :  experimental
-- Portability :  non-portable (H98 + implicit parameters)
--
-- This is the top level module to the Haskell WordNet interface.
--
-- This module is maintained at:
--    <http://www.isi.edu/~hdaume/HWordNet/>.
--
-- This is the only module in the WordNet package you need to import.
-- The others provide utility functions and primitives that this
-- module is based on.
--
-- More information about WordNet is available at:
--    <http://http://www.cogsci.princeton.edu/~wn/>.
-----------------------------------------------------------------------------
module NLP.WordNet
    (
     -- * The basic type system
     module NLP.WordNet.Types,

     -- * Top level execution functions
     runWordNet,
     runWordNetQuiet,
     runWordNetWithOptions,

     -- * Functions to manually initialize the WordNet system; these are not
     --   needed if you use one of the "runWordNet" functions above.
     initializeWordNet,
     initializeWordNetWithOptions,
     closeWordNet,
     runs,

     -- * The basic database access functions.
     getOverview,
     searchByOverview,
     search,
     lookupKey,

     -- * The agglomeration functions
     relatedBy,
     closure,
     closureOn,

     -- * Computing lowest-common ancestor functions; the implementation
     --   of these can be tuned by providing a different "Bag" implementation.
     --   use "emptyQueue" for breadth-first-search (recommended) or "emptyStack"
     --   for depth-first-search, or write your own.
     meet,
     meetPaths,
     meetSearchPaths,
     Bag(..),
     emptyQueue,
     emptyStack,
    )
    where

import Prelude
import Data.Tree
import qualified Data.Set as Set
import System.IO.Unsafe

import NLP.WordNet.Common
import NLP.WordNet.Util
import NLP.WordNet.Types

import qualified NLP.WordNet.PrimTypes as T
import qualified NLP.WordNet.Prims     as P

-- | Takes a WordNet command, initializes the environment
-- and returns the results in the 'IO' monad.  WordNet
-- warnings are printed to stderr.
runWordNet :: WN a -> IO a
runWordNet = runWordNetWithOptions Nothing Nothing

-- | Takes a WordNet command, initializes the environment
-- and returns the results in the 'IO' monad.  WordNet
-- warnings are ignored.
runWordNetQuiet :: WN a -> IO a
runWordNetQuiet = runWordNetWithOptions Nothing (Just (\_ _ -> return ()))

-- | Takes a FilePath to the directory holding WordNet and
-- a function to do with warnings and a WordNet command, initializes 
-- the environment and returns the results in the 'IO' monad.
runWordNetWithOptions :: 
    Maybe FilePath ->                          -- word net data directory
    Maybe (String -> SomeException -> IO ()) ->    -- warning function (by default, warnings go to stderr)
    WN a ->                                    -- what to run
      IO a
runWordNetWithOptions dd warn wn = do
  wne <- P.initializeWordNetWithOptions dd warn
  let a = let ?wne = wne in wn
  -- P.closeWordNet wne
  return a

-- | Gives you a 'WordNetEnv' which can be passed to 'runs' or used
-- as the implicit parameter to the other WordNet functions.
initializeWordNet :: IO WordNetEnv
initializeWordNet = P.initializeWordNet

-- | Takes a FilePath to the directory holding WordNet and
-- a function to do with warnings, initializes 
-- the environment and returns a 'WordNetEnv' as in 'initializeWordNet'.
initializeWordNetWithOptions :: Maybe FilePath -> Maybe (String -> SomeException -> IO ()) -> IO WordNetEnv
initializeWordNetWithOptions = P.initializeWordNetWithOptions

-- | Closes all the handles associated with the 'WordNetEnv'.  Since
-- the functions provided in the "NLP.WordNet.WordNet" module
-- are /lazy/, you shouldn't do this until you're really done.
-- Or perhaps not at all (GC will eventually kick in).
closeWordNet :: WordNetEnv -> IO ()
closeWordNet = P.closeWordNet

-- | This simply takes a 'WordNetEnv' and provides it as the
-- implicit parameter to the WordNet command.
runs :: WordNetEnv -> WN a -> a
runs wne x = let ?wne = wne in x


-- | This takes a word and returns an 'Overview' of all its senses
-- for all parts of speech.
getOverview :: WN (Word -> Overview)
getOverview word = unsafePerformIO $ do
  idxN <- unsafeInterleaveIO $ getOverview' Noun
  idxV <- unsafeInterleaveIO $ getOverview' Verb
  idxA <- unsafeInterleaveIO $ getOverview' Adj
  idxR <- unsafeInterleaveIO $ getOverview' Adv
  return (T.Overview idxN idxV idxA idxR)
  where
    getOverview' pos = do
      strM <- P.getIndexString ?wne word pos
      case strM of
        Nothing -> return Nothing
        Just  _ -> unsafeInterleaveIO $ P.indexLookup ?wne word pos

-- | This takes an 'Overview' (see 'getOverview'), a 'POS' and a 'SenseType' and returns
-- a list of search results.  If 'SenseType' is 'AllSenses', there will be one
-- 'SearchResult' in the results for each valid sense.  If 'SenseType' is
-- a single sense number, there will be at most one element in the result list.
searchByOverview :: WN (Overview -> POS -> SenseType -> [SearchResult])
searchByOverview overview pos sense = unsafePerformIO $ 
  case (case pos of { Noun -> T.nounIndex ; Verb -> T.verbIndex ; Adj -> T.adjIndex ; Adv -> T.advIndex })
          overview of
    Nothing  -> return []
    Just idx -> do
      let numSenses = T.indexSenseCount idx
      skL <- mapMaybe id `liftM` 
             unsafeInterleaveIO (
               mapM (\sense' -> do
                     skey <- P.indexToSenseKey ?wne idx sense'
                     return (liftM ((,) sense') skey)
                    ) (sensesOf numSenses sense)
             )
      r <- unsafeInterleaveIO $ mapM (\ (snum, skey) ->
                 unsafeInterleaveIO (P.getSynsetForSense ?wne skey) >>= \v ->
                 case v of
                   Nothing -> return Nothing
                   Just ss -> return $ Just (T.SearchResult 
                                               (Just skey) 
                                               (Just overview) 
                                               (Just idx)
                                               (Just (SenseNumber snum)) 
                                               ss)
                ) skL
      return (mapMaybe id r)

-- | This takes a 'Word', a 'POS' and a 'SenseType' and returns
-- the equivalent of first running 'getOverview' and then 'searchByOverview'.
search :: WN (Word -> POS -> SenseType -> [SearchResult])
search word = searchByOverview (getOverview word)

-- | This takes a 'Key' (see 'srToKey' and 'srFormKeys') and looks it
-- up in the databse.
lookupKey :: WN (Key -> SearchResult)
lookupKey (T.Key (o,p)) = unsafePerformIO $ do
  ss <- unsafeInterleaveIO $ P.readSynset ?wne p o ""
  return $ T.SearchResult Nothing Nothing Nothing Nothing ss

-- | This takes a 'Form' and a 'SearchResult' and returns all
-- 'SearchResult' related to the given one by the given 'Form'.
--
-- For example:
--
-- > relatedBy Antonym (head (search "happy" Adj 1))
-- > [<unhappy>]
-- >
-- > relatedBy Hypernym (head (search "dog" Noun 1))
-- > [<canine canid>]
relatedBy :: WN (Form -> SearchResult -> [SearchResult])
relatedBy form sr = map lookupKey $ srFormKeys sr form

-- | This is a utility function to build lazy trees from a function and a root.
closure :: (a -> [a]) -> a -> Tree a
closure f x = Node x (map (closure f) $ f x)

-- | This enables 'Form'-based trees to be built.
--
-- For example:
--
-- > take 5 $ flatten $ closureOn Antonym (head (search "happy" Adj AllSenses)))
-- > [<happy>,<unhappy>,<happy>,<unhappy>,<happy>]
--
-- > closureOn Hypernym (head (search "dog" Noun 1)))
-- > - <dog domestic_dog Canis_familiaris> --- <canine canid> --- <carnivore>\\
-- >   --- <placental placental_mammal eutherian eutherian_mammal> --- <mammal>\\
-- >   --- <vertebrate craniate> --- <chordate> --- <animal animate_being beast\\
-- >   brute creature fauna> --- <organism being> --- <living_thing animate_thing>\\
-- >   --- <object physical_object> --- <entity> 
closureOn :: WN (Form -> SearchResult -> Tree SearchResult)
closureOn form = closure (relatedBy form)

-- | A simple bag class for our 'meet' implementation.
class Bag b a where
  emptyBag :: b a
  addToBag :: b a -> a -> b a
  addListToBag :: b a -> [a] -> b a
  isEmptyBag :: b a -> Bool
  splitBag :: b a -> (a, b a)
  addListToBag = foldr (flip addToBag)

instance Bag [] a where
  emptyBag = []
  addToBag = flip (:)
  isEmptyBag = null
  splitBag (x:xs) = (x, xs)
  splitBag [] = undefined

-- | A very slow queue based on lists.
newtype Queue a = Queue [a] deriving (Show)

instance Bag Queue a where
  emptyBag = Queue []
  addToBag (Queue l) a = Queue (l++[a])
  isEmptyBag (Queue l) = null l
  splitBag (Queue (x:xs)) = (x, Queue xs)
  splitBag (Queue []) = undefined
  addListToBag (Queue l) l' = Queue (l ++ l')

-- | An empty stack.
emptyStack :: [a]
emptyStack = []

-- | An empty queue.
emptyQueue :: Queue a
emptyQueue = Queue []

-- | This function takes an empty bag (in particular, this is to specify
-- what type of search to perform), and the results of two search.
-- It returns (maybe) the lowest point at which the two terms
-- meet in the WordNet hierarchy.
--
-- For example:
--
-- > meet emptyQueue (head $ search "cat" Noun 1) (head $ search "dog" Noun 1)
-- > Just <carnivore>
--
-- > meet emptyQueue (head $ search "run" Verb 1) (head $ search "walk" Verb 1)
-- > Just <travel go move locomote>
meet :: Bag b (Tree SearchResult) => WN (b (Tree SearchResult) -> SearchResult -> SearchResult -> Maybe SearchResult)
meet emptyBg sr1 sr2 = srch Set.empty Set.empty (addToBag emptyBg t1) (addToBag emptyBg t2)
  where
    t1 = closureOn Hypernym sr1
    t2 = closureOn Hypernym sr2

    srch v1 v2 bag1 bag2
        | isEmptyBag bag1 && isEmptyBag bag2 = Nothing
        | isEmptyBag bag1                    = srch v2 v1 bag2 bag1
        | otherwise = 
            let (Node sr chl, bag1') = splitBag bag1
            in  if v2 `containsResult` sr 
                  then Just sr
                  else srch v2 (addResult v1 sr) bag2 (addListToBag bag1' chl) -- flip the order :)

    containsResult v sr = srWords sr AllSenses `Set.member` v
    addResult v sr = Set.insert (srWords sr AllSenses) v

-- | This function takes an empty bag (see 'meet'), and the results of two searches.
-- It returns (maybe) the lowest point at which the two terms
-- meet in the WordNet hierarchy, as well as the paths leading from each
-- term to this common term.
--
-- For example:
--
-- > meetPaths emptyQueue (head $ search "cat" Noun 1) (head $ search "dog" Noun 1)
-- > Just ([<cat true_cat>,<feline felid>],<carnivore>,[<canine canid>,<dog domestic_dog Canis_familiaris>])
--
-- > meetPaths emptyQueue (head $ search "run" Verb 1) (head $ search "walk" Verb 1)
-- > Just ([<run>,<travel_rapidly speed hurry zip>],<travel go move locomote>,[<walk>])
--
-- This is marginally less efficient than just using 'meet', since it uses
-- linear-time lookup for the visited sets, whereas 'meet' uses log-time
-- lookup.
meetPaths :: Bag b (Tree SearchResult) => 
             WN (
                 b (Tree SearchResult) ->       -- bag implementation
                 SearchResult ->                -- word 1
                 SearchResult ->                -- word 2
                 Maybe ([SearchResult], SearchResult, [SearchResult]))   -- word 1 -> common,
                                                                         -- common
                                                                         -- common -> word 2
meetPaths emptyBg sr1 sr2 = meetSearchPaths emptyBg t1 t2
  where
    t1 = closureOn Hypernym sr1
    t2 = closureOn Hypernym sr2

meetSearchPaths :: Bag b (Tree SearchResult)
                => b (Tree SearchResult)
                -> Tree SearchResult
                -> Tree SearchResult
                -> Maybe ([SearchResult], SearchResult, [SearchResult])
meetSearchPaths emptyBg t1 t2 =
  let srch b v1 v2 bag1 bag2
        | isEmptyBag bag1 && isEmptyBag bag2 = Nothing
        | isEmptyBag bag1                    = srch (not b) v2 v1 bag2 bag1
        | otherwise = 
            let (Node sr chl, bag1') = splitBag bag1
                sl = srWords sr AllSenses
            in  if v2 `containsResult` sl
                  then Just $ if b 
                                then (reverse v1, sr, drop 1 $ dropWhile ((/=sl) . flip srWords AllSenses) v2) 
                                else (reverse $ drop 1 $ dropWhile ((/=sl) . flip srWords AllSenses) v2, sr, v1)
                  else srch (not b)
                            v2 (addResult v1 sr) 
                            bag2 (addListToBag bag1' chl) -- flip the order :)
  in  srch True [] [] (addToBag emptyBg t1) (addToBag emptyBg t2)
  where
    containsResult v sl = sl `elem` map (`srWords` AllSenses) v
    addResult v sr = sr:v