----------------------------------------------------------------------------- -- | -- Module : NLP.WordNet -- Copyright : (c) Hal Daume III 2003-2004 -- License : BSD-style -- -- Maintainer : hdaume@isi.edu -- Stability : experimental -- Portability : non-portable (H98 + implicit parameters) -- -- This is the top level module to the Haskell WordNet interface. -- -- This module is maintained at: -- . -- -- This is the only module in the WordNet package you need to import. -- The others provide utility functions and primitives that this -- module is based on. -- -- More information about WordNet is available at: -- . ----------------------------------------------------------------------------- module NLP.WordNet ( -- * The basic type system module NLP.WordNet.Types, -- * Top level execution functions runWordNet, runWordNetQuiet, runWordNetWithOptions, -- * Functions to manually initialize the WordNet system; these are not -- needed if you use one of the "runWordNet" functions above. initializeWordNet, initializeWordNetWithOptions, closeWordNet, runs, -- * The basic database access functions. getOverview, searchByOverview, search, lookupKey, -- * The agglomeration functions relatedBy, closure, closureOn, -- * Computing lowest-common ancestor functions; the implementation -- of these can be tuned by providing a different "Bag" implementation. -- use "emptyQueue" for breadth-first-search (recommended) or "emptyStack" -- for depth-first-search, or write your own. meet, meetPaths, meetSearchPaths, Bag(..), emptyQueue, emptyStack, ) where import Prelude hiding (catch) import Data.Array import GHC.Arr (unsafeIndex) import GHC.Handle import Data.Tree import Data.IORef import Data.Dynamic import qualified Data.Set as Set import Numeric (readHex, readDec) import System.IO.Unsafe import NLP.WordNet.Common import NLP.WordNet.Consts import NLP.WordNet.Util import NLP.WordNet.Types import qualified NLP.WordNet.PrimTypes as T import qualified NLP.WordNet.Prims as P -- | Takes a WordNet command, initializes the environment -- and returns the results in the 'IO' monad. WordNet -- warnings are printed to stderr. runWordNet :: WN a -> IO a runWordNet = runWordNetWithOptions Nothing Nothing -- | Takes a WordNet command, initializes the environment -- and returns the results in the 'IO' monad. WordNet -- warnings are ignored. runWordNetQuiet :: WN a -> IO a runWordNetQuiet = runWordNetWithOptions Nothing (Just (\_ _ -> return ())) -- | Takes a FilePath to the directory holding WordNet and -- a function to do with warnings and a WordNet command, initializes -- the environment and returns the results in the 'IO' monad. runWordNetWithOptions :: Maybe FilePath -> -- word net data directory Maybe (String -> Exception -> IO ()) -> -- warning function (by default, warnings go to stderr) WN a -> -- what to run IO a runWordNetWithOptions dd warn wn = do wne <- P.initializeWordNetWithOptions dd warn let a = let ?wne = wne in wn -- P.closeWordNet wne return a -- | Gives you a 'WordNetEnv' which can be passed to 'runs' or used -- as the implicit parameter to the other WordNet functions. initializeWordNet :: IO WordNetEnv initializeWordNet = P.initializeWordNet -- | Takes a FilePath to the directory holding WordNet and -- a function to do with warnings, initializes -- the environment and returns a 'WordNetEnv' as in 'initializeWordNet'. initializeWordNetWithOptions :: Maybe FilePath -> Maybe (String -> Exception -> IO ()) -> IO WordNetEnv initializeWordNetWithOptions = P.initializeWordNetWithOptions -- | Closes all the handles associated with the 'WordNetEnv'. Since -- the functions provided in the "NLP.WordNet.WordNet" module -- are /lazy/, you shouldn't do this until you're really done. -- Or perhaps not at all (GC will eventually kick in). closeWordNet :: WordNetEnv -> IO () closeWordNet = P.closeWordNet -- | This simply takes a 'WordNetEnv' and provides it as the -- implicit parameter to the WordNet command. runs :: WordNetEnv -> WN a -> a runs wne x = let ?wne = wne in x -- | This takes a word and returns an 'Overview' of all its senses -- for all parts of speech. getOverview :: WN (Word -> Overview) getOverview word = unsafePerformIO $ do idxN <- unsafeInterleaveIO $ getOverview' Noun idxV <- unsafeInterleaveIO $ getOverview' Verb idxA <- unsafeInterleaveIO $ getOverview' Adj idxR <- unsafeInterleaveIO $ getOverview' Adv return (T.Overview idxN idxV idxA idxR) where getOverview' pos = do strM <- P.getIndexString ?wne word pos case strM of Nothing -> return Nothing Just s -> unsafeInterleaveIO $ P.indexLookup ?wne word pos -- | This takes an 'Overview' (see 'getOverview'), a 'POS' and a 'SenseType' and returns -- a list of search results. If 'SenseType' is 'AllSenses', there will be one -- 'SearchResult' in the results for each valid sense. If 'SenseType' is -- a single sense number, there will be at most one element in the result list. searchByOverview :: WN (Overview -> POS -> SenseType -> [SearchResult]) searchByOverview overview pos sense = unsafePerformIO $ case (case pos of { Noun -> T.nounIndex ; Verb -> T.verbIndex ; Adj -> T.adjIndex ; Adv -> T.advIndex }) overview of Nothing -> return [] Just idx -> do let numSenses = T.indexSenseCount idx skL <- mapMaybe id `liftM` unsafeInterleaveIO ( mapM (\sense -> do skey <- P.indexToSenseKey ?wne idx sense return (liftM ((,) sense) skey) ) (sensesOf numSenses sense) ) r <- unsafeInterleaveIO $ mapM (\ (snum, skey) -> unsafeInterleaveIO (P.getSynsetForSense ?wne skey) >>= \v -> case v of Nothing -> return Nothing Just ss -> return $ Just (T.SearchResult (Just skey) (Just overview) (Just idx) (Just (SenseNumber snum)) ss) ) skL return (mapMaybe id r) -- | This takes a 'Word', a 'POS' and a 'SenseType' and returns -- the equivalent of first running 'getOverview' and then 'searchByOverview'. search :: WN (Word -> POS -> SenseType -> [SearchResult]) search word pos sense = searchByOverview (getOverview word) pos sense -- | This takes a 'Key' (see 'srToKey' and 'srFormKeys') and looks it -- up in the databse. lookupKey :: WN (Key -> SearchResult) lookupKey (T.Key (o,p)) = unsafePerformIO $ do ss <- unsafeInterleaveIO $ P.readSynset ?wne p o "" return $ T.SearchResult Nothing Nothing Nothing Nothing ss -- | This takes a 'Form' and a 'SearchResult' and returns all -- 'SearchResult' related to the given one by the given 'Form'. -- -- For example: -- -- > relatedBy Antonym (head (search "happy" Adj 1)) -- > [] -- > -- > relatedBy Hypernym (head (search "dog" Noun 1)) -- > [] relatedBy :: WN (Form -> SearchResult -> [SearchResult]) relatedBy form sr = map lookupKey $ srFormKeys sr form -- | This is a utility function to build lazy trees from a function and a root. closure :: (a -> [a]) -> a -> Tree a closure f x = Node x (map (closure f) $ f x) -- | This enables 'Form'-based trees to be built. -- -- For example: -- -- > take 5 $ flatten $ closureOn Antonym (head (search "happy" Adj AllSenses))) -- > [,,,,] -- -- > closureOn Hypernym (head (search "dog" Noun 1))) -- > - --- --- \\ -- > --- --- \\ -- > --- --- --- brute creature fauna> --- --- \\ -- > --- --- closureOn :: WN (Form -> SearchResult -> Tree SearchResult) closureOn form = closure (relatedBy form) -- | A simple bag class for our 'meet' implementation. class Bag b a where emptyBag :: b a addToBag :: b a -> a -> b a addListToBag :: b a -> [a] -> b a isEmptyBag :: b a -> Bool splitBag :: b a -> (a, b a) addListToBag = foldr (flip addToBag) instance Bag [] a where emptyBag = [] addToBag = flip (:) isEmptyBag = null splitBag (x:xs) = (x, xs) -- | A very slow queue based on lists. newtype Queue a = Queue [a] deriving (Show) instance Bag Queue a where emptyBag = Queue [] addToBag (Queue l) a = Queue (l++[a]) isEmptyBag (Queue l) = null l splitBag (Queue (x:xs)) = (x, Queue xs) addListToBag (Queue l) l' = Queue (l ++ l') -- | An empty stack. emptyStack :: [a] emptyStack = [] -- | An empty queue. emptyQueue :: Queue a emptyQueue = Queue [] -- | This function takes an empty bag (in particular, this is to specify -- what type of search to perform), and the results of two search. -- It returns (maybe) the lowest point at which the two terms -- meet in the WordNet hierarchy. -- -- For example: -- -- > meet emptyQueue (head $ search "cat" Noun 1) (head $ search "dog" Noun 1) -- > Just -- -- > meet emptyQueue (head $ search "run" Verb 1) (head $ search "walk" Verb 1) -- > Just meet :: Bag b (Tree SearchResult) => WN (b (Tree SearchResult) -> SearchResult -> SearchResult -> Maybe SearchResult) meet emptyBg sr1 sr2 = srch Set.empty Set.empty (addToBag emptyBg t1) (addToBag emptyBg t2) where t1 = closureOn Hypernym sr1 t2 = closureOn Hypernym sr2 srch v1 v2 bag1 bag2 | isEmptyBag bag1 && isEmptyBag bag2 = Nothing | isEmptyBag bag1 = srch v2 v1 bag2 bag1 | otherwise = let (Node sr chl, bag1') = splitBag bag1 in if v2 `containsResult` sr then Just sr else srch v2 (addResult v1 sr) bag2 (addListToBag bag1' chl) -- flip the order :) containsResult v sr = srWords sr AllSenses `Set.member` v addResult v sr = Set.insert (srWords sr AllSenses) v -- | This function takes an empty bag (see 'meet'), and the results of two searches. -- It returns (maybe) the lowest point at which the two terms -- meet in the WordNet hierarchy, as well as the paths leading from each -- term to this common term. -- -- For example: -- -- > meetPaths emptyQueue (head $ search "cat" Noun 1) (head $ search "dog" Noun 1) -- > Just ([,],,[,]) -- -- > meetPaths emptyQueue (head $ search "run" Verb 1) (head $ search "walk" Verb 1) -- > Just ([,],,[]) -- -- This is marginally less efficient than just using 'meet', since it uses -- linear-time lookup for the visited sets, whereas 'meet' uses log-time -- lookup. meetPaths :: Bag b (Tree SearchResult) => WN ( b (Tree SearchResult) -> -- bag implementation SearchResult -> -- word 1 SearchResult -> -- word 2 Maybe ([SearchResult], SearchResult, [SearchResult])) -- word 1 -> common, -- common -- common -> word 2 meetPaths emptyBg sr1 sr2 = meetSearchPaths emptyBg t1 t2 where t1 = closureOn Hypernym sr1 t2 = closureOn Hypernym sr2 meetSearchPaths emptyBg t1 t2 = let srch b v1 v2 bag1 bag2 | isEmptyBag bag1 && isEmptyBag bag2 = Nothing | isEmptyBag bag1 = srch (not b) v2 v1 bag2 bag1 | otherwise = let (Node sr chl, bag1') = splitBag bag1 sl = srWords sr AllSenses in if v2 `containsResult` sl then Just $ if b then (reverse v1, sr, drop 1 $ dropWhile ((/=sl) . flip srWords AllSenses) v2) else (reverse $ drop 1 $ dropWhile ((/=sl) . flip srWords AllSenses) v2, sr, v1) else srch (not b) v2 (addResult v1 sr) bag2 (addListToBag bag1' chl) -- flip the order :) in srch True [] [] (addToBag emptyBg t1) (addToBag emptyBg t2) where containsResult v sl = sl `elem` map (flip srWords AllSenses) v addResult v sr = sr:v personTree = runWordNetQuiet (closureOn Hypernym (head $ search "person" Noun AllSenses)) organizationTree = runWordNetQuiet (closureOn Hypernym (head $ search "organization" Noun AllSenses))