module Bindings.Stemmer
( Encoding(..)
, Language(..)
, StemConfig(..)
, Stemmer(..)
, init_stemmer
, new_stemmer
, stemword
, delete_stemmer
, unsafeStemword ) where
import Bindings.Stemmer.Raw
import Foreign.C.String
import System.IO.Unsafe (unsafePerformIO)
import Data.Char (toLower)
import Foreign.Ptr
-- | Character encodings that can be requested when creating a stemmer.
--
-- The derived 'Show' output of each constructor is passed verbatim to the C
-- library as the encoding name, so the constructor spellings are significant.
data Encoding
  = UTF_8
  | ISO_8859_1
  | ISO_8859_2
  | KOI8_R
  deriving Show
-- | Stemming algorithms that can be selected.
--
-- The algorithm name passed to the C library is derived from the 'Show'
-- output of the constructor (with the first letter lower-cased), so the
-- constructor spellings are significant.
data Language
  = Danish
  | Dutch
  | English
  | Finnish
  | French
  | German
  | Hungarian
  | Italian
  | Norwegian
  | Porter
  | Portuguese
  | Romanian
  | Russian
  | Spanish
  | Swedish
  | Turkish
  deriving Show
-- | Everything needed to create a stemmer: the algorithm to run and the
-- character encoding to request from the C library.
data StemConfig = StemConfig
  { language :: Language  -- ^ Stemming algorithm to use.
  , encoding :: Encoding  -- ^ Encoding name handed to the C library.
  }
  deriving Show
-- | Handle to a stemmer: a raw pointer to the C-side @sb_stemmer@ structure.
-- Being a plain 'Ptr', it is not garbage collected; release it with
-- 'delete_stemmer'.
type Stemmer = Ptr C'sb_stemmer
-- | Bundle a 'Language' and an 'Encoding' into a 'StemConfig'.
--
-- No foreign call is made here; the result lives in 'IO' only as a matter of
-- interface.
init_stemmer :: Language -> Encoding -> IO StemConfig
init_stemmer lang enc = return (StemConfig lang enc)
-- | Allocate a new C-side stemmer for the given configuration.
--
-- The algorithm name handed to the C library is the 'Language' constructor
-- name with its first letter lower-cased; the encoding name is the 'Encoding'
-- constructor name verbatim.
--
-- Both names are marshalled with 'withCString', so the temporary C strings
-- are released as soon as the constructor call returns instead of being
-- leaked on every call.
--
-- NOTE(review): libstemmer documents that @sb_stemmer_new@ yields NULL for an
-- unsupported algorithm/encoding pair; this function passes the pointer
-- through unchanged, so callers should presumably compare it to 'nullPtr'.
new_stemmer :: StemConfig -> IO Stemmer
new_stemmer StemConfig { language = lang, encoding = enc } =
  withCString (lowerFirst (show lang)) $ \algorithm ->
    withCString (show enc) $ \cword_enc ->
      c'sb_stemmer_new algorithm cword_enc
  where
    -- Lower-case only the leading character, e.g. "English" -> "english".
    lowerFirst :: String -> String
    lowerFirst []       = []
    lowerFirst (c : cs) = toLower c : cs
-- | Stem a single word with the given stemmer.
--
-- The word is marshalled with 'withCStringLen', which
--
--   * frees the temporary C buffer when the call finishes (no leak per word),
--   * reports the length in /bytes/ of the marshalled string, which is what
--     the C routine expects; counting Haskell 'Char's would truncate words
--     containing multi-byte characters.
--
-- The result is copied out of the stemmer's buffer with 'peekCStringLen',
-- using the length reported by @sb_stemmer_length@.
--
-- Raises an 'IOError' (via 'userError') if the C routine returns a NULL
-- pointer, rather than dereferencing it.
stemword :: Stemmer -> String -> IO String
stemword stemmer word =
  withCStringLen word $ \(cword, byteLen) -> do
    strPtr <- c'sb_stemmer_stem stemmer cword (fromIntegral byteLen)
    if strPtr == nullPtr
      then ioError (userError "stemword: sb_stemmer_stem returned NULL")
      else do
        -- The length must be queried after stemming; it describes the
        -- result of the most recent call.
        str_length <- c'sb_stemmer_length stemmer
        peekCStringLen (strPtr, fromIntegral str_length)
-- | Release a stemmer by handing its pointer to the C-side delete routine.
-- The handle must not be used afterwards.
delete_stemmer :: Stemmer -> IO ()
delete_stemmer stemmer = c'sb_stemmer_delete stemmer
-- | Pure-looking variant of 'stemword', implemented with 'unsafePerformIO'.
--
-- NOTE(review): the underlying call goes through a mutable C stemmer object,
-- so this is presumably only sound while the stemmer is alive and not used
-- concurrently from several threads — confirm before relying on it.
unsafeStemword :: Stemmer -> String -> String
unsafeStemword stemmer = unsafePerformIO . stemword stemmer
-- | Marshal the 'Show' rendering of an 'Encoding' into a freshly allocated
-- C string. The buffer comes from 'newCString', so the caller is responsible
-- for freeing it.
encodingCString :: Encoding -> IO CString
encodingCString enc = newCString (show enc)
-- | Marshal a 'Language' into a freshly allocated C string holding the
-- constructor name with its first letter lower-cased (e.g. @"english"@).
-- The buffer comes from 'newCString', so the caller is responsible for
-- freeing it.
languageCString :: Language -> IO CString
languageCString = newCString . lowerFirst . show
  where
    -- Total on purpose: the empty case cannot arise from a derived 'Show',
    -- but covering it keeps the helper free of incomplete-pattern warnings.
    lowerFirst :: String -> String
    lowerFirst []       = []
    lowerFirst (c : cs) = toLower c : cs