module NLP.Stemmer.C (
Algorithm(..)
, Stemmer
, new
, delete
, stem
, withStemmer
) where
import Data.Char (toLower)
import Foreign.C (CString, CInt, peekCStringLen, newCString)
import Foreign.Ptr (Ptr)
data StemmerStruct
type Stemmer = Ptr StemmerStruct
data Algorithm = Danish
| Dutch
| English
| Finnish
| French
| German
| Hungarian
| Italian
| Norwegian
| Portuguese
| Romanian
| Russian
| Spanish
| Swedish
| Turkish
| Porter
deriving Show
foreign import ccall "libstemmer.h sb_stemmer_new" sb_stemmer_new :: CString -> CString -> IO Stemmer
foreign import ccall "libstemmer.h sb_stemmer_delete" sb_stemmer_delete :: Stemmer -> IO ()
foreign import ccall "libstemmer.h sb_stemmer_stem" sb_stemmer_stem :: Stemmer -> CString -> CInt -> IO (CString)
foreign import ccall "libstemmer.h sb_stemmer_length" sb_stemmer_length :: Stemmer -> IO CInt
new :: Algorithm -> IO Stemmer
new algorithm = do
algorithm' <- algorithmCString algorithm
encoding <- utf8
sb_stemmer_new algorithm' encoding
delete :: Stemmer -> IO ()
delete = sb_stemmer_delete
stem :: Stemmer -> String -> IO String
stem stemmer word = do
word' <- newCString word
strPtr <- sb_stemmer_stem stemmer word' (fromIntegral $ length word)
strLen <- sb_stemmer_length stemmer
peekCStringLen (strPtr, fromIntegral strLen)
withStemmer :: Algorithm -> (Stemmer -> IO a) -> IO a
withStemmer algorithm action = do
stemmer <- new algorithm
result <- action stemmer
delete stemmer
return result
utf8 :: IO CString
utf8 = newCString "UTF_8"
algorithmCString :: Algorithm -> IO CString
algorithmCString = newCString . firstLower . show
where firstLower (first:rest) = (toLower first) : rest