{-# OPTIONS  -XEmptyDataDecls #-}
{-# LANGUAGE ForeignFunctionInterface #-}

-- | Haskell bindings for the Snowball stemming library.
-- This module contains all the low-level functions and are more or less direct
-- translations of the foreign function calls.  The 'stem' function expects
-- strings to use UTF-8 encoding.
module NLP.Stemmer.C (
    -- * Types
      Stemmer(..)
    -- * Low level functions
    , stem
    ) where

import Data.Char        (toLower)
import Foreign.C        (CString(..), CInt(..), peekCStringLen, newCString)
import Foreign.Ptr      (Ptr)

data StemmerStruct

-- | Pointer to a stemmer instance
type StemmerP  = Ptr StemmerStruct

-- | Available algorithms.  For English, 'English' is recommended over 'Porter'.
data Stemmer = Danish
             | Dutch
             | English
             | Finnish
             | French
             | German
             | Hungarian
             | Italian
             | Norwegian
             | Portuguese
             | Romanian
             | Russian
             | Spanish
             | Swedish
             | Turkish
             | Porter
             deriving Show

foreign import ccall "libstemmer.h sb_stemmer_new"     sb_stemmer_new    :: CString -> CString -> IO StemmerP
foreign import ccall "libstemmer.h sb_stemmer_delete"  sb_stemmer_delete :: StemmerP -> IO ()
foreign import ccall "libstemmer.h sb_stemmer_stem"    sb_stemmer_stem   :: StemmerP -> CString -> CInt -> IO (CString)
foreign import ccall "libstemmer.h sb_stemmer_length"  sb_stemmer_length :: StemmerP -> IO CInt

-- | Stem a word
stem :: Stemmer -> String -> IO String
stem algorithm word = do
    algorithm' <- stemmerCString algorithm
    encoding   <- newCString "UTF_8"
    stemmer    <- sb_stemmer_new algorithm' encoding
    word'      <- newCString word
    strPtr     <- sb_stemmer_stem   stemmer word' (fromIntegral $ length word)
    strLen     <- sb_stemmer_length stemmer
    result     <- peekCStringLen (strPtr, fromIntegral strLen)
    sb_stemmer_delete stemmer
    return result

stemmerCString :: Stemmer -> IO CString
stemmerCString = newCString . firstLower . show
    where firstLower (first:rest) = (toLower first) : rest