module Bindings.Stemmer
       ( Encoding(..)
       , Language(..)
       , StemConfig(..)
       , Stemmer(..)
       , init_stemmer
       , new_stemmer
       , stemword
       , delete_stemmer
       , unsafeStemword ) where

import Bindings.Stemmer.Raw
import Foreign.C.String
import System.IO.Unsafe (unsafePerformIO)
import Data.Char (toLower)
import Foreign.Ptr

-- | 'Encoding' Type
--
--   === NOTE:
--
--   * 'ISO_8859_2' for 'Romanian' | 'Hungarian' only.
--   * 'KOI8_R' for 'Russian' only.
data Encoding = UTF_8
              | ISO_8859_1
              | ISO_8859_2
              | KOI8_R
              deriving Show

-- | 'Language' Type
data Language = Danish
              | Dutch
              | English
              | Finnish
              | French
              | German
              | Hungarian
              | Italian
              | Norwegian
              | Porter
              | Portuguese
              | Romanian
              | Russian
              | Spanish
              | Swedish
              | Turkish
              deriving Show

-- | 'StemConfig' Type
data StemConfig = StemConfig { language :: Language
                             , encoding :: Encoding }
                deriving Show

-- | 'Stemmer' Type
--
--   Wrapper type for Ptr C'sb_stemmer.
type Stemmer = Ptr C'sb_stemmer

-- | create 'StemConfig' type
--
--   * algorithm: 'Language'
--
--   * encoding: 'Encoding'
init_stemmer :: Language -> Encoding -> IO StemConfig
init_stemmer lang enc = do
  return StemConfig { language = lang
                    , encoding = enc  }

-- | create stemmer instance
new_stemmer :: StemConfig -> IO Stemmer
new_stemmer StemConfig{..} = do
  cword_enc <- encodingCString encoding
  algorithm <- languageCString language
  stemmer <- c'sb_stemmer_new algorithm cword_enc
  return stemmer

-- | stem word with 'Stemmer'
stemword :: Stemmer -> String -> IO String
stemword stemmer word = do
  cword <- newCString word
  strPtr <- c'sb_stemmer_stem stemmer cword (fromIntegral $ length word)
  str_length <- c'sb_stemmer_length stemmer
  peekCStringLen (strPtr, fromIntegral str_length)

-- | delete stemmer instance
delete_stemmer :: Stemmer -> IO ()
delete_stemmer = c'sb_stemmer_delete

-- | stem words with unsafePerformIO
unsafeStemword :: Stemmer -> String -> String
unsafeStemword stemmer word = unsafePerformIO $ stemword stemmer word

-- | 'Encoding' Type Util function
encodingCString :: Encoding -> IO CString
encodingCString = newCString . show

-- | 'Language' Type Util function
languageCString :: Language -> IO CString
languageCString = newCString . go . show
    where go (x:xs) = (toLower x) : xs