{-# LANGUAGE GeneralizedNewtypeDeriving #-} 
{-# LANGUAGE ScopedTypeVariables #-} 

-- | The module provides functions for working with the binary
-- representation of the historical dictionary of Polish.
-- The dictionary is stored on a disk but we assume that it
-- doesn't change throughtout the program session so that we
-- can provide the pure interface for dictionary reading
-- and searching.

module NLP.Polh.Binary
( savePolh
, loadPolh

, PolhM
, runPolh
, index
, withKey
, lookup
) where

import Prelude hiding (lookup)
import Control.Exception (try, SomeException)
import Control.Monad (when, guard)
import Control.Applicative ((<$>))
import Control.Monad.Reader (ReaderT (..), ask, lift)
import Control.Monad.Trans.Maybe (MaybeT (..))
import System.IO.Unsafe (unsafePerformIO, unsafeInterleaveIO)
import System.FilePath ((</>))
import System.Directory ( getDirectoryContents, createDirectoryIfMissing
                        , createDirectory, doesDirectoryExist )
import Data.Maybe (catMaybes)
import Data.Monoid (mappend, mconcat)
import Data.Binary (encodeFile, decodeFile)
import qualified Data.Map as M
import qualified Data.Set as S
import qualified Data.Text as T

import NLP.Polh.Types
import qualified NLP.Polh.Util as Util

-- | Path to entries in the binary dictionary.
entryDir :: String
entryDir = "entries"

-- | Path to key map in the binary dictionary.
formMapFile :: String
formMapFile = "forms.bin"

-- | A dictionary key (lexical entry ID).
type Key = T.Text

-- | Load the directory contents.
loadContents :: FilePath -> IO [FilePath]
loadContents path = do
    xs <- getDirectoryContents path
    return [x | x <- xs, x /= ".", x /= ".."]

-- | Check if the directory is empty.
emptyDirectory :: FilePath -> IO Bool
emptyDirectory path = null <$> loadContents path

-- | Save the lexical entry on the disk.
saveLexEntry :: FilePath -> LexEntry -> IO ()
saveLexEntry path x = 
    let lexPath = T.unpack . lexId
    in  encodeFile (path </> lexPath x) x

-- | Save the polh dictionary in the empty directory.
savePolh :: FilePath -> Polh -> IO ()
savePolh path xs = do
    createDirectoryIfMissing True path
    isEmpty <- emptyDirectory path
    when (not isEmpty) $ do
        error $ "savePolh: directory " ++ path ++ " is not empty"
    let lexPath = path </> entryDir
    createDirectory lexPath
    formMap' <- mconcat <$> mapM (saveLex lexPath) xs
    encodeFile (path </> formMapFile) formMap'
  where
    saveLex lexPath x = do
        saveLexEntry lexPath x
        return $ lexMap x
    lexMap lexEntry = M.fromListWith mappend
        [ (x, S.singleton key)
        | x <- Util.allForms lexEntry ]
      where
        key = lexId lexEntry

maybeErr :: IO a -> IO (Maybe a)
maybeErr io = do
    r <- try io
    case r of
        Left (_e :: SomeException)  -> return Nothing
        Right x                     -> return (Just x)

maybeT :: Monad m => Maybe a -> MaybeT m a
maybeT = MaybeT . return
{-# INLINE maybeT #-}

maybeErrT :: IO a -> MaybeT IO a
maybeErrT io = do
    r <- lift (maybeErr io)
    maybeT r

-- | Load lexical entry from disk by its key.
loadLexEntry :: FilePath -> Key -> IO (Maybe LexEntry)
loadLexEntry path key = do
    maybeErr $ decodeFile (path </> T.unpack key)

-- | Binary dictionary data kept in program memory.
data MemData = MemData
    { polhPath  :: FilePath
    , formMap   :: M.Map T.Text (S.Set Key) }

-- | A PolhM monad is a wrapper over the Polish historical
-- dictionary in a binary form.
newtype PolhM a = PolhM (ReaderT MemData IO a)
    deriving (Functor, Monad)

-- | Path to directory with entries.
entryPath :: MemData -> FilePath
entryPath = (</> entryDir) . polhPath

-- | List of dictionary keys.
index :: PolhM [Key]
index = PolhM $ do
    path <- entryPath <$> ask
    map T.pack <$> lift (loadContents path)

-- | Extract lexical entry with the given ID.
withKey :: Key -> PolhM (Maybe LexEntry)
withKey key = PolhM $ do
    path <- entryPath <$> ask
    lift . unsafeInterleaveIO $ loadLexEntry path key

-- | Lookup the form in the dictionary.
lookup :: T.Text -> PolhM [LexEntry]
lookup x = do
    fm <- PolhM $ formMap <$> ask
    keys <- return $ case M.lookup x fm of
        Nothing -> []
        Just xs -> S.toList xs
    catMaybes <$> mapM withKey keys

-- | Execute the Polh monad against the binary Polh representation
-- located in the given directory.  Return Nothing if the directory
-- doesnt' exist or if it doesn't look like a Polh dictionary.
-- We assume that the binary representation doesn't change so we
-- can provide the pure interface.
runPolh :: FilePath -> PolhM a -> Maybe a
runPolh path (PolhM m) = unsafePerformIO . runMaybeT $ do
    formMap' <- maybeErrT $ decodeFile (path </> formMapFile)
    doesExist <- lift $ doesDirectoryExist (path </> entryDir)
    guard doesExist 
    lift $ runReaderT m (MemData path formMap')

-- | Load dictionary from a disk in a lazy manner.  Return 'Nothing'
-- if the path doesn't correspond to a binary representation of the
-- dictionary. 
loadPolh :: FilePath -> Maybe Polh
loadPolh path = runPolh path $ do
    keys <- index
    catMaybes <$> mapM withKey keys

-- We don't provide update functionality since we want only the pure
-- iterface to be visible.  It greatly simplifies the implementation.
--
-- updateLexEntry :: FilePath -> Key -> (LexEntry -> LexEntry) -> IO LexEntry
-- updateLexEntry path lexKey f = do
--     lexEntry <- loadLexEntry path lexKey
--     let lexEntry' = f lexEntry
--     saveLexEntry path lexEntry'
--     return lexEntry'
-- 
-- updateLexEntry_ :: FilePath -> Key -> (LexEntry -> LexEntry) -> IO ()
-- updateLexEntry_ path lexKey f = do
--     lexEntry <- loadLexEntry path lexKey
--     saveLexEntry path (f lexEntry)