module NLP.Concraft
(
Concraft (..)
, saveModel
, loadModel
, tag
, train
) where
import System.IO (hClose)
import Control.Applicative ((<$>), (<*>))
import Control.Monad (when)
import Data.Binary (Binary, put, get)
import qualified Data.Binary as Binary
import Data.Aeson
import Data.Maybe (fromJust)
import qualified System.IO.Temp as Temp
import qualified Data.ByteString.Lazy as BL
import qualified Codec.Compression.GZip as GZip
import NLP.Concraft.Morphosyntax
import NLP.Concraft.Analysis
import NLP.Concraft.Format.Temp
import qualified Data.Tagset.Positional as P
import qualified NLP.Concraft.Guess as G
import qualified NLP.Concraft.Disamb as D
modelVersion :: String
modelVersion = "0.5"
data Concraft = Concraft
{ tagset :: P.Tagset
, guessNum :: Int
, guesser :: G.Guesser P.Tag
, disamb :: D.Disamb }
instance Binary Concraft where
put Concraft{..} = do
put modelVersion
put tagset
put guessNum
put guesser
put disamb
get = do
comp <- get
when (comp /= modelVersion) $ error $
"Incompatible model version: " ++ comp ++
", expected: " ++ modelVersion
Concraft <$> get <*> get <*> get <*> get
saveModel :: FilePath -> Concraft -> IO ()
saveModel path = BL.writeFile path . GZip.compress . Binary.encode
loadModel :: FilePath -> IO Concraft
loadModel path = do
x <- Binary.decode . GZip.decompress <$> BL.readFile path
x `seq` return x
tag :: Word w => Concraft -> Sent w P.Tag -> [P.Tag]
tag Concraft{..} = D.disamb disamb . G.guessSent guessNum guesser
train
:: (Word w, FromJSON w, ToJSON w)
=> P.Tagset
-> Analyse w P.Tag
-> Int
-> G.TrainConf
-> D.TrainConf
-> [SentO w P.Tag]
-> Maybe [SentO w P.Tag]
-> IO Concraft
train tagset ana guessNum guessConf disambConf train0 eval0 = do
putStrLn "\n===== Reanalysis ====="
trainR <- reAnaPar tagset ana train0
evalR <- case eval0 of
Just ev -> Just <$> reAnaPar tagset ana ev
Nothing -> return Nothing
withTemp tagset "train" trainR $ \trainR'IO -> do
withTemp' tagset "eval" evalR $ \evalR'IO -> do
putStrLn "\n===== Train guessing model ====="
guesser <- do
tr <- trainR'IO
ev <- evalR'IO
G.train guessConf tr ev
trainG <- map (G.guessSent guessNum guesser) <$> trainR'IO
evalG <- fmap (map (G.guessSent guessNum guesser)) <$> evalR'IO
putStrLn "\n===== Train disambiguation model ====="
disamb <- D.train disambConf trainG evalG
return $ Concraft tagset guessNum guesser disamb
withTemp
:: (FromJSON w, ToJSON w)
=> P.Tagset
-> String
-> [Sent w P.Tag]
-> (IO [Sent w P.Tag] -> IO a)
-> IO a
withTemp tagset tmpl xs handler =
withTemp' tagset tmpl (Just xs) (handler . fmap fromJust)
withTemp'
:: (FromJSON w, ToJSON w)
=> P.Tagset
-> String
-> Maybe [Sent w P.Tag]
-> (IO (Maybe [Sent w P.Tag]) -> IO a)
-> IO a
withTemp' tagset tmpl (Just xs) handler =
Temp.withTempFile "." tmpl $ \tmpPath tmpHandle -> do
hClose tmpHandle
let txtSent = mapSent $ P.showTag tagset
tagSent = mapSent $ P.parseTag tagset
writePar tmpPath $ map txtSent xs
handler (Just . map tagSent <$> readPar tmpPath)
withTemp' _ _ Nothing handler = handler (return Nothing)