module NLP.Concraft
(
Concraft (..)
, saveModel
, loadModel
, tag
, marginals
, train
, reAnaTrain
, prune
) where
import System.IO (hClose)
import Control.Applicative ((<$>), (<*>))
import Control.Monad (when)
import qualified Data.Set as S
import Data.Binary (Binary, put, get)
import qualified Data.Binary as Binary
import Data.Aeson
import qualified System.IO.Temp as Temp
import qualified Data.ByteString.Lazy as BL
import qualified Codec.Compression.GZip as GZip
import NLP.Concraft.Morphosyntax
import NLP.Concraft.Analysis
import NLP.Concraft.Format.Temp
import qualified Data.Tagset.Positional as P
import qualified NLP.Concraft.Guess as G
import qualified NLP.Concraft.Disamb as D
modelVersion :: String
modelVersion = "0.7"
data Concraft = Concraft
{ tagset :: P.Tagset
, guessNum :: Int
, guesser :: G.Guesser P.Tag
, disamb :: D.Disamb }
instance Binary Concraft where
put Concraft{..} = do
put modelVersion
put tagset
put guessNum
put guesser
put disamb
get = do
comp <- get
when (comp /= modelVersion) $ error $
"Incompatible model version: " ++ comp ++
", expected: " ++ modelVersion
Concraft <$> get <*> get <*> get <*> get
saveModel :: FilePath -> Concraft -> IO ()
saveModel path = BL.writeFile path . GZip.compress . Binary.encode
loadModel :: FilePath -> IO Concraft
loadModel path = do
x <- Binary.decode . GZip.decompress <$> BL.readFile path
x `seq` return x
tag :: Word w => Concraft -> Sent w P.Tag -> [(S.Set P.Tag, P.Tag)]
tag Concraft{..} sent =
zip (map S.fromList gss) tgs
where
gss = G.guess guessNum guesser sent
tgs = D.disamb disamb (G.include gss sent)
marginals :: Word w => Concraft -> Sent w P.Tag -> [WMap P.Tag]
marginals Concraft{..} sent =
let gss = G.guess guessNum guesser sent
in D.marginals disamb (G.include gss sent)
reAnaTrain
:: (Word w, FromJSON w, ToJSON w)
=> P.Tagset
-> Analyse w P.Tag
-> Int
-> G.TrainConf
-> D.TrainConf
-> IO [SentO w P.Tag]
-> IO [SentO w P.Tag]
-> IO Concraft
reAnaTrain tagset ana guessNum guessConf disambConf train0'IO eval0'IO = do
Temp.withTempDirectory "." ".reana" $ \tmpDir -> do
let temp = withTemp tagset tmpDir
putStrLn "\n===== Reanalysis ====="
trainR <- reAnaPar tagset ana =<< train0'IO
evalR <- reAnaPar tagset ana =<< eval0'IO
temp "train" trainR $ \trainR'IO -> do
temp "eval" evalR $ \evalR'IO -> do
train tagset guessNum guessConf disambConf trainR'IO evalR'IO
train
:: (Word w, FromJSON w, ToJSON w)
=> P.Tagset
-> Int
-> G.TrainConf
-> D.TrainConf
-> IO [Sent w P.Tag]
-> IO [Sent w P.Tag]
-> IO Concraft
train tagset guessNum guessConf disambConf trainR'IO evalR'IO = do
Temp.withTempDirectory "." ".guessed" $ \tmpDir -> do
let temp = withTemp tagset tmpDir
putStrLn "\n===== Train guessing model ====="
guesser <- G.train guessConf trainR'IO evalR'IO
trainG <- map (G.guessSent guessNum guesser) <$> trainR'IO
evalG <- map (G.guessSent guessNum guesser) <$> evalR'IO
temp "train" trainG $ \trainG'IO -> do
temp "eval" evalG $ \evalG'IO -> do
putStrLn "\n===== Train disambiguation model ====="
disamb <- D.train disambConf trainG'IO evalG'IO
return $ Concraft tagset guessNum guesser disamb
prune :: Double -> Concraft -> Concraft
prune x concraft =
let disamb' = D.prune x (disamb concraft)
in concraft { disamb = disamb' }
withTemp
:: (FromJSON w, ToJSON w)
=> P.Tagset
-> FilePath
-> String
-> [Sent w P.Tag]
-> (IO [Sent w P.Tag] -> IO a)
-> IO a
withTemp _ _ _ [] handler = handler (return [])
withTemp tagset dir tmpl xs handler =
Temp.withTempFile dir tmpl $ \tmpPath tmpHandle -> do
hClose tmpHandle
let txtSent = mapSent $ P.showTag tagset
tagSent = mapSent $ P.parseTag tagset
writePar tmpPath $ map txtSent xs
handler (map tagSent <$> readPar tmpPath)