module NLP.Concraft
(
Concraft (..)
, tag
, tagSent
, tagDoc
, train
) where
import System.IO (hClose)
import Control.Applicative ((<$>), (<*>))
import Data.Foldable (Foldable)
import Data.Binary (Binary, put, get)
import qualified Data.Text.Lazy as L
import qualified Data.Text.Lazy.IO as L
import qualified System.IO.Temp as Temp
import qualified NLP.Concraft.Morphosyntax as Mx
import qualified NLP.Concraft.Format as F
import qualified NLP.Concraft.Guess as G
import qualified NLP.Concraft.Disamb as D
data Concraft = Concraft
{ guessNum :: Int
, guesser :: G.Guesser F.Tag
, disamb :: D.Disamb }
instance Binary Concraft where
put Concraft{..} = do
put guessNum
put guesser
put disamb
get = Concraft <$> get <*> get <*> get
tag :: Concraft -> Mx.Sent F.Tag -> [F.Tag]
tag Concraft{..} sent
= D.disamb disamb
. G.include sent
. G.guess guessNum guesser
$ sent
tagSent :: F.Sent s w -> Concraft -> s -> s
tagSent sentH Concraft{..}
= D.disambSent sentH disamb
. G.guessSent sentH guessNum guesser
tagDoc :: Functor f => F.Doc f s w -> Concraft -> L.Text -> L.Text
tagDoc F.Doc{..} concraft =
let onSent = tagSent sentHandler concraft
in showDoc . fmap onSent . parseDoc
train
:: (Functor f, Foldable f)
=> F.Doc f s w
-> Int
-> G.TrainConf
-> D.TrainConf
-> FilePath
-> Maybe FilePath
-> IO Concraft
train format guessNum guessConf disambConf trainPath evalPath'Maybe = do
putStrLn "\n===== Train guessing model ====\n"
guesser <- G.train format guessConf trainPath evalPath'Maybe
let withGuesser = guessFile format guessNum guesser
withGuesser "train" (Just trainPath) $ \(Just trainPathG) ->
withGuesser "eval" evalPath'Maybe $ \evalPathG'Maybe -> do
putStrLn "\n===== Train disambiguation model ====\n"
disamb <- D.train format disambConf trainPathG evalPathG'Maybe
return $ Concraft guessNum guesser disamb
guessFile
:: Functor f
=> F.Doc f s w
-> Int
-> G.Guesser F.Tag
-> String
-> Maybe FilePath
-> (Maybe FilePath -> IO a)
-> IO a
guessFile _ _ _ _ Nothing handler = handler Nothing
guessFile format guessNum gsr tmpl (Just path) handler =
Temp.withTempFile "." tmpl $ \tmpPath tmpHandle -> do
inp <- L.readFile path
let out = G.guessDoc format guessNum gsr inp
hClose tmpHandle
L.writeFile tmpPath out
handler (Just tmpPath)