module NLP.Concraft
( GuessConf (..)
, GuessData (..)
, DisambConf (..)
, DisambWith (..)
, DisambTag
, DisambTrain
, disamb
, disambDoc
, trainOn
) where
import System.IO (hClose)
import Data.Foldable (Foldable)
import qualified Data.Text.Lazy as L
import qualified Data.Text.Lazy.IO as L
import qualified Numeric.SGD as SGD
import qualified System.IO.Temp as Temp
import NLP.Concraft.Schema
import qualified NLP.Concraft.Morphosyntax as Mx
import qualified NLP.Concraft.Format as F
import qualified NLP.Concraft.Guess as G
import qualified NLP.Concraft.Disamb as D
data GuessConf r = GuessConf
{ guessNum :: Int
, guessSchema :: Schema r () }
data GuessData r = GuessData
{ guessConf :: GuessConf r
, guesser :: G.Guesser r }
data DisambConf r t = DisambConf
{ split :: D.Split r t
, disambSchema :: Schema t () }
data DisambWith r t a = DisambWith
{ disambConf :: DisambConf r t
, disambWith :: a }
type DisambTag r t = DisambWith r t (D.TagCRF Ob t)
type DisambTrain r t c = DisambWith r t (D.TrainCRF Ob t c)
disamb
:: (Ord r, Ord t)
=> GuessData r
-> DisambTag r t
-> Mx.Sent r
-> [r]
disamb GuessData{..} DisambWith{..} sent
= D.disamb disambSchema split tagCRF
. G.include sent
. G.guess guessNum guessSchema guesser
$ sent
where
GuessConf{..} = guessConf
DisambConf{..} = disambConf
tagCRF = disambWith
disambSent
:: Ord t
=> F.Sent s w
-> GuessData F.Tag
-> DisambTag F.Tag t
-> s -> s
disambSent sentH GuessData{..} DisambWith{..}
= D.disambSent sentH disambSchema split tagCRF
. G.guessSent sentH guessNum guessSchema guesser
where
GuessConf{..} = guessConf
DisambConf{..} = disambConf
tagCRF = disambWith
disambDoc
:: (Functor f, Ord t)
=> F.Doc f s w
-> GuessData F.Tag
-> DisambTag F.Tag t
-> L.Text
-> L.Text
disambDoc F.Doc{..} guessData disambTag =
let onSent = disambSent sentHandler guessData disambTag
in showDoc . fmap onSent . parseDoc
trainOn
:: (Functor f, Foldable f, Ord t)
=> F.Doc f s w
-> GuessConf F.Tag
-> SGD.SgdArgs
-> DisambTrain F.Tag t c
-> FilePath
-> Maybe FilePath
-> IO (G.Guesser F.Tag, c)
trainOn format guessConf@GuessConf{..} sgdArgs DisambWith{..}
trainPath evalPath'Maybe = do
putStrLn "\n===== Train guessing model ====\n"
guesser <- G.trainOn format guessSchema sgdArgs
trainPath evalPath'Maybe
let guessData = GuessData guessConf guesser
let withGuesser = guessFile format guessData
withGuesser "train" (Just trainPath) $ \(Just trainPathG) ->
withGuesser "eval" evalPath'Maybe $ \evalPathG'Maybe -> do
putStrLn "\n===== Train disambiguation model ====\n"
let DisambConf{..} = disambConf
let trainCRF = disambWith
disambCRF <- D.trainOn format disambSchema split trainCRF
trainPathG evalPathG'Maybe
return (guesser, disambCRF)
guessFile
:: Functor f
=> F.Doc f s w
-> GuessData F.Tag
-> String
-> Maybe FilePath
-> (Maybe FilePath -> IO a)
-> IO a
guessFile _ _ _ Nothing handler = handler Nothing
guessFile format GuessData{..} tmpl (Just path) handler =
Temp.withTempFile "." tmpl $ \tmpPath tmpHandle -> do
inp <- L.readFile path
let GuessConf{..} = guessConf
let out = G.guessDoc format guessNum guessSchema guesser inp
hClose tmpHandle
L.writeFile tmpPath out
handler (Just tmpPath)