-- | Command-line entry point for the @sequor@ tool: dispatches to the
-- @train@, @predict@, @version@ and @help@ sub-commands.
module Main (main) where

import qualified NLP.Sequor as L
import NLP.Sequor.CoNLL
import qualified Helper.Text as Text
import qualified Helper.ListZipper as Z
import qualified Data.Binary as Binary
import qualified Data.ByteString.Lazy as ByteString
import System.Environment (getArgs)
import System.IO
import Helper.Commands ( CommandSpec (..)
                       , defaultMain
                       , usage
                       , Command
                       , OptDescr(Option)
                       , ArgDescr(ReqArg,NoArg) )
import NLP.Sequor.Config(Flags(..))
import Text.Printf
import Text.Read (readMaybe)

-- | Table of sub-commands. Each entry pairs the command name with its
-- handler, a one-line description, its option descriptors and the names of
-- its positional arguments.
commands :: [(String, CommandSpec Flags)]
commands =
  [ ( "train"
    , CommandSpec train "train model"
        [ Option [] ["rate"]
            (ReqArg (\a o -> o { flagRate = readOpt "rate" a }) "NUM (0.01)")
            "learning rate"
        , Option [] ["beam"]
            (ReqArg (\a o -> o { flagBeam = readOpt "beam" a }) "INT (10)")
            "beam size"
        , Option [] ["iter"]
            (ReqArg (\a o -> o { flagIter = readOpt "iter" a }) "INT (10)")
            "number of iterations"
        , Option [] ["min-count"]
            (ReqArg (\a o -> o { flagMinFeatCount = readOpt "min-count" a })
                    "INT (100)")
            "minimum feature frequency for label dictionary"
        , Option [] ["heldout"]
            (ReqArg (\a o -> o { flagHeldout = Just a }) "FILE")
            "path to heldout data"
        , Option [] ["hash"]
            (NoArg (\o -> o { flagHash = True }))
            "use hashing instead of feature dictionary"
        , Option [] ["hash-sample"]
            (ReqArg (\a o -> o { flagHashSample = readOpt "hash-sample" a })
                    "INT (1000)")
            "sample size to estimate number of features when hashing"
        , Option [] ["hash-max-size"]
            (ReqArg (\a o -> o { flagHashMaxSize =
                                   Just $ readOpt "hash-max-size" a })
                    "INT")
            "maximum size of parameter vector when hashing"
        , Option [] ["stop-win-size"]
            (ReqArg (\a o -> o { flagStopWinSize = readOpt "stop-win-size" a })
                    "INT (5)")
            "size of window of iterations when checking convergence"
        , Option [] ["stop-threshold"]
            (ReqArg (\a o -> o { flagStopThreshold =
                                   readOpt "stop-threshold" a })
                    "FLOAT (0.05)")
            "threshold of error change when checking convergence "
        ]
        ["TEMPLATE-FILE","TRAIN-FILE","MODEL-FILE"]
    )
  , ("predict", CommandSpec predict "predict using model" [] ["MODEL-FILE"])
  , ("version", CommandSpec version "print version" [] [])
  , ("help"   , CommandSpec help "print usage information" [] [])
  ]

-- | Parse the argument of a command-line option.
--
-- Accepts exactly the inputs that 'read' accepts, but on failure raises an
-- error naming the option and the rejected value instead of the anonymous
-- @Prelude.read: no parse@. As with 'read', the failure is raised at the
-- point where the parsed value is demanded.
readOpt :: Read a
        => String  -- ^ long option name, without the leading dashes
        -> String  -- ^ raw argument text from the command line
        -> a
readOpt name raw =
  case readMaybe raw of
    Just v  -> v
    Nothing -> error $
      "sequor: invalid value " ++ show raw ++ " for option --" ++ name

-- | Fail with an 'IOError' describing the positional arguments a command
-- expects and how many it actually received.
badArgs :: String    -- ^ command name
        -> String    -- ^ space-separated names of the expected arguments
        -> [String]  -- ^ arguments actually received
        -> IO a
badArgs cmd expected actual =
  ioError . userError $
    "sequor " ++ cmd ++ ": expected arguments " ++ expected
      ++ ", but got " ++ show (length actual)

-- | @train@ command: read the feature template and the training data (plus
-- the heldout data when 'flagHeldout' is set), train a model, print the
-- training trace to stderr and write the binary-encoded model to the
-- output file.
train :: Command Flags
train flags [templatef, trainf, outf] = do
  template <- L.parseTemplate `fmap` Text.readFile templatef
  traindat <- readLabeled trainf
  testdat  <- maybe (return []) readLabeled (flagHeldout flags)
  let (m, info) = L.train flags template traindat testdat
  -- Line buffering flushes each line of the trace as soon as it is complete.
  hSetBuffering stderr LineBuffering
  hPutStr stderr . formatTrace $ info
  ByteString.writeFile outf . Binary.encode $ m
  where
    -- Read a file, parse it and convert every sentence with 'toLabeled'.
    readLabeled path = (map toLabeled . parse) `fmap` Text.readFile path
-- NOTE(review): 'defaultMain' may already validate the argument count; this
-- clause keeps the handler total either way.
train _ args = badArgs "train" "TEMPLATE-FILE TRAIN-FILE MODEL-FILE" args

-- | @predict@ command: decode the model from the given file, parse the
-- data arriving on standard input and print the predictions to standard
-- output.
predict :: Command Flags
predict _ [modelf] = do
  m <- Binary.decode `fmap` ByteString.readFile modelf
  testdat <- parse `fmap` Text.getContents
  Text.putStr . Text.unlines . map Text.unlines . L.predict m $ testdat
-- NOTE(review): see the matching fallback clause of 'train'.
predict _ args = badArgs "predict" "MODEL-FILE" args

-- | Format sequence of error rates on train and development data:
-- a header line followed by one line per iteration, numbered from 1.
formatTrace :: L.Trace -> String
formatTrace scores =
  unlines $
    [ printf "%10s %10s %10s %10s" "Iter" "Err_train" "Err_heldout" "Rel_change" ]
    ++
    [ printf "%10d %10.5f %10.5f %10.5f" i err_train err_dev ch
    | (i, (err_train, err_dev, ch)) <- zip [(1::Int) ..] scores
    ]

-- | @version@ command: print the program version.
version :: Command Flags
version _ _ = putStrLn "sequor-0.2.2"

-- | @help@ command: print the usage information for all commands.
help :: Command Flags
help _ _ = usage commands msg []

-- | Program entry point: hand the default flags, the command table and the
-- usage banner to 'defaultMain'.
main :: IO ()
main = defaultMain L.defaultFlags commands msg

-- | Usage banner passed to 'usage' and 'defaultMain'.
msg :: String
msg = "Usage: sequor command [OPTION...] [ARG...]"