module Main (main) where import qualified Labeler as L import CorpusReader (corpus,corpusLabeled) import qualified Helper.Text as Text import qualified Helper.ListZipper as Z import qualified Data.Binary as Binary import qualified Data.ByteString.Lazy as ByteString import System.Environment (getArgs) import System.IO (hPutStrLn,stderr) import FeatureTemplate (parse) import Helper.Commands ( CommandSpec (..),defaultMain , usage , Command , OptDescr(Option), ArgDescr(ReqArg,NoArg)) import Config(Flags(..)) commands :: [(String, CommandSpec Flags)] commands = [ ("train", CommandSpec train "train model" [ Option [] ["rate"] (ReqArg (\a o -> o { flagRate = read a }) "NUM") "learning rate" , Option [] ["beam"] (ReqArg (\a o -> o { flagBeam = read a }) "INT") "beam size" , Option [] ["iter"] (ReqArg (\a o -> o { flagIter = read a }) "INT") "number of iterations" , Option [] ["min-count"] (ReqArg (\a o -> o { flagMinFeatCount = read a }) "INT") "minimum feature frequency for label dictionary" , Option [] ["heldout"] (ReqArg (\a o -> o { flagHeldout = Just a }) "FILE") "path to heldout data" , Option [] ["hash"] (NoArg (\o -> o { flagHash = True })) "use hashing instead of feature dictionary" , Option [] ["hash-sample"] (ReqArg (\a o -> o { flagHashSample = read a }) "INT") "sample size to estimate number of features when hashing" , Option [] ["hash-max-size"] (ReqArg (\a o -> o { flagHashMaxSize = Just $ read a }) "INT") "maximum size of parameter vector when hashing" ] ["TEMPLATE-FILE","TRAIN-FILE","MODEL-FILE"]) , ("predict", CommandSpec predict "predict using model" [] ["MODEL-FILE"]) , ("version", CommandSpec version "print version" [] []) , ("help" , CommandSpec help "print usage information" [] []) ] defaultFlags = Flags { flagRate = 0.01 , flagBeam = 10 , flagIter = 10 , flagMinFeatCount = 100 , flagHeldout = Nothing , flagHash = False , flagHashSample = 1000 , flagHashMaxSize = Nothing } train :: Command Flags train flags [templatef,trainf,outf] = do template <- parse `fmap` Text.readFile templatef traindat <- fmap corpusLabeled $ Text.readFile trainf testdat <- case flagHeldout flags of Nothing -> return [] Just testf -> fmap corpusLabeled $ Text.readFile testf let len = case fmap length . Z.focus . (\(x:_) -> x) . fst . (\(x:_) -> x) $ traindat of Just i -> i conf = L.Config { L.featureTemplate = template , L.atomTable = error "main:Config.atomTable undefined" , L.flags = flags , L.fieldNum = len } ByteString.writeFile outf . Binary.encode . L.train conf traindat $ testdat predict :: Command Flags predict flags [modelf] = do m <- fmap Binary.decode (ByteString.readFile modelf) testdat <- fmap (corpus (L.fieldNum . L.config $ m)) $ Text.getContents Text.putStr . Text.unlines . map Text.unlines . L.predict m $ testdat version :: Command Flags version _ _ = putStrLn "sequor-0.2.2" help :: Command Flags help _ _ = usage commands msg [] main :: IO () main = defaultMain defaultFlags commands msg msg = "Usage: sequor command [OPTION...] [ARG...]"