{-# LANGUAGE RecordWildCards #-} module Numeric.SVM.LibLinear ( -- * Types FeatureIdx(..) , Label -- * Solvers -- | @LIBLINEAR@ supports a variety of different solvers targetting both -- regression and multi-class classification. , Solver -- ** Classification methods , l2Reg_logistic , l2Reg_l2LossSvc' , l2Reg_l2LossSvc , l2Reg_l1LossSvc' , crammerSingerSvc -- ** Regression methods , l2Reg_l2LossSvr , l2Reg_l2LossSvr' , l2Reg_l1LossSvr' -- * Accumulating samples , Samples , samplesFromList -- * Training , Model , train , predict -- ** Model parameters , defaultParameters , Parameters(..) -- * Saving and restoring 'Model's , saveModel , loadModel ) where import Control.Monad (unless) import Control.Monad.ST import Foreign.C.String import Foreign.C.Types import Foreign.Marshal.Utils import Foreign.Ptr import Foreign.ForeignPtr import qualified Data.Vector.Storable as VS import qualified Data.Vector.Unboxed as VU import System.IO.Unsafe (unsafePerformIO) import Numeric.SVM.LibLinear.Internal import Numeric.SVM.LibLinear.Types import Numeric.SVM.LibLinear.PackFeatures -- | A classification or regression model. data Model = Model (ForeignPtr C'model) -- | A solver type newtype Solver = Solver CInt -- Classification -- | $L^2$ regularized logistic regression, optimizing primal problem l2Reg_logistic :: Solver l2Reg_logistic = Solver c'L2R_LR -- | $L^2$ regularized $L^2$ loss optimizing classification, dual problem l2Reg_l2LossSvc' :: Solver l2Reg_l2LossSvc' = Solver c'L2R_L2LOSS_SVC_DUAL -- | $L^2$ regularized $L^2$ loss classification, optimizing primal problem l2Reg_l2LossSvc :: Solver l2Reg_l2LossSvc = Solver c'L2R_L2LOSS_SVC -- | $L^2$ regularized $L^1$ loss classification, optimizing dual problem l2Reg_l1LossSvc' :: Solver l2Reg_l1LossSvc' = Solver c'L2R_L1LOSS_SVC_DUAL -- | Support Vector Classification by Crammer and Singer crammerSingerSvc :: Solver crammerSingerSvc = Solver c'MCSVM_CS -- Regression -- | $L^2$ regularized $L^2$ loss regression, optimizing primal problem l2Reg_l2LossSvr :: Solver l2Reg_l2LossSvr = Solver c'L2R_L2LOSS_SVR -- | $L^2$ regularized $L^2$ loss regression, optimizing dual problem l2Reg_l2LossSvr' :: Solver l2Reg_l2LossSvr' = Solver c'L2R_L2LOSS_SVR_DUAL -- | $L^2$ regularized $L^1$ loss regression, optimizing dual problem l2Reg_l1LossSvr' :: Solver l2Reg_l1LossSvr' = Solver c'L2R_L1LOSS_SVR_DUAL -- | Parameters to the solver data Parameters = Parameters { epsilon :: Double -- ^ the stopping criterion , cost :: Double -- ^ the constraint violation cost, $C$ , weights :: VU.Vector (FeatureIdx, Double) -- ^ the feature weights; features omitted from this -- association vector default to a weight of 1. , supportLossSensitivity :: Double -- ^ the sensisitivity to loss, $epsilon$ -- (only in support vector regression) , bias :: Double -- ^ TODO } defaultParameters :: Parameters defaultParameters = Parameters { epsilon = 0.1 , cost = 1 , weights = VU.empty , supportLossSensitivity = 0.1 , bias = -1 } {- toSamples :: (Fold.PrimMonad m) => Fold.FoldM (ToSamplesM m) (Label, [(FeatureIdx, Double)]) Samples toSamples = Fold.sink $ uncurry addSample -} -- | Build a sample set from a list of samples samplesFromList :: Int -- ^ expected number of features per sample -> [(Label, [(FeatureIdx, Double)])] -- ^ labelled samples -> Samples samplesFromList expectedFeaturesPerSample samples = snd $ runST $ runToSamplesM samplesPerChunk expectedFeaturesPerSample $ mapM_ (uncurry addSample) samples where samplesPerChunk = 10000 -- | Train a 'Model'. trainM :: Solver -> Parameters -> Samples -> IO Model trainM (Solver solver) (Parameters {..}) samples = do modelPtr <- flip withPackedSamples samples $ \nSamples labelPtr featurePtr -> do let FeatureIdx maxFeatureIdx = samplesMaxFeature samples problem = C'problem { c'problem'l = fromIntegral nSamples , c'problem'n = fromIntegral maxFeatureIdx , c'problem'y = castPtr labelPtr , c'problem'x = featurePtr , c'problem'bias = realToFrac bias } param = C'parameter { c'parameter'solver_type = solver , c'parameter'eps = realToFrac epsilon , c'parameter'C = realToFrac cost , c'parameter'nr_weight = 0 , c'parameter'weight_label = nullPtr -- TODO , c'parameter'weight = nullPtr -- TODO , c'parameter'p = realToFrac supportLossSensitivity , c'parameter'init_sol = nullPtr } with param $ \paramPtr -> with problem $ \problemPtr -> do err <- c'check_parameter problemPtr paramPtr unless (nullPtr == err) $ peekCString err >>= fail c'train problemPtr paramPtr Model <$> newForeignPtr p'free_model_content modelPtr -- | Train a 'Model'. train :: Solver -- ^ solver type -> Parameters -- ^ model parameters -> Samples -- ^ labelled training samples -> Model -- ^ trained model train solver params samples = unsafePerformIO $ trainM solver params samples {-# NOINLINE train #-} -- | Make a prediction with a 'Model'. predictM :: Model -- ^ model to predict with -> [(FeatureIdx, Double)] -- ^ features -> IO Label -- ^ prediction predictM (Model model) features = do let featuresV = VS.fromList $ map (uncurry FeatureValue) features ++ [FeatureValue (FeatureIdx (-1)) 0] k <- withForeignPtr model $ \modelPtr -> VS.unsafeWith featuresV $ c'predict modelPtr return $ realToFrac k -- | Make a prediction with a 'Model'. predict :: Model -- ^ model to predict with -> [(FeatureIdx, Double)] -- ^ features -> Label -- ^ prediction predict model features = unsafePerformIO $ predictM model features {-# NOINLINE predict #-} -- | Save a 'Model' to a file. saveModel :: FilePath -> Model -> IO () saveModel fname (Model model) = withCString fname $ \fnamePtr -> do withForeignPtr model $ c'save_model fnamePtr -- | Load a 'Model' from a file. loadModel :: FilePath -> IO (Maybe Model) loadModel fname = do modelPtr <- withCString fname c'load_model if modelPtr == nullPtr then return Nothing else Just . Model <$> newForeignPtr p'free_model_content modelPtr