module Data.LibLinear
( Model(..)
, Feature(..)
, Example(..)
, Solver(..)
, Target(..)
, TrainParams(..)
, train
) where
import Bindings.LibLinear
import Control.Monad (forM_, liftM2)
import Control.Monad.Trans (liftIO)
import Data.Enumerator as E hiding (sequence)
import qualified Data.Enumerator.List as EL
import qualified Data.List as L
import Data.LibLinear.Solver
import qualified Data.Vector.Storable as SVec
import qualified Data.Vector.Storable.Mutable as MVec
import Foreign as F
import Foreign.C.Types
newtype Target = Target Int deriving (Show, Eq, Enum)
data Model = Model !Target (SVec.Vector Double) deriving (Show)
data Feature = Feature !Int !Double deriving (Show)
data Example = Example !Target [Feature] deriving (Show)
featuresToNodeList :: [Feature] -> [C'feature_node]
featuresToNodeList features = L.map mapper features ++ [sentintel]
where mapper (Feature i v) = C'feature_node
{ c'feature_node'index = fromIntegral i
, c'feature_node'value = realToFrac v }
sentintel = mapper $ Feature (1) 0.0
newParameter :: Solver -> C'parameter
newParameter solver = C'parameter
{ c'parameter'solver_type = fromIntegral $ fromEnum solver
, c'parameter'eps = 0.1
, c'parameter'C = 1.0
, c'parameter'nr_weight = 0
, c'parameter'weight_label = nullPtr
, c'parameter'weight = nullPtr
}
writeByIndex :: MVec.IOVector CInt
-> MVec.IOVector C'feature_node
-> MVec.IOVector (Ptr C'feature_node)
-> (Int, Int, Int)
-> Example
-> IO (Int, Int, Int)
writeByIndex targets features featureIndex (i, fMax, fSum) (Example (Target t) f) = do
let fMax' = L.maximum [fi | Feature fi _ <- f]
MVec.write targets i $ fromIntegral t
forM_ (zip [fSum..] (featuresToNodeList f)) ( \ (fi, feature) -> MVec.write features fi feature)
MVec.unsafeWith features ( \ basePtr -> do
let addr = basePtr `advancePtr` fSum
MVec.write featureIndex i addr)
return $! (i+1, max fMax fMax', fSum+L.length f+1)
convertModel :: C'model -> IO [Model]
convertModel C'model
{ c'model'nr_class = nr_class
, c'model'nr_feature = nr_feature
, c'model'label = label
, c'model'w = w
} = sequence $ L.unfoldr step 0
where nr_class' = fromIntegral nr_class
nr_feature' = fromIntegral nr_feature
step :: Int -> Maybe (IO Model, Int)
step i | nr_class' == 2, i == 1 = Nothing
| i < nr_class' = Just (model, i+1)
| otherwise = Nothing
where model = liftM2 Model target weightVec
target = peekElemOff label i >>= newTarget
newTarget = return . Target . fromIntegral
weightVec = do
let weights = castPtr w
modelWeights = weights `advancePtr` i
ptr <- newForeignPtr_ modelWeights
SVec.freeze $! MVec.MVector nr_feature' ptr
data TrainParams = TrainParams
{ trainSolver :: Solver
, trainExamples :: Int
, trainFeatureSum :: Int
} deriving (Show)
train :: TrainParams -> Iteratee Example IO [Model]
train TrainParams{trainSolver, trainExamples, trainFeatureSum} = do
targets <- liftIO $ MVec.new trainExamples
featureIndex <- liftIO $ MVec.new trainExamples
features <- liftIO $ MVec.new (trainFeatureSum + trainExamples)
(targetCount, featureMax, featureSum) <- EL.foldM (writeByIndex targets features featureIndex) (0, 0, 0)
if trainExamples /= targetCount
then fail $! "target mismatch: " ++ show trainExamples ++ " != " ++ show targetCount
else liftIO $ do
let (featureBuffer, _, _) = MVec.unsafeToForeignPtr features
withForeignPtr featureBuffer $ \ _ ->
MVec.unsafeWith targets $ \ targets' ->
MVec.unsafeWith featureIndex $ \ features' -> do
print featureBuffer
let problem = C'problem
{ c'problem'l = fromIntegral trainExamples
, c'problem'n = fromIntegral featureMax
, c'problem'y = targets'
, c'problem'x = features'
, c'problem'bias = 1.0
}
model <- with problem $ \ problem' ->
with (newParameter trainSolver) $ \ param' ->
c'train problem' param'
convertModel =<< F.peek model