{-# LANGUAGE RecordWildCards #-} module Numeric.Recommender.ALS where import Control.Parallel.Strategies import Data.Bifunctor import Data.Default.Class import qualified Data.IntMap as IntMap import qualified Data.IntSet as IntSet import Data.List (sortBy) import Data.Maybe import Data.Tuple import qualified Data.Vector.Storable import Numeric.LinearAlgebra import System.Random import Prelude hiding ((<>)) data ALSParams = ALSParams { lambda :: Double -- ^ Training speed , alpha :: Double -- ^ Weight multiplier , seed :: Int -- ^ RNG seed , nFactors :: Int -- ^ Hidden features dimension , parChunk :: Int -- ^ Chunk size for parallelization } deriving (Show) instance Default ALSParams where def = ALSParams 0.1 10 0 10 10 data ALSResult = ALSResult { cost :: Double , itemFeature :: !(Matrix Double) , userFeature :: !(Matrix Double) } data ALSModel u i = ALSModel { encodeUser :: u -> Maybe Int -- ^ User to dense representation , decodeUser :: Int -> u -- ^ User from dense representation , encodeItem :: i -> Maybe Int -- ^ Item to dense representation , decodeItem :: Int -> i -- ^ Item from dense representation -- | Internal representation of the input data pairs , pairs :: [(Int, Int)] -- | Results as further iterations of the algorithm , results :: [ALSResult] } -- | Build recommendations based on users' unrated item choices. -- -- Takes conversion functions to/from Int representation for user -- supplied data types. Use 'id' if you're already based on them. -- -- The implementation follows the one in the recommenderlab library in -- CRAN. For further details, see "Large-scale Parallel Collaborative -- Filtering for the Netflix Prize" by Yunhong Zhou, Dennis Wilkinson, -- Robert Schreiber and Rong Pan. buildModel :: (Functor f, Foldable f) => ALSParams -> (u -> Int) -> (Int -> u) -> (i -> Int) -> (Int -> i) -> f (u, i) -- ^ User-item pairs -> ALSModel u i buildModel ALSParams{..} fromUser toUser fromItem toItem xs = let parMap' f = withStrategy (parListChunk parChunk rdeepseq) . map f rnd = mkStdGen seed (encUser, decUser) = bimap (. fromUser) (toUser .) . compact $ fmap (fromUser . fst) xs (encItem, decItem) = bimap (. fromItem) (toItem .) . compact $ fmap (fromItem . snd) xs xs' = fmap (bimap (fromJust . encUser) (fromJust . encItem)) xs nU = 1 + (maximum $ fmap fst xs') nM = 1 + (maximum $ fmap snd xs') selections = foldr (\(u,c) -> IntSet.insert (c+(nM*u))) mempty xs' ratings = (nU> if IntSet.member k selections then 1 else 0) [0..(nU*nM)-1] weighted = scalar alpha * ratings mIni = (nFactors> tr m u = fromRows $ parMap' (\i -> let -- Drop the rows and columns not relevant to this user m' = m ¿ ((filter (\j -> (>0.1) $ atIndex ratings (i,j))) [0..nM-1]) f' x = vector $ map (atIndex ((toRows x) !! i)) ((filter (\j -> (>0.1) $ atIndex ratings (i,j))) [0..nM-1]) w' = f' weighted r' = f' ratings m'' = tr $ (tr m') * asColumn w' x1 = mtm + (m'' <> tr m' + (scalar lambda * scalar (atIndex sumsU i) * ident nFactors)) x2 = asColumn $ (m'' + m') #> r' in flatten . maybe (linearSolveSVD x1 x2) id $ linearSolve x1 x2 ) [0..nU-1] -- Minimize the item feature matrix tuu = tr u <> u m2 = fromColumns $ parMap' (\j -> let u' = u ? ((filter (\i -> (>0.1) $ atIndex ratings (i,j))) [0..nU-1]) f' x = vector $ map (atIndex ((toColumns x) !! j)) ((filter (\i -> (>0.1) $ atIndex ratings (i,j))) [0..nU-1]) w' = f' weighted r' = f' ratings u'' = tr $ asColumn w' * u' x1 = tuu + u'' <> u' + (scalar lambda * scalar (atIndex sumsM j) * ident nFactors) x2 = asColumn $ (u'' + tr u') #> r' in flatten . maybe (linearSolveSVD x1 x2) id $ linearSolve x1 x2 ) [0..nM-1] in (u, m2) results = iterate ((\x -> x `seq` f x) . snd) (f mIni) in ALSModel encUser decUser encItem decItem (foldr (:) [] xs') $ map (\(u, m) -> ALSResult (costFunction ratings u m weighted lambda sumsU sumsM) m u) results where -- |Build to/from functions from a sparse set to a dense 0..n-1 -- range. -- -- The reverse function is total for convenience since the inputs -- for it are better controlled. compact :: Foldable f => f Int -> (Int -> Maybe Int, Int -> Int) compact ys = let mp = foldr (\x a -> IntMap.insertWith (flip const) x (IntMap.size a) a) mempty ys pm = IntMap.fromList . map swap $ IntMap.toList mp in ( flip IntMap.lookup mp , maybe (error $ "missing value") id . flip IntMap.lookup pm ) -- |This is not actually used by the algorithm. costFunction :: Matrix Double -> Matrix Double -> Matrix Double -> Matrix Double -> Double -> Vector Double -> Vector Double -> Double costFunction r u m w l nui nmj = let rum = r - (u <> m) in sumElements ((w + 1) * (rum * rum)) + (l * (sumElements (nui <# (u^2)) + sumElements ((m^2) #> nmj))) recommend :: ALSModel u i -> Int -- ^ Iterations -> IntMap.IntMap [(i, Bool)] recommend ALSModel{..} n = let ALSResult{..} = results !! (n-1) feat = userFeature <> itemFeature usrIt = foldr (\(k,v) -> IntMap.insertWith IntSet.union k (IntSet.singleton v)) mempty pairs in foldr (\u -> let inUsr = fromJust $ IntMap.lookup u usrIt in IntMap.insert u $ map ((\x -> (decodeItem x, not $ IntSet.member x inUsr)) . fst) $ sortBy (\(_,a) (_,b) -> compare b a) $ zip [0..] $ head $ toLists $ feat ? [u]) mempty $ map fst pairs