module Numeric.Regression.Linear (Model, compute, regress) where

import Control.Applicative
import Data.Foldable
import Data.Monoid
import Data.Traversable
import Numeric.AD
import Numeric.Regression.Internal

-- | A model using the given @f@ to store parameters of type @a@.
-- Can be thought of as some kind of vector throughout this
-- package.
type Model f a = f a

-- | Compute the predicted value for
-- the given model on the given observation
compute :: (Applicative v, Foldable v, Num a)
        => Model v a -- ^ theta vector, the model's parameters
        -> v a       -- ^ @x@ vector, with the observed numbers
        -> a         -- ^ predicted @y@ for this observation
compute theta x = theta `dot` x
{-# INLINE compute #-}

-- | Cost function for a linear regression on a single observation
cost :: (Applicative v, Foldable v, Floating a)
     => Model v a -- ^ theta vector, the model's parameters
     -> v a       -- ^ @x@ vector
     -> a         -- ^ expected @y@ for the observation
     -> a         -- ^ cost
cost theta x y = 0.5 * (y - compute theta x) ^ (2 :: Int)
{-# INLINE cost #-}

-- | Cost function for a linear regression on a set of observations
totalCost :: (Applicative v, Foldable v, Applicative f, Foldable f, Floating a)
          => Model v a -- ^ theta vector, the model's parameters
          -> f a       -- ^ expected @y@ value for each observation
          -> f (v a)   -- ^ input data for each observation
          -> a         -- ^ total cost over all observations
totalCost theta ys xs =
  let Acc n (Sum s) = foldMap acc $ liftA2 (cost theta) xs ys
  in s / fromIntegral n
{-# INLINE totalCost #-}

-- | Given some observed \"predictions\" @ys@, the corresponding
-- input values @xs@ and initial values for the model's parameters @theta0@,
--
-- > regress ys xs theta0
--
-- returns a stream of values for the parameters that'll fit the data better
-- and better.
--
-- Example:
--
-- @
-- -- the theta we're approximating
-- realtheta :: Model V.Vector Double
-- realtheta = V.fromList [1.0, 2.0, 3.0]
--
-- -- let's start there and make 'regress'
-- -- get values that better fit the input data
-- theta0 :: Model V.Vector Double
-- theta0 = V.fromList [0.2, 3.0, 2.23]
--
-- -- input data. (output value, vector of values for each input)
-- ys_ex :: V.Vector Double
-- xs_ex :: V.Vector (V.Vector Double)
-- (ys_ex, xs_ex) = V.unzip . V.fromList $
--   [ (3, V.fromList [0, 0, 1])
--   , (1, V.fromList [1, 0, 0])
--   , (2, V.fromList [0, 1, 0])
--   , (6, V.fromList [1, 1, 1])
--   ]
--
-- -- stream of increasingly accurate parameters
-- thetaApproxs :: [Model V.Vector Double]
-- thetaApproxs = regress ys_ex xs_ex theta0
-- @
regress :: (Traversable v, Applicative v, Foldable v, Applicative f, Foldable f, Ord a, Floating a)
        => f a         -- ^ expected @y@ value for each observation
        -> f (v a)     -- ^ input data for each observation
        -> Model v a   -- ^ initial parameters for the model, from which we'll improve
        -> [Model v a] -- ^ a stream of increasingly accurate values
                       --   for the model's parameters to better fit the observations
regress ys xs t0 =
  gradientDescent (\theta -> totalCost theta (fmap auto ys) (fmap (fmap auto) xs))
                  t0
{-# INLINE regress #-}
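
-- A minimal usage sketch (illustrative only): it assumes the example bindings
-- @ys_ex@, @xs_ex@ and @theta0@ from the 'regress' documentation above are
-- defined in a module that imports @Data.Vector@ qualified as @V@. The result
-- of 'regress' is a lazily produced stream of successive estimates, so a
-- caller typically takes a bounded number of gradient-descent steps (1000
-- here, an arbitrary choice) and keeps the last one:
--
-- > approxTheta :: Model V.Vector Double
-- > approxTheta = last . take 1000 $ regress ys_ex xs_ex theta0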