-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Generic encoding of records -- -- Generic encoding of records. It currently provides a single, -- polymorphic function to encode sum types (i.e. categorical variables) -- as one-hot vectors. @package record-encode @version 0.2.2 -- | This library provides generic machinery (via GHC.Generics and -- `generics-sop`) to encode values of some algebraic type as points in a -- vector space. -- -- Processing datasets that have one or more categorical variables (which -- in other words are values of a sum type) typically requires a series -- of boilerplate transformations, and the encodeOneHot function -- provided here does precisely that. -- --

Internals

-- -- This library makes use of generic programming to analyze both values -- and types (see the internal Data.Record.Encode.Generics module). -- -- Initially, it relied on Template Haskell to analyze types, -- using the instance generation machinery explained here: -- https://markkarpov.com/tutorial/th.html#example-1-instance-generation module Data.Record.Encode -- | Computes the one-hot encoding of a value of a sum type. -- -- The type of the input value must be an instance of Generic -- (from GHC.Generics) and of Generic (from the -- `generics-sop` library). -- --
--   >>> :set -XDeriveGeneric
--   
-- --
--   >>> import qualified GHC.Generics as G
--   
--   >>> import qualified Generics.SOP as SOP
--   
--   >>> import Data.Record.Encode
--   
-- --
--   >>> data X = A | B | C deriving (G.Generic)
--   
--   >>> instance SOP.Generic X
--   
-- --
--   >>> encodeOneHot B
--   OH {oDim = 3, oIx = 1}
--   
encodeOneHot :: forall a. G a => a -> OneHot -- | A one-hot encoding is a d-dimensional vector having a single component -- equal to 1 and all others equal to 0. We represent it here compactly -- as two integers: an integer dimension and an index (which must both be -- nonnegative). data OneHot OH :: !Int -> !Int -> OneHot -- | Dimension of ambient space (i.e. number of categories) [oDim] :: OneHot -> !Int -- | Index of nonzero entry [oIx] :: OneHot -> !Int -- | Compares two one-hot encodings, yielding an Ordering. Returns Nothing if the -- operand dimensions are not equal. compareOH :: OneHot -> OneHot -> Maybe Ordering -- | Create a one-hot vector oneHotV :: Num a => OneHot -> Vector a -- | Constraints necessary to encodeOneHot a value. -- -- NB: GVariants is an internal typeclass, and this constraint is -- automatically satisfied if the type is an instance of Generic type G a = (GVariants (Rep a), Generic a, Generic a) instance GHC.Show.Show Data.Record.Encode.OneHot instance GHC.Classes.Eq Data.Record.Encode.OneHot instance GHC.Generics.Generic Data.Record.Encode.X instance Generics.SOP.Universe.Generic Data.Record.Encode.X