-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Generic encoding of records
--
-- Generic encoding of records. It currently provides a single,
-- polymorphic function to encode sum types (i.e. categorical variables)
-- as one-hot vectors.
@package record-encode
@version 0.2.3
module Data.Record.Encode.Generics
-- | Compute the structural index of a value of a sum type via its Generic
-- representation e.g.:
--
--
-- >>> data S = Sa | Sb | Sc deriving (Eq, Show, G.Generic)
--
-- >>> instance Generic S
--
--
--
-- >>> gindex $ from Sb
-- 1
--
gindex :: SOP f xs -> Int
-- | Counts the number of outermost constructors ("variants" of a type)
class GVariants (f :: * -> *)
vars :: GVariants f => proxy f -> Int
-- | Counts the number of outermost constructors
gnconstructors :: forall a. (Generic a, GVariants (Rep a)) => Proxy a -> Int
instance GHC.Generics.Generic (Data.Record.Encode.Generics.X a)
instance Data.Record.Encode.Generics.GVariants (GHC.Generics.M1 GHC.Generics.C m f)
instance Data.Record.Encode.Generics.GVariants GHC.Generics.V1
instance Data.Record.Encode.Generics.GVariants f => Data.Record.Encode.Generics.GVariants (GHC.Generics.M1 GHC.Generics.D m f)
instance (Data.Record.Encode.Generics.GVariants f, Data.Record.Encode.Generics.GVariants g) => Data.Record.Encode.Generics.GVariants (f GHC.Generics.:+: g)
instance Generics.SOP.Universe.Generic (Data.Record.Encode.Generics.X a)
-- | This library provides generic machinery (via GHC.Generics and
-- `generics-sop`) to encode values of some algebraic type as points in a
-- vector space.
--
-- Processing datasets that have one or more categorical variables (which
-- in other words are values of a sum type) typically requires a series
-- of boilerplate transformations, and the encodeOneHot function
-- provided here does precisely that.
--
--
-- > :set -XDeriveGeneric
--
-- > import qualified GHC.Generics as G
-- > import qualified Generics.SOP as SOP
-- > import Data.Record.Encode
--
-- > data X = A | B | C deriving (Enum, G.Generic)
-- > instance SOP.Generic X
--
--
-- The B constructor is the second (i.e. position 1 counting
-- from 0) of a choice of three:
--
--
-- >>> encodeOneHot B
-- OH {oDim = 3, oIx = 1}
--
--
-- The Just constructor is the second of a choice of two:
--
--
-- >>> encodeOneHot $ Just B
-- OH {oDim = 2, oIx = 1}
--
--
-- The Nothing constructor is the first:
--
--
-- >>> encodeOneHot (Nothing :: Maybe Int)
-- OH {oDim = 2, oIx = 0}
--
encodeOneHot :: forall a. G a => a -> OneHot
-- | A one-hot encoding is a d-dimensional vector having a single component
-- equal to 1 and all others equal to 0. We represent it here compactly
-- as two integers: an integer dimension and an index (which must both be
-- nonnegative).
data OneHot
OH :: !Int -> !Int -> OneHot
-- | Dimension of embedding space (i.e. number of categories)
[oDim] :: OneHot -> !Int
-- | Index of nonzero coordinate
[oIx] :: OneHot -> !Int
-- | Compares two one-hot encodings, returning their relative Ordering.
-- Returns Nothing if the operand dimensions are not equal.
--
--
-- >>> compareOH (OH 3 2) (OH 3 1)
-- Just GT
--
--
--
-- >>> compareOH (OH 3 2) (OH 5 1)
-- Nothing
--
compareOH :: OneHot -> OneHot -> Maybe Ordering
-- | Create a one-hot vector
oneHotV :: Num a => OneHot -> Vector a
-- | Constraints necessary to encodeOneHot a value.
--
-- NB: GVariants is an internal typeclass, and this constraint is
-- automatically satisfied if the type is an instance of Generic
type G a = (GVariants (Rep a), Generic a, Generic a)
instance GHC.Show.Show Data.Record.Encode.OneHot
instance GHC.Classes.Eq Data.Record.Encode.OneHot