Copyright | (c) Marco Zocca (2018-2020) |
---|---|
License | BSD-style |
Maintainer | ocramz fripost org |
Stability | experimental |
Portability | GHC |
Safe Haskell | None |
Language | Haskell2010 |
The purpose of this library is to make it easy to analyze collections of Haskell values; users encode
their data collections (lists, maps and so on) into dataframes, and use functions provided by heidi
for manipulation.
Synopsis
- data Frame row
- encode :: (Foldable t, Heidi a) => t a -> Frame (Row [TC] VP)
- class Heidi a
- data TC
- data VP
- frameFromList :: [row] -> Frame row
- head :: Frame row -> row
- take :: Int -> Frame row -> Frame row
- drop :: Int -> Frame row -> Frame row
- numRows :: Frame row -> Int
- filter :: (row -> Bool) -> Frame row -> Frame row
- filterA :: Applicative f => (row -> f Bool) -> Frame row -> f (Frame row)
- groupWith :: (row -> row -> Bool) -> Frame row -> [Frame row]
- zipWith :: (a -> b -> c) -> Frame a -> Frame b -> Frame c
- scanl :: (b -> a -> b) -> b -> Frame a -> Frame b
- scanr :: (a -> b -> b) -> b -> Frame a -> Frame b
- spreadWith :: (TrieKey k, Foldable t, Ord k, Ord v) => (v -> k) -> k -> k -> t (Row k v) -> Frame (Row k v)
- gatherWith :: (Foldable t, Ord k, TrieKey k) => (k -> v) -> Set k -> k -> k -> t (Row k v) -> Frame (Row k v)
- groupBy :: (Foldable t, TrieKey k, Eq k, Ord v) => k -> t (Row k v) -> Map v (Frame (Row k v))
- innerJoin :: (Foldable t, Ord v, TrieKey k, Eq v, Eq k) => k -> k -> t (Row k v) -> t (Row k v) -> Frame (Row k v)
- leftOuterJoin :: (Foldable t, Ord v, TrieKey k, Eq v, Eq k) => k -> k -> t (Row k v) -> t (Row k v) -> Frame (Row k v)
- toVector :: Frame row -> Vector row
- fromVector :: Vector row -> Frame row
- data Row k v
- rowFromList :: TrieKey k => [(k, v)] -> Row k v
- toList :: TrieKey k => Row k v -> [(k, v)]
- keys :: TrieKey k => Row k v -> [k]
- delete :: TrieKey k => k -> Row k v -> Row k v
- filterWithKey :: TrieKey k => (k -> v -> Bool) -> Row k v -> Row k v
- filterWithKeyPrefix :: (TrieKey a, Eq a) => [a] -> Row [a] v -> Row [a] v
- filterWithKeyAny :: (TrieKey a, Eq a) => a -> Row [a] v -> Row [a] v
- deleteMany :: (TrieKey k, Foldable t) => t k -> Row k v -> Row k v
- partitionWithKey :: TrieKey k => (k -> v -> Bool) -> Row k v -> (Row k v, Row k v)
- partitionWithKeyPrefix :: (TrieKey a, Eq a) => [a] -> Row [a] v -> (Row [a] v, Row [a] v)
- lookup :: TrieKey k => k -> Row k v -> Maybe v
- (!:) :: TrieKey k => k -> (a -> Bool) -> Row k a -> Bool
- elemSatisfies :: TrieKey k => (a -> Bool) -> k -> Row k a -> Bool
- maybeEmpty :: TrieKey k => Maybe (Row k v) -> Row k v
- eqByLookup :: (TrieKey k, Eq k, Eq a) => k -> Row k a -> Row k a -> Maybe Bool
- eqByLookups :: (Foldable t, TrieKey k, Eq k, Eq a) => t k -> Row k a -> Row k a -> Maybe Bool
- compareByLookup :: (TrieKey k, Eq k, Ord a) => k -> Row k a -> Row k a -> Maybe Ordering
- union :: TrieKey k => Row k v -> Row k v -> Row k v
- unionWith :: TrieKey k => (v -> v -> v) -> Row k v -> Row k v -> Row k v
- intersection :: TrieKey k => Row k v -> Row k b -> Row k v
- intersectionWith :: TrieKey k => (a -> b -> v) -> Row k a -> Row k b -> Row k v
- mapWithKey :: TrieKey k => (k -> a -> b) -> Row k a -> Row k b
- foldWithKey :: TrieKey k => (k -> a -> r -> r) -> r -> Row k a -> r
- keysOnly :: (TrieKey k, Foldable f) => f (Row k v) -> Row k ()
- traverseWithKey :: (Applicative f, TrieKey k) => (k -> a -> f b) -> Row k a -> f (Row k b)
- int :: TrieKey k => k -> Traversal' (Row k VP) Int
- bool :: TrieKey k => k -> Traversal' (Row k VP) Bool
- float :: TrieKey k => k -> Traversal' (Row k VP) Float
- double :: TrieKey k => k -> Traversal' (Row k VP) Double
- char :: TrieKey k => k -> Traversal' (Row k VP) Char
- string :: TrieKey k => k -> Traversal' (Row k VP) String
- text :: TrieKey k => k -> Traversal' (Row k VP) Text
- scientific :: TrieKey k => k -> Traversal' (Row k VP) Scientific
- oneHot :: TrieKey k => k -> Traversal' (Row k VP) (OneHot Int)
- real :: TrieKey k => k -> Row k VP -> Maybe Double
- txt :: TrieKey k => k -> Row k VP -> Maybe Text
- at :: TrieKey k => k -> Lens' (Row k a) (Maybe a)
- keep :: Getting Any row a -> (a -> b) -> row -> Bool
- atPrefix :: (TrieKey k, Eq k) => [k] -> Lens' (Row [k] v) [v]
- eachPrefixed :: (TrieKey k, Eq k) => [k] -> Traversal' (Row [k] v) v
- foldPrefixed :: (TrieKey k, Eq k, Monoid r) => [k] -> Getting r (Row [k] v) v
- tcTyN :: TC -> String
- tcTyCon :: TC -> String
- mkTyN :: String -> TC
- mkTyCon :: String -> TC
- data OneHot i
Frame
A Frame
is a list of rows.
Instances
Functor Frame Source # | |
Foldable Frame Source # | |
Defined in Core.Data.Frame.List fold :: Monoid m => Frame m -> m # foldMap :: Monoid m => (a -> m) -> Frame a -> m # foldMap' :: Monoid m => (a -> m) -> Frame a -> m # foldr :: (a -> b -> b) -> b -> Frame a -> b # foldr' :: (a -> b -> b) -> b -> Frame a -> b # foldl :: (b -> a -> b) -> b -> Frame a -> b # foldl' :: (b -> a -> b) -> b -> Frame a -> b # foldr1 :: (a -> a -> a) -> Frame a -> a # foldl1 :: (a -> a -> a) -> Frame a -> a # elem :: Eq a => a -> Frame a -> Bool # maximum :: Ord a => Frame a -> a # minimum :: Ord a => Frame a -> a # | |
Traversable Frame Source # | |
Show row => Show (Frame row) Source # | |
Construction
Encoding
encode :: (Foldable t, Heidi a) => t a -> Frame (Row [TC] VP) Source #
Populate a Frame
with the generic encoding of the row data
For example, a list of records having two fields each will produce a dataframe with two columns, having the record field names as column labels.
data P1 = P1 Int Char deriving (Eq, Show,Generic
) instanceHeidi
P1 data P2 = P2 { p2i :: Int, p2c :: Char } deriving (Eq, Show, Generic) instance Heidi P2 data Q = Q (Maybe Int) (Either Double Char) deriving (Eq, Show, Generic) instance Heidi Q
>>>
encode [P1 42 'z']
Frame {tableRows = [([TC "P1" "_0"],VPInt 42),([TC "P1" "_1"],VPChar 'z')] :| []}
>>>
encode [P2 42 'z']
Frame {tableRows = [([TC "P2" "p2c"],VPChar 'z'),([TC "P2" "p2i"],VPInt 42)] :| []}
Test using Maybe
and Either
record fields :
>>>
encode [Q (Just 42) (Left 1.2), Q Nothing (Right 'b')]
Frame {tableRows = [([TC "Q" "_0",TC "Maybe" "Just"],VPInt 42),([TC "Q" "_1",TC "Either" "Left"],VPDouble 1.2)] :| [[([TC "Q" "_1",TC "Either" "Right"],VPChar 'b')]]}
NB: as the last example above demonstrates, Nothing
values are not inserted in the rows, which can be used to encode missing data features.
Single interface to the library.
This typeclass provides all the machinery for encoding Haskell values into dataframes.
NOTE: Your datatypes only need to possess a Generic
instance, to which you just need to add an empty instance of Heidi
.
example:
{-# language DeriveGenerics, DeriveAnyClass #-} data A = A Int Char deriving (Generic
,Heidi
)
Instances
Heidi Bool Source # | |
Heidi Char Source # | |
Heidi Double Source # | |
Heidi Float Source # | |
Heidi Int Source # | |
Heidi Int8 Source # | |
Heidi Int16 Source # | |
Heidi Int32 Source # | |
Heidi Int64 Source # | |
Heidi Word Source # | |
Heidi Word8 Source # | |
Heidi Word16 Source # | |
Heidi Word32 Source # | |
Heidi Word64 Source # | |
Heidi () Source # | |
Defined in Data.Generics.Encode.Internal | |
Heidi String Source # | |
Heidi Scientific Source # | |
Defined in Data.Generics.Encode.Internal toVal :: Scientific -> Val header :: Proxy Scientific -> Header String | |
Heidi Text Source # | |
Heidi a => Heidi (Maybe a) Source # | |
(Heidi a, Heidi b) => Heidi (Either a b) Source # | |
(Heidi a, Heidi b) => Heidi (a, b) Source # | |
Defined in Data.Generics.Encode.Internal | |
(Heidi a, Heidi b, Heidi c) => Heidi (a, b, c) Source # | |
Defined in Data.Generics.Encode.Internal |
A (type, constructor) name pair
Instances
Eq TC Source # | |
Ord TC Source # | |
Show TC Source # | |
Generic TC Source # | |
Hashable TC Source # | |
Defined in Data.Generics.Encode.Internal | |
type Rep TC Source # | |
Defined in Data.Generics.Encode.Internal type Rep TC = D1 ('MetaData "TC" "Data.Generics.Encode.Internal" "heidi-0.3.0-9M5jxZVOe4V4ra6vG8q0ck" 'False) (C1 ('MetaCons "TC" 'PrefixI 'False) (S1 ('MetaSel ('Nothing :: Maybe Symbol) 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 String) :*: S1 ('MetaSel ('Nothing :: Maybe Symbol) 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 String))) |
Primitive types
NB : this is just a convenience for unityping the dataframe contents, but it should not be exposed to the library users
Instances
Direct
frameFromList :: [row] -> Frame row Source #
Access
Filtering
filterA :: Applicative f => (row -> f Bool) -> Frame row -> f (Frame row) Source #
This generalizes the list-based filter
function.
Grouping
groupWith :: (row -> row -> Bool) -> Frame row -> [Frame row] Source #
groupWith
takes row comparison function and a list and returns a list of lists such that the concatenation of the result is equal to the argument. Moreover, each sublist in the result contains only elements that satisfy the comparison.
Zipping
Scans
Data tidying
:: (TrieKey k, Foldable t, Ord k, Ord v) | |
=> (v -> k) | |
-> k | "key" key |
-> k | "value" key |
-> t (Row k v) | input dataframe |
-> Frame (Row k v) |
spreadWith
moves the unique values of a key column into the column names, spreading the values of a value column across the new columns.
:: (Foldable t, Ord k, TrieKey k) | |
=> (k -> v) | |
-> Set k | set of keys to gather |
-> k | "key" key |
-> k | "value" key |
-> t (Row k v) | input dataframe |
-> Frame (Row k v) |
gatherWith
moves column names into a "key" column, gathering the column values into a single "value" column
Relational operations
:: (Foldable t, TrieKey k, Eq k, Ord v) | |
=> k | Key to group by |
-> t (Row k v) | A 'Frame (GTR.Row k v) can be used here |
-> Map v (Frame (Row k v)) |
GROUP BY : given a key and a table that uses it, split the table in multiple tables, one per value taken by the key.
>>>
numRows <$> (HM.lookup "129" $ groupBy "id.0" t0)
Just 2
:: (Foldable t, Ord v, TrieKey k, Eq v, Eq k) | |
=> k | Key into the first table |
-> k | Key into the second table |
-> t (Row k v) | First dataframe |
-> t (Row k v) | Second dataframe |
-> Frame (Row k v) |
INNER JOIN : given two dataframes and one key from each, compute the inner join using the keys as relations.
>>>
head t0
[("id.0","129"),("qty","1"),("item","book")]
>>>
head t1
[("id.1","129"),("price","100")]
>>>
head $ innerJoin "id.0" "id.1" t0 t1
[("id.1","129"),("id.0","129"),("qty","5"),("item","book"),("price","100")]
leftOuterJoin :: (Foldable t, Ord v, TrieKey k, Eq v, Eq k) => k -> k -> t (Row k v) -> t (Row k v) -> Frame (Row k v) Source #
LEFT (OUTER) JOIN : given two dataframes and one key from each, compute the left outer join using the keys as relations.
Vector-related
fromVector :: Vector row -> Frame row Source #
Produce a Frame from a Vector
of rows
Row
A Row
type is internally a Trie:
- Fast random access
- Fast set operations
- Supports missing elements
Instances
TrieKey k => Functor (Row k) Source # | |
TrieKey k => Foldable (Row k) Source # | |
Defined in Heidi.Data.Row.GenericTrie fold :: Monoid m => Row k m -> m # foldMap :: Monoid m => (a -> m) -> Row k a -> m # foldMap' :: Monoid m => (a -> m) -> Row k a -> m # foldr :: (a -> b -> b) -> b -> Row k a -> b # foldr' :: (a -> b -> b) -> b -> Row k a -> b # foldl :: (b -> a -> b) -> b -> Row k a -> b # foldl' :: (b -> a -> b) -> b -> Row k a -> b # foldr1 :: (a -> a -> a) -> Row k a -> a # foldl1 :: (a -> a -> a) -> Row k a -> a # elem :: Eq a => a -> Row k a -> Bool # maximum :: Ord a => Row k a -> a # minimum :: Ord a => Row k a -> a # | |
TrieKey k => Traversable (Row k) Source # | |
(TrieKey k, Eq k, Eq v) => Eq (Row k v) Source # | |
(TrieKey k, Eq k, Eq v, Ord k, Ord v) => Ord (Row k v) Source # | |
(TrieKey k, Show k, Show v) => Show (Row k v) Source # | |
TrieKey k => Semigroup (Row k v) Source # | |
TrieKey k => Monoid (Row k v) Source # | |
Construction
rowFromList :: TrieKey k => [(k, v)] -> Row k v Source #
Construct a Row
from a list of key-element pairs.
>>>
lookup 3 (rowFromList [(3,'a'),(4,'b')])
Just 'a'>>>
lookup 6 (rowFromList [(3,'a'),(4,'b')])
Nothing
Access
Filtering
Returns a new Row
that doesn't have a given key-value pair
filterWithKey :: TrieKey k => (k -> v -> Bool) -> Row k v -> Row k v Source #
Filter a row by applying a predicate to its keys and corresponding elements.
NB : filtering _retains_ the elements that satisfy the predicate.
Retains the entries for which the given list is a prefix of the indexing key
filterWithKeyAny :: (TrieKey a, Eq a) => a -> Row [a] v -> Row [a] v Source #
Retains the entries for which the given item appears at any position in the indexing key
deleteMany :: (TrieKey k, Foldable t) => t k -> Row k v -> Row k v Source #
Produce a new Row
such that its keys do _not_ belong to a certain set.
Partitioning
Partition a Row
into two new ones, such as the elements that satisfy the predicate will end up in the _left_ row.
partitionWithKeyPrefix Source #
Uses partitionWithKey
internally
Lookup
lookup :: TrieKey k => k -> Row k v -> Maybe v Source #
Lookup the value stored at a given key in a row
>>>
lookup 0 row0
Just 'a'>>>
lookup 1 row0
Nothing
elemSatisfies :: TrieKey k => (a -> Bool) -> k -> Row k a -> Bool Source #
Looks up a key from a row and applies a predicate to its value (if this is found). If no value is found at that key the function returns False.
This function is meant to be used as first argument to filter
.
>>>
elemSatisfies (== 'a') 0 row0
True>>>
elemSatisfies (== 'a') 42 row0
False
Lookup utilities
maybeEmpty :: TrieKey k => Maybe (Row k v) -> Row k v Source #
Returns an empty row if the argument is Nothing.
Comparison by lookup
eqByLookup :: (TrieKey k, Eq k, Eq a) => k -> Row k a -> Row k a -> Maybe Bool Source #
Compares two rows by the values indexed at a specific key.
Returns Nothing if the key is not present in either row.
eqByLookups :: (Foldable t, TrieKey k, Eq k, Eq a) => t k -> Row k a -> Row k a -> Maybe Bool Source #
Compares two rows by the values indexed at a set of keys.
Returns Nothing if a key in either row is not present.
compareByLookup :: (TrieKey k, Eq k, Ord a) => k -> Row k a -> Row k a -> Maybe Ordering Source #
Compares for ordering two rows by the values indexed at a specific key.
Returns Nothing if the key is not present in either row.
Set operations
union :: TrieKey k => Row k v -> Row k v -> Row k v Source #
Set union of two rows
>>>
keys $ union row0 row1
[0,1,3,666]
unionWith :: TrieKey k => (v -> v -> v) -> Row k v -> Row k v -> Row k v Source #
Set union of two rows, using a combining function for equal keys
intersectionWith :: TrieKey k => (a -> b -> v) -> Row k a -> Row k b -> Row k v Source #
Set intersections of two rows, using a combining function for equal keys
Maps
mapWithKey :: TrieKey k => (k -> a -> b) -> Row k a -> Row k b Source #
Map over all elements with a function of both the key and the value
Folds
foldWithKey :: TrieKey k => (k -> a -> r -> r) -> r -> Row k a -> r Source #
Fold over a row with a function of both key and value
keysOnly :: (TrieKey k, Foldable f) => f (Row k v) -> Row k () Source #
Takes the union of a Foldable container of Row
s and discards the values
Traversals
traverseWithKey :: (Applicative f, TrieKey k) => (k -> a -> f b) -> Row k a -> f (Row k b) Source #
Traverse a Row
using a function of both the key and the element.
Lens combinators
Traversals
bool :: TrieKey k => k -> Traversal' (Row k VP) Bool Source #
Decode a Bool
from the given column index
float :: TrieKey k => k -> Traversal' (Row k VP) Float Source #
Decode a Float
from the given column index
double :: TrieKey k => k -> Traversal' (Row k VP) Double Source #
Decode a Double
from the given column index
char :: TrieKey k => k -> Traversal' (Row k VP) Char Source #
Decode a Char
from the given column index
string :: TrieKey k => k -> Traversal' (Row k VP) String Source #
Decode a String
from the given column index
text :: TrieKey k => k -> Traversal' (Row k VP) Text Source #
Decode a Text
from the given column index
scientific :: TrieKey k => k -> Traversal' (Row k VP) Scientific Source #
Decode a Scientific
from the given column index
oneHot :: TrieKey k => k -> Traversal' (Row k VP) (OneHot Int) Source #
Decode a OneHot
from the given column index
Getters
real :: TrieKey k => k -> Row k VP -> Maybe Double Source #
Lookup a real number at the given index.
Matches Double
, Float
, Int
and Scientific
values.
Combinators
Focus on a given column
NB : setting a Nothing
value removes the entry
Helper for filtering Frame
s
e.g.
>>>
:t \k -> keep (text k) (== "hello")
:: GT.TrieKey k => k -> Row k VP -> Bool
Combinators for list-indexed rows
atPrefix
: a Lens' that takes a key prefix and relates a row having lists as keys and the subset of columns corresponding to keys having that prefix
:: (TrieKey k, Eq k) | |
=> [k] | key prefix of the columns of interest |
-> Traversal' (Row [k] v) v |
Focus on all elements that share a common key prefix
e.g.
>>> :t k -> toListOf
(eachPrefixed k . vpBool)
(GT.TrieKey k, Eq k) => [k] -> Row [k] VP -> [Bool]
:: (TrieKey k, Eq k, Monoid r) | |
=> [k] | key prefix of the columns of interest |
-> Getting r (Row [k] v) v |
Extract all elements that share a common key prefix into a monoidal value (e.g. a list)
Encode internals
1-hot encoded vector.
This representation is used to encode categorical variables as points in a vector space.
Instances
Eq i => Eq (OneHot i) Source # | |
Ord i => Ord (OneHot i) Source # | |
Defined in Data.Generics.Encode.OneHot | |
Show i => Show (OneHot i) Source # | |
Generic (OneHot i) Source # | |
Hashable i => Hashable (OneHot i) Source # | |
Defined in Data.Generics.Encode.OneHot | |
type Rep (OneHot i) Source # | |
Defined in Data.Generics.Encode.OneHot type Rep (OneHot i) = D1 ('MetaData "OneHot" "Data.Generics.Encode.OneHot" "heidi-0.3.0-9M5jxZVOe4V4ra6vG8q0ck" 'False) (C1 ('MetaCons "OH" 'PrefixI 'True) (S1 ('MetaSel ('Just "ohDim") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 i) :*: S1 ('MetaSel ('Just "ohIx") 'NoSourceUnpackedness 'NoSourceStrictness 'DecidedLazy) (Rec0 i))) |