heidi-0.0.0: Tidy data in Haskell

Copyright(c) Marco Zocca (2018-2020)
LicenseBSD-style
Maintainerocramz fripost org
Stabilityexperimental
PortabilityGHC
Safe HaskellNone
LanguageHaskell2010

Heidi

Contents

Description

Heidi : tidy data in Haskell

In Heidi, a data Frame is not meant to be constructed directly, but encoded from a collection of values. The encoding produces a simple representation which can be easily manipulated for common data analysis tasks.

Synopsis

Frame

data Frame row Source #

A Frame is a list of rows.

Instances
Functor Frame Source # 
Instance details

Defined in Core.Data.Frame.List

Methods

fmap :: (a -> b) -> Frame a -> Frame b #

(<$) :: a -> Frame b -> Frame a #

Foldable Frame Source # 
Instance details

Defined in Core.Data.Frame.List

Methods

fold :: Monoid m => Frame m -> m #

foldMap :: Monoid m => (a -> m) -> Frame a -> m #

foldr :: (a -> b -> b) -> b -> Frame a -> b #

foldr' :: (a -> b -> b) -> b -> Frame a -> b #

foldl :: (b -> a -> b) -> b -> Frame a -> b #

foldl' :: (b -> a -> b) -> b -> Frame a -> b #

foldr1 :: (a -> a -> a) -> Frame a -> a #

foldl1 :: (a -> a -> a) -> Frame a -> a #

toList :: Frame a -> [a] #

null :: Frame a -> Bool #

length :: Frame a -> Int #

elem :: Eq a => a -> Frame a -> Bool #

maximum :: Ord a => Frame a -> a #

minimum :: Ord a => Frame a -> a #

sum :: Num a => Frame a -> a #

product :: Num a => Frame a -> a #

Traversable Frame Source # 
Instance details

Defined in Core.Data.Frame.List

Methods

traverse :: Applicative f => (a -> f b) -> Frame a -> f (Frame b) #

sequenceA :: Applicative f => Frame (f a) -> f (Frame a) #

mapM :: Monad m => (a -> m b) -> Frame a -> m (Frame b) #

sequence :: Monad m => Frame (m a) -> m (Frame a) #

Show row => Show (Frame row) Source # 
Instance details

Defined in Core.Data.Frame.List

Methods

showsPrec :: Int -> Frame row -> ShowS #

show :: Frame row -> String #

showList :: [Frame row] -> ShowS #

Construction

Encoding

encode :: (Foldable t, Heidi a) => t a -> Frame (Row [TC] VP) Source #

Populate a Frame with the generic encoding of the row data

For example, a list of records having two fields each will produce a dataframe with two columns, having the record field names as column labels.

data P1 = P1 Int Char deriving (Eq, Show, Generic)
instance Heidi P1

data P2 = P2 { p2i :: Int, p2c :: Char } deriving (Eq, Show, Generic)
instance Heidi P2

data Q = Q (Maybe Int) (Either Double Char) deriving (Eq, Show, Generic)
instance Heidi Q
>>> encode [P1 42 'z']
Frame {tableRows = [([TC "P1" "_0"],VPInt 42),([TC "P1" "_1"],VPChar 'z')] :| []}
>>> encode [P2 42 'z']
Frame {tableRows = [([TC "P2" "p2c"],VPChar 'z'),([TC "P2" "p2i"],VPInt 42)] :| []}

Test using Maybe and Either record fields :

>>> encode [Q (Just 42) (Left 1.2), Q Nothing (Right 'b')]
Frame {tableRows = [([TC "Q" "_0",TC "Maybe" "Just"],VPInt 42),([TC "Q" "_1",TC "Either" "Left"],VPDouble 1.2)] :| [[([TC "Q" "_1",TC "Either" "Right"],VPChar 'b')]]}

NB: as the last example above demonstrates, Nothing values are not inserted in the rows, which can be used to encode missing data features.

class Heidi a Source #

Typeclass for types which have a generic encoding.

NOTE: if your type has a Generic instance you just need to declare an empty instance of Heidi for it.

example:

data A = A Int Char deriving (Generic)
instance Heidi A
Instances
Heidi Bool Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

toVal :: Bool -> Val

Heidi Char Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

toVal :: Char -> Val

Heidi Double Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

toVal :: Double -> Val

Heidi Float Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

toVal :: Float -> Val

Heidi Int Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

toVal :: Int -> Val

Heidi Int8 Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

toVal :: Int8 -> Val

Heidi Int16 Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

toVal :: Int16 -> Val

Heidi Int32 Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

toVal :: Int32 -> Val

Heidi Int64 Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

toVal :: Int64 -> Val

Heidi Word8 Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

toVal :: Word8 -> Val

Heidi Word16 Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

toVal :: Word16 -> Val

Heidi Word32 Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

toVal :: Word32 -> Val

Heidi Word64 Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

toVal :: Word64 -> Val

Heidi String Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

toVal :: String -> Val

Heidi Scientific Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

toVal :: Scientific -> Val

Heidi Text Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

toVal :: Text -> Val

Heidi a => Heidi (Maybe a) Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

toVal :: Maybe a -> Val

(Heidi a, Heidi b) => Heidi (Either a b) Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

toVal :: Either a b -> Val

(Heidi a, Heidi b) => Heidi (a, b) Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

toVal :: (a, b) -> Val

(Heidi a, Heidi b, Heidi c) => Heidi (a, b, c) Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

toVal :: (a, b, c) -> Val

data TC Source #

A (type, constructor) name pair

Instances
Eq TC Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

(==) :: TC -> TC -> Bool #

(/=) :: TC -> TC -> Bool #

Ord TC Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

compare :: TC -> TC -> Ordering #

(<) :: TC -> TC -> Bool #

(<=) :: TC -> TC -> Bool #

(>) :: TC -> TC -> Bool #

(>=) :: TC -> TC -> Bool #

max :: TC -> TC -> TC #

min :: TC -> TC -> TC #

Show TC Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

showsPrec :: Int -> TC -> ShowS #

show :: TC -> String #

showList :: [TC] -> ShowS #

Generic TC Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Associated Types

type Rep TC :: Type -> Type #

Methods

from :: TC -> Rep TC x #

to :: Rep TC x -> TC #

TrieKey TC Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Associated Types

type TrieRep TC :: Type -> Type #

Methods

trieEmpty :: Trie TC a #

trieNull :: Trie TC a -> Bool #

trieLookup :: TC -> Trie TC a -> Maybe a #

trieInsert :: TC -> a -> Trie TC a -> Trie TC a #

trieDelete :: TC -> Trie TC a -> Trie TC a #

trieSingleton :: TC -> a -> Trie TC a #

trieMap :: (a -> b) -> Trie TC a -> Trie TC b #

trieTraverse :: Applicative f => (a -> f b) -> Trie TC a -> f (Trie TC b) #

trieShowsPrec :: Show a => Int -> Trie TC a -> ShowS #

trieMapMaybeWithKey :: (TC -> a -> Maybe b) -> Trie TC a -> Trie TC b #

trieFoldWithKey :: (TC -> a -> r -> r) -> r -> Trie TC a -> r #

trieTraverseWithKey :: Applicative f => (TC -> a -> f b) -> Trie TC a -> f (Trie TC b) #

trieMergeWithKey :: (TC -> a -> b -> Maybe c) -> (Trie TC a -> Trie TC c) -> (Trie TC b -> Trie TC c) -> Trie TC a -> Trie TC b -> Trie TC c #

Hashable TC Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

hashWithSalt :: Int -> TC -> Int #

hash :: TC -> Int #

type Rep TC Source # 
Instance details

Defined in Data.Generics.Encode.Internal

type TrieRep TC Source # 
Instance details

Defined in Data.Generics.Encode.Internal

data VP Source #

Primitive types

NB : this is just a convenience for unityping the dataframe contents, but it should not be exposed to the library users

Instances
Eq VP Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

(==) :: VP -> VP -> Bool #

(/=) :: VP -> VP -> Bool #

Ord VP Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

compare :: VP -> VP -> Ordering #

(<) :: VP -> VP -> Bool #

(<=) :: VP -> VP -> Bool #

(>) :: VP -> VP -> Bool #

(>=) :: VP -> VP -> Bool #

max :: VP -> VP -> VP #

min :: VP -> VP -> VP #

Show VP Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

showsPrec :: Int -> VP -> ShowS #

show :: VP -> String #

showList :: [VP] -> ShowS #

Generic VP Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Associated Types

type Rep VP :: Type -> Type #

Methods

from :: VP -> Rep VP x #

to :: Rep VP x -> VP #

Hashable VP Source # 
Instance details

Defined in Data.Generics.Encode.Internal

Methods

hashWithSalt :: Int -> VP -> Int #

hash :: VP -> Int #

type Rep VP Source # 
Instance details

Defined in Data.Generics.Encode.Internal

type Rep VP

Direct

frameFromList :: [row] -> Frame row Source #

Access

head :: Frame row -> row Source #

take :: Int -> Frame row -> Frame row Source #

Retain n rows

drop :: Int -> Frame row -> Frame row Source #

Drop n rows

Filtering

filter :: (row -> Bool) -> Frame row -> Frame row Source #

filterA :: Applicative f => (row -> f Bool) -> Frame row -> f (Frame row) Source #

This generalizes the list-based filter function.

Grouping

groupWith :: (row -> row -> Bool) -> Frame row -> [Frame row] Source #

groupWith takes row comparison function and a list and returns a list of lists such that the concatenation of the result is equal to the argument. Moreover, each sublist in the result contains only elements that satisfy the comparison.

Zipping

zipWith :: (a -> b -> c) -> Frame a -> Frame b -> Frame c Source #

Scans

scanl :: (b -> a -> b) -> b -> Frame a -> Frame b Source #

Left-associative scan

scanr :: (a -> b -> b) -> b -> Frame a -> Frame b Source #

Right-associative scan

Data tidying

spreadWith Source #

Arguments

:: (TrieKey k, Foldable t, Ord k, Ord v) 
=> (v -> k) 
-> k

"key" key

-> k

"value" key

-> t (Row k v)

input dataframe

-> Frame (Row k v) 

spreadWith moves the unique values of a key column into the column names, spreading the values of a value column across the new columns.

gatherWith Source #

Arguments

:: (Foldable t, Ord k, TrieKey k) 
=> (k -> v) 
-> Set k

set of keys to gather

-> k

"key" key

-> k

"value" key

-> t (Row k v)

input dataframe

-> Frame (Row k v) 

gatherWith moves column names into a "key" column, gathering the column values into a single "value" column

Relational operations

groupBy Source #

Arguments

:: (Foldable t, TrieKey k, Eq k, Ord v) 
=> k

Key to group by

-> t (Row k v)

A 'Frame (GTR.Row k v) can be used here

-> Map v (Frame (Row k v)) 

GROUP BY : given a key and a table that uses it, split the table in multiple tables, one per value taken by the key.

>>> numRows <$> (HM.lookup "129" $ groupBy "id.0" t0)
Just 2

innerJoin Source #

Arguments

:: (Foldable t, Ord v, TrieKey k, Eq v, Eq k) 
=> k

Key into the first table

-> k

Key into the second table

-> t (Row k v)

First dataframe

-> t (Row k v)

Second dataframe

-> Frame (Row k v) 

INNER JOIN : given two dataframes and one key from each, compute the inner join using the keys as relations.

>>> head t0
[("id.0","129"),("qty","1"),("item","book")]
>>> head t1
[("id.1","129"),("price","100")]
>>> head $ innerJoin "id.0" "id.1" t0 t1
[("id.1","129"),("id.0","129"),("qty","5"),("item","book"),("price","100")]

leftOuterJoin :: (Foldable t, Ord v, TrieKey k, Eq v, Eq k) => k -> k -> t (Row k v) -> t (Row k v) -> Frame (Row k v) Source #

LEFT (OUTER) JOIN : given two dataframes and one key from each, compute the left outer join using the keys as relations.

Vector-related

toVector :: Frame row -> Vector row Source #

Produce a Vector of rows

fromVector :: Vector row -> Frame row Source #

Produce a Frame from a Vector of rows

Row

data Row k v Source #

A Row type is internally a Trie:

  • Fast random access
  • Fast set operations
  • Supports missing elements
Instances
TrieKey k => Functor (Row k) Source # 
Instance details

Defined in Heidi.Data.Row.GenericTrie

Methods

fmap :: (a -> b) -> Row k a -> Row k b #

(<$) :: a -> Row k b -> Row k a #

TrieKey k => Foldable (Row k) Source # 
Instance details

Defined in Heidi.Data.Row.GenericTrie

Methods

fold :: Monoid m => Row k m -> m #

foldMap :: Monoid m => (a -> m) -> Row k a -> m #

foldr :: (a -> b -> b) -> b -> Row k a -> b #

foldr' :: (a -> b -> b) -> b -> Row k a -> b #

foldl :: (b -> a -> b) -> b -> Row k a -> b #

foldl' :: (b -> a -> b) -> b -> Row k a -> b #

foldr1 :: (a -> a -> a) -> Row k a -> a #

foldl1 :: (a -> a -> a) -> Row k a -> a #

toList :: Row k a -> [a] #

null :: Row k a -> Bool #

length :: Row k a -> Int #

elem :: Eq a => a -> Row k a -> Bool #

maximum :: Ord a => Row k a -> a #

minimum :: Ord a => Row k a -> a #

sum :: Num a => Row k a -> a #

product :: Num a => Row k a -> a #

TrieKey k => Traversable (Row k) Source # 
Instance details

Defined in Heidi.Data.Row.GenericTrie

Methods

traverse :: Applicative f => (a -> f b) -> Row k a -> f (Row k b) #

sequenceA :: Applicative f => Row k (f a) -> f (Row k a) #

mapM :: Monad m => (a -> m b) -> Row k a -> m (Row k b) #

sequence :: Monad m => Row k (m a) -> m (Row k a) #

(TrieKey k, Eq k, Eq v) => Eq (Row k v) Source # 
Instance details

Defined in Heidi.Data.Row.GenericTrie

Methods

(==) :: Row k v -> Row k v -> Bool #

(/=) :: Row k v -> Row k v -> Bool #

(TrieKey k, Eq k, Eq v, Ord k, Ord v) => Ord (Row k v) Source # 
Instance details

Defined in Heidi.Data.Row.GenericTrie

Methods

compare :: Row k v -> Row k v -> Ordering #

(<) :: Row k v -> Row k v -> Bool #

(<=) :: Row k v -> Row k v -> Bool #

(>) :: Row k v -> Row k v -> Bool #

(>=) :: Row k v -> Row k v -> Bool #

max :: Row k v -> Row k v -> Row k v #

min :: Row k v -> Row k v -> Row k v #

(TrieKey k, Show k, Show v) => Show (Row k v) Source # 
Instance details

Defined in Heidi.Data.Row.GenericTrie

Methods

showsPrec :: Int -> Row k v -> ShowS #

show :: Row k v -> String #

showList :: [Row k v] -> ShowS #

Construction

rowFromList :: TrieKey k => [(k, v)] -> Row k v Source #

Construct a Row from a list of key-element pairs.

>>> lookup 3 (rowFromList [(3,'a'),(4,'b')])
Just 'a'
>>> lookup 6 (rowFromList [(3,'a'),(4,'b')])
Nothing

Access

toList :: TrieKey k => Row k v -> [(k, v)] Source #

Access the key-value pairs contained in the Row

keys :: TrieKey k => Row k v -> [k] Source #

List the keys of a given row

>>> keys row0
[0,3]

Filtering

delete Source #

Arguments

:: TrieKey k 
=> k

Key to remove

-> Row k v 
-> Row k v 

Returns a new Row that doesn't have a given key-value pair

filterWithKey :: TrieKey k => (k -> v -> Bool) -> Row k v -> Row k v Source #

Filter a row by applying a predicate to its keys and corresponding elements.

NB : filtering _retains_ the elements that satisfy the predicate.

filterWithKeyPrefix Source #

Arguments

:: (TrieKey a, Eq a) 
=> [a]

key prefix

-> Row [a] v 
-> Row [a] v 

Retains the entries for which the given list is a prefix of the indexing key

filterWithKeyAny :: (TrieKey a, Eq a) => a -> Row [a] v -> Row [a] v Source #

Retains the entries for which the given item appears at any position in the indexing key

deleteMany :: (TrieKey k, Foldable t) => t k -> Row k v -> Row k v Source #

Produce a new Row such that its keys do _not_ belong to a certain set.

Partitioning

partitionWithKey Source #

Arguments

:: TrieKey k 
=> (k -> v -> Bool)

predicate

-> Row k v 
-> (Row k v, Row k v) 

Partition a Row into two new ones, such as the elements that satisfy the predicate will end up in the _left_ row.

partitionWithKeyPrefix Source #

Arguments

:: (TrieKey a, Eq a) 
=> [a]

key prefix

-> Row [a] v 
-> (Row [a] v, Row [a] v) 

Uses partitionWithKey internally

Lookup

lookup :: TrieKey k => k -> Row k v -> Maybe v Source #

Lookup the value stored at a given key in a row

>>> lookup 0 row0
Just 'a'
>>> lookup 1 row0
Nothing

(!:) :: TrieKey k => k -> (a -> Bool) -> Row k a -> Bool Source #

Inline synonym for elemSatisfies

elemSatisfies :: TrieKey k => (a -> Bool) -> k -> Row k a -> Bool Source #

Looks up a key from a row and applies a predicate to its value (if this is found). If no value is found at that key the function returns False.

This function is meant to be used as first argument to filter.

>>> elemSatisfies (== 'a') 0 row0
True
>>> elemSatisfies (== 'a') 42 row0
False

Lookup utilities

maybeEmpty :: TrieKey k => Maybe (Row k v) -> Row k v Source #

Returns an empty row if the argument is Nothing.

Comparison by lookup

eqByLookup :: (TrieKey k, Eq k, Eq a) => k -> Row k a -> Row k a -> Maybe Bool Source #

Compares two rows by the values indexed at a specific key.

Returns Nothing if the key is not present in either row.

eqByLookups :: (Foldable t, TrieKey k, Eq k, Eq a) => t k -> Row k a -> Row k a -> Maybe Bool Source #

Compares two rows by the values indexed at a set of keys.

Returns Nothing if a key in either row is not present.

compareByLookup :: (TrieKey k, Eq k, Ord a) => k -> Row k a -> Row k a -> Maybe Ordering Source #

Compares for ordering two rows by the values indexed at a specific key.

Returns Nothing if the key is not present in either row.

Set operations

union :: TrieKey k => Row k v -> Row k v -> Row k v Source #

Set union of two rows

>>> keys $ union row0 row1
[0,1,3,666]

unionWith :: TrieKey k => (v -> v -> v) -> Row k v -> Row k v -> Row k v Source #

Set union of two rows, using a combining function for equal keys

intersection :: TrieKey k => Row k v -> Row k b -> Row k v Source #

Set intersection of two rows

intersectionWith :: TrieKey k => (a -> b -> v) -> Row k a -> Row k b -> Row k v Source #

Set intersections of two rows, using a combining function for equal keys

Maps

mapWithKey :: TrieKey k => (k -> a -> b) -> Row k a -> Row k b Source #

Map over all elements with a function of both the key and the value

Folds

foldWithKey :: TrieKey k => (k -> a -> r -> r) -> r -> Row k a -> r Source #

Fold over a row with a function of both key and value

keysOnly :: (TrieKey k, Foldable f) => f (Row k v) -> Row k () Source #

Takes the union of a Foldable container of Rows and discards the values

Traversals

traverseWithKey :: (Applicative f, TrieKey k) => (k -> a -> f b) -> Row k a -> f (Row k b) Source #

Traverse a Row using a function of both the key and the element.

Lenses

int :: TrieKey k => k -> Traversal' (Row k VP) Int Source #

Decode a Int from the given column index

bool :: TrieKey k => k -> Traversal' (Row k VP) Bool Source #

Decode a Bool from the given column index

float :: TrieKey k => k -> Traversal' (Row k VP) Float Source #

Decode a Float from the given column index

double :: TrieKey k => k -> Traversal' (Row k VP) Double Source #

Decode a Double from the given column index

char :: TrieKey k => k -> Traversal' (Row k VP) Char Source #

Decode a Char from the given column index

string :: TrieKey k => k -> Traversal' (Row k VP) String Source #

Decode a String from the given column index

text :: TrieKey k => k -> Traversal' (Row k VP) Text Source #

Decode a Text from the given column index

scientific :: TrieKey k => k -> Traversal' (Row k VP) Scientific Source #

Decode a Scientific from the given column index

oneHot :: TrieKey k => k -> Traversal' (Row k VP) (OneHot Int) Source #

Decode a OneHot from the given column index

Lens combinators

at :: TrieKey k => k -> Lens' (Row k a) (Maybe a) Source #

Focus on a given column

keep Source #

Arguments

:: Getting Any row a 
-> (a -> b)

e.g. a predicate

-> row 
-> Bool 

Helper for filtering Frames

e.g.

>>> :t \k -> keep (text k) (== "hello")
  :: GT.TrieKey k => k -> Row k VP -> Bool

Combinators for list-indexed rows

atPrefix Source #

Arguments

:: (TrieKey k, Eq k) 
=> [k]

key prefix of the columns of interest

-> Lens' (Row [k] v) [v] 

atPrefix : a Lens' that takes a key prefix and relates a row having lists as keys and the subset of columns corresponding to keys having that prefix

eachPrefixed Source #

Arguments

:: (TrieKey k, Eq k) 
=> [k]

key prefix of the columns of interest

-> Traversal' (Row [k] v) v 

Focus on all elements that share a common key prefix

e.g.

>>> :t k -> toListOf (eachPrefixed k . vpBool)
(GT.TrieKey k, Eq k) => [k] -> Row [k] VP -> [Bool]

foldPrefixed Source #

Arguments

:: (TrieKey k, Eq k, Monoid r) 
=> [k]

key prefix of the columns of interest

-> Getting r (Row [k] v) v 

Extract all elements that share a common key prefix into a monoidal value (e.g. a list)

Encode internals

tcTyN :: TC -> String Source #

Type name

tcTyCon :: TC -> String Source #

Type constructor

mkTyN :: String -> TC Source #

Create a fake TC with the given string as type name

mkTyCon :: String -> TC Source #

Create a fake TC with the given string as type constructor