-- Hoogle documentation, generated by Haddock
-- See Hoogle, http://www.haskell.org/hoogle/
-- | Tidy data in Haskell
--
-- Tidy data in Haskell, via generics.
@package heidi
@version 0.3.0
module Heidi.Data.Frame.Algorithms.GenericTrie
-- | Merge two frames by taking the set union of the columns
unionColsWith :: (Eq k, TrieKey k) => (v -> v -> v) -> Frame (Row k v) -> Frame (Row k v) -> Frame (Row k v)
-- | spreadWith moves the unique values of a key column into the
-- column names, spreading the values of a value column across the new
-- columns.
spreadWith :: (TrieKey k, Foldable t, Ord k, Ord v) => (v -> k) -> k -> k -> t (Row k v) -> Frame (Row k v)
-- | gatherWith moves column names into a "key" column, gathering
-- the column values into a single "value" column
gatherWith :: (Foldable t, Ord k, TrieKey k) => (k -> v) -> Set k -> k -> k -> t (Row k v) -> Frame (Row k v)
-- | GROUP BY : given a key and a table that uses it, split the table in
-- multiple tables, one per value taken by the key.
--
--
-- >>> numRows <$> (HM.lookup "129" $ groupBy "id.0" t0)
-- Just 2
--
groupBy :: (Foldable t, TrieKey k, Eq k, Ord v) => k -> t (Row k v) -> Map v (Frame (Row k v))
-- | INNER JOIN : given two dataframes and one key from each, compute the
-- inner join using the keys as relations.
--
--
-- >>> head t0
-- [("id.0","129"),("qty","1"),("item","book")]
--
--
--
-- >>> head t1
-- [("id.1","129"),("price","100")]
--
--
--
-- >>> head $ innerJoin "id.0" "id.1" t0 t1
-- [("id.1","129"),("id.0","129"),("qty","5"),("item","book"),("price","100")]
--
innerJoin :: (Foldable t, Ord v, TrieKey k, Eq v, Eq k) => k -> k -> t (Row k v) -> t (Row k v) -> Frame (Row k v)
-- | LEFT (OUTER) JOIN : given two dataframes and one key from each,
-- compute the left outer join using the keys as relations.
leftOuterJoin :: (Foldable t, Ord v, TrieKey k, Eq v, Eq k) => k -> k -> t (Row k v) -> t (Row k v) -> Frame (Row k v)
-- | The purpose of this library is to make it easy to analyze collections
-- of Haskell values; users encode their data collections (lists,
-- maps and so on) into dataframes, and use functions provided by
-- heidi for manipulation.
module Heidi
-- | A Frame is a list of rows.
data Frame row
-- | Populate a Frame with the generic encoding of the row data
--
-- For example, a list of records having two fields each will produce a
-- dataframe with two columns, having the record field names as column
-- labels.
--
--
-- data P1 = P1 Int Char deriving (Eq, Show, Generic)
-- instance Heidi P1
--
-- data P2 = P2 { p2i :: Int, p2c :: Char } deriving (Eq, Show, Generic)
-- instance Heidi P2
--
-- data Q = Q (Maybe Int) (Either Double Char) deriving (Eq, Show, Generic)
-- instance Heidi Q
--
--
--
-- >>> encode [P1 42 'z']
-- Frame {tableRows = [([TC "P1" "_0"],VPInt 42),([TC "P1" "_1"],VPChar 'z')] :| []}
--
--
--
-- >>> encode [P2 42 'z']
-- Frame {tableRows = [([TC "P2" "p2c"],VPChar 'z'),([TC "P2" "p2i"],VPInt 42)] :| []}
--
--
-- Test using Maybe and Either record fields :
--
--
-- >>> encode [Q (Just 42) (Left 1.2), Q Nothing (Right 'b')]
-- Frame {tableRows = [([TC "Q" "_0",TC "Maybe" "Just"],VPInt 42),([TC "Q" "_1",TC "Either" "Left"],VPDouble 1.2)] :| [[([TC "Q" "_1",TC "Either" "Right"],VPChar 'b')]]}
--
--
-- NB: as the last example above demonstrates, Nothing values are
-- not inserted in the rows, which can be used to encode missing data
-- features.
encode :: (Foldable t, Heidi a) => t a -> Frame (Row [TC] VP)
-- | Single interface to the library.
--
-- This typeclass provides all the machinery for encoding Haskell values
-- into dataframes.
--
-- NOTE: Your datatypes only need to possess a Generic instance,
-- to which you just need to add an empty instance of Heidi.
--
-- example:
--
--
-- {-# language DeriveGeneric, DeriveAnyClass #-}
--
-- data A = A Int Char deriving (Generic, Heidi)
--
class Heidi a
-- | A (type, constructor) name pair
data TC
-- | Primitive types
--
-- NB : this is just a convenience for unityping the dataframe contents,
-- but it should not be exposed to the library users
data VP
frameFromList :: [row] -> Frame row
head :: Frame row -> row
-- | Retain n rows
take :: Int -> Frame row -> Frame row
-- | Drop n rows
drop :: Int -> Frame row -> Frame row
numRows :: Frame row -> Int
filter :: (row -> Bool) -> Frame row -> Frame row
-- | This generalizes the list-based filter function.
filterA :: Applicative f => (row -> f Bool) -> Frame row -> f (Frame row)
-- | groupWith takes a row comparison function and a Frame and returns
-- a list of Frames such that the concatenation of the result is equal to
-- the argument. Moreover, each Frame in the result contains only
-- rows that satisfy the comparison.
groupWith :: (row -> row -> Bool) -> Frame row -> [Frame row]
zipWith :: (a -> b -> c) -> Frame a -> Frame b -> Frame c
-- | Left-associative scan
scanl :: (b -> a -> b) -> b -> Frame a -> Frame b
-- | Right-associative scan
scanr :: (a -> b -> b) -> b -> Frame a -> Frame b
-- | spreadWith moves the unique values of a key column into the
-- column names, spreading the values of a value column across the new
-- columns.
spreadWith :: (TrieKey k, Foldable t, Ord k, Ord v) => (v -> k) -> k -> k -> t (Row k v) -> Frame (Row k v)
-- | gatherWith moves column names into a "key" column, gathering
-- the column values into a single "value" column
gatherWith :: (Foldable t, Ord k, TrieKey k) => (k -> v) -> Set k -> k -> k -> t (Row k v) -> Frame (Row k v)
-- | GROUP BY : given a key and a table that uses it, split the table in
-- multiple tables, one per value taken by the key.
--
--
-- >>> numRows <$> (HM.lookup "129" $ groupBy "id.0" t0)
-- Just 2
--
groupBy :: (Foldable t, TrieKey k, Eq k, Ord v) => k -> t (Row k v) -> Map v (Frame (Row k v))
-- | INNER JOIN : given two dataframes and one key from each, compute the
-- inner join using the keys as relations.
--
--
-- >>> head t0
-- [("id.0","129"),("qty","1"),("item","book")]
--
--
--
-- >>> head t1
-- [("id.1","129"),("price","100")]
--
--
--
-- >>> head $ innerJoin "id.0" "id.1" t0 t1
-- [("id.1","129"),("id.0","129"),("qty","5"),("item","book"),("price","100")]
--
innerJoin :: (Foldable t, Ord v, TrieKey k, Eq v, Eq k) => k -> k -> t (Row k v) -> t (Row k v) -> Frame (Row k v)
-- | LEFT (OUTER) JOIN : given two dataframes and one key from each,
-- compute the left outer join using the keys as relations.
leftOuterJoin :: (Foldable t, Ord v, TrieKey k, Eq v, Eq k) => k -> k -> t (Row k v) -> t (Row k v) -> Frame (Row k v)
-- | Produce a Vector of rows
toVector :: Frame row -> Vector row
-- | Produce a Frame from a Vector of rows
fromVector :: Vector row -> Frame row
-- | A Row type is internally a Trie:
--
--
-- >>> lookup 3 (rowFromList [(3,'a'),(4,'b')])
-- Just 'a'
--
-- >>> lookup 6 (rowFromList [(3,'a'),(4,'b')])
-- Nothing
--
rowFromList :: TrieKey k => [(k, v)] -> Row k v
-- | Access the key-value pairs contained in the Row
toList :: TrieKey k => Row k v -> [(k, v)]
-- | List the keys of a given row
--
--
-- >>> keys row0
-- [0,3]
--
keys :: TrieKey k => Row k v -> [k]
-- | Returns a new Row that doesn't have a given key-value pair
delete :: TrieKey k => k -> Row k v -> Row k v
-- | Filter a row by applying a predicate to its keys and corresponding
-- elements.
--
-- NB : filtering _retains_ the elements that satisfy the predicate.
filterWithKey :: TrieKey k => (k -> v -> Bool) -> Row k v -> Row k v
-- | Retains the entries for which the given list is a prefix of the
-- indexing key
filterWithKeyPrefix :: (TrieKey a, Eq a) => [a] -> Row [a] v -> Row [a] v
-- | Retains the entries for which the given item appears at any position
-- in the indexing key
filterWithKeyAny :: (TrieKey a, Eq a) => a -> Row [a] v -> Row [a] v
-- | Produce a new Row such that its keys do _not_ belong to a
-- certain set.
deleteMany :: (TrieKey k, Foldable t) => t k -> Row k v -> Row k v
-- | Partition a Row into two new ones, such that the elements that
-- satisfy the predicate will end up in the _left_ row.
partitionWithKey :: TrieKey k => (k -> v -> Bool) -> Row k v -> (Row k v, Row k v)
-- | Uses partitionWithKey internally
partitionWithKeyPrefix :: (TrieKey a, Eq a) => [a] -> Row [a] v -> (Row [a] v, Row [a] v)
-- | Lookup the value stored at a given key in a row
--
--
-- >>> lookup 0 row0
-- Just 'a'
--
-- >>> lookup 1 row0
-- Nothing
--
lookup :: TrieKey k => k -> Row k v -> Maybe v
-- | Inline synonym for elemSatisfies
(!:) :: TrieKey k => k -> (a -> Bool) -> Row k a -> Bool
-- | Looks up a key from a row and applies a predicate to its value (if
-- this is found). If no value is found at that key the function returns
-- False.
--
-- This function is meant to be used as first argument to filter.
--
--
-- >>> elemSatisfies (== 'a') 0 row0
-- True
--
-- >>> elemSatisfies (== 'a') 42 row0
-- False
--
elemSatisfies :: TrieKey k => (a -> Bool) -> k -> Row k a -> Bool
-- | Returns an empty row if the argument is Nothing.
maybeEmpty :: TrieKey k => Maybe (Row k v) -> Row k v
-- | Compares two rows by the values indexed at a specific key.
--
-- Returns Nothing if the key is not present in either row.
eqByLookup :: (TrieKey k, Eq k, Eq a) => k -> Row k a -> Row k a -> Maybe Bool
-- | Compares two rows by the values indexed at a set of keys.
--
-- Returns Nothing if a key in either row is not present.
eqByLookups :: (Foldable t, TrieKey k, Eq k, Eq a) => t k -> Row k a -> Row k a -> Maybe Bool
-- | Compares for ordering two rows by the values indexed at a specific
-- key.
--
-- Returns Nothing if the key is not present in either row.
compareByLookup :: (TrieKey k, Eq k, Ord a) => k -> Row k a -> Row k a -> Maybe Ordering
-- | Set union of two rows
--
--
-- >>> keys $ union row0 row1
-- [0,1,3,666]
--
union :: TrieKey k => Row k v -> Row k v -> Row k v
-- | Set union of two rows, using a combining function for equal keys
unionWith :: TrieKey k => (v -> v -> v) -> Row k v -> Row k v -> Row k v
-- | Set intersection of two rows
intersection :: TrieKey k => Row k v -> Row k b -> Row k v
-- | Set intersections of two rows, using a combining function for equal
-- keys
intersectionWith :: TrieKey k => (a -> b -> v) -> Row k a -> Row k b -> Row k v
-- | Map over all elements with a function of both the key and the value
mapWithKey :: TrieKey k => (k -> a -> b) -> Row k a -> Row k b
-- | Fold over a row with a function of both key and value
foldWithKey :: TrieKey k => (k -> a -> r -> r) -> r -> Row k a -> r
-- | Takes the union of a Foldable container of Rows and discards
-- the values
keysOnly :: (TrieKey k, Foldable f) => f (Row k v) -> Row k ()
-- | Traverse a Row using a function of both the key and the
-- element.
traverseWithKey :: (Applicative f, TrieKey k) => (k -> a -> f b) -> Row k a -> f (Row k b)
-- | Decode an Int from the given column index
int :: TrieKey k => k -> Traversal' (Row k VP) Int
-- | Decode a Bool from the given column index
bool :: TrieKey k => k -> Traversal' (Row k VP) Bool
-- | Decode a Float from the given column index
float :: TrieKey k => k -> Traversal' (Row k VP) Float
-- | Decode a Double from the given column index
double :: TrieKey k => k -> Traversal' (Row k VP) Double
-- | Decode a Char from the given column index
char :: TrieKey k => k -> Traversal' (Row k VP) Char
-- | Decode a String from the given column index
string :: TrieKey k => k -> Traversal' (Row k VP) String
-- | Decode a Text from the given column index
text :: TrieKey k => k -> Traversal' (Row k VP) Text
-- | Decode a Scientific from the given column index
scientific :: TrieKey k => k -> Traversal' (Row k VP) Scientific
-- | Decode a OneHot from the given column index
oneHot :: TrieKey k => k -> Traversal' (Row k VP) (OneHot Int)
-- | Lookup a real number at the given index.
--
-- Matches Double, Float, Int and Scientific
-- values.
real :: TrieKey k => k -> Row k VP -> Maybe Double
-- | Look up a text string at the given index.
--
-- Matches String and Text values.
txt :: TrieKey k => k -> Row k VP -> Maybe Text
-- | Focus on a given column
--
-- NB : setting a Nothing value removes the entry
at :: TrieKey k => k -> Lens' (Row k a) (Maybe a)
-- | Helper for filtering Frames
--
-- e.g.
--
--
-- >>> :t \k -> keep (text k) (== "hello")
-- :: GT.TrieKey k => k -> Row k VP -> Bool
--
keep :: Getting Any row a -> (a -> b) -> row -> Bool
-- | atPrefix : a Lens' that takes a key prefix and relates a row
-- having lists as keys and the subset of columns corresponding to keys
-- having that prefix
atPrefix :: (TrieKey k, Eq k) => [k] -> Lens' (Row [k] v) [v]
-- | Focus on all elements that share a common key prefix
--
-- e.g.
--
--
-- >>> :t \k -> toListOf (eachPrefixed k . vpBool)
-- (GT.TrieKey k, Eq k) => [k] -> Row [k] VP -> [Bool]
--
eachPrefixed :: (TrieKey k, Eq k) => [k] -> Traversal' (Row [k] v) v
-- | Extract all elements that share a common key prefix into a monoidal
-- value (e.g. a list)
foldPrefixed :: (TrieKey k, Eq k, Monoid r) => [k] -> Getting r (Row [k] v) v
-- | Type name
tcTyN :: TC -> String
-- | Type constructor
tcTyCon :: TC -> String
-- | Create a fake TC with the given string as type name
mkTyN :: String -> TC
-- | Create a fake TC with the given string as type constructor
mkTyCon :: String -> TC
-- | 1-hot encoded vector.
--
-- This representation is used to encode categorical variables as points
-- in a vector space.
data OneHot i