-- Hoogle documentation, generated by Haddock -- See Hoogle, http://www.haskell.org/hoogle/ -- | Tidy data in Haskell -- -- Tidy data in Haskell, via generics. @package heidi @version 0.3.0 module Heidi.Data.Frame.Algorithms.GenericTrie -- | Merge two frames by taking the set union of the columns unionColsWith :: (Eq k, TrieKey k) => (v -> v -> v) -> Frame (Row k v) -> Frame (Row k v) -> Frame (Row k v) -- | spreadWith moves the unique values of a key column into the -- column names, spreading the values of a value column across the new -- columns. spreadWith :: (TrieKey k, Foldable t, Ord k, Ord v) => (v -> k) -> k -> k -> t (Row k v) -> Frame (Row k v) -- | gatherWith moves column names into a "key" column, gathering -- the column values into a single "value" column gatherWith :: (Foldable t, Ord k, TrieKey k) => (k -> v) -> Set k -> k -> k -> t (Row k v) -> Frame (Row k v) -- | GROUP BY : given a key and a table that uses it, split the table in -- multiple tables, one per value taken by the key. -- --
--   >>> numRows <$> (HM.lookup "129" $ groupBy "id.0" t0)
--   Just 2
--   
groupBy :: (Foldable t, TrieKey k, Eq k, Ord v) => k -> t (Row k v) -> Map v (Frame (Row k v)) -- | INNER JOIN : given two dataframes and one key from each, compute the -- inner join using the keys as relations. -- --
--   >>> head t0
--   [("id.0","129"),("qty","1"),("item","book")]
--   
-- --
--   >>> head t1
--   [("id.1","129"),("price","100")]
--   
-- --
--   >>> head $ innerJoin "id.0" "id.1" t0 t1
--   [("id.1","129"),("id.0","129"),("qty","5"),("item","book"),("price","100")]
--   
innerJoin :: (Foldable t, Ord v, TrieKey k, Eq v, Eq k) => k -> k -> t (Row k v) -> t (Row k v) -> Frame (Row k v) -- | LEFT (OUTER) JOIN : given two dataframes and one key from each, -- compute the left outer join using the keys as relations. leftOuterJoin :: (Foldable t, Ord v, TrieKey k, Eq v, Eq k) => k -> k -> t (Row k v) -> t (Row k v) -> Frame (Row k v) -- | The purpose of this library is to make it easy to analyze collections -- of Haskell values; users encode their data collections (lists, -- maps and so on) into dataframes, and use functions provided by -- heidi for manipulation. module Heidi -- | A Frame is a list of rows. data Frame row -- | Populate a Frame with the generic encoding of the row data -- -- For example, a list of records having two fields each will produce a -- dataframe with two columns, having the record field names as column -- labels. -- --
--   data P1 = P1 Int Char deriving (Eq, Show, Generic)
--   instance Heidi P1
--   
--   data P2 = P2 { p2i :: Int, p2c :: Char } deriving (Eq, Show, Generic)
--   instance Heidi P2
--   
--   data Q = Q (Maybe Int) (Either Double Char) deriving (Eq, Show, Generic)
--   instance Heidi Q
--   
-- --
--   >>> encode [P1 42 'z']
--   Frame {tableRows = [([TC "P1" "_0"],VPInt 42),([TC "P1" "_1"],VPChar 'z')] :| []}
--   
-- --
--   >>> encode [P2 42 'z']
--   Frame {tableRows = [([TC "P2" "p2c"],VPChar 'z'),([TC "P2" "p2i"],VPInt 42)] :| []}
--   
-- -- Test using Maybe and Either record fields : -- --
--   >>> encode [Q (Just 42) (Left 1.2), Q Nothing (Right 'b')]
--   Frame {tableRows = [([TC "Q" "_0",TC "Maybe" "Just"],VPInt 42),([TC "Q" "_1",TC "Either" "Left"],VPDouble 1.2)] :| [[([TC "Q" "_1",TC "Either" "Right"],VPChar 'b')]]}
--   
-- -- NB: as the last example above demonstrates, Nothing values are -- not inserted in the rows, which can be used to encode missing data -- features. encode :: (Foldable t, Heidi a) => t a -> Frame (Row [TC] VP) -- | Single interface to the library. -- -- This typeclass provides all the machinery for encoding Haskell values -- into dataframes. -- -- NOTE: Your datatypes only need to possess a Generic instance, -- to which you just need to add an empty instance of Heidi. -- -- example: -- --
--   {-# language DeriveGeneric, DeriveAnyClass #-}
--   
--   data A = A Int Char deriving (Generic, Heidi)
--   
class Heidi a -- | A (type, constructor) name pair data TC -- | Primitive types -- -- NB : this is just a convenience for unityping the dataframe contents, -- but it should not be exposed to the library users data VP frameFromList :: [row] -> Frame row head :: Frame row -> row -- | Retain n rows take :: Int -> Frame row -> Frame row -- | Drop n rows drop :: Int -> Frame row -> Frame row numRows :: Frame row -> Int filter :: (row -> Bool) -> Frame row -> Frame row -- | This generalizes the list-based filter function. filterA :: Applicative f => (row -> f Bool) -> Frame row -> f (Frame row) -- | groupWith takes a row comparison function and a Frame and returns -- a list of Frames such that the concatenation of the result is equal to -- the argument. Moreover, each Frame in the result contains only -- elements that satisfy the comparison. groupWith :: (row -> row -> Bool) -> Frame row -> [Frame row] zipWith :: (a -> b -> c) -> Frame a -> Frame b -> Frame c -- | Left-associative scan scanl :: (b -> a -> b) -> b -> Frame a -> Frame b -- | Right-associative scan scanr :: (a -> b -> b) -> b -> Frame a -> Frame b -- | spreadWith moves the unique values of a key column into the -- column names, spreading the values of a value column across the new -- columns. spreadWith :: (TrieKey k, Foldable t, Ord k, Ord v) => (v -> k) -> k -> k -> t (Row k v) -> Frame (Row k v) -- | gatherWith moves column names into a "key" column, gathering -- the column values into a single "value" column gatherWith :: (Foldable t, Ord k, TrieKey k) => (k -> v) -> Set k -> k -> k -> t (Row k v) -> Frame (Row k v) -- | GROUP BY : given a key and a table that uses it, split the table in -- multiple tables, one per value taken by the key. -- --
--   >>> numRows <$> (HM.lookup "129" $ groupBy "id.0" t0)
--   Just 2
--   
groupBy :: (Foldable t, TrieKey k, Eq k, Ord v) => k -> t (Row k v) -> Map v (Frame (Row k v)) -- | INNER JOIN : given two dataframes and one key from each, compute the -- inner join using the keys as relations. -- --
--   >>> head t0
--   [("id.0","129"),("qty","1"),("item","book")]
--   
-- --
--   >>> head t1
--   [("id.1","129"),("price","100")]
--   
-- --
--   >>> head $ innerJoin "id.0" "id.1" t0 t1
--   [("id.1","129"),("id.0","129"),("qty","5"),("item","book"),("price","100")]
--   
innerJoin :: (Foldable t, Ord v, TrieKey k, Eq v, Eq k) => k -> k -> t (Row k v) -> t (Row k v) -> Frame (Row k v) -- | LEFT (OUTER) JOIN : given two dataframes and one key from each, -- compute the left outer join using the keys as relations. leftOuterJoin :: (Foldable t, Ord v, TrieKey k, Eq v, Eq k) => k -> k -> t (Row k v) -> t (Row k v) -> Frame (Row k v) -- | Produce a Vector of rows toVector :: Frame row -> Vector row -- | Produce a Frame from a Vector of rows fromVector :: Vector row -> Frame row -- | A Row type is internally a Trie: -- -- data Row k v -- | Construct a Row from a list of key-element pairs. -- --
--   >>> lookup 3 (rowFromList [(3,'a'),(4,'b')])
--   Just 'a'
--   
--   >>> lookup 6 (rowFromList [(3,'a'),(4,'b')])
--   Nothing
--   
rowFromList :: TrieKey k => [(k, v)] -> Row k v -- | Access the key-value pairs contained in the Row toList :: TrieKey k => Row k v -> [(k, v)] -- | List the keys of a given row -- --
--   >>> keys row0
--   [0,3]
--   
keys :: TrieKey k => Row k v -> [k] -- | Returns a new Row that doesn't have a given key-value pair delete :: TrieKey k => k -> Row k v -> Row k v -- | Filter a row by applying a predicate to its keys and corresponding -- elements. -- -- NB : filtering _retains_ the elements that satisfy the predicate. filterWithKey :: TrieKey k => (k -> v -> Bool) -> Row k v -> Row k v -- | Retains the entries for which the given list is a prefix of the -- indexing key filterWithKeyPrefix :: (TrieKey a, Eq a) => [a] -> Row [a] v -> Row [a] v -- | Retains the entries for which the given item appears at any position -- in the indexing key filterWithKeyAny :: (TrieKey a, Eq a) => a -> Row [a] v -> Row [a] v -- | Produce a new Row such that its keys do _not_ belong to a -- certain set. deleteMany :: (TrieKey k, Foldable t) => t k -> Row k v -> Row k v -- | Partition a Row into two new ones, such that the elements that -- satisfy the predicate will end up in the _left_ row. partitionWithKey :: TrieKey k => (k -> v -> Bool) -> Row k v -> (Row k v, Row k v) -- | Uses partitionWithKey internally partitionWithKeyPrefix :: (TrieKey a, Eq a) => [a] -> Row [a] v -> (Row [a] v, Row [a] v) -- | Look up the value stored at a given key in a row -- --
--   >>> lookup 0 row0
--   Just 'a'
--   
--   >>> lookup 1 row0
--   Nothing
--   
lookup :: TrieKey k => k -> Row k v -> Maybe v -- | Inline synonym for elemSatisfies (!:) :: TrieKey k => k -> (a -> Bool) -> Row k a -> Bool -- | Looks up a key from a row and applies a predicate to its value (if -- this is found). If no value is found at that key the function returns -- False. -- -- This function is meant to be used as first argument to filter. -- --
--   >>> elemSatisfies (== 'a') 0 row0
--   True
--   
--   >>> elemSatisfies (== 'a') 42 row0
--   False
--   
elemSatisfies :: TrieKey k => (a -> Bool) -> k -> Row k a -> Bool -- | Returns an empty row if the argument is Nothing. maybeEmpty :: TrieKey k => Maybe (Row k v) -> Row k v -- | Compares two rows by the values indexed at a specific key. -- -- Returns Nothing if the key is not present in either row. eqByLookup :: (TrieKey k, Eq k, Eq a) => k -> Row k a -> Row k a -> Maybe Bool -- | Compares two rows by the values indexed at a set of keys. -- -- Returns Nothing if a key in either row is not present. eqByLookups :: (Foldable t, TrieKey k, Eq k, Eq a) => t k -> Row k a -> Row k a -> Maybe Bool -- | Compares for ordering two rows by the values indexed at a specific -- key. -- -- Returns Nothing if the key is not present in either row. compareByLookup :: (TrieKey k, Eq k, Ord a) => k -> Row k a -> Row k a -> Maybe Ordering -- | Set union of two rows -- --
--   >>> keys $ union row0 row1
--   [0,1,3,666]
--   
union :: TrieKey k => Row k v -> Row k v -> Row k v -- | Set union of two rows, using a combining function for equal keys unionWith :: TrieKey k => (v -> v -> v) -> Row k v -> Row k v -> Row k v -- | Set intersection of two rows intersection :: TrieKey k => Row k v -> Row k b -> Row k v -- | Set intersection of two rows, using a combining function for equal -- keys intersectionWith :: TrieKey k => (a -> b -> v) -> Row k a -> Row k b -> Row k v -- | Map over all elements with a function of both the key and the value mapWithKey :: TrieKey k => (k -> a -> b) -> Row k a -> Row k b -- | Fold over a row with a function of both key and value foldWithKey :: TrieKey k => (k -> a -> r -> r) -> r -> Row k a -> r -- | Takes the union of a Foldable container of Rows and discards -- the values keysOnly :: (TrieKey k, Foldable f) => f (Row k v) -> Row k () -- | Traverse a Row using a function of both the key and the -- element. traverseWithKey :: (Applicative f, TrieKey k) => (k -> a -> f b) -> Row k a -> f (Row k b) -- | Decode an Int from the given column index int :: TrieKey k => k -> Traversal' (Row k VP) Int -- | Decode a Bool from the given column index bool :: TrieKey k => k -> Traversal' (Row k VP) Bool -- | Decode a Float from the given column index float :: TrieKey k => k -> Traversal' (Row k VP) Float -- | Decode a Double from the given column index double :: TrieKey k => k -> Traversal' (Row k VP) Double -- | Decode a Char from the given column index char :: TrieKey k => k -> Traversal' (Row k VP) Char -- | Decode a String from the given column index string :: TrieKey k => k -> Traversal' (Row k VP) String -- | Decode a Text from the given column index text :: TrieKey k => k -> Traversal' (Row k VP) Text -- | Decode a Scientific from the given column index scientific :: TrieKey k => k -> Traversal' (Row k VP) Scientific -- | Decode a OneHot from the given column index oneHot :: TrieKey k => k -> Traversal' (Row k VP) (OneHot Int) -- | Look up a real number at the given 
index. -- -- Matches Double, Float, Int and Scientific -- values. real :: TrieKey k => k -> Row k VP -> Maybe Double -- | Look up a text string at the given index. -- -- Matches String and Text values. txt :: TrieKey k => k -> Row k VP -> Maybe Text -- | Focus on a given column -- -- NB : setting a Nothing value removes the entry at :: TrieKey k => k -> Lens' (Row k a) (Maybe a) -- | Helper for filtering Frames -- -- e.g. -- --
--   >>> :t \k -> keep (text k) (== "hello")
--     :: GT.TrieKey k => k -> Row k VP -> Bool
--   
keep :: Getting Any row a -> (a -> b) -> row -> Bool -- | atPrefix : a Lens' that takes a key prefix and relates a row -- having lists as keys and the subset of columns corresponding to keys -- having that prefix atPrefix :: (TrieKey k, Eq k) => [k] -> Lens' (Row [k] v) [v] -- | Focus on all elements that share a common key prefix -- -- e.g. -- --
--   >>> :t \k -> toListOf (eachPrefixed k . vpBool)
--   (GT.TrieKey k, Eq k) => [k] -> Row [k] VP -> [Bool]
--   
eachPrefixed :: (TrieKey k, Eq k) => [k] -> Traversal' (Row [k] v) v -- | Extract all elements that share a common key prefix into a monoidal -- value (e.g. a list) foldPrefixed :: (TrieKey k, Eq k, Monoid r) => [k] -> Getting r (Row [k] v) v -- | Type name tcTyN :: TC -> String -- | Type constructor tcTyCon :: TC -> String -- | Create a fake TC with the given string as type name mkTyN :: String -> TC -- | Create a fake TC with the given string as type constructor mkTyCon :: String -> TC -- | 1-hot encoded vector. -- -- This representation is used to encode categorical variables as points -- in a vector space. data OneHot i