Frames-0.4.0: Data frames For working with tabular data files

Safe HaskellNone
LanguageHaskell2010

Frames.InCore

Description

Efficient in-memory (in-core) storage of tabular data.

Synopsis

Documentation

type family VectorFor t :: * -> * Source #

The most efficient vector type for each column data type.

type VectorMFor a = Mutable (VectorFor a) Source #

The mutable version of VectorFor a particular type.

initialCapacity :: Int Source #

Since we stream into the in-memory representation, we use an exponential growth strategy to resize arrays as more data is read in. This is the initial capacity of each column.

type family VectorMs m rs where ... Source #

Mutable vector types for each column in a row.

Equations

VectorMs m '[] = '[] 
VectorMs m ((s :-> a) ': rs) = (s :-> VectorMFor a (PrimState m) a) ': VectorMs m rs 

type family Vectors rs where ... Source #

Immutable vector types for each column in a row.

Equations

Vectors '[] = '[] 
Vectors ((s :-> a) ': rs) = (s :-> VectorFor a a) ': Vectors rs 

class RecVec rs where Source #

Tooling to allocate, grow, write to, freeze, and index into records of vectors.

Minimal complete definition

allocRec, freezeRec, growRec, writeRec, indexRec, produceRec

Methods

allocRec :: PrimMonad m => proxy rs -> Int -> m (Record (VectorMs m rs)) Source #

freezeRec :: PrimMonad m => proxy rs -> Int -> Record (VectorMs m rs) -> m (Record (Vectors rs)) Source #

growRec :: PrimMonad m => proxy rs -> Record (VectorMs m rs) -> m (Record (VectorMs m rs)) Source #

writeRec :: PrimMonad m => proxy rs -> Int -> Record (VectorMs m rs) -> Record rs -> m () Source #

indexRec :: proxy rs -> Int -> Record (Vectors rs) -> Record rs Source #

produceRec :: proxy rs -> Record (Vectors rs) -> Rec ((->) Int) rs Source #

Instances

RecVec ([] *) Source # 

Methods

allocRec :: PrimMonad m => proxy [*] -> Int -> m (Record (VectorMs m [*])) Source #

freezeRec :: PrimMonad m => proxy [*] -> Int -> Record (VectorMs m [*]) -> m (Record (Vectors [*])) Source #

growRec :: PrimMonad m => proxy [*] -> Record (VectorMs m [*]) -> m (Record (VectorMs m [*])) Source #

writeRec :: PrimMonad m => proxy [*] -> Int -> Record (VectorMs m [*]) -> Record [*] -> m () Source #

indexRec :: proxy [*] -> Int -> Record (Vectors [*]) -> Record [*] Source #

produceRec :: proxy [*] -> Record (Vectors [*]) -> Rec * ((LiftedRep -> LiftedRep) Int) [*] Source #

(MVector (VectorMFor a) a, Vector (VectorFor a) a, RecVec rs) => RecVec ((:) * ((:->) s a) rs) Source # 

Methods

allocRec :: PrimMonad m => proxy ((* ': (s :-> a)) rs) -> Int -> m (Record (VectorMs m ((* ': (s :-> a)) rs))) Source #

freezeRec :: PrimMonad m => proxy ((* ': (s :-> a)) rs) -> Int -> Record (VectorMs m ((* ': (s :-> a)) rs)) -> m (Record (Vectors ((* ': (s :-> a)) rs))) Source #

growRec :: PrimMonad m => proxy ((* ': (s :-> a)) rs) -> Record (VectorMs m ((* ': (s :-> a)) rs)) -> m (Record (VectorMs m ((* ': (s :-> a)) rs))) Source #

writeRec :: PrimMonad m => proxy ((* ': (s :-> a)) rs) -> Int -> Record (VectorMs m ((* ': (s :-> a)) rs)) -> Record ((* ': (s :-> a)) rs) -> m () Source #

indexRec :: proxy ((* ': (s :-> a)) rs) -> Int -> Record (Vectors ((* ': (s :-> a)) rs)) -> Record ((* ': (s :-> a)) rs) Source #

produceRec :: proxy ((* ': (s :-> a)) rs) -> Record (Vectors ((* ': (s :-> a)) rs)) -> Rec * ((LiftedRep -> LiftedRep) Int) ((* ': (s :-> a)) rs) Source #

inCoreSoA :: forall m rs. (PrimMonad m, RecVec rs) => Producer (Record rs) m () -> m (Int, Rec ((->) Int) rs) Source #

Stream a finite sequence of rows into an efficient in-memory representation for further manipulation. Each column of the input table will be stored optimally based on its type, making use of the resulting generators a matter of indexing into a densely packed representation. Returns the number of rows and a record of column indexing functions. See toAoS to convert the result to a Frame which provides an easier-to-use function that indexes into the table in a row-major fashion.

inCoreAoS :: (PrimMonad m, RecVec rs) => Producer (Record rs) m () -> m (FrameRec rs) Source #

Stream a finite sequence of rows into an efficient in-memory representation for further manipulation. Each column of the input table will be stored optimally based on its type, making use of the resulting generators a matter of indexing into a densely packed representation. Returns a Frame that provides a function to index into the table.

inCoreAoS' :: (PrimMonad m, RecVec rs) => (Rec ((->) Int) rs -> Rec ((->) Int) ss) -> Producer (Record rs) m () -> m (FrameRec ss) Source #

Like inCoreAoS, but applies the provided function to the record of columns before building the Frame.

toAoS :: Int -> Rec ((->) Int) rs -> FrameRec rs Source #

Convert a structure-of-arrays to an array-of-structures. This can simplify usage of an in-memory representation.

inCore :: forall m n rs. (PrimMonad m, RecVec rs, Monad n) => Producer (Record rs) m () -> m (Producer (Record rs) n ()) Source #

Stream a finite sequence of rows into an efficient in-memory representation for further manipulation. Each column of the input table will be stored optimally based on its type, making use of the resulting generator a matter of indexing into a densely packed representation.

toFrame :: (Foldable f, RecVec rs) => f (Record rs) -> Frame (Record rs) Source #

Build a Frame from a collection of Records using efficient column-based storage.

filterFrame :: RecVec rs => (Record rs -> Bool) -> FrameRec rs -> FrameRec rs Source #

Keep only those rows of a FrameRec that satisfy a predicate.