{-# LANGUAGE DeriveFunctor #-} {-# LANGUAGE FlexibleInstances #-} -- | Core frame types and functions module Analyze.RFrame where import Analyze.Common import Analyze.Decoding (Decoder (..), decoderKeys, runDecoder) import qualified Control.Foldl as F import Control.Monad (join) import Control.Monad.Catch (MonadThrow (..)) import qualified Data.Aeson as A import Data.HashMap.Strict (HashMap) import qualified Data.HashMap.Strict as HM import Data.HashSet (HashSet) import qualified Data.HashSet as HS import Data.Text (Text) import qualified Data.Text as T import Data.Vector (Vector) import qualified Data.Vector as V -- | In-memory row-oriented frame with columns named by `k` and values by `v` data RFrame k v = RFrame { -- | Ordered vector of column names _rframeKeys :: !(Vector k) , -- | Quick lookup from column name to column index _rframeLookup :: !(HashMap k Int) , -- | Vector of rows. Each element should be the length of number of columns. _rframeData :: !(Vector (Vector v)) } deriving (Eq, Show, Functor) -- | A simpler 'RFrame' for updates data RFrameUpdate k v = RFrameUpdate { -- | Ordered vector of column names _rframeUpdateKeys :: !(Vector k) , -- | Vector of rows. _rframeUpdateData :: !(Vector (Vector v)) } deriving (Eq, Show, Functor) -- | Alias for a function to be applied to each row type RFrameMap k v a = Vector k -> HashMap k Int -> Int -> Vector v -> a -- | Alias for a row filter type RFrameFilter k v = RFrameMap k v Bool -- | An empty frame with no rows or columns empty :: RFrame k v empty = RFrame V.empty HM.empty V.empty -- | Build an 'RFrame' from an 'RFrameUpdate'. -- Throws on duplicate keys. fromUpdate :: (Data k, MonadThrow m) => RFrameUpdate k v -> m (RFrame k v) fromUpdate (RFrameUpdate ks vs) = checkForDupes ks >> pure (RFrame ks (makeLookup ks) vs) -- | Build an 'RFrameUpdate' from an 'RFrame' toUpdate :: Data k => RFrame k v -> RFrameUpdate k v toUpdate (RFrame ks _ vs) = RFrameUpdate ks vs -- | Number of columns in an 'RFrame' numCols :: RFrame k v -> Int numCols (RFrame ks _ _) = V.length ks -- | Number of rows in an 'RFrame' numRows :: RFrame k v -> Int numRows (RFrame _ _ vs) = V.length vs -- | Project to the given column col :: (Data k, MonadThrow m) => k -> RFrame k v -> m (Vector v) col k (RFrame _ look vs) = V.mapM (\v -> runLookup look v k) vs -- | Decode by row. Each element of the returned vector may fail on decoding error -- so flatten manually or use 'flatDecode'. decode :: (Data k, MonadThrow m) => Decoder m k v a -> RFrame k v -> m (Vector (m a)) decode decoder rframe@(RFrame ks look vs) = checkSubset required keySet >> pure decoded where keySet = HS.fromList (V.toList ks) required = decoderKeys decoder decoded = runDecoder decoder . runLookup look <$> vs -- | An auto-flattened version of 'decode'. flatDecode :: (Data k, MonadThrow m) => Decoder m k v a -> RFrame k v -> m (Vector a) flatDecode decoder rframe = join $ sequence <$> decode decoder rframe -- | Filter an 'RFrame' by row filter :: Data k => RFrameFilter k v -> RFrame k v -> RFrame k v filter p (RFrame ks look vs) = RFrame ks look vs' where vs' = V.ifilter (p ks look) vs -- | Update row-wise, adding or replacing values per-column. -- Retains the existing column order, appending new columns. -- Throws on row length mismatch or duplicate columns in the update. update :: (Data k, MonadThrow m) => RFrameUpdate k v -> RFrame k v -> m (RFrame k v) update (RFrameUpdate uks uvs) (RFrame fks look fvs) = do let fSize = V.length fvs uSize = V.length uvs if fSize /= uSize then throwM (RowSizeMismatch fSize uSize) else do checkForDupes uks let kis = mergeKeys fks uks ks' = (\(k, _, _) -> k) <$> kis look' = makeLookup ks' vs' = V.zipWith (runIndexedLookup kis) fvs uvs return (RFrame ks' look' vs') -- | Split columns in an 'RFrame' by a predicate. splitCols :: Data k => (k -> Bool) -> RFrame k v -> (RFrame k v, RFrame k v) splitCols p (RFrame ks look vs) = (RFrame keepKs keepLook keepVs, RFrame dropKs dropLook dropVs) where (keepKs, dropKs) = V.partition p ks keepLook = makeLookup keepKs keepVs = reorder keepKs look <$> vs dropLook = makeLookup dropKs dropVs = reorder dropKs look <$> vs -- | Drop columns in an 'RFrame' by a predicate. dropCols :: Data k => (k -> Bool) -> RFrame k v -> RFrame k v dropCols p frame = snd (splitCols p frame) -- | Keep columns in an 'RFrame' by a predicate. keepCols :: Data k => (k -> Bool) -> RFrame k v -> RFrame k v keepCols p frame = fst (splitCols p frame) -- | Appends rows to an 'RFrame', retaining column order of the first. -- Throws on column mismatch. appendRows :: (Data k, MonadThrow m) => RFrame k v -> RFrame k v -> m (RFrame k v) appendRows (RFrame ks0 look0 vs0) (RFrame ks1 look1 vs1) = do checkReorder ks0 ks1 let vs1' = reorder ks0 look1 vs1 return (RFrame ks0 look0 (vs0 V.++ vs1')) -- | Appends columns to an 'RFrame', retaining column order of the first. extendCols :: (Data k, MonadThrow m) => RFrame k v -> RFrame k v -> m (RFrame k v) extendCols f g = update (toUpdate g) f -- | Takes first 'n' rows of an 'RFrame'. takeRows :: Int -> RFrame k v -> RFrame k v takeRows n (RFrame ks look vs) = RFrame ks look (V.take n vs)