{- Math.Clustering.Hierarchical.Spectral.Load Gregory W. Schwartz Collects the functions pertaining to loading a matrix. -} {-# LANGUAGE BangPatterns #-} {-# LANGUAGE OverloadedStrings #-} module Math.Clustering.Hierarchical.Spectral.Load ( readDenseAdjMatrix , readSparseAdjMatrix -- , readEigenSparseAdjMatrix ) where -- Remote import Control.Monad.Except (runExceptT, ExceptT (..)) import Control.Monad.Managed (with, liftIO, Managed (..)) import Data.Maybe (fromMaybe, catMaybes) import System.IO (Handle (..)) import qualified Data.ByteString.Streaming.Char8 as BS import qualified Data.Csv as CSV -- import qualified Data.Eigen.SparseMatrix as E import qualified Data.Map.Strict as Map import qualified Data.Set as Set import qualified Data.Sparse.Common as SH import qualified Data.Text as T import qualified Data.Vector as V import qualified Numeric.LinearAlgebra as H import qualified Streaming as S import qualified Streaming.Cassava as S import qualified Streaming.Prelude as S import qualified Streaming.With.Lifted as SW -- Local import Math.Clustering.Hierarchical.Spectral.Types -- | Generic error message. errorMsg = error "Not correct format (requires row,column,value)" -- | Parse a row of a label index file. parseRow :: (T.Text, T.Text, Double) -> ((T.Text, T.Text), Double) parseRow (i, j, v) = ((i, j), v) -- | Ignore the disconnected vertices, not used (rather use very small weight). ignoreDisconnected :: V.Vector T.Text -> H.Matrix Double -> (V.Vector T.Text, H.Matrix Double) ignoreDisconnected items mat = (newItems, newMat) where newItems = V.fromList $ fmap ((V.!) items) valid newMat = mat H.?? (H.Pos $ H.idxs valid, H.Pos $ H.idxs valid) valid = catMaybes . zipWith (\x xs -> if sum xs > 0 then Just x else Nothing) [0..] . H.toLists $ mat -- | Ensure symmetry. symmetric :: [((Int, Int), Double)] -> [((Int, Int), Double)] symmetric = concatMap (\((!i, !j), v) -> [((i, j), v), ((j, i), v)]) -- | Ensure zeros on diagonal. zeroDiag :: [((Int, Int), Double)] -> [((Int, Int), Double)] zeroDiag = filter (\((!i, !j), _) -> i /= j) -- | Get the translated matrix indices. getNewIndices -- :: (Eq a, Ord a) -- => [((a, a), Double)] -> [((Int, Int), Double)] :: [((T.Text, T.Text), Double)] -> [((Int, Int), Double)] getNewIndices xs = fmap (\((!i,!j),!v) -> ( ( Map.findWithDefault eMsg i idxMap , Map.findWithDefault eMsg j idxMap ) , v ) ) xs where eMsg = error "Index not found during index conversion." indices = getAllIndices xs idxMap = Map.fromList $ zip indices [0 ..] -- | Get the list of all indices. getAllIndices :: (Eq a, Ord a) => [((a, a), Double)] -> [a] getAllIndices xs = Set.toAscList . Set.union (getSet fst) $ getSet snd where getSet f = Set.fromList . fmap (f . fst) $ xs -- | Get a dense adjacency matrix from a handle. readDenseAdjMatrix :: CSV.DecodeOptions -> Handle -> IO (V.Vector T.Text, H.Matrix Double) readDenseAdjMatrix decodeOpt handle = flip with return $ do let getAssocList = S.toList_ . S.map parseRow assocList <- fmap (either (error . show) id) . runExceptT . getAssocList . S.decodeWith decodeOpt S.NoHeader $ (BS.hGetContents handle :: BS.ByteString (ExceptT S.CsvParseException Managed) ()) let items = V.fromList $ getAllIndices assocList mat = H.assoc (V.length items, V.length items) 0 . Set.toList . Set.fromList -- Ensure no duplicates. . symmetric -- Ensure symmetry. . zeroDiag -- Ensure zeros on diagonal. . getNewIndices -- Only look at present rows by converting indices. $ assocList return (items, mat) -- | Get a sparse adjacency matrix from a handle. readSparseAdjMatrix :: CSV.DecodeOptions -> Handle -> IO (V.Vector T.Text, SH.SpMatrix Double) readSparseAdjMatrix decodeOpt handle = flip with return $ do let getAssocList = S.toList_ . S.map parseRow assocList <- fmap (either (error . show) id) . runExceptT . getAssocList . S.decodeWith decodeOpt S.NoHeader $ (BS.hGetContents handle :: BS.ByteString (ExceptT S.CsvParseException Managed) ()) let items = V.fromList $ getAllIndices assocList mat = SH.fromListSM (V.length items, V.length items) . Set.toList . Set.fromList -- Ensure no duplicates. . fmap (\((i, j), v) -> (i, j, v)) . symmetric -- Ensure symmetry. . zeroDiag -- Ensure zeros on diagonal. . getNewIndices -- Only look at present rows by converting indices. $ assocList return (items, mat) -- -- | Get a sparse adjacency matrix from a handle. -- readEigenSparseAdjMatrix :: CSV.DecodeOptions -- -> Handle -- -> IO (V.Vector T.Text, E.SparseMatrixXd) -- readEigenSparseAdjMatrix decodeOpt handle = flip with return $ do -- let getAssocList = S.toList_ . S.map parseRow -- assocList <- -- fmap (either (error . show) id) -- . runExceptT -- . getAssocList -- . S.decodeWith decodeOpt S.NoHeader -- $ (BS.hGetContents handle :: BS.ByteString (ExceptT S.CsvParseException Managed) ()) -- let items = V.fromList $ getAllIndices assocList -- mat = E.fromList (V.length items) (V.length items) -- . Set.toList -- . Set.fromList -- Ensure no duplicates. -- . fmap (\((i, j), v) -> (i, j, v)) -- . symmetric -- Ensure symmetry. -- . zeroDiag -- Ensure zeros on diagonal. -- . getNewIndices -- Only look at present rows by converting indices. -- $ assocList -- return (items, mat)