{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE OverloadedStrings #-}
module Math.Clustering.Hierarchical.Spectral.Load
( readDenseAdjMatrix
, readSparseAdjMatrix
, readEigenSparseAdjMatrix
) where
import Control.Monad.Except (runExceptT, ExceptT (..))
import Control.Monad.Managed (with, liftIO, Managed (..))
import Data.Maybe (fromMaybe, catMaybes)
import System.IO (Handle (..))
import qualified Data.ByteString.Streaming.Char8 as BS
import qualified Data.Csv as CSV
import qualified Data.Eigen.SparseMatrix as E
import qualified Data.Map.Strict as Map
import qualified Data.Set as Set
import qualified Data.Sparse.Common as SH
import qualified Data.Text as T
import qualified Data.Vector as V
import qualified Numeric.LinearAlgebra as H
import qualified Streaming as S
import qualified Streaming.Cassava as S
import qualified Streaming.Prelude as S
import qualified Streaming.With.Lifted as SW
import Math.Clustering.Hierarchical.Spectral.Types
errorMsg = error "Not correct format (requires row,column,value)"
parseRow :: (T.Text, T.Text, Double) -> ((T.Text, T.Text), Double)
parseRow (i, j, v) = ((i, j), v)
ignoreDisconnected :: V.Vector T.Text
-> H.Matrix Double
-> (V.Vector T.Text, H.Matrix Double)
ignoreDisconnected items mat = (newItems, newMat)
where
newItems = V.fromList $ fmap ((V.!) items) valid
newMat = mat H.?? (H.Pos $ H.idxs valid, H.Pos $ H.idxs valid)
valid = catMaybes
. zipWith (\x xs -> if sum xs > 0 then Just x else Nothing) [0..]
. H.toLists
$ mat
symmetric :: [((Int, Int), Double)] -> [((Int, Int), Double)]
symmetric = concatMap (\((!i, !j), v) -> [((i, j), v), ((j, i), v)])
zeroDiag :: [((Int, Int), Double)] -> [((Int, Int), Double)]
zeroDiag = filter (\((!i, !j), _) -> i /= j)
getNewIndices
:: [((T.Text, T.Text), Double)] -> [((Int, Int), Double)]
getNewIndices xs =
fmap
(\((!i,!j),!v) ->
( ( Map.findWithDefault eMsg i idxMap
, Map.findWithDefault eMsg j idxMap
)
, v
)
)
xs
where
eMsg = error "Index not found during index conversion."
indices = getAllIndices xs
idxMap = Map.fromList $ zip indices [0 ..]
getAllIndices :: (Eq a, Ord a) => [((a, a), Double)] -> [a]
getAllIndices xs = Set.toAscList . Set.union (getSet fst) $ getSet snd
where
getSet f = Set.fromList . fmap (f . fst) $ xs
readDenseAdjMatrix :: CSV.DecodeOptions
-> Handle
-> IO (V.Vector T.Text, H.Matrix Double)
readDenseAdjMatrix decodeOpt handle = flip with return $ do
let getAssocList = S.toList_ . S.map parseRow
assocList <-
fmap (either (error . show) id)
. runExceptT
. getAssocList
. S.decodeWith decodeOpt S.NoHeader
$ (BS.hGetContents handle :: BS.ByteString (ExceptT S.CsvParseException Managed) ())
let items = V.fromList $ getAllIndices assocList
mat = H.assoc (V.length items, V.length items) 0
. Set.toList
. Set.fromList
. symmetric
. zeroDiag
. getNewIndices
$ assocList
return (items, mat)
readSparseAdjMatrix :: CSV.DecodeOptions
-> Handle
-> IO (V.Vector T.Text, SH.SpMatrix Double)
readSparseAdjMatrix decodeOpt handle = flip with return $ do
let getAssocList = S.toList_ . S.map parseRow
assocList <-
fmap (either (error . show) id)
. runExceptT
. getAssocList
. S.decodeWith decodeOpt S.NoHeader
$ (BS.hGetContents handle :: BS.ByteString (ExceptT S.CsvParseException Managed) ())
let items = V.fromList $ getAllIndices assocList
mat = SH.fromListSM (V.length items, V.length items)
. Set.toList
. Set.fromList
. fmap (\((i, j), v) -> (i, j, v))
. symmetric
. zeroDiag
. getNewIndices
$ assocList
return (items, mat)
readEigenSparseAdjMatrix :: CSV.DecodeOptions
-> Handle
-> IO (V.Vector T.Text, E.SparseMatrixXd)
readEigenSparseAdjMatrix decodeOpt handle = flip with return $ do
let getAssocList = S.toList_ . S.map parseRow
assocList <-
fmap (either (error . show) id)
. runExceptT
. getAssocList
. S.decodeWith decodeOpt S.NoHeader
$ (BS.hGetContents handle :: BS.ByteString (ExceptT S.CsvParseException Managed) ())
let items = V.fromList $ getAllIndices assocList
mat = E.fromList (V.length items) (V.length items)
. Set.toList
. Set.fromList
. fmap (\((i, j), v) -> (i, j, v))
. symmetric
. zeroDiag
. getNewIndices
$ assocList
return (items, mat)