----------------------------------------------------------------------------- -- | -- Module : System.IO.MatrixMarket -- Copyright : Copyright (c) 2008, Patrick Perry -- License : BSD3 -- Maintainer : Patrick Perry -- Stability : stable -- -- Read and write matrices and vectors in the Matrix Market format -- (see http:\/\/math.nist.gov\/MatrixMarket\/). -- module System.IO.MatrixMarket ( -- * Matrix and vector type descriptors Field(..), Format(..), Type(..), -- * Dense Vector I/O hPutVector, hPutVectorWithDesc, hGetVector, -- * Sparse Vector I/O hPutCoordVector, hPutCoordVectorWithDesc, hGetCoordVector, -- * Dense Matrix I/O hPutMatrix, hPutMatrixWithDesc, hGetMatrix, -- * Sparse Matrix I/O hPutCoordMatrix, hPutCoordMatrixWithDesc, hGetCoordMatrix, -- * Banner Functions showHeader, readHeader, hPutBanner, hGetBanner, hPutHeader, hGetHeader, commentChar, hPutComments, hGetComments, ) where import Control.Monad ( forM_ ) import Data.Char ( toLower, toUpper, isSpace ) import Data.List ( intercalate ) import Data.Maybe ( fromMaybe ) import System.IO ( Handle, hGetLine, hPutStr, hPutStrLn, hLookAhead ) import Text.Printf ( printf ) import qualified Data.ByteString.Lazy.Char8 as B type ByteString = B.ByteString -- | Specifies the element type. Pattern matrices do not have any -- elements, only indices, and only make sense for coordinate matrices and -- vectors. data Field = Real | Complex | Integer | Pattern deriving (Eq, Read, Show) -- | Specifies either sparse or dense storage. In sparse (\"coordinate\") -- storage, elements are given in (i,j,x) triplets for matrices (or (i,x) -- for vectors). Indices are 1-based, so that A(1,1) is the first element of -- a matrix, and x(1) is the first element of a vector. -- -- In dense (\"array\") storage, elements are given in column-major order. -- -- In both cases, each element is given on a separate line. data Format = Coordinate | Array deriving (Eq, Read, Show) -- | Specifies any special structure in the matrix. For symmetric and -- hermition matrices, only the lower-triangular part of the matrix is given. -- For skew matrices, only the entries below the diagonal are stored. data Type = General | Symmetric | Hermitian | Skew deriving (Eq, Read, Show) ----------------------------- Banner Functions ------------------------------ -- | Give the Matrix Market header string for the given matrix type. showHeader :: Format -> Field -> Type -> String showHeader format field t = intercalate " " $ [ prefixStr, showMM format, showMM field, showMM t ] -- | Read a Matrix Market header from a string. readHeader :: String -> (Format, Field, Type) readHeader str = case words str of (mm:m:ws) -> if mm ++ " " ++ m == prefixStr then case ws of [ format, field, t ] -> (readMM format, readMM field, readMM t) _ -> err else err _ -> err where err = error "could not parse Matrix Market header" -- | Write out a header and comments for a Matrix Market matrix. hPutBanner :: Handle -> Format -> Field -> Type -> String -> IO () hPutBanner h format field t str = do hPutHeader h format field t hPutComments h str -- | Read the Matrix Market banner (including comments) from a file. The -- comments and the banner information are returned. hGetBanner :: Handle -> IO (Format, Field, Type, String) hGetBanner h = do (format, field, t) <- hGetHeader h str <- hGetComments h return (format, field, t, str) -- | Write a Matrix Market header for a matrix to a file. hPutHeader :: Handle -> Format -> Field -> Type -> IO () hPutHeader h format field t = hPutStrLn h $ showHeader format field t -- | Read the Matrix Market header from a file. hGetHeader :: Handle -> IO (Format, Field, Type) hGetHeader h = hGetLine h >>= return . readHeader -- | The line-comment character (@%@). commentChar :: Char commentChar = '%' -- | Write a string as a Matrix Market file comment. This prepends each line -- with \'%\' and then writes it out to the file. hPutComments :: Handle -> String -> IO () hPutComments h cs = forM_ (lines cs) $ \l -> do hPutStr h $ [ commentChar, ' ' ] hPutStrLn h l -- | Read the comments from a file, stripping the leading \'%\' from each -- line, until reaching a line that does not start with the comment character. hGetComments :: Handle -> IO (String) hGetComments h = do c <- hLookAhead h if c == commentChar then do (_:l) <- hGetLine h ls <- hGetComments h return (l ++ "\n" ++ ls) else return "" ------------------------------ Size Functions ------------------------------- hPutSize2 :: Handle -> Int -> Int -> IO () hPutSize2 h m n = hPutStrLn h $ show m ++ "\t" ++ show n hPutSize3 :: Handle -> Int -> Int -> Int -> IO () hPutSize3 h m n l = hPutStrLn h $ show m ++ "\t" ++ show n ++ "\t" ++ show l hGetSize2 :: Handle -> IO (Int,Int) hGetSize2 h = do l <- hGetLine h case (words l) of [ n1, n2 ] -> return (read n1, read n2) _ -> fail "could not read sizes" hGetSize3 :: Handle -> IO (Int,Int,Int) hGetSize3 h = do l <- hGetLine h case (words l) of [ n1, n2, n3 ] -> return (read n1, read n2, read n3) _ -> fail "could not read sizes" ------------------------------ Dense Vectors -------------------------------- -- | Write a dense vector with the given dimension and elements to a file. -- If the field is given as 'Pattern', no elements are written, only the -- header and size. hPutVector :: (Show a) => Handle -> Field -> Int -> [a] -> IO () hPutVector = flip hPutVectorWithDesc "" -- | Write a dense vector along with a description, which is put in the -- comment section of the file. hPutVectorWithDesc :: (Show a) => Handle -> String -> Field -> Int -> [a] -> IO () hPutVectorWithDesc h desc field n xs = hPutMatrixWithDesc h desc field General (n,1) xs -- | Lazily read a dense vector from a file. The vector field, dimension -- and elements are returned. The file is put in a semi-closed state and -- the elements are read lazily. If the field is 'Pattern', the elements -- list will be @Nothing@. hGetVector :: (Read a) => Handle -> IO (Field, Int, Maybe [a]) hGetVector h = do (field, t, (m,n),xs) <- hGetMatrix h case t of General -> return () _ -> fail $ printf "expecting a matrix of type `general` but got `%s' instead" (showMM t) case (m,n) of (_,1) -> return (field, m,xs) (1,_) -> return (field, n,xs) _ -> fail $ printf ("expecting a vector but instead got a matrix with" ++ " dimensions (%d,%d)") m n ------------------------------ Sparse Vectors ------------------------------- -- | Write a coordinate vector with the given dimension and size to a file. -- The indices are 1-based, so that x(1) is the first element of the vector. -- If the field is 'Pattern', only the indices are used. hPutCoordVector :: (Show a) => Handle -> Field -> Int -> Int -> [(Int, a)] -> IO () hPutCoordVector = flip hPutCoordVectorWithDesc "" -- | Write a coordinate vector along with a description, which is put in the -- comment section of the file. hPutCoordVectorWithDesc :: (Show a) => Handle -> String -> Field -> Int -> Int -> [(Int, a)] -> IO () hPutCoordVectorWithDesc h desc field n nz ixs = let ijxs = map (\(i,x) -> ((i,1),x)) ixs in hPutCoordMatrixWithDesc h desc field General (n,1) nz ijxs -- | Lazily read a coordinate vector from a file. The vector field, dimension, -- size, and elements are returned. The file is put in a semi-clased state and -- the elements are read lazily. If the field is 'Pattern', only a list of -- indices is returned. hGetCoordVector :: (Read a) => Handle -> IO (Field, Int, Int, Either [Int] [(Int, a)]) hGetCoordVector h = do (field, t, (m,n), nz, ijxs') <- hGetCoordMatrix h case t of General -> return () _ -> fail $ printf "expecting a matrix of type `general` but got `%s' instead" (showMM t) case (m,n) of (_,1) -> let ixs = either (Left . fst . unzip ) (\ijxs -> let (ijs,xs) = unzip ijxs is = (fst . unzip) ijs in Right $ zip is xs) ijxs' in return (field, m, nz, ixs) (1,_) -> let jxs = either (Left . snd . unzip ) (\ijxs -> let (ijs,xs) = unzip ijxs js = (snd . unzip) ijs in Right $ zip js xs) ijxs' in return (field, n, nz, jxs) _ -> fail $ printf ("expecting a vector but instead got a matrix with" ++ " dimensions (%d,%d)") m n ------------------------------ Dense Matrices ------------------------------- -- | Write a dense matrix with the given shape and elements in column-major -- order to a file. If the field is given as 'Pattern', no elements are -- written, only the header and size. hPutMatrix :: (Show a) => Handle -> Field -> Type -> (Int,Int) -> [a] -> IO () hPutMatrix = flip hPutMatrixWithDesc "" -- | Write a dense matrix along with a description, which is put in the -- comment section of the file. hPutMatrixWithDesc :: (Show a) => Handle -> String -> Field -> Type -> (Int,Int) -> [a] -> IO () hPutMatrixWithDesc h desc field t (m,n) xs = do hPutHeader h Array field t hPutComments h desc hPutSize2 h m n case field of Pattern -> return () _ -> mapM_ (\x -> hPutStrLn h $ show x) xs -- | Lazily read a dense matrix from a file, returning the matrix field, type, -- shape and elements in column-major order. The elements are read laxily, and -- the file is put in a semi-closed state. If the field is 'Pattern', -- @Nothing@ is returned instead of an element list. hGetMatrix :: (Read a) => Handle -> IO (Field, Type, (Int,Int), Maybe [a]) hGetMatrix h = do (format,field,t,_) <- hGetBanner h if format == Array then do (m,n) <- hGetSize2 h ls <- B.hGetContents h >>= return . B.lines let xs = case field of Pattern -> Nothing _ -> Just $ map (readValue . dropLineComment) ls return (field, t, (m,n), xs) else fail "error reading matrix; header does not match type `matrix array'" ------------------------------ Sparse Matrices ------------------------------- -- | Write a coordinate matrix with the given shape and size to a file. The -- indices are 1-based, so that A(1,1) is the first element of the matrix. -- If the field is 'Pattern', only the indices are used. hPutCoordMatrix :: (Show a) => Handle -> Field -> Type -> (Int,Int) -> Int -> [((Int,Int), a)] -> IO () hPutCoordMatrix = flip hPutCoordMatrixWithDesc "" -- | Write a coordinate matrix along with a description, which is put in the -- comment section of the file. hPutCoordMatrixWithDesc :: (Show a) => Handle -> String -> Field -> Type -> (Int,Int) -> Int -> [((Int,Int), a)] -> IO () hPutCoordMatrixWithDesc h desc field t (m,n) nz ijes = do hPutHeader h Coordinate field t hPutComments h desc hPutSize3 h m n nz let showCoord = case field of Pattern -> \((i,j),_) -> show i ++ "\t" ++ show j _ -> \((i,j),x) -> show i ++ "\t" ++ show j ++ "\t" ++ show x mapM_ (hPutStrLn h . showCoord) ijes -- | Lazily read a coordinate matrix from a file. The matrix field, type, shape -- size,and elements are returned. The files contents lazily, and the file is -- put in a semi-closed state. If the field is 'Pattern', only the indices are -- returned. hGetCoordMatrix :: (Read a) => Handle -> IO (Field, Type, (Int,Int), Int, Either [(Int,Int)] [((Int,Int), a)]) hGetCoordMatrix h = do (format, field, t, _) <- hGetBanner h if format == Coordinate then do (m,n,nz) <- hGetSize3 h ls <- B.hGetContents h >>= return . B.lines let ixs = map (parseLine field) ls ixs' = case field of Pattern -> Left $ map fst ixs _ -> Right ixs return (field, t, (m,n), nz, ixs') else fail "error reading matrix; header does not match type `matrix coordinate'" where parseLine field l = let (i,l') = readInt $ dropLineComment l (j,l'') = readInt l' e = case field of Pattern -> undefined _ -> readValue l'' in ((i,j),e) ----------------------------- Helper Functions ------------------------------ prefixStr :: String prefixStr = "%%MatrixMarket matrix" showMM :: (Show a) => a -> String showMM a = let (c:cs) = show a in (toLower c):cs readMM :: (Read a) => String -> a readMM ~(c:cs) = read $ (toUpper c):cs dropLineComment :: ByteString -> ByteString dropLineComment = B.takeWhile (/= commentChar) readInt :: ByteString -> (Int, ByteString) readInt s = fromMaybe (error "could not parse integer") $ B.readInt (B.dropWhile isSpace s) readValue :: (Read a) => ByteString -> a readValue s = read $ B.unpack (B.dropWhile isSpace s)