----------------------------------------------------------------------------- -- | -- Module : System.IO.MatrixMarket -- Copyright : Copyright (c) 2008, Patrick Perry -- License : BSD3 -- Maintainer : Patrick Perry -- Stability : stable -- -- Read and write matrices and vectors in the Matrix Market format -- (see http:\/\/math.nist.gov\/MatrixMarket\/). -- module System.IO.MatrixMarket ( -- * Matrix and vector type descriptors Type(..), Field(..), Format(..), MatrixType(..), -- * Dense Vector I/O hPutVector, hPutVectorWithDesc, hGetVector, -- * Sparse Vector I/O hPutCoordVector, hPutCoordVectorWithDesc, hGetCoordVector, -- * Dense Matrix I/O hPutMatrix, hPutMatrixWithDesc, hGetMatrix, -- * Sparse Matrix I/O hPutCoordMatrix, hPutCoordMatrixWithDesc, hGetCoordMatrix, -- * Banner Functions showVectorHeader, showMatrixHeader, readHeader, hPutVectorBanner, hPutMatrixBanner, hGetBanner, hPutVectorHeader, hPutMatrixHeader, hGetHeader, commentChar, hPutComments, hGetComments, ) where import Control.Monad ( forM_ ) import Data.Char ( toLower, toUpper, isSpace ) import Data.List ( intercalate ) import Data.Maybe ( fromMaybe ) import System.IO ( Handle, hGetLine, hPutStr, hPutStrLn, hLookAhead ) import qualified Data.ByteString.Lazy.Char8 as B type ByteString = B.ByteString -- | Specifies what kind of object is stored in the file. Note that \"vector\" -- is a non-standard format. data Type = Matrix | Vector deriving (Eq, Read, Show) -- | Specifies the element type. Pattern matrices do not have any -- elements, only indices, and only make sense for coordinate matrices and -- vectors. data Field = Real | Complex | Integer | Pattern deriving (Eq, Read, Show) -- | Specifies either sparse or dense storage. In sparse (\"coordinate\") -- storage, elements are given in (i,j,x) triplets for matrices (or (i,x) -- for vectors). Indices are 1-based, so that A(1,1) is the first element of -- a matrix, and x(1) is the first element of a vector. -- -- In dense (\"array\") storage, elements are given in column-major order. -- -- In both cases, each element is given on a separate line. data Format = Coordinate | Array deriving (Eq, Read, Show) -- | Specifies any special structure in the matrix. For symmetric and -- hermition matrices, only the lower-triangular part of the matrix is given. -- For skew matrices, only the entries below the diagonal are stored. data MatrixType = General | Symmetric | Hermitian | Skew deriving (Eq, Read, Show) ----------------------------- Banner Functions ------------------------------ -- | Give the Matrix Market header string for the given vector type. showVectorHeader :: Format -> Field -> String showVectorHeader format field = intercalate " " $ [ prefixStr, showMM Vector, showMM format, showMM field ] -- | Give the Matrix Market header string for the given matrix type. showMatrixHeader :: Format -> Field -> MatrixType -> String showMatrixHeader format field attr = intercalate " " $ [ prefixStr, showMM Matrix, showMM format, showMM field, showMM attr ] -- | Read a Matrix Market header from a string. readHeader :: String -> (Type, Format, Field, Maybe MatrixType) readHeader str = case words str of (p:t:ws) -> if p == prefixStr then case (readMM t, ws) of (Vector, [ format, field ]) -> (Vector, readMM format, readMM field, Nothing) (Matrix, [ format, field, attr ]) -> (Matrix, readMM format, readMM field, Just (readMM attr)) _ -> err else err _ -> err where err = error "could not parse Matrix Market header" -- | Write out a header and comments for a Matrix Market vector. hPutVectorBanner :: Handle -> Format -> Field -> String -> IO () hPutVectorBanner h format field str = do hPutVectorHeader h format field hPutComments h str -- | Write out a header and comments for a Matrix Market matrix. hPutMatrixBanner :: Handle -> Format -> Field -> MatrixType -> String -> IO () hPutMatrixBanner h format field attr str = do hPutMatrixHeader h format field attr hPutComments h str -- | Read the Matrix Market banner (including comments) from a file. The -- comments are discarded and the banner information is returned. hGetBanner :: Handle -> IO (Type, Format, Field, Maybe MatrixType) hGetBanner h = do header <- hGetHeader h hGetComments h return header -- | Write a Matrix Market header for a vector to a file. hPutVectorHeader :: Handle -> Format -> Field -> IO () hPutVectorHeader h format field = hPutStrLn h $ showVectorHeader format field -- | Write a Matrix Market header for a matrix to a file. hPutMatrixHeader :: Handle -> Format -> Field -> MatrixType -> IO () hPutMatrixHeader h format field attr = hPutStrLn h $ showMatrixHeader format field attr -- | Read the Matrix Market header from a file. hGetHeader :: Handle -> IO (Type, Format, Field, Maybe MatrixType) hGetHeader h = hGetLine h >>= return . readHeader -- | The line-comment character (@%@). commentChar :: Char commentChar = '%' -- | Write a string as a Matrix Market file comment. This prepends each line -- with \'%\' and then writes it out to the file. hPutComments :: Handle -> String -> IO () hPutComments h cs = forM_ (lines cs) $ \l -> do hPutStr h $ [ commentChar, ' ' ] hPutStrLn h l -- | Read the comments from a file, stripping the leading \'%\' from each -- line, until reaching a line that does not start with the comment character. hGetComments :: Handle -> IO (String) hGetComments h = do c <- hLookAhead h if c == commentChar then do (_:l) <- hGetLine h ls <- hGetComments h return (l ++ "\n" ++ ls) else return "" ------------------------------ Size Functions ------------------------------- hPutSize1 :: Handle -> Int -> IO () hPutSize1 h n = hPutStrLn h $ show n hPutSize2 :: Handle -> Int -> Int -> IO () hPutSize2 h m n = hPutStrLn h $ show m ++ "\t" ++ show n hPutSize3 :: Handle -> Int -> Int -> Int -> IO () hPutSize3 h m n l = hPutStrLn h $ show m ++ "\t" ++ show n ++ "\t" ++ show l hGetSize1 :: Handle -> IO Int hGetSize1 h = hGetLine h >>= return . read hGetSize2 :: Handle -> IO (Int,Int) hGetSize2 h = do l <- hGetLine h case (words l) of [ n1, n2 ] -> return (read n1, read n2) _ -> ioError $ userError "could not read sizes" hGetSize3 :: Handle -> IO (Int,Int,Int) hGetSize3 h = do l <- hGetLine h case (words l) of [ n1, n2, n3 ] -> return (read n1, read n2, read n3) _ -> ioError $ userError "could not read sizes" ------------------------------ Dense Vectors -------------------------------- -- | Write a dense vector with the given dimension and elements to a file. -- If the field is given as 'Pattern', no elements are written, only the -- header and size. hPutVector :: (Show a) => Handle -> Field -> Int -> [a] -> IO () hPutVector = flip hPutVectorWithDesc "" -- | Write a dense vector along with a description, which is put in the -- comment section of the file. hPutVectorWithDesc :: (Show a) => Handle -> String -> Field -> Int -> [a] -> IO () hPutVectorWithDesc h desc field n xs = do hPutVectorHeader h Array field hPutComments h desc hPutSize1 h n case field of Pattern -> return () _ -> mapM_ (\x -> hPutStrLn h $ show x) xs -- | Lazily read a dense vector from a file. The vector dimension and -- elements are returned. The file is closed when the operation finishes. -- If the field is 'Pattern', the elements list will be @Nothing@. hGetVector :: (Read a) => Handle -> Field -> IO (Int, Maybe [a]) hGetVector h field = do (t,format,field',attr') <- hGetBanner h if t == Vector && format == Array && field' == field && attr' == Nothing then do n <- hGetSize1 h ls <- B.hGetContents h >>= return . B.lines let xs = case field of Pattern -> Nothing _ -> Just $ map (readValue . dropLineComment) ls return (n, xs) else headerError Vector Array field Nothing ------------------------------ Sparse Vectors ------------------------------- -- | Write a coordinate vector with the given dimension and size to a file. -- The indices are 1-based, so that x(1) is the first element of the vector. -- If the field is 'Pattern', only the indices are used. hPutCoordVector :: (Show a) => Handle -> Field -> Int -> Int -> [(Int, a)] -> IO () hPutCoordVector = flip hPutCoordVectorWithDesc "" -- | Write a coordinate vector along with a description, which is put in the -- comment section of the file. hPutCoordVectorWithDesc :: (Show a) => Handle -> String -> Field -> Int -> Int -> [(Int, a)] -> IO () hPutCoordVectorWithDesc h desc field n nz ixs = do hPutVectorHeader h Coordinate field hPutComments h desc hPutSize2 h n nz let showCoord = case field of Pattern -> \(i,_) -> show i _ -> \(i,x) -> show i ++ "\t" ++ show x mapM_ (hPutStrLn h . showCoord) ixs -- | Lazily read a coordinate vector from a file. The vector dimension, size, -- and elements are returned. The file is closed when the operation finishes. -- If the field is given as 'Pattern', only a list of indices is returned. hGetCoordVector :: (Read a) => Handle -> Field -> IO (Int, Int, Either [Int] [(Int, a)]) hGetCoordVector h field = do (t,format,field',attr') <- hGetBanner h if t == Vector && format == Coordinate && field' == field && attr' == Nothing then do (n,nz) <- hGetSize2 h ls <- B.hGetContents h >>= return . B.lines let ixs = map readCoord ls ixs' = case field of Pattern -> Left $ map fst ixs _ -> Right ixs return (n, nz, ixs') else headerError Vector Coordinate field Nothing where readCoord l = let (i,l') = readInt $ dropLineComment l x = case field of Pattern -> undefined _ -> readValue l' in (i,x) ------------------------------ Dense Matrices ------------------------------- -- | Write a dense matrix with the given shape and elements in column-major -- order to a file. If the field is given as 'Pattern', no elements are -- written, only the header and size. hPutMatrix :: (Show a) => Handle -> Field -> MatrixType -> (Int,Int) -> [a] -> IO () hPutMatrix = flip hPutMatrixWithDesc "" -- | Write a dense matrix along with a description, which is put in the -- comment section of the file. hPutMatrixWithDesc :: (Show a) => Handle -> String -> Field -> MatrixType -> (Int,Int) -> [a] -> IO () hPutMatrixWithDesc h desc field attr (m,n) xs = do hPutMatrixHeader h Array field attr hPutComments h desc hPutSize2 h m n case field of Pattern -> return () _ -> mapM_ (\x -> hPutStrLn h $ show x) xs -- | Lazily read a dense matrix from a file, returning the matrix shape and -- its elements in column-major order. The file is closed when the operation -- finishes. If the field is given as 'Pattern', @Nothing@ is returned -- instead of an element list. hGetMatrix :: (Read a) => Handle -> Field -> MatrixType -> IO ((Int,Int), Maybe [a]) hGetMatrix h field attr = do (t,format,field',attr') <- hGetBanner h if t == Matrix && format == Array && field' == field && attr' == Just attr then do (m,n) <- hGetSize2 h ls <- B.hGetContents h >>= return . B.lines let xs = case field of Pattern -> Nothing _ -> Just $ map (readValue . dropLineComment) ls return ((m,n), xs) else headerError Matrix Array field (Just attr) ------------------------------ Sparse Matrices ------------------------------- -- | Write a coordinate matrix with the given shape and size to a file. The -- indices are 1-based, so that A(1,1) is the first element of the matrix. -- If the field is 'Pattern', only the indices are used. hPutCoordMatrix :: (Show a) => Handle -> Field -> MatrixType -> (Int,Int) -> Int -> [((Int,Int), a)] -> IO () hPutCoordMatrix = flip hPutCoordMatrixWithDesc "" -- | Write a coordinate matrix along with a description, which is put in the -- comment section of the file. hPutCoordMatrixWithDesc :: (Show a) => Handle -> String -> Field -> MatrixType -> (Int,Int) -> Int -> [((Int,Int), a)] -> IO () hPutCoordMatrixWithDesc h desc field attr (m,n) nz ijes = do hPutMatrixHeader h Coordinate field attr hPutComments h desc hPutSize3 h m n nz let showCoord = case field of Pattern -> \((i,j),_) -> show i ++ "\t" ++ show j _ -> \((i,j),x) -> show i ++ "\t" ++ show j ++ "\t" ++ show x mapM_ (hPutStrLn h . showCoord) ijes -- | Lazily read a coordinate vector from a file. The vector dimension, size, -- and elements are returned. The file is closed when the operation finishes. -- If the field is 'Pattern', only the indices are returned. hGetCoordMatrix :: (Read a) => Handle -> Field -> MatrixType -> IO ((Int,Int), Int, Either [(Int,Int)] [((Int,Int), a)]) hGetCoordMatrix h field attr = do (t, format, field', attr') <- hGetBanner h if t == Matrix && format == Coordinate && field' == field && attr' == Just attr then do (m,n,nz) <- hGetSize3 h ls <- B.hGetContents h >>= return . B.lines let ixs = map parseLine ls ixs' = case field of Pattern -> Left $ map fst ixs _ -> Right ixs return ((m,n), nz, ixs') else headerError Matrix Coordinate field (Just attr) where parseLine l = let (i,l') = readInt $ dropLineComment l (j,l'') = readInt l' e = case field of Pattern -> undefined _ -> readValue l'' in ((i,j),e) ----------------------------- Helper Functions ------------------------------ prefixStr :: String prefixStr = "%%MatrixMarket" showMM :: (Show a) => a -> String showMM a = let (c:cs) = show a in (toLower c):cs readMM :: (Read a) => String -> a readMM ~(c:cs) = read $ (toUpper c):cs headerError :: Type -> Format -> Field -> Maybe MatrixType -> IO a headerError t format field attr = ioError $ userError $ "header does not match expected type `" ++ showMM t ++ " " ++ showMM format ++ " " ++ showMM field ++ " " ++ fromMaybe "" (attr >>= return . showMM) ++ "'." dropLineComment :: ByteString -> ByteString dropLineComment = B.takeWhile (/= commentChar) readInt :: ByteString -> (Int, ByteString) readInt s = fromMaybe (error "could not parse integer") $ B.readInt (B.dropWhile isSpace s) readValue :: (Read a) => ByteString -> a readValue s = read $ B.unpack (B.dropWhile isSpace s)