-----------------------------------------------------------------------------
-- |
-- Module     : System.IO.MatrixMarket
-- Copyright  : Copyright (c) 2008, Patrick Perry <patperry@stanford.edu>
-- License    : BSD3
-- Maintainer : Patrick Perry <patperry@stanford.edu>
-- Stability  : stable
--
-- Read and write matrices and vectors in the Matrix Market format
-- (see http:\/\/math.nist.gov\/MatrixMarket\/).
--
module System.IO.MatrixMarket (
    -- * Matrix and vector type descriptors
    Type(..),
    Field(..),
    Format(..),
    MatrixType(..),
    
    -- * Dense Vector I/O    
    hPutVector,
    hPutVectorWithDesc,
    hGetVector,
    
    -- * Sparse Vector I/O
    hPutCoordVector,
    hPutCoordVectorWithDesc,
    hGetCoordVector,
    
    -- * Dense Matrix I/O
    hPutMatrix,
    hPutMatrixWithDesc,
    hGetMatrix,
    
    -- * Sparse Matrix I/O    
    hPutCoordMatrix,
    hPutCoordMatrixWithDesc,
    hGetCoordMatrix,

    -- * Banner Functions
    showVectorHeader,
    showMatrixHeader,
    readHeader,
    
    hPutVectorBanner,
    hPutMatrixBanner,
    hGetBanner,

    hPutVectorHeader,
    hPutMatrixHeader,
    hGetHeader,

    commentChar,
    hPutComments,
    hGetComments,
    
    ) where

import Control.Monad   ( forM_ )
import Data.Char       ( toLower, toUpper, isSpace )
import Data.List       ( intercalate )
import Data.Maybe      ( fromMaybe )
import System.IO       ( Handle, hGetLine, hPutStr, hPutStrLn, hLookAhead )

import qualified Data.ByteString.Lazy.Char8 as B

type ByteString = B.ByteString


-- | Specifies what kind of object is stored in the file.  Note that \"vector\"
-- is a non-standard format.
data Type = Matrix | Vector 
    deriving (Eq, Read, Show)

-- | Specifies the element type.  Pattern matrices do not have any 
-- elements, only indices, and only make sense for coordinate matrices and
-- vectors.
data Field  = Real | Complex | Integer | Pattern
    deriving (Eq, Read, Show)

-- | Specifies either sparse or dense storage.  In sparse (\"coordinate\") 
-- storage, elements are given in (i,j,x) triplets for matrices (or (i,x) 
-- for vectors).  Indices are 1-based, so that A(1,1) is the first element of
-- a matrix, and x(1) is the first element of a vector.
--
-- In dense (\"array\") storage, elements are given in column-major order.
--
-- In both cases, each element is given on a separate line.
data Format = Coordinate | Array 
    deriving (Eq, Read, Show)
    
-- | Specifies any special structure in the matrix.  For symmetric and 
-- hermition matrices, only the lower-triangular part of the matrix is given.
-- For skew matrices, only the entries below the diagonal are stored.
data MatrixType = General | Symmetric | Hermitian | Skew 
    deriving (Eq, Read, Show)


----------------------------- Banner Functions ------------------------------

-- | Give the Matrix Market header string for the given vector type.
showVectorHeader :: Format -> Field -> String
showVectorHeader format field = intercalate " "
    $ [ prefixStr, showMM Vector, showMM format, showMM field ]

-- | Give the Matrix Market header string for the given matrix type.
showMatrixHeader :: Format -> Field -> MatrixType -> String
showMatrixHeader format field attr = intercalate " "
    $ [ prefixStr, showMM Matrix, showMM format, showMM field, showMM attr ]

-- | Read a Matrix Market header from a string.
readHeader :: String -> (Type, Format, Field, Maybe MatrixType)
readHeader str =
    case words str of
        (p:t:ws) ->
            if p == prefixStr
                then case (readMM t, ws) of
                    (Vector, [ format, field ]) ->
                        (Vector, readMM format, readMM field, Nothing)
                    (Matrix, [ format, field, attr ]) ->
                        (Matrix, readMM format, readMM field, Just (readMM attr))
                    _ -> err
                else 
                    err
        _ -> err
        where
            err = error "could not parse Matrix Market header"

-- | Write out a header and comments for a Matrix Market vector.
hPutVectorBanner :: Handle -> Format -> Field -> String -> IO ()
hPutVectorBanner h format field str = do
    hPutVectorHeader h format field
    hPutComments     h str

-- | Write out a header and comments for a Matrix Market matrix.
hPutMatrixBanner :: Handle -> Format -> Field -> MatrixType -> String -> IO ()
hPutMatrixBanner h format field attr str = do
    hPutMatrixHeader h format field attr
    hPutComments     h str

-- | Read the Matrix Market banner (including comments) from a file.  The
-- comments are discarded and the banner information is returned.
hGetBanner :: Handle -> IO (Type, Format, Field, Maybe MatrixType)
hGetBanner h = do
    header <- hGetHeader h
    hGetComments h
    return header

-- | Write a Matrix Market header for a vector to a file.
hPutVectorHeader :: Handle -> Format -> Field -> IO ()
hPutVectorHeader h format field = 
    hPutStrLn h $ showVectorHeader format field

-- | Write a Matrix Market header for a matrix to a file.
hPutMatrixHeader :: Handle -> Format -> Field -> MatrixType -> IO ()
hPutMatrixHeader h format field attr = 
    hPutStrLn h $ showMatrixHeader format field attr

-- | Read the Matrix Market header from a file.
hGetHeader :: Handle -> IO (Type, Format, Field, Maybe MatrixType)
hGetHeader h = hGetLine h >>= return . readHeader


-- | The line-comment character (@%@).
commentChar :: Char
commentChar = '%'

-- | Write a string as a Matrix Market file comment.  This prepends each line
-- with \'%\' and then writes it out to the file.
hPutComments :: Handle -> String -> IO ()
hPutComments h cs =
    forM_ (lines cs) $ \l -> do
        hPutStr h $ [ commentChar, ' ' ]
        hPutStrLn h l

-- | Read the comments from a file, stripping the leading \'%\' from each
-- line, until reaching a line that does not start with the comment character.
hGetComments :: Handle -> IO (String)
hGetComments h = do
    c <- hLookAhead h
    if c == commentChar
        then do
            (_:l) <- hGetLine h
            ls    <- hGetComments h
            return (l ++ "\n" ++ ls)
        else
            return ""    


------------------------------ Size Functions -------------------------------

hPutSize1 :: Handle -> Int -> IO ()
hPutSize1 h n = 
    hPutStrLn h $ show n

hPutSize2 :: Handle -> Int -> Int -> IO ()
hPutSize2 h m n =
    hPutStrLn h $ show m ++ "\t" ++ show n

hPutSize3 :: Handle -> Int -> Int -> Int -> IO ()
hPutSize3 h m n l =
    hPutStrLn h $ show m ++ "\t" ++ show n ++ "\t" ++ show l

hGetSize1 :: Handle -> IO Int
hGetSize1 h = hGetLine h >>= return . read

hGetSize2 :: Handle -> IO (Int,Int)
hGetSize2 h = do
    l <- hGetLine h
    case (words l) of
        [ n1, n2 ] -> return (read n1, read n2)
        _ -> ioError $ userError "could not read sizes"

hGetSize3 :: Handle -> IO (Int,Int,Int)
hGetSize3 h = do
    l <- hGetLine h
    case (words l) of
        [ n1, n2, n3 ] -> return (read n1, read n2, read n3)
        _ -> ioError $ userError "could not read sizes"


------------------------------ Dense Vectors --------------------------------

-- | Write a dense vector with the given dimension and elements to a file.
-- If the field is given as 'Pattern', no elements are written, only the 
-- header and size.
hPutVector :: (Show a) => 
    Handle -> Field -> Int -> [a] -> IO ()
hPutVector = flip hPutVectorWithDesc ""

-- | Write a dense vector along with a description, which is put in the
-- comment section of the file.
hPutVectorWithDesc :: (Show a) => 
    Handle -> String -> Field -> Int -> [a] -> IO ()
hPutVectorWithDesc h desc field n xs = do
    hPutVectorHeader h Array field 
    hPutComments     h desc
    hPutSize1        h n
    
    case field of
        Pattern -> return ()
        _       -> mapM_ (\x -> hPutStrLn h $ show x) xs

-- | Lazily read a dense vector from a file.  The vector dimension and 
-- elements are returned.  The file is closed when the operation finishes.
-- If the field is 'Pattern', the elements list will be @Nothing@.
hGetVector :: (Read a) => Handle -> Field -> IO (Int, Maybe [a])
hGetVector h field = do 
    (t,format,field',attr') <- hGetBanner h
    if         t == Vector
       && format == Array
       && field' == field
       && attr'  == Nothing
        then do
            n  <- hGetSize1 h
            ls <- B.hGetContents h >>= return . B.lines
            
            let xs = case field of 
                         Pattern -> Nothing
                         _       -> Just $ map (readValue . dropLineComment) ls
            return (n, xs)
            
        else
            headerError Vector Array field Nothing


------------------------------ Sparse Vectors -------------------------------

-- | Write a coordinate vector with the given dimension and size to a file.
-- The indices are 1-based, so that x(1) is the first element of the vector.
-- If the field is 'Pattern', only the indices are used.
hPutCoordVector :: (Show a) =>
    Handle -> Field -> Int -> Int -> [(Int, a)] -> IO ()
hPutCoordVector = flip hPutCoordVectorWithDesc ""

-- | Write a coordinate vector along with a description, which is put in the
-- comment section of the file.
hPutCoordVectorWithDesc :: (Show a) => 
    Handle -> String -> Field -> Int -> Int -> [(Int, a)] -> IO ()
hPutCoordVectorWithDesc h desc field n nz ixs = do
    hPutVectorHeader h Coordinate field
    hPutComments     h desc
    hPutSize2        h n nz
    
    let showCoord = case field of 
                        Pattern -> \(i,_) -> show i
                        _       -> \(i,x) -> show i ++ "\t" ++ show x 
    mapM_ (hPutStrLn h . showCoord) ixs
                
-- | Lazily read a coordinate vector from a file.  The vector dimension, size,
-- and elements are returned.  The file is closed when the operation finishes.
-- If the field is given as 'Pattern', only a list of indices is returned.
hGetCoordVector :: (Read a) => 
    Handle -> Field -> IO (Int, Int, Either [Int] [(Int, a)])
hGetCoordVector h field = do
    (t,format,field',attr') <- hGetBanner h
    if         t == Vector
       && format == Coordinate
       && field' == field
       && attr'  == Nothing
        then do
            (n,nz) <- hGetSize2 h
            ls <- B.hGetContents h >>= return . B.lines
            let ixs  =  map readCoord ls
                ixs' = case field of 
                            Pattern -> Left $ map fst ixs
                            _       -> Right ixs
                            
            return (n, nz, ixs')
        else
            headerError Vector Coordinate field Nothing

    where
        readCoord l =
            let (i,l')  = readInt $ dropLineComment l
                x       = case field of
                              Pattern -> undefined
                              _       -> readValue l'
            in (i,x)


------------------------------ Dense Matrices -------------------------------

-- | Write a dense matrix with the given shape and elements in column-major
-- order to a file. If the field is given as 'Pattern', no elements are 
-- written, only the header and size.
hPutMatrix :: (Show a) => 
    Handle -> Field -> MatrixType -> (Int,Int) -> [a] -> IO ()
hPutMatrix = flip hPutMatrixWithDesc ""

-- | Write a dense matrix along with a description, which is put in the
-- comment section of the file.
hPutMatrixWithDesc :: (Show a) => 
    Handle -> String -> Field -> MatrixType -> (Int,Int) -> [a] -> IO ()
hPutMatrixWithDesc h desc field attr (m,n) xs = do
    hPutMatrixHeader h Array field attr
    hPutComments     h desc
    hPutSize2        h m n
    
    case field of
        Pattern -> return ()
        _       -> mapM_ (\x -> hPutStrLn h $ show x) xs

-- | Lazily read a dense matrix from a file, returning the matrix shape and
-- its elements in column-major order.  The file is closed when the operation 
-- finishes.  If the field is given as 'Pattern', @Nothing@ is returned 
-- instead of an element list.
hGetMatrix :: (Read a) => 
    Handle -> Field -> MatrixType -> IO ((Int,Int), Maybe [a])
hGetMatrix h field attr = do
    (t,format,field',attr') <- hGetBanner h
    if         t == Matrix 
       && format == Array 
       && field' == field 
       && attr'  == Just attr
        then do
            (m,n)  <- hGetSize2 h
            ls <- B.hGetContents h >>= return . B.lines
            let xs = case field of
                         Pattern -> Nothing
                         _       -> Just $ map (readValue . dropLineComment) ls
            return ((m,n), xs)

        else 
            headerError Matrix Array field (Just attr)


------------------------------ Sparse Matrices -------------------------------

-- | Write a coordinate matrix with the given shape and size to a file.  The
-- indices are 1-based, so that A(1,1) is the first element of the matrix.
-- If the field is 'Pattern', only the indices are used.
hPutCoordMatrix :: (Show a) =>
       Handle -> Field -> MatrixType 
    -> (Int,Int) -> Int -> [((Int,Int), a)] -> IO ()
hPutCoordMatrix = flip hPutCoordMatrixWithDesc ""

-- | Write a coordinate matrix along with a description, which is put in the
-- comment section of the file.
hPutCoordMatrixWithDesc :: (Show a) => 
       Handle -> String -> Field -> MatrixType 
    -> (Int,Int) -> Int -> [((Int,Int), a)] -> IO ()
hPutCoordMatrixWithDesc h desc field attr (m,n) nz ijes = do
    hPutMatrixHeader h Coordinate field attr
    hPutComments     h desc
    hPutSize3        h m n nz

    let showCoord = case field of 
         Pattern -> \((i,j),_) -> show i ++ "\t" ++ show j
         _       -> \((i,j),x) -> show i ++ "\t" ++ show j ++ "\t" ++ show x

    mapM_ (hPutStrLn h . showCoord) ijes
                
-- | Lazily read a coordinate vector from a file.  The vector dimension, size,
-- and elements are returned.  The file is closed when the operation finishes.
-- If the field is 'Pattern', only the indices are returned.
hGetCoordMatrix :: (Read a) => 
       Handle -> Field -> MatrixType 
    -> IO ((Int,Int), Int, Either [(Int,Int)] [((Int,Int), a)])
hGetCoordMatrix h field attr = do
    (t, format, field', attr') <- hGetBanner h
    if         t == Matrix 
       && format == Coordinate 
       && field' == field 
       && attr'  == Just attr
        then do
            (m,n,nz) <- hGetSize3 h
            ls <- B.hGetContents h >>= return . B.lines
            
            let ixs = map parseLine ls
                ixs' = case field of
                           Pattern -> Left $ map fst ixs
                           _       -> Right ixs
                           
            return ((m,n), nz, ixs')
        else
            headerError Matrix Coordinate field (Just attr)

    where
        parseLine l =
            let (i,l')  = readInt $ dropLineComment l
                (j,l'') = readInt l'
                e       = case field of
                              Pattern -> undefined
                              _       -> readValue l''
            in ((i,j),e)


----------------------------- Helper Functions ------------------------------

prefixStr :: String
prefixStr = "%%MatrixMarket"

showMM :: (Show a) => a -> String
showMM a =
    let (c:cs) = show a
    in (toLower c):cs
    
readMM :: (Read a) => String -> a
readMM ~(c:cs) =
    read $ (toUpper c):cs
    
headerError :: Type -> Format -> Field -> Maybe MatrixType -> IO a
headerError t format field attr =
    ioError $ userError $
        "header does not match expected type `" 
        ++ showMM t ++ " " ++ showMM format ++ " " ++ showMM field ++ " "
        ++ fromMaybe "" (attr >>= return . showMM)
        ++ "'."

dropLineComment :: ByteString -> ByteString
dropLineComment =
    B.takeWhile (/= commentChar)

readInt :: ByteString -> (Int, ByteString)
readInt s =
    fromMaybe (error "could not parse integer")
        $ B.readInt (B.dropWhile isSpace s)
        
readValue :: (Read a) => ByteString -> a
readValue s = 
    read $ B.unpack (B.dropWhile isSpace s)