-----------------------------------------------------------------------------
-- |
-- Module     : System.IO.MatrixMarket
-- Copyright  : Copyright (c) 2008, Patrick Perry <patperry@stanford.edu>
-- License    : BSD3
-- Maintainer : Patrick Perry <patperry@stanford.edu>
-- Stability  : stable
--
-- Read and write matrices and vectors in the Matrix Market format
-- (see http:\/\/math.nist.gov\/MatrixMarket\/).
--
module System.IO.MatrixMarket (
    -- * Matrix and vector type descriptors
    Field(..),
    Format(..),
    Type(..),
    
    -- * Dense Vector I/O    
    hPutVector,
    hPutVectorWithDesc,
    hGetVector,
    
    -- * Sparse Vector I/O
    hPutCoordVector,
    hPutCoordVectorWithDesc,
    hGetCoordVector,
    
    -- * Dense Matrix I/O
    hPutMatrix,
    hPutMatrixWithDesc,
    hGetMatrix,
    
    -- * Sparse Matrix I/O    
    hPutCoordMatrix,
    hPutCoordMatrixWithDesc,
    hGetCoordMatrix,

    -- * Banner Functions
    showHeader,
    readHeader,
    
    hPutBanner,
    hGetBanner,

    hPutHeader,
    hGetHeader,

    commentChar,
    hPutComments,
    hGetComments,
    
    ) where

import Control.Monad   ( forM_ )
import Data.Char       ( toLower, toUpper, isSpace )
import Data.List       ( intercalate )
import Data.Maybe      ( fromMaybe )
import System.IO       ( Handle, hGetLine, hPutStr, hPutStrLn, hLookAhead )
import Text.Printf     ( printf )

import qualified Data.ByteString.Lazy.Char8 as B

type ByteString = B.ByteString

-- | Specifies the element type.  Pattern matrices do not have any 
-- elements, only indices, and only make sense for coordinate matrices and
-- vectors.
data Field  = Real | Complex | Integer | Pattern
    deriving (Eq, Read, Show)

-- | Specifies either sparse or dense storage.  In sparse (\"coordinate\") 
-- storage, elements are given in (i,j,x) triplets for matrices (or (i,x) 
-- for vectors).  Indices are 1-based, so that A(1,1) is the first element of
-- a matrix, and x(1) is the first element of a vector.
--
-- In dense (\"array\") storage, elements are given in column-major order.
--
-- In both cases, each element is given on a separate line.
data Format = Coordinate | Array 
    deriving (Eq, Read, Show)
    
-- | Specifies any special structure in the matrix.  For symmetric and 
-- hermition matrices, only the lower-triangular part of the matrix is given.
-- For skew matrices, only the entries below the diagonal are stored.
data Type = General | Symmetric | Hermitian | Skew 
    deriving (Eq, Read, Show)


----------------------------- Banner Functions ------------------------------

-- | Give the Matrix Market header string for the given matrix type.
showHeader :: Format -> Field -> Type -> String
showHeader format field t = intercalate " "
    $ [ prefixStr, showMM format, showMM field, showMM t ]

-- | Read a Matrix Market header from a string.
readHeader :: String -> (Format, Field, Type)
readHeader str =
    case words str of
        (mm:m:ws) ->
            if mm ++ " " ++ m == prefixStr
                then case ws of
                    [ format, field, t ] ->
                        (readMM format, readMM field, readMM t)
                    _ -> err
                else 
                    err
        _ -> err
        where
            err = error "could not parse Matrix Market header"

-- | Write out a header and comments for a Matrix Market matrix.
hPutBanner :: Handle -> Format -> Field -> Type -> String -> IO ()
hPutBanner h format field t str = do
    hPutHeader h format field t
    hPutComments     h str

-- | Read the Matrix Market banner (including comments) from a file.  The
-- comments are discarded and the banner information is returned.
hGetBanner :: Handle -> IO (Format, Field, Type)
hGetBanner h = do
    header <- hGetHeader h
    hGetComments h
    return header

-- | Write a Matrix Market header for a matrix to a file.
hPutHeader :: Handle -> Format -> Field -> Type -> IO ()
hPutHeader h format field t = 
    hPutStrLn h $ showHeader format field t

-- | Read the Matrix Market header from a file.
hGetHeader :: Handle -> IO (Format, Field, Type)
hGetHeader h = hGetLine h >>= return . readHeader


-- | The line-comment character (@%@).
commentChar :: Char
commentChar = '%'

-- | Write a string as a Matrix Market file comment.  This prepends each line
-- with \'%\' and then writes it out to the file.
hPutComments :: Handle -> String -> IO ()
hPutComments h cs =
    forM_ (lines cs) $ \l -> do
        hPutStr h $ [ commentChar, ' ' ]
        hPutStrLn h l

-- | Read the comments from a file, stripping the leading \'%\' from each
-- line, until reaching a line that does not start with the comment character.
hGetComments :: Handle -> IO (String)
hGetComments h = do
    c <- hLookAhead h
    if c == commentChar
        then do
            (_:l) <- hGetLine h
            ls    <- hGetComments h
            return (l ++ "\n" ++ ls)
        else
            return ""    


------------------------------ Size Functions -------------------------------

hPutSize2 :: Handle -> Int -> Int -> IO ()
hPutSize2 h m n =
    hPutStrLn h $ show m ++ "\t" ++ show n

hPutSize3 :: Handle -> Int -> Int -> Int -> IO ()
hPutSize3 h m n l =
    hPutStrLn h $ show m ++ "\t" ++ show n ++ "\t" ++ show l

hGetSize2 :: Handle -> IO (Int,Int)
hGetSize2 h = do
    l <- hGetLine h
    case (words l) of
        [ n1, n2 ] -> return (read n1, read n2)
        _ -> ioError $ userError "could not read sizes"

hGetSize3 :: Handle -> IO (Int,Int,Int)
hGetSize3 h = do
    l <- hGetLine h
    case (words l) of
        [ n1, n2, n3 ] -> return (read n1, read n2, read n3)
        _ -> ioError $ userError "could not read sizes"


------------------------------ Dense Vectors --------------------------------

-- | Write a dense vector with the given dimension and elements to a file.
-- If the field is given as 'Pattern', no elements are written, only the 
-- header and size.
hPutVector :: (Show a) => 
    Handle -> Field -> Int -> [a] -> IO ()
hPutVector = flip hPutVectorWithDesc ""

-- | Write a dense vector along with a description, which is put in the
-- comment section of the file.
hPutVectorWithDesc :: (Show a) => 
    Handle -> String -> Field -> Int -> [a] -> IO ()
hPutVectorWithDesc h desc field n xs = 
    hPutMatrixWithDesc h desc field General (n,1) xs

-- | Lazily read a dense vector from a file.  The vector dimension and 
-- elements are returned.  The file is closed when the operation finishes.
-- If the field is 'Pattern', the elements list will be @Nothing@.
hGetVector :: (Read a) => Handle -> Field -> IO (Int, Maybe [a])
hGetVector h field = do 
    ((m,n),xs) <- hGetMatrix h field General
    
    case (m,n) of
        (_,1) -> return (m,xs)
        (1,_) -> return (n,xs)
        _     -> 
            fail $ printf 
                ("expecting a vector but instead got a matrix with"
                 ++ " dimensions (%d,%d)") m n

------------------------------ Sparse Vectors -------------------------------

-- | Write a coordinate vector with the given dimension and size to a file.
-- The indices are 1-based, so that x(1) is the first element of the vector.
-- If the field is 'Pattern', only the indices are used.
hPutCoordVector :: (Show a) =>
    Handle -> Field -> Int -> Int -> [(Int, a)] -> IO ()
hPutCoordVector = flip hPutCoordVectorWithDesc ""

-- | Write a coordinate vector along with a description, which is put in the
-- comment section of the file.
hPutCoordVectorWithDesc :: (Show a) => 
    Handle -> String -> Field -> Int -> Int -> [(Int, a)] -> IO ()
hPutCoordVectorWithDesc h desc field n nz ixs = 
    let ijxs = map (\(i,x) -> ((i,1),x)) ixs
    in hPutCoordMatrixWithDesc h desc field General (n,1) nz ijxs
                
-- | Lazily read a coordinate vector from a file.  The vector dimension, size,
-- and elements are returned.  The file is closed when the operation finishes.
-- If the field is given as 'Pattern', only a list of indices is returned.
hGetCoordVector :: (Read a) => 
    Handle -> Field -> IO (Int, Int, Either [Int] [(Int, a)])
hGetCoordVector h field = do
    ((m,n), nz, ijxs') <- hGetCoordMatrix h field General
    case (m,n) of
        (_,1) -> 
            let ixs = either (Left . fst . unzip ) 
                             (\ijxs -> let (ijs,xs) = unzip ijxs
                                           is       = (fst . unzip) ijs
                                       in Right $ zip is xs)
                             ijxs'
            in return (m, nz, ixs)
        (1,_) -> 
            let jxs = either (Left . snd . unzip ) 
                             (\ijxs -> let (ijs,xs) = unzip ijxs
                                           js       = (snd . unzip) ijs
                                       in Right $ zip js xs)
                             ijxs'
            in return (n, nz, jxs)
        _     -> 
            fail $ printf 
                ("expecting a vector but instead got a matrix with"
                 ++ " dimensions (%d,%d)") m n

------------------------------ Dense Matrices -------------------------------

-- | Write a dense matrix with the given shape and elements in column-major
-- order to a file. If the field is given as 'Pattern', no elements are 
-- written, only the header and size.
hPutMatrix :: (Show a) => 
    Handle -> Field -> Type -> (Int,Int) -> [a] -> IO ()
hPutMatrix = flip hPutMatrixWithDesc ""

-- | Write a dense matrix along with a description, which is put in the
-- comment section of the file.
hPutMatrixWithDesc :: (Show a) => 
    Handle -> String -> Field -> Type -> (Int,Int) -> [a] -> IO ()
hPutMatrixWithDesc h desc field t (m,n) xs = do
    hPutHeader   h Array field t
    hPutComments h desc
    hPutSize2    h m n
    
    case field of
        Pattern -> return ()
        _       -> mapM_ (\x -> hPutStrLn h $ show x) xs

-- | Lazily read a dense matrix from a file, returning the matrix shape and
-- its elements in column-major order.  The file is closed when the operation 
-- finishes.  If the field is given as 'Pattern', @Nothing@ is returned 
-- instead of an element list.
hGetMatrix :: (Read a) => 
    Handle -> Field -> Type -> IO ((Int,Int), Maybe [a])
hGetMatrix h field t = do
    (format,field',t') <- hGetBanner h
    if    format == Array      
       && field' == field 
       && t'     == t
        then do
            (m,n)  <- hGetSize2 h
            ls <- B.hGetContents h >>= return . B.lines
            let xs = case field of
                         Pattern -> Nothing
                         _       -> Just $ map (readValue . dropLineComment) ls
            return ((m,n), xs)

        else 
            headerError Array field t


------------------------------ Sparse Matrices -------------------------------

-- | Write a coordinate matrix with the given shape and size to a file.  The
-- indices are 1-based, so that A(1,1) is the first element of the matrix.
-- If the field is 'Pattern', only the indices are used.
hPutCoordMatrix :: (Show a) =>
       Handle -> Field -> Type 
    -> (Int,Int) -> Int -> [((Int,Int), a)] -> IO ()
hPutCoordMatrix = flip hPutCoordMatrixWithDesc ""

-- | Write a coordinate matrix along with a description, which is put in the
-- comment section of the file.
hPutCoordMatrixWithDesc :: (Show a) => 
       Handle -> String -> Field -> Type 
    -> (Int,Int) -> Int -> [((Int,Int), a)] -> IO ()
hPutCoordMatrixWithDesc h desc field t (m,n) nz ijes = do
    hPutHeader   h Coordinate field t
    hPutComments h desc
    hPutSize3    h m n nz

    let showCoord = case field of 
         Pattern -> \((i,j),_) -> show i ++ "\t" ++ show j
         _       -> \((i,j),x) -> show i ++ "\t" ++ show j ++ "\t" ++ show x

    mapM_ (hPutStrLn h . showCoord) ijes
                
-- | Lazily read a coordinate vector from a file.  The vector dimension, size,
-- and elements are returned.  The file is closed when the operation finishes.
-- If the field is 'Pattern', only the indices are returned.
hGetCoordMatrix :: (Read a) => 
       Handle -> Field -> Type 
    -> IO ((Int,Int), Int, Either [(Int,Int)] [((Int,Int), a)])
hGetCoordMatrix h field t = do
    (format, field', t') <- hGetBanner h
    if    format == Coordinate 
       && field' == field 
       && t'     == t
        then do
            (m,n,nz) <- hGetSize3 h
            ls <- B.hGetContents h >>= return . B.lines
            
            let ixs = map parseLine ls
                ixs' = case field of
                           Pattern -> Left $ map fst ixs
                           _       -> Right ixs
                           
            return ((m,n), nz, ixs')
        else
            headerError Coordinate field t

    where
        parseLine l =
            let (i,l')  = readInt $ dropLineComment l
                (j,l'') = readInt l'
                e       = case field of
                              Pattern -> undefined
                              _       -> readValue l''
            in ((i,j),e)


----------------------------- Helper Functions ------------------------------

prefixStr :: String
prefixStr = "%%MatrixMarket matrix"

showMM :: (Show a) => a -> String
showMM a =
    let (c:cs) = show a
    in (toLower c):cs
    
readMM :: (Read a) => String -> a
readMM ~(c:cs) =
    read $ (toUpper c):cs
    
headerError :: Format -> Field -> Type -> IO a
headerError format field t =
    ioError $ userError $ printf
        "header does not match expected type: `%s %s %s'" 
        (showMM format) 
        (showMM field)
        (showMM t)

dropLineComment :: ByteString -> ByteString
dropLineComment =
    B.takeWhile (/= commentChar)

readInt :: ByteString -> (Int, ByteString)
readInt s =
    fromMaybe (error "could not parse integer")
        $ B.readInt (B.dropWhile isSpace s)
        
readValue :: (Read a) => ByteString -> a
readValue s = 
    read $ B.unpack (B.dropWhile isSpace s)