-- | -- Module : Data.Xls -- Copyright : (c) 2016 Harendra Kumar -- -- License : BSD-style -- Maintainer : harendra.kumar@gmail.com -- Stability : experimental -- Portability : GHC -- -- Parse Microsoft excel spreadsheet xls file (format BIFF/Excel 97-2004). -- {-# OPTIONS_GHC -pgmP gcc -optP -E -optP -undef -optP -std=c89 #-} {-# LANGUAGE CPP #-} {-# LANGUAGE EmptyDataDecls #-} {-# LANGUAGE ForeignFunctionInterface #-} {-# LANGUAGE RankNTypes #-} #if __GLASGOW_HASKELL__ < 7100 {-# LANGUAGE DeriveDataTypeable #-} #endif module Data.Xls ( decodeXlsIO , decodeXls , XlsException(..) ) where import Control.Exception (Exception, throwIO, bracket) import Control.Monad.IO.Class import Control.Monad (when, void) import Control.Monad.Trans.Resource import Data.Conduit hiding (Conduit, Sink, Source) import Data.Data import Data.Int import Data.Maybe (catMaybes, fromJust, isJust, fromMaybe) import Foreign.C import Foreign.Ptr import Text.Printf #define CCALL(name,signature) \ foreign import ccall unsafe #name \ c_##name :: signature -- Workbook accessor functions data XLSWorkbookStruct type XLSWorkbook = Ptr XLSWorkbookStruct CCALL(xls_open, CString -> CString -> IO XLSWorkbook) CCALL(xls_wb_sheetcount, XLSWorkbook -> IO CInt -- Int32) CCALL(xls_close_WB, XLSWorkbook -> IO ()) -- Worksheet accessor functions data XLSWorksheetStruct type XLSWorksheet = Ptr XLSWorksheetStruct CCALL(xls_getWorkSheet, XLSWorkbook -> CInt -> IO XLSWorksheet) CCALL(xls_parseWorkSheet, XLSWorksheet -> IO ()) CCALL(xls_ws_rowcount, XLSWorksheet -> IO Int16 -- Int16) CCALL(xls_ws_colcount, XLSWorksheet -> IO Int16 -- Int16) CCALL(xls_close_WS, XLSWorksheet -> IO ()) -- Cell accessor functions data XLSCellStruct type XLSCell = Ptr XLSCellStruct CCALL(xls_cell, XLSWorksheet -> Int16 -> Int16 -> IO XLSCell) CCALL(xls_cell_type, XLSCell -> IO Int16 -- Int16) CCALL(xls_cell_strval, XLSCell -> IO CString) CCALL(xls_cell_formulatype, XLSCell -> IO Int32 -- Int32) CCALL(xls_cell_numval, XLSCell -> IO CDouble) -- CCALL(xls_cell_colspan, XLSCell -> IO Int16 -- Int16) -- CCALL(xls_cell_rowspan, XLSCell -> IO Int16 -- Int16) CCALL(xls_cell_hidden, XLSCell -> IO Int8 -- Int8) data XlsException = XlsFileNotFound String | XlsParseError String deriving (Show, Typeable) instance Exception XlsException -- | Parse a Microsoft excel xls workbook file into a Conduit yielding -- rows in a worksheet. Each row represented by a list of Strings, each String -- representing an individual cell. -- -- Important Note: This API concatenates rows from all worksheets into a single -- stream. Please use the non-streaming 'decodeXlsIO' API to get individual -- worksheets. -- -- Throws 'XlsException' -- decodeXls :: MonadResource m => FilePath -> ConduitM i [String] m () decodeXls file = bracketP alloc cleanup decodeWorkSheets where alloc = do file' <- newCString file pWB <- newCString "UTF-8" >>= c_xls_open file' if pWB == nullPtr then throwIO $ XlsFileNotFound $ "XLS file " ++ file ++ " not found." else return pWB cleanup = c_xls_close_WB decodeWorkSheets pWB = do count <- liftIO $ c_xls_wb_sheetcount pWB mapM_ (decodeOneWorkSheet file pWB) [0 .. count - 1] -- | Parse a Microsoft excel xls workbook file into a list of worksheets, each -- worksheet consists of a list of rows and each row consists of a list of -- cells. Cells are plain 'String'. -- -- Throws 'XlsException' -- decodeXlsIO :: FilePath -> IO [[[String]]] decodeXlsIO file = do file' <- newCString file pWB <- newCString "UTF-8" >>= c_xls_open file' when (pWB == nullPtr) $ throwIO $ XlsFileNotFound $ "XLS file " ++ file ++ " not found." count <- liftIO $ c_xls_wb_sheetcount pWB results <- mapM (decodeOneWorkSheetIO file pWB) [0 .. count - 1] void $ c_xls_close_WB pWB return results decodeOneWorkSheet :: MonadResource m => FilePath -> XLSWorkbook -> CInt -> ConduitM i [String] m () decodeOneWorkSheet file pWB index = bracketP alloc cleanup decodeWS where alloc = do pWS <- c_xls_getWorkSheet pWB index if pWS == nullPtr then throwIO $ XlsParseError $ "XLS file " ++ file ++ " could not be parsed." else do c_xls_parseWorkSheet pWS return pWS cleanup = c_xls_close_WS decodeWS = decodeRows decodeOneWorkSheetIO :: FilePath -> XLSWorkbook -> CInt -> IO [[String]] decodeOneWorkSheetIO file pWB index = bracket alloc cleanup decodeRowsIO where alloc = do pWS <- c_xls_getWorkSheet pWB index if pWS == nullPtr then throwIO $ XlsParseError $ "XLS file " ++ file ++ " could not be parsed." else do c_xls_parseWorkSheet pWS return pWS cleanup = c_xls_close_WS decodeRows :: MonadResource m => XLSWorksheet -> ConduitM i [String] m () decodeRows pWS = do rows <- liftIO $ c_xls_ws_rowcount pWS cols <- liftIO $ c_xls_ws_colcount pWS mapM_ (decodeOneRow pWS cols) [r | r <- [0 .. rows - 1]] decodeRowsIO :: XLSWorksheet -> IO [[String]] decodeRowsIO pWS = do rows <- c_xls_ws_rowcount pWS cols <- c_xls_ws_colcount pWS mapM (decodeOneRowIO pWS cols) [r | r <- [0 .. rows - 1]] decodeOneRow :: MonadResource m => XLSWorksheet -> Int16 -> Int16 -> ConduitM i [String] m () decodeOneRow pWS cols rowindex = mapM (liftIO . (c_xls_cell pWS rowindex)) [0 .. cols - 1] >>= mapM (liftIO . decodeOneCell) >>= yield . catMaybes decodeOneRowIO :: XLSWorksheet -> Int16 -> Int16 -> IO [String] decodeOneRowIO pWS cols rowindex = mapM (c_xls_cell pWS rowindex) [0 .. cols - 1] >>= mapM decodeOneCell >>= pure . (map $ fromMaybe "") data CellType = Numerical | Formula | Str | Other decodeOneCell :: XLSCell -> IO (Maybe String) decodeOneCell cellPtr = do nil <- isNullCell cellPtr if nil then return Nothing else cellValue cellPtr >>= return . Just where isNullCell ptr = if ptr == nullPtr then return True else do hidden <- c_xls_cell_hidden ptr if hidden /= 0 then return True else return False cellValue ptr = do typ <- c_xls_cell_type ptr numval <- c_xls_cell_numval ptr ftype <- c_xls_cell_formulatype ptr --rowspan <- c_xls_cell_rowspan ptr --colspan <- c_xls_cell_colspan ptr pStr <- c_xls_cell_strval ptr strval <- if pStr /= nullPtr then peekCString pStr >>= return . Just else return Nothing return $ case cellType typ ftype strval of Numerical -> outputNum numval Formula -> decodeFormula strval numval Str -> fromJust strval Other -> "" -- we don't decode anything else decodeFormula str numval = case str of Just "bool" -> outputBool numval Just "error" -> "*error*" Just x -> x Nothing -> "" -- is it possible? outputNum d = printf "%.15g" (uncurry encodeFloat (decodeFloat d) :: Double) outputBool d = if d == 0 then "false" else "true" cellType t ftype strval = if t == 0x27e || t == 0x0BD || t == 0x203 then Numerical else if t == 0x06 then if ftype == 0 then Numerical else Formula else if isJust strval then Str else Other