module Codec.Xlsx.Parser
( toXlsx
) where
import qualified Codec.Archive.Zip as Zip
import Control.Applicative
import Control.Arrow ((&&&))
import Control.Monad.IO.Class ()
import qualified Data.ByteString.Lazy as L
import Data.ByteString.Lazy.Char8 ()
import Data.List
import qualified Data.Map as M
import Data.Maybe
import Data.Ord
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Read as T
import Prelude hiding (sequence)
import Safe
import System.FilePath.Posix
import Text.XML as X
import Text.XML.Cursor
import Codec.Xlsx.Parser.Internal
import Codec.Xlsx.Types
import Codec.Xlsx.Types.Internal
import Codec.Xlsx.Types.Internal.Relationships as Relationships
import Codec.Xlsx.Types.Internal.SharedStringTable
import Codec.Xlsx.Types.Internal.CustomProperties
import Codec.Xlsx.Types.Internal.CommentTable
import Codec.Xlsx.Types.Internal.CustomProperties as CustomProperties
toXlsx :: L.ByteString -> Xlsx
toXlsx bs = Xlsx sheets styles names customPropMap
where
ar = Zip.toArchive bs
sst = getSharedStrings ar
styles = getStyles ar
(wfs, names) = readWorkbook ar
sheets = M.fromList $ map (wfName &&& extractSheet ar sst) wfs
CustomProperties customPropMap = getCustomProperties ar
data WorksheetFile = WorksheetFile { wfName :: Text
, wfPath :: FilePath
}
deriving Show
extractSheet :: Zip.Archive
-> SharedStringTable
-> WorksheetFile
-> Worksheet
extractSheet ar sst wf = Worksheet cws rowProps cells merges sheetViews pageSetup
where
file = fromJust $ Zip.fromEntry <$> Zip.findEntryByPath (wfPath wf) ar
cur = case parseLBS def file of
Left _ -> error "could not read file"
Right d -> fromDocument d
sheetViewList = cur $/ element (n"sheetViews") &/ element (n"sheetView") >=> fromCursor
sheetViews = case sheetViewList of
[] -> Nothing
views -> Just views
commentsMap = getComments ar . relTarget =<< findRelByType commentsType sheetRels
commentsType = "http://schemas.openxmlformats.org/officeDocument/2006/relationships/comments"
sheetRels = getRels ar (wfPath wf)
pageSetup = listToMaybe $ cur $/ element (n"pageSetup") >=> fromCursor
cws = cur $/ element (n"cols") &/ element (n"col") >=> fromCursor
(rowProps, cells) = collect $ cur $/ element (n"sheetData") &/ element (n"row") >=> parseRow
parseRow c = do
r <- c $| attribute "r" >=> decimal
let ht = if attribute "customHeight" c == ["true"]
then listToMaybe $ c $| attribute "ht" >=> rational
else Nothing
let s = if attribute "s" c /= []
then listToMaybe $ c $| attribute "s" >=> decimal
else Nothing
let rp = if isNothing s && isNothing ht
then Nothing
else Just (RowProps ht s)
return (r, rp, c $/ element (n"c") >=> parseCell)
parseCell :: Cursor -> [(Int, Int, Cell)]
parseCell cell = do
ref <- cell $| attribute "r"
let
s = listToMaybe $ cell $| attribute "s" >=> decimal
t = fromMaybe "n" $ listToMaybe $ cell $| attribute "t"
d = listToMaybe $ cell $/ element (n"v") &/ content >=> extractCellValue sst t
(c, r) = T.span (>'9') ref
comment = commentsMap >>= lookupComment ref
return (int r, col2int c, Cell s d comment)
collect = foldr collectRow (M.empty, M.empty)
collectRow (_, Nothing, rowCells) (rowMap, cellMap) =
(rowMap, foldr collectCell cellMap rowCells)
collectRow (r, Just h, rowCells) (rowMap, cellMap) =
(M.insert r h rowMap, foldr collectCell cellMap rowCells)
collectCell (x, y, cd) = M.insert (x,y) cd
merges = cur $/ parseMerges
parseMerges :: Cursor -> [Text]
parseMerges = element (n"mergeCells") &/ element (n"mergeCell") >=> parseMerge
parseMerge c = c $| attribute "ref"
extractCellValue :: SharedStringTable -> Text -> Text -> [CellValue]
extractCellValue sst "s" v =
case T.decimal v of
Right (d, _) ->
case sstItem sst d of
XlsxText txt -> [CellText txt]
XlsxRichText rich -> [CellRich rich]
_ ->
[]
extractCellValue _ "str" str = [CellText str]
extractCellValue _ "n" v =
case T.rational v of
Right (d, _) -> [CellDouble d]
_ -> []
extractCellValue _ "b" "1" = [CellBool True]
extractCellValue _ "b" "0" = [CellBool False]
extractCellValue _ _ _ = []
xmlCursor :: Zip.Archive -> FilePath -> Maybe Cursor
xmlCursor ar fname = parse <$> Zip.findEntryByPath fname ar
where
parse entry = case parseLBS def (Zip.fromEntry entry) of
Left _ -> error "could not read file"
Right d -> fromDocument d
getSharedStrings :: Zip.Archive -> SharedStringTable
getSharedStrings x = case xmlCursor x "xl/sharedStrings.xml" of
Nothing ->
error "invalid shared strings"
Just c ->
let [sst] = fromCursor c in sst
getStyles :: Zip.Archive -> Styles
getStyles ar = case Zip.fromEntry <$> Zip.findEntryByPath "xl/styles.xml" ar of
Nothing -> Styles L.empty
Just xml -> Styles xml
getComments :: Zip.Archive -> FilePath -> Maybe CommentTable
getComments ar fp = listToMaybe =<< fromCursor <$> xmlCursor ar fp
getCustomProperties :: Zip.Archive -> CustomProperties
getCustomProperties ar = case fromCursor <$> xmlCursor ar "docProps/custom.xml" of
Just [cp] -> cp
_ -> CustomProperties.empty
readWorkbook :: Zip.Archive -> ([WorksheetFile], DefinedNames)
readWorkbook ar = case xmlCursor ar wbPath of
Nothing ->
error "invalid workbook"
Just c ->
let
sheets = c $/ element (n"sheets") &/ element (n"sheet") >=>
liftA2 (worksheetFile wbRels) <$> attribute "name" <*> (attribute (odr"id") &| RefId)
wbRels = getRels ar wbPath
names = c $/ element (n"definedNames") &/ element (n"definedName") >=> mkDefinedName
in (sheets, DefinedNames names)
where
wbPath = "xl/workbook.xml"
mkDefinedName :: Cursor -> [(Text, Maybe Text, Text)]
mkDefinedName c = return ( head $ attribute "name" c
, listToMaybe $ attribute "localSheetId" c
, T.concat $ c $/ content
)
worksheetFile :: Relationships -> Text -> RefId -> WorksheetFile
worksheetFile wbRels name rId = WorksheetFile name path
where
path = relTarget . fromJustNote "sheet path" $ Relationships.lookup rId wbRels
getRels :: Zip.Archive -> FilePath -> Relationships
getRels ar fp =
let (dir, file) = splitFileName fp
relsPath = dir </> "_rels" </> file <.> "rels"
in case xmlCursor ar relsPath of
Nothing ->
Relationships.empty
Just c ->
let [rels] = fromCursor c in setTargetsFrom fp rels
int :: Text -> Int
int = either error fst . T.decimal