module Xlsx.Parse where
import Data.XML.Types
import Text.XML
import Text.XML.Stream.Parse
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Vector as V
import Data.Vector (Vector)
import Data.ByteString (ByteString)
import qualified Data.ByteString.Lazy as L
import Data.Conduit
import qualified Data.Conduit.List as CL
import Control.Applicative hiding (many)
import Control.Monad.Reader
import Control.Monad.State
import Control.Monad.Trans.Resource
import Data.Attoparsec.Text
import Data.Default
import Data.Char
import Xlsx.Types
parseCoord :: Text -> Maybe (Int, Int)
parseCoord t = go 0 $ map ord $ T.unpack t where
a = ord 'A'
z = ord 'Z'
zero = ord '0'
nine = ord '9'
go n [] = Nothing
go n cs@(c:cs') | c <= z && c >= a = go (n*26 + c a + 1) cs'
| n == 0 = Nothing
| otherwise = go' n 0 cs
go' m n [] = Just (n, m)
go' m n (c:cs) | c <= nine && c >= zero = go' m (n*10 + c zero) cs
| otherwise = Nothing
tagLocal n = tagPredicate (\Name {nameLocalName = l} -> n == l)
tagLocalNoAttr n c = tagLocal n ignoreAttrs $ const c
sharedStringSink :: (Monad m, MonadThrow m) => Sink ByteString m (Vector Text)
sharedStringSink = parseBytes def =$= (force "sst" $ tagLocal "sst" parseCount vecsink) where
parseCount = requireAttr "uniqueCount" <* ignoreAttrs
text = tagLocalNoAttr "t" $ content
rich = do
ts <- many $ tagLocalNoAttr "r" $ do
tagLocalNoAttr "rPr" skipPr
force "t" $ tagLocalNoAttr "t" $ content
case ts of
[] -> return Nothing
l -> return $ Just $ T.concat l
vecsink c = V.replicateM (read $ T.unpack c) $ force "si" $ tagLocalNoAttr "si" $
force "r or t" $ orE text rich
skipPr = do
mbevent <- CL.peek
case mbevent of
Just j@(EventEndElement n) | nameLocalName n == "rPr" -> return ()
Nothing -> monadThrow $ XmlException "no rPr closing tag" Nothing
_ -> CL.drop 1 >> skipPr
rawRows :: (Monad m, MonadThrow m) => ConduitM ByteString [((Int, Int), Cell)] m ()
rawRows = parseBytes def =$= skiptorows where
skiptorows = do
mbevent <- await
case mbevent of
Nothing -> monadThrow $ XmlException "sheetData expected" Nothing
Just j@(EventBeginElement n _) | nameLocalName n == "sheetData" -> rowsink
_ -> skiptorows
row = tagLocalNoAttr "row" $ many $ tagLocal "c" ((,) <$> requireAttr "r" <*> optionalAttr "t" <* ignoreAttrs) $ \(coord, mbt) -> do
coord' <- case parseCoord coord of
Nothing -> monadThrow $ XmlException ("invalid coordinate: " ++ T.unpack coord) Nothing
Just j -> return j
f <- tagLocalNoAttr "f" content
cellv <- case mbt of
Just typ | typ == "inlineStr" -> InlineString <$> (force "is" $ tagLocalNoAttr "is" $ force "t" $ tagLocalNoAttr "t" content)
| otherwise -> do
v <- tagLocalNoAttr "v" content
return $ case v of
Nothing -> Empty
Just j | typ == "str" -> InlineString j
| typ == "b" -> Boolean $ j == "1"
| typ == "s" -> SharedString $ read $ T.unpack j
| typ == "e" -> Error j
| otherwise -> error $ show typ
Nothing -> do
v <- tagLocalNoAttr "v" content
return $ case v of
Just j -> case parseOnly scientific j of
Left l -> error $ l
Right r -> Number r
_ -> Empty
return (coord', (cellv, f))
rowsink = do
mbrow <- row
case mbrow of
Just j -> yield j >> rowsink
Nothing -> return ()
unsparseSheet :: (Monad m, MonadThrow m) => ConduitM [((Int, Int), Cell)] [((Int, Int), Cell)] m ()
unsparseSheet = flip evalStateT 1 go where
go = do
mb <- lift await
case mb of
Nothing -> return ()
Just r -> case r of
[] -> lift $ yield []
cs@(((r,_),_):_) -> do
cur <- get
if r > cur then lift $ replicateM_ (r cur) (yield []) else return ()
lift $ yield cs
put $ r + 1
go
sheetRows :: (Applicative m, Monad m, MonadThrow m, FromRow a) => ConduitM ByteString a (ReaderT (Vector Text) m) ()
sheetRows = rawRows =$= unsparseSheet =$= CL.mapM fromRow