{-# LANGUAGE BangPatterns #-}
{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE MultiParamTypeClasses #-}
{-# LANGUAGE MultiWayIf #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE TupleSections #-}
module Codec.Xlsx.Parser.Internal.Fast
( FromXenoNode(..)
, collectChildren
, maybeChild
, requireChild
, childList
, maybeFromChild
, fromChild
, fromChildList
, maybeParse
, requireAndParse
, childListAny
, maybeElementVal
, toAttrParser
, parseAttributes
, FromAttrBs(..)
, unexpectedAttrBs
, maybeAttrBs
, maybeAttr
, fromAttr
, fromAttrDef
, contentBs
, contentX
, nsPrefixes
, addPrefix
) where
import Control.Applicative
import Control.Arrow (second)
import Control.Exception (Exception, throw)
import Control.Monad (ap, forM, join, liftM)
import Data.Bifunctor (first)
import Data.Bits ((.|.), shiftL)
import Data.ByteString (ByteString)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Unsafe as SU
import Data.Char (chr)
import Data.Maybe
import Data.Monoid ((<>))
import Data.Text (Text)
import qualified Data.Text as T
import qualified Data.Text.Encoding as T
import Data.Word (Word8)
import Xeno.DOM hiding (parse)
import Codec.Xlsx.Parser.Internal.Util
class FromXenoNode a where
fromXenoNode :: Node -> Either Text a
newtype ChildCollector a = ChildCollector
{ runChildCollector :: [Node] -> Either Text ([Node], a)
}
instance Functor ChildCollector where
fmap f a = ChildCollector $ \ns ->
second f <$> runChildCollector a ns
instance Applicative ChildCollector where
pure a = ChildCollector $ \ns ->
return (ns, a)
cf <*> ca = ChildCollector $ \ns -> do
(ns', f) <- runChildCollector cf ns
(ns'', a) <- runChildCollector ca ns'
return (ns'', f a)
instance Alternative ChildCollector where
empty = ChildCollector $ \_ -> Left "ChildCollector.empty"
ChildCollector f <|> ChildCollector g = ChildCollector $ \ns ->
either (const $ g ns) Right (f ns)
instance Monad ChildCollector where
return = pure
ChildCollector f >>= g = ChildCollector $ \ns ->
either Left (\(!ns', f') -> runChildCollector (g f') ns') (f ns)
toChildCollector :: Either Text a -> ChildCollector a
toChildCollector unlifted =
case unlifted of
Right a -> return a
Left e -> ChildCollector $ \_ -> Left e
collectChildren :: Node -> ChildCollector a -> Either Text a
collectChildren n c = snd <$> runChildCollector c (children n)
maybeChild :: ByteString -> ChildCollector (Maybe Node)
maybeChild nm =
ChildCollector $ \case
(n:ns)
| name n == nm -> pure (ns, Just n)
ns -> pure (ns, Nothing)
requireChild :: ByteString -> ChildCollector Node
requireChild nm =
ChildCollector $ \case
(n:ns)
| name n == nm -> pure (ns, n)
_ ->
Left $ "required element " <> T.pack (show nm) <> " was not found"
childList :: ByteString -> ChildCollector [Node]
childList nm = do
mNode <- maybeChild nm
case mNode of
Just n -> (n:) <$> childList nm
Nothing -> return []
maybeFromChild :: (FromXenoNode a) => ByteString -> ChildCollector (Maybe a)
maybeFromChild nm = do
mNode <- maybeChild nm
mapM (toChildCollector . fromXenoNode) mNode
fromChild :: (FromXenoNode a) => ByteString -> ChildCollector a
fromChild nm = do
n <- requireChild nm
case fromXenoNode n of
Right a -> return a
Left e -> ChildCollector $ \_ -> Left e
fromChildList :: (FromXenoNode a) => ByteString -> ChildCollector [a]
fromChildList nm = do
mA <- maybeFromChild nm
case mA of
Just a -> (a:) <$> fromChildList nm
Nothing -> return []
maybeParse :: ByteString -> (Node -> Either Text a) -> ChildCollector (Maybe a)
maybeParse nm parse = maybeChild nm >>= (toChildCollector . mapM parse)
requireAndParse :: ByteString -> (Node -> Either Text a) -> ChildCollector a
requireAndParse nm parse = requireChild nm >>= (toChildCollector . parse)
childListAny :: (FromXenoNode a) => Node -> Either Text [a]
childListAny = mapM fromXenoNode . children
maybeElementVal :: (FromAttrBs a) => ByteString -> ChildCollector (Maybe a)
maybeElementVal nm = do
mN <- maybeChild nm
fmap join . forM mN $ \n ->
toChildCollector . parseAttributes n $ maybeAttr "val"
newtype AttrParser a = AttrParser
{ runAttrParser :: [(ByteString, ByteString)] -> Either Text ( [( ByteString
, ByteString)]
, a)
}
instance Monad AttrParser where
return a = AttrParser $ \as -> Right (as, a)
(AttrParser f) >>= g =
AttrParser $ \as ->
either Left (\(as', f') -> runAttrParser (g f') as') (f as)
instance Applicative AttrParser where
pure = return
(<*>) = ap
instance Functor AttrParser where
fmap = liftM
attrError :: Text -> AttrParser a
attrError err = AttrParser $ \_ -> Left err
toAttrParser :: Either Text a -> AttrParser a
toAttrParser unlifted =
case unlifted of
Right a -> return a
Left e -> AttrParser $ \_ -> Left e
maybeAttrBs :: ByteString -> AttrParser (Maybe ByteString)
maybeAttrBs attrName = AttrParser $ go id
where
go front [] = Right (front [], Nothing)
go front (a@(nm, val):as) =
if nm == attrName
then Right (front as, Just val)
else go (front . (:) a) as
requireAttrBs :: ByteString -> AttrParser ByteString
requireAttrBs nm = do
mVal <- maybeAttrBs nm
case mVal of
Just val -> return val
Nothing -> attrError $ "attribute " <> T.pack (show nm) <> " is required"
unexpectedAttrBs :: Text -> ByteString -> Either Text a
unexpectedAttrBs typ val =
Left $ "Unexpected value for " <> typ <> ": " <> T.pack (show val)
fromAttr :: FromAttrBs a => ByteString -> AttrParser a
fromAttr nm = do
bs <- requireAttrBs nm
toAttrParser $ fromAttrBs bs
maybeAttr :: FromAttrBs a => ByteString -> AttrParser (Maybe a)
maybeAttr nm = do
mBs <- maybeAttrBs nm
forM mBs (toAttrParser . fromAttrBs)
fromAttrDef :: FromAttrBs a => ByteString -> a -> AttrParser a
fromAttrDef nm defVal = fromMaybe defVal <$> maybeAttr nm
parseAttributes :: Node -> AttrParser a -> Either Text a
parseAttributes n attrParser =
case runAttrParser attrParser (attributes n) of
Left e -> Left e
Right (_, a) -> return a
class FromAttrBs a where
fromAttrBs :: ByteString -> Either Text a
instance FromAttrBs ByteString where
fromAttrBs = pure
instance FromAttrBs Bool where
fromAttrBs x | x == "1" || x == "true" = return True
| x == "0" || x == "false" = return False
| otherwise = unexpectedAttrBs "boolean" x
instance FromAttrBs Int where
fromAttrBs = first T.pack . eitherDecimal . T.decodeLatin1
instance FromAttrBs Double where
fromAttrBs = first T.pack . eitherRational . T.decodeLatin1
instance FromAttrBs Text where
fromAttrBs = replaceEntititesBs
replaceEntititesBs :: ByteString -> Either Text Text
replaceEntititesBs str =
T.decodeUtf8 . BS.concat <$> findAmp 0
where
findAmp :: Int -> Either Text [ByteString]
findAmp index =
case elemIndexFrom ampersand str index of
Nothing -> if BS.null text then return [] else return [text]
where text = BS.drop index str
Just fromAmp ->
if BS.null text
then checkEntity fromAmp
else (text:) <$> checkEntity fromAmp
where text = substring str index fromAmp
checkEntity index =
case elemIndexFrom semicolon str index of
Just fromSemi | fromSemi >= index + 3 -> do
entity <- checkElementVal (index + 1) (fromSemi - index - 1)
(BS.singleton entity:) <$> findAmp (fromSemi + 1)
_ -> Left "Unending entity"
checkElementVal index len =
if | len == 2
&& s_index this 0 == 108
&& s_index this 1 == 116
-> return 60
| len == 2
&& s_index this 0 == 103
&& s_index this 1 == 116
-> return 62
| len == 3
&& s_index this 0 == 97
&& s_index this 1 == 109
&& s_index this 2 == 112
-> return 38
| len == 4
&& s_index this 0 == 113
&& s_index this 1 == 117
&& s_index this 2 == 111
&& s_index this 3 == 116
-> return 34
| len == 4
&& s_index this 0 == 97
&& s_index this 1 == 112
&& s_index this 2 == 111
&& s_index this 3 == 115
-> return 39
| s_index this 0 == 35
->
if s_index this 1 == 120
then toEnum <$> checkHexadecimal (index + 2) (len - 2)
else toEnum <$> checkDecimal (index + 1) (len - 1)
| otherwise -> Left $ "Bad entity " <> T.pack (show $ (substring str (index-1) (index+len+1)))
where
this = BS.drop index str
checkDecimal index len = BS.foldl' go (Right 0) (substring str index (index + len))
where
go :: Either Text Int -> Word8 -> Either Text Int
go prev c = do
a <- prev
if c >= 48 && c <= 57
then return $ a * 10 + fromIntegral (c - 48)
else Left $ "Expected decimal digit but encountered " <> T.pack (show (chr $ fromIntegral c))
checkHexadecimal index len = BS.foldl' go (Right 0) (substring str index (index + len))
where
go :: Either Text Int -> Word8 -> Either Text Int
go prev c = do
a <- prev
if | c >= 48 && c <= 57
-> return $ (a `shiftL` 4) .|. fromIntegral (c - 48)
| c >= 97 && c <= 122
-> return $ (a `shiftL` 4) .|. fromIntegral (c - 87)
| c >= 65 && c <= 90
-> return $ (a `shiftL` 4) .|. fromIntegral (c - 55)
| otherwise
->
Left $ "Expected hexadecimal digit but encountered " <> T.pack (show (chr $ fromIntegral c))
ampersand = 38
semicolon = 59
data EntityReplaceException = EntityReplaceException deriving Show
instance Exception EntityReplaceException
s_index :: ByteString -> Int -> Word8
s_index ps n
| n < 0 = throw EntityReplaceException
| n >= BS.length ps = throw EntityReplaceException
| otherwise = ps `SU.unsafeIndex` n
{-# INLINE s_index #-}
elemIndexFrom :: Word8 -> ByteString -> Int -> Maybe Int
elemIndexFrom c str offset = fmap (+ offset) (BS.elemIndex c (BS.drop offset str))
{-# INLINE elemIndexFrom #-}
substring :: ByteString -> Int -> Int -> ByteString
substring s start end = BS.take (end - start) (BS.drop start s)
{-# INLINE substring #-}
newtype NsPrefixes = NsPrefixes [(ByteString, ByteString)]
nsPrefixes :: Node -> NsPrefixes
nsPrefixes root =
NsPrefixes . flip mapMaybe (attributes root) $ \(nm, val) ->
(val, ) <$> BS.stripPrefix "xmlns:" nm
addPrefix :: NsPrefixes -> ByteString -> (ByteString -> ByteString)
addPrefix (NsPrefixes prefixes) ns =
maybe id (\prefix nm -> BS.concat [prefix, ":", nm]) $ Prelude.lookup ns prefixes
contentBs :: Node -> ByteString
contentBs n = BS.concat . map toBs $ contents n
where
toBs (Element _) = BS.empty
toBs (Text bs) = bs
toBs (CData bs) = bs
contentX :: Node -> Either Text Text
contentX = replaceEntititesBs . contentBs