{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards   #-}
{-# LANGUAGE DeriveGeneric #-}
module Codec.Xlsx.Types.Internal.SharedStringTable (
    -- * Main types
    SharedStringTable(..)
  , sstConstruct
  , sstLookupText
  , sstLookupRich
  , sstItem
  , sstEmpty
  ) where

import Control.Monad
import qualified Data.Map as Map
import Data.Maybe (mapMaybe)
import qualified Data.Set as Set
import Data.Text (Text)
import Data.Vector (Vector)
import qualified Data.Vector as V
import GHC.Generics (Generic)
import Numeric.Search.Range (searchFromTo)
import Safe (fromJustNote)
import Text.XML
import Text.XML.Cursor

import Codec.Xlsx.Parser.Internal
import Codec.Xlsx.Types
import Codec.Xlsx.Writer.Internal

-- | Shared string table
--
-- A workbook can contain thousands of cells containing string (non-numeric)
-- data. Furthermore this data is very likely to be repeated across many rows or
-- columns. The goal of implementing a single string table that is shared across
-- the workbook is to improve performance in opening and saving the file by only
-- reading and writing the repetitive information once.
--
-- Relevant parts of the ECMA standard (2nd edition, part 1,
-- <http://www.ecma-international.org/publications/standards/Ecma-376.htm>;
-- page numbers refer to the page in the PDF rather than the page number as
-- printed on the page):
--
-- * Section 18.4, "Shared String Table" (p. 1712)
--   in particular subsection 18.4.9, "sst (Shared String Table)" (p. 1726)
--
-- TODO: The @extLst@ child element is currently unsupported.
newtype SharedStringTable = SharedStringTable
  { sstTable :: Vector XlsxText
    -- ^ The shared string items; an item is referred to by its zero-based
    -- position in this vector.
  }
  deriving (Eq, Ord, Show, Generic)

-- | A shared string table that contains no entries.
sstEmpty :: SharedStringTable
sstEmpty = SharedStringTable V.empty

{-------------------------------------------------------------------------------
  Rendering
-------------------------------------------------------------------------------}

-- | Renders the table as an @sst@ element (via the 'ToElement' instance) and
-- hands it to 'documentFromElement' to obtain the final 'Document'.
instance ToDocument SharedStringTable where
  toDocument table =
    documentFromElement "Shared string table generated by xlsx"
                        (toElement "sst" table)

-- | See @CT_Sst@, p. 3902.
--
-- Every entry of the table is rendered as one @si@ child element, in table
-- order. The resulting element carries no attributes.
--
-- TODO: The @count@ and @uniqCount@ attributes are currently unsupported.
instance ToElement SharedStringTable where
  toElement nm (SharedStringTable entries) = Element
    { elementName       = nm
    , elementAttributes = Map.empty
    , elementNodes      = [ NodeElement (toElement "si" entry)
                          | entry <- V.toList entries
                          ]
    }

{-------------------------------------------------------------------------------
  Parsing
-------------------------------------------------------------------------------}

-- | See @CT_Sst@, p. 3902.
--
-- Parses every @si@ child element of the cursor into an 'XlsxText' and
-- collects the results, in document order, into a single table. The result
-- list therefore always contains exactly one table (possibly an empty one).
--
-- The optional attributes @count@ and @uniqCount@ are currently ignored.
instance FromCursor SharedStringTable where
  fromCursor cur = [SharedStringTable (V.fromList items)]
    where
      items :: [XlsxText]
      items = cur $/ element (n_ "si") >=> fromCursor

{-------------------------------------------------------------------------------
  Extract shared strings
-------------------------------------------------------------------------------}

-- | Construct the 'SharedStringTable' from the worksheets of a document.
--
-- All plain text ('CellText') and rich text ('CellRich') cell values found in
-- the given worksheets are collected. Duplicates are removed and the
-- remaining entries are stored in ascending order, which is the order the
-- binary search in the lookup functions depends on.
sstConstruct :: [Worksheet] -> SharedStringTable
sstConstruct sheets =
    SharedStringTable . V.fromList . Set.toAscList . Set.fromList $
      concatMap sheetStrings sheets
  where
    -- All shared-string candidates of a single worksheet; cells without a
    -- value and cells with a non-text value are skipped.
    sheetStrings :: Worksheet -> [XlsxText]
    sheetStrings sheet =
      mapMaybe (_cellValue >=> toEntry) (Map.elems (_wsCells sheet))

    -- Only text and rich text values are stored in the shared string table.
    toEntry :: CellValue -> Maybe XlsxText
    toEntry value = case value of
      CellText text -> Just (XlsxText text)
      CellRich runs -> Just (XlsxRichText runs)
      _             -> Nothing

-- | Index of a plain text entry in the shared string table.
--
-- This is 'sstLookup' applied to the text wrapped in 'XlsxText'; it is
-- partial and raises an error when 'sstLookup' finds no entry.
sstLookupText :: SharedStringTable -> Text -> Int
sstLookupText sst text = sstLookup sst (XlsxText text)

-- | Index of a rich text entry in the shared string table.
--
-- This is 'sstLookup' applied to the runs wrapped in 'XlsxRichText'; it is
-- partial and raises an error when 'sstLookup' finds no entry.
sstLookupRich :: SharedStringTable -> [RichTextRun] -> Int
sstLookupRich sst runs = sstLookup sst (XlsxRichText runs)

-- | Internal generalization used by 'sstLookupText' and 'sstLookupRich'.
--
-- Returns the zero-based index of the given entry. The table is searched with
-- 'searchFromTo', so it must be sorted in ascending order (as tables built by
-- 'sstConstruct' are).
--
-- Raises an error (via 'fromJustNote') when the entry is not present in the
-- table, including when the table is empty.
sstLookup :: SharedStringTable -> XlsxText -> Int
sstLookup SharedStringTable{sstTable = shared} si =
    fromJustNote ("SST entry for " ++ show si ++ " not found") $ do
      -- Smallest index whose entry is greater than or equal to the target.
      ix <- searchFromTo (\p -> shared V.! p >= si) 0 (V.length shared - 1)
      -- The search only yields a lower bound: without this check a missing
      -- entry would silently resolve to the index of the next larger string.
      guard (shared V.! ix == si)
      return ix

-- | The entry stored at the given zero-based index, or 'Nothing' when the
-- index lies outside the table.
sstItem :: SharedStringTable -> Int -> Maybe XlsxText
sstItem (SharedStringTable shared) ix = shared V.!? ix