{-# LANGUAGE OverloadedStrings  #-}
{-# LANGUAGE RecordWildCards    #-}
{-# LANGUAGE TemplateHaskell    #-}
{-# OPTIONS_GHC -Wall #-}
module Codec.Xlsx.Types.Internal.SharedStringTable (
    -- * Main types
    SharedStringTable(..)
  , sstConstruct
  , sstLookupText
  , sstLookupRich
  , sstItem
  ) where

import           Control.Monad

import           Data.Maybe (mapMaybe)
import           Data.Text (Text)
import           Data.Vector (Vector)
import           Numeric.Search.Range (searchFromTo)
import           Text.XML
import           Text.XML.Cursor
import qualified Data.Map as Map
import qualified Data.Set as Set
import qualified Data.Vector as V

import           Codec.Xlsx.Parser.Internal
import           Codec.Xlsx.Types
import           Codec.Xlsx.Writer.Internal

-- | Shared string table
--
-- A workbook can contain thousands of cells containing string (non-numeric)
-- data, and that data is very likely to be repeated across many rows or
-- columns. Keeping a single string table that is shared across the workbook
-- improves performance when opening and saving the file, because the
-- repetitive information is read and written only once.
--
-- Relevant parts of the ECMA standard (2nd edition, part 1,
-- <http://www.ecma-international.org/publications/standards/Ecma-376.htm>;
-- page numbers refer to the page in the PDF rather than the page number as
-- printed on the page):
--
-- * Section 18.4, "Shared String Table" (p. 1712)
--   in particular subsection 18.4.9, "sst (Shared String Table)" (p. 1726)
--
-- TODO: The @extLst@ child element is currently unsupported.
newtype SharedStringTable = SharedStringTable
  { sstTable :: Vector XlsxText
    -- ^ The entries of the table. Positions in this vector are the indices
    -- returned by 'sstLookupText' and 'sstLookupRich' and accepted by
    -- 'sstItem'.
  }
  deriving (Eq, Ord, Show)

{-------------------------------------------------------------------------------
  Rendering
-------------------------------------------------------------------------------}

-- | Renders the table as an @sst@ element (see the 'ToElement' instance) and
-- passes it, together with a description string, to 'documentFromElement'.
instance ToDocument SharedStringTable where
  toDocument sst =
    documentFromElement "Shared string table generated by xlsx"
                        (toElement "sst" sst)

-- | See @CT_Sst@, p. 3902.
--
-- The resulting element carries the requested name, has no attributes, and
-- contains one @si@ child element per table entry, in table order.
--
-- TODO: The @count@ and @uniqCount@ attributes are currently unsupported.
instance ToElement SharedStringTable where
  toElement nm (SharedStringTable entries) = Element
    { elementName       = nm
    , elementAttributes = Map.empty
    , elementNodes      = [ NodeElement (toElement "si" entry)
                          | entry <- V.toList entries
                          ]
    }

{-------------------------------------------------------------------------------
  Parsing
-------------------------------------------------------------------------------}

-- | See @CT_Sst@, p. 3902
--
-- Parses every @si@ child element of the cursor and collects the results
-- into the table. The optional attributes @count@ and @uniqCount@ are
-- currently ignored.
instance FromCursor SharedStringTable where
  fromCursor cur =
    return . SharedStringTable . V.fromList $
      cur $/ element (n "si") >=> fromCursor

{-------------------------------------------------------------------------------
  Extract shared strings
-------------------------------------------------------------------------------}

-- | Construct the 'SharedStringTable' from an existing document
--
-- Collects every plain-text and rich-text cell value found in the given
-- worksheets; all other cell values are skipped. The entries are
-- de-duplicated and stored in ascending order, which is what the binary
-- search in 'sstLookup' depends on.
sstConstruct :: [Worksheet] -> SharedStringTable
sstConstruct sheets =
    SharedStringTable (V.fromList (Set.toAscList sharedEntries))
  where
    -- Going through a set both removes duplicates and sorts the entries.
    sharedEntries :: Set.Set XlsxText
    sharedEntries = Set.fromList (concatMap sheetEntries sheets)

    -- All shared-string candidates of a single worksheet.
    sheetEntries :: Worksheet -> [XlsxText]
    sheetEntries sheet =
      [ entry
      | cell       <- Map.elems (_wsCells sheet)
      , Just value <- [_cellValue cell]
      , Just entry <- [toEntry value]
      ]

    -- Only text and rich-text values live in the shared string table.
    toEntry :: CellValue -> Maybe XlsxText
    toEntry value = case value of
      CellText txt  -> Just (XlsxText txt)
      CellRich runs -> Just (XlsxRichText runs)
      _             -> Nothing

-- | Index of a plain-text entry in the shared string table.
--
-- Wraps the text in 'XlsxText' and delegates to 'sstLookup'.
sstLookupText :: SharedStringTable -> Text -> Int
sstLookupText sst txt = sstLookup sst (XlsxText txt)

-- | Index of a rich-text entry in the shared string table.
--
-- Wraps the runs in 'XlsxRichText' and delegates to 'sstLookup'.
sstLookupRich :: SharedStringTable -> [RichTextRun] -> Int
sstLookupRich sst runs = sstLookup sst (XlsxRichText runs)

-- | Internal generalization used by 'sstLookupText' and 'sstLookupRich'
--
-- Returns the index of @si@ in the table. The lookup is a binary search for
-- the first entry that is @>= si@, so the table must be sorted in ascending
-- order (as it is when built by 'sstConstruct').
--
-- Calls 'error' when @si@ is not in the table. That covers both the case
-- where every entry is smaller than @si@ and the case where the search lands
-- on a different, larger entry.
sstLookup :: SharedStringTable -> XlsxText -> Int
sstLookup SharedStringTable{sstTable = shared} si =
    case searchFromTo (\p -> shared V.! p >= si) 0 (V.length shared - 1) of
      -- The search only guarantees an entry @>= si@; accept the hit only
      -- when it is @si@ itself, otherwise we would silently hand back the
      -- index of some other string.
      Just i | shared V.! i == si -> i
      _                           -> error $ "SST entry for " ++ show si ++ " not found"

-- | Entry stored at the given index of the shared string table.
--
-- Indexes the underlying vector with 'V.!', so the index has to be within
-- the bounds of the table.
sstItem :: SharedStringTable -> Int -> XlsxText
sstItem sst idx = sstTable sst V.! idx