{-# LANGUAGE LambdaCase        #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE ViewPatterns      #-}
{- |
   Module      : Text.Pandoc.Readers.HTML.Table
   Copyright   : © 2006-2023 John MacFarlane,
                   2020-2023 Albert Krewinkel
   License     : GNU GPL, version 2 or above

   Maintainer  : Albert Krewinkel <albert@zeitkraut.de>
   Stability   : alpha
   Portability : portable

HTML table parser.
-}
module Text.Pandoc.Readers.HTML.Table (pTable) where

import qualified Data.Vector as V
import Control.Applicative ((<|>))
import Data.Maybe (fromMaybe, isJust)
import Data.Either (lefts, rights)
import Data.List.NonEmpty (nonEmpty)
import Data.List (foldl')
import Data.Text (Text)
import Text.HTML.TagSoup
import Text.Pandoc.Builder (Blocks)
import Text.Pandoc.CSS (cssAttributes)
import Text.Pandoc.Definition
import Text.Pandoc.Class.PandocMonad (PandocMonad (..))
import Text.Pandoc.Parsing
  ( eof, lookAhead, many, many1, manyTill, option, optional
  , optionMaybe, skipMany, try )
import Text.Pandoc.Readers.HTML.Parsing
import Text.Pandoc.Readers.HTML.Types (TagParser)
import Text.Pandoc.Shared (onlySimpleTableCells, safeRead)
import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B
import Control.Monad (guard)

-- | Parses a @<col>@ element, returning the column's width.
-- An Either value is used:  Left i means a "relative length" with
-- integral value i (see https://www.w3.org/TR/html4/types.html#h-6.6);
-- Right w means a regular width.  Defaults to @'Right ColWidthDefault'@
-- if the width is not set or cannot be determined.
pCol :: PandocMonad m => TagParser m (Either Int ColWidth)
pCol :: forall (m :: * -> *).
PandocMonad m =>
TagParser m (Either Int ColWidth)
pCol = forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try forall a b. (a -> b) -> a -> b
$ do
  TagOpen Text
_ [Attribute Text]
attribs' <- forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy (Text -> [Attribute Text] -> Tag Text -> Bool
matchTagOpen Text
"col" [])
  let attribs :: [Attribute Text]
attribs = [Attribute Text] -> [Attribute Text]
toStringAttr [Attribute Text]
attribs'
  forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m ()
optional forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy (Text -> Tag Text -> Bool
matchTagClose Text
"col")
  forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ case forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
"width" [Attribute Text]
attribs of
                Maybe Text
Nothing -> case forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
"style" [Attribute Text]
attribs of
                  Just (Text -> Text -> Maybe Text
T.stripPrefix Text
"width:" -> Just Text
xs) | (Char -> Bool) -> Text -> Bool
T.any (forall a. Eq a => a -> a -> Bool
== Char
'%') Text
xs ->
                    forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall a b. b -> Either a b
Right ColWidth
ColWidthDefault) (forall a b. b -> Either a b
Right forall b c a. (b -> c) -> (a -> b) -> a -> c
. Double -> ColWidth
ColWidth forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. Fractional a => a -> a -> a
/ Double
100.0))
                      forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. (MonadPlus m, Read a) => Text -> m a
safeRead ((Char -> Bool) -> Text -> Text
T.filter
                                   (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` (String
" \t\r\n%'\";" :: [Char])) Text
xs)
                  Maybe Text
_ -> forall a b. b -> Either a b
Right ColWidth
ColWidthDefault
                Just (Text -> Maybe (Text, Char)
T.unsnoc -> Just (Text
xs, Char
'*')) ->
                  forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall a b. a -> Either a b
Left Int
1) forall a b. a -> Either a b
Left forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. (MonadPlus m, Read a) => Text -> m a
safeRead Text
xs
                Just (Text -> Maybe (Text, Char)
T.unsnoc -> Just (Text
xs, Char
'%')) ->
                  forall b a. b -> (a -> b) -> Maybe a -> b
maybe (forall a b. b -> Either a b
Right ColWidth
ColWidthDefault)
                        (forall a b. b -> Either a b
Right forall b c a. (b -> c) -> (a -> b) -> a -> c
. Double -> ColWidth
ColWidth forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. Fractional a => a -> a -> a
/ Double
100.0)) forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a. (MonadPlus m, Read a) => Text -> m a
safeRead Text
xs
                Maybe Text
_ -> forall a b. b -> Either a b
Right ColWidth
ColWidthDefault

pColgroup :: PandocMonad m => TagParser m [Either Int ColWidth]
pColgroup :: forall (m :: * -> *).
PandocMonad m =>
TagParser m [Either Int ColWidth]
pColgroup = forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try forall a b. (a -> b) -> a -> b
$ do
  forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy (Text -> [Attribute Text] -> Tag Text -> Bool
matchTagOpen Text
"colgroup" [])
  forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  forall s (m :: * -> *) t u a end.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
manyTill forall (m :: * -> *).
PandocMonad m =>
TagParser m (Either Int ColWidth)
pCol (forall (m :: * -> *). PandocMonad m => Text -> TagParser m ()
pCloses Text
"colgroup" forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall s (m :: * -> *) t u.
(Stream s m t, Show t) =>
ParsecT s u m ()
eof) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank

resolveRelativeLengths :: [Either Int ColWidth] -> [ColWidth]
resolveRelativeLengths :: [Either Int ColWidth] -> [ColWidth]
resolveRelativeLengths [Either Int ColWidth]
ws =
  let remaining :: Double
remaining = Double
1 forall a. Num a => a -> a -> a
- forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum (forall a b. (a -> b) -> [a] -> [b]
map ColWidth -> Double
getColWidth forall a b. (a -> b) -> a -> b
$ forall a b. [Either a b] -> [b]
rights [Either Int ColWidth]
ws)
      relatives :: Int
relatives = forall (t :: * -> *) a. (Foldable t, Num a) => t a -> a
sum forall a b. (a -> b) -> a -> b
$ forall a b. [Either a b] -> [a]
lefts [Either Int ColWidth]
ws
      relUnit :: Double
relUnit = Double
remaining forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
relatives
      toColWidth :: Either a ColWidth -> ColWidth
toColWidth (Right ColWidth
x) = ColWidth
x
      toColWidth (Left a
i) = Double -> ColWidth
ColWidth (forall a b. (Integral a, Num b) => a -> b
fromIntegral a
i forall a. Num a => a -> a -> a
* Double
relUnit)
  in  forall a b. (a -> b) -> [a] -> [b]
map forall {a}. Integral a => Either a ColWidth -> ColWidth
toColWidth [Either Int ColWidth]
ws

getColWidth :: ColWidth -> Double
getColWidth :: ColWidth -> Double
getColWidth ColWidth
ColWidthDefault = Double
0
getColWidth (ColWidth Double
w) = Double
w

data CellType
  = HeaderCell
  | BodyCell
  deriving CellType -> CellType -> Bool
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CellType -> CellType -> Bool
$c/= :: CellType -> CellType -> Bool
== :: CellType -> CellType -> Bool
$c== :: CellType -> CellType -> Bool
Eq

pCell :: PandocMonad m
      => TagParser m Blocks
      -> CellType
      -> TagParser m (CellType, Cell)
pCell :: forall (m :: * -> *).
PandocMonad m =>
TagParser m Blocks -> CellType -> TagParser m (CellType, Cell)
pCell TagParser m Blocks
block CellType
celltype = forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try forall a b. (a -> b) -> a -> b
$ do
  let celltype' :: Text
celltype' = case CellType
celltype of
        CellType
HeaderCell -> Text
"th"
        CellType
BodyCell   -> Text
"td"
  forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  TagOpen Text
_ [Attribute Text]
attribs <- forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m a
lookAhead forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy (Text -> [Attribute Text] -> Tag Text -> Bool
matchTagOpen Text
celltype' [])
  let cssAttribs :: [Attribute Text]
cssAttribs = forall b a. b -> (a -> b) -> Maybe a -> b
maybe [] Text -> [Attribute Text]
cssAttributes forall a b. (a -> b) -> a -> b
$ forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
"style" [Attribute Text]
attribs
  let align :: Alignment
align = case forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
"align" [Attribute Text]
attribs forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
                   forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
"text-align" [Attribute Text]
cssAttribs of
                Just Text
"left"   -> Alignment
AlignLeft
                Just Text
"right"  -> Alignment
AlignRight
                Just Text
"center" -> Alignment
AlignCenter
                Maybe Text
_             -> Alignment
AlignDefault
  let rowspan :: RowSpan
rowspan = Int -> RowSpan
RowSpan forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. a -> Maybe a -> a
fromMaybe Int
1 forall a b. (a -> b) -> a -> b
$
                forall (m :: * -> *) a. (MonadPlus m, Read a) => Text -> m a
safeRead forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
"rowspan" [Attribute Text]
attribs
  let colspan :: ColSpan
colspan = Int -> ColSpan
ColSpan forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. a -> Maybe a -> a
fromMaybe Int
1 forall a b. (a -> b) -> a -> b
$
                forall (m :: * -> *) a. (MonadPlus m, Read a) => Text -> m a
safeRead forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup Text
"colspan" [Attribute Text]
attribs
  Blocks
res <- forall (m :: * -> *) a.
(PandocMonad m, Monoid a) =>
Text -> TagParser m a -> TagParser m a
pInTags Text
celltype' TagParser m Blocks
block
  forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  let handledAttribs :: [Text]
handledAttribs = [Text
"align", Text
"colspan", Text
"rowspan", Text
"text-align"]
      attribs' :: [Attribute Text]
attribs' = forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr Attribute Text -> [Attribute Text] -> [Attribute Text]
go [] [Attribute Text]
attribs
      go :: Attribute Text -> [Attribute Text] -> [Attribute Text]
go kv :: Attribute Text
kv@(Text
k, Text
_) [Attribute Text]
acc = case Text
k of
        Text
"style" -> case forall a. (a -> Bool) -> [a] -> [a]
filter ((forall a. Eq a => a -> a -> Bool
/= Text
"text-align") forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst) [Attribute Text]
cssAttribs of
                     [] -> [Attribute Text]
acc
                     [Attribute Text]
cs -> (Text
"style", [Attribute Text] -> Text
toStyleString [Attribute Text]
cs) forall a. a -> [a] -> [a]
: [Attribute Text]
acc
        -- drop attrib if it's already handled
        Text
_ | Text
k forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Text]
handledAttribs -> [Attribute Text]
acc
        Text
_ -> Attribute Text
kv forall a. a -> [a] -> [a]
: [Attribute Text]
acc
  forall (m :: * -> *) a. Monad m => a -> m a
return (CellType
celltype, Attr -> Alignment -> RowSpan -> ColSpan -> Blocks -> Cell
B.cellWith ([Attribute Text] -> Attr
toAttr [Attribute Text]
attribs') Alignment
align RowSpan
rowspan ColSpan
colspan Blocks
res)

-- | Create a style attribute string from a list of CSS attributes
toStyleString :: [(Text, Text)] -> Text
toStyleString :: [Attribute Text] -> Text
toStyleString = Text -> [Text] -> Text
T.intercalate Text
"; " forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a -> b) -> [a] -> [b]
map (\(Text
k, Text
v) -> Text
k forall a. Semigroup a => a -> a -> a
<> Text
": " forall a. Semigroup a => a -> a -> a
<> Text
v)

-- | Parses a normal table row; returns the row and the number
-- of cells at the beginning that are header cells.
pRow :: PandocMonad m
     => TagParser m Blocks
     -> TagParser m (Int, B.Row)
pRow :: forall (m :: * -> *).
PandocMonad m =>
TagParser m Blocks -> TagParser m (Int, Row)
pRow TagParser m Blocks
block = forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try forall a b. (a -> b) -> a -> b
$ do
  forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  TagOpen Text
_ [Attribute Text]
attribs <- forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy (Text -> [Attribute Text] -> Tag Text -> Bool
matchTagOpen Text
"tr" []) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  [(CellType, Cell)]
cells <- forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (forall (m :: * -> *).
PandocMonad m =>
TagParser m Blocks -> CellType -> TagParser m (CellType, Cell)
pCell TagParser m Blocks
block CellType
BodyCell forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall (m :: * -> *).
PandocMonad m =>
TagParser m Blocks -> CellType -> TagParser m (CellType, Cell)
pCell TagParser m Blocks
block CellType
HeaderCell)
  TagClose Text
_ <- forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy (Text -> Tag Text -> Bool
matchTagClose Text
"tr")
  let numheadcells :: Int
numheadcells = forall (t :: * -> *) a. Foldable t => t a -> Int
length forall a b. (a -> b) -> a -> b
$ forall a. (a -> Bool) -> [a] -> [a]
takeWhile (\(CellType
ct,Cell
_) -> CellType
ct forall a. Eq a => a -> a -> Bool
== CellType
HeaderCell) [(CellType, Cell)]
cells
  forall (m :: * -> *) a. Monad m => a -> m a
return (Int
numheadcells, Attr -> [Cell] -> Row
Row ([Attribute Text] -> Attr
toAttr [Attribute Text]
attribs) forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> b
snd [(CellType, Cell)]
cells)

-- | Parses a header row, i.e., a row which containing nothing but
-- @<th>@ elements.
pHeaderRow :: PandocMonad m
           => TagParser m Blocks
           -> TagParser m B.Row
pHeaderRow :: forall (m :: * -> *).
PandocMonad m =>
TagParser m Blocks -> TagParser m Row
pHeaderRow TagParser m Blocks
block = forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try forall a b. (a -> b) -> a -> b
$ do
  forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  let pThs :: ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) [Cell]
pThs = forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (forall a b. (a, b) -> b
snd forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *).
PandocMonad m =>
TagParser m Blocks -> CellType -> TagParser m (CellType, Cell)
pCell TagParser m Blocks
block CellType
HeaderCell)
  let mkRow :: ([Attribute Text], [Cell]) -> Row
mkRow ([Attribute Text]
attribs, [Cell]
cells) = Attr -> [Cell] -> Row
Row ([Attribute Text] -> Attr
toAttr [Attribute Text]
attribs) [Cell]
cells
  ([Attribute Text], [Cell]) -> Row
mkRow forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *) a.
PandocMonad m =>
TagOmission
-> Text -> TagParser m a -> TagParser m ([Attribute Text], a)
pInTagWithAttribs TagOmission
TagsRequired Text
"tr" ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) [Cell]
pThs

-- | Parses a table head. If there is no @thead@ element, this looks for
-- a row of @<th>@-only elements as the first line of the table.
pTableHead :: PandocMonad m
           => TagParser m Blocks
           -> TagParser m TableHead
pTableHead :: forall (m :: * -> *).
PandocMonad m =>
TagParser m Blocks -> TagParser m TableHead
pTableHead TagParser m Blocks
block = forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try forall a b. (a -> b) -> a -> b
$ do
  forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  let pRows :: ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) [(Int, Row)]
pRows = forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (forall (m :: * -> *).
PandocMonad m =>
TagParser m Blocks -> TagParser m (Int, Row)
pRow TagParser m Blocks
block)
  let pThead :: TagParser m ([Attribute Text], [(Int, Row)])
pThead = forall (m :: * -> *) a.
PandocMonad m =>
TagOmission
-> Text -> TagParser m a -> TagParser m ([Attribute Text], a)
pInTagWithAttribs TagOmission
ClosingTagOptional Text
"thead" ParsecT [Tag Text] HTMLState (ReaderT HTMLLocal m) [(Int, Row)]
pRows
  forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m (Maybe a)
optionMaybe TagParser m ([Attribute Text], [(Int, Row)])
pThead forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= \case
    Just ([Attribute Text]
attribs, [(Int, Row)]
rows) ->
      forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Attr -> [Row] -> TableHead
TableHead ([Attribute Text] -> Attr
toAttr [Attribute Text]
attribs) forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> b
snd [(Int, Row)]
rows
    Maybe ([Attribute Text], [(Int, Row)])
Nothing -> Maybe Row -> TableHead
mkTableHead forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m (Maybe a)
optionMaybe (forall (m :: * -> *).
PandocMonad m =>
TagParser m Blocks -> TagParser m Row
pHeaderRow TagParser m Blocks
block)
               where
                 mkTableHead :: Maybe Row -> TableHead
mkTableHead = Attr -> [Row] -> TableHead
TableHead Attr
nullAttr forall b c a. (b -> c) -> (a -> b) -> a -> c
. \case
                   -- Use row as header only if it's non-empty
                   Just row :: Row
row@(Row Attr
_ (Cell
_:[Cell]
_)) -> [Row
row]
                   Maybe Row
_                      -> []

-- | Parses a table foot
pTableFoot :: PandocMonad m
           => TagParser m Blocks
           -> TagParser m TableFoot
pTableFoot :: forall (m :: * -> *).
PandocMonad m =>
TagParser m Blocks -> TagParser m TableFoot
pTableFoot TagParser m Blocks
block = forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try forall a b. (a -> b) -> a -> b
$ do
  forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  TagOpen Text
_ [Attribute Text]
attribs <- forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy (Text -> [Attribute Text] -> Tag Text -> Bool
matchTagOpen Text
"tfoot" []) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  [Row]
rows <- forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many forall a b. (a -> b) -> a -> b
$ forall a b. (a, b) -> b
snd forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (forall (m :: * -> *).
PandocMonad m =>
TagParser m Blocks -> TagParser m (Int, Row)
pRow TagParser m Blocks
block forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank)
  forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m ()
optional forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy (Text -> Tag Text -> Bool
matchTagClose Text
"tfoot")
  forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Attr -> [Row] -> TableFoot
TableFoot ([Attribute Text] -> Attr
toAttr [Attribute Text]
attribs) [Row]
rows

-- | Parses a table body
pTableBody :: PandocMonad m
           => TagParser m Blocks
           -> TagParser m TableBody
pTableBody :: forall (m :: * -> *).
PandocMonad m =>
TagParser m Blocks -> TagParser m TableBody
pTableBody TagParser m Blocks
block = forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try forall a b. (a -> b) -> a -> b
$ do
  forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  Maybe [Attribute Text]
mbattribs <- forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option forall a. Maybe a
Nothing forall a b. (a -> b) -> a -> b
$ forall a. a -> Maybe a
Just forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall {str}. Tag str -> [Attribute str]
getAttribs forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
                 forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy (Text -> [Attribute Text] -> Tag Text -> Bool
matchTagOpen Text
"tbody" []) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  [Row]
bodyheads <- forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (forall (m :: * -> *).
PandocMonad m =>
TagParser m Blocks -> TagParser m Row
pHeaderRow TagParser m Blocks
block)
  [(Int, Row)]
rows <- forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (forall (m :: * -> *).
PandocMonad m =>
TagParser m Blocks -> TagParser m (Int, Row)
pRow TagParser m Blocks
block forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank)
  forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m ()
optional forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy (Text -> Tag Text -> Bool
matchTagClose Text
"tbody")
  forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall a b. (a -> b) -> a -> b
$ forall a. Maybe a -> Bool
isJust Maybe [Attribute Text]
mbattribs Bool -> Bool -> Bool
|| Bool -> Bool
not (forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Row]
bodyheads Bool -> Bool -> Bool
&& forall (t :: * -> *) a. Foldable t => t a -> Bool
null [(Int, Row)]
rows)
  let attribs :: [Attribute Text]
attribs = forall a. a -> Maybe a -> a
fromMaybe [] Maybe [Attribute Text]
mbattribs
  -- we only set row head columns if all rows agree;
  -- if some rows have headings but others not, we use 0; see #8984, #8634:
  let numrows :: Int
numrows = forall (t :: * -> *) a. Foldable t => t a -> Int
length [(Int, Row)]
rows
  let adjustRowHeadColsForCell :: Int -> Vector Int -> Cell -> Vector Int
adjustRowHeadColsForCell Int
currentrow Vector Int
headcolsv
                    (Cell Attr
_ Alignment
_ (RowSpan Int
rowspan) (ColSpan Int
colspan) [Block]
_) =
        forall a b. (Int -> a -> b) -> Vector a -> Vector b
V.imap (\Int
i Int
x -> if Int
i forall a. Ord a => a -> a -> Bool
>= Int
currentrow Bool -> Bool -> Bool
&&
                           Int
i forall a. Ord a => a -> a -> Bool
< Int
currentrow forall a. Num a => a -> a -> a
+ Int
rowspan
                           then Int
x forall a. Num a => a -> a -> a
+ Int
colspan
                           else Int
x) Vector Int
headcolsv
  let adjustRowHeadCols :: Vector Int -> (Int, (Int, Row)) -> Vector Int
adjustRowHeadCols
        Vector Int
headcolsv
        (Int
currentrow, (Int
numheads, Row Attr
_ [Cell]
cells)) =
          forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' (Int -> Vector Int -> Cell -> Vector Int
adjustRowHeadColsForCell Int
currentrow) Vector Int
headcolsv
            (forall a. Int -> [a] -> [a]
take Int
numheads [Cell]
cells)
  let headcols :: Vector Int
headcols = forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl' Vector Int -> (Int, (Int, Row)) -> Vector Int
adjustRowHeadCols
                         (forall a. Int -> a -> Vector a
V.replicate Int
numrows (Int
0 :: Int))
                         (forall a b. [a] -> [b] -> [(a, b)]
zip [(Int
0 :: Int)..] [(Int, Row)]
rows)
  let rowHeadCols :: RowHeadColumns
rowHeadCols = case forall a. Vector a -> Maybe (a, Vector a)
V.uncons Vector Int
headcols of
                       Just (Int
x, Vector Int
v) | forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all (forall a. Eq a => a -> a -> Bool
== Int
x) Vector Int
v -> Int -> RowHeadColumns
RowHeadColumns Int
x
                       Maybe (Int, Vector Int)
_ -> Int -> RowHeadColumns
RowHeadColumns Int
0
  forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ Attr -> RowHeadColumns -> [Row] -> [Row] -> TableBody
TableBody ([Attribute Text] -> Attr
toAttr [Attribute Text]
attribs) RowHeadColumns
rowHeadCols [Row]
bodyheads (forall a b. (a -> b) -> [a] -> [b]
map forall a b. (a, b) -> b
snd [(Int, Row)]
rows)
  where
    getAttribs :: Tag str -> [Attribute str]
getAttribs (TagOpen str
_ [Attribute str]
attribs) = [Attribute str]
attribs
    getAttribs Tag str
_ = []

-- | Parses a simple HTML table
pTable :: PandocMonad m
       => TagParser m Blocks -- ^ Caption and cell contents parser
       -> TagParser m Blocks
pTable :: forall (m :: * -> *).
PandocMonad m =>
TagParser m Blocks -> TagParser m Blocks
pTable TagParser m Blocks
block = forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try forall a b. (a -> b) -> a -> b
$ do
  TagOpen Text
_ [Attribute Text]
attribs <- forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy (Text -> [Attribute Text] -> Tag Text -> Bool
matchTagOpen Text
"table" [])  forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  Blocks
caption <- forall s (m :: * -> *) t a u.
Stream s m t =>
a -> ParsecT s u m a -> ParsecT s u m a
option forall a. Monoid a => a
mempty forall a b. (a -> b) -> a -> b
$ forall (m :: * -> *) a.
(PandocMonad m, Monoid a) =>
Text -> TagParser m a -> TagParser m a
pInTags Text
"caption" TagParser m Blocks
block       forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  [ColWidth]
widths <- [Either Int ColWidth] -> [ColWidth]
resolveRelativeLengths forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
              ((forall a. Monoid a => [a] -> a
mconcat forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m [a]
many1 forall (m :: * -> *).
PandocMonad m =>
TagParser m [Either Int ColWidth]
pColgroup) forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many forall (m :: * -> *).
PandocMonad m =>
TagParser m (Either Int ColWidth)
pCol) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  TableHead
thead   <- forall (m :: * -> *).
PandocMonad m =>
TagParser m Blocks -> TagParser m TableHead
pTableHead TagParser m Blocks
block               forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  Maybe TableFoot
topfoot <- forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m (Maybe a)
optionMaybe (forall (m :: * -> *).
PandocMonad m =>
TagParser m Blocks -> TagParser m TableFoot
pTableFoot TagParser m Blocks
block) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  [TableBody]
tbodies <- forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m [a]
many (forall (m :: * -> *).
PandocMonad m =>
TagParser m Blocks -> TagParser m TableBody
pTableBody TagParser m Blocks
block)        forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  Maybe TableFoot
botfoot <- forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m (Maybe a)
optionMaybe (forall (m :: * -> *).
PandocMonad m =>
TagParser m Blocks -> TagParser m TableFoot
pTableFoot TagParser m Blocks
block) forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m ()
skipMany forall (m :: * -> *). PandocMonad m => TagParser m ()
pBlank
  TagClose Text
_ <- forall (m :: * -> *).
PandocMonad m =>
(Tag Text -> Bool) -> TagParser m (Tag Text)
pSatisfy (Text -> Tag Text -> Bool
matchTagClose Text
"table")
  let tfoot :: TableFoot
tfoot = forall a. a -> Maybe a -> a
fromMaybe (Attr -> [Row] -> TableFoot
TableFoot Attr
nullAttr []) forall a b. (a -> b) -> a -> b
$ Maybe TableFoot
topfoot forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Maybe TableFoot
botfoot
  case [ColWidth]
-> TableHead
-> [TableBody]
-> TableFoot
-> Either String ([ColSpec], TableHead, [TableBody], TableFoot)
normalize [ColWidth]
widths TableHead
thead [TableBody]
tbodies TableFoot
tfoot of
    Left String
err -> forall (m :: * -> *) a. MonadFail m => String -> m a
fail String
err
    Right ([ColSpec]
colspecs, TableHead
thead', [TableBody]
tbodies', TableFoot
tfoot') -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$
      Attr
-> Caption
-> [ColSpec]
-> TableHead
-> [TableBody]
-> TableFoot
-> Blocks
B.tableWith ([Attribute Text] -> Attr
toAttr [Attribute Text]
attribs)
                  (Blocks -> Caption
B.simpleCaption Blocks
caption)
                  [ColSpec]
colspecs
                  TableHead
thead'
                  [TableBody]
tbodies'
                  TableFoot
tfoot'
data TableType
  = SimpleTable
  | NormalTable

tableType :: [[Cell]] -> TableType
tableType :: [[Cell]] -> TableType
tableType [[Cell]]
cells =
  if [[[Block]]] -> Bool
onlySimpleTableCells forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map (forall a b. (a -> b) -> [a] -> [b]
map Cell -> [Block]
cellContents) [[Cell]]
cells
  then TableType
SimpleTable
  else TableType
NormalTable
  where
    cellContents :: Cell -> [Block]
    cellContents :: Cell -> [Block]
cellContents (Cell Attr
_ Alignment
_ RowSpan
_ ColSpan
_ [Block]
bs) = [Block]
bs

normalize :: [ColWidth] -> TableHead -> [TableBody] -> TableFoot
          -> Either String ([ColSpec], TableHead, [TableBody], TableFoot)
normalize :: [ColWidth]
-> TableHead
-> [TableBody]
-> TableFoot
-> Either String ([ColSpec], TableHead, [TableBody], TableFoot)
normalize [ColWidth]
widths TableHead
head' [TableBody]
bodies TableFoot
foot = do
  let rows :: [Row]
rows = TableHead -> [Row]
headRows TableHead
head' forall a. Semigroup a => a -> a -> a
<> forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap TableBody -> [Row]
bodyRows [TableBody]
bodies forall a. Semigroup a => a -> a -> a
<> TableFoot -> [Row]
footRows TableFoot
foot
  let cellWidth :: Cell -> Int
cellWidth (Cell Attr
_ Alignment
_ RowSpan
_ (ColSpan Int
cs) [Block]
_) = Int
cs
  let rowLength :: Row -> Int
rowLength = forall (t :: * -> *) a b.
Foldable t =>
(a -> b -> b) -> b -> t a -> b
foldr (\Cell
cell Int
acc -> Cell -> Int
cellWidth Cell
cell forall a. Num a => a -> a -> a
+ Int
acc) Int
0 forall b c a. (b -> c) -> (a -> b) -> a -> c
. Row -> [Cell]
rowCells
  let ncols :: Int
ncols = forall b a. b -> (a -> b) -> Maybe a -> b
maybe Int
0 forall (t :: * -> *) a. (Foldable t, Ord a) => t a -> a
maximum forall a b. (a -> b) -> a -> b
$ forall a. [a] -> Maybe (NonEmpty a)
nonEmpty forall a b. (a -> b) -> a -> b
$ forall a b. (a -> b) -> [a] -> [b]
map Row -> Int
rowLength [Row]
rows
  let tblType :: TableType
tblType = [[Cell]] -> TableType
tableType (forall a b. (a -> b) -> [a] -> [b]
map Row -> [Cell]
rowCells [Row]
rows)
  -- fail on empty table
  if forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Row]
rows
    then forall a b. a -> Either a b
Left String
"empty table"
    else forall a b. b -> Either a b
Right
         ( forall a b. [a] -> [b] -> [(a, b)]
zip (Int -> [TableBody] -> [Alignment]
calculateAlignments Int
ncols [TableBody]
bodies)
               (Int -> TableType -> [ColWidth] -> [ColWidth]
normalizeColWidths Int
ncols TableType
tblType [ColWidth]
widths)
         , TableHead
head'
         , [TableBody]
bodies
         , TableFoot
foot
         )

normalizeColWidths :: Int -> TableType -> [ColWidth] -> [ColWidth]
normalizeColWidths :: Int -> TableType -> [ColWidth] -> [ColWidth]
normalizeColWidths Int
ncols TableType
tblType = \case
  [] -> case TableType
tblType of
          TableType
SimpleTable -> forall a. Int -> a -> [a]
replicate Int
ncols ColWidth
ColWidthDefault
          TableType
NormalTable -> forall a. Int -> a -> [a]
replicate Int
ncols (Double -> ColWidth
ColWidth forall a b. (a -> b) -> a -> b
$ Double
1 forall a. Fractional a => a -> a -> a
/ forall a b. (Integral a, Num b) => a -> b
fromIntegral Int
ncols)
  [ColWidth]
widths -> [ColWidth]
widths

calculateAlignments :: Int -> [TableBody] -> [Alignment]
calculateAlignments :: Int -> [TableBody] -> [Alignment]
calculateAlignments Int
cols [TableBody]
tbodies =
  case [[Cell]]
cells of
    [Cell]
cs:[[Cell]]
_ -> forall a. Int -> [a] -> [a]
take Int
cols forall a b. (a -> b) -> a -> b
$ forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Cell -> [Alignment]
cellAligns [Cell]
cs forall a. [a] -> [a] -> [a]
++ forall a. a -> [a]
repeat Alignment
AlignDefault
    [[Cell]]
_    -> forall a. Int -> a -> [a]
replicate Int
cols Alignment
AlignDefault
  where
    cells :: [[Cell]]
    cells :: [[Cell]]
cells = forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap TableBody -> [[Cell]]
bodyRowCells [TableBody]
tbodies
    cellAligns :: Cell -> [Alignment]
    cellAligns :: Cell -> [Alignment]
cellAligns (Cell Attr
_ Alignment
align RowSpan
_ (ColSpan Int
cs) [Block]
_) = forall a. Int -> a -> [a]
replicate Int
cs Alignment
align

bodyRowCells :: TableBody -> [[Cell]]
bodyRowCells :: TableBody -> [[Cell]]
bodyRowCells = forall a b. (a -> b) -> [a] -> [b]
map Row -> [Cell]
rowCells forall b c a. (b -> c) -> (a -> b) -> a -> c
. TableBody -> [Row]
bodyRows

headRows :: TableHead -> [B.Row]
headRows :: TableHead -> [Row]
headRows (TableHead Attr
_ [Row]
rows) = [Row]
rows

bodyRows :: TableBody -> [B.Row]
bodyRows :: TableBody -> [Row]
bodyRows (TableBody Attr
_ RowHeadColumns
_ [Row]
headerRows [Row]
bodyRows') = [Row]
headerRows forall a. Semigroup a => a -> a -> a
<> [Row]
bodyRows'

footRows :: TableFoot -> [B.Row]
footRows :: TableFoot -> [Row]
footRows (TableFoot Attr
_ [Row]
rows) = [Row]
rows

rowCells :: B.Row -> [Cell]
rowCells :: Row -> [Cell]
rowCells (Row Attr
_ [Cell]
cells) = [Cell]
cells