{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE CPP               #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternGuards     #-}
{- |
   Module      : Text.Pandoc.Readers.Docx
   Copyright   : Copyright (C) 2014-2019 Jesse Rosenthal
   License     : GNU GPL, version 2 or above

   Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
   Stability   : alpha
   Portability : portable

Conversion of Docx type (defined in Text.Pandoc.Readers.Docx.Parse)
to 'Pandoc' document.  -}

{-
Current state of implementation of Docx entities ([x] means
implemented, [-] means partially implemented):

* Blocks

  - [X] Para
  - [X] CodeBlock (styled with `SourceCode`)
  - [X] BlockQuote (styled with `Quote`, `BlockQuote`, or, optionally,
        indented)
  - [X] OrderedList
  - [X] BulletList
  - [X] DefinitionList (styled with adjacent `DefinitionTerm` and `Definition`)
  - [X] Header (styled with `Heading#`)
  - [ ] HorizontalRule
  - [-] Table (column widths and alignments not yet implemented)

* Inlines

  - [X] Str
  - [X] Emph
  - [X] Strong
  - [X] Strikeout
  - [X] Superscript
  - [X] Subscript
  - [X] SmallCaps
  - [-] Underline (was previously converted to Emph)
  - [ ] Quoted
  - [ ] Cite
  - [X] Code (styled with `VerbatimChar`)
  - [X] Space
  - [X] LineBreak (these are invisible in Word: entered with Shift-Return)
  - [X] Math
  - [X] Link (links to an arbitrary bookmark create a span with the target as
        id and "anchor" class)
  - [X] Image
  - [X] Note (Footnotes and Endnotes are silently combined.)
-}

module Text.Pandoc.Readers.Docx
       ( readDocx
       ) where

import Prelude
import Codec.Archive.Zip
import Control.Monad.Reader
import Control.Monad.State.Strict
import qualified Data.ByteString.Lazy as B
import Data.Default (Default)
import Data.List (delete, intersect)
import qualified Data.Map as M
import Data.Maybe (isJust, fromMaybe)
import Data.Sequence (ViewL (..), viewl)
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
import Text.Pandoc.Builder
-- import Text.Pandoc.Definition
import Text.Pandoc.MediaBag (MediaBag)
import Text.Pandoc.Options
import Text.Pandoc.Readers.Docx.Combine
import Text.Pandoc.Readers.Docx.Lists
import Text.Pandoc.Readers.Docx.Parse
import Text.Pandoc.Shared
import Text.Pandoc.Walk
import Text.TeXMath (writeTeX)
import Control.Monad.Except (throwError)
import Text.Pandoc.Class (PandocMonad)
import qualified Text.Pandoc.Class as P
import Text.Pandoc.Error
import Text.Pandoc.Logging

readDocx :: PandocMonad m
         => ReaderOptions
         -> B.ByteString
         -> m Pandoc
readDocx opts bytes
  | Right archive <- toArchiveOrFail bytes
  , Right (docx, parserWarnings) <- archiveToDocxWithWarnings archive = do
      mapM_ (P.report . DocxParserWarning) parserWarnings
      (meta, blks) <- docxToOutput opts docx
      return $ Pandoc meta blks
readDocx _ _ =
  throwError $ PandocSomeError "couldn't parse docx file"

data DState = DState { docxAnchorMap :: M.Map String String
                     , docxAnchorSet :: Set.Set String
                     , docxImmedPrevAnchor :: Maybe String
                     , docxMediaBag  :: MediaBag
                     , docxDropCap   :: Inlines
                     -- keep track of (numId, lvl) values for
                     -- restarting
                     , docxListState :: M.Map (String, String) Integer
                     , docxPrevPara  :: Inlines
                     }

instance Default DState where
  def = DState { docxAnchorMap = M.empty
               , docxAnchorSet = mempty
               , docxImmedPrevAnchor = Nothing
               , docxMediaBag  = mempty
               , docxDropCap   = mempty
               , docxListState = M.empty
               , docxPrevPara  = mempty
               }

data DEnv = DEnv { docxOptions       :: ReaderOptions
                 , docxInHeaderBlock :: Bool
                 }

instance Default DEnv where
  def = DEnv def False

type DocxContext m = ReaderT DEnv (StateT DState m)

evalDocxContext :: PandocMonad m => DocxContext m a -> DEnv -> DState -> m a
evalDocxContext ctx env st = flip evalStateT st $flip runReaderT env ctx

-- This is empty, but we put it in for future-proofing.
spansToKeep :: [String]
spansToKeep = []

divsToKeep :: [String]
divsToKeep = ["list-item", "Definition", "DefinitionTerm"]

metaStyles :: M.Map String String
metaStyles = M.fromList [ ("Title", "title")
                        , ("Subtitle", "subtitle")
                        , ("Author", "author")
                        , ("Date", "date")
                        , ("Abstract", "abstract")]

sepBodyParts :: [BodyPart] -> ([BodyPart], [BodyPart])
sepBodyParts = span (\bp -> isMetaPar bp || isEmptyPar bp)

isMetaPar :: BodyPart -> Bool
isMetaPar (Paragraph pPr _) =
  not $ null $ intersect (pStyle pPr) (M.keys metaStyles)
isMetaPar _ = False

isEmptyPar :: BodyPart -> Bool
isEmptyPar (Paragraph _ parParts) =
  all isEmptyParPart parParts
  where
    isEmptyParPart (PlainRun (Run _ runElems)) = all isEmptyElem runElems
    isEmptyParPart _                           = False
    isEmptyElem (TextRun s) = trim s == ""
    isEmptyElem _           = True
isEmptyPar _ = False

bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map String MetaValue)
bodyPartsToMeta' [] = return M.empty
bodyPartsToMeta' (bp : bps)
  | (Paragraph pPr parParts) <- bp
  , (c : _)<- (pStyle pPr) `intersect` (M.keys metaStyles)
  , (Just metaField) <- M.lookup c metaStyles = do
    inlines <- smushInlines <$> mapM parPartToInlines parParts
    remaining <- bodyPartsToMeta' bps
    let
      f (MetaInlines ils) (MetaInlines ils') = MetaBlocks [Para ils, Para ils']
      f (MetaInlines ils) (MetaBlocks blks) = MetaBlocks (Para ils : blks)
      f m (MetaList mv) = MetaList (m : mv)
      f m n             = MetaList [m, n]
    return $ M.insertWith f metaField (MetaInlines (toList inlines)) remaining
bodyPartsToMeta' (_ : bps) = bodyPartsToMeta' bps

bodyPartsToMeta :: PandocMonad m => [BodyPart] -> DocxContext m Meta
bodyPartsToMeta bps = do
  mp <- bodyPartsToMeta' bps
  let mp' =
        case M.lookup "author" mp of
          Just mv -> M.insert "author" (fixAuthors mv) mp
          Nothing -> mp
  return $ Meta mp'

fixAuthors :: MetaValue -> MetaValue
fixAuthors (MetaBlocks blks) =
  MetaList $ map g $ filter f blks
    where f (Para _) = True
          f _        = False
          g (Para ils) = MetaInlines ils
          g _          = MetaInlines []
fixAuthors mv = mv

codeStyles :: [String]
codeStyles = ["VerbatimChar"]

codeDivs :: [String]
codeDivs = ["SourceCode"]

runElemToInlines :: RunElem -> Inlines
runElemToInlines (TextRun s)   = text s
runElemToInlines LnBrk         = linebreak
runElemToInlines Tab           = space
runElemToInlines SoftHyphen    = text "\xad"
runElemToInlines NoBreakHyphen = text "\x2011"

runElemToString :: RunElem -> String
runElemToString (TextRun s)   = s
runElemToString LnBrk         = ['\n']
runElemToString Tab           = ['\t']
runElemToString SoftHyphen    = ['\xad']
runElemToString NoBreakHyphen = ['\x2011']

runToString :: Run -> String
runToString (Run _ runElems) = concatMap runElemToString runElems
runToString _                = ""

parPartToString :: ParPart -> String
parPartToString (PlainRun run)             = runToString run
parPartToString (InternalHyperLink _ runs) = concatMap runToString runs
parPartToString (ExternalHyperLink _ runs) = concatMap runToString runs
parPartToString _                          = ""

blacklistedCharStyles :: [String]
blacklistedCharStyles = ["Hyperlink"]

resolveDependentRunStyle :: PandocMonad m => RunStyle -> DocxContext m RunStyle
resolveDependentRunStyle rPr
  | Just (s, _)  <- rStyle rPr, s `elem` blacklistedCharStyles =
    return rPr
  | Just (_, cs) <- rStyle rPr = do
      opts <- asks docxOptions
      if isEnabled Ext_styles opts
        then return rPr
        else do rPr' <- resolveDependentRunStyle cs
                return $
                  RunStyle { isBold = case isBold rPr of
                               Just bool -> Just bool
                               Nothing   -> isBold rPr'
                           , isItalic = case isItalic rPr of
                               Just bool -> Just bool
                               Nothing   -> isItalic rPr'
                           , isSmallCaps = case isSmallCaps rPr of
                               Just bool -> Just bool
                               Nothing   -> isSmallCaps rPr'
                           , isStrike = case isStrike rPr of
                               Just bool -> Just bool
                               Nothing   -> isStrike rPr'
                           , isRTL = case isRTL rPr of
                               Just bool -> Just bool
                               Nothing   -> isRTL rPr'
                           , rVertAlign = case rVertAlign rPr of
                               Just valign -> Just valign
                               Nothing     -> rVertAlign rPr'
                           , rUnderline = case rUnderline rPr of
                               Just ulstyle -> Just ulstyle
                               Nothing      -> rUnderline rPr'
                           , rStyle = rStyle rPr
                           }
  | otherwise = return rPr

runStyleToTransform :: PandocMonad m => RunStyle -> DocxContext m (Inlines -> Inlines)
runStyleToTransform rPr
  | Just (s, _) <- rStyle rPr
  , s `elem` spansToKeep = do
      transform <- runStyleToTransform rPr{rStyle = Nothing}
      return $ spanWith ("", [s], []) . transform
  | Just (s, _) <- rStyle rPr = do
      opts <- asks docxOptions
      let extraInfo = if isEnabled Ext_styles opts
                      then spanWith ("", [], [("custom-style", s)])
                      else id
      transform <- runStyleToTransform rPr{rStyle = Nothing}
      return $ extraInfo . transform
  | Just True <- isItalic rPr = do
      transform <- runStyleToTransform rPr{isItalic = Nothing}
      return $ emph  . transform
  | Just True <- isBold rPr = do
      transform <- runStyleToTransform rPr{isBold = Nothing}
      return $ strong . transform
  | Just True <- isSmallCaps rPr = do
      transform <- runStyleToTransform rPr{isSmallCaps = Nothing}
      return $ smallcaps . transform
  | Just True <- isStrike rPr = do
      transform <- runStyleToTransform rPr{isStrike = Nothing}
      return $ strikeout . transform
  | Just True <- isRTL rPr = do
      transform <- runStyleToTransform rPr{isRTL = Nothing}
      return $ spanWith ("",[],[("dir","rtl")]) . transform
  | Just False <- isRTL rPr = do
      transform <- runStyleToTransform rPr{isRTL = Nothing}
      return $ spanWith ("",[],[("dir","ltr")]) . transform
  | Just SupScrpt <- rVertAlign rPr = do
      transform <- runStyleToTransform rPr{rVertAlign = Nothing}
      return $ superscript . transform
  | Just SubScrpt <- rVertAlign rPr = do
      transform <- runStyleToTransform rPr{rVertAlign = Nothing}
      return $ subscript . transform
  | Just "single" <- rUnderline rPr = do
      transform <- runStyleToTransform rPr{rUnderline = Nothing}
      return $ underlineSpan . transform
  | otherwise = return id

runToInlines :: PandocMonad m => Run -> DocxContext m Inlines
runToInlines (Run rs runElems)
  | Just (s, _) <- rStyle rs
  , s `elem` codeStyles = do
      rPr <- resolveDependentRunStyle rs
      let codeString = code $ concatMap runElemToString runElems
      return $ case rVertAlign rPr of
        Just SupScrpt -> superscript codeString
        Just SubScrpt -> subscript codeString
        _             -> codeString
  | otherwise = do
      rPr <- resolveDependentRunStyle rs
      let ils = smushInlines (map runElemToInlines runElems)
      transform <- runStyleToTransform rPr
      return $ transform ils
runToInlines (Footnote bps) = do
  blksList <- smushBlocks <$> mapM bodyPartToBlocks bps
  return $ note blksList
runToInlines (Endnote bps) = do
  blksList <- smushBlocks <$> mapM bodyPartToBlocks bps
  return $ note blksList
runToInlines (InlineDrawing fp title alt bs ext) = do
  (lift . lift) $ P.insertMedia fp Nothing bs
  return $ imageWith (extentToAttr ext) fp title $ text alt
runToInlines InlineChart = return $ spanWith ("", ["chart"], []) $ text "[CHART]"

extentToAttr :: Extent -> Attr
extentToAttr (Just (w, h)) =
  ("", [], [("width", showDim w), ("height", showDim h)] )
  where
    showDim d = show (d / 914400) ++ "in"
extentToAttr _ = nullAttr

blocksToInlinesWarn :: PandocMonad m => String -> Blocks -> DocxContext m Inlines
blocksToInlinesWarn cmtId blks = do
  let blkList = toList blks
      notParaOrPlain :: Block -> Bool
      notParaOrPlain (Para _)  = False
      notParaOrPlain (Plain _) = False
      notParaOrPlain _         = True
  unless ( not (any notParaOrPlain blkList)) $
    lift $ P.report $ DocxParserWarning $
      "Docx comment " ++ cmtId ++ " will not retain formatting"
  return $ blocksToInlines' blkList

-- The majority of work in this function is done in the primed
-- subfunction `partPartToInlines'`. We make this wrapper so that we
-- don't have to modify `docxImmedPrevAnchor` state after every function.
parPartToInlines :: PandocMonad m => ParPart -> DocxContext m Inlines
parPartToInlines parPart =
  case parPart of
    (BookMark _ anchor) | anchor `notElem` dummyAnchors -> do
      inHdrBool <- asks docxInHeaderBlock
      ils <- parPartToInlines' parPart
      immedPrevAnchor <- gets docxImmedPrevAnchor
      unless (isJust immedPrevAnchor || inHdrBool)
        (modify $ \s -> s{ docxImmedPrevAnchor = Just anchor})
      return ils
    _ -> do
      ils <- parPartToInlines' parPart
      modify $ \s -> s{ docxImmedPrevAnchor = Nothing}
      return ils

parPartToInlines' :: PandocMonad m => ParPart -> DocxContext m Inlines
parPartToInlines' (PlainRun r) = runToInlines r
parPartToInlines' (ChangedRuns (TrackedChange Insertion (ChangeInfo _ author date)) runs) = do
  opts <- asks docxOptions
  case readerTrackChanges opts of
    AcceptChanges -> smushInlines <$> mapM runToInlines runs
    RejectChanges -> return mempty
    AllChanges    -> do
      ils <- smushInlines <$> mapM runToInlines runs
      let attr = ("", ["insertion"], [("author", author), ("date", date)])
      return $ spanWith attr ils
parPartToInlines' (ChangedRuns (TrackedChange Deletion (ChangeInfo _ author date)) runs) = do
  opts <- asks docxOptions
  case readerTrackChanges opts of
    AcceptChanges -> return mempty
    RejectChanges -> smushInlines <$> mapM runToInlines runs
    AllChanges    -> do
      ils <- smushInlines <$> mapM runToInlines runs
      let attr = ("", ["deletion"], [("author", author), ("date", date)])
      return $ spanWith attr ils
parPartToInlines' (CommentStart cmtId author date bodyParts) = do
  opts <- asks docxOptions
  case readerTrackChanges opts of
    AllChanges -> do
      blks <- smushBlocks <$> mapM bodyPartToBlocks bodyParts
      ils <- blocksToInlinesWarn cmtId blks
      let attr = ("", ["comment-start"], [("id", cmtId), ("author", author), ("date", date)])
      return $ spanWith attr ils
    _ -> return mempty
parPartToInlines' (CommentEnd cmtId) = do
  opts <- asks docxOptions
  case readerTrackChanges opts of
    AllChanges -> do
      let attr = ("", ["comment-end"], [("id", cmtId)])
      return $ spanWith attr mempty
    _ -> return mempty
parPartToInlines' (BookMark _ anchor) | anchor `elem` dummyAnchors =
  return mempty
parPartToInlines' (BookMark _ anchor) =
  -- We record these, so we can make sure not to overwrite
  -- user-defined anchor links with header auto ids.
  do
    -- get whether we're in a header.
    inHdrBool <- asks docxInHeaderBlock
    -- Get the anchor map.
    anchorMap <- gets docxAnchorMap
    -- We don't want to rewrite if we're in a header, since we'll take
    -- care of that later, when we make the header anchor. If the
    -- bookmark were already in uniqueIdent form, this would lead to a
    -- duplication. Otherwise, we check to see if the id is already in
    -- there. Rewrite if necessary. This will have the possible effect
    -- of rewriting user-defined anchor links. However, since these
    -- are not defined in pandoc, it seems like a necessary evil to
    -- avoid an extra pass.
    immedPrevAnchor <- gets docxImmedPrevAnchor
    case immedPrevAnchor of
      Just prevAnchor -> do
        unless inHdrBool
          (modify $ \s -> s { docxAnchorMap = M.insert anchor prevAnchor anchorMap})
        return mempty
      Nothing -> do
        exts <- readerExtensions <$> asks docxOptions
        let newAnchor =
              if not inHdrBool && anchor `elem` M.elems anchorMap
              then uniqueIdent exts [Str anchor]
                     (Set.fromList $ M.elems anchorMap)
              else anchor
        unless inHdrBool
          (modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap})
        return $ spanWith (newAnchor, ["anchor"], []) mempty
parPartToInlines' (Drawing fp title alt bs ext) = do
  (lift . lift) $ P.insertMedia fp Nothing bs
  return $ imageWith (extentToAttr ext) fp title $ text alt
parPartToInlines' Chart =
  return $ spanWith ("", ["chart"], []) $ text "[CHART]"
parPartToInlines' (InternalHyperLink anchor runs) = do
  ils <- smushInlines <$> mapM runToInlines runs
  return $ link ('#' : anchor) "" ils
parPartToInlines' (ExternalHyperLink target runs) = do
  ils <- smushInlines <$> mapM runToInlines runs
  return $ link target "" ils
parPartToInlines' (PlainOMath exps) =
  return $ math $ writeTeX exps
parPartToInlines' (Field info runs) =
  case info of
    HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url runs
    UnknownField -> smushInlines <$> mapM runToInlines runs
parPartToInlines' NullParPart = return mempty

isAnchorSpan :: Inline -> Bool
isAnchorSpan (Span (_, classes, kvs) _) =
  classes == ["anchor"] &&
  null kvs
isAnchorSpan _ = False

dummyAnchors :: [String]
dummyAnchors = ["_GoBack"]

makeHeaderAnchor :: PandocMonad m => Blocks -> DocxContext m Blocks
makeHeaderAnchor bs = traverse makeHeaderAnchor' bs

makeHeaderAnchor' :: PandocMonad m => Block -> DocxContext m Block
-- If there is an anchor already there (an anchor span in the header,
-- to be exact), we rename and associate the new id with the old one.
makeHeaderAnchor' (Header n (ident, classes, kvs) ils)
  | (c:_) <- filter isAnchorSpan ils
  , (Span (anchIdent, ["anchor"], _) cIls) <- c = do
    hdrIDMap <- gets docxAnchorMap
    exts <- readerExtensions <$> asks docxOptions
    let newIdent = if null ident
                   then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap)
                   else ident
        newIls = concatMap f ils where f il | il == c   = cIls
                                            | otherwise = [il]
    modify $ \s -> s {docxAnchorMap = M.insert anchIdent newIdent hdrIDMap}
    makeHeaderAnchor' $ Header n (newIdent, classes, kvs) newIls
-- Otherwise we just give it a name, and register that name (associate
-- it with itself.)
makeHeaderAnchor' (Header n (ident, classes, kvs) ils) =
  do
    hdrIDMap <- gets docxAnchorMap
    exts <- readerExtensions <$> asks docxOptions
    let newIdent = if null ident
                   then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap)
                   else ident
    modify $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap}
    return $ Header n (newIdent, classes, kvs) ils
makeHeaderAnchor' blk = return blk

-- Rewrite a standalone paragraph block as a plain
singleParaToPlain :: Blocks -> Blocks
singleParaToPlain blks
  | (Para ils :< seeq) <- viewl $ unMany blks
  , Seq.null seeq =
      singleton $ Plain ils
singleParaToPlain blks = blks

cellToBlocks :: PandocMonad m => Cell -> DocxContext m Blocks
cellToBlocks (Cell bps) = do
  blks <- smushBlocks <$> mapM bodyPartToBlocks bps
  return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks

rowToBlocksList :: PandocMonad m => Row -> DocxContext m [Blocks]
rowToBlocksList (Row cells) = do
  blksList <- mapM cellToBlocks cells
  return $ map singleParaToPlain blksList

-- like trimInlines, but also take out linebreaks
trimSps :: Inlines -> Inlines
trimSps (Many ils) = Many $ Seq.dropWhileL isSp $Seq.dropWhileR isSp ils
  where isSp Space     = True
        isSp SoftBreak = True
        isSp LineBreak = True
        isSp _         = False

parStyleToTransform :: PandocMonad m => ParagraphStyle -> DocxContext m (Blocks -> Blocks)
parStyleToTransform pPr
  | (c:cs) <- pStyle pPr
  , c `elem` divsToKeep = do
      let pPr' = pPr { pStyle = cs }
      transform <- parStyleToTransform pPr'
      return $ divWith ("", [c], []) . transform
  | (c:cs) <- pStyle pPr,
    c `elem` listParagraphDivs = do
      let pPr' = pPr { pStyle = cs, indentation = Nothing}
      transform <- parStyleToTransform pPr'
      return $ divWith ("", [c], []) . transform
  | (c:cs) <- pStyle pPr
  , Just True <- pBlockQuote pPr = do
      opts <- asks docxOptions
      let pPr' = pPr { pStyle = cs }
      transform <- parStyleToTransform pPr'
      let extraInfo = if isEnabled Ext_styles opts
                      then divWith ("", [], [("custom-style", c)])
                      else id
      return $ extraInfo . blockQuote . transform
  | (c:cs) <- pStyle pPr = do
      opts <- asks docxOptions
      let pPr' = pPr { pStyle = cs}
      transform <- parStyleToTransform pPr'
      let extraInfo = if isEnabled Ext_styles opts
                      then divWith ("", [], [("custom-style", c)])
                      else id
      return $ extraInfo . transform
  | null (pStyle pPr)
  , Just left <- indentation pPr >>= leftParIndent
  , Just hang <- indentation pPr >>= hangingParIndent = do
    let pPr' = pPr { indentation = Nothing }
    transform <- parStyleToTransform pPr'
    return $ case (left - hang) > 0 of
               True  -> blockQuote . transform
               False -> transform
  | null (pStyle pPr),
    Just left <- indentation pPr >>= leftParIndent = do
      let pPr' = pPr { indentation = Nothing }
      transform <- parStyleToTransform pPr'
      return $ case left > 0 of
         True  -> blockQuote . transform
         False -> transform
parStyleToTransform _ = return id

bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks
bodyPartToBlocks (Paragraph pPr parparts)
  | not $ null $ codeDivs `intersect` (pStyle pPr) = do
      transform <- parStyleToTransform pPr
      return $
        transform $
        codeBlock $
        concatMap parPartToString parparts
  | Just (style, n) <- pHeading pPr = do
    ils <-local (\s-> s{docxInHeaderBlock=True})
           (smushInlines <$> mapM parPartToInlines parparts)
    makeHeaderAnchor $
      headerWith ("", delete style (pStyle pPr), []) n ils
  | otherwise = do
    ils <- (trimSps . smushInlines) <$> mapM parPartToInlines parparts
    prevParaIls <- gets docxPrevPara
    dropIls <- gets docxDropCap
    let ils' = dropIls <> ils
    if dropCap pPr
      then do modify $ \s -> s { docxDropCap = ils' }
              return mempty
      else do modify $ \s -> s { docxDropCap = mempty }
              let ils'' = prevParaIls <>
                          (if isNull prevParaIls then mempty else space) <>
                          ils'
              opts <- asks docxOptions
              case () of

                _ | isNull ils'' && not (isEnabled Ext_empty_paragraphs opts) ->
                    return mempty
                _ | Just (TrackedChange Insertion _) <- pChange pPr
                  , AcceptChanges <- readerTrackChanges opts -> do
                      modify $ \s -> s {docxPrevPara = mempty}
                      transform <- parStyleToTransform pPr
                      return $ transform $ para ils''
                _ | Just (TrackedChange Insertion _) <- pChange pPr
                  , RejectChanges <- readerTrackChanges opts -> do
                      modify $ \s -> s {docxPrevPara = ils''}
                      return mempty
                _ | Just (TrackedChange Insertion cInfo) <- pChange pPr
                  , AllChanges <- readerTrackChanges opts
                  , ChangeInfo _ cAuthor cDate <- cInfo -> do
                      let attr = ("", ["paragraph-insertion"], [("author", cAuthor), ("date", cDate)])
                          insertMark = spanWith attr mempty
                      transform <- parStyleToTransform pPr
                      return $ transform $
                        para $ ils'' <> insertMark
                _ | Just (TrackedChange Deletion _) <- pChange pPr
                  , AcceptChanges <- readerTrackChanges opts -> do
                      modify $ \s -> s {docxPrevPara = ils''}
                      return mempty
                _ | Just (TrackedChange Deletion _) <- pChange pPr
                  , RejectChanges <- readerTrackChanges opts -> do
                      modify $ \s -> s {docxPrevPara = mempty}
                      transform <- parStyleToTransform pPr
                      return $ transform $ para ils''
                _ | Just (TrackedChange Deletion cInfo) <- pChange pPr
                  , AllChanges <- readerTrackChanges opts
                  , ChangeInfo _ cAuthor cDate <- cInfo -> do
                      let attr = ("", ["paragraph-deletion"], [("author", cAuthor), ("date", cDate)])
                          insertMark = spanWith attr mempty
                      transform <- parStyleToTransform pPr
                      return $ transform $
                        para $ ils'' <> insertMark
                _ | otherwise -> do
                      modify $ \s -> s {docxPrevPara = mempty}
                      transform <- parStyleToTransform pPr
                      return $ transform $ para ils''
bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do
  -- We check whether this current numId has previously been used,
  -- since Docx expects us to pick up where we left off.
  listState <- gets docxListState
  let startFromState = M.lookup (numId, lvl) listState
      Level _ fmt txt startFromLevelInfo = levelInfo
      start = case startFromState of
        Just n -> n + 1
        Nothing -> fromMaybe 1 startFromLevelInfo
      kvs = [ ("level", lvl)
            , ("num-id", numId)
            , ("format", fmt)
            , ("text", txt)
            , ("start", show start)
            ]
  modify $ \st -> st{ docxListState = M.insert (numId, lvl) start listState}
  blks <- bodyPartToBlocks (Paragraph pPr parparts)
  return $ divWith ("", ["list-item"], kvs) blks
bodyPartToBlocks (ListItem pPr _ _ _ parparts) =
  let pPr' = pPr {pStyle = "ListParagraph": pStyle pPr}
  in
    bodyPartToBlocks $ Paragraph pPr' parparts
bodyPartToBlocks (Tbl _ _ _ []) =
  return $ para mempty
bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do
  let caption = text cap
      (hdr, rows) = case firstRowFormatting look of
        True | null rs -> (Nothing, [r])
             | otherwise -> (Just r, rs)
        False -> (Nothing, r:rs)

  cells <- mapM rowToBlocksList rows

  let width = maybe 0 maximum $ nonEmpty $ map rowLength parts
      -- Data.List.NonEmpty is not available with ghc 7.10 so we roll out
      -- our own, see
      -- https://github.com/jgm/pandoc/pull/4361#issuecomment-365416155
      nonEmpty [] = Nothing
      nonEmpty l  = Just l
      rowLength :: Row -> Int
      rowLength (Row c) = length c

  -- pad cells.  New Text.Pandoc.Builder will do that for us,
  -- so this is for compatibility while we switch over.
  let cells' = map (\row -> take width (row ++ repeat mempty)) cells

  hdrCells <- case hdr of
    Just r' -> rowToBlocksList r'
    Nothing -> return $ replicate width mempty

      -- The two following variables (horizontal column alignment and
      -- relative column widths) go to the default at the
      -- moment. Width information is in the TblGrid field of the Tbl,
      -- so should be possible. Alignment might be more difficult,
      -- since there doesn't seem to be a column entity in docx.
  let alignments = replicate width AlignDefault
      widths = replicate width 0 :: [Double]

  return $ table caption (zip alignments widths) hdrCells cells'
bodyPartToBlocks (OMathPara e) =
  return $ para $ displayMath (writeTeX e)


-- replace targets with generated anchors.
rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline
rewriteLink' l@(Link attr ils ('#':target, title)) = do
  anchorMap <- gets docxAnchorMap
  case M.lookup target anchorMap of
    Just newTarget -> do
      modify $ \s -> s{docxAnchorSet = Set.insert newTarget (docxAnchorSet s)}
      return $ Link attr ils ('#':newTarget, title)
    Nothing        -> do
      modify $ \s -> s{docxAnchorSet = Set.insert target (docxAnchorSet s)}
      return l
rewriteLink' il = return il

rewriteLinks :: PandocMonad m => [Block] -> DocxContext m [Block]
rewriteLinks = mapM (walkM rewriteLink')

removeOrphanAnchors'' :: PandocMonad m => Inline -> DocxContext m [Inline]
removeOrphanAnchors'' s@(Span (ident, classes, _) ils)
  | "anchor" `elem` classes = do
      anchorSet <- gets docxAnchorSet
      return $ if ident `Set.member` anchorSet
               then [s]
               else ils
removeOrphanAnchors'' il = return [il]

removeOrphanAnchors' :: PandocMonad m => [Inline] -> DocxContext m [Inline]
removeOrphanAnchors' ils = liftM concat $ mapM removeOrphanAnchors'' ils

removeOrphanAnchors :: PandocMonad m => [Block] -> DocxContext m [Block]
removeOrphanAnchors = mapM (walkM removeOrphanAnchors')

bodyToOutput :: PandocMonad m => Body -> DocxContext m (Meta, [Block])
bodyToOutput (Body bps) = do
  let (metabps, blkbps) = sepBodyParts bps
  meta <- bodyPartsToMeta metabps
  blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps
  blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks
  blks'' <- removeOrphanAnchors blks'
  return (meta, blks'')

docxToOutput :: PandocMonad m
             => ReaderOptions
             -> Docx
             -> m (Meta, [Block])
docxToOutput opts (Docx (Document _ body)) =
  let dEnv   = def { docxOptions  = opts} in
   evalDocxContext (bodyToOutput body) dEnv def