{-# LANGUAGE CPP               #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE PatternGuards     #-}
{-# LANGUAGE MultiWayIf        #-}
{-# LANGUAGE FlexibleContexts  #-}
{-# LANGUAGE ViewPatterns      #-}
{- |
   Module      : Text.Pandoc.Readers.Docx
   Copyright   : Copyright (C) 2014-2020 Jesse Rosenthal
   License     : GNU GPL, version 2 or above

   Maintainer  : Jesse Rosenthal <jrosenthal@jhu.edu>
   Stability   : alpha
   Portability : portable

Conversion of Docx type (defined in Text.Pandoc.Readers.Docx.Parse)
to 'Pandoc' document.  -}

{-
Current state of implementation of Docx entities ([x] means
implemented, [-] means partially implemented):

* Blocks

  - [X] Para
  - [X] CodeBlock (styled with `SourceCode`)
  - [X] BlockQuote (styled with `Quote`, `BlockQuote`, or, optionally,
        indented)
  - [X] OrderedList
  - [X] BulletList
  - [X] DefinitionList (styled with adjacent `DefinitionTerm` and `Definition`)
  - [X] Header (styled with `Heading#`)
  - [ ] HorizontalRule
  - [-] Table (column widths and alignments not yet implemented)

* Inlines

  - [X] Str
  - [X] Emph
  - [X] Strong
  - [X] Strikeout
  - [X] Superscript
  - [X] Subscript
  - [X] SmallCaps
  - [-] Underline (was previously converted to Emph)
  - [ ] Quoted
  - [ ] Cite
  - [X] Code (styled with `VerbatimChar`)
  - [X] Space
  - [X] LineBreak (these are invisible in Word: entered with Shift-Return)
  - [X] Math
  - [X] Link (links to an arbitrary bookmark create a span with the target as
        id and "anchor" class)
  - [X] Image
  - [X] Note (Footnotes and Endnotes are silently combined.)
-}

module Text.Pandoc.Readers.Docx
       ( readDocx
       ) where

import Codec.Archive.Zip
import Control.Monad.Reader
import Control.Monad.State.Strict
import qualified Data.ByteString.Lazy as B
import Data.Default (Default)
import Data.List (delete, intersect)
import Data.Char (isSpace)
import qualified Data.Map as M
import qualified Data.Text as T
import Data.Maybe (isJust, fromMaybe)
import Data.Sequence (ViewL (..), viewl)
import qualified Data.Sequence as Seq
import qualified Data.Set as Set
import Text.Pandoc.Builder
import Text.Pandoc.MediaBag (MediaBag)
import Text.Pandoc.Options
import Text.Pandoc.Readers.Docx.Combine
import Text.Pandoc.Readers.Docx.Lists
import Text.Pandoc.Readers.Docx.Parse
import Text.Pandoc.Shared
import Text.Pandoc.Walk
import Text.TeXMath (writeTeX)
import Control.Monad.Except (throwError)
import Text.Pandoc.Class.PandocMonad (PandocMonad)
import qualified Text.Pandoc.Class.PandocMonad as P
import Text.Pandoc.Error
import Text.Pandoc.Logging

readDocx :: PandocMonad m
         => ReaderOptions
         -> B.ByteString
         -> m Pandoc
readDocx opts bytes
  | Right archive <- toArchiveOrFail bytes
  , Right (docx, parserWarnings) <- archiveToDocxWithWarnings archive = do
      mapM_ (P.report . DocxParserWarning) parserWarnings
      (meta, blks) <- docxToOutput opts docx
      return $ Pandoc meta blks
readDocx _ _ =
  throwError $ PandocSomeError "couldn't parse docx file"

data DState = DState { docxAnchorMap :: M.Map T.Text T.Text
                     , docxAnchorSet :: Set.Set T.Text
                     , docxImmedPrevAnchor :: Maybe T.Text
                     , docxMediaBag  :: MediaBag
                     , docxDropCap   :: Inlines
                     -- keep track of (numId, lvl) values for
                     -- restarting
                     , docxListState :: M.Map (T.Text, T.Text) Integer
                     , docxPrevPara  :: Inlines
                     }

instance Default DState where
  def = DState { docxAnchorMap = M.empty
               , docxAnchorSet = mempty
               , docxImmedPrevAnchor = Nothing
               , docxMediaBag  = mempty
               , docxDropCap   = mempty
               , docxListState = M.empty
               , docxPrevPara  = mempty
               }

data DEnv = DEnv { docxOptions       :: ReaderOptions
                 , docxInHeaderBlock :: Bool
                 , docxInBidi        :: Bool
                 }

instance Default DEnv where
  def = DEnv def False False

type DocxContext m = ReaderT DEnv (StateT DState m)

evalDocxContext :: PandocMonad m => DocxContext m a -> DEnv -> DState -> m a
evalDocxContext ctx env st = flip evalStateT st $ runReaderT ctx env

-- This is empty, but we put it in for future-proofing.
spansToKeep :: [CharStyleName]
spansToKeep = []

divsToKeep :: [ParaStyleName]
divsToKeep = ["Definition", "Definition Term"]

metaStyles :: M.Map ParaStyleName T.Text
metaStyles = M.fromList [ ("Title", "title")
                        , ("Subtitle", "subtitle")
                        , ("Author", "author")
                        , ("Date", "date")
                        , ("Abstract", "abstract")]

sepBodyParts :: [BodyPart] -> ([BodyPart], [BodyPart])
sepBodyParts = span (\bp -> isMetaPar bp || isEmptyPar bp)

isMetaPar :: BodyPart -> Bool
isMetaPar (Paragraph pPr _) =
  not $ null $ intersect (getStyleNames $ pStyle pPr) (M.keys metaStyles)
isMetaPar _ = False

isEmptyPar :: BodyPart -> Bool
isEmptyPar (Paragraph _ parParts) =
  all isEmptyParPart parParts
  where
    isEmptyParPart (PlainRun (Run _ runElems)) = all isEmptyElem runElems
    isEmptyParPart _                           = False
    isEmptyElem (TextRun s) = trim s == ""
    isEmptyElem _           = True
isEmptyPar _ = False

bodyPartsToMeta' :: PandocMonad m => [BodyPart] -> DocxContext m (M.Map T.Text MetaValue)
bodyPartsToMeta' [] = return M.empty
bodyPartsToMeta' (bp : bps)
  | (Paragraph pPr parParts) <- bp
  , (c : _)<- getStyleNames (pStyle pPr) `intersect` M.keys metaStyles
  , (Just metaField) <- M.lookup c metaStyles = do
    inlines <- smushInlines <$> mapM parPartToInlines parParts
    remaining <- bodyPartsToMeta' bps
    let
      f (MetaInlines ils) (MetaInlines ils') = MetaBlocks [Para ils, Para ils']
      f (MetaInlines ils) (MetaBlocks blks) = MetaBlocks (Para ils : blks)
      f m (MetaList mv) = MetaList (m : mv)
      f m n             = MetaList [m, n]
    return $ M.insertWith f metaField (MetaInlines (toList inlines)) remaining
bodyPartsToMeta' (_ : bps) = bodyPartsToMeta' bps

bodyPartsToMeta :: PandocMonad m => [BodyPart] -> DocxContext m Meta
bodyPartsToMeta bps = do
  mp <- bodyPartsToMeta' bps
  let mp' =
        case M.lookup "author" mp of
          Just mv -> M.insert "author" (fixAuthors mv) mp
          Nothing -> mp
  return $ Meta mp'

fixAuthors :: MetaValue -> MetaValue
fixAuthors (MetaBlocks blks) =
  MetaList $ map g $ filter f blks
    where f (Para _) = True
          f _        = False
          g (Para ils) = MetaInlines ils
          g _          = MetaInlines []
fixAuthors mv = mv

isInheritedFromStyles :: (Eq (StyleName s), HasStyleName s, HasParentStyle s) => [StyleName s] -> s -> Bool
isInheritedFromStyles names sty
  | getStyleName sty `elem` names = True
  | Just psty <- getParentStyle sty = isInheritedFromStyles names psty
  | otherwise = False

hasStylesInheritedFrom :: [ParaStyleName] -> ParagraphStyle -> Bool
hasStylesInheritedFrom ns s = any (isInheritedFromStyles ns) $ pStyle s

removeStyleNamed :: ParaStyleName -> ParagraphStyle -> ParagraphStyle
removeStyleNamed sn ps = ps{pStyle = filter (\psd -> getStyleName psd /= sn) $ pStyle ps}

isCodeCharStyle :: CharStyle -> Bool
isCodeCharStyle = isInheritedFromStyles ["Verbatim Char"]

isCodeDiv :: ParagraphStyle -> Bool
isCodeDiv = hasStylesInheritedFrom ["Source Code"]

isBlockQuote :: ParStyle -> Bool
isBlockQuote =
  isInheritedFromStyles [
    "Quote", "Block Text", "Block Quote", "Block Quotation"
    ]

runElemToInlines :: RunElem -> Inlines
runElemToInlines (TextRun s)   = text s
runElemToInlines LnBrk         = linebreak
runElemToInlines Tab           = space
runElemToInlines SoftHyphen    = text "\xad"
runElemToInlines NoBreakHyphen = text "\x2011"

runElemToText :: RunElem -> T.Text
runElemToText (TextRun s)   = s
runElemToText LnBrk         = T.singleton '\n'
runElemToText Tab           = T.singleton '\t'
runElemToText SoftHyphen    = T.singleton '\xad'
runElemToText NoBreakHyphen = T.singleton '\x2011'

runToText :: Run -> T.Text
runToText (Run _ runElems) = T.concat $ map runElemToText runElems
runToText _                = ""

parPartToText :: ParPart -> T.Text
parPartToText (PlainRun run)             = runToText run
parPartToText (InternalHyperLink _ runs) = T.concat $ map runToText runs
parPartToText (ExternalHyperLink _ runs) = T.concat $ map runToText runs
parPartToText _                          = ""

blacklistedCharStyles :: [CharStyleName]
blacklistedCharStyles = ["Hyperlink"]

resolveDependentRunStyle :: PandocMonad m => RunStyle -> DocxContext m RunStyle
resolveDependentRunStyle rPr
  | Just s  <- rParentStyle rPr
  , getStyleName s `elem` blacklistedCharStyles =
    return rPr
  | Just s  <- rParentStyle rPr = do
      opts <- asks docxOptions
      if isEnabled Ext_styles opts
        then return rPr
        else leftBiasedMergeRunStyle rPr <$> resolveDependentRunStyle (cStyleData s)
  | otherwise = return rPr

runStyleToTransform :: PandocMonad m => RunStyle -> DocxContext m (Inlines -> Inlines)
runStyleToTransform rPr
  | Just sn <- getStyleName <$> rParentStyle rPr
  , sn `elem` spansToKeep = do
      transform <- runStyleToTransform rPr{rParentStyle = Nothing}
      return $ spanWith ("", [normalizeToClassName sn], []) . transform
  | Just s <- rParentStyle rPr = do
      ei <- extraInfo spanWith s
      transform <- runStyleToTransform rPr{rParentStyle = Nothing}
      return $ ei . transform
  | Just True <- isItalic rPr = do
      transform <- runStyleToTransform rPr{isItalic = Nothing}
      return $ emph  . transform
  | Just True <- isBold rPr = do
      transform <- runStyleToTransform rPr{isBold = Nothing}
      return $ strong . transform
  | Just True <- isSmallCaps rPr = do
      transform <- runStyleToTransform rPr{isSmallCaps = Nothing}
      return $ smallcaps . transform
  | Just True <- isStrike rPr = do
      transform <- runStyleToTransform rPr{isStrike = Nothing}
      return $ strikeout . transform
  | Just True <- isRTL rPr = do
      transform <- runStyleToTransform rPr{isRTL = Nothing}
      return $ spanWith ("",[],[("dir","rtl")]) . transform
  | Just False <- isRTL rPr = do
      transform <- runStyleToTransform rPr{isRTL = Nothing}
      inBidi <- asks docxInBidi
      return $ if inBidi
               then spanWith ("",[],[("dir","ltr")]) . transform
               else transform
  | Just SupScrpt <- rVertAlign rPr = do
      transform <- runStyleToTransform rPr{rVertAlign = Nothing}
      return $ superscript . transform
  | Just SubScrpt <- rVertAlign rPr = do
      transform <- runStyleToTransform rPr{rVertAlign = Nothing}
      return $ subscript . transform
  | Just "single" <- rUnderline rPr = do
      transform <- runStyleToTransform rPr{rUnderline = Nothing}
      return $ underlineSpan . transform
  | otherwise = return id

runToInlines :: PandocMonad m => Run -> DocxContext m Inlines
runToInlines (Run rs runElems)
  | maybe False isCodeCharStyle $ rParentStyle rs = do
      rPr <- resolveDependentRunStyle rs
      let codeString = code $ T.concat $ map runElemToText runElems
      return $ case rVertAlign rPr of
        Just SupScrpt -> superscript codeString
        Just SubScrpt -> subscript codeString
        _             -> codeString
  | otherwise = do
      rPr <- resolveDependentRunStyle rs
      let ils = smushInlines (map runElemToInlines runElems)
      transform <- runStyleToTransform rPr
      return $ transform ils
runToInlines (Footnote bps) = do
  blksList <- smushBlocks <$> mapM bodyPartToBlocks bps
  return $ note blksList
runToInlines (Endnote bps) = do
  blksList <- smushBlocks <$> mapM bodyPartToBlocks bps
  return $ note blksList
runToInlines (InlineDrawing fp title alt bs ext) = do
  (lift . lift) $ P.insertMedia fp Nothing bs
  return $ imageWith (extentToAttr ext) (T.pack fp) title $ text alt
runToInlines InlineChart = return $ spanWith ("", ["chart"], []) $ text "[CHART]"

extentToAttr :: Extent -> Attr
extentToAttr (Just (w, h)) =
  ("", [], [("width", showDim w), ("height", showDim h)] )
  where
    showDim d = tshow (d / 914400) <> "in"
extentToAttr _ = nullAttr

blocksToInlinesWarn :: PandocMonad m => T.Text -> Blocks -> DocxContext m Inlines
blocksToInlinesWarn cmtId blks = do
  let blkList = toList blks
      paraOrPlain :: Block -> Bool
      paraOrPlain (Para _)  = True
      paraOrPlain (Plain _) = True
      paraOrPlain _         = False
  unless (all paraOrPlain blkList) $
    lift $ P.report $ DocxParserWarning $
      "Docx comment " <> cmtId <> " will not retain formatting"
  return $ blocksToInlines' blkList

-- The majority of work in this function is done in the primed
-- subfunction `partPartToInlines'`. We make this wrapper so that we
-- don't have to modify `docxImmedPrevAnchor` state after every function.
parPartToInlines :: PandocMonad m => ParPart -> DocxContext m Inlines
parPartToInlines parPart =
  case parPart of
    (BookMark _ anchor) | anchor `notElem` dummyAnchors -> do
      inHdrBool <- asks docxInHeaderBlock
      ils <- parPartToInlines' parPart
      immedPrevAnchor <- gets docxImmedPrevAnchor
      unless (isJust immedPrevAnchor || inHdrBool)
        (modify $ \s -> s{ docxImmedPrevAnchor = Just anchor})
      return ils
    _ -> do
      ils <- parPartToInlines' parPart
      modify $ \s -> s{ docxImmedPrevAnchor = Nothing}
      return ils

parPartToInlines' :: PandocMonad m => ParPart -> DocxContext m Inlines
parPartToInlines' (PlainRun r) = runToInlines r
parPartToInlines' (ChangedRuns (TrackedChange Insertion (ChangeInfo _ author date)) runs) = do
  opts <- asks docxOptions
  case readerTrackChanges opts of
    AcceptChanges -> smushInlines <$> mapM runToInlines runs
    RejectChanges -> return mempty
    AllChanges    -> do
      ils <- smushInlines <$> mapM runToInlines runs
      let attr = ("", ["insertion"], [("author", author), ("date", date)])
      return $ spanWith attr ils
parPartToInlines' (ChangedRuns (TrackedChange Deletion (ChangeInfo _ author date)) runs) = do
  opts <- asks docxOptions
  case readerTrackChanges opts of
    AcceptChanges -> return mempty
    RejectChanges -> smushInlines <$> mapM runToInlines runs
    AllChanges    -> do
      ils <- smushInlines <$> mapM runToInlines runs
      let attr = ("", ["deletion"], [("author", author), ("date", date)])
      return $ spanWith attr ils
parPartToInlines' (CommentStart cmtId author date bodyParts) = do
  opts <- asks docxOptions
  case readerTrackChanges opts of
    AllChanges -> do
      blks <- smushBlocks <$> mapM bodyPartToBlocks bodyParts
      ils <- blocksToInlinesWarn cmtId blks
      let attr = ("", ["comment-start"], [("id", cmtId), ("author", author), ("date", date)])
      return $ spanWith attr ils
    _ -> return mempty
parPartToInlines' (CommentEnd cmtId) = do
  opts <- asks docxOptions
  case readerTrackChanges opts of
    AllChanges -> do
      let attr = ("", ["comment-end"], [("id", cmtId)])
      return $ spanWith attr mempty
    _ -> return mempty
parPartToInlines' (BookMark _ anchor) | anchor `elem` dummyAnchors =
  return mempty
parPartToInlines' (BookMark _ anchor) =
  -- We record these, so we can make sure not to overwrite
  -- user-defined anchor links with header auto ids.
  do
    -- get whether we're in a header.
    inHdrBool <- asks docxInHeaderBlock
    -- Get the anchor map.
    anchorMap <- gets docxAnchorMap
    -- We don't want to rewrite if we're in a header, since we'll take
    -- care of that later, when we make the header anchor. If the
    -- bookmark were already in uniqueIdent form, this would lead to a
    -- duplication. Otherwise, we check to see if the id is already in
    -- there. Rewrite if necessary. This will have the possible effect
    -- of rewriting user-defined anchor links. However, since these
    -- are not defined in pandoc, it seems like a necessary evil to
    -- avoid an extra pass.
    immedPrevAnchor <- gets docxImmedPrevAnchor
    case immedPrevAnchor of
      Just prevAnchor -> do
        unless inHdrBool
          (modify $ \s -> s { docxAnchorMap = M.insert anchor prevAnchor anchorMap})
        return mempty
      Nothing -> do
        exts <- readerExtensions <$> asks docxOptions
        let newAnchor =
              if not inHdrBool && anchor `elem` M.elems anchorMap
              then uniqueIdent exts [Str anchor]
                     (Set.fromList $ M.elems anchorMap)
              else anchor
        unless inHdrBool
          (modify $ \s -> s { docxAnchorMap = M.insert anchor newAnchor anchorMap})
        return $ spanWith (newAnchor, ["anchor"], []) mempty
parPartToInlines' (Drawing fp title alt bs ext) = do
  (lift . lift) $ P.insertMedia fp Nothing bs
  return $ imageWith (extentToAttr ext) (T.pack fp) title $ text alt
parPartToInlines' Chart =
  return $ spanWith ("", ["chart"], []) $ text "[CHART]"
parPartToInlines' (InternalHyperLink anchor runs) = do
  ils <- smushInlines <$> mapM runToInlines runs
  return $ link ("#" <> anchor) "" ils
parPartToInlines' (ExternalHyperLink target runs) = do
  ils <- smushInlines <$> mapM runToInlines runs
  return $ link target "" ils
parPartToInlines' (PlainOMath exps) =
  return $ math $ writeTeX exps
parPartToInlines' (Field info runs) =
  case info of
    HyperlinkField url -> parPartToInlines' $ ExternalHyperLink url runs
    UnknownField -> smushInlines <$> mapM runToInlines runs
parPartToInlines' NullParPart = return mempty

isAnchorSpan :: Inline -> Bool
isAnchorSpan (Span (_, classes, kvs) _) =
  classes == ["anchor"] &&
  null kvs
isAnchorSpan _ = False

dummyAnchors :: [T.Text]
dummyAnchors = ["_GoBack"]

makeHeaderAnchor :: PandocMonad m => Blocks -> DocxContext m Blocks
makeHeaderAnchor bs = traverse makeHeaderAnchor' bs

makeHeaderAnchor' :: PandocMonad m => Block -> DocxContext m Block
-- If there is an anchor already there (an anchor span in the header,
-- to be exact), we rename and associate the new id with the old one.
makeHeaderAnchor' (Header n (ident, classes, kvs) ils)
  | (c:_) <- filter isAnchorSpan ils
  , (Span (anchIdent, ["anchor"], _) cIls) <- c = do
    hdrIDMap <- gets docxAnchorMap
    exts <- readerExtensions <$> asks docxOptions
    let newIdent = if T.null ident
                   then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap)
                   else ident
        newIls = concatMap f ils where f il | il == c   = cIls
                                            | otherwise = [il]
    modify $ \s -> s {docxAnchorMap = M.insert anchIdent newIdent hdrIDMap}
    makeHeaderAnchor' $ Header n (newIdent, classes, kvs) newIls
-- Otherwise we just give it a name, and register that name (associate
-- it with itself.)
makeHeaderAnchor' (Header n (ident, classes, kvs) ils) =
  do
    hdrIDMap <- gets docxAnchorMap
    exts <- readerExtensions <$> asks docxOptions
    let newIdent = if T.null ident
                   then uniqueIdent exts ils (Set.fromList $ M.elems hdrIDMap)
                   else ident
    modify $ \s -> s {docxAnchorMap = M.insert newIdent newIdent hdrIDMap}
    return $ Header n (newIdent, classes, kvs) ils
makeHeaderAnchor' blk = return blk

-- Rewrite a standalone paragraph block as a plain
singleParaToPlain :: Blocks -> Blocks
singleParaToPlain blks
  | (Para ils :< seeq) <- viewl $ unMany blks
  , Seq.null seeq =
      singleton $ Plain ils
singleParaToPlain blks = blks

cellToBlocks :: PandocMonad m => Cell -> DocxContext m Blocks
cellToBlocks (Cell bps) = do
  blks <- smushBlocks <$> mapM bodyPartToBlocks bps
  return $ fromList $ blocksToDefinitions $ blocksToBullets $ toList blks

rowToBlocksList :: PandocMonad m => Row -> DocxContext m [Blocks]
rowToBlocksList (Row cells) = do
  blksList <- mapM cellToBlocks cells
  return $ map singleParaToPlain blksList

-- like trimInlines, but also take out linebreaks
trimSps :: Inlines -> Inlines
trimSps (Many ils) = Many $ Seq.dropWhileL isSp $Seq.dropWhileR isSp ils
  where isSp Space     = True
        isSp SoftBreak = True
        isSp LineBreak = True
        isSp _         = False

extraInfo :: (Eq (StyleName a), PandocMonad m, HasStyleName a)
          => (Attr -> i -> i) -> a -> DocxContext m (i -> i)
extraInfo f s = do
  opts <- asks docxOptions
  return $ if isEnabled Ext_styles opts
              then f ("", [], [("custom-style", fromStyleName $ getStyleName s)])
              else id

parStyleToTransform :: PandocMonad m => ParagraphStyle -> DocxContext m (Blocks -> Blocks)
parStyleToTransform pPr
  | (c:cs) <- pStyle pPr
  , getStyleName c `elem` divsToKeep = do
      let pPr' = pPr { pStyle = cs }
      transform <- parStyleToTransform pPr'
      return $ divWith ("", [normalizeToClassName $ getStyleName c], []) . transform
  | (c:cs) <- pStyle pPr,
    getStyleName c `elem` listParagraphStyles = do
      let pPr' = pPr { pStyle = cs, indentation = Nothing}
      transform <- parStyleToTransform pPr'
      return $ divWith ("", [normalizeToClassName $ getStyleName c], []) . transform
  | (c:cs) <- pStyle pPr = do
      let pPr' = pPr { pStyle = cs }
      transform <- parStyleToTransform pPr'
      ei <- extraInfo divWith c
      return $ ei . (if isBlockQuote c then blockQuote else id) . transform
  | null (pStyle pPr)
  , Just left <- indentation pPr >>= leftParIndent = do
    let pPr' = pPr { indentation = Nothing }
        hang = fromMaybe 0 $ indentation pPr >>= hangingParIndent
    transform <- parStyleToTransform pPr'
    return $ if (left - hang) > 0
             then blockQuote . transform
             else transform
parStyleToTransform _ = return id

normalizeToClassName :: (FromStyleName a) => a -> T.Text
normalizeToClassName = T.map go . fromStyleName
  where go c | isSpace c = '-'
             | otherwise = c

bodyPartToBlocks :: PandocMonad m => BodyPart -> DocxContext m Blocks
bodyPartToBlocks (Paragraph pPr parparts)
  | Just True <- pBidi pPr = do
      let pPr' = pPr { pBidi = Nothing }
      local (\s -> s{ docxInBidi = True })
        (bodyPartToBlocks (Paragraph pPr' parparts))
  | isCodeDiv pPr = do
      transform <- parStyleToTransform pPr
      return $
        transform $
        codeBlock $
        T.concat $
        map parPartToText parparts
  | Just (style, n) <- pHeading pPr = do
    ils <-local (\s-> s{docxInHeaderBlock=True})
           (smushInlines <$> mapM parPartToInlines parparts)
    makeHeaderAnchor $
      headerWith ("", map normalizeToClassName . delete style $ getStyleNames (pStyle pPr), []) n ils
  | otherwise = do
    ils <- trimSps . smushInlines <$> mapM parPartToInlines parparts
    prevParaIls <- gets docxPrevPara
    dropIls <- gets docxDropCap
    let ils' = dropIls <> ils
    let (paraOrPlain, pPr')
          | hasStylesInheritedFrom ["Compact"] pPr = (plain, removeStyleNamed "Compact" pPr)
          | otherwise = (para, pPr)
    if dropCap pPr'
      then do modify $ \s -> s { docxDropCap = ils' }
              return mempty
      else do modify $ \s -> s { docxDropCap = mempty }
              let ils'' = prevParaIls <>
                          (if isNull prevParaIls then mempty else space) <>
                          ils'
                  handleInsertion = do
                    modify $ \s -> s {docxPrevPara = mempty}
                    transform <- parStyleToTransform pPr'
                    return $ transform $ paraOrPlain ils''
              opts <- asks docxOptions
              if  | isNull ils'' && not (isEnabled Ext_empty_paragraphs opts) ->
                    return mempty
                  | Just (TrackedChange Insertion _) <- pChange pPr'
                  , AcceptChanges <- readerTrackChanges opts ->
                      handleInsertion
                  | Just (TrackedChange Insertion _) <- pChange pPr'
                  , RejectChanges <- readerTrackChanges opts -> do
                      modify $ \s -> s {docxPrevPara = ils''}
                      return mempty
                  | Just (TrackedChange Insertion cInfo) <- pChange pPr'
                  , AllChanges <- readerTrackChanges opts
                  , ChangeInfo _ cAuthor cDate <- cInfo -> do
                      let attr = ("", ["paragraph-insertion"], [("author", cAuthor), ("date", cDate)])
                          insertMark = spanWith attr mempty
                      transform <- parStyleToTransform pPr'
                      return $ transform $
                        paraOrPlain $ ils'' <> insertMark
                  | Just (TrackedChange Deletion _) <- pChange pPr'
                  , AcceptChanges <- readerTrackChanges opts -> do
                      modify $ \s -> s {docxPrevPara = ils''}
                      return mempty
                  | Just (TrackedChange Deletion _) <- pChange pPr'
                  , RejectChanges <- readerTrackChanges opts ->
                      handleInsertion
                  | Just (TrackedChange Deletion cInfo) <- pChange pPr'
                  , AllChanges <- readerTrackChanges opts
                  , ChangeInfo _ cAuthor cDate <- cInfo -> do
                      let attr = ("", ["paragraph-deletion"], [("author", cAuthor), ("date", cDate)])
                          insertMark = spanWith attr mempty
                      transform <- parStyleToTransform pPr'
                      return $ transform $
                        paraOrPlain $ ils'' <> insertMark
                  | otherwise -> handleInsertion
bodyPartToBlocks (ListItem pPr numId lvl (Just levelInfo) parparts) = do
  -- We check whether this current numId has previously been used,
  -- since Docx expects us to pick up where we left off.
  listState <- gets docxListState
  let startFromState = M.lookup (numId, lvl) listState
      Level _ fmt txt startFromLevelInfo = levelInfo
      start = case startFromState of
        Just n -> n + 1
        Nothing -> fromMaybe 1 startFromLevelInfo
      kvs = [ ("level", lvl)
            , ("num-id", numId)
            , ("format", fmt)
            , ("text", txt)
            , ("start", tshow start)
            ]
  modify $ \st -> st{ docxListState =
    -- expire all the continuation data for lists of level > this one:
    -- a new level 1 list item resets continuation for level 2+
    let expireKeys = [ (numid', lvl')
                     |  (numid', lvl') <- M.keys listState
                     , lvl' > lvl
                     ]
    in foldr M.delete (M.insert (numId, lvl) start listState) expireKeys }
  blks <- bodyPartToBlocks (Paragraph pPr parparts)
  return $ divWith ("", ["list-item"], kvs) blks
bodyPartToBlocks (ListItem pPr _ _ _ parparts) =
  let pPr' = pPr {pStyle = constructBogusParStyleData "list-paragraph": pStyle pPr}
  in
    bodyPartToBlocks $ Paragraph pPr' parparts
bodyPartToBlocks (Tbl _ _ _ []) =
  return $ para mempty
bodyPartToBlocks (Tbl cap _ look parts@(r:rs)) = do
  let caption = text cap
      (hdr, rows) = case firstRowFormatting look of
        True | null rs -> (Nothing, [r])
             | otherwise -> (Just r, rs)
        False -> (Nothing, r:rs)

  cells <- mapM rowToBlocksList rows

  let width = maybe 0 maximum $ nonEmpty $ map rowLength parts
      -- Data.List.NonEmpty is not available with ghc 7.10 so we roll out
      -- our own, see
      -- https://github.com/jgm/pandoc/pull/4361#issuecomment-365416155
      nonEmpty [] = Nothing
      nonEmpty l  = Just l
      rowLength :: Row -> Int
      rowLength (Row c) = length c

  -- pad cells.  New Text.Pandoc.Builder will do that for us,
  -- so this is for compatibility while we switch over.
  let cells' = map (\row -> take width (row ++ repeat mempty)) cells

  hdrCells <- case hdr of
    Just r' -> rowToBlocksList r'
    Nothing -> return $ replicate width mempty

      -- The two following variables (horizontal column alignment and
      -- relative column widths) go to the default at the
      -- moment. Width information is in the TblGrid field of the Tbl,
      -- so should be possible. Alignment might be more difficult,
      -- since there doesn't seem to be a column entity in docx.
  let alignments = replicate width AlignDefault
      widths = replicate width 0 :: [Double]

  return $ table caption (zip alignments widths) hdrCells cells'
bodyPartToBlocks (OMathPara e) =
  return $ para $ displayMath (writeTeX e)


-- replace targets with generated anchors.
rewriteLink' :: PandocMonad m => Inline -> DocxContext m Inline
rewriteLink' l@(Link attr ils (T.uncons -> Just ('#',target), title)) = do
  anchorMap <- gets docxAnchorMap
  case M.lookup target anchorMap of
    Just newTarget -> do
      modify $ \s -> s{docxAnchorSet = Set.insert newTarget (docxAnchorSet s)}
      return $ Link attr ils ("#" <> newTarget, title)
    Nothing        -> do
      modify $ \s -> s{docxAnchorSet = Set.insert target (docxAnchorSet s)}
      return l
rewriteLink' il = return il

rewriteLinks :: PandocMonad m => [Block] -> DocxContext m [Block]
rewriteLinks = mapM (walkM rewriteLink')

removeOrphanAnchors'' :: PandocMonad m => Inline -> DocxContext m [Inline]
removeOrphanAnchors'' s@(Span (ident, classes, _) ils)
  | "anchor" `elem` classes = do
      anchorSet <- gets docxAnchorSet
      return $ if ident `Set.member` anchorSet
               then [s]
               else ils
removeOrphanAnchors'' il = return [il]

removeOrphanAnchors' :: PandocMonad m => [Inline] -> DocxContext m [Inline]
removeOrphanAnchors' ils = liftM concat $ mapM removeOrphanAnchors'' ils

removeOrphanAnchors :: PandocMonad m => [Block] -> DocxContext m [Block]
removeOrphanAnchors = mapM (walkM removeOrphanAnchors')

bodyToOutput :: PandocMonad m => Body -> DocxContext m (Meta, [Block])
bodyToOutput (Body bps) = do
  let (metabps, blkbps) = sepBodyParts bps
  meta <- bodyPartsToMeta metabps
  blks <- smushBlocks <$> mapM bodyPartToBlocks blkbps
  blks' <- rewriteLinks $ blocksToDefinitions $ blocksToBullets $ toList blks
  blks'' <- removeOrphanAnchors blks'
  return (meta, blks'')

docxToOutput :: PandocMonad m
             => ReaderOptions
             -> Docx
             -> m (Meta, [Block])
docxToOutput opts (Docx (Document _ body)) =
  let dEnv   = def { docxOptions  = opts} in
   evalDocxContext (bodyToOutput body) dEnv def