Text/MMark/Parser.hs

-- |
-- Module      :  Text.MMark.Parser
-- Copyright   :  © 2017 Mark Karpov
-- License     :  BSD 3 clause
--
-- Maintainer  :  Mark Karpov <markkarpov92@gmail.com>
-- Stability   :  experimental
-- Portability :  portable
--
-- MMark markdown parser.

{-# LANGUAGE BangPatterns       #-}
{-# LANGUAGE CPP                #-}
{-# LANGUAGE DataKinds          #-}
{-# LANGUAGE DeriveDataTypeable #-}
{-# LANGUAGE DeriveGeneric      #-}
{-# LANGUAGE FlexibleContexts   #-}
{-# LANGUAGE GADTs              #-}
{-# LANGUAGE LambdaCase         #-}
{-# LANGUAGE MultiWayIf         #-}
{-# LANGUAGE OverloadedStrings  #-}
{-# LANGUAGE RecordWildCards    #-}
{-# LANGUAGE TypeFamilies       #-}

module Text.MMark.Parser
  ( MMarkErr (..)
  , parse )
where

import Control.Applicative
import Control.DeepSeq
import Control.Monad
import Control.Monad.Reader
import Control.Monad.State.Strict
import Data.Bifunctor (Bifunctor (..))
import Data.Data (Data)
import Data.Default.Class
import Data.List.NonEmpty (NonEmpty (..), (<|))
import Data.Maybe (isNothing, fromJust, fromMaybe)
import Data.Monoid (Any (..))
import Data.Semigroup (Semigroup (..))
import Data.Text (Text)
import Data.Typeable (Typeable)
import Data.Void
import GHC.Generics
import Text.MMark.Internal
import Text.Megaparsec hiding (parse)
import Text.Megaparsec.Char hiding (eol)
import Text.URI (URI)
import qualified Control.Applicative.Combinators.NonEmpty as NE
import qualified Data.Char                  as Char
import qualified Data.List.NonEmpty         as NE
import qualified Data.Set                   as E
import qualified Data.Text                  as T
import qualified Data.Text.Encoding         as TE
import qualified Data.Yaml                  as Yaml
import qualified Text.Email.Validate        as Email
import qualified Text.Megaparsec.Char.Lexer as L
import qualified Text.URI                   as URI

----------------------------------------------------------------------------
-- Data types

-- | Block-level parser type.

type BParser = ParsecT MMarkErr Text (Reader BlockEnv)

-- | Block-level parser environment.

data BlockEnv = BlockEnv
  { benvAllowNaked :: !Bool
    -- ^ Should we consider a paragraph that does not end with a blank line
    -- 'Naked'? It does not make sense to do so for top-level document, but
    -- in lists, 'Naked' text is pretty common.
  , benvRefLevel :: !Pos
    -- ^ Current reference level: 1 column for top-level of document, column
    -- where content starts for block quotes and lists.
  }

instance Default BlockEnv where
  def = BlockEnv
    { benvAllowNaked = False
    , benvRefLevel   = pos1
    }

-- | MMark custom parse errors.

data MMarkErr
  = YamlParseError String
    -- ^ YAML error that occurred during parsing of a YAML block
  | ListStartIndexTooBig Word
    -- ^ Ordered list start numbers must be nine digits or less
  | ListIndexOutOfOrder Word Word
    -- ^ The index in an ordered list is out of order, first number is the
    -- actual index we ran into, the second number is the expected index
  | NonFlankingDelimiterRun (NonEmpty Char)
    -- ^ This delimiter run should be in left- or right- flanking position
  deriving (Eq, Ord, Show, Read, Generic, Typeable, Data)

instance ShowErrorComponent MMarkErr where
  showErrorComponent = \case
    YamlParseError str ->
      "YAML parse error: " ++ str
    ListStartIndexTooBig n ->
      "Ordered list start numbers must be nine digits or less, " ++ show n
        ++ " is too big"
    ListIndexOutOfOrder actual expected ->
      "List index out of order: " ++ show actual ++ ", expected "
        ++ show expected
    NonFlankingDelimiterRun dels ->
      showTokens dels ++ " should be in left- or right- flanking position"

instance NFData MMarkErr

-- | Inline-level parser type. We store type of the last consumed character
-- in the state.

type IParser = StateT CharType (Parsec MMarkErr Text)

-- | 'Inline' source pending parsing.

data Isp
  = IspSpan SourcePos Text
    -- ^ We have an inline source pending parsing
  | IspError (ParseError Char MMarkErr)
    -- ^ We should just return this parse error
  deriving (Eq, Show)

-- | Type of last seen character.

data CharType
  = SpaceChar          -- ^ White space or a transparent character
  | LeftFlankingDel    -- ^ Left flanking delimiter
  | RightFlankingDel   -- ^ Right flaking delimiter
  | OtherChar          -- ^ Other character
  deriving (Eq, Ord, Show)

-- | Frame that describes where we are in parsing inlines.

data InlineFrame
  = EmphasisFrame      -- ^ Emphasis with asterisk @*@
  | EmphasisFrame_     -- ^ Emphasis with underscore @_@
  | StrongFrame        -- ^ Strong emphasis with asterisk @**@
  | StrongFrame_       -- ^ Strong emphasis with underscore @__@
  | StrikeoutFrame     -- ^ Strikeout
  | SubscriptFrame     -- ^ Subscript
  | SuperscriptFrame   -- ^ Superscript
  deriving (Eq, Ord, Show)

-- | State of inline parsing that specifies whether we expect to close one
-- frame or there is a possibility to close one of two alternatives.

data InlineState
  = SingleFrame InlineFrame             -- ^ One frame to be closed
  | DoubleFrame InlineFrame InlineFrame -- ^ Two frames to be closed
  deriving (Eq, Ord, Show)

-- | Configuration of inline parser.

data InlineConfig = InlineConfig
  { iconfigAllowEmpty :: !Bool
    -- ^ Whether to accept empty inline blocks
  , iconfigAllowLinks :: !Bool
    -- ^ Whether to parse links
  , iconfigAllowImages :: !Bool
    -- ^ Whether to parse images
  }

instance Default InlineConfig where
  def = InlineConfig
    { iconfigAllowEmpty  = True
    , iconfigAllowLinks  = True
    , iconfigAllowImages = True
    }

-- | An auxiliary type for collapsing levels of 'Either's.

data Pair s a
  = PairL s
  | PairR ([a] -> [a])

instance Semigroup s => Semigroup (Pair s a) where
  (PairL l) <> (PairL r) = PairL (l <> r)
  (PairL l) <> (PairR _) = PairL l
  (PairR _) <> (PairL r) = PairL r
  (PairR l) <> (PairR r) = PairR (l . r)

instance Semigroup s => Monoid (Pair s a) where
  mempty  = PairR id
  mappend = (<>)

----------------------------------------------------------------------------
-- Top-level API

-- | Parse a markdown document in the form of a strict 'Text' value and
-- either report parse errors or return an 'MMark' document. Note that the
-- parser has the ability to report multiple parse errors at once.

parse
  :: String
     -- ^ File name (only to be used in error messages), may be empty
  -> Text
     -- ^ Input to parse
  -> Either (NonEmpty (ParseError Char MMarkErr)) MMark
     -- ^ Parse errors or parsed document
parse file input =
  case runReader (runParserT pMMark file input) def of
    -- NOTE This parse error only happens when document structure on block
    -- level cannot be parsed even with recovery, which should not normally
    -- happen except for the cases when we deal with YAML parsing errors.
    Left err -> Left (nes err)
    Right (myaml, rawBlocks) ->
      let parsed = doInline <$> rawBlocks
          doInline = fmap
            $ first (nes . replaceEof "end of inline block")
            . runIsp pInlinesTop
          f block =
            case foldMap e2p block of
              PairL errs -> PairL errs
              PairR _    -> PairR (fmap fromRight block :)
      in case foldMap f parsed of
           PairL errs   -> Left errs
           PairR blocks -> Right MMark
             { mmarkYaml      = myaml
             , mmarkBlocks    = blocks []
             , mmarkExtension = mempty }

----------------------------------------------------------------------------
-- Block parser

-- | Parse an MMark document on block level.

pMMark :: BParser (Maybe Yaml.Value, [Block Isp])
pMMark = do
  meyaml <- optional pYamlBlock
  setTabWidth (mkPos 4)
  blocks <- pBlocks
  eof
  return $ case meyaml of
    Nothing ->
      (Nothing, blocks)
    Just (Left (pos, err)) ->
      (Nothing, prependErr pos (YamlParseError err) blocks)
    Just (Right yaml) ->
      (Just yaml, blocks)

-- | Parse a YAML block. On success return the actual parsed 'Yaml.Value' in
-- 'Right', otherwise return 'SourcePos' of parse error and 'String'
-- describing the error as generated by the @yaml@ package in 'Left'.

pYamlBlock :: BParser (Either (SourcePos, String) Yaml.Value)
pYamlBlock = do
  dpos <- getPosition
  string "---" *> sc' *> eol
  let go = do
        l <- takeWhileP Nothing notNewline
        void (optional eol)
        e <- atEnd
        if e || T.stripEnd l == "---"
          then return []
          else (l :) <$> go
  ls <- go
  case (Yaml.decodeEither . TE.encodeUtf8 . T.intercalate "\n") ls of
    Left err' -> do
      let (apos, err) = splitYamlError (sourceName dpos) err'
      return $ Left (fromMaybe dpos apos, err)
    Right v ->
      return (Right v)

-- | Parse several (possibly zero) blocks in a row.

pBlocks :: BParser [Block Isp]
pBlocks = many pBlock

-- | Parse a single block of markdown document.

pBlock :: BParser (Block Isp)
pBlock = do
  sc
  rlevel <- asks benvRefLevel
  alevel <- L.indentLevel
  done   <- atEnd
  if done || alevel < rlevel then empty else
    case compare alevel (ilevel rlevel) of
      LT -> choice
        [ pThematicBreak
        , pAtxHeading
        , pFencedCodeBlock
        , pUnorderedList
        , pOrderedList
        , pBlockquote
        , pParagraph ]
      _  ->
          pIndentedCodeBlock

-- | Parse a thematic break.

pThematicBreak :: BParser (Block Isp)
pThematicBreak = do
  l' <- lookAhead nonEmptyLine
  let l = T.filter (not . isSpace) l'
  if T.length l >= 3   &&
     (T.all (== '*') l ||
      T.all (== '-') l ||
      T.all (== '_') l)
    then ThematicBreak <$ nonEmptyLine <* sc
    else empty

-- | Parse an ATX heading.

pAtxHeading :: BParser (Block Isp)
pAtxHeading = do
  (void . lookAhead . try) hashIntro
  withRecovery recover $ do
    hlevel <- length <$> hashIntro
    sc1'
    ispPos <- getPosition
    r <- someTill (satisfy notNewline <?> "heading character") . try $
      optional (sc1' *> some (char '#') *> sc') *> (eof <|> eol)
    let toBlock = case hlevel of
          1 -> Heading1
          2 -> Heading2
          3 -> Heading3
          4 -> Heading4
          5 -> Heading5
          _ -> Heading6
    toBlock (IspSpan ispPos (T.strip (T.pack r))) <$ sc
  where
    hashIntro = count' 1 6 (char '#')
    recover err =
      Heading1 (IspError err) <$ takeWhileP Nothing notNewline <* sc

-- | Parse a fenced code block.

pFencedCodeBlock :: BParser (Block Isp)
pFencedCodeBlock = do
  let p ch = try $ do
        void $ count 3 (char ch)
        n  <- (+ 3) . length <$> many (char ch)
        ml <- optional
          (T.strip <$> someEscapedWith notNewline <?> "info string")
        guard (maybe True (not . T.any (== '`')) ml)
        return
          (ch, n,
             case ml of
               Nothing -> Nothing
               Just l  ->
                 if T.null l
                   then Nothing
                   else Just l)
  alevel <- L.indentLevel
  (ch, n, infoString) <- (p '`' <|> p '~') <* eol
  let content = label "code block content" (option "" nonEmptyLine <* eol)
      closingFence = try . label "closing code fence" $ do
        clevel <- ilevel <$> asks benvRefLevel
        void $ L.indentGuard sc' LT clevel
        void $ count n (char ch)
        (void . many . char) ch
        sc'
        eof <|> eol
  ls <- manyTill content closingFence
  CodeBlock infoString (assembleCodeBlock alevel ls) <$ sc

-- | Parse an indented code block.

pIndentedCodeBlock :: BParser (Block Isp)
pIndentedCodeBlock = do
  alevel <- L.indentLevel
  clevel <- ilevel <$> asks benvRefLevel
  let go ls = do
        immediate <- lookAhead $
          (>= clevel) <$> (sc' *> L.indentLevel)
        eventual  <- lookAhead $
          (>= clevel) <$> (sc *> L.indentLevel)
        if immediate || eventual
          then do
            l        <- option "" nonEmptyLine
            continue <- eol'
            if continue
              then go (l:ls)
              else return (l:ls)
          else return ls
      -- NOTE This is a bit unfortunate, but it's difficult to guarantee
      -- that preceding space is not yet consumed when we get to
      -- interpreting input as an indented code block, so we need to restore
      -- the space this way.
      f x      = T.replicate (unPos alevel - 1) " " <> x
      g []     = []
      g (x:xs) = f x : xs
  ls <- g . reverse . dropWhile isBlank <$> go []
  CodeBlock Nothing (assembleCodeBlock clevel ls) <$ sc

-- | Parse an unorederd list.

pUnorderedList :: BParser (Block Isp)
pUnorderedList = do
  (bullet, bulletPos, minLevel, indLevel) <-
    pListBullet Nothing
  x      <- innerBlocks bulletPos minLevel indLevel
  xs     <- many $ do
    (_, bulletPos', minLevel', indLevel') <-
      pListBullet (Just (bullet, bulletPos))
    innerBlocks bulletPos' minLevel' indLevel'
  return (UnorderedList (normalizeListItems (x:|xs)))
  where
    innerBlocks bulletPos minLevel indLevel = do
      p <- getPosition
      let tooFar = sourceLine p > sourceLine bulletPos <> pos1
          rlevel = slevel minLevel indLevel
      if tooFar || sourceColumn p < minLevel
        then return [if tooFar then emptyParagraph else emptyNaked]
        else subEnv True rlevel pBlocks

-- | Parse a list bullet. Return a tuple with the following components (in
-- order):
--
--     * 'Char' used to represent the bullet
--     * 'SourcePos' at which the bullet was located
--     * the closest column position where content could start
--     * the indentation level after the bullet

pListBullet
  :: Maybe (Char, SourcePos)
     -- ^ Bullet 'Char' and start position of the first bullet in a list
  -> BParser (Char, SourcePos, Pos, Pos)
pListBullet mbullet = try $ do
  pos     <- getPosition
  l       <- (<> mkPos 2) <$> L.indentLevel
  bullet  <-
    case mbullet of
      Nothing -> char '-' <|> char '+' <|> char '*'
      Just (bullet, bulletPos) -> do
        guard (sourceColumn pos >= sourceColumn bulletPos)
        char bullet
  eof <|> sc1
  l'      <- L.indentLevel
  return (bullet, pos, l, l')

-- | Parse an ordered list.

pOrderedList :: BParser (Block Isp)
pOrderedList = do
  (startIx, del, startPos, minLevel, indLevel) <-
    pListIndex Nothing
  x  <- innerBlocks startPos minLevel indLevel
  xs <- manyIndexed (startIx + 1) $ \expectedIx -> do
    (actualIx, _, startPos', minLevel', indLevel') <-
      pListIndex (Just (del, startPos))
    let f blocks =
          if actualIx == expectedIx
            then blocks
            else prependErr
                   startPos'
                   (ListIndexOutOfOrder actualIx expectedIx)
                   blocks
    f <$> innerBlocks startPos' minLevel' indLevel'
  return . OrderedList startIx . normalizeListItems $
    (if startIx <= 999999999
       then x
       else prependErr startPos (ListStartIndexTooBig startIx) x)
    :| xs
  where
    innerBlocks indexPos minLevel indLevel = do
      p <- getPosition
      let tooFar = sourceLine p > sourceLine indexPos <> pos1
          rlevel = slevel minLevel indLevel
      if tooFar || sourceColumn p < minLevel
        then return [if tooFar then emptyParagraph else emptyNaked]
        else subEnv True rlevel pBlocks

-- | Parse a list index. Return a tuple with the following components (in
-- order):
--
--     * 'Word' parsed numeric index
--     * 'Char' used as delimiter after the numeric index
--     * 'SourcePos' at which the index was located
--     * the closest column position where content could start
--     * the indentation level after the index

pListIndex
  :: Maybe (Char, SourcePos)
     -- ^ Delimiter 'Char' and start position of the first index in a list
  -> BParser (Word, Char, SourcePos, Pos, Pos)
pListIndex mstart = try $ do
  pos <- getPosition
  i   <- L.decimal
  del <- case mstart of
    Nothing -> char '.' <|> char ')'
    Just (del, startPos) -> do
      guard (sourceColumn pos >= sourceColumn startPos)
      char del
  l   <- (<> pos1) <$> L.indentLevel
  eof <|> sc1
  l'  <- L.indentLevel
  return (i, del, pos, l, l')

-- | Parse a block quote.

pBlockquote :: BParser (Block Isp)
pBlockquote = do
  minLevel <- try $ do
    minLevel <- (<> pos1) <$> L.indentLevel
    void (char '>')
    eof <|> sc
    l <- L.indentLevel
    return $
      if l > minLevel
        then minLevel <> pos1
        else minLevel
  indLevel <- L.indentLevel
  if indLevel >= minLevel
    then do
      let rlevel = slevel minLevel indLevel
      xs <- subEnv False rlevel pBlocks
      return (Blockquote xs)
    else return (Blockquote [])

-- | Parse a paragraph or naked text (is some cases).

pParagraph :: BParser (Block Isp)
pParagraph = do
  startPos   <- getPosition
  allowNaked <- asks benvAllowNaked
  rlevel     <- asks benvRefLevel
  let go ls = do
        l <- lookAhead (option "" nonEmptyLine)
        broken <- succeeds . lookAhead . try $ do
          sc
          alevel <- L.indentLevel
          guard (alevel < ilevel rlevel)
          unless (alevel < rlevel) . choice $
            [ void (char '>')
            , void pThematicBreak
            , void pAtxHeading
            , void (pListBullet Nothing)
            , void (pListIndex  Nothing) ]
        if isBlank l
          then return (ls, Paragraph)
          else if broken
                 then return (ls, Naked)
                 else do
                   void nonEmptyLine
                   continue <- eol'
                   let ls' = ls . (l:)
                   if continue
                     then go ls'
                     else return (ls', Naked)
  l        <- nonEmptyLine
  continue <- eol'
  (ls, toBlock) <-
    if continue
      then go id
      else return (id, Naked)
  (if allowNaked then toBlock else Paragraph)
    (IspSpan startPos (assembleParagraph (l:ls []))) <$ sc

----------------------------------------------------------------------------
-- Inline parser

-- | Run a given parser on 'Isp'.

runIsp
  :: IParser a         -- ^ The parser to run
  -> Isp               -- ^ Input for the parser
  -> Either (ParseError Char MMarkErr) a -- ^ Result of parsing
runIsp _ (IspError err) = Left err
runIsp p (IspSpan startPos input) =
  snd (runParser' (evalStateT p SpaceChar) pst)
  where
    pst = State
      { stateInput           = input
      , statePos             = nes startPos
      , stateTokensProcessed = 0
      , stateTabWidth        = mkPos 4 }

-- | The top level inline parser.

pInlinesTop :: IParser (NonEmpty Inline)
pInlinesTop = do
  inlines <- pInlines def
  eof <|> void pLfdr
  return inlines

-- | Parse inlines using settings from given 'InlineConfig'.

pInlines :: InlineConfig -> IParser (NonEmpty Inline)
pInlines InlineConfig {..} = do
  done <- atEnd
  if done
    then
      if iconfigAllowEmpty
        then (return . nes . Plain) ""
        else unexp EndOfInput
    else NE.some $ do
      mch <- lookAhead (anyChar <?> "inline content")
      case mch of
        '`' -> pCodeSpan
        '[' ->
          if iconfigAllowLinks
            then pInlineLink
            else unexp (Tokens $ nes '[')
        '!' ->
          if iconfigAllowImages
            then try pImage <|> pPlain
            else pPlain
        '<' ->
          if iconfigAllowLinks
            then try pAutolink <|> pPlain
            else pPlain
        '\\' ->
          try pHardLineBreak <|> pPlain
        ch ->
          if isMarkupChar ch
            then pEnclosedInline
            else pPlain
  where
    unexp x = failure
      (Just x)
      (E.singleton . Label . NE.fromList $ "inline content")

-- | Parse a code span.

pCodeSpan :: IParser Inline
pCodeSpan = do
  n <- try (length <$> some (char '`'))
  let finalizer = try $ do
        void $ count n (char '`')
        notFollowedBy (char '`')
  r <- CodeSpan . collapseWhiteSpace . T.concat <$>
    manyTill (label "code span content" $
               takeWhile1P Nothing (== '`') <|>
               takeWhile1P Nothing (/= '`'))
      finalizer
  r <$ put OtherChar

-- | Parse a link.

pInlineLink :: IParser Inline
pInlineLink = do
  xs     <- between (char '[') (char ']') $
    pInlines def { iconfigAllowLinks = False }
  void (char '(') <* sc
  dest   <- pUri
  mtitle <- optional (sc1 *> pTitle)
  sc <* char ')'
  Link xs dest mtitle <$ put OtherChar

-- | Parse an image.

pImage :: IParser Inline
pImage = do
  let nonEmptyDesc = char '!' *> between (char '[') (char ']')
        (pInlines def { iconfigAllowImages = False })
  alt    <- nes (Plain "") <$ string "![]" <|> nonEmptyDesc
  void (char '(') <* sc
  src    <- pUri
  mtitle <- optional (sc1 *> pTitle)
  sc <* char ')'
  Image alt src mtitle <$ put OtherChar

-- | Parse an autolink.

pAutolink :: IParser Inline
pAutolink = between (char '<') (char '>') $ do
  notFollowedBy (char '>') -- empty links don't make sense
  uri' <- URI.parser
  let (txt, uri) =
        case isEmailUri uri' of
          Nothing ->
            ( (nes . Plain . URI.render) uri'
            , uri' )
          Just email ->
            ( nes (Plain email)
            , URI.makeAbsolute mailtoScheme uri' )
  Link txt uri Nothing <$ put OtherChar

-- | Parse inline content inside an enclosing construction such as emphasis,
-- strikeout, superscript, and\/or subscript markup.

pEnclosedInline :: IParser Inline
pEnclosedInline = pLfdr >>= \case
  SingleFrame x ->
    liftFrame x <$> pInlines' <* pRfdr x
  DoubleFrame x y -> do
    inlines0  <- pInlines'
    thisFrame <- pRfdr x <|> pRfdr y
    let thatFrame = if thisFrame == x then y else x
    minlines1 <- optional pInlines'
    void (pRfdr thatFrame)
    return . liftFrame thatFrame $
      case minlines1 of
        Nothing ->
          nes (liftFrame thisFrame inlines0)
        Just inlines1 ->
          liftFrame thisFrame inlines0 <| inlines1
  where
    pInlines' = pInlines def { iconfigAllowEmpty = False }

-- | Parse a hard line break.

pHardLineBreak :: IParser Inline
pHardLineBreak = do
  void (char '\\')
  eol
  notFollowedBy eof
  sc'
  put SpaceChar
  return LineBreak

-- | Parse plain text.

pPlain :: IParser Inline
pPlain = fmap (Plain . T.pack) . some $ do
  ch <- lookAhead (anyChar <?> "inline content")
  case ch of
    '\\' ->
      (escapedChar <* put OtherChar) <|>
      try (char '\\' <* notFollowedBy eol <* put OtherChar)
    '\n' ->
      '\n' <$ eol <* sc' <* put SpaceChar
    '\r' ->
      '\n' <$ eol <* sc' <* put SpaceChar
    '!' -> do
      notFollowedBy (string "![")
      char '!'
    '<' -> do
      notFollowedBy pAutolink
      char '<'
    _ ->
      pOther ch
  where
    pNewline = hidden $
      '\n' <$ sc' <* eol <* sc' <* put SpaceChar
    pOther ch
      | isSpace ch = (try pNewline <|> char ch) <* put SpaceChar
      | isTrans ch = char ch                    <* put SpaceChar
      | isOther ch = char ch                    <* put OtherChar
      | otherwise  = empty
    isTrans x = isTransparentPunctuation x && x /= '!'
    isOther x = not (isMarkupChar x) && x /= '\\' && x /= '!' && x /= '<'

----------------------------------------------------------------------------
-- Auxiliary inline-level parsers

-- | Parse a URI.

pUri :: (Ord e, MonadParsec e Text m) => m URI
pUri =
  between (char '<') (char '>') URI.parser <|> naked
  where
    naked = do
      startPos <- getPosition
      input    <- takeWhileP Nothing $ \x ->
        not (isSpaceN x || x == ')')
      let pst = State
            { stateInput           = input
            , statePos             = nes startPos
            , stateTokensProcessed = 0
            , stateTabWidth        = mkPos 4 }
      case snd (runParser' (URI.parser <* eof) pst) of
        Left err' ->
          case replaceEof "end of URI literal" err' of
            TrivialError pos us es -> do
              setPosition (NE.head pos)
              failure us es
            FancyError pos xs -> do
              setPosition (NE.head pos)
              fancyFailure xs
        Right x -> return x

-- | Parse a title of a link or an image.

pTitle :: MonadParsec e Text m => m Text
pTitle = choice
  [ p '\"' '\"'
  , p '\'' '\''
  , p '('  ')' ]
  where
    p start end = between (char start) (char end) $
      manyEscapedWith (/= end) "unescaped character"

-- | Parse an opening markup sequence corresponding to given 'InlineState'.

pLfdr :: IParser InlineState
pLfdr = try $ do
  pos <- getPosition
  let r st = st <$ string (inlineStateDel st)
  st <- hidden $ choice
    [ r (DoubleFrame StrongFrame StrongFrame)
    , r (DoubleFrame StrongFrame EmphasisFrame)
    , r (SingleFrame StrongFrame)
    , r (SingleFrame EmphasisFrame)
    , r (DoubleFrame StrongFrame_ StrongFrame_)
    , r (DoubleFrame StrongFrame_ EmphasisFrame_)
    , r (SingleFrame StrongFrame_)
    , r (SingleFrame EmphasisFrame_)
    , r (DoubleFrame StrikeoutFrame StrikeoutFrame)
    , r (DoubleFrame StrikeoutFrame SubscriptFrame)
    , r (SingleFrame StrikeoutFrame)
    , r (SingleFrame SubscriptFrame)
    , r (SingleFrame SuperscriptFrame) ]
  let dels = inlineStateDel st
      failNow = do
        setPosition pos
        (mmarkErr . NonFlankingDelimiterRun . toNesTokens) dels
  lch <- get
  when (lch == OtherChar) failNow
  rch <- lookAhead (optional anyChar)
  when (maybe True isTransparent rch) failNow
  return st

-- | Parse a closing markup sequence corresponding to given 'InlineFrame'.

pRfdr :: InlineFrame -> IParser InlineFrame
pRfdr frame = try $ do
  let dels = inlineFrameDel frame
      expectingInlineContent = region $ \case
        TrivialError pos us es ->
          TrivialError pos us (E.insert (Label $ NE.fromList "inline content") es)
        other -> other
  pos <- getPosition
  (void . expectingInlineContent . string) dels
  let failNow = do
        setPosition pos
        (mmarkErr . NonFlankingDelimiterRun . toNesTokens) dels
      goodAfter x =
        isTransparent x || isMarkupChar x
  lch <- get
  unless (lch == OtherChar) failNow
  rch <- lookAhead (optional anyChar)
  unless (maybe True goodAfter rch) failNow
  return frame

----------------------------------------------------------------------------
-- Parsing helpers

nonEmptyLine :: BParser Text
nonEmptyLine = takeWhile1P Nothing notNewline

manyEscapedWith :: MonadParsec e Text m => (Char -> Bool) -> String -> m Text
manyEscapedWith f l = T.pack <$> many (escapedChar <|> (satisfy f <?> l))

someEscapedWith :: MonadParsec e Text m => (Char -> Bool) -> m Text
someEscapedWith f = T.pack <$> some (escapedChar <|> satisfy f)

escapedChar :: MonadParsec e Text m => m Char
escapedChar = label "escaped character" $
  try (char '\\' *> satisfy isAsciiPunctuation)

sc :: MonadParsec e Text m => m ()
sc = void $ takeWhileP (Just "white space") isSpaceN

sc1 :: MonadParsec e Text m => m ()
sc1 = void $ takeWhile1P (Just "white space") isSpaceN

sc' :: MonadParsec e Text m => m ()
sc' = void $ takeWhileP (Just "white space") isSpace

sc1' :: MonadParsec e Text m => m ()
sc1' = void $ takeWhile1P (Just "white space") isSpace

eol :: MonadParsec e Text m => m ()
eol = void . label "newline" $ choice
  [ string "\n"
  , string "\r\n"
  , string "\r" ]

eol' :: MonadParsec e Text m => m Bool
eol' = option False (True <$ eol)

subEnv :: Bool -> Pos -> BParser a -> BParser a
subEnv benvAllowNaked benvRefLevel = local (const BlockEnv {..})

----------------------------------------------------------------------------
-- Other helpers

slevel :: Pos -> Pos -> Pos
slevel a l = if l >= ilevel a then a else l

ilevel :: Pos -> Pos
ilevel = (<> mkPos 4)

isSpace :: Char -> Bool
isSpace x = x == ' ' || x == '\t'

isSpaceN :: Char -> Bool
isSpaceN x = isSpace x || x == '\n' || x == '\r'

notNewline :: Char -> Bool
notNewline x = x /= '\n' && x /= '\r'

isBlank :: Text -> Bool
isBlank = T.all isSpace

isFrameConstituent :: Char -> Bool
isFrameConstituent = \case
  '*' -> True
  '^' -> True
  '_' -> True
  '~' -> True
  _   -> False

isMarkupChar :: Char -> Bool
isMarkupChar x = isFrameConstituent x || f x
  where
    f = \case
      '[' -> True
      ']' -> True
      '`' -> True
      _   -> False

isAsciiPunctuation :: Char -> Bool
isAsciiPunctuation x =
  (x >= '!' && x <= '/') ||
  (x >= ':' && x <= '@') ||
  (x >= '[' && x <= '`') ||
  (x >= '{' && x <= '~')

isTransparentPunctuation :: Char -> Bool
isTransparentPunctuation = \case
  '!' -> True
  '"' -> True
  '(' -> True
  ')' -> True
  ',' -> True
  '-' -> True
  '.' -> True
  ':' -> True
  ';' -> True
  '?' -> True
  '{' -> True
  '}' -> True
  '–' -> True
  '—' -> True
  _   -> False

isTransparent :: Char -> Bool
isTransparent x = Char.isSpace x || isTransparentPunctuation x

assembleCodeBlock :: Pos -> [Text] -> Text
assembleCodeBlock indent ls = T.unlines (stripIndent indent <$> ls)

stripIndent :: Pos -> Text -> Text
stripIndent indent txt = T.drop m txt
  where
    m = snd $ T.foldl' f (0, 0) (T.takeWhile p txt)
    p x = isSpace x || x == '>'
    f (!j, !n) ch
      | j  >= i    = (j, n)
      | ch == ' '  = (j + 1, n + 1)
      | ch == '\t' = (j + 4, n + 1)
      | otherwise  = (j, n)
    i = unPos indent - 1

assembleParagraph :: [Text] -> Text
assembleParagraph = go
  where
    go []     = ""
    go [x]    = T.dropWhileEnd isSpace x
    go (x:xs) = x <> "\n" <> go xs

collapseWhiteSpace :: Text -> Text
collapseWhiteSpace =
  T.stripEnd . T.filter (/= '\0') . snd . T.mapAccumL f True
  where
    f seenSpace ch =
      case (seenSpace, g ch) of
        (False, False) -> (False, ch)
        (True,  False) -> (False, ch)
        (False, True)  -> (True,  ' ')
        (True,  True)  -> (True,  '\0')
    g ' '  = True
    g '\t' = True
    g '\n' = True
    g _    = False

inlineStateDel :: InlineState -> Text
inlineStateDel = \case
  SingleFrame x   -> inlineFrameDel x
  DoubleFrame x y -> inlineFrameDel x <> inlineFrameDel y

liftFrame :: InlineFrame -> NonEmpty Inline -> Inline
liftFrame = \case
  StrongFrame      -> Strong
  EmphasisFrame    -> Emphasis
  StrongFrame_     -> Strong
  EmphasisFrame_   -> Emphasis
  StrikeoutFrame   -> Strikeout
  SubscriptFrame   -> Subscript
  SuperscriptFrame -> Superscript

inlineFrameDel :: InlineFrame -> Text
inlineFrameDel = \case
  EmphasisFrame    -> "*"
  EmphasisFrame_   -> "_"
  StrongFrame      -> "**"
  StrongFrame_     -> "__"
  StrikeoutFrame   -> "~~"
  SubscriptFrame   -> "~"
  SuperscriptFrame -> "^"

replaceEof :: String -> ParseError Char e -> ParseError Char e
replaceEof altLabel = \case
  TrivialError pos us es -> TrivialError pos (f <$> us) (E.map f es)
  FancyError   pos xs    -> FancyError pos xs
  where
    f EndOfInput = Label (NE.fromList altLabel)
    f x          = x

isEmailUri :: URI -> Maybe Text
isEmailUri uri =
  case URI.unRText <$> URI.uriPath uri of
    [x] ->
      if Email.isValid (TE.encodeUtf8 x) &&
          (isNothing (URI.uriScheme uri) ||
           URI.uriScheme uri == Just mailtoScheme)
        then Just x
        else Nothing
    _ -> Nothing

splitYamlError :: FilePath -> String -> (Maybe SourcePos, String)
splitYamlError file str = maybe (Nothing, str) (first pure) (parseMaybe p str)
  where
    p :: Parsec Void String (SourcePos, String)
    p = do
      void (string "YAML parse exception at line ")
      l <- mkPos . (+ 2) <$> L.decimal
      void (string ", column ")
      c <- mkPos . (+ 1) <$> L.decimal
      void (string ":\n")
      r <- takeRest
      return (SourcePos file l c, r)

emptyParagraph :: Block Isp
emptyParagraph = Paragraph (IspSpan (initialPos "") "")

emptyNaked :: Block Isp
emptyNaked = Naked (IspSpan (initialPos "") "")

manyIndexed :: (Alternative m, Num n) => n -> (n -> m a) -> m [a]
manyIndexed n' m = go n'
  where
    go !n = liftA2 (:) (m n) (go (n + 1)) <|> pure []

normalizeListItems :: NonEmpty [Block Isp] -> NonEmpty [Block Isp]
normalizeListItems xs' =
  if getAny $ foldMap (foldMap (Any . isParagraph)) (drop 1 x :| xs)
    then fmap toParagraph <$> xs'
    else case x of
           [] -> xs'
           (y:ys) -> r $ (toNaked y : ys) :| xs
  where
    (x:|xs) = r xs'
    r = NE.reverse . fmap reverse
    isParagraph = \case
      OrderedList _ _ -> False
      UnorderedList _ -> False
      Naked         _ -> False
      _               -> True
    toParagraph (Naked inner) = Paragraph inner
    toParagraph other         = other
    toNaked (Paragraph inner) = Naked inner
    toNaked other             = other

e2p :: Either a b -> Pair a b
e2p = \case
  Left  a -> PairL a
  Right b -> PairR (b:)

succeeds :: Alternative m => m () -> m Bool
succeeds m = True <$ m <|> pure False

prependErr :: SourcePos -> MMarkErr -> [Block Isp] -> [Block Isp]
prependErr pos custom blocks = Naked (IspError err) : blocks
  where
    err = FancyError (nes pos) (E.singleton $ ErrorCustom custom)

mmarkErr :: MonadParsec MMarkErr s m => MMarkErr -> m a
mmarkErr = fancyFailure . E.singleton . ErrorCustom

mailtoScheme :: URI.RText 'URI.Scheme
mailtoScheme = fromJust (URI.mkScheme "mailto")

toNesTokens :: Text -> NonEmpty Char
toNesTokens = NE.fromList . T.unpack

nes :: a -> NonEmpty a
nes a = a :| []

fromRight :: Either a b -> b
fromRight (Right x) = x
fromRight _         =
  error "Text.MMark.Parser.fromRight: the impossible happened"