{-# LANGUAGE FlexibleContexts           #-}
{-# LANGUAGE OverloadedStrings          #-}
{- |
Module      : Text.Pandoc.Parsing.Smart
Copyright   : © 2006-2023 John MacFarlane
License     : GPL-2.0-or-later
Maintainer  : John MacFarlane <jgm@berkeley.edu>

Smart parsing of quotes, dashes, and other character combinations.
-}

module Text.Pandoc.Parsing.Smart
  ( apostrophe
  , dash
  , doubleCloseQuote
  , doubleQuoteEnd
  , doubleQuoteStart
  , doubleQuoted
  , ellipses
  , singleQuoteEnd
  , singleQuoteStart
  , singleQuoted
  , smartPunctuation
  )
where

import Control.Monad (guard , void)
import Text.Pandoc.Builder (Inlines)
import Text.Pandoc.Options
  ( extensionEnabled
  , Extension(Ext_old_dashes, Ext_smart)
  , ReaderOptions(readerExtensions) )
import Text.Pandoc.Sources
import Text.Pandoc.Parsing.Capabilities
import Text.Pandoc.Parsing.General
import Text.Parsec
  ( (<|>)
  , Stream(..)
  , ParsecT
  , choice
  , lookAhead
  , manyTill
  , notFollowedBy
  , try
  )
import qualified Data.Text as T
import qualified Text.Pandoc.Builder as B

-- | Parses various ASCII punctuation, quotes, and apostrophe in a smart
-- way, inferring their semantic meaning.
--
-- Fails unless the 'Ext_smart' extension has been enabled.
smartPunctuation :: (HasReaderOptions st, HasLastStrPosition st,
                     HasQuoteContext st m,
                     Stream s m Char, UpdateSourcePos s Char)
                 => ParsecT s st m Inlines
                 -> ParsecT s st m Inlines
smartPunctuation :: forall st (m :: * -> *) s.
(HasReaderOptions st, HasLastStrPosition st, HasQuoteContext st m,
 Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s st m Inlines -> ParsecT s st m Inlines
smartPunctuation ParsecT s st m Inlines
inlineParser = do
  forall s (m :: * -> *) a st.
(Stream s m a, HasReaderOptions st) =>
Extension -> ParsecT s st m ()
guardEnabled Extension
Ext_smart
  forall s (m :: * -> *) t u a.
Stream s m t =>
[ParsecT s u m a] -> ParsecT s u m a
choice [ forall st (m :: * -> *) s.
(HasLastStrPosition st, HasQuoteContext st m, Stream s m Char,
 UpdateSourcePos s Char) =>
ParsecT s st m Inlines -> ParsecT s st m Inlines
quoted ParsecT s st m Inlines
inlineParser, forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s st m Inlines
apostrophe, forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s st m Inlines
doubleCloseQuote, forall st s (m :: * -> *).
(HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s st m Inlines
dash, forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s st m Inlines
ellipses ]

-- | Parses inline text in single or double quotes, assumes English
-- quoting conventions.
quoted :: (HasLastStrPosition st, HasQuoteContext st m,
           Stream s m Char, UpdateSourcePos s Char)
       => ParsecT s st m Inlines
       -> ParsecT s st m Inlines
quoted :: forall st (m :: * -> *) s.
(HasLastStrPosition st, HasQuoteContext st m, Stream s m Char,
 UpdateSourcePos s Char) =>
ParsecT s st m Inlines -> ParsecT s st m Inlines
quoted ParsecT s st m Inlines
inlineParser = forall st (m :: * -> *) s.
(HasQuoteContext st m, HasLastStrPosition st, Stream s m Char,
 UpdateSourcePos s Char) =>
ParsecT s st m Inlines -> ParsecT s st m Inlines
doubleQuoted ParsecT s st m Inlines
inlineParser forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> forall st (m :: * -> *) s.
(HasLastStrPosition st, HasQuoteContext st m, Stream s m Char,
 UpdateSourcePos s Char) =>
ParsecT s st m Inlines -> ParsecT s st m Inlines
singleQuoted ParsecT s st m Inlines
inlineParser

-- | Parses inline text in single quotes, assumes English quoting
-- conventions.
singleQuoted :: (HasLastStrPosition st, HasQuoteContext st m,
                 Stream s m Char, UpdateSourcePos s Char)
             => ParsecT s st m Inlines
             -> ParsecT s st m Inlines
singleQuoted :: forall st (m :: * -> *) s.
(HasLastStrPosition st, HasQuoteContext st m, Stream s m Char,
 UpdateSourcePos s Char) =>
ParsecT s st m Inlines -> ParsecT s st m Inlines
singleQuoted ParsecT s st m Inlines
inlineParser = do
  forall st (m :: * -> *) s.
(HasLastStrPosition st, HasQuoteContext st m, Stream s m Char,
 UpdateSourcePos s Char) =>
ParsecT s st m ()
singleQuoteStart
  (Inlines -> Inlines
B.singleQuoted forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Monoid a => [a] -> a
mconcat forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
    forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try
     (forall st (m :: * -> *) s a.
HasQuoteContext st m =>
QuoteContext -> ParsecT s st m a -> ParsecT s st m a
withQuoteContext QuoteContext
InSingleQuote (forall end s (m :: * -> *) t st a.
(Show end, Stream s m t) =>
ParsecT s st m a -> ParsecT s st m end -> ParsecT s st m [a]
many1Till ParsecT s st m Inlines
inlineParser forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s st m ()
singleQuoteEnd)))
   forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> forall (f :: * -> *) a. Applicative f => a -> f a
pure Inlines
"\8217"

-- | Parses inline text in double quotes; assumes English quoting
-- conventions.
doubleQuoted :: (HasQuoteContext st m, HasLastStrPosition st,
                 Stream s m Char, UpdateSourcePos s Char)
             => ParsecT s st m Inlines
             -> ParsecT s st m Inlines
doubleQuoted :: forall st (m :: * -> *) s.
(HasQuoteContext st m, HasLastStrPosition st, Stream s m Char,
 UpdateSourcePos s Char) =>
ParsecT s st m Inlines -> ParsecT s st m Inlines
doubleQuoted ParsecT s st m Inlines
inlineParser = do
  forall st (m :: * -> *) s.
(HasLastStrPosition st, HasQuoteContext st m, Stream s m Char,
 UpdateSourcePos s Char) =>
ParsecT s st m ()
doubleQuoteStart
  (Inlines -> Inlines
B.doubleQuoted forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Monoid a => [a] -> a
mconcat forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
    forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try
     (forall st (m :: * -> *) s a.
HasQuoteContext st m =>
QuoteContext -> ParsecT s st m a -> ParsecT s st m a
withQuoteContext QuoteContext
InDoubleQuote (forall s (m :: * -> *) t u a end.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m end -> ParsecT s u m [a]
manyTill ParsecT s st m Inlines
inlineParser forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s st m ()
doubleQuoteEnd)))
   forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> forall (f :: * -> *) a. Applicative f => a -> f a
pure (Text -> Inlines
B.str Text
"\8220")

charOrRef :: (Stream s m Char, UpdateSourcePos s Char) => [Char] -> ParsecT s st m Char
charOrRef :: forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
[Char] -> ParsecT s st m Char
charOrRef [Char]
cs =
  forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
[Char] -> ParsecT s u m Char
oneOf [Char]
cs forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (do Text
t <- forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s st m Text
characterReference
                       case Text -> [Char]
T.unpack Text
t of
                         [Char
c] | Char
c forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Char]
cs -> forall (m :: * -> *) a. Monad m => a -> m a
return Char
c
                         [Char]
_ -> forall (m :: * -> *) a. MonadFail m => [Char] -> m a
fail [Char]
"unexpected character reference")

-- | Succeeds if the parser is
--
-- * not within single quoted text;
-- * not directly after a word; and
-- * looking at an opening single quote char that's not followed by a
--   space.
--
-- Gobbles the quote character on success.
singleQuoteStart :: (HasLastStrPosition st, HasQuoteContext st m,
                     Stream s m Char, UpdateSourcePos s Char)
                 => ParsecT s st m ()
singleQuoteStart :: forall st (m :: * -> *) s.
(HasLastStrPosition st, HasQuoteContext st m, Stream s m Char,
 UpdateSourcePos s Char) =>
ParsecT s st m ()
singleQuoteStart = do
  forall st (m :: * -> *) s t.
(HasQuoteContext st m, Stream s m t) =>
QuoteContext -> ParsecT s st m ()
failIfInQuoteContext QuoteContext
InSingleQuote
  -- single quote start can't be right after str
  forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< forall s (m :: * -> *) a st.
(Stream s m a, HasLastStrPosition st) =>
ParsecT s st m Bool
notAfterString
  forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try forall a b. (a -> b) -> a -> b
$ do
    forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
[Char] -> ParsecT s st m Char
charOrRef [Char]
"'\8216\145"
    forall (f :: * -> *) a. Functor f => f a -> f ()
void forall a b. (a -> b) -> a -> b
$ forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m a
lookAhead (forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isSpaceChar))

singleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
               => ParsecT s st m ()
singleQuoteEnd :: forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s st m ()
singleQuoteEnd = forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try forall a b. (a -> b) -> a -> b
$ do
  forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
[Char] -> ParsecT s st m Char
charOrRef [Char]
"'\8217\146"
  forall s (m :: * -> *) t a u.
(Stream s m t, Show a) =>
ParsecT s u m a -> ParsecT s u m ()
notFollowedBy forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m Char
alphaNum

-- | Succeeds if the parser is
--
-- * not within a double quoted text;
--
-- * not directly after a word; and
--
-- * looking at an opening double quote char that's not followed by a
--   space.
--
-- Gobbles the quote character on success.
doubleQuoteStart :: (HasLastStrPosition st,
                     HasQuoteContext st m,
                     Stream s m Char, UpdateSourcePos s Char)
                 => ParsecT s st m ()
doubleQuoteStart :: forall st (m :: * -> *) s.
(HasLastStrPosition st, HasQuoteContext st m, Stream s m Char,
 UpdateSourcePos s Char) =>
ParsecT s st m ()
doubleQuoteStart = do
  forall st (m :: * -> *) s t.
(HasQuoteContext st m, Stream s m t) =>
QuoteContext -> ParsecT s st m ()
failIfInQuoteContext QuoteContext
InDoubleQuote
  forall (f :: * -> *). Alternative f => Bool -> f ()
guard forall (m :: * -> *) a b. Monad m => (a -> m b) -> m a -> m b
=<< forall s (m :: * -> *) a st.
(Stream s m a, HasLastStrPosition st) =>
ParsecT s st m Bool
notAfterString
  forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try forall a b. (a -> b) -> a -> b
$ do forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
[Char] -> ParsecT s st m Char
charOrRef [Char]
"\"\8220\147"
           forall (f :: * -> *) a. Functor f => f a -> f ()
void forall a b. (a -> b) -> a -> b
$ forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m a
lookAhead (forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
(Char -> Bool) -> ParsecT s u m Char
satisfy (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isSpaceChar))

-- | Parses a closing quote character.
doubleQuoteEnd :: (Stream s m Char, UpdateSourcePos s Char)
               => ParsecT s st m ()
doubleQuoteEnd :: forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s st m ()
doubleQuoteEnd = forall (f :: * -> *) a. Functor f => f a -> f ()
void (forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
[Char] -> ParsecT s st m Char
charOrRef [Char]
"\"\8221\148")

-- | Parses an ASCII apostrophe (@'@) or right single quotation mark and
-- returns a RIGHT SINGLE QUOtatiON MARK character.
apostrophe :: (Stream s m Char, UpdateSourcePos s Char)
           => ParsecT s st m Inlines
apostrophe :: forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s st m Inlines
apostrophe = (forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
'\'' forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
'\8217') forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Inlines
B.str Text
"\8217")

-- | Parses an ASCII quotation mark character and returns a RIGHT DOUBLE
-- QUOTATION MARK.
doubleCloseQuote :: (Stream s m Char, UpdateSourcePos s Char)
                 => ParsecT s st m Inlines
doubleCloseQuote :: forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s st m Inlines
doubleCloseQuote = Text -> Inlines
B.str Text
"\8221" forall (f :: * -> *) a b. Functor f => a -> f b -> f a
<$ forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
'"'

-- | Parses three dots as HORIZONTAL ELLIPSIS.
ellipses :: (Stream s m Char, UpdateSourcePos s Char)
         => ParsecT s st m Inlines
ellipses :: forall s (m :: * -> *) st.
(Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s st m Inlines
ellipses = forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try (forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
[Char] -> ParsecT s u m [Char]
string [Char]
"..." forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Inlines
B.str Text
"\8230"))

-- | Parses two hyphens as EN DASH and three as EM DASH.
--
-- If the extension @'Ext_old_dashes'@ is enabled, then two hyphens are
-- parsed as EM DASH, and one hyphen is parsed as EN DASH if it is
-- followed by a digit.
dash :: (HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char)
     => ParsecT s st m Inlines
dash :: forall st s (m :: * -> *).
(HasReaderOptions st, Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s st m Inlines
dash = forall s u (m :: * -> *) a. ParsecT s u m a -> ParsecT s u m a
try forall a b. (a -> b) -> a -> b
$ do
  Bool
oldDashes <- Extension -> Extensions -> Bool
extensionEnabled Extension
Ext_old_dashes forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall st s (m :: * -> *) t b.
(HasReaderOptions st, Stream s m t) =>
(ReaderOptions -> b) -> ParsecT s st m b
getOption ReaderOptions -> Extensions
readerExtensions
  if Bool
oldDashes
     then do
       forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
'-'
       (forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
'-' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Inlines
B.str Text
"\8212"))
         forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> (forall s (m :: * -> *) t u a.
Stream s m t =>
ParsecT s u m a -> ParsecT s u m a
lookAhead forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
ParsecT s u m Char
digit forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Inlines
B.str Text
"\8211"))
     else do
       forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
[Char] -> ParsecT s u m [Char]
string [Char]
"--"
       (forall (m :: * -> *) s u.
(Monad m, Stream s m Char, UpdateSourcePos s Char) =>
Char -> ParsecT s u m Char
char Char
'-' forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Inlines
B.str Text
"\8212"))
         forall s u (m :: * -> *) a.
ParsecT s u m a -> ParsecT s u m a -> ParsecT s u m a
<|> forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Inlines
B.str Text
"\8211")