{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedLists #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
{-# LANGUAGE TupleSections #-}
-- | This module deals with punctuations in Korean text.
module Text.Seonbi.Punctuation
    ( -- * Arrows
      ArrowTransformationOption (..)
    , transformArrow
      -- * Quotes
    , CitationQuotes (..)
    , Quotes (..)
    , QuotePair (..)
    , angleQuotes
    , cornerBrackets
    , curvedQuotes
    , curvedSingleQuotesWithQ
    , guillemets
    , horizontalCornerBrackets
    , horizontalCornerBracketsWithQ
    , quoteCitation
    , transformQuote
    , verticalCornerBrackets
    , verticalCornerBracketsWithQ
      -- * Stops: periods, commas, & interpuncts
    , Stops (..)
    , horizontalStops
    , horizontalStopsWithSlashes
    , normalizeStops
    , transformEllipsis
    , verticalStops
      -- * Dashes
    , transformEmDash
    ) where

import Prelude hiding (takeWhile)

import Control.Monad
import Data.Char (isSpace)
import Data.Either
import Data.List (minimumBy)
import Data.Maybe
import Data.Ord
import Numeric

import Data.Attoparsec.Text
import Data.Set
import Data.Text hiding (any, length, takeWhile)
import qualified Data.Text

import Text.Seonbi.Html
import Text.Seonbi.Html.Clipper
import Text.Seonbi.Html.Lang
import Text.Seonbi.Html.Preservation
import Text.Seonbi.Html.Wrapper
import Text.Seonbi.PairedTransformer

-- | A set of quoting parentheses to be used by 'quoteCitation' function.
--
-- There are two presets: 'angleQuotes' and 'cornerBrackets'.  These both
-- surround titles with a @\<cite>@ tag.  In order to disable surrounded
-- elements, set 'htmlElement' field to 'Nothing', e.g.:
--
-- @
-- 'angleQuotes' { 'htmlElement' = 'Nothing' }
-- @
data CitationQuotes = CitationQuotes
    { -- | The leading and trailing punctuations to surround a title of
      -- novel, newspaper, magazine, movie, television program, etc.
      CitationQuotes -> (Text, Text)
title :: (Text, Text)
    , -- | The leading and trailing punctuations to surround a title of
      -- short story, chapter, article, episode, etc.
      CitationQuotes -> (Text, Text)
subtitle :: (Text, Text)
    , -- | Optional pair of an HTML element and its attributes to surround
      -- citations.  E.g., if it is @'Just' ('Cite', " class=\"autogen\")@
      -- titles are transformed like @\<cite class="autogen">이런 날\</cite>@.
      CitationQuotes -> Maybe (HtmlTag, Text)
htmlElement :: Maybe (HtmlTag, HtmlRawAttrs)
    } deriving (CitationQuotes -> CitationQuotes -> Bool
(CitationQuotes -> CitationQuotes -> Bool)
-> (CitationQuotes -> CitationQuotes -> Bool) -> Eq CitationQuotes
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: CitationQuotes -> CitationQuotes -> Bool
$c/= :: CitationQuotes -> CitationQuotes -> Bool
== :: CitationQuotes -> CitationQuotes -> Bool
$c== :: CitationQuotes -> CitationQuotes -> Bool
Eq, Int -> CitationQuotes -> ShowS
[CitationQuotes] -> ShowS
CitationQuotes -> String
(Int -> CitationQuotes -> ShowS)
-> (CitationQuotes -> String)
-> ([CitationQuotes] -> ShowS)
-> Show CitationQuotes
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [CitationQuotes] -> ShowS
$cshowList :: [CitationQuotes] -> ShowS
show :: CitationQuotes -> String
$cshow :: CitationQuotes -> String
showsPrec :: Int -> CitationQuotes -> ShowS
$cshowsPrec :: Int -> CitationQuotes -> ShowS
Show)

-- | Cite a title using angle quotes, used by South Korean orthography in
-- horizontal writing (橫書), e.g., 《나비와 엉겅퀴》 or 〈枾崎의 바다〉.
angleQuotes :: CitationQuotes
angleQuotes :: CitationQuotes
angleQuotes = CitationQuotes :: (Text, Text)
-> (Text, Text) -> Maybe (HtmlTag, Text) -> CitationQuotes
CitationQuotes
    { title :: (Text, Text)
title = (Text
"&#12298;", Text
"&#12299;")
    , subtitle :: (Text, Text)
subtitle = (Text
"&#12296;", Text
"&#12297;")
    , htmlElement :: Maybe (HtmlTag, Text)
htmlElement = (HtmlTag, Text) -> Maybe (HtmlTag, Text)
forall a. a -> Maybe a
Just (HtmlTag
Cite, Text
"")
    }

-- | Cite a title using corner brackets, used by South Korean orthography in
-- vertical writing (縱書) and Japanese orthography,
-- e.g., 『나비와 엉겅퀴』 or 「枾崎의 바다」.
cornerBrackets :: CitationQuotes
cornerBrackets :: CitationQuotes
cornerBrackets = CitationQuotes :: (Text, Text)
-> (Text, Text) -> Maybe (HtmlTag, Text) -> CitationQuotes
CitationQuotes
    { title :: (Text, Text)
title = (Text
"&#12302;", Text
"&#12303;")
    , subtitle :: (Text, Text)
subtitle = (Text
"&#12300;", Text
"&#12301;")
    , htmlElement :: Maybe (HtmlTag, Text)
htmlElement = (HtmlTag, Text) -> Maybe (HtmlTag, Text)
forall a. a -> Maybe a
Just (HtmlTag
Cite, Text
"")
    }

-- | People tend to cite the title of a work (e.g., a book, a paper, a poem,
-- a song, a film, a TV show, a game) by wrapping inequality symbols
-- like @\<\<나비와 엉겅퀴>>@ or @\<枾崎의 바다>@ instead of proper angle quotes
-- like @《나비와 엉겅퀴》@ or @〈枾崎의 바다〉@.
--
-- This transforms, in the given HTML fragments, all folk-citing quotes into
-- typographic citing quotes:
--
-- - Pairs of less-than and greater-than inequality symbols (@<@ & @>@) into
--   pairs of proper angle quotes (@〈@ & @〉@)
-- - Pairs of two consecutive inequality symbols (@<<@ & @>>@) into
--   pairs of proper double angle quotes (@《@ & @》@)
quoteCitation :: CitationQuotes -- ^ Quoting parentheses to wrap titles.
              -> [HtmlEntity] -- ^ The input HTML entities to transform.
              -> [HtmlEntity]
quoteCitation :: CitationQuotes -> [HtmlEntity] -> [HtmlEntity]
quoteCitation CitationQuotes
quotes =
    PairedTransformer TitlePunct -> [HtmlEntity] -> [HtmlEntity]
forall m. PairedTransformer m -> [HtmlEntity] -> [HtmlEntity]
transformPairs PairedTransformer TitlePunct
pairedTransformer
  where
    pairedTransformer :: PairedTransformer TitlePunct
    pairedTransformer :: PairedTransformer TitlePunct
pairedTransformer = PairedTransformer :: forall match.
(HtmlTagStack -> Bool)
-> ([match] -> Text -> Maybe (match, Text, Text, Text))
-> (Text -> Maybe (match, Text, Text, Text))
-> (match -> match -> Bool)
-> (match -> match -> [HtmlEntity] -> [HtmlEntity])
-> PairedTransformer match
PairedTransformer
        { ignoresTagStack :: HtmlTagStack -> Bool
ignoresTagStack = HtmlTagStack -> Bool
isPreservedTagStack
        , matchStart :: [TitlePunct] -> Text -> Maybe (TitlePunct, Text, Text, Text)
matchStart = \ [TitlePunct]
_ -> Parser [Either Text (TitlePunct, Text, Text)]
-> Text -> Maybe (TitlePunct, Text, Text, Text)
matcher (Parser [Either Text (TitlePunct, Text, Text)]
 -> Text -> Maybe (TitlePunct, Text, Text, Text))
-> Parser [Either Text (TitlePunct, Text, Text)]
-> Text
-> Maybe (TitlePunct, Text, Text, Text)
forall a b. (a -> b) -> a -> b
$ Parser (TitlePunct, Text)
-> Parser (TitlePunct, Text)
-> Parser [Either Text (TitlePunct, Text, Text)]
parser Parser (TitlePunct, Text)
openTitle Parser (TitlePunct, Text)
openSubtitle
        , matchEnd :: Text -> Maybe (TitlePunct, Text, Text, Text)
matchEnd = Parser [Either Text (TitlePunct, Text, Text)]
-> Text -> Maybe (TitlePunct, Text, Text, Text)
matcher (Parser [Either Text (TitlePunct, Text, Text)]
 -> Text -> Maybe (TitlePunct, Text, Text, Text))
-> Parser [Either Text (TitlePunct, Text, Text)]
-> Text
-> Maybe (TitlePunct, Text, Text, Text)
forall a b. (a -> b) -> a -> b
$ Parser (TitlePunct, Text)
-> Parser (TitlePunct, Text)
-> Parser [Either Text (TitlePunct, Text, Text)]
parser Parser (TitlePunct, Text)
closeTitle Parser (TitlePunct, Text)
closeSubtitle
        , areMatchesPaired :: TitlePunct -> TitlePunct -> Bool
areMatchesPaired = TitlePunct -> TitlePunct -> Bool
forall a. Eq a => a -> a -> Bool
(==)
        , transformPair :: TitlePunct -> TitlePunct -> [HtmlEntity] -> [HtmlEntity]
transformPair = TitlePunct -> TitlePunct -> [HtmlEntity] -> [HtmlEntity]
transformPair'
        }
    transformPair' :: TitlePunct -> TitlePunct -> [HtmlEntity] -> [HtmlEntity]
    transformPair' :: TitlePunct -> TitlePunct -> [HtmlEntity] -> [HtmlEntity]
transformPair' TitlePunct
punct TitlePunct
_ [HtmlEntity]
buffer =
        case [HtmlEntity]
cited of
            [] -> []
            entities :: [HtmlEntity]
entities@(HtmlEntity
x : [HtmlEntity]
_) ->
                let
                    ts :: HtmlTagStack
ts = HtmlEntity -> HtmlTagStack
tagStack HtmlEntity
x
                    makeText :: Text -> HtmlEntity
makeText = HtmlTagStack -> Text -> HtmlEntity
HtmlText HtmlTagStack
ts
                    category :: CitationQuotes -> (Text, Text)
category = case TitlePunct
punct of
                        TitlePunct
DoubleAngle -> CitationQuotes -> (Text, Text)
title
                        TitlePunct
DoubleCorner -> CitationQuotes -> (Text, Text)
title
                        TitlePunct
DoubleInequal -> CitationQuotes -> (Text, Text)
title
                        TitlePunct
Angle -> CitationQuotes -> (Text, Text)
subtitle
                        TitlePunct
Corner -> CitationQuotes -> (Text, Text)
subtitle
                        TitlePunct
Inequal -> CitationQuotes -> (Text, Text)
subtitle
                    (Text
startP, Text
endP) = CitationQuotes -> (Text, Text)
category CitationQuotes
quotes
                in
                    Text -> HtmlEntity
makeText Text
startP HtmlEntity -> [HtmlEntity] -> [HtmlEntity]
forall a. a -> [a] -> [a]
: [HtmlEntity]
entities [HtmlEntity] -> [HtmlEntity] -> [HtmlEntity]
forall a. [a] -> [a] -> [a]
++ [Text -> HtmlEntity
makeText Text
endP]
      where
        buffer' :: [HtmlEntity]
        buffer' :: [HtmlEntity]
buffer' = Int -> [HtmlEntity] -> [HtmlEntity]
forall a. Int -> [a] -> [a]
Prelude.drop Int
1 ([HtmlEntity] -> [HtmlEntity]) -> [HtmlEntity] -> [HtmlEntity]
forall a b. (a -> b) -> a -> b
$ Int -> [HtmlEntity] -> [HtmlEntity]
forall a. Int -> [a] -> [a]
Prelude.take ([HtmlEntity] -> Int
forall (t :: * -> *) a. Foldable t => t a -> Int
length [HtmlEntity]
buffer Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1) [HtmlEntity]
buffer
        cited :: [HtmlEntity]
        cited :: [HtmlEntity]
cited = case (CitationQuotes -> Maybe (HtmlTag, Text)
htmlElement CitationQuotes
quotes, [HtmlEntity]
buffer') of
            (Maybe (HtmlTag, Text)
Nothing, [HtmlEntity]
b) -> [HtmlEntity]
b
            (Maybe (HtmlTag, Text)
_, []) -> []
            (Just (HtmlTag
tag', Text
""), HtmlEntity
x : [HtmlEntity]
_) ->
                if [HtmlEntity]
buffer' [HtmlEntity] -> HtmlTag -> Bool
`isWrappedBy` HtmlTag
tag'
                    then [HtmlEntity]
buffer'
                    else HtmlTagStack -> HtmlTag -> Text -> [HtmlEntity] -> [HtmlEntity]
wrap (HtmlEntity -> HtmlTagStack
tagStack HtmlEntity
x) HtmlTag
tag' Text
"" [HtmlEntity]
buffer'
            (Just (HtmlTag
tag', Text
attrs), HtmlEntity
x : [HtmlEntity]
_) ->
                if [HtmlEntity] -> HtmlTag -> Maybe Text -> Bool
isWrappedBy' [HtmlEntity]
buffer' HtmlTag
tag' (Text -> Maybe Text
forall a. a -> Maybe a
Just Text
attrs)
                    then [HtmlEntity]
buffer'
                    else HtmlTagStack -> HtmlTag -> Text -> [HtmlEntity] -> [HtmlEntity]
wrap (HtmlEntity -> HtmlTagStack
tagStack HtmlEntity
x) HtmlTag
tag' Text
attrs [HtmlEntity]
buffer'
    specialChars :: Set Char
    specialChars :: Set Char
specialChars =
        [ Char
Item (Set Char)
'<', Char
Item (Set Char)
'>', Char
Item (Set Char)
'&'
        , Char
Item (Set Char)
'\x3008', Char
Item (Set Char)
'\x3009', Char
Item (Set Char)
'\x300a', Char
Item (Set Char)
'\x300b', Char
Item (Set Char)
'\x300e', Char
Item (Set Char)
'\x300f'
        ]
    matcher :: Parser [Either Text (TitlePunct, Text, Text)]
            -> Text
            -> Maybe (TitlePunct, Text, Text, Text)
    matcher :: Parser [Either Text (TitlePunct, Text, Text)]
-> Text -> Maybe (TitlePunct, Text, Text, Text)
matcher Parser [Either Text (TitlePunct, Text, Text)]
parser' Text
text' = case Parser [Either Text (TitlePunct, Text, Text)]
-> Text -> Either String [Either Text (TitlePunct, Text, Text)]
forall a. Parser a -> Text -> Either String a
parseOnly Parser [Either Text (TitlePunct, Text, Text)]
parser' Text
text' of
        Left String
_ -> Maybe (TitlePunct, Text, Text, Text)
forall a. Maybe a
Nothing
        Right [Either Text (TitlePunct, Text, Text)]
matches -> case [Either Text (TitlePunct, Text, Text)]
-> ([Text], [(TitlePunct, Text, Text)])
forall a b. [Either a b] -> ([a], [b])
partitionEithers [Either Text (TitlePunct, Text, Text)]
matches of
            ([Text]
l, [(punct, m, post)]) -> (TitlePunct, Text, Text, Text)
-> Maybe (TitlePunct, Text, Text, Text)
forall a. a -> Maybe a
Just (TitlePunct
punct, [Text] -> Text
Data.Text.concat [Text]
l, Text
m, Text
post)
            ([Text], [(TitlePunct, Text, Text)])
_ -> Maybe (TitlePunct, Text, Text, Text)
forall a. Maybe a
Nothing
    parser :: Parser (TitlePunct, Text)
           -> Parser (TitlePunct, Text)
           -> Parser [Either Text (TitlePunct, Text, Text)]
    parser :: Parser (TitlePunct, Text)
-> Parser (TitlePunct, Text)
-> Parser [Either Text (TitlePunct, Text, Text)]
parser Parser (TitlePunct, Text)
title' Parser (TitlePunct, Text)
subtitle' = Parser Text (Either Text (TitlePunct, Text, Text))
-> Parser [Either Text (TitlePunct, Text, Text)]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' (Parser Text (Either Text (TitlePunct, Text, Text))
 -> Parser [Either Text (TitlePunct, Text, Text)])
-> Parser Text (Either Text (TitlePunct, Text, Text))
-> Parser [Either Text (TitlePunct, Text, Text)]
forall a b. (a -> b) -> a -> b
$ [Parser Text (Either Text (TitlePunct, Text, Text))]
-> Parser Text (Either Text (TitlePunct, Text, Text))
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Text -> Either Text (TitlePunct, Text, Text)
forall a b. a -> Either a b
Left (Text -> Either Text (TitlePunct, Text, Text))
-> Parser Text Text
-> Parser Text (Either Text (TitlePunct, Text, Text))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Text Text
takeWhile1 (Char -> Set Char -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` Set Char
specialChars)
        , do
            (TitlePunct
punct, Text
m) <- Parser (TitlePunct, Text)
title'
            Text
remain <- (Char -> Bool) -> Parser Text Text
takeWhile (Bool -> Char -> Bool
forall a b. a -> b -> a
const Bool
True)
            Either Text (TitlePunct, Text, Text)
-> Parser Text (Either Text (TitlePunct, Text, Text))
forall (m :: * -> *) a. Monad m => a -> m a
return (Either Text (TitlePunct, Text, Text)
 -> Parser Text (Either Text (TitlePunct, Text, Text)))
-> Either Text (TitlePunct, Text, Text)
-> Parser Text (Either Text (TitlePunct, Text, Text))
forall a b. (a -> b) -> a -> b
$ (TitlePunct, Text, Text) -> Either Text (TitlePunct, Text, Text)
forall a b. b -> Either a b
Right (TitlePunct
punct, Text
m, Text
remain)
        , do
            (TitlePunct
punct, Text
m) <- Parser (TitlePunct, Text)
subtitle'
            Text
remain <- (Char -> Bool) -> Parser Text Text
takeWhile (Bool -> Char -> Bool
forall a b. a -> b -> a
const Bool
True)
            Either Text (TitlePunct, Text, Text)
-> Parser Text (Either Text (TitlePunct, Text, Text))
forall (m :: * -> *) a. Monad m => a -> m a
return (Either Text (TitlePunct, Text, Text)
 -> Parser Text (Either Text (TitlePunct, Text, Text)))
-> Either Text (TitlePunct, Text, Text)
-> Parser Text (Either Text (TitlePunct, Text, Text))
forall a b. (a -> b) -> a -> b
$ (TitlePunct, Text, Text) -> Either Text (TitlePunct, Text, Text)
forall a b. b -> Either a b
Right (TitlePunct
punct, Text
m, Text
remain)
        , Text -> Either Text (TitlePunct, Text, Text)
forall a b. a -> Either a b
Left (Text -> Either Text (TitlePunct, Text, Text))
-> (Char -> Text) -> Char -> Either Text (TitlePunct, Text, Text)
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Text
Data.Text.singleton (Char -> Either Text (TitlePunct, Text, Text))
-> Parser Text Char
-> Parser Text (Either Text (TitlePunct, Text, Text))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text Char
anyChar
        ]
    openTitle :: Parser (TitlePunct, Text)
    openTitle :: Parser (TitlePunct, Text)
openTitle = [Parser (TitlePunct, Text)] -> Parser (TitlePunct, Text)
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Parser (TitlePunct, Text)
Item [Parser (TitlePunct, Text)]
leftDoubleAngle
        , Parser (TitlePunct, Text)
Item [Parser (TitlePunct, Text)]
leftDoubleCorner
        , (TitlePunct
DoubleInequal,) (Text -> (TitlePunct, Text))
-> Parser Text Text -> Parser (TitlePunct, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text Text -> Parser Text Text
double' Parser Text Text
lt
        ]
    closeTitle :: Parser (TitlePunct, Text)
    closeTitle :: Parser (TitlePunct, Text)
closeTitle = [Parser (TitlePunct, Text)] -> Parser (TitlePunct, Text)
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Parser (TitlePunct, Text)
Item [Parser (TitlePunct, Text)]
rightDoubleAngle
        , Parser (TitlePunct, Text)
Item [Parser (TitlePunct, Text)]
rightDoubleCorner
        , (TitlePunct
DoubleInequal,) (Text -> (TitlePunct, Text))
-> Parser Text Text -> Parser (TitlePunct, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text Text -> Parser Text Text
double' Parser Text Text
gt
        ]
    double' :: Parser Text -> Parser Text
    double' :: Parser Text Text -> Parser Text Text
double' Parser Text Text
p = do
        Text
t <- Parser Text Text
p
        Text
t' <- Parser Text Text
p
        Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text
t Text -> Text -> Text
`append` Text
t')
    openSubtitle :: Parser (TitlePunct, Text)
    openSubtitle :: Parser (TitlePunct, Text)
openSubtitle = [Parser (TitlePunct, Text)] -> Parser (TitlePunct, Text)
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice [Parser (TitlePunct, Text)
Item [Parser (TitlePunct, Text)]
leftAngle, (TitlePunct
Inequal,) (Text -> (TitlePunct, Text))
-> Parser Text Text -> Parser (TitlePunct, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text Text
lt]
    closeSubtitle :: Parser (TitlePunct, Text)
    closeSubtitle :: Parser (TitlePunct, Text)
closeSubtitle = [Parser (TitlePunct, Text)] -> Parser (TitlePunct, Text)
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice [Parser (TitlePunct, Text)
Item [Parser (TitlePunct, Text)]
rightAngle, (TitlePunct
Inequal,) (Text -> (TitlePunct, Text))
-> Parser Text Text -> Parser (TitlePunct, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text Text
gt]
    leftAngle :: Parser (TitlePunct, Text)
    leftAngle :: Parser (TitlePunct, Text)
leftAngle = (TitlePunct
Angle,) (Text -> (TitlePunct, Text))
-> Parser Text Text -> Parser (TitlePunct, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Char -> Text
Data.Text.singleton (Char -> Text) -> Parser Text Char -> Parser Text Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Text Char
char Char
'\x3008'
        , Text -> Parser Text Text
string Text
"&#12296;"
        , Text -> Parser Text Text
asciiCI Text
"&#x3008;"
        ]
    rightAngle :: Parser (TitlePunct, Text)
    rightAngle :: Parser (TitlePunct, Text)
rightAngle = (TitlePunct
Angle,) (Text -> (TitlePunct, Text))
-> Parser Text Text -> Parser (TitlePunct, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Char -> Text
Data.Text.singleton (Char -> Text) -> Parser Text Char -> Parser Text Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Text Char
char Char
'\x3009'
        , Text -> Parser Text Text
string Text
"&#12297;"
        , Text -> Parser Text Text
asciiCI Text
"&#x3009;"
        ]
    leftDoubleAngle :: Parser (TitlePunct, Text)
    leftDoubleAngle :: Parser (TitlePunct, Text)
leftDoubleAngle = (TitlePunct
DoubleAngle,) (Text -> (TitlePunct, Text))
-> Parser Text Text -> Parser (TitlePunct, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Char -> Text
Data.Text.singleton (Char -> Text) -> Parser Text Char -> Parser Text Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Text Char
char Char
'\x300a'
        , Text -> Parser Text Text
string Text
"&#12298;"
        , Text -> Parser Text Text
asciiCI Text
"&#x300a;"
        ]
    rightDoubleAngle :: Parser (TitlePunct, Text)
    rightDoubleAngle :: Parser (TitlePunct, Text)
rightDoubleAngle = (TitlePunct
DoubleAngle,) (Text -> (TitlePunct, Text))
-> Parser Text Text -> Parser (TitlePunct, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Char -> Text
Data.Text.singleton (Char -> Text) -> Parser Text Char -> Parser Text Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Text Char
char Char
'\x300b'
        , Text -> Parser Text Text
string Text
"&#12299;"
        , Text -> Parser Text Text
asciiCI Text
"&#x300b;"
        ]
    leftDoubleCorner :: Parser (TitlePunct, Text)
    leftDoubleCorner :: Parser (TitlePunct, Text)
leftDoubleCorner = (TitlePunct
DoubleCorner,) (Text -> (TitlePunct, Text))
-> Parser Text Text -> Parser (TitlePunct, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Char -> Text
Data.Text.singleton (Char -> Text) -> Parser Text Char -> Parser Text Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Text Char
char Char
'\x300e'
        , Text -> Parser Text Text
string Text
"&#12302;"
        , Text -> Parser Text Text
asciiCI Text
"&#x300e;"
        ]
    rightDoubleCorner :: Parser (TitlePunct, Text)
    rightDoubleCorner :: Parser (TitlePunct, Text)
rightDoubleCorner = (TitlePunct
DoubleCorner,) (Text -> (TitlePunct, Text))
-> Parser Text Text -> Parser (TitlePunct, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Char -> Text
Data.Text.singleton (Char -> Text) -> Parser Text Char -> Parser Text Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Text Char
char Char
'\x300f'
        , Text -> Parser Text Text
string Text
"&#12303;"
        , Text -> Parser Text Text
asciiCI Text
"&#x300f;"
        ]

data TitlePunct
    = DoubleAngle | Angle
    | DoubleCorner | Corner
    | DoubleInequal | Inequal
    deriving (TitlePunct -> TitlePunct -> Bool
(TitlePunct -> TitlePunct -> Bool)
-> (TitlePunct -> TitlePunct -> Bool) -> Eq TitlePunct
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: TitlePunct -> TitlePunct -> Bool
$c/= :: TitlePunct -> TitlePunct -> Bool
== :: TitlePunct -> TitlePunct -> Bool
$c== :: TitlePunct -> TitlePunct -> Bool
Eq, Int -> TitlePunct -> ShowS
[TitlePunct] -> ShowS
TitlePunct -> String
(Int -> TitlePunct -> ShowS)
-> (TitlePunct -> String)
-> ([TitlePunct] -> ShowS)
-> Show TitlePunct
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TitlePunct] -> ShowS
$cshowList :: [TitlePunct] -> ShowS
show :: TitlePunct -> String
$cshow :: TitlePunct -> String
showsPrec :: Int -> TitlePunct -> ShowS
$cshowsPrec :: Int -> TitlePunct -> ShowS
Show)


-- | A set of stops—'period', 'comma', and 'interpunct'—to be used by
-- 'normalizeStops' function.
--
-- There are three presets: 'horizontalStops', 'verticalStops', and
-- 'horizontalStopsWithSlashes'.
data Stops = Stops
    { Stops -> Text
period :: Text
    , Stops -> Text
comma :: Text
    , Stops -> Text
interpunct :: Text
    } deriving (Stops -> Stops -> Bool
(Stops -> Stops -> Bool) -> (Stops -> Stops -> Bool) -> Eq Stops
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Stops -> Stops -> Bool
$c/= :: Stops -> Stops -> Bool
== :: Stops -> Stops -> Bool
$c== :: Stops -> Stops -> Bool
Eq, Int -> Stops -> ShowS
[Stops] -> ShowS
Stops -> String
(Int -> Stops -> ShowS)
-> (Stops -> String) -> ([Stops] -> ShowS) -> Show Stops
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Stops] -> ShowS
$cshowList :: [Stops] -> ShowS
show :: Stops -> String
$cshow :: Stops -> String
showsPrec :: Int -> Stops -> ShowS
$cshowsPrec :: Int -> Stops -> ShowS
Show)

-- | Stop sentences in the modern Korean style which follows Western stops.
-- E.g.:
--
-- > 봄·여름·가을·겨울. 어제, 오늘.
horizontalStops :: Stops
horizontalStops :: Stops
horizontalStops = Stops :: Text -> Text -> Text -> Stops
Stops
    { period :: Text
period = Text
". "
    , comma :: Text
comma = Text
", "
    , interpunct :: Text
interpunct = Text
"·"
    }

-- | Stop sentences in the pre-modern Korean style which follows Chinese stops.
-- E.g.:
--
-- > 봄·여름·가을·겨울。어제、오늘。
verticalStops :: Stops
verticalStops :: Stops
verticalStops = Stops :: Text -> Text -> Text -> Stops
Stops
    { period :: Text
period = Text
"。"
    , comma :: Text
comma = Text
"、"
    , interpunct :: Text
interpunct = Text
"·"
    }

-- | Similar to 'horizontalStops' except slashes are used instead of
-- interpuncts. E.g.:
--
-- > 봄/여름/가을/겨울. 어제, 오늘.
horizontalStopsWithSlashes :: Stops
horizontalStopsWithSlashes :: Stops
horizontalStopsWithSlashes = Stops :: Text -> Text -> Text -> Stops
Stops
    { period :: Text
period = Text
". "
    , comma :: Text
comma = Text
", "
    , interpunct :: Text
interpunct = Text
"/"
    }


-- | Normalizes sentence stops (periods, commas, and interpuncts).
normalizeStops :: Stops -> [HtmlEntity] -> [HtmlEntity]
normalizeStops :: Stops -> [HtmlEntity] -> [HtmlEntity]
normalizeStops Stops
stops [HtmlEntity]
input = ((LangHtmlEntity -> HtmlEntity) -> [LangHtmlEntity] -> [HtmlEntity]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` [LangHtmlEntity]
annotatedEntities) ((LangHtmlEntity -> HtmlEntity) -> [HtmlEntity])
-> (LangHtmlEntity -> HtmlEntity) -> [HtmlEntity]
forall a b. (a -> b) -> a -> b
$ \ case
    LangHtmlEntity { lang :: LangHtmlEntity -> Maybe Text
lang = Maybe Text
l
                   , entity :: LangHtmlEntity -> HtmlEntity
entity = e :: HtmlEntity
e@HtmlText { tagStack :: HtmlEntity -> HtmlTagStack
tagStack = HtmlTagStack
stack, rawText :: HtmlEntity -> Text
rawText = Text
txt }
                   } ->
        if HtmlTagStack -> Bool
isPreservedTagStack HtmlTagStack
stack Bool -> Bool -> Bool
|| Maybe Text -> Bool
isNeverKorean Maybe Text
l
        then HtmlEntity
e
        else HtmlEntity
e { rawText :: Text
rawText = Text -> Text
replaceText Text
txt }
    LangHtmlEntity { entity :: LangHtmlEntity -> HtmlEntity
entity = HtmlEntity
e } ->
        HtmlEntity
e
  where
    annotatedEntities :: [LangHtmlEntity]
    annotatedEntities :: [LangHtmlEntity]
annotatedEntities = ([HtmlEntity] -> [LangHtmlEntity]
annotateWithLang ([HtmlEntity] -> [LangHtmlEntity])
-> ([HtmlEntity] -> [HtmlEntity])
-> [HtmlEntity]
-> [LangHtmlEntity]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [HtmlEntity] -> [HtmlEntity]
normalizeText) [HtmlEntity]
input
    replaceText :: Text -> Text
    replaceText :: Text -> Text
replaceText Text
txt =
        case Parser Text Text -> Text -> Either String Text
forall a. Parser a -> Text -> Either String a
parseOnly Parser Text Text
parser Text
txt of
            Left String
_ -> String -> Text
forall a. HasCallStack => String -> a
error String
"unexpected error: failed to parse text node"
            Right Text
t -> Text
t
    parser :: Parser Text
    parser :: Parser Text Text
parser = do
        [Text]
chunks <- Parser Text Text -> Parser Text [Text]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' (Parser Text Text -> Parser Text [Text])
-> Parser Text Text -> Parser Text [Text]
forall a b. (a -> b) -> a -> b
$ [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
            [ Parser Text Text
Item [Parser Text Text]
stops'
            , Char -> Text
Data.Text.singleton (Char -> Text) -> Parser Text Char -> Parser Text Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text Char
anyChar
            ]
        Parser Text ()
forall t. Chunk t => Parser t ()
endOfInput
        Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Parser Text Text) -> Text -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ [Text] -> Text
Data.Text.concat [Text]
chunks
    stops' :: Parser Text
    stops' :: Parser Text Text
stops' = [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ do { Ending
ending <- Parser Ending
period'
             ; Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Text
toEntity (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Ending -> Text -> Text
adjustEnding Ending
ending (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Stops -> Text
period Stops
stops)
             }
        , do { Ending
ending <- Parser Ending
comma'
             ; Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Text
toEntity (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Ending -> Text -> Text
adjustEnding Ending
ending (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Stops -> Text
comma Stops
stops)
             }
        , do { Ending
ending <- Parser Ending
interpunct'
             ; Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Text
toEntity (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Ending -> Text -> Text
adjustEnding Ending
ending (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Stops -> Text
interpunct Stops
stops)
             }
        ]
    adjustEnding :: Ending -> Text -> Text
    adjustEnding :: Ending -> Text -> Text
adjustEnding Ending
ending Text
text
      | Text -> Int
Data.Text.length Text
text Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
> Int
0 Bool -> Bool -> Bool
&& Char -> Bool
isSpace (Text -> Char
Data.Text.last Text
text) =
            Text -> Text
stripEnd Text
text Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> case Ending
ending of { TrailingChars Text
c -> Text
c
                                            ; TrailingSpaces Text
s -> Text
s
                                            ; Ending
Ending -> Text
Data.Text.empty
                                            }
      | Bool
otherwise = Text
text Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> case Ending
ending of { TrailingChars Text
c -> Text
c
                                           ; Ending
_ -> Text
Data.Text.empty
                                           }
    toEntity :: Text -> Text
    toEntity :: Text -> Text
toEntity = (Char -> Text) -> Text -> Text
Data.Text.concatMap ((Char -> Text) -> Text -> Text) -> (Char -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ \ Char
c ->
        if Char
c Char -> Char -> Bool
forall a. Ord a => a -> a -> Bool
< Char
'\x80' -- ASCII compatible characters
        then Char -> Text
Data.Text.singleton Char
c
        else [Text] -> Text
Data.Text.concat [Item [Text]
"&#x", String -> Text
pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$ Int -> ShowS
forall a. (Integral a, Show a) => a -> ShowS
showHex (Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
c) String
"", Item [Text]
";"]
    period' :: Parser Ending
    period' :: Parser Ending
period' = [Parser Ending] -> Parser Ending
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Char -> Parser Text Char
char Char
'.' Parser Text Char -> Parser Ending -> Parser Ending
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Ending
boundary
        , Char -> Parser Text Char
char Char
'。' Parser Text Char -> Parser Ending -> Parser Ending
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Ending
trailingSpaces
        , Text -> Parser Text Text
string Text
"&period;" Parser Text Text -> Parser Ending -> Parser Ending
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Ending
boundary
        , Text -> Parser Text Text
string Text
"&#46;" Parser Text Text -> Parser Ending -> Parser Ending
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Ending
boundary
        , Text -> Parser Text Text
string Text
"&#12290;" Parser Text Text -> Parser Ending -> Parser Ending
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Ending
trailingSpaces
        , Text -> Parser Text Text
asciiCI Text
"&#x2e;" Parser Text Text -> Parser Ending -> Parser Ending
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Ending
boundary
        , Text -> Parser Text Text
asciiCI Text
"&#x3002;" Parser Text Text -> Parser Ending -> Parser Ending
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Ending
trailingSpaces
        ]
    comma' :: Parser Ending
    comma' :: Parser Ending
comma' = [Parser Ending] -> Parser Ending
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Char -> Parser Text Char
char Char
'、' Parser Text Char -> Parser Ending -> Parser Ending
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Ending
trailingSpaces
        , Text -> Parser Text Text
string Text
"," Parser Text Text -> Parser Ending -> Parser Ending
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Ending
boundary
        , Text -> Parser Text Text
string Text
"&comma;" Parser Text Text -> Parser Ending -> Parser Ending
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Ending
boundary
        , Text -> Parser Text Text
string Text
"&#44;" Parser Text Text -> Parser Ending -> Parser Ending
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Ending
boundary
        , Text -> Parser Text Text
string Text
"&#12289;" Parser Text Text -> Parser Ending -> Parser Ending
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Ending
trailingSpaces
        , Text -> Parser Text Text
asciiCI Text
"&#x2c;" Parser Text Text -> Parser Ending -> Parser Ending
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Ending
boundary
        , Text -> Parser Text Text
asciiCI Text
"&#x3001;" Parser Text Text -> Parser Ending -> Parser Ending
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Ending
trailingSpaces
        ]
    interpunct' :: Parser Ending
    interpunct' :: Parser Ending
interpunct' = [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Char -> Parser Text Char
char Char
'·' Parser Text Char -> Parser Text Text -> Parser Text Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
""
        , Text -> Parser Text Text
string Text
"&middot;"
        , Text -> Parser Text Text
string Text
"&centerdot;"
        , Text -> Parser Text Text
string Text
"&CenterDot;"
        , Text -> Parser Text Text
string Text
"&#183;"
        , Text -> Parser Text Text
asciiCI Text
"&#xb7;"
        ] Parser Text Text -> Parser Ending -> Parser Ending
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Ending -> Parser Ending
forall (m :: * -> *) a. Monad m => a -> m a
return Ending
Ending
    closingChars :: String
    closingChars :: String
closingChars =
        [ Char
Item String
'"', Char
Item String
'”', Char
Item String
'\'', Char
Item String
'’', Char
Item String
')', Char
Item String
']', Char
Item String
'}', Char
Item String
'」', Char
Item String
'』', Char
Item String
'〉', Char
Item String
'》', Char
Item String
')', Char
Item String
'〕'
        , Char
Item String
']', Char
Item String
'}', Char
Item String
'⦆', Char
Item String
'】', Char
Item String
'〗', Char
Item String
'〙', Char
Item String
'〛', Char
Item String
'›', Char
Item String
'»'
        ]
    closingEntities :: [Text]
    closingEntities :: [Text]
closingEntities =
        [ Item [Text]
"&quot;", Item [Text]
"&QUOT;"                               -- "
        , Item [Text]
"&apos;"                                         -- '
        , Item [Text]
"&rpar;"                                         -- )
        , Item [Text]
"&rsqb;", Item [Text]
"&rbrack;"                             -- ]
        , Item [Text]
"&rcub;", Item [Text]
"&rbrace;"                             -- }
        , Item [Text]
"&raquo;"                                        -- »
        , Item [Text]
"&rsquo;", Item [Text]
"&rsquor;", Item [Text]
"&CloseCurlyQuote;"       -- ’
        , Item [Text]
"&rdquo;", Item [Text]
"&rdquor;", Item [Text]
"&CloseCurlyDoubleQuote;" -- ”
        , Item [Text]
"&rsaquo;"                                       -- ›
        ]
    closing :: Parser Text
    closing :: Parser Text Text
closing = [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice ([Parser Text Text] -> Parser Text Text)
-> [Parser Text Text] -> Parser Text Text
forall a b. (a -> b) -> a -> b
$
        [Text -> Parser Text Text
string [Char
Item Text
c] | Char
c <- String
closingChars] [Parser Text Text] -> [Parser Text Text] -> [Parser Text Text]
forall a. [a] -> [a] -> [a]
++
        [Text -> Parser Text Text
string Text
e | Text
e <- [Text]
closingEntities] [Parser Text Text] -> [Parser Text Text] -> [Parser Text Text]
forall a. [a] -> [a] -> [a]
++
        [Text -> Parser Text Text
asciiCI (Text -> Parser Text Text) -> Text -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ String -> Text
pack (String -> Text) -> String -> Text
forall a b. (a -> b) -> a -> b
$ String
"&#x" String -> ShowS
forall a. [a] -> [a] -> [a]
++ Int -> ShowS
forall a. (Integral a, Show a) => a -> ShowS
showHex (Char -> Int
forall a. Enum a => a -> Int
fromEnum Char
c) String
"" String -> ShowS
forall a. [a] -> [a] -> [a]
++ String
";"
        | Char
c <- String
closingChars
        ] [Parser Text Text] -> [Parser Text Text] -> [Parser Text Text]
forall a. [a] -> [a] -> [a]
++
        [Text -> Parser Text Text
string (Text -> Parser Text Text) -> Text -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ Text
"&#" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> String -> Text
pack (Char -> String
forall a. Show a => a -> String
show Char
c) Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
";" | Char
c <- String
closingChars]
    ending' :: Parser Ending
    ending' :: Parser Ending
ending' = [Parser Ending] -> Parser Ending
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Parser Text ()
forall t. Chunk t => Parser t ()
endOfInput Parser Text () -> Parser Ending -> Parser Ending
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Ending -> Parser Ending
forall (m :: * -> *) a. Monad m => a -> m a
return Ending
Ending
        , Text -> Ending
TrailingChars (Text -> Ending) -> Parser Text Text -> Parser Ending
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text Text
closing
        ]
    boundary :: Parser Ending
    boundary :: Parser Ending
boundary = [Parser Ending] -> Parser Ending
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Parser Ending
Item [Parser Ending]
ending'
        , Text -> Ending
TrailingSpaces (Text -> Ending) -> Parser Text Text -> Parser Ending
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (Char -> Bool) -> Parser Text Text
takeWhile1 Char -> Bool
isSpace
        ]
    trailingSpaces :: Parser Ending
    trailingSpaces :: Parser Ending
trailingSpaces = [Parser Ending] -> Parser Ending
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Parser Ending
Item [Parser Ending]
boundary
        , Ending -> Parser Ending
forall (m :: * -> *) a. Monad m => a -> m a
return (Ending -> Parser Ending) -> Ending -> Parser Ending
forall a b. (a -> b) -> a -> b
$ Text -> Ending
TrailingSpaces Text
" "
        ]


data Ending = TrailingChars Text | TrailingSpaces Text | Ending


-- | Substitution options for 'transformArrow' function.  These options can
-- be composited as an element of a set.
--
-- - @[]@: Transform only leftwards and rightwards arrows.
-- - @['LeftRight']@: Transform bi-directional arrows as well as left/rightwards
-- arrows.
-- - @['DoubleArrow']@: Transform double arrows as well as single arrows.
-- - @['LeftRight', 'DoubleArrow']@: Transform all types of arrows.
data ArrowTransformationOption
    -- | A bidirect arrow (e.g., ↔︎).
    = LeftRight
    -- | An arrow which has two lines (e.g., ⇒).
    | DoubleArrow
    deriving (ArrowTransformationOption -> ArrowTransformationOption -> Bool
(ArrowTransformationOption -> ArrowTransformationOption -> Bool)
-> (ArrowTransformationOption -> ArrowTransformationOption -> Bool)
-> Eq ArrowTransformationOption
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
$c/= :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
== :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
$c== :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
Eq, Eq ArrowTransformationOption
Eq ArrowTransformationOption
-> (ArrowTransformationOption
    -> ArrowTransformationOption -> Ordering)
-> (ArrowTransformationOption -> ArrowTransformationOption -> Bool)
-> (ArrowTransformationOption -> ArrowTransformationOption -> Bool)
-> (ArrowTransformationOption -> ArrowTransformationOption -> Bool)
-> (ArrowTransformationOption -> ArrowTransformationOption -> Bool)
-> (ArrowTransformationOption
    -> ArrowTransformationOption -> ArrowTransformationOption)
-> (ArrowTransformationOption
    -> ArrowTransformationOption -> ArrowTransformationOption)
-> Ord ArrowTransformationOption
ArrowTransformationOption -> ArrowTransformationOption -> Bool
ArrowTransformationOption -> ArrowTransformationOption -> Ordering
ArrowTransformationOption
-> ArrowTransformationOption -> ArrowTransformationOption
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: ArrowTransformationOption
-> ArrowTransformationOption -> ArrowTransformationOption
$cmin :: ArrowTransformationOption
-> ArrowTransformationOption -> ArrowTransformationOption
max :: ArrowTransformationOption
-> ArrowTransformationOption -> ArrowTransformationOption
$cmax :: ArrowTransformationOption
-> ArrowTransformationOption -> ArrowTransformationOption
>= :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
$c>= :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
> :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
$c> :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
<= :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
$c<= :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
< :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
$c< :: ArrowTransformationOption -> ArrowTransformationOption -> Bool
compare :: ArrowTransformationOption -> ArrowTransformationOption -> Ordering
$ccompare :: ArrowTransformationOption -> ArrowTransformationOption -> Ordering
$cp1Ord :: Eq ArrowTransformationOption
Ord, Int -> ArrowTransformationOption -> ShowS
[ArrowTransformationOption] -> ShowS
ArrowTransformationOption -> String
(Int -> ArrowTransformationOption -> ShowS)
-> (ArrowTransformationOption -> String)
-> ([ArrowTransformationOption] -> ShowS)
-> Show ArrowTransformationOption
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [ArrowTransformationOption] -> ShowS
$cshowList :: [ArrowTransformationOption] -> ShowS
show :: ArrowTransformationOption -> String
$cshow :: ArrowTransformationOption -> String
showsPrec :: Int -> ArrowTransformationOption -> ShowS
$cshowsPrec :: Int -> ArrowTransformationOption -> ShowS
Show)

-- | Transforms hyphens and less-than and greater-than inequality symbols that
-- mimic arrows into actual arrow characters:
--
-- - @->@ turns into @→@ (U+2192 RIGHTWARDS ARROW).
-- - @<-@ turns into @←@ (U+2190 LEFTWARDS ARROW).
-- - @\<->@ turns into @↔@ (U+2194 LEFT RIGHT ARROW)
--   if 'LeftRight' is configured.
-- - @=>@ turns into @⇒@ (U+21D2 RIGHTWARDS DOUBLE ARROW)
--   if 'DoubleArrow' is configured.
-- - @<=@ turns into @⇐@ (U+21D0 LEFTWARDS DOUBLE ARROW)
--   if 'DoubleArrow' is configured.
-- - @\<=>@ turns into @⇔@ (U+21D4 LEFT RIGHT DOUBLE ARROW)
--   if both 'DoubleArrow' and 'LeftRight' are configured at a time.
transformArrow :: Set ArrowTransformationOption -> [HtmlEntity] -> [HtmlEntity]
transformArrow :: Set ArrowTransformationOption -> [HtmlEntity] -> [HtmlEntity]
transformArrow Set ArrowTransformationOption
options [HtmlEntity]
input = ((HtmlEntity -> HtmlEntity) -> [HtmlEntity] -> [HtmlEntity]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
`fmap` [HtmlEntity] -> [HtmlEntity]
normalizeText [HtmlEntity]
input) ((HtmlEntity -> HtmlEntity) -> [HtmlEntity])
-> (HtmlEntity -> HtmlEntity) -> [HtmlEntity]
forall a b. (a -> b) -> a -> b
$ \ case
    e :: HtmlEntity
e@HtmlText { tagStack :: HtmlEntity -> HtmlTagStack
tagStack = HtmlTagStack
stack, rawText :: HtmlEntity -> Text
rawText = Text
txt } ->
        if HtmlTagStack -> Bool
isPreservedTagStack HtmlTagStack
stack
        then HtmlEntity
e
        else HtmlEntity
e { rawText :: Text
rawText = Text -> Text
replaceText Text
txt }
    HtmlEntity
e ->
        HtmlEntity
e
  where
    replaceText :: Text -> Text
    replaceText :: Text -> Text
replaceText Text
txt =
        case Parser Text Text -> Text -> Either String Text
forall a. Parser a -> Text -> Either String a
parseOnly Parser Text Text
parser Text
txt of
            Left String
_ -> String -> Text
forall a. HasCallStack => String -> a
error String
"unexpected error: failed to parse text node"
            Right Text
t -> Text
t
    specialChars :: Set Char
    specialChars :: Set Char
specialChars = if ArrowTransformationOption
DoubleArrow ArrowTransformationOption -> Set ArrowTransformationOption -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` Set ArrowTransformationOption
options
       then [Char
Item (Set Char)
'<', Char
Item (Set Char)
'>', Char
Item (Set Char)
'&', Char
Item (Set Char)
'-', Char
Item (Set Char)
'=']
       else [Char
Item (Set Char)
'<', Char
Item (Set Char)
'>', Char
Item (Set Char)
'&', Char
Item (Set Char)
'-']
    parser :: Parser Text
    parser :: Parser Text Text
parser = do
        [Text]
chunks <- Parser Text Text -> Parser Text [Text]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' (Parser Text Text -> Parser Text [Text])
-> Parser Text Text -> Parser Text [Text]
forall a b. (a -> b) -> a -> b
$ [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
            [ (Char -> Bool) -> Parser Text Text
takeWhile1 (Char -> Set Char -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` Set Char
specialChars)
            , [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice [Parser Text Text]
arrows
            , Char -> Text
Data.Text.singleton (Char -> Text) -> Parser Text Char -> Parser Text Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text Char
anyChar
            ]
        Parser Text ()
forall t. Chunk t => Parser t ()
endOfInput
        Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Parser Text Text) -> Text -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ [Text] -> Text
Data.Text.concat [Text]
chunks
    arrows :: [Parser Text]
    arrows :: [Parser Text Text]
arrows = [Maybe (Parser Text Text)] -> [Parser Text Text]
forall a. [Maybe a] -> [a]
catMaybes
        [ if ArrowTransformationOption
DoubleArrow ArrowTransformationOption -> Set ArrowTransformationOption -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` Set ArrowTransformationOption
options Bool -> Bool -> Bool
&& ArrowTransformationOption
LeftRight ArrowTransformationOption -> Set ArrowTransformationOption -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` Set ArrowTransformationOption
options
             then Parser Text Text -> Maybe (Parser Text Text)
forall a. a -> Maybe a
Just Parser Text Text
doubleLeftRight
             else Item [Maybe (Parser Text Text)]
forall a. Maybe a
Nothing
        , if ArrowTransformationOption
DoubleArrow ArrowTransformationOption -> Set ArrowTransformationOption -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` Set ArrowTransformationOption
options then Parser Text Text -> Maybe (Parser Text Text)
forall a. a -> Maybe a
Just Parser Text Text
doubleLeft else Item [Maybe (Parser Text Text)]
forall a. Maybe a
Nothing
        , if ArrowTransformationOption
DoubleArrow ArrowTransformationOption -> Set ArrowTransformationOption -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` Set ArrowTransformationOption
options then Parser Text Text -> Maybe (Parser Text Text)
forall a. a -> Maybe a
Just Parser Text Text
doubleRight else Item [Maybe (Parser Text Text)]
forall a. Maybe a
Nothing
        , if ArrowTransformationOption
LeftRight ArrowTransformationOption -> Set ArrowTransformationOption -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` Set ArrowTransformationOption
options then Parser Text Text -> Maybe (Parser Text Text)
forall a. a -> Maybe a
Just Parser Text Text
leftRight else Item [Maybe (Parser Text Text)]
forall a. Maybe a
Nothing
        , Parser Text Text -> Maybe (Parser Text Text)
forall a. a -> Maybe a
Just Parser Text Text
left
        , Parser Text Text -> Maybe (Parser Text Text)
forall a. a -> Maybe a
Just Parser Text Text
right
        ]
    doubleLeftRight :: Parser Text
    doubleLeftRight :: Parser Text Text
doubleLeftRight = Parser Text Text
lt Parser Text Text -> Parser Text () -> Parser Text ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text ()
equals Parser Text () -> Parser Text Text -> Parser Text Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
gt Parser Text Text -> Parser Text Text -> Parser Text Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
"&hArr;"
    doubleLeft :: Parser Text
    doubleLeft :: Parser Text Text
doubleLeft = Parser Text Text
lt Parser Text Text -> Parser Text () -> Parser Text ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text ()
equals Parser Text () -> Parser Text Text -> Parser Text Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
"&lArr;"
    doubleRight :: Parser Text
    doubleRight :: Parser Text Text
doubleRight = Parser Text ()
equals Parser Text () -> Parser Text Text -> Parser Text Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
gt Parser Text Text -> Parser Text Text -> Parser Text Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
"&rArr;"
    leftRight :: Parser Text
    leftRight :: Parser Text Text
leftRight = Parser Text Text
lt Parser Text Text -> Parser Text () -> Parser Text ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text ()
hyphen Parser Text () -> Parser Text Text -> Parser Text Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
gt Parser Text Text -> Parser Text Text -> Parser Text Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
"&harr;"
    left :: Parser Text
    left :: Parser Text Text
left = Parser Text Text
lt Parser Text Text -> Parser Text () -> Parser Text ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text ()
hyphen Parser Text () -> Parser Text Text -> Parser Text Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
"&larr;"
    right :: Parser Text
    right :: Parser Text Text
right = Parser Text ()
hyphen Parser Text () -> Parser Text Text -> Parser Text Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
gt Parser Text Text -> Parser Text Text -> Parser Text Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
"&rarr;"
    hyphen :: Parser ()
    hyphen :: Parser Text ()
hyphen = Parser Text Text -> Parser Text ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser Text Text -> Parser Text ())
-> Parser Text Text -> Parser Text ()
forall a b. (a -> b) -> a -> b
$ [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Char -> Parser Text Char
char Char
'-' Parser Text Char -> Parser Text Text -> Parser Text Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
""
        , Text -> Parser Text Text
string Text
"&hyphen;"
        , Text -> Parser Text Text
string Text
"&dash;"
        , Text -> Parser Text Text
string Text
"&#45;"
        , Text -> Parser Text Text
asciiCI Text
"&#x2d;"
        ]
    equals :: Parser ()
    equals :: Parser Text ()
equals = Parser Text Text -> Parser Text ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser Text Text -> Parser Text ())
-> Parser Text Text -> Parser Text ()
forall a b. (a -> b) -> a -> b
$ [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Char -> Parser Text Char
char Char
'=' Parser Text Char -> Parser Text Text -> Parser Text Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
""
        , Text -> Parser Text Text
string Text
"&equals;"
        , Text -> Parser Text Text
string Text
"&61;"
        , Text -> Parser Text Text
asciiCI Text
"&#x3d;"
        ]

lt :: Parser Text
lt :: Parser Text Text
lt = [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
    [ Char -> Text
Data.Text.singleton (Char -> Text) -> Parser Text Char -> Parser Text Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Text Char
char Char
'<'
    , Text -> Parser Text Text
string Text
"&lt;"
    , Text -> Parser Text Text
string Text
"&#60;"
    , Text -> Parser Text Text
asciiCI Text
"&#x3c;"
    ]

gt :: Parser Text
gt :: Parser Text Text
gt = [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
    [ Char -> Text
Data.Text.singleton (Char -> Text) -> Parser Text Char -> Parser Text Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Char -> Parser Text Char
char Char
'>'
    , Text -> Parser Text Text
string Text
"&gt;"
    , Text -> Parser Text Text
string Text
"&#62;"
    , Text -> Parser Text Text
asciiCI Text
"&#x3e;"
    ]

-- | Until 2015, the National Institute of Korean Language (國立國語院) had
-- allowed to use only ellipses (@…@) for omitted word, phrase, line,
-- paragraph, or speechlessness.  However, people tend to use three or more
-- consecutive periods (@...@) instead of a proper ellipsis.
-- Although NIKL has started to allow consecutive periods besides an ellipsis
-- for these uses, ellipses are still a proper punctuation in principle.
--
-- This transforms, in the given HTML fragments, all three consecutive periods
-- into proper ellipses.
transformEllipsis :: [HtmlEntity] -> [HtmlEntity]
transformEllipsis :: [HtmlEntity] -> [HtmlEntity]
transformEllipsis = (Text -> Text) -> [HtmlEntity] -> [HtmlEntity]
transformText ((Text -> Text) -> [HtmlEntity] -> [HtmlEntity])
-> (Text -> Text) -> [HtmlEntity] -> [HtmlEntity]
forall a b. (a -> b) -> a -> b
$ \ Text
txt ->
    case Parser Text Text -> Text -> Either String Text
forall a. Parser a -> Text -> Either String a
parseOnly Parser Text Text
parser Text
txt of
        Left String
_ -> String -> Text
forall a. HasCallStack => String -> a
error String
"unexpected error: failed to parse text node"
        Right Text
t -> Text
t
  where
    parser :: Parser Text
    parser :: Parser Text Text
parser = do
        [Text]
chunks <- Parser Text Text -> Parser Text [Text]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' (Parser Text Text -> Parser Text [Text])
-> Parser Text Text -> Parser Text [Text]
forall a b. (a -> b) -> a -> b
$ [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
            [ (Char -> Bool) -> Parser Text Text
takeWhile1 (Char -> Set Char -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`notElem` ([Char
Item (Set Char)
'&', Char
Item (Set Char)
'.', Char
Item (Set Char)
'。'] :: Set Char))
            , Parser Text Text
Item [Parser Text Text]
ellipsis
            , Char -> Text
Data.Text.singleton (Char -> Text) -> Parser Text Char -> Parser Text Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text Char
anyChar
            ]
        Parser Text ()
forall t. Chunk t => Parser t ()
endOfInput
        Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Parser Text Text) -> Text -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ [Text] -> Text
Data.Text.concat [Text]
chunks
    ellipsis :: Parser Text
    ellipsis :: Parser Text Text
ellipsis = do
        Parser Text Text -> Parser Text ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser Text Text -> Parser Text ())
-> Parser Text Text -> Parser Text ()
forall a b. (a -> b) -> a -> b
$ [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
            [ Parser Text Text
period Parser Text Text -> Parser Text Text -> Parser Text Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
period Parser Text Text -> Parser Text Text -> Parser Text Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
period
            , Parser Text Text
chinesePeriod Parser Text Text -> Parser Text Text -> Parser Text Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
chinesePeriod Parser Text Text -> Parser Text Text -> Parser Text Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
chinesePeriod
            ]
        Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
"&hellip;"
    period :: Parser Text
    period :: Parser Text Text
period = [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Text -> Parser Text Text
string Text
"."
        , Text -> Parser Text Text
string Text
"&period;"
        , Text -> Parser Text Text
string Text
"&#46;"
        , Text -> Parser Text Text
asciiCI Text
"&#x2e;"
        ]
    chinesePeriod :: Parser Text
    chinesePeriod :: Parser Text Text
chinesePeriod = [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Text -> Parser Text Text
string Text
"。"
        , Text -> Parser Text Text
string Text
"&#12290;"
        , Text -> Parser Text Text
asciiCI Text
"&#x3002;"
        ]

-- | Pairs of substitute folk single and double quotes.
-- Used by 'transformQuote' function.
--
-- The are three presets: 'curvedQuotes', 'guillemets', and
-- 'curvedSingleQuotesWithQ':
--
-- - 'curvedQuotes' uses South Korean curved quotation marks which follows
--   English quotes (@‘@: U+2018, @’@: U+2019, @“@: U+201C, @”@: U+201D)
-- - 'guillemets' uses North Korean angular quotation marks, influenced
--   by Russian guillemets but with some adjustments to replace guillemets with
--   East Asian angular quotes (@〈@: U+3008, @〉@: U+3009, @《@: U+300A,
--   @》@: U+300B).
-- - 'curvedSingleQuotesWithQ' is the almost same to 'curvedQuotes' but
--   wrap text with a @\<q>@ tag instead of curved double quotes.
data Quotes = Quotes
    { Quotes -> QuotePair
singleQuotes :: QuotePair
    , Quotes -> QuotePair
doubleQuotes :: QuotePair
    } deriving (Quotes -> Quotes -> Bool
(Quotes -> Quotes -> Bool)
-> (Quotes -> Quotes -> Bool) -> Eq Quotes
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Quotes -> Quotes -> Bool
$c/= :: Quotes -> Quotes -> Bool
== :: Quotes -> Quotes -> Bool
$c== :: Quotes -> Quotes -> Bool
Eq, Eq Quotes
Eq Quotes
-> (Quotes -> Quotes -> Ordering)
-> (Quotes -> Quotes -> Bool)
-> (Quotes -> Quotes -> Bool)
-> (Quotes -> Quotes -> Bool)
-> (Quotes -> Quotes -> Bool)
-> (Quotes -> Quotes -> Quotes)
-> (Quotes -> Quotes -> Quotes)
-> Ord Quotes
Quotes -> Quotes -> Bool
Quotes -> Quotes -> Ordering
Quotes -> Quotes -> Quotes
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Quotes -> Quotes -> Quotes
$cmin :: Quotes -> Quotes -> Quotes
max :: Quotes -> Quotes -> Quotes
$cmax :: Quotes -> Quotes -> Quotes
>= :: Quotes -> Quotes -> Bool
$c>= :: Quotes -> Quotes -> Bool
> :: Quotes -> Quotes -> Bool
$c> :: Quotes -> Quotes -> Bool
<= :: Quotes -> Quotes -> Bool
$c<= :: Quotes -> Quotes -> Bool
< :: Quotes -> Quotes -> Bool
$c< :: Quotes -> Quotes -> Bool
compare :: Quotes -> Quotes -> Ordering
$ccompare :: Quotes -> Quotes -> Ordering
$cp1Ord :: Eq Quotes
Ord, Int -> Quotes -> ShowS
[Quotes] -> ShowS
Quotes -> String
(Int -> Quotes -> ShowS)
-> (Quotes -> String) -> ([Quotes] -> ShowS) -> Show Quotes
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Quotes] -> ShowS
$cshowList :: [Quotes] -> ShowS
show :: Quotes -> String
$cshow :: Quotes -> String
showsPrec :: Int -> Quotes -> ShowS
$cshowsPrec :: Int -> Quotes -> ShowS
Show)

-- | A pair of an opening quote and a closing quote.
data QuotePair
    -- | Wrap the quoted text with a pair of punctuation characters.
    = QuotePair Text Text
    -- | Wrap the quoted text (HTML elements) with an element like @\<q>@ tag.
    | HtmlElement HtmlTag HtmlRawAttrs
    deriving (QuotePair -> QuotePair -> Bool
(QuotePair -> QuotePair -> Bool)
-> (QuotePair -> QuotePair -> Bool) -> Eq QuotePair
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: QuotePair -> QuotePair -> Bool
$c/= :: QuotePair -> QuotePair -> Bool
== :: QuotePair -> QuotePair -> Bool
$c== :: QuotePair -> QuotePair -> Bool
Eq, Eq QuotePair
Eq QuotePair
-> (QuotePair -> QuotePair -> Ordering)
-> (QuotePair -> QuotePair -> Bool)
-> (QuotePair -> QuotePair -> Bool)
-> (QuotePair -> QuotePair -> Bool)
-> (QuotePair -> QuotePair -> Bool)
-> (QuotePair -> QuotePair -> QuotePair)
-> (QuotePair -> QuotePair -> QuotePair)
-> Ord QuotePair
QuotePair -> QuotePair -> Bool
QuotePair -> QuotePair -> Ordering
QuotePair -> QuotePair -> QuotePair
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: QuotePair -> QuotePair -> QuotePair
$cmin :: QuotePair -> QuotePair -> QuotePair
max :: QuotePair -> QuotePair -> QuotePair
$cmax :: QuotePair -> QuotePair -> QuotePair
>= :: QuotePair -> QuotePair -> Bool
$c>= :: QuotePair -> QuotePair -> Bool
> :: QuotePair -> QuotePair -> Bool
$c> :: QuotePair -> QuotePair -> Bool
<= :: QuotePair -> QuotePair -> Bool
$c<= :: QuotePair -> QuotePair -> Bool
< :: QuotePair -> QuotePair -> Bool
$c< :: QuotePair -> QuotePair -> Bool
compare :: QuotePair -> QuotePair -> Ordering
$ccompare :: QuotePair -> QuotePair -> Ordering
$cp1Ord :: Eq QuotePair
Ord, Int -> QuotePair -> ShowS
[QuotePair] -> ShowS
QuotePair -> String
(Int -> QuotePair -> ShowS)
-> (QuotePair -> String)
-> ([QuotePair] -> ShowS)
-> Show QuotePair
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [QuotePair] -> ShowS
$cshowList :: [QuotePair] -> ShowS
show :: QuotePair -> String
$cshow :: QuotePair -> String
showsPrec :: Int -> QuotePair -> ShowS
$cshowsPrec :: Int -> QuotePair -> ShowS
Show)

-- | English-style curved quotes (@‘@: U+2018, @’@: U+2019, @“@: U+201C,
-- @”@: U+201D), which are used by South Korean orthography.
curvedQuotes :: Quotes
curvedQuotes :: Quotes
curvedQuotes = Quotes :: QuotePair -> QuotePair -> Quotes
Quotes
    { singleQuotes :: QuotePair
singleQuotes = Text -> Text -> QuotePair
QuotePair Text
"&lsquo;" Text
"&rsquo;"
    , doubleQuotes :: QuotePair
doubleQuotes = Text -> Text -> QuotePair
QuotePair Text
"&ldquo;" Text
"&rdquo;"
    }

-- | Vertical corner brackets (@﹁@: U+FE41, @﹂@: U+FE42, @﹃@: U+FE43,
-- @﹄@: U+FE44), which are used by East Asian orthography.
verticalCornerBrackets :: Quotes
verticalCornerBrackets :: Quotes
verticalCornerBrackets = Quotes :: QuotePair -> QuotePair -> Quotes
Quotes
    { singleQuotes :: QuotePair
singleQuotes = Text -> Text -> QuotePair
QuotePair Text
"&#xfe41;" Text
"&#xfe42;"
    , doubleQuotes :: QuotePair
doubleQuotes = Text -> Text -> QuotePair
QuotePair Text
"&#xfe43;" Text
"&#xfe44;"
    }

-- | Traditional horizontal corner brackets (@「@: U+300C, @」@: U+300D,
-- @『@: U+300E, @』@: U+300F), which are used by East Asian orthography.
horizontalCornerBrackets :: Quotes
horizontalCornerBrackets :: Quotes
horizontalCornerBrackets = Quotes :: QuotePair -> QuotePair -> Quotes
Quotes
    { singleQuotes :: QuotePair
singleQuotes = Text -> Text -> QuotePair
QuotePair Text
"&#x300c;" Text
"&#x300d;"
    , doubleQuotes :: QuotePair
doubleQuotes = Text -> Text -> QuotePair
QuotePair Text
"&#x300e;" Text
"&#x300f;"
    }

-- | East Asian guillemets (@〈@: U+3008, @〉@: U+3009, @《@: U+300A, @》@:
-- U+300B), which are used by North Korean orthography.
guillemets :: Quotes
guillemets :: Quotes
guillemets = Quotes :: QuotePair -> QuotePair -> Quotes
Quotes
    { singleQuotes :: QuotePair
singleQuotes = Text -> Text -> QuotePair
QuotePair Text
"&#x3008;" Text
"&#x3009;"
    , doubleQuotes :: QuotePair
doubleQuotes = Text -> Text -> QuotePair
QuotePair Text
"&#x300a;" Text
"&#x300b;"
    }

-- | Use English-style curved quotes (@‘@: U+2018, @’@: U+2019) for single
-- quotes, and HTML @\<q\>@ tags for double quotes.
curvedSingleQuotesWithQ :: Quotes
curvedSingleQuotesWithQ :: Quotes
curvedSingleQuotesWithQ = Quotes :: QuotePair -> QuotePair -> Quotes
Quotes
    { singleQuotes :: QuotePair
singleQuotes = Text -> Text -> QuotePair
QuotePair Text
"&lsquo;" Text
"&rsquo;"
    , doubleQuotes :: QuotePair
doubleQuotes = HtmlTag -> Text -> QuotePair
HtmlElement HtmlTag
Q Text
""
    }

-- | Use vertical corner brackets (@﹁@: U+FE41, @﹂@: U+FE42) for single quotes,
-- and HTML @\<q\>@ tags for double quotes.
verticalCornerBracketsWithQ :: Quotes
verticalCornerBracketsWithQ :: Quotes
verticalCornerBracketsWithQ = Quotes :: QuotePair -> QuotePair -> Quotes
Quotes
    { singleQuotes :: QuotePair
singleQuotes = Text -> Text -> QuotePair
QuotePair Text
"&#xfe41;" Text
"&#xfe42;"
    , doubleQuotes :: QuotePair
doubleQuotes = HtmlTag -> Text -> QuotePair
HtmlElement HtmlTag
Q Text
""
    }

-- | Use horizontal corner brackets (@「@: U+300C, @」@: U+300D)
-- for single quotes, and HTML @\<q\>@ tags for double quotes.
horizontalCornerBracketsWithQ :: Quotes
horizontalCornerBracketsWithQ :: Quotes
horizontalCornerBracketsWithQ = Quotes :: QuotePair -> QuotePair -> Quotes
Quotes
    { singleQuotes :: QuotePair
singleQuotes = Text -> Text -> QuotePair
QuotePair Text
"&#x300c;" Text
"&#x300d;"
    , doubleQuotes :: QuotePair
doubleQuotes = HtmlTag -> Text -> QuotePair
HtmlElement HtmlTag
Q Text
""
    }

-- | Transform pairs of apostrophes (@'@: U+0027) and straight double
-- quotes (@"@: U+0022) into more appropriate quotation marks like
-- typographic single quotes (@‘@: U+2018, @’@: U+2019) and
-- double quotes (@“@: U+201C, @”@: U+201D), or rather wrap them with an HTML
-- element like @\<q>@ tag.  It depends on the options passed to the first
-- parameter; see also 'Quotes'.
transformQuote :: Quotes -- ^ Pair of quoting punctuations and wrapping element.
               -> [HtmlEntity] -- ^ The input HTML entities to transform.
               -> [HtmlEntity]
transformQuote :: Quotes -> [HtmlEntity] -> [HtmlEntity]
transformQuote Quotes { QuotePair
doubleQuotes :: QuotePair
singleQuotes :: QuotePair
doubleQuotes :: Quotes -> QuotePair
singleQuotes :: Quotes -> QuotePair
.. } = PairedTransformer (QuotePunct, Text)
-> [HtmlEntity] -> [HtmlEntity]
forall m. PairedTransformer m -> [HtmlEntity] -> [HtmlEntity]
transformPairs (PairedTransformer (QuotePunct, Text)
 -> [HtmlEntity] -> [HtmlEntity])
-> PairedTransformer (QuotePunct, Text)
-> [HtmlEntity]
-> [HtmlEntity]
forall a b. (a -> b) -> a -> b
$
    PairedTransformer :: forall match.
(HtmlTagStack -> Bool)
-> ([match] -> Text -> Maybe (match, Text, Text, Text))
-> (Text -> Maybe (match, Text, Text, Text))
-> (match -> match -> Bool)
-> (match -> match -> [HtmlEntity] -> [HtmlEntity])
-> PairedTransformer match
PairedTransformer
        { ignoresTagStack :: HtmlTagStack -> Bool
ignoresTagStack = HtmlTagStack -> Bool
isPreservedTagStack
        , matchStart :: [(QuotePunct, Text)]
-> Text -> Maybe ((QuotePunct, Text), Text, Text, Text)
matchStart = [(QuotePunct, Text)]
-> Text -> Maybe ((QuotePunct, Text), Text, Text, Text)
matchStart'
        , matchEnd :: Text -> Maybe ((QuotePunct, Text), Text, Text, Text)
matchEnd = Text -> Maybe ((QuotePunct, Text), Text, Text, Text)
matchEnd'
        , areMatchesPaired :: (QuotePunct, Text) -> (QuotePunct, Text) -> Bool
areMatchesPaired = \ (QuotePunct
punct, Text
text) (QuotePunct
punct', Text
text') ->
            QuotePunct -> QuotePunct -> Bool
arePaired QuotePunct
punct QuotePunct
punct' Bool -> Bool -> Bool
&& Text
text Text -> Text -> Bool
forall a. Eq a => a -> a -> Bool
== Text
text'
        , transformPair :: (QuotePunct, Text)
-> (QuotePunct, Text) -> [HtmlEntity] -> [HtmlEntity]
transformPair = (QuotePunct, Text)
-> (QuotePunct, Text) -> [HtmlEntity] -> [HtmlEntity]
transformPair'
        }
  where
    punctuations :: [(QuotePunct, [Text])]
    punctuations :: [(QuotePunct, [Text])]
punctuations =
        [ ( QuotePunct
Apostrophe
          , [Item [Text]
"'", Item [Text]
"&apos;", Item [Text]
"&#39;", Item [Text]
"&#x27;", Item [Text]
"&#X27;"]
          )
        , ( QuotePunct
DoubleQuote
          , [Item [Text]
"\"", Item [Text]
"&quot;", Item [Text]
"&QUOT;", Item [Text]
"&#34;", Item [Text]
"&#x22;", Item [Text]
"&#X22;"]
          )
        , ( QuotePunct
DoubleQuote
          , [Item [Text]
"\"", Item [Text]
"&quot;", Item [Text]
"&QUOT;", Item [Text]
"&#34;", Item [Text]
"&#x22;", Item [Text]
"&#X22;"]
          )
        , ( QuotePunct
OpeningSingleQuote
          , [ Item [Text]
"\x2018", Item [Text]
"&lsquo;", Item [Text]
"&OpenCurlyQuote;"
            , Item [Text]
"&#8216;", Item [Text]
"&#x2018;", Item [Text]
"&#X2018;"
            ]
          )
        , ( QuotePunct
ClosingSingleQuote
          , [ Item [Text]
"\x2019", Item [Text]
"&rsquo;", Item [Text]
"&rsquor;", Item [Text]
"&CloseCurlyQuote;"
            , Item [Text]
"&#8217;", Item [Text]
"&#x2019;", Item [Text]
"&#X2019;"
            ]
          )
        , ( QuotePunct
OpeningDoubleQuote
          , [ Item [Text]
"\x201c", Item [Text]
"&ldquo;", Item [Text]
"&OpenCurlyDoubleQuote;"
            , Item [Text]
"&#8220;", Item [Text]
"&#x201c;", Item [Text]
"&#x201C;", Item [Text]
"&#X201c;", Item [Text]
"&#X201C;"
            ]
          )
        , ( QuotePunct
ClosingDoubleQuote
          , [ Item [Text]
"\x201d", Item [Text]
"&rdquo;", Item [Text]
"&rdquor;", Item [Text]
"&CloseCurlyDoubleQuote;"
            , Item [Text]
"&#8221;", Item [Text]
"&#x201d;", Item [Text]
"&#x201D;", Item [Text]
"&#X201d;", Item [Text]
"&#X201D;"
            ]
          )
        ]
    matchStart' :: [(QuotePunct, Text)]
                -> Text
                -> Maybe ((QuotePunct, Text), Text, Text, Text)
    matchStart' :: [(QuotePunct, Text)]
-> Text -> Maybe ((QuotePunct, Text), Text, Text, Text)
matchStart' [(QuotePunct, Text)]
prevMatches Text
text
      | [(QuotePunct, Text, (Text, Text))] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
Prelude.null [(QuotePunct, Text, (Text, Text))]
prevMatcherCandidates = Maybe ((QuotePunct, Text), Text, Text, Text)
forall a. Maybe a
Nothing
      | Bool
otherwise =
            let (QuotePunct
matcher, Text
entity, (Text
pre, Text
post)) = ((QuotePunct, Text, (Text, Text))
 -> (QuotePunct, Text, (Text, Text)) -> Ordering)
-> [(QuotePunct, Text, (Text, Text))]
-> (QuotePunct, Text, (Text, Text))
forall (t :: * -> *) a.
Foldable t =>
(a -> a -> Ordering) -> t a -> a
minimumBy
                    (((QuotePunct, Text, (Text, Text)) -> Int)
-> (QuotePunct, Text, (Text, Text))
-> (QuotePunct, Text, (Text, Text))
-> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (((QuotePunct, Text, (Text, Text)) -> Int)
 -> (QuotePunct, Text, (Text, Text))
 -> (QuotePunct, Text, (Text, Text))
 -> Ordering)
-> ((QuotePunct, Text, (Text, Text)) -> Int)
-> (QuotePunct, Text, (Text, Text))
-> (QuotePunct, Text, (Text, Text))
-> Ordering
forall a b. (a -> b) -> a -> b
$ \ (QuotePunct
_, Text
_, (Text
pre', Text
_)) -> Text -> Int
Data.Text.length Text
pre')
                    [(QuotePunct, Text, (Text, Text))]
prevMatcherCandidates
            in
                if Text -> Bool
Data.Text.null Text
post then
                   Maybe ((QuotePunct, Text), Text, Text, Text)
forall a. Maybe a
Nothing
                else
                    ((QuotePunct, Text), Text, Text, Text)
-> Maybe ((QuotePunct, Text), Text, Text, Text)
forall a. a -> Maybe a
Just
                        ( (QuotePunct
matcher, Text
entity)
                        , Text
pre
                        , Text
entity
                        , Int -> Text -> Text
Data.Text.drop (Text -> Int
Data.Text.length Text
entity) Text
post
                        )
      where
        prevMatchers :: Set QuotePunct
        prevMatchers :: Set QuotePunct
prevMatchers = [QuotePunct] -> Set QuotePunct
forall a. Ord a => [a] -> Set a
Data.Set.fromList ((QuotePunct, Text) -> QuotePunct
forall a b. (a, b) -> a
fst ((QuotePunct, Text) -> QuotePunct)
-> [(QuotePunct, Text)] -> [QuotePunct]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [(QuotePunct, Text)]
prevMatches)
        prevMatcherCandidates :: [(QuotePunct, Text, (Text, Text))]
        prevMatcherCandidates :: [(QuotePunct, Text, (Text, Text))]
prevMatcherCandidates =
            [ (QuotePunct
matcher', Text
entity', Text -> Text -> (Text, Text)
breakOn Text
entity' Text
text)
            | (QuotePunct
matcher', [Text]
entities) <- [(QuotePunct, [Text])]
punctuations
            , QuotePunct -> Bool
opens QuotePunct
matcher'
            , QuotePunct
matcher' QuotePunct -> Set QuotePunct -> Bool
forall a. Ord a => a -> Set a -> Bool
`Data.Set.notMember` Set QuotePunct
prevMatchers
            , Text
entity' <- [Text]
entities
            ]
    matchEnd' :: Text -> Maybe ((QuotePunct, Text), Text, Text, Text)
    matchEnd' :: Text -> Maybe ((QuotePunct, Text), Text, Text, Text)
matchEnd' Text
text
      | [(QuotePunct, Text, (Text, Text))] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
Prelude.null [(QuotePunct, Text, (Text, Text))]
matcherCandidates = Maybe ((QuotePunct, Text), Text, Text, Text)
forall a. Maybe a
Nothing
      | Bool
otherwise =
            let (QuotePunct
matcher, Text
entity, (Text
pre, Text
post)) = ((QuotePunct, Text, (Text, Text))
 -> (QuotePunct, Text, (Text, Text)) -> Ordering)
-> [(QuotePunct, Text, (Text, Text))]
-> (QuotePunct, Text, (Text, Text))
forall (t :: * -> *) a.
Foldable t =>
(a -> a -> Ordering) -> t a -> a
minimumBy
                    (((QuotePunct, Text, (Text, Text)) -> Int)
-> (QuotePunct, Text, (Text, Text))
-> (QuotePunct, Text, (Text, Text))
-> Ordering
forall a b. Ord a => (b -> a) -> b -> b -> Ordering
comparing (((QuotePunct, Text, (Text, Text)) -> Int)
 -> (QuotePunct, Text, (Text, Text))
 -> (QuotePunct, Text, (Text, Text))
 -> Ordering)
-> ((QuotePunct, Text, (Text, Text)) -> Int)
-> (QuotePunct, Text, (Text, Text))
-> (QuotePunct, Text, (Text, Text))
-> Ordering
forall a b. (a -> b) -> a -> b
$ \ (QuotePunct
_, Text
_, (Text
pre', Text
_)) -> Text -> Int
Data.Text.length Text
pre')
                    [(QuotePunct, Text, (Text, Text))]
matcherCandidates
            in
                if Text -> Bool
Data.Text.null Text
post then
                    Maybe ((QuotePunct, Text), Text, Text, Text)
forall a. Maybe a
Nothing
                else
                    ((QuotePunct, Text), Text, Text, Text)
-> Maybe ((QuotePunct, Text), Text, Text, Text)
forall a. a -> Maybe a
Just
                        ( (QuotePunct
matcher, Text
entity)
                        , Text
pre
                        , Text
entity
                        , Int -> Text -> Text
Data.Text.drop (Text -> Int
Data.Text.length Text
entity) Text
post
                        )
      where
        matcherCandidates :: [(QuotePunct, Text, (Text, Text))]
        matcherCandidates :: [(QuotePunct, Text, (Text, Text))]
matcherCandidates =
            [ (QuotePunct
matcher', Text
entity', Text -> Text -> (Text, Text)
breakOn Text
entity' Text
text)
            | (QuotePunct
matcher', [Text]
entities) <- [(QuotePunct, [Text])]
punctuations
            , QuotePunct -> Bool
closes QuotePunct
matcher'
            , Text
entity' <- [Text]
entities
            ]
    transformPair' :: (QuotePunct, Text)
                   -> (QuotePunct, Text)
                   -> [HtmlEntity]
                   -> [HtmlEntity]
    transformPair' :: (QuotePunct, Text)
-> (QuotePunct, Text) -> [HtmlEntity] -> [HtmlEntity]
transformPair' (QuotePunct
punct, Text
start) (QuotePunct
_, Text
end) buffer :: [HtmlEntity]
buffer@(HtmlEntity
firstEntity : [HtmlEntity]
_) =
        case Text -> Text -> [HtmlEntity] -> Maybe [HtmlEntity]
clipText Text
start Text
end [HtmlEntity]
buffer of
            Maybe [HtmlEntity]
Nothing -> [HtmlEntity]
buffer
            Just [HtmlEntity]
es -> case QuotePair
pair of
                QuotePair Text
open Text
close ->
                    HtmlTagStack -> Text -> HtmlEntity
HtmlText HtmlTagStack
tagStack' Text
open HtmlEntity -> [HtmlEntity] -> [HtmlEntity]
forall a. a -> [a] -> [a]
: [HtmlEntity]
es [HtmlEntity] -> [HtmlEntity] -> [HtmlEntity]
forall a. [a] -> [a] -> [a]
++ [HtmlTagStack -> Text -> HtmlEntity
HtmlText HtmlTagStack
tagStack' Text
close]
                HtmlElement HtmlTag
tag Text
attrs ->
                    HtmlTagStack -> HtmlTag -> Text -> [HtmlEntity] -> [HtmlEntity]
wrap HtmlTagStack
tagStack' HtmlTag
tag Text
attrs [HtmlEntity]
es
      where
        pair :: QuotePair
        pair :: QuotePair
pair = case QuotePunct
punct of
            QuotePunct
DoubleQuote -> QuotePair
doubleQuotes
            QuotePunct
OpeningDoubleQuote -> QuotePair
doubleQuotes
            QuotePunct
ClosingDoubleQuote -> QuotePair
doubleQuotes
            QuotePunct
_ -> QuotePair
singleQuotes
        tagStack' :: HtmlTagStack
        tagStack' :: HtmlTagStack
tagStack' = HtmlEntity -> HtmlTagStack
tagStack HtmlEntity
firstEntity
    transformPair' (QuotePunct, Text)
_ (QuotePunct, Text)
_ [] = []
    arePaired :: QuotePunct -> QuotePunct -> Bool
    arePaired :: QuotePunct -> QuotePunct -> Bool
arePaired QuotePunct
OpeningSingleQuote = (QuotePunct -> QuotePunct -> Bool
forall a. Eq a => a -> a -> Bool
== QuotePunct
ClosingSingleQuote)
    arePaired QuotePunct
OpeningDoubleQuote = (QuotePunct -> QuotePunct -> Bool
forall a. Eq a => a -> a -> Bool
== QuotePunct
ClosingDoubleQuote)
    arePaired QuotePunct
punct = (QuotePunct -> QuotePunct -> Bool
forall a. Eq a => a -> a -> Bool
== QuotePunct
punct)

data QuotePunct
    = DoubleQuote
    | Apostrophe
    | OpeningSingleQuote | ClosingSingleQuote
    | OpeningDoubleQuote | ClosingDoubleQuote
    deriving (QuotePunct -> QuotePunct -> Bool
(QuotePunct -> QuotePunct -> Bool)
-> (QuotePunct -> QuotePunct -> Bool) -> Eq QuotePunct
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: QuotePunct -> QuotePunct -> Bool
$c/= :: QuotePunct -> QuotePunct -> Bool
== :: QuotePunct -> QuotePunct -> Bool
$c== :: QuotePunct -> QuotePunct -> Bool
Eq, Eq QuotePunct
Eq QuotePunct
-> (QuotePunct -> QuotePunct -> Ordering)
-> (QuotePunct -> QuotePunct -> Bool)
-> (QuotePunct -> QuotePunct -> Bool)
-> (QuotePunct -> QuotePunct -> Bool)
-> (QuotePunct -> QuotePunct -> Bool)
-> (QuotePunct -> QuotePunct -> QuotePunct)
-> (QuotePunct -> QuotePunct -> QuotePunct)
-> Ord QuotePunct
QuotePunct -> QuotePunct -> Bool
QuotePunct -> QuotePunct -> Ordering
QuotePunct -> QuotePunct -> QuotePunct
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: QuotePunct -> QuotePunct -> QuotePunct
$cmin :: QuotePunct -> QuotePunct -> QuotePunct
max :: QuotePunct -> QuotePunct -> QuotePunct
$cmax :: QuotePunct -> QuotePunct -> QuotePunct
>= :: QuotePunct -> QuotePunct -> Bool
$c>= :: QuotePunct -> QuotePunct -> Bool
> :: QuotePunct -> QuotePunct -> Bool
$c> :: QuotePunct -> QuotePunct -> Bool
<= :: QuotePunct -> QuotePunct -> Bool
$c<= :: QuotePunct -> QuotePunct -> Bool
< :: QuotePunct -> QuotePunct -> Bool
$c< :: QuotePunct -> QuotePunct -> Bool
compare :: QuotePunct -> QuotePunct -> Ordering
$ccompare :: QuotePunct -> QuotePunct -> Ordering
$cp1Ord :: Eq QuotePunct
Ord, Int -> QuotePunct -> ShowS
[QuotePunct] -> ShowS
QuotePunct -> String
(Int -> QuotePunct -> ShowS)
-> (QuotePunct -> String)
-> ([QuotePunct] -> ShowS)
-> Show QuotePunct
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [QuotePunct] -> ShowS
$cshowList :: [QuotePunct] -> ShowS
show :: QuotePunct -> String
$cshow :: QuotePunct -> String
showsPrec :: Int -> QuotePunct -> ShowS
$cshowsPrec :: Int -> QuotePunct -> ShowS
Show)

opens :: QuotePunct -> Bool
opens :: QuotePunct -> Bool
opens QuotePunct
DoubleQuote = Bool
True
opens QuotePunct
Apostrophe = Bool
True
opens QuotePunct
OpeningSingleQuote = Bool
True
opens QuotePunct
OpeningDoubleQuote = Bool
True
opens QuotePunct
_ = Bool
False

closes :: QuotePunct -> Bool
closes :: QuotePunct -> Bool
closes QuotePunct
DoubleQuote = Bool
True
closes QuotePunct
Apostrophe = Bool
True
closes QuotePunct
ClosingSingleQuote = Bool
True
closes QuotePunct
ClosingDoubleQuote = Bool
True
closes QuotePunct
_ = Bool
False

-- | Transform the following folk em dashes into proper em dashes
-- (@—@: @U+2014 EM DASH@):
--
-- - A hyphen (@-@: @U+002D HYPHEN-MINUS@) surrounded by spaces.
-- - Two or three consecutive hyphens (@--@ or @---@).
-- - A hangul vowel @ㅡ@ (@U+3161 HANGUL LETTER EU@) surrounded by spaces.
--   There are Korean people that use a hangul vowel @ㅡ@ ("eu") instead of
--   an em dash due to their ignorance or negligence.
transformEmDash :: [HtmlEntity] -> [HtmlEntity]
transformEmDash :: [HtmlEntity] -> [HtmlEntity]
transformEmDash = (Text -> Text) -> [HtmlEntity] -> [HtmlEntity]
transformText ((Text -> Text) -> [HtmlEntity] -> [HtmlEntity])
-> (Text -> Text) -> [HtmlEntity] -> [HtmlEntity]
forall a b. (a -> b) -> a -> b
$ \ Text
txt ->
    case Parser Text Text -> Text -> Either String Text
forall a. Parser a -> Text -> Either String a
parseOnly Parser Text Text
parser Text
txt of
        Left String
_ -> String -> Text
forall a. HasCallStack => String -> a
error String
"unexpected error: failed to parse text node"
        Right Text
t -> Text
t
  where
    parser :: Parser Text
    parser :: Parser Text Text
parser = do
        [Text]
chunks <- Parser Text Text -> Parser Text [Text]
forall (m :: * -> *) a. MonadPlus m => m a -> m [a]
many' (Parser Text Text -> Parser Text [Text])
-> Parser Text Text -> Parser Text [Text]
forall a b. (a -> b) -> a -> b
$ [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
            [ (Char -> Bool) -> Parser Text Text
takeWhile1 ((Char -> Bool) -> Parser Text Text)
-> (Char -> Bool) -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ \ Char
c ->
                Bool -> Bool
not (Char -> Bool
isSpace Char
c Bool -> Bool -> Bool
|| Char
c Char -> Set Char -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` ([Char
Item (Set Char)
'&', Char
Item (Set Char)
'-', Char
Item (Set Char)
'\x3161'] :: Set Char))
            , Parser Text Text
Item [Parser Text Text]
emDash
            , Char -> Text
Data.Text.singleton (Char -> Text) -> Parser Text Char -> Parser Text Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser Text Char
anyChar
            ]
        Parser Text ()
forall t. Chunk t => Parser t ()
endOfInput
        Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return (Text -> Parser Text Text) -> Text -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ [Text] -> Text
Data.Text.concat [Text]
chunks
    emDash :: Parser Text
    emDash :: Parser Text Text
emDash = [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice
        [ Parser Text Text
Item [Parser Text Text]
hyphens
        , (Char -> Bool) -> Parser Text Text
takeWhile1 Char -> Bool
isSpace Parser Text Text -> Parser Text Text -> Parser Text Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice [Parser Text Text
Item [Parser Text Text]
eu, Parser Text Text
Item [Parser Text Text]
hyphen] Parser Text Text -> Parser Text Text -> Parser Text Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Char -> Bool) -> Parser Text Text
takeWhile1 Char -> Bool
isSpace
        ] Parser Text Text -> Parser Text Text -> Parser Text Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> Parser Text Text
forall (m :: * -> *) a. Monad m => a -> m a
return Text
"&mdash;"
    hyphens :: Parser Text
    hyphens :: Parser Text Text
hyphens = Parser Text Text
hyphen Parser Text Text -> Parser Text Text -> Parser Text Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Text Text
hyphen Parser Text Text -> Parser Text Text -> Parser Text Text
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Text -> Parser Text Text -> Parser Text Text
forall (f :: * -> *) a. Alternative f => a -> f a -> f a
option Text
"" Parser Text Text
hyphen
    hyphen :: Parser Text
    hyphen :: Parser Text Text
hyphen = [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice ([Parser Text Text] -> Parser Text Text)
-> [Parser Text Text] -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ (Text -> Parser Text Text) -> [Text] -> [Parser Text Text]
forall a b. (a -> b) -> [a] -> [b]
Prelude.map Text -> Parser Text Text
string
        [Item [Text]
"-", Item [Text]
"&#45;", Item [Text]
"&#x2d;", Item [Text]
"&#x2D;", Item [Text]
"&#X2d;", Item [Text]
"&#X2D;"]
    eu :: Parser Text
    eu :: Parser Text Text
eu = [Parser Text Text] -> Parser Text Text
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice ([Parser Text Text] -> Parser Text Text)
-> [Parser Text Text] -> Parser Text Text
forall a b. (a -> b) -> a -> b
$ (Text -> Parser Text Text) -> [Text] -> [Parser Text Text]
forall a b. (a -> b) -> [a] -> [b]
Prelude.map Text -> Parser Text Text
string
        [Item [Text]
"\x3161", Item [Text]
"&#12641;", Item [Text]
"&#x3161;", Item [Text]
"&#X3161;"]

transformText :: (Text -> Text) -> [HtmlEntity] -> [HtmlEntity]
transformText :: (Text -> Text) -> [HtmlEntity] -> [HtmlEntity]
transformText Text -> Text
replace' = (HtmlEntity -> HtmlEntity) -> [HtmlEntity] -> [HtmlEntity]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((HtmlEntity -> HtmlEntity) -> [HtmlEntity] -> [HtmlEntity])
-> (HtmlEntity -> HtmlEntity) -> [HtmlEntity] -> [HtmlEntity]
forall a b. (a -> b) -> a -> b
$ \ case
    e :: HtmlEntity
e@HtmlText { tagStack :: HtmlEntity -> HtmlTagStack
tagStack = HtmlTagStack
stack, rawText :: HtmlEntity -> Text
rawText = Text
txt } ->
        if HtmlTagStack -> Bool
isPreservedTagStack HtmlTagStack
stack
        then HtmlEntity
e
        else HtmlEntity
e { rawText :: Text
rawText = Text -> Text
replace' Text
txt }
    HtmlEntity
e ->
        HtmlEntity
e