{-# LANGUAGE OverloadedStrings #-} {- | Module : Text.Pandoc.Writers.LaTeX.Util Copyright : Copyright (C) 2006-2022 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane Stability : alpha Portability : portable -} module Text.Pandoc.Writers.LaTeX.Util ( stringToLaTeX , StringContext(..) , toLabel , inCmd , wrapDiv , hypertarget , labelFor , getListingsLanguage , mbBraced ) where import Control.Applicative ((<|>)) import Control.Monad (when) import Text.Pandoc.Class (PandocMonad, toLang) import Text.Pandoc.Options (WriterOptions(..), isEnabled) import Text.Pandoc.Writers.LaTeX.Types (LW, WriterState(..)) import Text.Pandoc.Writers.LaTeX.Lang (toBabel) import Text.Pandoc.Highlighting (toListingsLanguage) import Text.DocLayout import Text.Pandoc.Definition import Text.Pandoc.ImageSize (showFl) import Control.Monad.State.Strict (gets, modify) import Data.Text (Text) import qualified Data.Text as T import Text.Pandoc.Extensions (Extension(Ext_smart)) import Data.Char (isLetter, isSpace, isDigit, isAscii, ord, isAlphaNum) import Text.Printf (printf) import Text.Pandoc.Shared (safeRead, elemText) import qualified Data.Text.Normalize as Normalize import Data.List (uncons) data StringContext = TextString | URLString | CodeString deriving (Eq) -- escape things as needed for LaTeX stringToLaTeX :: PandocMonad m => StringContext -> Text -> LW m Text stringToLaTeX context zs = do opts <- gets stOptions when ('\x200c' `elemText` zs) $ modify (\s -> s { stZwnj = True }) return $ T.pack $ foldr (go opts context) mempty $ T.unpack $ if writerPreferAscii opts then Normalize.normalize Normalize.NFD zs else zs where go :: WriterOptions -> StringContext -> Char -> String -> String go opts ctx x xs = let ligatures = isEnabled Ext_smart opts && ctx == TextString isUrl = ctx == URLString mbAccentCmd = if writerPreferAscii opts && ctx == TextString then uncons xs >>= \(c,_) -> lookupAccent c else Nothing emits s = case mbAccentCmd of Just cmd -> cmd <> "{" <> s <> "}" <> drop 1 xs -- drop combining accent Nothing -> s <> xs emitc c = case mbAccentCmd of Just cmd -> cmd <> "{" <> [c] <> "}" <> drop 1 xs -- drop combining accent Nothing -> c : xs emitcseq cs = case xs of c:_ | isLetter c , ctx == TextString -> cs <> " " <> xs | isSpace c -> cs <> "{}" <> xs | ctx == TextString -> cs <> xs _ -> cs <> "{}" <> xs emitquote cs = case xs of '`':_ -> cs <> "\\," <> xs -- add thin space '\'':_ -> cs <> "\\," <> xs -- add thin space _ -> cs <> xs in case x of '?' | ligatures -> -- avoid ?` ligature case xs of '`':_ -> emits "?{}" _ -> emitc x '!' | ligatures -> -- avoid !` ligature case xs of '`':_ -> emits "!{}" _ -> emitc x '{' -> emits "\\{" '}' -> emits "\\}" '`' | ctx == CodeString -> emitcseq "\\textasciigrave" '$' | not isUrl -> emits "\\$" '%' -> emits "\\%" '&' -> emits "\\&" '_' | not isUrl -> emits "\\_" '#' -> emits "\\#" '-' | not isUrl -> case xs of -- prevent adjacent hyphens from forming ligatures ('-':_) -> emits "-\\/" _ -> emitc '-' '~' | not isUrl -> emitcseq "\\textasciitilde" '^' -> emits "\\^{}" '\\'| isUrl -> emitc '/' -- NB. / works as path sep even on Windows | otherwise -> emitcseq "\\textbackslash" '|' | not isUrl -> emitcseq "\\textbar" '<' -> emitcseq "\\textless" '>' -> emitcseq "\\textgreater" '[' -> emits "{[}" -- to avoid interpretation as ']' -> emits "{]}" -- optional arguments '\'' | ctx == CodeString -> emitcseq "\\textquotesingle" '\160' -> emits "~" '\x200B' -> emits "\\hspace{0pt}" -- zero-width space '\x202F' -> emits "\\," '\x2026' | ligatures -> emitcseq "\\ldots" '\x2018' | ligatures -> emitquote "`" '\x2019' | ligatures -> emitquote "'" '\x201C' | ligatures -> emitquote "``" '\x201D' | ligatures -> emitquote "''" '\x2014' | ligatures -> emits "---" '\x2013' | ligatures -> emits "--" _ | writerPreferAscii opts -> case x of 'ı' -> emitcseq "\\i" 'ȷ' -> emitcseq "\\j" 'å' -> emitcseq "\\aa" 'Å' -> emitcseq "\\AA" 'ß' -> emitcseq "\\ss" 'ø' -> emitcseq "\\o" 'Ø' -> emitcseq "\\O" 'Ł' -> emitcseq "\\L" 'ł' -> emitcseq "\\l" 'æ' -> emitcseq "\\ae" 'Æ' -> emitcseq "\\AE" 'œ' -> emitcseq "\\oe" 'Œ' -> emitcseq "\\OE" '£' -> emitcseq "\\pounds" '€' -> emitcseq "\\euro" '©' -> emitcseq "\\copyright" _ -> emitc x | otherwise -> emitc x lookupAccent :: Char -> Maybe String lookupAccent '\779' = Just "\\H" lookupAccent '\768' = Just "\\`" lookupAccent '\769' = Just "\\'" lookupAccent '\770' = Just "\\^" lookupAccent '\771' = Just "\\~" lookupAccent '\776' = Just "\\\"" lookupAccent '\775' = Just "\\." lookupAccent '\772' = Just "\\=" lookupAccent '\781' = Just "\\|" lookupAccent '\817' = Just "\\b" lookupAccent '\807' = Just "\\c" lookupAccent '\783' = Just "\\G" lookupAccent '\777' = Just "\\h" lookupAccent '\803' = Just "\\d" lookupAccent '\785' = Just "\\f" lookupAccent '\778' = Just "\\r" lookupAccent '\865' = Just "\\t" lookupAccent '\782' = Just "\\U" lookupAccent '\780' = Just "\\v" lookupAccent '\774' = Just "\\u" lookupAccent '\808' = Just "\\k" lookupAccent '\8413' = Just "\\textcircled" lookupAccent _ = Nothing toLabel :: PandocMonad m => Text -> LW m Text toLabel z = go `fmap` stringToLaTeX URLString z where go = T.concatMap $ \x -> case x of _ | (isLetter x || isDigit x) && isAscii x -> T.singleton x | x `elemText` "_-+=:;." -> T.singleton x | otherwise -> T.pack $ "ux" <> printf "%x" (ord x) -- | Puts contents into LaTeX command. inCmd :: Text -> Doc Text -> Doc Text inCmd cmd contents = char '\\' <> literal cmd <> braces contents mapAlignment :: Text -> Text mapAlignment a = case a of "top" -> "T" "top-baseline" -> "t" "bottom" -> "b" "center" -> "c" _ -> a wrapDiv :: PandocMonad m => Attr -> Doc Text -> LW m (Doc Text) wrapDiv (_,classes,kvs) t = do beamer <- gets stBeamer let align dir txt = inCmd "begin" dir $$ txt $$ inCmd "end" dir lang <- toLang $ lookup "lang" kvs let wrapColumns = if beamer && "columns" `elem` classes then \contents -> let valign = maybe "T" mapAlignment (lookup "align" kvs) totalwidth = maybe [] (\x -> ["totalwidth=" <> x]) (lookup "totalwidth" kvs) onlytextwidth = filter ("onlytextwidth" ==) classes options = text $ T.unpack $ T.intercalate "," $ valign : totalwidth ++ onlytextwidth in inCmd "begin" "columns" <> brackets options $$ contents $$ inCmd "end" "columns" else id wrapColumn = if beamer && "column" `elem` classes then \contents -> let valign = maybe "" (brackets . text . T.unpack . mapAlignment) (lookup "align" kvs) w = maybe "0.48" fromPct (lookup "width" kvs) in inCmd "begin" "column" <> valign <> braces (literal w <> "\\textwidth") $$ contents $$ inCmd "end" "column" else id fromPct xs = case T.unsnoc xs of Just (ds, '%') -> case safeRead ds of Just digits -> showFl (digits / 100 :: Double) Nothing -> xs _ -> xs wrapDir = case lookup "dir" kvs of Just "rtl" -> align "RTL" Just "ltr" -> align "LTR" _ -> id wrapLang txt = case lang of Just lng -> let l = toBabel lng in inCmd "begin" "otherlanguage" <> (braces (literal l)) $$ blankline <> txt <> blankline $$ inCmd "end" "otherlanguage" Nothing -> txt return $ wrapColumns . wrapColumn . wrapDir . wrapLang $ t hypertarget :: PandocMonad m => Bool -> Text -> Doc Text -> LW m (Doc Text) hypertarget _ "" x = return x hypertarget addnewline ident x = do ref <- literal `fmap` toLabel ident return $ text "\\hypertarget" <> braces ref <> braces ((if addnewline && not (isEmpty x) then "%" <> cr else empty) <> x) labelFor :: PandocMonad m => Text -> LW m (Doc Text) labelFor "" = return empty labelFor ident = do ref <- literal `fmap` toLabel ident return $ text "\\label" <> braces ref -- Determine listings language from list of class attributes. getListingsLanguage :: [Text] -> Maybe Text getListingsLanguage xs = foldr ((<|>) . toListingsLanguage) Nothing xs mbBraced :: Text -> Text mbBraced x = if not (T.all isAlphaNum x) then "{" <> x <> "}" else x