{-# LANGUAGE OverloadedStrings #-} -- | -- Module : Text.Gemini.Markdown -- Copyright : (c) Sena, 2024 -- License : AGPL-3.0-or-later -- -- Maintainer : Sena -- Stability : stable -- Portability : portable -- -- A tiny Gemtext to Markdown converter for gemmula. -- -- Encodes parsed Gemtext documents and lines as Markdown 'Text'. -- Follows the [CommonMark specification](https://spec.commonmark.org/current). module Text.Gemini.Markdown ( -- * Encoding documents encode -- * Encoding single items , prettyItem , encodeItem -- * Rewriting links , rewriteLink ) where import Control.Monad (join) import Control.Arrow ((***), second) import Data.Text (Text) import qualified Data.Text as T import Data.Maybe (fromMaybe, isNothing) import Data.Either (isRight) import Data.List (groupBy, intercalate) import Data.Char (isDigit) import Data.Bool (bool) import qualified Text.URI as URI import Text.Gemini (GemDocument, GemItem (..)) -- | Encode parsed 'GemDocument' as a Markdown file. -- The output 'Text' uses LF-endings. Uses the 'prettyItem' function below. -- -- Valid Markdown characters are escaped before encoding. -- -- The adjacent links are grouped together in a paragraph to make them look pretty. -- -- Empty 'GemText's and 'GemList's are ignored. encode :: GemDocument -> Text encode = T.unlines . map prettyItem . intercalate [GemText ""] . groupBy links . filter (not . empty) where empty :: GemItem -> Bool empty (GemText line) = T.null . T.strip $ line empty (GemList list) = null list empty _ = False links :: GemItem -> GemItem -> Bool links (GemLink _ _) (GemLink _ _) = True links _ _ = False -- | Encode a /single/ parsed 'GemItem' as Markdown text. -- The output 'Text' uses LF-endings and might be multiple lines. -- -- Valid Markdown characters are escaped before encoding. -- -- Unlike 'encodeItem', long lines (> 80) will be split to multiple lines to -- make it look prettier. The link items are also put in a seperate line to make them -- look nice. -- -- /Beware/ that the output text doesn't end with a newline. prettyItem :: GemItem -> Text prettyItem (GemText line) = multiline Nothing $ escapeContent line prettyItem (GemLink link desc) = let desc' = maybe (escapeContent link) (multiline Nothing . escapeContent) desc in " => [" <> desc' <> "](" <> link <> ") " prettyItem (GemHeading level text) = "\n" <> T.replicate (min level 6) "#" <> " " <> escapeContent text prettyItem (GemList list) = T.intercalate "\n" $ map ((" * " <>) . multiline Nothing . escapeContent) list prettyItem (GemQuote text) = multiline (Just " > ") $ escapeContent text prettyItem (GemPre text alt) = T.intercalate "\n" $ ["```" <> fromMaybe "" alt] <> map escapePre text <> ["```"] -- | Encode a /single/ parsed 'GemItem' as Markdown text. -- The output 'Text' uses LF-endings and might be multiple lines. -- -- Valid Markdown characters are escaped before encoding. -- -- /Beware/ that the output text doesn't end with a newline. encodeItem :: GemItem -> Text encodeItem (GemText line) = escapePrefixes $ escapeContent line encodeItem (GemLink link desc) = "[" <> escapeContent (fromMaybe link desc) <> "](" <> link <> ")" encodeItem (GemHeading level text) = T.replicate (min level 6) "#" <> " " <> escapeContent text encodeItem (GemList list) = T.intercalate "\n" $ map ((" * " <>) . escapeContent) list encodeItem (GemQuote text) = " > " <> escapeContent text encodeItem (GemPre text alt) = T.intercalate "\n" $ ["```" <> fromMaybe "" alt] <> map escapePre text <> ["```"] -- | Rewrite @.gmi@ links as @.md@ links. -- -- /Beware/ that this only applies to local 'GemLink's. -- For rewriting non-local links as @http@, see 'Text.Gemini.Web.webifyLink'. rewriteLink :: GemItem -> GemItem rewriteLink (GemLink link desc) | isNothing (URI.uriPath uri) || isRight (URI.uriAuthority uri) = GemLink link desc | otherwise = GemLink (maybe link (<> ".md") $ T.stripSuffix ".gmi" link) desc where uri = fromMaybe URI.emptyURI $ URI.mkURI link rewriteLink item = item -- Split the text to multiple lines if the text is longer than 80 characters. -- If given, adds the prefix to the beginning of every line. -- Escapes the valid prefixes of every line if the text has any. multiline :: Maybe Text -> Text -> Text multiline pre text = T.intercalate "\n" $ map (maybe id (<>) pre . escapePrefixes) $ split [] [] $ T.words text where split :: [Text] -> [Text] -> [Text] -> [Text] split line ls (w:ws) | T.length (T.unwords line) < 80 = split (line <> [w]) ls ws | otherwise = split [w] (ls <> [T.unwords line]) ws split line ls [] = ls <> [T.unwords line] -- Escapes the line prefixes such as list items and quotes. escapePrefixes :: Text -> Text escapePrefixes text = foldr escapePrefix text chars where escapePrefix :: Char -> Text -> Text escapePrefix c t | T.null end = t -- Ordered lists | c == '.' = bool t (pre <> "\\" <> end) (all isDigit before && (not . null $ before)) | otherwise = bool t (pre <> "\\" <> end) (null before) where (pre, end) = T.break (== c) t before = T.unpack . T.stripStart $ pre chars = ['.', '-', '+', '#', '>', '*'] -- Escapes the content of the text, such as the backslashes; as well as the -- surround characters, such as emphasis, links and codeblocks. escapeContent :: Text -> Text escapeContent text = foldr escapeSurround (T.replace "\\" "\\\\" text) chars where escapeSurround :: (Char, Char) -> Text -> Text escapeSurround del@(op, cl) t | T.null t = t | otherwise = let (pre, (ins, post)) = second (T.break (== cl) . T.drop 1 . (<> " ")) $ T.break (== op) t (op', cl') = join (***) (("\\" <>) . T.singleton) del in pre <> T.dropEnd 1 (if T.null post then bool (T.singleton op <> ins) ins (T.null ins) else op' <> ins <> cl' <> escapeSurround del (T.drop 1 post)) chars = [('~', '~'), ('`', '`'), ('(', ')'), ('<', '>'), ('[', ']'), ('{', '}'), ('_', '_'), ('*', '*')] -- Escapes the preformatted delimiter inside a preformatted text. escapePre :: Text -> Text escapePre text = bool (T.replace "```" " ```" text) text (T.null text)