{-# LANGUAGE OverloadedStrings, LambdaCase, TupleSections #-} -- | -- Module : Text.Gemini.Web -- Copyright : (c) Sena, 2024 -- License : AGPL-3.0-or-later -- -- Maintainer : Sena -- Stability : stable -- Portability : portable -- -- A tiny Gemtext to HTML converter for gemmula. -- -- Encodes parsed Gemtext documents and lines as HTML 'Text'. module Text.Gemini.Web ( -- * Encoding documents encode -- * Encoding single items , prettyItem , encodeItem -- * Rewriting links , rewriteLink , webifyLink -- * Other , getTitle ) where import Control.Exception (catch, SomeException) import Data.Text (Text) import qualified Data.Text as T import Data.Maybe (fromMaybe, fromJust, isNothing, maybeToList) import Data.Either (isRight) import Data.List (find) import Data.Bool (bool) import qualified Text.URI as URI import Network.HTTP (simpleHTTP, getRequest, getResponseCode) import Text.Gemini (GemDocument, GemItem (..)) -- | Encode parsed 'GemDocument' as a HTML file. -- The output 'Text' uses LF-endings. Uses the 'prettyItem' function below. -- -- Valid HTML characters are escaped before encoding. -- -- Empty 'GemList's are ignored and empty 'GemText's are replaced with @
@. encode :: GemDocument -> Text encode = T.unlines . map prettyItem . filter (not . empty) where empty :: GemItem -> Bool empty (GemList list) = null list empty _ = False -- | Encode a /single/ parsed 'GemItem' as HTML text. -- The output 'Text' uses LF-endings and might be multiple lines. -- -- Valid HTML characters are escaped before encoding. -- -- Unlike 'encodeItem', long lines (> 80) will be split to multiple lines to -- make it look prettier. Empty 'GemText's are also replaced with @
@. -- -- Links have a "scheme" attribute set to "gemini" if the scheme of the URI -- is @gemini://@, to make them stylable with CSS. -- -- /Beware/ that the output text doesn't end with a newline. prettyItem :: GemItem -> Text prettyItem (GemText line) = bool (tag "p" [] $ multiline line) "
" (T.null . T.strip $ line) prettyItem (GemLink link desc) = let s = maybe "http" URI.unRText $ URI.uriScheme $ fromMaybe URI.emptyURI $ URI.mkURI link in tag "a" ([("href", link)] <> bool [] [("scheme", s)] (s == "gemini")) $ multiline $ fromMaybe link desc prettyItem (GemHeading level text) = "\n" <> tag ("h" <> (T.pack . show $ min level 6)) [] (multiline text) prettyItem (GemList list) = "" prettyItem (GemQuote text) = tag "blockquote" [] $ multiline text prettyItem (GemPre text alt) = tag "pre" (maybeToList $ ("title",) <$> alt) $ "\n" <> T.unlines text -- | Encode a /single/ parsed 'GemItem' as HTML text. -- The output 'Text' uses LF-endings and might be multiple lines. -- -- Valid HTML characters are escaped before encoding. -- -- /Beware/ that the output text doesn't end with a newline. encodeItem :: GemItem -> Text encodeItem (GemText line) = tag "p" [] line encodeItem (GemLink link desc) = tag "a" [("href", link)] $ fromMaybe link desc encodeItem (GemHeading level text) = tag ("h" <> (T.pack . show $ min level 6)) [] text encodeItem (GemList list) = "" encodeItem (GemQuote text) = tag "blockquote" [] text encodeItem (GemPre text alt) = tag "pre" (maybeToList $ ("title",) <$> alt) $ "\n" <> T.unlines text -- | Rewrite @.gmi@ links as @.html@ links. -- -- /Beware/ that this only applies to local 'GemLink's. -- For rewriting non-local links as @http@, see 'webifyLink'. rewriteLink :: GemItem -> GemItem rewriteLink (GemLink link desc) | isNothing (URI.uriPath uri) || isRight (URI.uriAuthority uri) = GemLink link desc | otherwise = GemLink (maybe link (<> ".html") $ T.stripSuffix ".gmi" link) desc where uri = fromMaybe URI.emptyURI $ URI.mkURI link rewriteLink item = item -- | Rewrite @gemini://@ link as @http://@ if it can be reached over HTTP. -- -- This is only useful if the specified link has a proxied mirror (like @geminiprotocol.net@). -- -- Does /nothing/ if the link is local or can't be reached. webifyLink :: GemItem -> IO GemItem webifyLink (GemLink link desc) | isRight (URI.uriAuthority uri) && maybe "" URI.unRText (URI.uriScheme uri) == "gemini" = (\(t, _, _) -> GemLink (bool (URI.render uri') link (t > 3)) desc) <$> catch (simpleHTTP (getRequest $ URI.renderStr uri') >>= getResponseCode) (\e -> let _ = (e :: SomeException) in return (9, 9, 9)) | otherwise = return $ GemLink link desc where uri = fromMaybe URI.emptyURI $ URI.mkURI link uri' = uri {URI.uriScheme = Just (fromJust $ URI.mkScheme "http")} webifyLink item = return item -- | Get the text of the first @

@ in the document, if there's any. -- -- Useful for using as @@. getTitle :: GemDocument -> Maybe Text getTitle doc = (\case { GemHeading _ t -> Just t; _ -> Nothing }) =<< find (\case { GemHeading l _ -> l == 1; _ -> False }) doc -- Creates a HTML tag with the given name, attributes and body. -- The body and attributes are escaped using the functions below. tag :: Text -> [(Text, Text)] -> Text -> Text tag name attrs body = "<" <> name <> T.concat (map attr attrs) <> ">" <> escapeBody body <> "</" <> name <> ">" where attr :: (Text, Text) -> Text attr (n, v) = " " <> n <> "=\"" <> (bool escapeAttr escapeHref $ n == "href") v <> "\"" -- Split the text to multiple lines if the text is longer than 80 characters. -- Indents the text by 2 spaces for every line then. multiline :: Text -> Text multiline text = let result = split [] [] $ T.words text in bool (T.concat result) ("\n " <> T.intercalate "\n " result <> "\n") (length result > 1) where split :: [Text] -> [Text] -> [Text] -> [Text] split line ls (w:ws) | T.length (T.unwords line) < 80 = split (line <> [w]) ls ws | otherwise = split [w] (ls <> [T.unwords line]) ws split line ls [] = ls <> [T.unwords line] -- Escape the relevant characters inside bodies such as ampersands and tag delimiters. escapeBody :: Text -> Text escapeBody = T.replace ">" ">" . T.replace "<" "<" . T.replace "&" "&" -- Escape the relevant characters inside attributes such as ampersands and quotes. escapeAttr :: Text -> Text escapeAttr = T.replace "'" "'" . T.replace "\"" """ . T.replace "&" "&" -- Escape the relevant characters inside links attributes such as quotes. escapeHref :: Text -> Text escapeHref = T.replace "'" "%27". T.replace "\"" "%22"