{-# LANGUAGE OverloadedStrings #-}

-- |
-- Module      :  Text.Gemini
-- Copyright   :  (c) 2024 Sena
-- License     :  LGPL-3.0-or-later
--
-- Maintainer  :  contact@sena.pink
-- Stability   :  stable
-- Portability :  portable
--
-- Parse gemtext (@text/gemini@) documents from and back into `Text'.
--
-- See the Gemini hypertext format specification at
-- <https://geminiprotocol.net/docs/gemtext-specification.gmi>.
module Text.Gemini
    ( -- * Gemtext types
      GemDocument
    , GemItem (..)

      -- * Decoding from text
    , decode
    , decodeLine

      -- * Encoding into text
    , encode
    , encodeItem

      -- * Utility functions
    , documentTitle
    ) where

import Data.Bool (bool)
import Data.Char (isSpace)
import Data.Maybe (listToMaybe)
import Data.Text (Text)
import qualified Data.Text as T

-- | A gemtext document, in the form of a list of `GemItem's
type GemDocument = [GemItem]

-- | A gemtext item
data GemItem
    = -- | A regular gemtext line -- @`GemText' \<Text>@
      GemText !Text
    | -- | A gemtext link -- @`GemLink' \<Link> \[Optional Description]@
      GemLink !Text !(Maybe Text)
    | -- | A gemtext heading of 3 levels max -- @`GemHeading' \<Level> \<Text>@
      GemHeading !Int !Text
    | -- | An unordered gemtext list -- @`GemList' \<Lines>@
      --
      -- Gemtext specification does /not/ endorse grouping list items this way.
      -- This approach is purely for practical reasons, such as ease of conversion
      -- to unordered HTML lists.
      GemList ![Text]
    | -- | A gemtext quote -- @`GemQuote' \<Text>@
      GemQuote !Text
    | -- | A preformatted gemtext block -- @`GemPre' \<Lines> [Optional Alt Text]@
      GemPre ![Text] !(Maybe Text)
    deriving (Int -> GemItem -> ShowS
[GemItem] -> ShowS
GemItem -> String
(Int -> GemItem -> ShowS)
-> (GemItem -> String) -> ([GemItem] -> ShowS) -> Show GemItem
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> GemItem -> ShowS
showsPrec :: Int -> GemItem -> ShowS
$cshow :: GemItem -> String
show :: GemItem -> String
$cshowList :: [GemItem] -> ShowS
showList :: [GemItem] -> ShowS
Show, GemItem -> GemItem -> Bool
(GemItem -> GemItem -> Bool)
-> (GemItem -> GemItem -> Bool) -> Eq GemItem
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: GemItem -> GemItem -> Bool
== :: GemItem -> GemItem -> Bool
$c/= :: GemItem -> GemItem -> Bool
/= :: GemItem -> GemItem -> Bool
Eq)

-- | Parse a gemtext block as a `GemDocument'.
--
-- The input `Text' must use either CRLF or LF.
decode :: Text -> GemDocument
decode :: Text -> [GemItem]
decode = [Text] -> [GemItem]
parse ([Text] -> [GemItem]) -> (Text -> [Text]) -> Text -> [GemItem]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Text]
T.lines (Text -> [Text]) -> (Text -> Text) -> Text -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. HasCallStack => Text -> Text -> Text -> Text
Text -> Text -> Text -> Text
T.replace Text
"\CR" Text
""
  where
    parse :: [Text] -> GemDocument
    parse :: [Text] -> [GemItem]
parse (Text
l : [Text]
ls)
        | Text
"* " Text -> Text -> Bool
`T.isPrefixOf` Text
l =
            let ([Text]
items, [Text]
rest) = (Text -> Bool) -> [Text] -> ([Text], [Text])
forall a. (a -> Bool) -> [a] -> ([a], [a])
span (Text
"* " Text -> Text -> Bool
`T.isPrefixOf`) [Text]
ls
             in ([Text] -> GemItem
GemList ([Text] -> GemItem) -> ([Text] -> [Text]) -> [Text] -> GemItem
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Text -> Text) -> [Text] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Int -> Text -> Text
value Int
2) ([Text] -> GemItem) -> [Text] -> GemItem
forall a b. (a -> b) -> a -> b
$ [Text
l] [Text] -> [Text] -> [Text]
forall a. Semigroup a => a -> a -> a
<> [Text]
items) GemItem -> [GemItem] -> [GemItem]
forall a. a -> [a] -> [a]
: [Text] -> [GemItem]
parse [Text]
rest
        | Text
"```" Text -> Text -> Bool
`T.isPrefixOf` Text
l =
            let ([Text]
pre, [Text]
rest) = (Text -> Bool) -> [Text] -> ([Text], [Text])
forall a. (a -> Bool) -> [a] -> ([a], [a])
break (Text
"```" Text -> Text -> Bool
`T.isPrefixOf`) [Text]
ls
             in case [Text]
rest of
                    [] -> Text -> GemItem
GemText Text
l GemItem -> [GemItem] -> [GemItem]
forall a. a -> [a] -> [a]
: [Text] -> [GemItem]
parse [Text]
ls
                    (Text
_ : [Text]
after) -> [Text] -> Maybe Text -> GemItem
GemPre [Text]
pre (Text -> Maybe Text
optional (Text -> Maybe Text) -> (Text -> Text) -> Text -> Maybe Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text -> Text
value Int
3 (Text -> Maybe Text) -> Text -> Maybe Text
forall a b. (a -> b) -> a -> b
$ Text
l) GemItem -> [GemItem] -> [GemItem]
forall a. a -> [a] -> [a]
: [Text] -> [GemItem]
parse [Text]
after
        | Bool
otherwise = Text -> GemItem
decodeLine Text
l GemItem -> [GemItem] -> [GemItem]
forall a. a -> [a] -> [a]
: [Text] -> [GemItem]
parse [Text]
ls
    parse [] = []

-- | Parse a /single/ gemtext line as `GemItem'.
--
-- There /should not/ be any line breaks in the input; if present, they are
-- treated as spaces. Preformatted text blocks are regarded as `GemText's as they
-- are strictly multiline.
decodeLine :: Text -> GemItem
decodeLine :: Text -> GemItem
decodeLine Text
line
    | Text
"=>" Text -> Text -> Bool
`T.isPrefixOf` Text
sane =
        let (Text
link, Text
desc) = (Char -> Bool) -> Text -> (Text, Text)
T.break Char -> Bool
isSpace (Text -> (Text, Text)) -> Text -> (Text, Text)
forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
value Int
2 Text
sane
         in if Text -> Bool
T.null Text
link
                then Text -> GemItem
GemText (Text -> GemItem) -> (Text -> Text) -> Text -> GemItem
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
T.stripEnd (Text -> GemItem) -> Text -> GemItem
forall a b. (a -> b) -> a -> b
$ Text
sane
                else Text -> Maybe Text -> GemItem
GemLink Text
link (Maybe Text -> GemItem) -> (Text -> Maybe Text) -> Text -> GemItem
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Maybe Text
optional (Text -> Maybe Text) -> (Text -> Text) -> Text -> Maybe Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
T.strip (Text -> GemItem) -> Text -> GemItem
forall a b. (a -> b) -> a -> b
$ Text
desc
    | Text
"#" Text -> Text -> Bool
`T.isPrefixOf` Text
sane =
        let level :: Int
level = Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Text -> Int
T.length (Text -> Int) -> Text -> Int
forall a b. (a -> b) -> a -> b
$ (Text, Text) -> Text
forall a b. (a, b) -> a
fst ((Text, Text) -> Text) -> (Text -> (Text, Text)) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> (Text, Text)
T.span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'#') (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text
sane) Int
3
         in Int -> Text -> GemItem
GemHeading Int
level (Text -> GemItem) -> (Text -> Text) -> Text -> GemItem
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text -> Text
value Int
level (Text -> GemItem) -> Text -> GemItem
forall a b. (a -> b) -> a -> b
$ Text
sane
    | Text
"* " Text -> Text -> Bool
`T.isPrefixOf` Text
sane = [Text] -> GemItem
GemList [Int -> Text -> Text
value Int
2 Text
sane]
    | Text
">" Text -> Text -> Bool
`T.isPrefixOf` Text
sane = Text -> GemItem
GemQuote (Text -> GemItem) -> (Text -> Text) -> Text -> GemItem
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text -> Text
value Int
1 (Text -> GemItem) -> Text -> GemItem
forall a b. (a -> b) -> a -> b
$ Text
sane
    | Bool
otherwise = Text -> GemItem
GemText (Text -> GemItem) -> (Text -> Text) -> Text -> GemItem
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
T.stripEnd (Text -> GemItem) -> Text -> GemItem
forall a b. (a -> b) -> a -> b
$ Text
sane
  where
    sane :: Text
sane = Text -> Text
escapeLineBreaks Text
line

-- | Encode a parsed `GemDocument' into a gemtext block.
--
-- The output `Text' uses CRLF line breaks and ends with a newline.
--
-- Empty lists are ignored completely if present. See the `encodeItem' function
-- for additional quirks.
encode :: GemDocument -> Text
encode :: [GemItem] -> Text
encode = [Text] -> Text
T.unlines ([Text] -> Text) -> ([GemItem] -> [Text]) -> [GemItem] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (GemItem -> Text) -> [GemItem] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map ((Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
"\CR") (Text -> Text) -> (GemItem -> Text) -> GemItem -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. GemItem -> Text
encodeItem) ([GemItem] -> [Text])
-> ([GemItem] -> [GemItem]) -> [GemItem] -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (GemItem -> Bool) -> [GemItem] -> [GemItem]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (GemItem -> Bool) -> GemItem -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. GemItem -> Bool
empty)
  where
    empty :: GemItem -> Bool
empty (GemList [Text]
list) = [Text] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Text]
list
    empty GemItem
_ = Bool
False

-- | Encode a /single/ parsed `GemItem' into gemtext.
--
-- The output `Text' does /not/ end with an additional newline, and might be
-- multiple lines, in which case it uses CRLF line breaks.
--
-- Prefixes are escaped as necessary before encoding and the spaces in `GemLink's
-- are replaced with @%20@ to normalize them. Line breaks inside the body of an
-- item are also replaced with a space.
encodeItem :: GemItem -> Text
encodeItem :: GemItem -> Text
encodeItem GemItem
item = case GemItem
item of
    GemText Text
text ->
        let shouldEscape :: Bool
shouldEscape =
                (Text -> Bool) -> [Text] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (Text -> Text -> Bool
`T.isPrefixOf` Text
text) [Text
"#", Text
"* ", Text
">"]
                    Bool -> Bool -> Bool
|| Text
"```" Text -> Text -> Bool
`T.isPrefixOf` Text
text
                    Bool -> Bool -> Bool
|| (Text
"=>" Text -> Text -> Bool
`T.isPrefixOf` Text
text Bool -> Bool -> Bool
&& Bool -> Bool
not (Text -> Bool
T.null (Text -> Bool) -> Text -> Bool
forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
value Int
2 Text
text))
         in Text -> Text
T.stripEnd (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
escapeLineBreaks (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text -> Text -> Bool -> Text
forall a. a -> a -> Bool -> a
bool Text
text (Char -> Text -> Text
T.cons Char
' ' Text
text) Bool
shouldEscape
    GemLink Text
link Maybe Text
desc -> Text
"=> " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> (HasCallStack => Text -> Text -> Text -> Text
Text -> Text -> Text -> Text
T.replace Text
" " Text
"%20" (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
sanitize (Text -> Text) -> Text -> Text
forall a b. (a -> b) -> a -> b
$ Text
link) Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Maybe Text -> Text
append Maybe Text
desc
    GemHeading Int
level Text
text -> Int -> Text -> Text
T.replicate (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
level Int
3) Text
"#" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Maybe Text -> Text
append (Text -> Maybe Text
optional Text
text)
    GemList [Text]
list -> Text -> [Text] -> Text
T.intercalate Text
"\CR\LF" ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ (Text -> Text) -> [Text] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map ((Text
"* " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<>) (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
sanitize) [Text]
list
    GemQuote Text
text -> Text
">" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Maybe Text -> Text
append (Text -> Maybe Text
optional Text
text)
    GemPre [Text]
text Maybe Text
alt -> Text -> [Text] -> Text
T.intercalate Text
"\CR\LF" ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ [Text
"```" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Maybe Text -> Text
append Maybe Text
alt] [Text] -> [Text] -> [Text]
forall a. Semigroup a => a -> a -> a
<> [Text]
text [Text] -> [Text] -> [Text]
forall a. Semigroup a => a -> a -> a
<> [Text
"```"]

-- | Get the title of the `GemDocument', which is the very first `GemHeading'
-- in the document, regardless of the level of the heading.
documentTitle :: GemDocument -> Maybe Text
documentTitle :: [GemItem] -> Maybe Text
documentTitle [GemItem]
doc = [Text] -> Maybe Text
forall a. [a] -> Maybe a
listToMaybe [Text
h | GemHeading Int
_ Text
h <- [GemItem]
doc]

-- `Nothing' if the text is empty.
optional :: Text -> Maybe Text
optional :: Text -> Maybe Text
optional Text
text = Maybe Text -> Maybe Text -> Bool -> Maybe Text
forall a. a -> a -> Bool -> a
bool (Text -> Maybe Text
forall a. a -> Maybe a
Just Text
text) Maybe Text
forall a. Maybe a
Nothing (Bool -> Maybe Text) -> Bool -> Maybe Text
forall a b. (a -> b) -> a -> b
$ Text -> Bool
T.null Text
text

-- Sanitize and add a single space at the beginning if the `Text' exist,
-- otherwise return an empty `Text'.
append :: Maybe Text -> Text
append :: Maybe Text -> Text
append = Text -> (Text -> Text) -> Maybe Text -> Text
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Text
"" ((Text
" " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<>) (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
sanitize)

-- Strip the prefix of the line and then sanitize it to get the value.
value :: Int -> Text -> Text
value :: Int -> Text -> Text
value Int
prefix = Text -> Text
sanitize (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text -> Text
T.drop Int
prefix

-- Replace the line breaks with spaces and strip the `Text'.
sanitize :: Text -> Text
sanitize :: Text -> Text
sanitize = Text -> Text
T.strip (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
escapeLineBreaks

-- Replace the line breaks with spaces.
escapeLineBreaks :: Text -> Text
escapeLineBreaks :: Text -> Text
escapeLineBreaks = HasCallStack => Text -> Text -> Text -> Text
Text -> Text -> Text -> Text
T.replace Text
"\LF" Text
" " (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. HasCallStack => Text -> Text -> Text -> Text
Text -> Text -> Text -> Text
T.replace Text
"\CR" Text
""