{-# LANGUAGE OverloadedStrings #-}

-- |
-- Module      :  Text.Gemini
-- Copyright   :  (c) Sena, 2024
-- License     :  AGPL-3.0-or-later
--
-- Maintainer  :  Sena <jn-sena@proton.me>
-- Stability   :  stable
-- Portability :  portable
--
-- A tiny gemtext (unofficially @text/gemini@) parser
--
-- Parses gemtext documents from and back to 'Text'.
--
-- See the Gemini hypertext format specification at
-- <https://geminiprotocol.net/docs/gemtext-specification.gmi>.

module Text.Gemini
    ( -- * Gemtext types
      GemDocument
    , GemItem (..)

      -- * Decoding from text
    , decode
    , decodeLine

      -- * Encoding into text
    , encode
    , encodeItem
    ) where

import Data.Bool (bool)
import Data.Char (isSpace)
import Data.Maybe (fromMaybe)
import Data.Text (Text)
import qualified Data.Text as T


-- | A gemtext document, in the form of an ordered list of 'GemItem's
type GemDocument = [GemItem]

-- | A gemtext item
data GemItem
    = -- | A regular gemtext line -- @'GemText' \<Text>@
      GemText !Text
    | -- | A gemtext link -- @'GemLink' \<Link> \[Optional Description]@
      GemLink !Text !(Maybe Text)
    | -- | A gemtext heading of 3 levels max -- @'GemHeading' \<Level> \<Text>@
      GemHeading !Int !Text
    | -- | An unordered gemtext list -- @'GemList' \<Lines>@
      GemList ![Text]
    | -- | A gemtext quote -- @'GemQuote' \<Text>@
      GemQuote !Text
    | -- | A preformatted gemtext block -- @'GemPre' \<Lines> [Optional Alt Text]@
      GemPre ![Text] !(Maybe Text)
    deriving (Int -> GemItem -> ShowS
GemDocument -> ShowS
GemItem -> String
(Int -> GemItem -> ShowS)
-> (GemItem -> String) -> (GemDocument -> ShowS) -> Show GemItem
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> GemItem -> ShowS
showsPrec :: Int -> GemItem -> ShowS
$cshow :: GemItem -> String
show :: GemItem -> String
$cshowList :: GemDocument -> ShowS
showList :: GemDocument -> ShowS
Show, GemItem -> GemItem -> Bool
(GemItem -> GemItem -> Bool)
-> (GemItem -> GemItem -> Bool) -> Eq GemItem
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: GemItem -> GemItem -> Bool
== :: GemItem -> GemItem -> Bool
$c/= :: GemItem -> GemItem -> Bool
/= :: GemItem -> GemItem -> Bool
Eq)


-- | Parse a gemtext block as 'GemDocument'.
--
-- The text should be supplied as a 'Text' which uses LF line breaks.
decode :: Text -> GemDocument
decode :: Text -> GemDocument
decode = GemDocument -> [Text] -> GemDocument
parse [] ([Text] -> GemDocument) -> (Text -> [Text]) -> Text -> GemDocument
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Text]
T.lines
  where
    -- Default line-by-line recursive parser with 'GemDocument' carried.
    -- If a list or preformatting is detected, will continue with
    -- @parseList@ and @parsePre@ respectively. Otherwise, will use
    -- 'decodeLine' on the current line only.
    parse :: GemDocument -> [Text] -> GemDocument
    parse :: GemDocument -> [Text] -> GemDocument
parse GemDocument
doc (Text
l : [Text]
ls)
        | Text
l Text -> Text -> Bool
`hasValueOf` Text
"* " = GemDocument -> [Text] -> [Text] -> GemDocument
parseList GemDocument
doc [Int -> Text -> Text
value Int
1 Text
l] [Text]
ls
        | Text
"```" Text -> Text -> Bool
`T.isPrefixOf` Text
l = GemDocument -> [Text] -> Maybe Text -> [Text] -> GemDocument
parsePre GemDocument
doc [] (Text -> Maybe Text
optional (Text -> Maybe Text) -> Text -> Maybe Text
forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
value Int
3 Text
l) [Text]
ls
        | Bool
otherwise = GemDocument -> [Text] -> GemDocument
parse (GemDocument
doc GemDocument -> GemDocument -> GemDocument
forall a. Semigroup a => a -> a -> a
<> [Text -> GemItem
decodeLine Text
l]) [Text]
ls
    parse GemDocument
doc [] = GemDocument
doc

    -- List recursive parser. Continues with @parse@ when the line
    -- is no longer a list item, and adds the 'GemList' to the 'GemDocument'.
    parseList :: GemDocument -> [Text] -> [Text] -> GemDocument
    parseList :: GemDocument -> [Text] -> [Text] -> GemDocument
parseList GemDocument
doc [Text]
glist (Text
l : [Text]
ls)
        | Text
l Text -> Text -> Bool
`hasValueOf` Text
"* " = GemDocument -> [Text] -> [Text] -> GemDocument
parseList GemDocument
doc ([Text]
glist [Text] -> [Text] -> [Text]
forall a. Semigroup a => a -> a -> a
<> [Int -> Text -> Text
value Int
1 Text
l]) [Text]
ls
        | Bool
otherwise = GemDocument -> [Text] -> GemDocument
parse (GemDocument
doc GemDocument -> GemDocument -> GemDocument
forall a. Semigroup a => a -> a -> a
<> [[Text] -> GemItem
GemList [Text]
glist]) (Text
l Text -> [Text] -> [Text]
forall a. a -> [a] -> [a]
: [Text]
ls)
    parseList GemDocument
doc [Text]
glist [] = GemDocument
doc GemDocument -> GemDocument -> GemDocument
forall a. Semigroup a => a -> a -> a
<> [[Text] -> GemItem
GemList [Text]
glist]

    -- Preformatted recursive parser. Continues with @parse@ when the
    -- closing delimiter is reached, and adds the 'GemPre' to the 'GemDocument'.
    parsePre :: GemDocument -> [Text] -> Maybe Text -> [Text] -> GemDocument
    parsePre :: GemDocument -> [Text] -> Maybe Text -> [Text] -> GemDocument
parsePre GemDocument
doc [Text]
glines Maybe Text
alt (Text
l : [Text]
ls)
        | Text
"```" Text -> Text -> Bool
`T.isPrefixOf` Text
l = GemDocument -> [Text] -> GemDocument
parse (GemDocument
doc GemDocument -> GemDocument -> GemDocument
forall a. Semigroup a => a -> a -> a
<> [[Text] -> Maybe Text -> GemItem
GemPre [Text]
glines Maybe Text
alt]) [Text]
ls
        | Bool
otherwise = GemDocument -> [Text] -> Maybe Text -> [Text] -> GemDocument
parsePre GemDocument
doc ([Text]
glines [Text] -> [Text] -> [Text]
forall a. Semigroup a => a -> a -> a
<> [Text
l]) Maybe Text
alt [Text]
ls
    parsePre GemDocument
doc [Text]
glines Maybe Text
alt [] = GemDocument
doc GemDocument -> GemDocument -> GemDocument
forall a. Semigroup a => a -> a -> a
<> [[Text] -> Maybe Text -> GemItem
GemPre [Text]
glines Maybe Text
alt]

-- | Parse a /single/ gemtext line as 'GemItem'.
--
-- Preformatted text blocks are regarded as 'GemText's as they are strictly multiline.
decodeLine :: Text -> GemItem
decodeLine :: Text -> GemItem
decodeLine Text
line
    | Text
line Text -> Text -> Bool
`hasValueOf` Text
"=>" =
        let (Text
link, Text
desc) = (Char -> Bool) -> Text -> (Text, Text)
T.break Char -> Bool
isSpace (Text -> (Text, Text)) -> Text -> (Text, Text)
forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
value Int
2 Text
line
         in Text -> Maybe Text -> GemItem
GemLink Text
link (Maybe Text -> GemItem) -> Maybe Text -> GemItem
forall a b. (a -> b) -> a -> b
$ Text -> Maybe Text
optional (Text -> Maybe Text) -> Text -> Maybe Text
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.strip Text
desc
    | Text
line Text -> Text -> Bool
`hasValueOf` Text
"#" = Text -> GemItem
parseHeading Text
line
    | Text
line Text -> Text -> Bool
`hasValueOf` Text
"* " = [Text] -> GemItem
GemList [Int -> Text -> Text
value Int
1 Text
line]
    | Text
line Text -> Text -> Bool
`hasValueOf` Text
">" = Text -> GemItem
GemQuote (Text -> GemItem) -> Text -> GemItem
forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
value Int
1 Text
line
    | Bool
otherwise = Text -> GemItem
GemText (Text -> GemItem) -> Text -> GemItem
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.stripEnd Text
line
  where
    -- Parse a gemtext heading.
    -- The max level of a heading is 3.
    parseHeading :: Text -> GemItem
    parseHeading :: Text -> GemItem
parseHeading Text
l
        | Text -> Bool
T.null (Text -> Bool) -> Text -> Bool
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.strip Text
text = Text -> GemItem
GemText (Text -> GemItem) -> Text -> GemItem
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.strip Text
l
        | Bool
otherwise = Int -> Text -> GemItem
GemHeading (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min (Text -> Int
T.length Text
pre) Int
3) (Text -> GemItem) -> Text -> GemItem
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.strip Text
text
      where
        (Text
pre, Text
text) = (Char -> Bool) -> Text -> (Text, Text)
T.span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'#') Text
l


-- | Encode a parsed 'GemDocument' into a gemtext block.
--
-- The output 'Text' uses LF line breaks.
--
-- Valid prefixes are escaped before encoding. See the 'encodeItem' function below.
--
-- Empty lists are ignored if given.
encode :: GemDocument -> Text
encode :: GemDocument -> Text
encode = [Text] -> Text
T.unlines ([Text] -> Text) -> (GemDocument -> [Text]) -> GemDocument -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (GemItem -> Text) -> GemDocument -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map GemItem -> Text
encodeItem (GemDocument -> [Text])
-> (GemDocument -> GemDocument) -> GemDocument -> [Text]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (GemItem -> Bool) -> GemDocument -> GemDocument
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (GemItem -> Bool) -> GemItem -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. GemItem -> Bool
empty)
  where
    empty :: GemItem -> Bool
    empty :: GemItem -> Bool
empty (GemList [Text]
list) = [Text] -> Bool
forall a. [a] -> Bool
forall (t :: * -> *) a. Foldable t => t a -> Bool
null [Text]
list
    empty GemItem
_ = Bool
False

-- | Encode a /single/ parsed 'GemItem' into gemtext.
--
-- /Beware/ that the output text does /not/ end with a newline.
--
-- The output 'Text' might be multiple lines, in which case it uses LF line breaks.
--
-- Valid prefixes are escaped before encoding.
--
-- The spaces in 'GemLink's are replaced with @%20@ to normalize them.
encodeItem :: GemItem -> Text
encodeItem :: GemItem -> Text
encodeItem (GemText Text
line) = Text -> Text
escapePrefixes Text
line
encodeItem (GemLink Text
link Maybe Text
desc) = Text
"=> " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> HasCallStack => Text -> Text -> Text -> Text
Text -> Text -> Text -> Text
T.replace Text
" " Text
"%20" Text
link Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text -> (Text -> Text) -> Maybe Text -> Text
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Text
"" (Text
" " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<>) Maybe Text
desc
encodeItem (GemHeading Int
level Text
text) = Int -> Text -> Text
T.replicate (Int -> Int -> Int
forall a. Ord a => a -> a -> a
min Int
level Int
3) Text
"#" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
" " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
text
encodeItem (GemList [Text]
list) = Text -> [Text] -> Text
T.intercalate Text
"\n" ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ (Text -> Text) -> [Text] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text
"* " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<>) [Text]
list
encodeItem (GemQuote Text
text) = Text
"> " Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text
text
encodeItem (GemPre [Text]
text Maybe Text
alt) = Text -> [Text] -> Text
T.intercalate Text
"\n" ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ [Text
"```" Text -> Text -> Text
forall a. Semigroup a => a -> a -> a
<> Text -> Maybe Text -> Text
forall a. a -> Maybe a -> a
fromMaybe Text
"" Maybe Text
alt] [Text] -> [Text] -> [Text]
forall a. Semigroup a => a -> a -> a
<> [Text]
text [Text] -> [Text] -> [Text]
forall a. Semigroup a => a -> a -> a
<> [Text
"```"]


-- Escape the line prefixes by adding a whitespace at the beginning for 'GemText'.
escapePrefixes :: Text -> Text
escapePrefixes :: Text -> Text
escapePrefixes Text
line = Text -> Text -> Bool -> Text
forall a. a -> a -> Bool -> a
bool Text
line (Char -> Text -> Text
T.cons Char
' ' Text
line) Bool
escape
  where
    escape :: Bool
escape = Text
"```" Text -> Text -> Bool
`T.isPrefixOf` Text
line
               Bool -> Bool -> Bool
|| (Text -> Bool) -> [Text] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (Text -> Text -> Bool
hasValueOf Text
line) [Text
"=>", Text
"* ", Text
">"]
               Bool -> Bool -> Bool
|| (Text
line Text -> Text -> Bool
`hasValueOf` Text
"#"
                     Bool -> Bool -> Bool
&& let (Text
_, Text
text) = (Char -> Bool) -> Text -> (Text, Text)
T.span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'#') Text
line
                         in Bool -> Bool
not (Text -> Bool
T.null (Text -> Bool) -> Text -> Bool
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.strip Text
text)
                  )


-- @True@ if the the line has prefix /and/ a following value.
hasValueOf :: Text -> Text -> Bool
hasValueOf :: Text -> Text -> Bool
hasValueOf Text
line Text
prefix =
    Text
prefix Text -> Text -> Bool
`T.isPrefixOf` Text
line
        Bool -> Bool -> Bool
&& Bool -> Bool
not (Text -> Bool
T.null (Text -> Bool) -> Text -> Bool
forall a b. (a -> b) -> a -> b
$ Int -> Text -> Text
value (Text -> Int
T.length Text
prefix) Text
line)

-- Get the value of a line.
-- Removes the prefix of given length and strips.
value :: Int -> Text -> Text
value :: Int -> Text -> Text
value Int
prefix = Text -> Text
T.strip (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int -> Text -> Text
T.drop Int
prefix

-- @Nothing@ if the text is empty.
optional :: Text -> Maybe Text
optional :: Text -> Maybe Text
optional Text
text = Maybe Text -> Maybe Text -> Bool -> Maybe Text
forall a. a -> a -> Bool -> a
bool (Text -> Maybe Text
forall a. a -> Maybe a
Just Text
text) Maybe Text
forall a. Maybe a
Nothing (Bool -> Maybe Text) -> Bool -> Maybe Text
forall a b. (a -> b) -> a -> b
$ Text -> Bool
T.null Text
text