{-# LANGUAGE OverloadedStrings #-}

-- |
-- Module      :  Text.Gemini
-- Copyright   :  (c) Sena, 2023
-- License     :  AGPL-3.0-or-later
--
-- Maintainer  :  Sena <jn-sena@proton.me>
-- Stability   :  unstable
-- Portability :  portable
--
-- A tiny @text/gemini@ parser.
--
-- Parses Gemtext documents from and to 'Text'. See the Section 5 of the
-- [Gemini Protocol specification](https://geminiprotocol.net/docs/specification.gmi).

module Text.Gemini
  ( -- * Gemtext Types
    GemDocument
  , GemItem (..)
    -- * Decoding from Text
  , decode
  , decodeLine
    -- * Encoding to Text
  , encode
  , encodeItem
  ) where

import Data.Text.Lazy (Text)
import qualified Data.Text.Lazy as T
import Data.Maybe (maybeToList)
import Data.Char (isSpace)
import Data.Int (Int64)
import Data.Bool (bool)


-- | A Gemtext document, in the form of an ordered list.
type GemDocument = [GemItem]

-- | A Gemtext item.
data GemItem = GemText Text -- ^ A regular Gemtext line. -- @'GemText' \<Text>@
             | GemLink Text (Maybe Text) -- ^ A Gemtext link. -- @'GemLink' \<Link> \[Optional Description]@
             | GemHeading Int64 Text -- ^ A Gemtext heading of 3 levels max. -- @'GemHeading' \<Level> \<Text>@
             | GemList [Text] -- ^ A Gemtext unordered list. -- @'GemList' \<Lines>@
             | GemQuote Text -- ^ A Gemtext quote. -- @'GemQuote' \<Text>@
             | GemPre [Text] (Maybe Text) -- ^ A Gemtext preformat. -- @'GemPre' \<Lines> [Optional Alt Text]@
             deriving (Int -> GemItem -> ShowS
GemDocument -> ShowS
GemItem -> String
(Int -> GemItem -> ShowS)
-> (GemItem -> String) -> (GemDocument -> ShowS) -> Show GemItem
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
$cshowsPrec :: Int -> GemItem -> ShowS
showsPrec :: Int -> GemItem -> ShowS
$cshow :: GemItem -> String
show :: GemItem -> String
$cshowList :: GemDocument -> ShowS
showList :: GemDocument -> ShowS
Show, GemItem -> GemItem -> Bool
(GemItem -> GemItem -> Bool)
-> (GemItem -> GemItem -> Bool) -> Eq GemItem
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
$c== :: GemItem -> GemItem -> Bool
== :: GemItem -> GemItem -> Bool
$c/= :: GemItem -> GemItem -> Bool
/= :: GemItem -> GemItem -> Bool
Eq)


-- | Parse a @text/gemini@ file as 'GemDocument'.
-- The text should be supplied as an LF-ending 'Text'.
decode :: Text -> GemDocument
decode :: Text -> GemDocument
decode = GemDocument -> [Text] -> GemDocument
parse [] ([Text] -> GemDocument) -> (Text -> [Text]) -> Text -> GemDocument
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Text]
T.lines
    where -- Default line-by-line recursive parser with 'GemDocument' carried.
          -- If a list or preformatting is detected, will continue with
          -- @parseList@ and @parsePre@ respectively. Otherwise, will use
          -- 'decodeLine' on the current line only.
          parse :: GemDocument -> [Text] -> GemDocument
          parse :: GemDocument -> [Text] -> GemDocument
parse GemDocument
doc (Text
l:[Text]
ls)
              | Text
l Text -> Text -> Bool
`hasValueOf` Text
"* " = GemDocument -> [Text] -> [Text] -> GemDocument
parseList GemDocument
doc [Int64 -> Text -> Text
value Int64
1 Text
l] [Text]
ls
              | Text
"```" Text -> Text -> Bool
`T.isPrefixOf` Text
l = GemDocument -> [Text] -> Maybe Text -> [Text] -> GemDocument
parsePre GemDocument
doc [] (Text -> Maybe Text
optional (Text -> Maybe Text) -> Text -> Maybe Text
forall a b. (a -> b) -> a -> b
$ Int64 -> Text -> Text
value Int64
3 Text
l) [Text]
ls
              | Bool
otherwise = GemDocument -> [Text] -> GemDocument
parse (GemDocument
doc GemDocument -> GemDocument -> GemDocument
forall a. Semigroup a => a -> a -> a
<> [Text -> GemItem
decodeLine Text
l]) [Text]
ls
          parse GemDocument
doc [] = GemDocument
doc

          -- List recursive parser with a list carried. Adds the line to
          -- the list if it starts with list prefix. If the line doesn't
          -- start with the prefix, will end the recursion and continue with
          -- @parse@, with the final list added to the carried 'GemDocument'
          -- as 'GemList'.
          parseList :: GemDocument -> [Text] -> [Text] -> GemDocument
          parseList :: GemDocument -> [Text] -> [Text] -> GemDocument
parseList GemDocument
doc [Text]
glist (Text
l:[Text]
ls)
              | Text
l Text -> Text -> Bool
`hasValueOf` Text
"* " = GemDocument -> [Text] -> [Text] -> GemDocument
parseList GemDocument
doc ([Text]
glist [Text] -> [Text] -> [Text]
forall a. Semigroup a => a -> a -> a
<> [Int64 -> Text -> Text
value Int64
1 Text
l]) [Text]
ls
              | Bool
otherwise = GemDocument -> [Text] -> GemDocument
parse (GemDocument
doc GemDocument -> GemDocument -> GemDocument
forall a. Semigroup a => a -> a -> a
<> [[Text] -> GemItem
GemList [Text]
glist]) (Text
lText -> [Text] -> [Text]
forall a. a -> [a] -> [a]
:[Text]
ls)
          parseList GemDocument
doc [Text]
glist [] = GemDocument
doc GemDocument -> GemDocument -> GemDocument
forall a. Semigroup a => a -> a -> a
<> [[Text] -> GemItem
GemList [Text]
glist]

          -- Preformatted recursive parser with lines carried. Adds the line to
          -- the list if the ending delimiter hasn't been reached yet. If the line
          -- is the ending with the delimiter, will end the recursion and continue
          -- with @parse@, with the final list of lines added to the carried
          -- 'GemDocument' as 'GemPre'.
          parsePre :: GemDocument -> [Text] -> Maybe Text -> [Text] -> GemDocument
          parsePre :: GemDocument -> [Text] -> Maybe Text -> [Text] -> GemDocument
parsePre GemDocument
doc [Text]
glines Maybe Text
alt (Text
l:[Text]
ls)
              | Text
"```" Text -> Text -> Bool
`T.isPrefixOf` Text
l = GemDocument -> [Text] -> GemDocument
parse (GemDocument
doc GemDocument -> GemDocument -> GemDocument
forall a. Semigroup a => a -> a -> a
<> [[Text] -> Maybe Text -> GemItem
GemPre [Text]
glines Maybe Text
alt]) [Text]
ls
              | Bool
otherwise = GemDocument -> [Text] -> Maybe Text -> [Text] -> GemDocument
parsePre GemDocument
doc ([Text]
glines [Text] -> [Text] -> [Text]
forall a. Semigroup a => a -> a -> a
<> [Text
l]) Maybe Text
alt [Text]
ls
          parsePre GemDocument
doc [Text]
glines Maybe Text
alt [] = GemDocument
doc GemDocument -> GemDocument -> GemDocument
forall a. Semigroup a => a -> a -> a
<> [[Text] -> Maybe Text -> GemItem
GemPre [Text]
glines Maybe Text
alt]

-- | Parse a /single/ @text/gemini@ line as 'GemItem'.
--
-- Notes:
--
--     * Isn't able to decode preformatted texts as they are strictly multiline.
--     * 'GemList's are also obviously singleton.
decodeLine :: Text -> GemItem
decodeLine :: Text -> GemItem
decodeLine Text
line
    | Text
line Text -> Text -> Bool
`hasValueOf` Text
"=>" = let (Text
link, Text
desc) = (Char -> Bool) -> Text -> (Text, Text)
T.break Char -> Bool
isSpace (Text -> (Text, Text)) -> Text -> (Text, Text)
forall a b. (a -> b) -> a -> b
$ Int64 -> Text -> Text
value Int64
2 Text
line
                                in Text -> Maybe Text -> GemItem
GemLink Text
link (Maybe Text -> GemItem) -> Maybe Text -> GemItem
forall a b. (a -> b) -> a -> b
$ Text -> Maybe Text
optional (Text -> Maybe Text) -> Text -> Maybe Text
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.strip Text
desc
    | Text
line Text -> Text -> Bool
`hasValueOf` Text
"#" = Text -> GemItem
parseHeading Text
line
    | Text
line Text -> Text -> Bool
`hasValueOf` Text
"* " = [Text] -> GemItem
GemList [Int64 -> Text -> Text
value Int64
1 Text
line]
    | Text
line Text -> Text -> Bool
`hasValueOf` Text
">" = Text -> GemItem
GemQuote (Text -> GemItem) -> Text -> GemItem
forall a b. (a -> b) -> a -> b
$ Int64 -> Text -> Text
value Int64
1 Text
line
    | Bool
otherwise = Text -> GemItem
GemText (Text -> GemItem) -> Text -> GemItem
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.stripEnd Text
line
    where -- Parse a Gemtext heading.
          -- The max level of a heading is 3.
          parseHeading :: Text -> GemItem
          parseHeading :: Text -> GemItem
parseHeading Text
l
              | Text -> Bool
T.null (Text -> Bool) -> Text -> Bool
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.strip Text
text = Text -> GemItem
GemText (Text -> GemItem) -> Text -> GemItem
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.strip Text
l
              | Bool
otherwise = Int64 -> Text -> GemItem
GemHeading (Int64 -> Int64 -> Int64
forall a. Ord a => a -> a -> a
min (Text -> Int64
T.length Text
pre) Int64
3) (Text -> GemItem) -> Text -> GemItem
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.strip Text
text
              where (Text
pre, Text
text) = (Char -> Bool) -> Text -> (Text, Text)
T.span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'#') Text
l


-- | Encode parsed 'GemDocument' to a @text/gemini@ file.
-- The output 'Text' uses LF-endings.
encode :: GemDocument -> Text
encode :: GemDocument -> Text
encode = [Text] -> Text
T.unlines ([Text] -> Text) -> (GemDocument -> [Text]) -> GemDocument -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([Text] -> GemItem -> [Text]) -> [Text] -> GemDocument -> [Text]
forall b a. (b -> a -> b) -> b -> [a] -> b
forall (t :: * -> *) b a.
Foldable t =>
(b -> a -> b) -> b -> t a -> b
foldl (\[Text]
acc GemItem
x -> [Text]
acc [Text] -> [Text] -> [Text]
forall a. Semigroup a => a -> a -> a
<> [GemItem -> Text
encodeItem GemItem
x]) []

-- | Encode a /single/ parsed 'GemItem' as @text/gemini@ text.
-- The output 'Text' uses LF-endings and might be multiple lines.
--
-- /Beware/ that the final newline is always stripped, if any.
--
-- Notes:
--
--     * The text of the 'GemText' will follow a whitespace to escape the prefix
--     at the beginning if the line starts with a valid one.
--     * The link in the 'GemLink' should not have spaces.
encodeItem :: GemItem -> Text
encodeItem :: GemItem -> Text
encodeItem (GemText Text
line) = Text -> Text
escapePrefixes Text
line
encodeItem (GemLink Text
link Maybe Text
desc) = [Text] -> Text
T.unwords ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ [Text
"=>", Text
link] [Text] -> [Text] -> [Text]
forall a. Semigroup a => a -> a -> a
<> Maybe Text -> [Text]
forall a. Maybe a -> [a]
maybeToList Maybe Text
desc
encodeItem (GemHeading Int64
level Text
text) = [Text] -> Text
T.unwords [Int64 -> Text -> Text
T.replicate (Int64 -> Int64 -> Int64
forall a. Ord a => a -> a -> a
min Int64
level Int64
3) Text
"#", Text
text]
encodeItem (GemList [Text]
list) = Text -> Text
T.stripEnd (Text -> Text) -> ([Text] -> Text) -> [Text] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Text] -> Text
T.unlines ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ (Text -> Text) -> [Text] -> [Text]
forall a b. (a -> b) -> [a] -> [b]
map (Text -> Text -> Text
T.append Text
"* ") [Text]
list
encodeItem (GemQuote Text
text) = [Text] -> Text
T.unwords [Text
">", Text
text]
encodeItem (GemPre [Text]
text Maybe Text
alt) = Text -> Text
T.stripEnd (Text -> Text) -> ([Text] -> Text) -> [Text] -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [Text] -> Text
T.unlines ([Text] -> Text) -> [Text] -> Text
forall a b. (a -> b) -> a -> b
$ [[Text] -> Text
T.concat ([Text
"```"] [Text] -> [Text] -> [Text]
forall a. Semigroup a => a -> a -> a
<> Maybe Text -> [Text]
forall a. Maybe a -> [a]
maybeToList Maybe Text
alt)] [Text] -> [Text] -> [Text]
forall a. Semigroup a => a -> a -> a
<> [Text]
text [Text] -> [Text] -> [Text]
forall a. Semigroup a => a -> a -> a
<> [Text
"```"]

-- Escape the line prefixes by adding a whitespace at the beginning for 'GemText'.
escapePrefixes :: Text -> Text
escapePrefixes :: Text -> Text
escapePrefixes Text
line = Text -> Text -> Bool -> Text
forall a. a -> a -> Bool -> a
bool Text
line (Char -> Text -> Text
T.cons Char
' ' Text
line) Bool
escape
    where escape :: Bool
escape = Text
"```" Text -> Text -> Bool
`T.isPrefixOf` Text
line Bool -> Bool -> Bool
||
                   (Text -> Bool) -> [Text] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (Text -> Text -> Bool
hasValueOf Text
line) [Text
"=>", Text
"* ", Text
">"] Bool -> Bool -> Bool
||
                   (Text
line Text -> Text -> Bool
`hasValueOf` Text
"#" Bool -> Bool -> Bool
&& let (Text
_, Text
text) = (Char -> Bool) -> Text -> (Text, Text)
T.span (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
'#') Text
line
                                              in Bool -> Bool
not (Text -> Bool
T.null (Text -> Bool) -> Text -> Bool
forall a b. (a -> b) -> a -> b
$ Text -> Text
T.strip Text
text))

-- @True@ if the the line has prefix /and/ a following value.
hasValueOf :: Text -> Text -> Bool
hasValueOf :: Text -> Text -> Bool
hasValueOf Text
line Text
prefix = Text
prefix Text -> Text -> Bool
`T.isPrefixOf` Text
line Bool -> Bool -> Bool
&&
                         Bool -> Bool
not (Text -> Bool
T.null (Text -> Bool) -> Text -> Bool
forall a b. (a -> b) -> a -> b
$ Int64 -> Text -> Text
value (Text -> Int64
T.length Text
prefix) Text
line)

-- Get the value of a line.
-- Removes the prefix and strips.
value :: Int64 -> Text -> Text
value :: Int64 -> Text -> Text
value Int64
prefix = Text -> Text
T.strip (Text -> Text) -> (Text -> Text) -> Text -> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Int64 -> Text -> Text
T.drop Int64
prefix

-- @Nothing@ if the text is empty.
optional :: Text -> Maybe Text
optional :: Text -> Maybe Text
optional Text
text = if Text -> Bool
T.null Text
text then Maybe Text
forall a. Maybe a
Nothing else Text -> Maybe Text
forall a. a -> Maybe a
Just Text
text