{-# LANGUAGE ConstraintKinds #-}
{-# LANGUAGE DeriveAnyClass #-}
{-# LANGUAGE RankNTypes #-}
{-# LANGUAGE TypeApplications #-}
{-# LANGUAGE TypeFamilies #-}
{-# LANGUAGE UndecidableInstances #-}

-- | Helper to deal with Markdown files
--
-- TODO: Publish this eventually to Hackage, along with wiki-link stuff from
-- emanote (maybe as separate package).
module Ema.Helper.Markdown
  ( -- Parsing
    -- TODO: Publish to Hackage as commonmark-pandoc-simple?
    parseMarkdownWithFrontMatter,
    parseMarkdown,
    fullMarkdownSpec,
    -- Utilities
    plainify,
  )
where

import qualified Commonmark as CM
import qualified Commonmark.Extensions as CE
import qualified Commonmark.Pandoc as CP
import Control.Monad.Combinators (manyTill)
import Data.Aeson (FromJSON)
import qualified Data.Yaml as Y
import qualified Text.Megaparsec as M
import qualified Text.Megaparsec.Char as M
import qualified Text.Pandoc.Builder as B
import Text.Pandoc.Definition (Pandoc (..))
import qualified Text.Pandoc.Walk as W

-- | Parse a Markdown file using commonmark-hs with all extensions enabled
parseMarkdownWithFrontMatter ::
  forall meta m il bl.
  ( FromJSON meta,
    m ~ Either CM.ParseError,
    bl ~ CP.Cm () B.Blocks,
    il ~ CP.Cm () B.Inlines
  ) =>
  CM.SyntaxSpec m il bl ->
  -- | Path to file associated with this Markdown
  FilePath ->
  -- | Markdown text to parse
  Text ->
  Either Text (Maybe meta, Pandoc)
parseMarkdownWithFrontMatter :: SyntaxSpec m il bl
-> FilePath -> Text -> Either Text (Maybe meta, Pandoc)
parseMarkdownWithFrontMatter SyntaxSpec m il bl
spec FilePath
fn Text
s = do
  (Maybe Text
mMeta, Text
markdown) <- FilePath -> Text -> Either Text (Maybe Text, Text)
partitionMarkdown FilePath
fn Text
s
  Maybe meta
mMetaVal <- (ParseException -> Text)
-> Either ParseException (Maybe meta) -> Either Text (Maybe meta)
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first ParseException -> Text
forall b a. (Show a, IsString b) => a -> b
show (Either ParseException (Maybe meta) -> Either Text (Maybe meta))
-> Either ParseException (Maybe meta) -> Either Text (Maybe meta)
forall a b. (a -> b) -> a -> b
$ (ByteString -> Either ParseException meta
forall a. FromJSON a => ByteString -> Either ParseException a
Y.decodeEither' (ByteString -> Either ParseException meta)
-> (Text -> ByteString) -> Text -> Either ParseException meta
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> ByteString
forall a b. ConvertUtf8 a b => a -> b
encodeUtf8) (Text -> Either ParseException meta)
-> Maybe Text -> Either ParseException (Maybe meta)
forall (t :: * -> *) (f :: * -> *) a b.
(Traversable t, Applicative f) =>
(a -> f b) -> t a -> f (t b)
`traverse` Maybe Text
mMeta
  bl
blocks <- (ParseError -> Text) -> Either ParseError bl -> Either Text bl
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first ParseError -> Text
forall b a. (Show a, IsString b) => a -> b
show (Either ParseError bl -> Either Text bl)
-> Either ParseError bl -> Either Text bl
forall a b. (a -> b) -> a -> b
$ Either ParseError (Either ParseError bl) -> Either ParseError bl
forall (m :: * -> *) a. Monad m => m (m a) -> m a
join (Either ParseError (Either ParseError bl) -> Either ParseError bl)
-> Either ParseError (Either ParseError bl) -> Either ParseError bl
forall a b. (a -> b) -> a -> b
$ SyntaxSpec (Either ParseError) il bl
-> FilePath -> Text -> Either ParseError (Either ParseError bl)
forall (m :: * -> *) il bl.
(Monad m, IsBlock il bl, IsInline il) =>
SyntaxSpec m il bl -> FilePath -> Text -> m (Either ParseError bl)
CM.commonmarkWith @(Either CM.ParseError) SyntaxSpec m il bl
SyntaxSpec (Either ParseError) il bl
spec FilePath
fn Text
markdown
  let doc :: Pandoc
doc = Meta -> [Block] -> Pandoc
Pandoc Meta
forall a. Monoid a => a
mempty ([Block] -> Pandoc) -> [Block] -> Pandoc
forall a b. (a -> b) -> a -> b
$ Many Block -> [Block]
forall a. Many a -> [a]
B.toList (Many Block -> [Block])
-> (Cm () (Many Block) -> Many Block)
-> Cm () (Many Block)
-> [Block]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Cm () (Many Block) -> Many Block
forall b a. Cm b a -> a
CP.unCm @() @B.Blocks (Cm () (Many Block) -> [Block]) -> Cm () (Many Block) -> [Block]
forall a b. (a -> b) -> a -> b
$ bl
Cm () (Many Block)
blocks
  (Maybe meta, Pandoc) -> Either Text (Maybe meta, Pandoc)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Maybe meta
mMetaVal, Pandoc
doc)

parseMarkdown :: FilePath -> Text -> Either Text Pandoc
parseMarkdown :: FilePath -> Text -> Either Text Pandoc
parseMarkdown FilePath
fn Text
s = do
  Cm () (Many Block)
cmBlocks <- (ParseError -> Text)
-> Either ParseError (Cm () (Many Block))
-> Either Text (Cm () (Many Block))
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first ParseError -> Text
forall b a. (Show a, IsString b) => a -> b
show (Either ParseError (Cm () (Many Block))
 -> Either Text (Cm () (Many Block)))
-> Either ParseError (Cm () (Many Block))
-> Either Text (Cm () (Many Block))
forall a b. (a -> b) -> a -> b
$ Either ParseError (Either ParseError (Cm () (Many Block)))
-> Either ParseError (Cm () (Many Block))
forall (m :: * -> *) a. Monad m => m (m a) -> m a
join (Either ParseError (Either ParseError (Cm () (Many Block)))
 -> Either ParseError (Cm () (Many Block)))
-> Either ParseError (Either ParseError (Cm () (Many Block)))
-> Either ParseError (Cm () (Many Block))
forall a b. (a -> b) -> a -> b
$ SyntaxSpec (Either ParseError) (Cm () Inlines) (Cm () (Many Block))
-> FilePath
-> Text
-> Either ParseError (Either ParseError (Cm () (Many Block)))
forall (m :: * -> *) il bl.
(Monad m, IsBlock il bl, IsInline il) =>
SyntaxSpec m il bl -> FilePath -> Text -> m (Either ParseError bl)
CM.commonmarkWith @(Either CM.ParseError) SyntaxSpec (Either ParseError) (Cm () Inlines) (Cm () (Many Block))
forall (m :: * -> *) il bl.
SyntaxSpec' m il bl =>
SyntaxSpec m il bl
fullMarkdownSpec FilePath
fn Text
s
  let blocks :: [Block]
blocks = Many Block -> [Block]
forall a. Many a -> [a]
B.toList (Many Block -> [Block])
-> (Cm () (Many Block) -> Many Block)
-> Cm () (Many Block)
-> [Block]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Cm () (Many Block) -> Many Block
forall b a. Cm b a -> a
CP.unCm @() @B.Blocks (Cm () (Many Block) -> [Block]) -> Cm () (Many Block) -> [Block]
forall a b. (a -> b) -> a -> b
$ Cm () (Many Block)
cmBlocks
  Pandoc -> Either Text Pandoc
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Pandoc -> Either Text Pandoc) -> Pandoc -> Either Text Pandoc
forall a b. (a -> b) -> a -> b
$ Meta -> [Block] -> Pandoc
Pandoc Meta
forall a. Monoid a => a
mempty [Block]
blocks

type SyntaxSpec' m il bl =
  ( Monad m,
    CM.IsBlock il bl,
    CM.IsInline il,
    Typeable m,
    Typeable il,
    Typeable bl,
    CE.HasEmoji il,
    CE.HasStrikethrough il,
    CE.HasPipeTable il bl,
    CE.HasTaskList il bl,
    CM.ToPlainText il,
    CE.HasFootnote il bl,
    CE.HasMath il,
    CE.HasDefinitionList il bl,
    CE.HasDiv bl,
    CE.HasQuoted il,
    CE.HasSpan il
  )

-- | GFM + official commonmark extensions
fullMarkdownSpec ::
  SyntaxSpec' m il bl =>
  CM.SyntaxSpec m il bl
fullMarkdownSpec :: SyntaxSpec m il bl
fullMarkdownSpec =
  [SyntaxSpec m il bl] -> SyntaxSpec m il bl
forall a. Monoid a => [a] -> a
mconcat
    [ SyntaxSpec m il bl
forall (m :: * -> *) il bl.
(Monad m, Typeable m, Typeable il, Typeable bl, IsBlock il bl,
 IsInline il, HasFootnote il bl, HasEmoji il, HasStrikethrough il,
 HasPipeTable il bl, HasTaskList il bl, ToPlainText il) =>
SyntaxSpec m il bl
CE.gfmExtensions,
      SyntaxSpec m il bl
forall (m :: * -> *) il bl.
(Monad m, IsBlock il bl, IsInline il) =>
SyntaxSpec m il bl
CE.fancyListSpec,
      SyntaxSpec m il bl
forall (m :: * -> *) il bl.
(Monad m, Typeable m, IsBlock il bl, IsInline il, Typeable il,
 Typeable bl, HasFootnote il bl) =>
SyntaxSpec m il bl
CE.footnoteSpec,
      SyntaxSpec m il bl
forall (m :: * -> *) il bl.
(Monad m, IsBlock il bl, IsInline il, HasMath il) =>
SyntaxSpec m il bl
CE.mathSpec,
      SyntaxSpec m il bl
forall (m :: * -> *) il bl.
(Monad m, IsBlock il bl, IsInline il, HasQuoted il) =>
SyntaxSpec m il bl
CE.smartPunctuationSpec,
      SyntaxSpec m il bl
forall (m :: * -> *) il bl.
(Monad m, IsBlock il bl, IsInline il, Typeable il, Typeable bl,
 HasDefinitionList il bl) =>
SyntaxSpec m il bl
CE.definitionListSpec,
      SyntaxSpec m il bl
forall (m :: * -> *) il bl.
(Monad m, IsInline il) =>
SyntaxSpec m il bl
CE.attributesSpec,
      SyntaxSpec m il bl
forall (m :: * -> *) il bl.
(Monad m, IsBlock il bl) =>
SyntaxSpec m il bl
CE.rawAttributeSpec,
      SyntaxSpec m il bl
forall (m :: * -> *) il bl.
(Monad m, IsInline il, IsBlock il bl, HasDiv bl) =>
SyntaxSpec m il bl
CE.fencedDivSpec,
      SyntaxSpec m il bl
forall (m :: * -> *) il bl.
(Monad m, IsInline il, HasSpan il) =>
SyntaxSpec m il bl
CE.bracketedSpanSpec,
      SyntaxSpec m il bl
forall (m :: * -> *) il bl.
(Monad m, IsBlock il bl, IsInline il) =>
SyntaxSpec m il bl
CE.autolinkSpec,
      SyntaxSpec m il bl
forall (m :: * -> *) il bl.
(Monad m, IsBlock il bl, IsInline il) =>
SyntaxSpec m il bl
CM.defaultSyntaxSpec,
      -- as the commonmark documentation states, pipeTableSpec should be placed after
      -- fancyListSpec and defaultSyntaxSpec to avoid bad results when parsing
      -- non-table lines
      SyntaxSpec m il bl
forall (m :: * -> *) il bl.
(Monad m, IsBlock il bl, IsInline il, HasPipeTable il bl) =>
SyntaxSpec m il bl
CE.pipeTableSpec
    ]

-- | Identify metadata block at the top, and split it from markdown body.
--
-- FIXME: https://github.com/srid/neuron/issues/175
partitionMarkdown :: FilePath -> Text -> Either Text (Maybe Text, Text)
partitionMarkdown :: FilePath -> Text -> Either Text (Maybe Text, Text)
partitionMarkdown =
  Parsec Void Text (Maybe Text, Text)
-> FilePath -> Text -> Either Text (Maybe Text, Text)
forall a. Parsec Void Text a -> FilePath -> Text -> Either Text a
parse (Parsec Void Text (Maybe Text, Text)
-> Parsec Void Text (Maybe Text, Text)
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
M.try Parsec Void Text (Maybe Text, Text)
splitP Parsec Void Text (Maybe Text, Text)
-> Parsec Void Text (Maybe Text, Text)
-> Parsec Void Text (Maybe Text, Text)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Text -> (Maybe Text, Text))
-> ParsecT Void Text Identity Text
-> Parsec Void Text (Maybe Text, Text)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Maybe Text
forall a. Maybe a
Nothing,) ParsecT Void Text Identity Text
forall e s (m :: * -> *). MonadParsec e s m => m (Tokens s)
M.takeRest)
  where
    separatorP :: M.Parsec Void Text ()
    separatorP :: Parsec Void Text ()
separatorP =
      ParsecT Void Text Identity Text -> Parsec Void Text ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (ParsecT Void Text Identity Text -> Parsec Void Text ())
-> ParsecT Void Text Identity Text -> Parsec Void Text ()
forall a b. (a -> b) -> a -> b
$ Tokens Text -> ParsecT Void Text Identity (Tokens Text)
forall e s (m :: * -> *).
MonadParsec e s m =>
Tokens s -> m (Tokens s)
M.string Tokens Text
"---" ParsecT Void Text Identity Text
-> ParsecT Void Text Identity Text
-> ParsecT Void Text Identity Text
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* ParsecT Void Text Identity Text
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Tokens s)
M.eol
    splitP :: M.Parsec Void Text (Maybe Text, Text)
    splitP :: Parsec Void Text (Maybe Text, Text)
splitP = do
      Parsec Void Text ()
separatorP
      Text
a <- FilePath -> Text
forall a. ToText a => a -> Text
toText (FilePath -> Text)
-> ParsecT Void Text Identity FilePath
-> ParsecT Void Text Identity Text
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> ParsecT Void Text Identity Char
-> Parsec Void Text () -> ParsecT Void Text Identity FilePath
forall (m :: * -> *) a end. MonadPlus m => m a -> m end -> m [a]
manyTill ParsecT Void Text Identity Char
forall e s (m :: * -> *). MonadParsec e s m => m (Token s)
M.anySingle (Parsec Void Text () -> Parsec Void Text ()
forall e s (m :: * -> *) a. MonadParsec e s m => m a -> m a
M.try (Parsec Void Text () -> Parsec Void Text ())
-> Parsec Void Text () -> Parsec Void Text ()
forall a b. (a -> b) -> a -> b
$ ParsecT Void Text Identity Text
forall e s (m :: * -> *).
(MonadParsec e s m, Token s ~ Char) =>
m (Tokens s)
M.eol ParsecT Void Text Identity Text
-> Parsec Void Text () -> Parsec Void Text ()
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f b
*> Parsec Void Text ()
separatorP)
      Text
b <- ParsecT Void Text Identity Text
forall e s (m :: * -> *). MonadParsec e s m => m (Tokens s)
M.takeRest
      (Maybe Text, Text) -> Parsec Void Text (Maybe Text, Text)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Text -> Maybe Text
forall a. a -> Maybe a
Just Text
a, Text
b)
    parse :: M.Parsec Void Text a -> String -> Text -> Either Text a
    parse :: Parsec Void Text a -> FilePath -> Text -> Either Text a
parse Parsec Void Text a
p FilePath
fn Text
s =
      (ParseErrorBundle Text Void -> Text)
-> Either (ParseErrorBundle Text Void) a -> Either Text a
forall (p :: * -> * -> *) a b c.
Bifunctor p =>
(a -> b) -> p a c -> p b c
first (FilePath -> Text
forall a. ToText a => a -> Text
toText (FilePath -> Text)
-> (ParseErrorBundle Text Void -> FilePath)
-> ParseErrorBundle Text Void
-> Text
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ParseErrorBundle Text Void -> FilePath
forall s e.
(VisualStream s, TraversableStream s, ShowErrorComponent e) =>
ParseErrorBundle s e -> FilePath
M.errorBundlePretty) (Either (ParseErrorBundle Text Void) a -> Either Text a)
-> Either (ParseErrorBundle Text Void) a -> Either Text a
forall a b. (a -> b) -> a -> b
$
        Parsec Void Text a
-> FilePath -> Text -> Either (ParseErrorBundle Text Void) a
forall e s a.
Parsec e s a -> FilePath -> s -> Either (ParseErrorBundle s e) a
M.parse (Parsec Void Text a
p Parsec Void Text a -> Parsec Void Text () -> Parsec Void Text a
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parsec Void Text ()
forall e s (m :: * -> *). MonadParsec e s m => m ()
M.eof) FilePath
fn Text
s

-- | Convert Pandoc AST inlines to raw text.
plainify :: [B.Inline] -> Text
plainify :: [Inline] -> Text
plainify = (Inline -> Text) -> [Inline] -> Text
forall a b c. (Walkable a b, Monoid c) => (a -> c) -> b -> c
W.query ((Inline -> Text) -> [Inline] -> Text)
-> (Inline -> Text) -> [Inline] -> Text
forall a b. (a -> b) -> a -> b
$ \case
  B.Str Text
x -> Text
x
  B.Code Attr
_attr Text
x -> Text
x
  Inline
B.Space -> Text
" "
  Inline
B.SoftBreak -> Text
" "
  Inline
B.LineBreak -> Text
" "
  B.RawInline Format
_fmt Text
s -> Text
s
  B.Math MathType
_mathTyp Text
s -> Text
s
  -- Ignore the rest of AST nodes, as they are recursively defined in terms of
  -- `Inline` which `W.query` will traverse again.
  Inline
_ -> Text
""