{-# LANGUAGE
TemplateHaskell,
RecordWildCards,
DeriveFunctor,
DeriveFoldable,
DeriveTraversable,
OverloadedStrings,
NoImplicitPrelude
  #-}


{- |
This library lets you parse Markdown into a hierarchical structure (delimited by headings). For instance, let's say your document looks like this:

@
This is the preface.

First chapter
========================================

This chapter doesn't have sections.

Second chapter
========================================

First section
--------------------

Here's some text.

Second section
--------------------

And more text.
@

It can be represented as a tree:

@
'preface': "This is the preface."
'sections':
    * 'heading': __"First chapter"__
      'content': "This chapter doesn't have sections."
      'sections': []

    * 'heading': __"Second chapter"__
      'sections':
          * 'heading': __"First section"__
            'content': "Here's some text."
            'sections': []

          * 'heading': __"Second section"__
            'content': "And more text."
            'sections': []
@

That's what this library does. Moreover, it lets you access the Markdown source of every node of the tree.

In most cases the only thing you need to do is something like this:

@
'nodesToDocument' . 'commonmarkToAnnotatedNodes' ['optSafe', 'optNormalize']
@

You can preprocess parsed Markdown after doing 'commonmarkToAnnotatedNodes' as long as you don't add or remove any top-level nodes.
-}
module CMark.Sections
(
  -- * Parse Markdown to trees
  commonmarkToAnnotatedNodes,
  nodesToDocument,
  Annotated(..),
  Section(..),
  Document(..),

  -- * Work with parsed trees
  -- $monoid-note
  flattenDocument,
  flattenSection,
  flattenTree,
  flattenForest,
)
where


import BasePrelude
-- Lenses
import Lens.Micro hiding ((&))
-- Text
import qualified Data.Text as T
import Data.Text (Text)
-- Markdown
import CMark
-- Containers
import qualified Data.Tree as Tree
-- Lists
import Data.List.Split


{- |
A data type for annotating things with their source. In this library we only use @Annotated [Node]@, which stands for “some Markdown nodes + source”.
-}
data Annotated a = Ann {
  annSource :: Text,
  annValue  :: a }
  deriving (Eq, Show, Functor, Foldable, Traversable)

instance Monoid a => Monoid (Annotated a) where
  mempty = Ann "" mempty
  Ann s1 v1 `mappend` Ann s2 v2 = Ann (s1 <> s2) (v1 <> v2)

{- |
A section in the Markdown tree. Does not contain subsections (the tree is built using 'Tree.Forest' from "Data.Tree").
-}
data Section a b = Section {
  -- | Level (from 1 to 6).
  level      :: Int,
  heading    :: Annotated [Node],
  headingAnn :: a,
  -- | Text between the heading and the first subsection. Can be empty.
  content    :: Annotated [Node],
  contentAnn :: b }
  deriving (Eq, Show)

{- |
The whole parsed Markdown tree. The first parameter is the type of annotations for headings (i.e. sections), the second – chunks of text (which are all associated with sections except for the preface).
-}
data Document a b = Document {
  -- | Text before the first section. Can be empty.
  preface    :: Annotated [Node],
  prefaceAnn :: b,
  sections   :: Tree.Forest (Section a b) }
  deriving (Eq, Show)

{- |
'commonmarkToAnnotatedNodes' parses Markdown with the given options and extracts nodes from the initial 'DOCUMENT' node.
-}
commonmarkToAnnotatedNodes :: [CMarkOption] -> Text -> Annotated [Node]
commonmarkToAnnotatedNodes opts s = Ann s ns
  where
    Node _ DOCUMENT ns = commonmarkToNode opts s

{- | Break Markdown into pieces:

@
    blah blah blah               }
                                 }----> init
    blah blah blah               }

    # foo                        }
                                 }
    blah blah                    }----> (heading, blocks after)
                                 }
    blah blah                    }

    ## bar                       }
                                 }----> (heading, blocks after)
    blah blah                    }

    ...
@
-}
breakAtHeadings
  :: [Node]
  -> ([Node], [(Node, [Node])])     -- ^ (blocks before the first heading,
                                    --    headings + blocks after)
breakAtHeadings nodes =
  let (init':rest') = split (keepDelimsL (whenElt isHeading)) nodes
  in  (init', map (fromJust . uncons) rest')
  where
    isHeading (Node _ (HEADING _) _) = True
    isHeading _ = False

-- | Get start line of a node.
start :: Node -> Int
start (Node (Just p) _ _) = startLine p
start (Node Nothing  _ _) =
  error "CMark.Sections.start: node doesn't have a position"

-- We assume here that two top-level blocks can't possibly be on the same line.
cut
  :: Node      -- ^ First node to include
  -> Node      -- ^ First node to exclude
  -> Text
  -> Text
cut a b = T.unlines . take (start b - start a) . drop (start a - 1) . T.lines

cutTo
  :: Node
  -> Text
  -> Text
cutTo b = T.unlines . take (start b - 1) . T.lines

cutFrom
  :: Node
  -> Text
  -> Text
cutFrom a = T.unlines . drop (start a - 1) . T.lines

{- |
Turn a list of Markdown nodes into a tree.
-}
nodesToDocument :: Annotated [Node] -> Document () ()
nodesToDocument (Ann src nodes) = do
  -- Break at headings
  let prefaceNodes :: [Node]
      restNodes :: [(Node, [Node])]
      (prefaceNodes, restNodes) = breakAtHeadings nodes
  -- Annotate the first block with the source. If there are no headings at
  -- all, we just copy everything; otherwise we cut until the first heading.
  let prefaceAnnotated :: Annotated [Node]
      prefaceAnnotated = case restNodes of
        []    -> Ann src prefaceNodes
        (x:_) -> Ann (cutTo (fst x) src) prefaceNodes
  -- Annotate other blocks with their sources by cutting until the position
  -- of the next block
  let blocks :: [((Int, Annotated [Node]), Annotated [Node])]
      blocks = do
        ((heading, afterBlocks), mbNext) <-
            zip restNodes (tail (map Just restNodes ++ [Nothing]))
        let Node _ (HEADING hLevel) hNodes = heading
        let hSrc = case (afterBlocks, mbNext) of
              (x:_, _)          -> cut heading x src
              ([], Just (x, _)) -> cut heading x src
              ([], Nothing)     -> cutFrom heading src
        let afterBlocksSrc = case (afterBlocks, mbNext) of
              ([], _)            -> ""
              (x:_, Just (y, _)) -> cut x y src
              (x:_, Nothing)     -> cutFrom x src
        return ((hLevel, Ann hSrc hNodes),
                Ann afterBlocksSrc afterBlocks)
  -- A function for turning blocks into a tree
  let makeTree [] = []
      makeTree (((level, heading), content) : xs) =
        let (nested, others) = span (\x -> x^._1._1 > level) xs
            section = Section {
              headingAnn = (),
              contentAnn = (),
              .. }
        in  Tree.Node section (makeTree nested) : makeTree others
  -- Return the result
  Document {
    preface    = prefaceAnnotated,
    prefaceAnn = (),
    sections   = makeTree blocks }

{- $monoid-note

Note that you can use ('<>') to combine 'Annotated' nodes together.
-}

flattenDocument :: Document a b -> Annotated [Node]
flattenDocument Document{..} = preface <> flattenForest sections

flattenSection :: Section a b -> Annotated [Node]
flattenSection Section{..} =
  Ann (annSource heading <> annSource content)
      (headingNode : annValue content)
  where
    headingNode = Node Nothing (HEADING level) (annValue heading)

flattenTree :: Tree.Tree (Section a b) -> Annotated [Node]
flattenTree (Tree.Node r f) = flattenSection r <> flattenForest f

flattenForest :: Tree.Forest (Section a b) -> Annotated [Node]
flattenForest = mconcat . map flattenSection . concatMap Tree.flatten