{-# LANGUAGE NoImplicitPrelude #-}
{-# LANGUAGE CPP                        #-}
{-# LANGUAGE DeriveDataTypeable         #-}
{-# LANGUAGE DeriveGeneric              #-}
{-# LANGUAGE GeneralizedNewtypeDeriving #-}
{-# LANGUAGE TemplateHaskell            #-}
{-
Copyright (C) 2012-2018 John MacFarlane <jgm@berkeley.edu>

This program is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program; if not, write to the Free Software
Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
-}

{- |
   Module      : Text.Pandoc.Extensions
   Copyright   : Copyright (C) 2012-2018 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

Data structures and functions for representing markup extensions.
-}
module Text.Pandoc.Extensions ( Extension(..)
                              , Extensions
                              , emptyExtensions
                              , extensionsFromList
                              , parseFormatSpec
                              , extensionEnabled
                              , enableExtension
                              , disableExtension
                              , getDefaultExtensions
                              , pandocExtensions
                              , plainExtensions
                              , strictExtensions
                              , phpMarkdownExtraExtensions
                              , githubMarkdownExtensions
                              , multimarkdownExtensions )
where
import Prelude
import Data.Aeson (FromJSON (..), ToJSON (..), defaultOptions)
import Data.Aeson.TH (deriveJSON)
import Data.Bits (clearBit, setBit, testBit, (.|.))
import Data.Data (Data)
import Data.Typeable (Typeable)
import GHC.Generics (Generic)
import Text.Pandoc.Shared (safeRead)
import Text.Parsec

-- | A set of 'Extension's, stored as a bit field in an 'Integer': the bit at
-- index @fromEnum ext@ is set exactly when @ext@ is enabled (see
-- 'extensionEnabled', 'enableExtension' and 'disableExtension').
newtype Extensions = Extensions Integer
  deriving (Show, Read, Eq, Ord, Data, Typeable, Generic, ToJSON, FromJSON)

-- | Combining two extension sets yields their union (bitwise OR of the two
-- underlying bit fields).
instance Semigroup Extensions where
  Extensions lhs <> Extensions rhs = Extensions (lhs .|. rhs)

-- | The neutral element is the set with no bits set.
instance Monoid Extensions where
  mempty = Extensions 0
  mappend = (<>)

-- | Build an extension set in which exactly the listed extensions are
-- enabled.  Duplicates in the list are harmless.
extensionsFromList :: [Extension] -> Extensions
extensionsFromList exts = foldr switchOn emptyExtensions exts
  where
    -- Set the bit belonging to one extension in the accumulated set.
    switchOn ext (Extensions bits) = Extensions (setBit bits (fromEnum ext))

-- | The extension set with nothing enabled (all bits clear).
emptyExtensions :: Extensions
emptyExtensions = Extensions 0

-- | Test whether an extension is a member of the given set, i.e. whether
-- the bit at index @fromEnum ext@ is set.
extensionEnabled :: Extension -> Extensions -> Bool
extensionEnabled ext (Extensions bits) = bits `testBit` fromEnum ext

-- | Add an extension to the set by setting its bit.  Enabling an extension
-- that is already enabled leaves the set unchanged.
enableExtension :: Extension -> Extensions -> Extensions
enableExtension ext (Extensions bits) = Extensions (bits `setBit` fromEnum ext)

-- | Remove an extension from the set by clearing its bit.  Disabling an
-- extension that is not enabled leaves the set unchanged.
disableExtension :: Extension -> Extensions -> Extensions
disableExtension ext (Extensions bits) =
  Extensions (bits `clearBit` fromEnum ext)

-- | Individually selectable syntax extensions.
--
-- The order of the constructors is significant: 'fromEnum' of a constructor
-- is used as its bit index inside 'Extensions', so inserting or reordering
-- constructors changes the 'Integer' encoding of existing 'Extensions'
-- values.
data Extension =
      Ext_abbreviations       -- ^ PHP markdown extra abbreviation definitions
    | Ext_all_symbols_escapable  -- ^ Make all non-alphanumerics escapable
    | Ext_amuse -- ^ Enable Text::Amuse extensions to Emacs Muse markup
    | Ext_angle_brackets_escapable  -- ^ Make < and > escapable
    | Ext_ascii_identifiers   -- ^ ASCII-only identifiers for headers
    | Ext_auto_identifiers    -- ^ Automatic identifiers for headers
    | Ext_autolink_bare_uris  -- ^ Make all absolute URIs into links
    | Ext_backtick_code_blocks    -- ^ GitHub style ``` code blocks
    | Ext_blank_before_blockquote -- ^ Require blank line before a blockquote
    | Ext_blank_before_header     -- ^ Require blank line before a header
    | Ext_bracketed_spans         -- ^ Bracketed spans with attributes
    | Ext_citations           -- ^ Pandoc/citeproc citations
    | Ext_compact_definition_lists  -- ^ Definition lists without space between items,
                                    --   and disallow laziness
    | Ext_definition_lists    -- ^ Definition lists as in pandoc, mmd, php
    | Ext_east_asian_line_breaks  -- ^ Newlines in paragraphs are ignored between
                                  --   East Asian wide characters. Note: this extension
                                  --   does not affect readers/writers directly; it causes
                                  --   the eastAsianLineBreakFilter to be applied after
                                  --   parsing, in Text.Pandoc.App.convertWithOpts.
    | Ext_emoji               -- ^ Support emoji like :smile:
    | Ext_empty_paragraphs -- ^ Allow empty paragraphs
    | Ext_epub_html_exts      -- ^ Recognise the EPUB extended version of HTML
    | Ext_escaped_line_breaks     -- ^ Treat a backslash at EOL as linebreak
    | Ext_example_lists       -- ^ Markdown-style numbered examples
    | Ext_fancy_lists         -- ^ Enable fancy list numbers and delimiters
    | Ext_fenced_code_attributes  -- ^ Allow attributes on fenced code blocks
    | Ext_fenced_code_blocks  -- ^ Parse fenced code blocks
    | Ext_fenced_divs             -- ^ Allow fenced div syntax :::
    | Ext_footnotes           -- ^ Pandoc\/PHP\/MMD style footnotes
    | Ext_four_space_rule     -- ^ Require 4-space indent for list contents
    | Ext_gfm_auto_identifiers  -- ^ Automatic identifiers for headers, using
                                --  GitHub's method for generating identifiers
    | Ext_grid_tables         -- ^ Grid tables (pandoc, reST)
    | Ext_hard_line_breaks    -- ^ All newlines become hard line breaks
    | Ext_header_attributes   -- ^ Explicit header attributes {#id .class k=v}
    | Ext_ignore_line_breaks  -- ^ Newlines in paragraphs are ignored
    | Ext_implicit_figures    -- ^ A paragraph with just an image is a figure
    | Ext_implicit_header_references -- ^ Implicit reference links for headers
    | Ext_inline_code_attributes  -- ^ Allow attributes on inline code
    | Ext_inline_notes        -- ^ Pandoc-style inline notes
    | Ext_intraword_underscores  -- ^ Treat underscore inside word as literal
    | Ext_latex_macros        -- ^ Parse LaTeX macro definitions (for math only)
    | Ext_line_blocks         -- ^ RST style line blocks
    | Ext_link_attributes         -- ^ link and image attributes
    | Ext_lists_without_preceding_blankline -- ^ Allow lists without preceding blank line
    | Ext_literate_haskell    -- ^ Enable literate Haskell conventions
    | Ext_markdown_attribute      -- ^ Interpret text inside HTML as markdown iff
                                  --   container has attribute 'markdown'
    | Ext_markdown_in_html_blocks -- ^ Interpret as markdown inside HTML blocks
    | Ext_mmd_header_identifiers -- ^ Multimarkdown style header identifiers [myid]
    | Ext_mmd_link_attributes     -- ^ MMD style reference link attributes
    | Ext_mmd_title_block     -- ^ Multimarkdown metadata block
    | Ext_multiline_tables    -- ^ Pandoc-style multiline tables
    | Ext_native_divs             -- ^ Use Div blocks for contents of <div> tags
    | Ext_native_spans            -- ^ Use Span inlines for contents of <span>
    | Ext_ntb                 -- ^ ConTeXt Natural Tables
    | Ext_old_dashes          -- ^ Old dash handling: -- = em dash, - before number = en dash
    | Ext_pandoc_title_block  -- ^ Pandoc title block
    | Ext_pipe_tables         -- ^ Pipe tables (as in PHP markdown extra)
    | Ext_raw_attribute           -- ^ Allow explicit raw blocks/inlines
    | Ext_raw_html            -- ^ Allow raw HTML
    | Ext_raw_tex             -- ^ Allow raw TeX (other than math)
    | Ext_shortcut_reference_links -- ^ Shortcut reference links
    | Ext_simple_tables       -- ^ Pandoc-style simple tables
    | Ext_smart               -- ^ "Smart" quotes, apostrophes, ellipses, dashes
    | Ext_space_in_atx_header -- ^ Require space between # and header text
    | Ext_spaced_reference_links -- ^ Allow space between two parts of ref link
    | Ext_startnum            -- ^ Make start number of ordered list significant
    | Ext_strikeout           -- ^ Strikeout using ~~this~~ syntax
    | Ext_subscript           -- ^ Subscript using ~this~ syntax
    | Ext_superscript         -- ^ Superscript using ^this^ syntax
    | Ext_styles              -- ^ Read styles that pandoc doesn't know
    | Ext_table_captions      -- ^ Pandoc-style table captions
    | Ext_tex_math_dollars    -- ^ TeX math between $..$ or $$..$$
    | Ext_tex_math_double_backslash  -- ^ TeX math between \\(..\\) \\[..\\]
    | Ext_tex_math_single_backslash  -- ^ TeX math between \(..\) \[..\]
    | Ext_yaml_metadata_block -- ^ YAML metadata block
    deriving (Show, Read, Enum, Eq, Ord, Bounded, Data, Typeable, Generic)

-- | Extensions to be used with pandoc-flavored markdown.
--
-- The list is split into thematic groups purely for readability; the
-- resulting set is the union of all groups.
pandocExtensions :: Extensions
pandocExtensions = extensionsFromList $ concat
  [ -- notes and metadata blocks
    [ Ext_footnotes
    , Ext_inline_notes
    , Ext_pandoc_title_block
    , Ext_yaml_metadata_block
    ]
  , -- tables and figures
    [ Ext_table_captions
    , Ext_implicit_figures
    , Ext_simple_tables
    , Ext_multiline_tables
    , Ext_grid_tables
    , Ext_pipe_tables
    ]
  , -- citations, raw markup and math
    [ Ext_citations
    , Ext_raw_tex
    , Ext_raw_html
    , Ext_tex_math_dollars
    , Ext_latex_macros
    ]
  , -- code blocks, inline code and raw attributes
    [ Ext_fenced_code_blocks
    , Ext_fenced_code_attributes
    , Ext_backtick_code_blocks
    , Ext_inline_code_attributes
    , Ext_raw_attribute
    ]
  , -- HTML blocks, divs and spans
    [ Ext_markdown_in_html_blocks
    , Ext_native_divs
    , Ext_fenced_divs
    , Ext_native_spans
    , Ext_bracketed_spans
    ]
  , -- line breaks and lists
    [ Ext_escaped_line_breaks
    , Ext_fancy_lists
    , Ext_startnum
    , Ext_definition_lists
    , Ext_example_lists
    ]
  , -- escaping and block-start rules
    [ Ext_all_symbols_escapable
    , Ext_intraword_underscores
    , Ext_blank_before_blockquote
    , Ext_blank_before_header
    , Ext_space_in_atx_header
    ]
  , -- inline formatting
    [ Ext_strikeout
    , Ext_superscript
    , Ext_subscript
    ]
  , -- header and link attributes
    [ Ext_auto_identifiers
    , Ext_header_attributes
    , Ext_link_attributes
    , Ext_implicit_header_references
    ]
  , -- everything else
    [ Ext_line_blocks
    , Ext_shortcut_reference_links
    , Ext_smart
    ]
  ]

-- | Extensions to be used with plain text output.
plainExtensions :: Extensions
plainExtensions =
  extensionsFromList (tablesAndFigures ++ macros ++ lists ++ textRules)
  where
    tablesAndFigures =
      [ Ext_table_captions
      , Ext_implicit_figures
      , Ext_simple_tables
      , Ext_multiline_tables
      , Ext_grid_tables
      ]
    macros =
      [ Ext_latex_macros ]
    lists =
      [ Ext_fancy_lists
      , Ext_startnum
      , Ext_definition_lists
      , Ext_example_lists
      ]
    textRules =
      [ Ext_intraword_underscores
      , Ext_blank_before_blockquote
      , Ext_blank_before_header
      , Ext_strikeout
      ]

-- | Extensions to be used with PHP Markdown Extra.
phpMarkdownExtraExtensions :: Extensions
phpMarkdownExtraExtensions = extensionsFromList
  [ Ext_footnotes
  , Ext_pipe_tables
  , Ext_raw_html
  , Ext_markdown_attribute
  , Ext_fenced_code_blocks
  , Ext_definition_lists
  , Ext_intraword_underscores
  , Ext_header_attributes
  , Ext_link_attributes
  , Ext_abbreviations
  , Ext_shortcut_reference_links
  , Ext_spaced_reference_links
  ]

-- | Extensions to be used with GitHub-flavored markdown.
githubMarkdownExtensions :: Extensions
githubMarkdownExtensions = extensionsFromList enabled
  where
    enabled =
      [ Ext_angle_brackets_escapable
      , Ext_pipe_tables
      , Ext_raw_html
      , Ext_fenced_code_blocks
      , Ext_gfm_auto_identifiers
      , Ext_ascii_identifiers
      , Ext_backtick_code_blocks
      , Ext_autolink_bare_uris
      , Ext_space_in_atx_header
      , Ext_intraword_underscores
      , Ext_strikeout
      , Ext_emoji
      , Ext_lists_without_preceding_blankline
      , Ext_shortcut_reference_links
      ]

-- | Extensions to be used with MultiMarkdown (MMD).
multimarkdownExtensions :: Extensions
multimarkdownExtensions = extensionsFromList
  [ Ext_pipe_tables
  , Ext_raw_html
  , Ext_markdown_attribute
  , Ext_mmd_link_attributes
  -- , Ext_raw_tex
  -- Note: Ext_raw_tex is deliberately left disabled, because MMD's
  -- raw TeX syntax requires raw TeX to be enclosed in an HTML comment.
  , Ext_tex_math_double_backslash
  , Ext_intraword_underscores
  , Ext_mmd_title_block
  , Ext_footnotes
  , Ext_definition_lists
  , Ext_all_symbols_escapable
  , Ext_implicit_header_references
  , Ext_shortcut_reference_links
  , Ext_auto_identifiers
  , Ext_mmd_header_identifiers
  , Ext_implicit_figures
  -- Note: MMD's syntax for superscripts and subscripts
  -- is a bit more permissive than pandoc's, allowing
  -- e^2 and a~1 instead of e^2^ and a~1~, so even with
  -- these options we don't have full support for MMD
  -- superscripts and subscripts, but there's no reason
  -- not to include these:
  , Ext_superscript
  , Ext_subscript
  , Ext_backtick_code_blocks
  , Ext_spaced_reference_links
  -- So far only in the dev version of MMD:
  , Ext_raw_attribute
  ]

-- | Extensions to be used with strict markdown: raw HTML plus shortcut and
-- spaced reference links.
strictExtensions :: Extensions
strictExtensions =
    enableExtension Ext_raw_html
  . enableExtension Ext_shortcut_reference_links
  . enableExtension Ext_spaced_reference_links
  $ emptyExtensions

-- | Default extensions for a format, looked up by the format's name
-- (for example @"markdown"@ or @"html5"@).  Any name without a dedicated
-- entry gets only 'Ext_auto_identifiers'.
getDefaultExtensions :: String -> Extensions
getDefaultExtensions format =
  case format of
    "markdown_strict"   -> strictExtensions
    "markdown_phpextra" -> phpMarkdownExtraExtensions
    "markdown_mmd"      -> multimarkdownExtensions
    "markdown_github"   -> githubMarkdownExtensions
    "markdown"          -> pandocExtensions
    "muse"              -> extensionsFromList
                             [ Ext_amuse
                             , Ext_auto_identifiers ]
    "plain"             -> plainExtensions
    "gfm"               -> githubMarkdownExtensions
    "commonmark"        -> extensionsFromList [ Ext_raw_html ]
    "org"               -> extensionsFromList
                             [ Ext_citations
                             , Ext_auto_identifiers ]
    "html"              -> htmlDefaults
    "html4"             -> htmlDefaults
    "html5"             -> htmlDefaults
    "epub"              -> epubDefaults
    "epub2"             -> epubDefaults
    "epub3"             -> epubDefaults
    "latex"             -> latexDefaults
    "beamer"            -> latexDefaults
    "context"           -> extensionsFromList
                             [ Ext_smart
                             , Ext_auto_identifiers ]
    "textile"           -> extensionsFromList
                             [ Ext_old_dashes
                             , Ext_smart
                             , Ext_raw_html
                             , Ext_auto_identifiers ]
    "opml"              -> pandocExtensions -- affects notes
    _                   -> extensionsFromList [ Ext_auto_identifiers ]
  where
    -- Shared by "html", "html4" and "html5".
    htmlDefaults = extensionsFromList
      [ Ext_auto_identifiers
      , Ext_native_divs
      , Ext_line_blocks
      , Ext_native_spans ]
    -- Shared by "epub", "epub2" and "epub3".
    epubDefaults = extensionsFromList
      [ Ext_raw_html
      , Ext_native_divs
      , Ext_native_spans
      , Ext_epub_html_exts ]
    -- Shared by "latex" and "beamer".
    latexDefaults = extensionsFromList
      [ Ext_smart
      , Ext_latex_macros
      , Ext_auto_identifiers ]

-- | Parse a format specification such as @markdown+emoji-smart@.
--
-- The result pairs the format name (everything before the first @+@ or @-@)
-- with a function that applies the listed modifiers, left to right, to a
-- set of 'Extensions': @+name@ enables @Ext_name@, @-name@ disables it.
-- The name @lhs@ is accepted as a shorthand for 'Ext_literate_haskell'.
-- An unrecognised extension name makes the parse fail.
parseFormatSpec :: String
                -> Either ParseError (String, Extensions -> Extensions)
parseFormatSpec = parse formatSpec ""
  where
    formatSpec = do
      fmt <- many1 (noneOf "-+")
      modifiers <- many extensionModifier
      return (fmt, applyAll modifiers)

    -- Apply the modifiers in the order in which they were written.
    applyAll modifiers exts = foldl (\acc modify -> modify acc) exts modifiers

    -- A single @+ext@ or @-ext@ item.
    extensionModifier = do
      sign <- oneOf "-+"
      extName <- many (noneOf "-+")
      ext <- lookupExtension extName
      return $ if sign == '-'
                  then disableExtension ext
                  else enableExtension ext

    -- Resolve an extension name via the constructor's 'Read' instance,
    -- falling back to the @lhs@ alias.
    lookupExtension extName =
      case safeRead ("Ext_" ++ extName) of
        Just ext -> return ext
        Nothing
          | extName == "lhs" -> return Ext_literate_haskell
          | otherwise        -> fail $ "Unknown extension: " ++ extName

-- Template Haskell splice: generate the 'ToJSON' and 'FromJSON' instances
-- for 'Extension' using aeson's default options.
$(deriveJSON defaultOptions ''Extension)