{-# LANGUAGE MonoLocalBinds      #-}
{-# LANGUAGE ScopedTypeVariables #-}
{-# LANGUAGE OverloadedStrings   #-}
{- |
   Module      : Text.Pandoc.Readers
   Copyright   : Copyright (C) 2006-2021 John MacFarlane
   License     : GNU GPL, version 2 or above

   Maintainer  : John MacFarlane <jgm@berkeley.edu>
   Stability   : alpha
   Portability : portable

This helper module exports the readers.

Note:  all of the readers assume that the input text has @'\n'@
line endings.  So if you get your input text from a web form,
you should remove @'\r'@ characters using @filter (/='\r')@.

-}

module Text.Pandoc.Readers
  (
    -- * Readers: converting /to/ Pandoc format
    Reader (..)
  , readers
  , readDocx
  , readOdt
  , readMarkdown
  , readCommonMark
  , readCreole
  , readDokuWiki
  , readMediaWiki
  , readVimwiki
  , readRST
  , readOrg
  , readLaTeX
  , readHtml
  , readJATS
  , readJira
  , readTextile
  , readDocBook
  , readOPML
  , readHaddock
  , readNative
  , readJSON
  , readTWiki
  , readTikiWiki
  , readTxt2Tags
  , readEPUB
  , readMuse
  , readFB2
  , readIpynb
  , readCSV
  , readCslJson
  , readBibTeX
  , readBibLaTeX
  -- * Miscellaneous
  , getReader
  , getDefaultExtensions
  ) where

import Control.Monad (unless)
import Control.Monad.Except (throwError)
import Data.Aeson
import qualified Data.ByteString.Lazy as BL
import Data.Text (Text)
import qualified Data.Text as T
import Text.Pandoc.Class
import Text.Pandoc.Definition
import Text.Pandoc.Error
import Text.Pandoc.Extensions
import Text.Pandoc.Options
import Text.Pandoc.Readers.CommonMark
import Text.Pandoc.Readers.Creole
import Text.Pandoc.Readers.DocBook
import Text.Pandoc.Readers.Docx
import Text.Pandoc.Readers.DokuWiki
import Text.Pandoc.Readers.EPUB
import Text.Pandoc.Readers.FB2
import Text.Pandoc.Readers.Ipynb
import Text.Pandoc.Readers.Haddock
import Text.Pandoc.Readers.HTML (readHtml)
import Text.Pandoc.Readers.JATS (readJATS)
import Text.Pandoc.Readers.Jira (readJira)
import Text.Pandoc.Readers.LaTeX
import Text.Pandoc.Readers.Markdown
import Text.Pandoc.Readers.MediaWiki
import Text.Pandoc.Readers.Muse
import Text.Pandoc.Readers.Native
import Text.Pandoc.Readers.Odt
import Text.Pandoc.Readers.OPML
import Text.Pandoc.Readers.Org
import Text.Pandoc.Readers.RST
import Text.Pandoc.Readers.Textile
import Text.Pandoc.Readers.TikiWiki
import Text.Pandoc.Readers.TWiki
import Text.Pandoc.Readers.Txt2Tags
import Text.Pandoc.Readers.Vimwiki
import Text.Pandoc.Readers.Man
import Text.Pandoc.Readers.CSV
import Text.Pandoc.Readers.CslJson
import Text.Pandoc.Readers.BibTeX
import qualified Text.Pandoc.UTF8 as UTF8
import Text.Parsec.Error

-- | A reader for a single input format, parameterised over the monad @m@
-- in which it runs.  The constructor records which kind of input the
-- reader function consumes, so callers can pattern match on it to decide
-- how to supply the input.
data Reader m = TextReader (ReaderOptions -> Text -> m Pandoc)
              -- ^ Wraps a reader function that takes its input as 'Text'.
              | ByteStringReader (ReaderOptions -> BL.ByteString -> m Pandoc)
              -- ^ Wraps a reader function that takes its input as a lazy
              -- 'BL.ByteString'.

-- | Association list of formats and readers.
--
-- Each entry pairs a format name with the 'Reader' used for it.  Several
-- names deliberately share one reader function (all @markdown*@ names use
-- 'readMarkdown'; @commonmark@, @commonmark_x@ and @gfm@ use
-- 'readCommonMark'); 'getReader' still treats them as distinct formats
-- because it looks up the extension sets by name.
--
-- The @docx@, @odt@ and @epub@ entries are 'ByteStringReader's; every
-- other entry is a 'TextReader'.
readers :: PandocMonad m => [(Text, Reader m)]
readers =
  [ ("native"            , TextReader readNative)
  , ("json"              , TextReader readJSON)
  , ("markdown"          , TextReader readMarkdown)
  , ("markdown_strict"   , TextReader readMarkdown)
  , ("markdown_phpextra" , TextReader readMarkdown)
  , ("markdown_github"   , TextReader readMarkdown)
  , ("markdown_mmd"      , TextReader readMarkdown)
  , ("commonmark"        , TextReader readCommonMark)
  , ("commonmark_x"      , TextReader readCommonMark)
  , ("creole"            , TextReader readCreole)
  , ("dokuwiki"          , TextReader readDokuWiki)
  , ("gfm"               , TextReader readCommonMark)
  , ("rst"               , TextReader readRST)
  , ("mediawiki"         , TextReader readMediaWiki)
  , ("vimwiki"           , TextReader readVimwiki)
  , ("docbook"           , TextReader readDocBook)
  , ("opml"              , TextReader readOPML)
  , ("org"               , TextReader readOrg)
  , ("textile"           , TextReader readTextile) -- TODO : textile+lhs
  , ("html"              , TextReader readHtml)
  , ("jats"              , TextReader readJATS)
  , ("jira"              , TextReader readJira)
  , ("latex"             , TextReader readLaTeX)
  , ("haddock"           , TextReader readHaddock)
  , ("twiki"             , TextReader readTWiki)
  , ("tikiwiki"          , TextReader readTikiWiki)
  , ("docx"              , ByteStringReader readDocx)
  , ("odt"               , ByteStringReader readOdt)
  , ("t2t"               , TextReader readTxt2Tags)
  , ("epub"              , ByteStringReader readEPUB)
  , ("muse"              , TextReader readMuse)
  , ("man"               , TextReader readMan)
  , ("fb2"               , TextReader readFB2)
  , ("ipynb"             , TextReader readIpynb)
  , ("csv"               , TextReader readCSV)
  , ("csljson"           , TextReader readCslJson)
  , ("bibtex"            , TextReader readBibTeX)
  , ("biblatex"          , TextReader readBibLaTeX)
  ]

-- | Retrieve reader, extensions based on formatSpec (format+extensions).
--
-- The spec is parsed with 'parseFormatSpec' into a reader name, a list of
-- extensions to enable and a list of extensions to disable.  The result
-- pairs the reader registered under that name in 'readers' with the
-- format's default extensions after the requested ones have been enabled
-- and then the unwanted ones disabled (so disabling wins when an
-- extension appears in both lists).
--
-- Errors, raised with 'throwError' in this order of precedence:
--
-- * 'PandocAppError' if the spec does not parse; the text is the
--   newline-joined 'Message' entries of the parse error (other kinds of
--   parse-error messages are dropped by the pattern match).
--
-- * 'PandocUnknownReaderError' if no reader is registered under the name.
--
-- * 'PandocUnsupportedExtensionError' for the first requested extension
--   (enabled ones are checked before disabled ones) that is not in
--   @getAllExtensions readerName@.
getReader :: PandocMonad m => Text -> m (Reader m, Extensions)
getReader s =
  case parseFormatSpec s of
    Left e ->
      throwError $ PandocAppError $
        T.intercalate "\n" [T.pack m | Message m <- errorMessages e]
    Right (readerName, extsToEnable, extsToDisable) ->
      case lookup readerName readers of
        Nothing -> throwError $ PandocUnknownReaderError readerName
        Just r  -> do
          let allExts = getAllExtensions readerName
              exts    = foldr disableExtension
                          (foldr enableExtension
                                 (getDefaultExtensions readerName)
                                 extsToEnable)
                          extsToDisable
          -- Reject any extension the format does not know about, whether
          -- the user asked to enable or to disable it.
          mapM_ (\ext ->
                   unless (extensionEnabled ext allExts) $
                     throwError $
                       PandocUnsupportedExtensionError
                         -- 'show' gives the constructor name; the first
                         -- four characters are dropped (presumably the
                         -- @Ext_@ prefix -- confirm against
                         -- "Text.Pandoc.Extensions").
                         (T.drop 4 $ T.pack $ show ext) readerName)
                (extsToEnable ++ extsToDisable)
          return (r, exts)

-- | Read pandoc document from JSON format.
--
-- The 'ReaderOptions' argument is ignored.  The input text is converted
-- to bytes with 'UTF8.fromText' and decoded with aeson's 'eitherDecode''
-- using the 'FromJSON' instance of 'Pandoc'.  If decoding fails, a
-- 'PandocParseError' is thrown whose message is the decoder's error
-- prefixed with @"JSON parse error: "@.
readJSON :: PandocMonad m
         => ReaderOptions -> Text -> m Pandoc
readJSON _ t =
  case eitherDecode' . BL.fromStrict . UTF8.fromText $ t of
    Right doc -> return doc
    Left e    -> throwError $
                   PandocParseError ("JSON parse error: " <> T.pack e)