{-# LANGUAGE LambdaCase          #-}
{-# LANGUAGE OverloadedStrings   #-}
{-# LANGUAGE TupleSections       #-}
{- |
   Module      : Text.Pandoc.App.Input
   Copyright   : © 2006-2023 John MacFarlane
   License     : GPL-2.0-or-later
   Maintainer  : John MacFarlane <jgm@berkeley@edu>

Read from the file system into a pandoc document.
-}
module Text.Pandoc.App.Input
  ( InputParameters (..)
  , readInput
  ) where

import Control.Monad ((>=>))
import Control.Monad.Except (throwError, catchError)
import Data.Text (Text)
import Network.URI (URI (..), parseURI, unEscapeString)
import Text.Pandoc.Class ( PandocMonad, openURL, toTextM
                         , readFileStrict, readStdinStrict, report)
import Text.Pandoc.Definition (Pandoc (..), Attr, Block (..), Inline (..))
import Text.Pandoc.Error (PandocError (..))
import Text.Pandoc.Logging (LogMessage (..))
import Text.Pandoc.MIME (getCharset, MimeType)
import Text.Pandoc.Options (Extensions, ReaderOptions (..))
import Text.Pandoc.Readers (Reader (..))
import Text.Pandoc.Shared (tabFilter, textToIdentifier)
import Text.Pandoc.URI (uriPathToPath)
import Text.Pandoc.Walk (walk)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Char8 as B8
import qualified Data.ByteString.Lazy as BL
import qualified Data.Text as T

-- | Settings specifying how and which input should be processed.
data InputParameters m = InputParameters
  { forall (m :: * -> *). InputParameters m -> Reader m
inputReader         :: Reader m
  , forall (m :: * -> *). InputParameters m -> Text
inputReaderName     :: Text
  , forall (m :: * -> *). InputParameters m -> ReaderOptions
inputReaderOptions  :: ReaderOptions
  , forall (m :: * -> *). InputParameters m -> [FilePath]
inputSources        :: [FilePath]
  , forall (m :: * -> *). InputParameters m -> Maybe Int
inputSpacesPerTab   :: Maybe Int
  , forall (m :: * -> *). InputParameters m -> Bool
inputFileScope      :: Bool
  }

-- | Read all input into a pandoc document.
readInput :: PandocMonad m => InputParameters m -> m Pandoc
readInput :: forall (m :: * -> *).
PandocMonad m =>
InputParameters m -> m Pandoc
readInput InputParameters m
params = do
  let sources :: [FilePath]
sources = forall (m :: * -> *). InputParameters m -> [FilePath]
inputSources InputParameters m
params
  let readerName :: Text
readerName = forall (m :: * -> *). InputParameters m -> Text
inputReaderName InputParameters m
params
  let readerOpts :: ReaderOptions
readerOpts = forall (m :: * -> *). InputParameters m -> ReaderOptions
inputReaderOptions InputParameters m
params
  let convertTabs :: Text -> Text
      convertTabs :: Text -> Text
convertTabs = Int -> Text -> Text
tabFilter forall a b. (a -> b) -> a -> b
$ case forall (m :: * -> *). InputParameters m -> Maybe Int
inputSpacesPerTab InputParameters m
params of
        Maybe Int
Nothing -> Int
0
        Just Int
ts -> if Text
readerName forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Text
"t2t", Text
"man", Text
"tsv"]
                   then Int
0
                   else Int
ts

  [(FilePath, (ByteString, Maybe Text))]
inputs <- forall (m :: * -> *).
PandocMonad m =>
[FilePath] -> m [(FilePath, (ByteString, Maybe Text))]
readSources [FilePath]
sources

  case forall (m :: * -> *). InputParameters m -> Reader m
inputReader InputParameters m
params of
    TextReader forall a. ToSources a => ReaderOptions -> a -> m Pandoc
r
      | Text
readerName forall a. Eq a => a -> a -> Bool
== Text
"json" ->
          forall a. Monoid a => [a] -> a
mconcat forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (forall (m :: * -> *).
PandocMonad m =>
(Text -> Text)
-> (FilePath, (ByteString, Maybe Text)) -> m (FilePath, Text)
inputToText Text -> Text
convertTabs forall (m :: * -> *) a b c.
Monad m =>
(a -> m b) -> (b -> m c) -> a -> m c
>=> forall a. ToSources a => ReaderOptions -> a -> m Pandoc
r ReaderOptions
readerOpts forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a. a -> [a] -> [a]
:[]))
                           [(FilePath, (ByteString, Maybe Text))]
inputs
      | forall (m :: * -> *). InputParameters m -> Bool
inputFileScope InputParameters m
params ->
          forall a. Monoid a => [a] -> a
mconcat forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM
              (\(FilePath, (ByteString, Maybe Text))
source -> do
                  (FilePath
fp, Text
txt) <- forall (m :: * -> *).
PandocMonad m =>
(Text -> Text)
-> (FilePath, (ByteString, Maybe Text)) -> m (FilePath, Text)
inputToText Text -> Text
convertTabs (FilePath, (ByteString, Maybe Text))
source
                  Extensions -> Text -> [Text] -> Pandoc -> Pandoc
adjustLinksAndIds (ReaderOptions -> Extensions
readerExtensions ReaderOptions
readerOpts)
                    (FilePath -> Text
T.pack FilePath
fp) (forall a b. (a -> b) -> [a] -> [b]
map (FilePath -> Text
T.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst) [(FilePath, (ByteString, Maybe Text))]
inputs)
                    forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall a. ToSources a => ReaderOptions -> a -> m Pandoc
r ReaderOptions
readerOpts [(FilePath
fp, Text
txt)])
              [(FilePath, (ByteString, Maybe Text))]
inputs
      | Bool
otherwise -> forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (forall (m :: * -> *).
PandocMonad m =>
(Text -> Text)
-> (FilePath, (ByteString, Maybe Text)) -> m (FilePath, Text)
inputToText Text -> Text
convertTabs) [(FilePath, (ByteString, Maybe Text))]
inputs forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= forall a. ToSources a => ReaderOptions -> a -> m Pandoc
r ReaderOptions
readerOpts
    ByteStringReader ReaderOptions -> ByteString -> m Pandoc
r ->
      forall a. Monoid a => [a] -> a
mconcat forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (ReaderOptions -> ByteString -> m Pandoc
r ReaderOptions
readerOpts forall b c a. (b -> c) -> (a -> b) -> a -> c
. (FilePath, (ByteString, Maybe Text)) -> ByteString
inputToLazyByteString) [(FilePath, (ByteString, Maybe Text))]
inputs

readSources :: PandocMonad m
            => [FilePath] -> m [(FilePath, (BS.ByteString, Maybe MimeType))]
readSources :: forall (m :: * -> *).
PandocMonad m =>
[FilePath] -> m [(FilePath, (ByteString, Maybe Text))]
readSources [FilePath]
srcs =
  forall (t :: * -> *) (m :: * -> *) a b.
(Traversable t, Monad m) =>
(a -> m b) -> t a -> m (t b)
mapM (\FilePath
fp -> do (ByteString, Maybe Text)
t <- forall (m :: * -> *).
PandocMonad m =>
FilePath -> m (ByteString, Maybe Text)
readSource FilePath
fp
                  forall (m :: * -> *) a. Monad m => a -> m a
return (if FilePath
fp forall a. Eq a => a -> a -> Bool
== FilePath
"-" then FilePath
"" else FilePath
fp, (ByteString, Maybe Text)
t)) [FilePath]
srcs

-- | Read input from a resource, i.e., either a file, a URL, or stdin
-- (@-@).
readSource :: PandocMonad m
           => FilePath -> m (BS.ByteString, Maybe MimeType)
readSource :: forall (m :: * -> *).
PandocMonad m =>
FilePath -> m (ByteString, Maybe Text)
readSource FilePath
"-" = (,forall a. Maybe a
Nothing) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *). PandocMonad m => m ByteString
readStdinStrict
readSource FilePath
src =
  case FilePath -> Maybe URI
parseURI FilePath
src of
    Just URI
u | URI -> FilePath
uriScheme URI
u forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [FilePath
"http:",FilePath
"https:"] -> forall (m :: * -> *).
PandocMonad m =>
Text -> m (ByteString, Maybe Text)
openURL (FilePath -> Text
T.pack FilePath
src)
           | URI -> FilePath
uriScheme URI
u forall a. Eq a => a -> a -> Bool
== FilePath
"file:" ->
               (,forall a. Maybe a
Nothing) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
                 forall (m :: * -> *). PandocMonad m => FilePath -> m ByteString
readFileStrict (Text -> FilePath
uriPathToPath forall a b. (a -> b) -> a -> b
$ FilePath -> Text
T.pack forall a b. (a -> b) -> a -> b
$ URI -> FilePath
uriPath URI
u)
    Maybe URI
_       -> (,forall a. Maybe a
Nothing) forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> forall (m :: * -> *). PandocMonad m => FilePath -> m ByteString
readFileStrict FilePath
src

inputToText :: PandocMonad m
            => (Text -> Text)
            -> (FilePath, (BS.ByteString, Maybe MimeType))
            -> m (FilePath, Text)
inputToText :: forall (m :: * -> *).
PandocMonad m =>
(Text -> Text)
-> (FilePath, (ByteString, Maybe Text)) -> m (FilePath, Text)
inputToText Text -> Text
convTabs (FilePath
fp, (ByteString
bs,Maybe Text
mt)) =
  (FilePath
fp,) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Text
convTabs forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> Text
T.filter (forall a. Eq a => a -> a -> Bool
/=Char
'\r') forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$>
  case Maybe Text
mt forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Text -> Maybe Text
getCharset of
    Just Text
"UTF-8"      -> forall (m :: * -> *).
PandocMonad m =>
FilePath -> ByteString -> m Text
toTextM FilePath
fp ByteString
bs
    Just Text
"ISO-8859-1" -> forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ FilePath -> Text
T.pack forall a b. (a -> b) -> a -> b
$ ByteString -> FilePath
B8.unpack ByteString
bs
    Just Text
charset      -> forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError forall a b. (a -> b) -> a -> b
$ Text -> PandocError
PandocUnsupportedCharsetError Text
charset
    Maybe Text
Nothing           -> forall e (m :: * -> *) a.
MonadError e m =>
m a -> (e -> m a) -> m a
catchError
                           (forall (m :: * -> *).
PandocMonad m =>
FilePath -> ByteString -> m Text
toTextM FilePath
fp ByteString
bs)
                           (\case
                              PandocUTF8DecodingError{} -> do
                                forall (m :: * -> *). PandocMonad m => LogMessage -> m ()
report forall a b. (a -> b) -> a -> b
$ FilePath -> LogMessage
NotUTF8Encoded
                                  (if forall (t :: * -> *) a. Foldable t => t a -> Bool
null FilePath
fp
                                      then FilePath
"input"
                                      else FilePath
fp)
                                forall (m :: * -> *) a. Monad m => a -> m a
return forall a b. (a -> b) -> a -> b
$ FilePath -> Text
T.pack forall a b. (a -> b) -> a -> b
$ ByteString -> FilePath
B8.unpack ByteString
bs
                              PandocError
e -> forall e (m :: * -> *) a. MonadError e m => e -> m a
throwError PandocError
e)

inputToLazyByteString :: (FilePath, (BS.ByteString, Maybe MimeType))
                      -> BL.ByteString
inputToLazyByteString :: (FilePath, (ByteString, Maybe Text)) -> ByteString
inputToLazyByteString (FilePath
_, (ByteString
bs,Maybe Text
_)) = ByteString -> ByteString
BL.fromStrict ByteString
bs

adjustLinksAndIds :: Extensions -> Text -> [Text] -> Pandoc -> Pandoc
adjustLinksAndIds :: Extensions -> Text -> [Text] -> Pandoc -> Pandoc
adjustLinksAndIds Extensions
exts Text
thisfile [Text]
allfiles
  | forall (t :: * -> *) a. Foldable t => t a -> Int
length [Text]
allfiles forall a. Ord a => a -> a -> Bool
> Int
1 = Pandoc -> Pandoc
addDiv forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. Walkable a b => (a -> a) -> b -> b
walk Inline -> Inline
fixInline forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. Walkable a b => (a -> a) -> b -> b
walk Block -> Block
fixBlock
  | Bool
otherwise           = forall a. a -> a
id
 where
  toIdent :: Text -> Text
  toIdent :: Text -> Text
toIdent = Extensions -> Text -> Text
textToIdentifier Extensions
exts forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> [Text] -> Text
T.intercalate Text
"__" forall b c a. (b -> c) -> (a -> b) -> a -> c
.
            (Char -> Bool) -> Text -> [Text]
T.split (\Char
c -> Char
c forall a. Eq a => a -> a -> Bool
== Char
'/' Bool -> Bool -> Bool
|| Char
c forall a. Eq a => a -> a -> Bool
== Char
'\\')

  addDiv :: Pandoc -> Pandoc
  addDiv :: Pandoc -> Pandoc
addDiv (Pandoc Meta
m [Block]
bs)
    | Text -> Bool
T.null Text
thisfile = Meta -> [Block] -> Pandoc
Pandoc Meta
m [Block]
bs
    | Bool
otherwise = Meta -> [Block] -> Pandoc
Pandoc Meta
m [Attr -> [Block] -> Block
Div (Text -> Text
toIdent Text
thisfile,[],[]) [Block]
bs]

  fixBlock :: Block -> Block
  fixBlock :: Block -> Block
fixBlock (CodeBlock Attr
attr Text
t) = Attr -> Text -> Block
CodeBlock (Attr -> Attr
fixAttrs Attr
attr) Text
t
  fixBlock (Header Int
lev Attr
attr [Inline]
ils) = Int -> Attr -> [Inline] -> Block
Header Int
lev (Attr -> Attr
fixAttrs Attr
attr) [Inline]
ils
  fixBlock (Table Attr
attr Caption
cap [ColSpec]
cols TableHead
th [TableBody]
tbs TableFoot
tf) =
     Attr
-> Caption
-> [ColSpec]
-> TableHead
-> [TableBody]
-> TableFoot
-> Block
Table (Attr -> Attr
fixAttrs Attr
attr) Caption
cap [ColSpec]
cols TableHead
th [TableBody]
tbs TableFoot
tf
  fixBlock (Div Attr
attr [Block]
bs) = Attr -> [Block] -> Block
Div (Attr -> Attr
fixAttrs Attr
attr) [Block]
bs
  fixBlock Block
x = Block
x

  -- add thisfile as prefix of identifier
  fixAttrs :: Attr -> Attr
  fixAttrs :: Attr -> Attr
fixAttrs (Text
i,[Text]
cs,[(Text, Text)]
kvs)
    | Text -> Bool
T.null Text
i = (Text
i,[Text]
cs,[(Text, Text)]
kvs)
    | Bool
otherwise =
        (Text -> [Text] -> Text
T.intercalate Text
"__"
          (forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Bool
T.null) [Text -> Text
toIdent Text
thisfile, Text
i]),
        [Text]
cs, [(Text, Text)]
kvs)

  -- if URL begins with file from allfiles, convert to
  -- an internal link with the appropriate identifier
  fixURL :: Text -> Text
  fixURL :: Text -> Text
fixURL Text
u =
    let (Text
a,Text
b) = (Char -> Bool) -> Text -> (Text, Text)
T.break (forall a. Eq a => a -> a -> Bool
== Char
'#') forall a b. (a -> b) -> a -> b
$ FilePath -> Text
T.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. FilePath -> FilePath
unEscapeString forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> FilePath
T.unpack forall a b. (a -> b) -> a -> b
$ Text
u
        filepart :: Text
filepart = if Text -> Bool
T.null Text
a
                      then Text -> Text
toIdent Text
thisfile
                      else Text -> Text
toIdent Text
a
        fragpart :: Text
fragpart = (Char -> Bool) -> Text -> Text
T.dropWhile (forall a. Eq a => a -> a -> Bool
== Char
'#') Text
b
     in if Text -> Bool
T.null Text
a Bool -> Bool -> Bool
|| Text
a forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` [Text]
allfiles
           then Text
"#" forall a. Semigroup a => a -> a -> a
<> Text -> [Text] -> Text
T.intercalate Text
"__"
                         (forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Bool
T.null) [Text
filepart, Text
fragpart])
           else Text
u

  fixInline :: Inline -> Inline
  fixInline :: Inline -> Inline
fixInline (Code Attr
attr Text
t) = Attr -> Text -> Inline
Code (Attr -> Attr
fixAttrs Attr
attr) Text
t
  fixInline (Link Attr
attr [Inline]
ils (Text
url,Text
tit)) =
    Attr -> [Inline] -> (Text, Text) -> Inline
Link (Attr -> Attr
fixAttrs Attr
attr) [Inline]
ils (Text -> Text
fixURL Text
url,Text
tit)
  fixInline (Image Attr
attr [Inline]
ils (Text
url,Text
tit)) =
    Attr -> [Inline] -> (Text, Text) -> Inline
Image (Attr -> Attr
fixAttrs Attr
attr) [Inline]
ils (Text -> Text
fixURL Text
url,Text
tit)
  fixInline (Span Attr
attr [Inline]
ils) = Attr -> [Inline] -> Inline
Span (Attr -> Attr
fixAttrs Attr
attr) [Inline]
ils
  fixInline Inline
x = Inline
x