{-# LANGUAGE DataKinds #-} {-# LANGUAGE TypeOperators #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE OverloadedStrings #-} module Text.Pandoc.Server ( app , ServerOpts(..) , Params(..) , Blob(..) , parseServerOpts ) where import Data.Aeson import Network.Wai import Servant import Text.DocTemplates as DocTemplates import Text.Pandoc import Text.Pandoc.Citeproc (processCitations) import Text.Pandoc.Highlighting (lookupHighlightingStyle) import qualified Text.Pandoc.UTF8 as UTF8 import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.Lazy as TL import qualified Data.Text.Lazy.Encoding as TLE import Data.Maybe (fromMaybe) import Data.Char (isAlphaNum) import qualified Data.ByteString as BS import qualified Data.ByteString.Lazy as BL import Data.ByteString.Base64 (decodeBase64, encodeBase64) import Data.Default import Control.Monad (when, foldM) import qualified Data.Set as Set import Skylighting (defaultSyntaxMap) import qualified Data.Map as M import System.Console.GetOpt import System.Environment (getArgs, getProgName) import qualified Control.Exception as E import Text.Pandoc.Shared (safeStrRead, headerShift, filterIpynbOutput, eastAsianLineBreakFilter, stripEmptyParagraphs) import Text.Pandoc.App.Opt ( IpynbOutput (..), Opt(..), defaultOpts ) import Text.Pandoc.Builder (setMeta) import Text.Pandoc.SelfContained (makeSelfContained) import System.Exit data ServerOpts = ServerOpts { serverPort :: Int , serverTimeout :: Int } deriving (Show) defaultServerOpts :: ServerOpts defaultServerOpts = ServerOpts { serverPort = 3030, serverTimeout = 2 } cliOptions :: [OptDescr (ServerOpts -> IO ServerOpts)] cliOptions = [ Option ['p'] ["port"] (ReqArg (\s opts -> case safeStrRead s of Just i -> return opts{ serverPort = i } Nothing -> E.throwIO $ PandocOptionError $ T.pack s <> " is not a number") "NUMBER") "port number" , Option ['t'] ["timeout"] (ReqArg (\s opts -> case safeStrRead s of Just i -> return opts{ serverTimeout = i } Nothing -> E.throwIO $ PandocOptionError $ T.pack s <> " is not a number") "NUMBER") "timeout (seconds)" , Option ['h'] ["help"] (NoArg (\_ -> do prg <- getProgName let header = "Usage: " <> prg <> " [OPTION...]" putStrLn $ usageInfo header cliOptions exitWith ExitSuccess)) "help message" , Option ['v'] ["version"] (NoArg (\_ -> do prg <- getProgName putStrLn $ prg <> " " <> T.unpack pandocVersion exitWith ExitSuccess)) "version info" ] parseServerOpts :: IO ServerOpts parseServerOpts = do args <- getArgs let handleUnknownOpt x = "Unknown option: " <> x case getOpt' Permute cliOptions args of (os, ns, unrecognizedOpts, es) -> do when (not (null es) || not (null unrecognizedOpts)) $ E.throwIO $ PandocOptionError $ T.pack $ concat es ++ unlines (map handleUnknownOpt unrecognizedOpts) ++ ("Try --help for more information.") when (not (null ns)) $ E.throwIO $ PandocOptionError $ T.pack $ "Unknown arguments: " <> unwords ns foldM (flip ($)) defaultServerOpts os newtype Blob = Blob BL.ByteString deriving (Show, Eq) instance ToJSON Blob where toJSON (Blob bs) = toJSON (encodeBase64 $ BL.toStrict bs) instance FromJSON Blob where parseJSON = withText "Blob" $ \t -> do let inp = UTF8.fromText t case decodeBase64 inp of Right bs -> return $ Blob $ BL.fromStrict bs Left _ -> -- treat as regular text return $ Blob $ BL.fromStrict inp -- This is the data to be supplied by the JSON payload -- of requests. Maybe values may be omitted and will be -- given default values. data Params = Params { options :: Opt , text :: Text , files :: Maybe (M.Map FilePath Blob) , citeproc :: Maybe Bool } deriving (Show) instance Default Params where def = Params { options = defaultOpts , text = mempty , files = Nothing , citeproc = Nothing } -- Automatically derive code to convert to/from JSON. instance FromJSON Params where parseJSON = withObject "Params" $ \o -> Params <$> parseJSON (Object o) <*> o .: "text" <*> o .:? "files" <*> o .:? "citeproc" -- This is the API. The "/convert" endpoint takes a request body -- consisting of a JSON-encoded Params structure and responds to -- Get requests with either plain text or JSON, depending on the -- Accept header. type API = ReqBody '[JSON] Params :> Post '[PlainText, JSON] Text :<|> ReqBody '[JSON] Params :> Post '[OctetStream] BS.ByteString :<|> "batch" :> ReqBody '[JSON] [Params] :> Post '[JSON] [Text] :<|> "babelmark" :> QueryParam' '[Required] "text" Text :> QueryParam "from" Text :> QueryParam "to" Text :> QueryFlag "standalone" :> Get '[JSON] Value :<|> "version" :> Get '[PlainText, JSON] Text app :: Application app = serve api server api :: Proxy API api = Proxy server :: Server API server = convert :<|> convertBytes :<|> mapM convert :<|> babelmark -- for babelmark which expects {"html": "", "version": ""} :<|> pure pandocVersion where babelmark text' from' to' standalone' = do res <- convert def{ text = text', options = defaultOpts{ optFrom = from', optTo = to', optStandalone = standalone' } } return $ toJSON $ object [ "html" .= res, "version" .= pandocVersion ] -- We use runPure for the pandoc conversions, which ensures that -- they will do no IO. This makes the server safe to use. However, -- it will mean that features requiring IO, like RST includes, will not work. -- Changing this to -- handleErr =<< liftIO (runIO (convert' params)) -- will allow the IO operations. convert params = handleErr $ runPure (convert' id (encodeBase64 . BL.toStrict) params) convertBytes params = handleErr $ runPure (convert' UTF8.fromText BL.toStrict params) convert' :: (Text -> a) -> (BL.ByteString -> a) -> Params -> PandocPure a convert' textHandler bsHandler params = do curtime <- getCurrentTime -- put files params in ersatz file system let addFile :: FilePath -> Blob -> FileTree -> FileTree addFile fp (Blob lbs) = insertInFileTree fp FileInfo{ infoFileMTime = curtime , infoFileContents = BL.toStrict lbs } case files params of Nothing -> return () Just fs -> do let filetree = M.foldrWithKey addFile mempty fs modifyPureState $ \st -> st{ stFiles = filetree } let opts = options params let readerFormat = fromMaybe "markdown" $ optFrom opts let writerFormat = fromMaybe "html" $ optTo opts (readerSpec, readerExts) <- getReader readerFormat (writerSpec, writerExts) <- getWriter writerFormat let isStandalone = optStandalone opts let toformat = T.toLower $ T.takeWhile isAlphaNum $ writerFormat hlStyle <- traverse (lookupHighlightingStyle . T.unpack) $ optHighlightStyle opts mbTemplate <- if isStandalone then case optTemplate opts of Nothing -> Just <$> compileDefaultTemplate toformat Just t -> Just <$> compileCustomTemplate toformat t else return Nothing abbrevs <- Set.fromList . filter (not . T.null) . T.lines . UTF8.toText <$> case optAbbreviations opts of Nothing -> readDataFile "abbreviations" Just f -> readFileStrict f let readeropts = def{ readerExtensions = readerExts , readerStandalone = isStandalone , readerTabStop = optTabStop opts , readerIndentedCodeClasses = optIndentedCodeClasses opts , readerAbbreviations = abbrevs , readerDefaultImageExtension = optDefaultImageExtension opts , readerTrackChanges = optTrackChanges opts , readerStripComments = optStripComments opts } let writeropts = def{ writerExtensions = writerExts , writerTabStop = optTabStop opts , writerWrapText = optWrap opts , writerColumns = optColumns opts , writerTemplate = mbTemplate , writerSyntaxMap = defaultSyntaxMap , writerVariables = optVariables opts , writerTableOfContents = optTableOfContents opts , writerIncremental = optIncremental opts , writerHTMLMathMethod = optHTMLMathMethod opts , writerNumberSections = optNumberSections opts , writerNumberOffset = optNumberOffset opts , writerSectionDivs = optSectionDivs opts , writerReferenceLinks = optReferenceLinks opts , writerDpi = optDpi opts , writerEmailObfuscation = optEmailObfuscation opts , writerIdentifierPrefix = optIdentifierPrefix opts , writerCiteMethod = optCiteMethod opts , writerHtmlQTags = optHtmlQTags opts , writerSlideLevel = optSlideLevel opts , writerTopLevelDivision = optTopLevelDivision opts , writerListings = optListings opts , writerHighlightStyle = hlStyle , writerSetextHeaders = optSetextHeaders opts , writerEpubSubdirectory = T.pack $ optEpubSubdirectory opts , writerEpubMetadata = T.pack <$> optEpubMetadata opts , writerEpubFonts = optEpubFonts opts , writerEpubChapterLevel = optEpubChapterLevel opts , writerTOCDepth = optTOCDepth opts , writerReferenceDoc = optReferenceDoc opts , writerReferenceLocation = optReferenceLocation opts , writerPreferAscii = optAscii opts } let reader = case readerSpec of TextReader r -> r readeropts ByteStringReader r -> \t -> do let eitherbs = decodeBase64 $ UTF8.fromText t case eitherbs of Left errt -> throwError $ PandocSomeError errt Right bs -> r readeropts $ BL.fromStrict bs let writer = case writerSpec of TextWriter w -> fmap textHandler . (\d -> w writeropts d >>= if optEmbedResources opts && htmlFormat (optTo opts) then makeSelfContained else return) ByteStringWriter w -> fmap bsHandler . w writeropts let transforms :: Pandoc -> Pandoc transforms = (case optShiftHeadingLevelBy opts of 0 -> id x -> headerShift x) . (case optStripEmptyParagraphs opts of True -> stripEmptyParagraphs False -> id) . (if extensionEnabled Ext_east_asian_line_breaks readerExts && not (extensionEnabled Ext_east_asian_line_breaks writerExts && optWrap opts == WrapPreserve) then eastAsianLineBreakFilter else id) . (case optIpynbOutput opts of IpynbOutputAll -> id IpynbOutputNone -> filterIpynbOutput Nothing IpynbOutputBest -> filterIpynbOutput (Just $ case optTo opts of Just "latex" -> Format "latex" Just "beamer" -> Format "latex" Nothing -> Format "html" Just f | htmlFormat (optTo opts) -> Format "html" | otherwise -> Format f)) let meta = (case optBibliography opts of [] -> id fs -> setMeta "bibliography" (MetaList (map (MetaString . T.pack) fs))) . maybe id (setMeta "csl" . MetaString . T.pack) (optCSL opts) . maybe id (setMeta "citation-abbreviations" . MetaString . T.pack) (optCitationAbbreviations opts) $ optMetadata opts let addMetadata m' (Pandoc m bs) = Pandoc (m <> m') bs reader (text params) >>= return . transforms . addMetadata meta >>= (case citeproc params of Just True -> processCitations _ -> return) >>= writer htmlFormat :: Maybe Text -> Bool htmlFormat Nothing = True htmlFormat (Just f) = any (`T.isPrefixOf` f) ["html","html4","html5","s5","slidy", "slideous","dzslides","revealjs"] handleErr (Right t) = return t handleErr (Left err) = throwError $ err500 { errBody = TLE.encodeUtf8 $ TL.fromStrict $ renderError err } compileCustomTemplate toformat t = do res <- runWithPartials $ compileTemplate ("custom." <> T.unpack toformat) (T.pack t) case res of Left e -> throwError $ PandocTemplateError (T.pack e) Right tpl -> return tpl