{-# LANGUAGE CPP, ViewPatterns #-} -- | Function for generating highlighted and aligned LaTeX from literate -- Agda source. module Agda.Interaction.Highlighting.LaTeX ( generateLaTeX ) where import Prelude hiding (log) import Data.Char import Data.Maybe import Data.Function import Control.Monad.RWS import Control.Monad.Error import System.Directory import System.FilePath import Data.Text (Text) import qualified Data.Text as T import qualified Data.Text.IO as T import qualified Data.Text.Encoding as E import qualified Data.ByteString as BS import qualified Data.List as List import qualified Data.Map as Map import Paths_Agda import Agda.Syntax.Common import qualified Agda.Syntax.Abstract as A import Agda.Interaction.Highlighting.Precise import Agda.TypeChecking.Monad (TCM) import qualified Agda.TypeChecking.Monad as TCM import Agda.Interaction.Options import Agda.Compiler.CallCompiler import qualified Agda.Utils.IO.UTF8 as UTF8 #include "../../undefined.h" import Agda.Utils.Impossible ------------------------------------------------------------------------ -- * Datatypes. -- | The @LaTeX@ monad is a combination of @ErrorT@, @RWST@ and -- @IO@. The error part is just used to keep track whether we finished -- or not, the reader part isn't used, the writer is where the output -- goes and the state is for keeping track of the tokens and some other -- useful info, and the I/O part is used for printing debugging info. type LaTeX = ErrorT String (RWST () Text State IO) data State = State { tokens :: Tokens , column :: Int -- ^ Column number, used for polytable alignment. , indent :: Int -- ^ Indentation level, also for alignment. , indentPrev :: Int , inCode :: Bool -- ^ Keeps track of whether we are in a code -- block or not. , debugs :: [Debug] -- ^ Says what debug information should printed. } type Tokens = [Token] data Token = Token { text :: Text , info :: MetaInfo , position :: Integer -- ^ Is not used currently, but could -- potentially be used for hyperlinks as in -- the HTML output? } deriving Show data Debug = MoveColumn | NonCode | Code | Spaces | Output deriving (Eq, Show) -- | Run function for the @LaTeX@ monad. runLaTeX :: LaTeX a -> () -> State -> IO (Either String a, State, Text) runLaTeX = runRWST . runErrorT emptyState :: State emptyState = State { tokens = [] , column = 0 , indent = 0 , indentPrev = 0 , inCode = False , debugs = [] } ------------------------------------------------------------------------ -- * Some helpers. (<+>) = T.append isInfixOf' :: Text -> Text -> Maybe (Text, Text) isInfixOf' needle haystack = go (T.tails haystack) 0 where go [] n = Nothing go ((T.stripPrefix needle -> Just suf) : xss) n = Just (T.take n haystack, suf) go (_ : xss) n = go xss (n + 1) -- Same as above, but starts searching from the back rather than the -- front. isInfixOfRev :: Text -> Text -> Maybe (Text, Text) isInfixOfRev needle haystack = case T.reverse needle `isInfixOf'` T.reverse haystack of Nothing -> Nothing Just (pre, suf) -> Just (T.reverse suf, T.reverse pre) isSpaces :: Text -> Bool isSpaces = T.all isSpace isActualSpaces :: Text -> Bool isActualSpaces = T.all (== ' ') -- | Yields the next token, taking special care to begin/end code -- blocks. Junk occuring before and after the code blocks is separated -- into separate tokens, this makes it easier to keep track of whether -- we are in a code block or not. nextToken' :: LaTeX Token nextToken' = do toks <- gets tokens case toks of [] -> throwError "Done" -- Clean begin/end code block or a LaTeX comment. t : ts | text t == beginCode || text t == endCode || T.singleton '%' == T.take 1 (T.stripStart (text t)) -> do modify $ \s -> s { tokens = ts } return t t : ts -> do modify $ \s -> s { tokens = ts } inCode <- gets inCode let code = if inCode then endCode else beginCode case code `isInfixOf'` text t of Nothing -> do -- Spaces take care of their own column tracking. unless (isSpaces (text t)) $ do log MoveColumn $ text t moveColumn $ T.length $ text t return t Just (pre, suf) -> do let (textToReturn, textsToPutBack) = -- This bit fixes issue 954. -- Drop spaces up until and including the first trailing -- newline after begin code blocks. if code == beginCode && isSpaces suf then case T.singleton '\n' `isInfixOf'` suf of Nothing -> (pre, [ beginCode ]) Just (_, suf') -> (pre, [ beginCode, suf' ]) -- Do the converse thing for end code blocks. else if code == endCode && isSpaces pre then case T.singleton '\n' `isInfixOfRev` pre of Nothing -> (code, [ suf ]) Just (pre', suf') -> (pre' <+> T.dropWhile (`elem` [' ', '\t']) suf', [ code, suf ]) -- This case happens for example when you have two code -- blocks after each other, i.e. the begin code of the -- second ends up in the suffix of the first's end code. else (pre, [ code, suf ]) let tokToReturn = t { text = textToReturn } let toksToPutBack = map (\txt -> t { text = txt }) textsToPutBack unless (isSpaces pre) $ do log MoveColumn pre moveColumn $ T.length pre modify $ \s -> s { tokens = toksToPutBack ++ tokens s } return tokToReturn nextToken :: LaTeX Text nextToken = text `fmap` nextToken' resetColumn :: LaTeX () resetColumn = modify $ \s -> s { column = 0 } moveColumn :: Int -> LaTeX () moveColumn i = modify $ \s -> s { column = i + column s } setIndent :: Int -> LaTeX () setIndent i = modify $ \s -> s { indent = i } resetIndent :: LaTeX () resetIndent = modify $ \s -> s { indent = 0 } setIndentPrev :: Int -> LaTeX () setIndentPrev i = modify $ \s -> s { indentPrev = i } resetIndentPrev :: LaTeX () resetIndentPrev = modify $ \s -> s { indentPrev = 0 } setInCode :: LaTeX () setInCode = modify $ \s -> s { inCode = True } unsetInCode :: LaTeX () unsetInCode = modify $ \s -> s { inCode = False } logHelper :: Debug -> Text -> [String] -> LaTeX () logHelper debug text extra = do debugs <- gets debugs when (debug `elem` debugs) $ do lift $ lift $ T.putStrLn $ T.pack (show debug ++ ": ") <+> T.pack "'" <+> text <+> T.pack "' " <+> if null extra then T.empty else T.pack "(" <+> T.pack (unwords extra) <+> T.pack ")" log :: Debug -> Text -> LaTeX () log MoveColumn text = do ind <- gets indent logHelper MoveColumn text ["ind=", show ind] log Code text = do ind <- gets indent col <- gets column logHelper Code text ["ind=", show ind, "col=", show col] log debug text = logHelper debug text [] log' :: Debug -> String -> LaTeX () log' d = log d . T.pack output :: Text -> LaTeX () output text = do log Output text tell text ------------------------------------------------------------------------ -- * LaTeX and polytable strings. -- Polytable, http://www.ctan.org/pkg/polytable, is used for code -- alignment, similar to lhs2TeX's approach. nl = T.pack "%\n" beginCode = T.pack "\\begin{code}" endCode = T.pack "\\end{code}" ptOpen = T.pack "\\>" ptOpen' i = ptOpen <+> T.pack ("[" ++ show i ++ "]") ptClose = T.pack "\\<" ptClose' i = ptClose <+> T.pack ("[" ++ show i ++ "]") ptNL = nl <+> T.pack "\\\\\n" cmdPrefix = T.pack "\\Agda" cmdArg x = T.singleton '{' <+> x <+> T.singleton '}' cmdIndent i = cmdPrefix <+> T.pack "Indent" <+> cmdArg (T.pack (show i)) <+> cmdArg T.empty infixl' = T.pack "infixl" infix' = T.pack "infix" infixr' = T.pack "infixr" ------------------------------------------------------------------------ -- * Automaton. -- | The start state, @nonCode@, prints non-code (the LaTeX part of -- literate Agda) until it sees a @beginBlock@. nonCode :: LaTeX () nonCode = do tok <- nextToken log NonCode tok if tok == beginCode then do output $ beginCode <+> nl resetColumn setInCode code else do output tok nonCode -- | Deals with code blocks. Every token, except spaces, is pretty -- printed as a LaTeX command. code :: LaTeX () code = do -- Get the column information before grabbing the token, since -- grabbing (possibly) moves the column. col <- gets column tok' <- nextToken' let tok = text tok' log Code tok when (tok == T.empty) code when (col == 0 && not (isActualSpaces tok)) $ do output ptOpen when (tok == endCode) $ do output $ ptClose <+> nl <+> endCode unsetInCode nonCode when (tok `elem` [ infixl', infix', infixr' ]) $ do output $ cmdPrefix <+> T.pack "Keyword" <+> cmdArg tok fixity code when (isSpaces tok) $ do spaces $ T.group tok code case aspect (info tok') of Nothing -> output $ escape tok -- Just a -> output $ cmdPrefix <+> T.pack (cmd a) <+> cmdArg (escape tok) -- Andreas, 2014-02-17 preliminary fix for issue 1062 Just a -> case cmd a of "" -> output $ escape tok s -> output $ cmdPrefix <+> T.pack s <+> cmdArg (escape tok) code where cmd :: Aspect -> String -- cmd (Name mKind _) = maybe __IMPOSSIBLE__ showKind mKind -- Andreas, 2014-02-17 preliminary fix for issue 1062 cmd (Name mKind _) = maybe "" showKind mKind where showKind :: NameKind -> String showKind (Constructor Inductive) = "InductiveConstructor" showKind (Constructor CoInductive) = "CoinductiveConstructor" -- Andreas, 2014-02-17 -- It might be boring boilerplate, but please spell out the -- remaining cases instead of using the brittle @show@ function. -- What if a constructor in @NameKind@ gets renamed? showKind k = show k cmd a = show a -- Escapes special characters. escape :: Text -> Text escape (T.uncons -> Nothing) = T.empty escape (T.uncons -> Just (c, s)) = T.pack (replace c) <+> escape s where replace :: Char -> String replace c = case c of '_' -> "\\_" '{' -> "\\{" '}' -> "\\}" '#' -> "\\#" '$' -> "\\$" '&' -> "\\&" '%' -> "\\%" '~' -> "\\textasciitilde" '^' -> "\\textasciicircum" '\\' -> "\\textbackslash" -- Escaping newlines seems to fix the problem caused by pattern -- synonyms. '\n' -> "\\<\\\\\n\\>" _ -> [ c ] escape _ = __IMPOSSIBLE__ -- | Fixity declarations need a special treatment. The operations in -- declarations like: -- -- infix num op1 op2 op3 -- -- are treated as comments and thus grouped together with the newlines -- that follow, which results incorrect LaTeX output -- the following -- state remedies the problem by breaking on newlines. fixity :: LaTeX () fixity = do tok <- nextToken case T.breakOn (T.pack "\n") tok of -- Spaces. (sps, nls) | nls == T.empty && isSpaces sps -> do spaces $ T.group sps fixity -- Fixity level. (num, nls) | nls == T.empty -> do output $ cmdPrefix <+> T.pack "Number" <+> cmdArg num fixity -- Operations followed by newlines. (ops, nls) | otherwise -> do output $ escape ops spaces (T.group nls) -- | Spaces are grouped before processed, because multiple consecutive -- spaces determine the alignment of the code and consecutive newline -- characters need special treatment as well. spaces :: [Text] -> LaTeX () spaces [] = return () spaces ((T.uncons -> Nothing) : ss) = __IMPOSSIBLE__ -- Single spaces are ignored. spaces ((T.uncons -> Just (' ', s)) : []) | T.null s = do col <- gets column when (col == 0) $ do output ptOpen moveColumn 1 output $ T.singleton ' ' -- Multiple spaces. spaces (s@(T.uncons -> Just (' ', _)) : ss) = do let len = T.length s col <- gets column moveColumn len if col /= 0 then do log' Spaces "col /= 0" output $ T.singleton ' ' col <- gets column output $ ptClose' col <+> nl <+> ptOpen' col else do log' Spaces "col == 0" indent <- gets indent indentPrev <- gets indentPrev case compare len indent of GT -> do log' Spaces "GT" setIndent len setIndentPrev indent output $ ptOpen' indent output $ cmdIndent len output $ ptClose' len <+> nl <+> ptOpen' len EQ -> do log' Spaces "EQ" output $ ptOpen' indentPrev output $ cmdIndent len output $ ptClose' len <+> nl <+> ptOpen' len LT -> do log' Spaces "LT" setIndent len resetIndentPrev output $ ptOpen' 0 output $ cmdIndent len output $ ptClose' len <+> nl <+> ptOpen' len spaces ss -- Newlines. spaces (s@(T.uncons -> Just ('\n', _)) : ss) = do resetColumn output $ ptClose <+> T.replicate (T.length s) ptNL spaces ss -- Treat tabs as if they were spaces. spaces (s@(T.uncons -> Just ('\t', _)) : ss) = spaces $ T.replicate (T.length s) (T.singleton ' ') : ss spaces (_ : ss) = __IMPOSSIBLE__ ------------------------------------------------------------------------ -- * Main. defaultStyFile = "agda.sty" -- | The only exported function. It's (only) called in @Main.hs@. generateLaTeX :: A.ModuleName -> HighlightingInfo -> TCM () generateLaTeX mod hi = do options <- TCM.commandLineOptions -- There is a default directory given by 'defaultLaTeXDir'. let dir = optLaTeXDir options liftIO $ createDirectoryIfMissing True dir TCM.reportSLn "latex" 1 $ unlines [ "" , "Checking if " ++ defaultStyFile ++ " is found by the LaTeX environment." ] merrors <- callCompiler' "kpsewhich" [ "--path=" ++ dir, defaultStyFile ] when (isJust merrors) $ do TCM.reportSLn "latex" 1 $ unlines [ "" , defaultStyFile ++ " was not found. Copying a default version of " ++ defaultStyFile , "into the directory " ++ dir ++ "." ] liftIO $ do styFile <- getDataFileName defaultStyFile liftIO $ copyFile styFile (dir defaultStyFile) liftIO $ do let filePath = modToFile mod source <- UTF8.readTextFile (filePath <.> "lagda") latex <- E.encodeUtf8 `fmap` toLaTeX source hi createDirectoryIfMissing True $ dir takeDirectory filePath BS.writeFile (dir filePath <.> "tex") latex where modToFile :: A.ModuleName -> FilePath modToFile mod = go $ show mod where go [] = [] go ('.' : cs) = pathSeparator : go cs go (c : cs) = c : go cs -- | Transforms the source code into LaTeX. toLaTeX :: String -> HighlightingInfo -> IO Text toLaTeX source hi = processTokens -- Head the list (the grouped chars contain the same meta info) and -- collect the characters into a string. . map (\xs -> case xs of (mi, (pos, _)) : _ -> Token { text = T.pack $ map (\(_, (_, c)) -> c) xs , info = maybe mempty id mi , position = pos } [] -> __IMPOSSIBLE__) . List.groupBy ((==) `on` fst) -- Characters which share the same -- meta info are the same token, so -- group them together. -- Look up the meta info at each position in the highlighting info. . map (\(pos, char) -> (Map.lookup pos infoMap, (pos, char))) -- Add position in file to each character. . zip [1..] $ source where infoMap = toMap (decompress hi) processTokens :: Tokens -> IO Text processTokens ts = do (x, _, s) <- runLaTeX nonCode () (emptyState { tokens = ts }) case x of Left "Done" -> return s _ -> __IMPOSSIBLE__