{-# LANGUAGE BangPatterns #-} {-# LANGUAGE CPP #-} {-# LANGUAGE ViewPatterns #-} -- | Function for generating highlighted and aligned LaTeX from literate -- Agda source. module Agda.Interaction.Highlighting.LaTeX ( generateLaTeX ) where import Prelude hiding (log) import Data.Char import Data.Maybe import Data.Function import Data.Foldable (toList) import Control.Monad.RWS.Strict import Control.Arrow (second) import System.Directory import System.Exit import System.FilePath import System.Process import Data.Text (Text) import qualified Data.Text as T #ifdef COUNT_CLUSTERS import qualified Data.Text.ICU as ICU #endif import qualified Data.Text.IO as T import qualified Data.Text.Lazy as L import qualified Data.Text.Lazy.Encoding as E import qualified Data.ByteString.Lazy as BS import Data.HashSet (HashSet) import qualified Data.HashSet as Set import qualified Data.IntMap as IntMap import qualified Data.List as List import Paths_Agda import Agda.Syntax.Abstract (toTopLevelModuleName) import Agda.Syntax.Common import Agda.Syntax.Concrete (TopLevelModuleName, moduleNameParts, projectRoot) import Agda.Syntax.Parser.Literate (literateTeX, LayerRole, atomizeLayers) import qualified Agda.Syntax.Parser.Literate as L import Agda.Syntax.Position (startPos) import qualified Agda.Interaction.FindFile as Find import Agda.Interaction.Highlighting.Precise import Agda.TypeChecking.Monad (TCM, Interface(..)) import qualified Agda.TypeChecking.Monad as TCM import qualified Agda.Interaction.Options as O import Agda.Compiler.CallCompiler import qualified Agda.Utils.IO.UTF8 as UTF8 import Agda.Utils.FileName (filePath, AbsolutePath, mkAbsolute) #include "undefined.h" import Agda.Utils.Impossible ------------------------------------------------------------------------ -- * Datatypes. -- | The @LaTeX@ monad is a combination of @ExceptT@, @RWST@ and -- @IO@. The error part is just used to keep track whether we finished -- or not, the reader part isn't used, the writer is where the output -- goes and the state is for keeping track of the tokens and some other -- useful info, and the I/O part is used for printing debugging info. type LaTeX = RWST () [Output] State IO -- | Output items. data Output = Text !Text -- ^ A piece of text. | MaybeColumn !AlignmentColumn -- ^ A column. If it turns out to be an indentation column that is -- not used to indent or align something, then no column will be -- generated, only whitespace ('agdaSpace'). deriving Show -- | Column kinds. data Kind = Indentation -- ^ Used only for indentation (the placement of the first token -- on a line, relative to tokens on previous lines). | Alignment -- ^ Used both for indentation and for alignment. deriving (Eq, Show) -- | Unique identifiers for indentation columns. type IndentationColumnId = Int -- | Alignment and indentation columns. data AlignmentColumn = AlignmentColumn { columnCodeBlock :: !Int -- ^ The column's code block. , columnColumn :: !Int -- ^ The column number. , columnKind :: Maybe IndentationColumnId -- ^ The column kind. 'Nothing' for alignment columns and @'Just' -- i@ for indentation columns, where @i@ is the column's unique -- identifier. } deriving Show data State = State { codeBlock :: !Int -- ^ The number of the current code block. , column :: !Int -- ^ The current column number. , columns :: [AlignmentColumn] -- ^ All alignment columns found on the -- current line (so far), in reverse -- order. , columnsPrev :: [AlignmentColumn] -- ^ All alignment columns found in -- previous lines (in any code block), -- with larger columns coming first. , nextId :: !IndentationColumnId -- ^ The next indentation column -- identifier. , usedColumns :: HashSet IndentationColumnId -- ^ Indentation columns that have -- actually -- been used. , countClusters :: !Bool -- ^ Count extended grapheme clusters? } type Tokens = [Token] data Token = Token { text :: !Text , info :: Aspects } deriving Show withTokenText :: (Text -> Text) -> Token -> Token withTokenText f tok@Token{text = t} = tok{text = f t} data Debug = MoveColumn | NonCode | Code | Spaces | Output deriving (Eq, Show) -- | Says what debug information should printed. debugs :: [Debug] debugs = [] -- | Run function for the @LaTeX@ monad. runLaTeX :: LaTeX a -> () -> State -> IO (a, State, [Output]) runLaTeX = runRWST emptyState :: Bool -- ^ Count extended grapheme clusters? -> State emptyState cc = State { codeBlock = 0 , column = 0 , columns = [] , columnsPrev = [] , nextId = 0 , usedColumns = Set.empty , countClusters = cc } ------------------------------------------------------------------------ -- * Some helpers. -- | Gives the size of the string. If cluster counting is enabled, -- then the number of extended grapheme clusters is computed (the root -- locale is used), and otherwise the number of code points. size :: Text -> LaTeX Int size t = do cc <- countClusters <$> get if cc then #ifdef COUNT_CLUSTERS return $ length $ ICU.breaks (ICU.breakCharacter ICU.Root) t #else __IMPOSSIBLE__ #endif else return $ T.length t (<+>) :: Text -> Text -> Text (<+>) = T.append isInfixOf' :: Text -> Text -> Maybe (Text, Text) isInfixOf' needle haystack = go (T.tails haystack) 0 where go [] !n = Nothing go ((T.stripPrefix needle -> Just suf) : xss) n = Just (T.take n haystack, suf) go (_ : xss) n = go xss (n + 1) -- Same as above, but starts searching from the back rather than the -- front. isInfixOfRev :: Text -> Text -> Maybe (Text, Text) isInfixOfRev needle haystack = case T.reverse needle `isInfixOf'` T.reverse haystack of Nothing -> Nothing Just (pre, suf) -> Just (T.reverse suf, T.reverse pre) -- | Does the string consist solely of whitespace? isSpaces :: Text -> Bool isSpaces = T.all isSpace -- | Is the character a whitespace character distinct from '\n'? isSpaceNotNewline :: Char -> Bool isSpaceNotNewline c = isSpace c && c /= '\n' -- | Replaces all forms of whitespace, except for new-line characters, -- with spaces. replaceSpaces :: Text -> Text replaceSpaces = T.map (\c -> if isSpaceNotNewline c then ' ' else c) -- | If the `Token` consists of spaces, the internal column counter is advanced -- by the length of the token. Otherwise, `moveColumnForToken` is a no-op. moveColumnForToken :: Token -> LaTeX () moveColumnForToken t = do unless (isSpaces (text t)) $ do log MoveColumn $ text t moveColumn =<< size (text t) -- | Merges 'columns' into 'columnsPrev', resets 'column' and -- 'columns' resetColumn :: LaTeX () resetColumn = modify $ \s -> s { column = 0 , columnsPrev = merge (columns s) (columnsPrev s) , columns = [] } where -- Remove shadowed columns from old. merge [] old = old merge new old = new ++ filter ((< leastNew) . columnColumn) old where leastNew = columnColumn (last new) moveColumn :: Int -> LaTeX () moveColumn i = modify $ \s -> s { column = i + column s } -- | Registers a column of the given kind. The column is returned. registerColumn :: Kind -> LaTeX AlignmentColumn registerColumn kind = do column <- gets column codeBlock <- gets codeBlock kind <- case kind of Alignment -> return Nothing Indentation -> do nextId <- gets nextId modify $ \s -> s { nextId = succ nextId } return (Just nextId) let c = AlignmentColumn { columnColumn = column , columnCodeBlock = codeBlock , columnKind = kind } modify $ \s -> s { columns = c : columns s } return c -- | Registers the given column as used (if it is an indentation -- column). useColumn :: AlignmentColumn -> LaTeX () useColumn c = case columnKind c of Nothing -> return () Just i -> modify $ \s -> s { usedColumns = Set.insert i (usedColumns s) } -- | Alignment column zero in the current code block. columnZero :: LaTeX AlignmentColumn columnZero = do codeBlock <- gets codeBlock return $ AlignmentColumn { columnColumn = 0 , columnCodeBlock = codeBlock , columnKind = Nothing } -- | Registers column zero as an alignment column. registerColumnZero :: LaTeX () registerColumnZero = do c <- columnZero modify $ \s -> s { columns = [c] } -- | Changes to the state that are performed at the start of a code -- block. enterCode :: LaTeX () enterCode = do resetColumn modify $ \s -> s { codeBlock = codeBlock s + 1 } -- | Changes to the state that are performed at the end of a code -- block. leaveCode :: LaTeX () leaveCode = return () logHelper :: Debug -> Text -> [String] -> LaTeX () logHelper debug text extra = when (debug `elem` debugs) $ do lift $ T.putStrLn $ T.pack (show debug ++ ": ") <+> T.pack "'" <+> text <+> T.pack "' " <+> if null extra then T.empty else T.pack "(" <+> T.pack (unwords extra) <+> T.pack ")" log :: Debug -> Text -> LaTeX () log MoveColumn text = do cols <- gets columns logHelper MoveColumn text ["columns=", show cols] log Code text = do cols <- gets columns col <- gets column logHelper Code text ["columns=", show cols, "col=", show col] log debug text = logHelper debug text [] log' :: Debug -> String -> LaTeX () log' d = log d . T.pack output :: Output -> LaTeX () output item = do log' Output (show item) tell [item] ------------------------------------------------------------------------ -- * LaTeX and polytable strings. -- Polytable, http://www.ctan.org/pkg/polytable, is used for code -- alignment, similar to lhs2TeX's approach. nl, beginCode, endCode :: Text nl = T.pack "%\n" beginCode = T.pack "\\begin{code}" endCode = T.pack "\\end{code}" -- | A command that is used when two tokens are put next to each other -- in the same column. agdaSpace :: Text agdaSpace = cmdPrefix <+> T.pack "Space" <+> cmdArg T.empty <+> nl -- | The column's name. -- -- Indentation columns have unique names, distinct from all alignment -- column names. columnName :: AlignmentColumn -> Text columnName c = T.pack $ case columnKind c of Nothing -> show (columnColumn c) Just i -> show i ++ "I" -- | Opens a column with the given name. ptOpen' :: Text -> Text ptOpen' name = T.pack "\\>[" <+> name <+> T.singleton ']' -- | Opens the given column. ptOpen :: AlignmentColumn -> Text ptOpen c = ptOpen' (columnName c) -- | Opens a special column that is only used at the beginning of -- lines. ptOpenBeginningOfLine :: Text ptOpenBeginningOfLine = ptOpen' (T.pack ".") -- | Opens the given column, and inserts an indentation instruction -- with the given argument at the end of it. ptOpenIndent :: AlignmentColumn -> Int -- ^ Indentation instruction argument. -> Text ptOpenIndent c delta = ptOpen c <+> T.pack "[@{}l@{" <+> cmdPrefix <+> T.pack "Indent" <+> cmdArg (T.pack $ show delta) <+> T.pack "}]" ptClose :: Text ptClose = T.pack "\\<" ptClose' :: AlignmentColumn -> Text ptClose' c = ptClose <+> T.singleton '[' <+> columnName c <+> T.singleton ']' ptNL :: Text ptNL = nl <+> T.pack "\\\\\n" ptEmptyLine :: Text ptEmptyLine = nl <+> T.pack "\\\\[" <+> cmdPrefix <+> T.pack "EmptyExtraSkip" <+> T.pack "]%\n" cmdPrefix :: Text cmdPrefix = T.pack "\\Agda" cmdArg :: Text -> Text cmdArg x = T.singleton '{' <+> x <+> T.singleton '}' ------------------------------------------------------------------------ -- * Output generation from a stream of labelled tokens. processLayers :: [(LayerRole, Tokens)] -> LaTeX () processLayers = mapM_ $ \(layerRole,toks) -> do case layerRole of L.Markup -> processMarkup toks L.Comment -> processComment toks L.Code -> processCode toks processMarkup, processComment, processCode :: Tokens -> LaTeX () -- | Deals with markup, which is output verbatim. processMarkup = mapM_ $ \t -> do moveColumnForToken t output (Text (text t)) -- | Deals with literate text, which is output verbatim processComment = mapM_ $ \t -> do unless (T.singleton '%' == T.take 1 (T.stripStart (text t))) $ do moveColumnForToken t output (Text (text t)) -- | Deals with code blocks. Every token, except spaces, is pretty -- printed as a LaTeX command. processCode toks' = do output $ Text nl enterCode mapM_ go toks' ptOpenWhenColumnZero =<< gets column output $ Text $ ptClose <+> nl leaveCode where go tok' = do -- Get the column information before grabbing the token, since -- grabbing (possibly) moves the column. col <- gets column moveColumnForToken tok' let tok = text tok' log Code tok if (tok == T.empty) then return () else do if (isSpaces tok) then do spaces $ T.group $ replaceSpaces tok else do ptOpenWhenColumnZero col output $ Text $ -- we return the escaped token wrapped in commands corresponding -- to its aspect (if any) and other aspects (e.g. error, unsolved meta) foldr (\c t -> cmdPrefix <+> T.pack c <+> cmdArg t) (escape tok) $ concatMap fromAspect (toList $ aspect $ info tok') ++ map fromOtherAspect (otherAspects $ info tok') -- Non-whitespace tokens at the start of a line trigger an -- alignment column. ptOpenWhenColumnZero col = when (col == 0) $ do registerColumnZero output . Text . ptOpen =<< columnZero -- Translation from OtherAspect to command strings. So far it happens -- to correspond to @show@ but it does not have to (cf. fromAspect) fromOtherAspect :: OtherAspect -> String fromOtherAspect = show fromAspect :: Aspect -> [String] fromAspect a = let s = [show a] in case a of Comment -> s Option -> s Keyword -> s String -> s Number -> s Symbol -> s PrimitiveType -> s Name Nothing isOp -> fromAspect (Name (Just Postulate) isOp) -- At the time of writing the case above can be encountered in -- --only-scope-checking mode, for instance for the token "Size" -- in the following code: -- -- {-# BUILTIN SIZE Size #-} -- -- The choice of "Postulate" works for this example, but might -- be less appropriate for others. Name (Just kind) isOp -> (\c -> if isOp then ["Operator", c] else [c]) $ case kind of Bound -> s Constructor Inductive -> "InductiveConstructor" Constructor CoInductive -> "CoinductiveConstructor" Datatype -> s Field -> s Function -> s Module -> s Postulate -> s Primitive -> s Record -> s Argument -> s Macro -> s where s = show kind -- | Escapes special characters. escape :: Text -> Text escape (T.uncons -> Nothing) = T.empty escape (T.uncons -> Just (c, s)) = T.pack (replace c) <+> escape s where replace :: Char -> String replace c = case c of '_' -> "\\AgdaUnderscore{}" '{' -> "\\{" '}' -> "\\}" '#' -> "\\#" '$' -> "\\$" '&' -> "\\&" '%' -> "\\%" '~' -> "\\textasciitilde{}" '^' -> "\\textasciicircum{}" '\\' -> "\\textbackslash{}" '-' -> "{-}" _ -> [ c ] escape _ = __IMPOSSIBLE__ -- | Every element in the list should consist of either one or more -- newline characters, or one or more space characters. Two adjacent -- list elements must not contain the same character. -- -- If the final element of the list consists of spaces, then these -- spaces are assumed to not be trailing whitespace. spaces :: [Text] -> LaTeX () spaces [] = return () -- Newlines. spaces (s@(T.uncons -> Just ('\n', _)) : ss) = do col <- gets column when (col == 0) $ output . Text . ptOpen =<< columnZero output $ Text $ ptClose <+> ptNL <+> T.replicate (T.length s - 1) ptEmptyLine resetColumn spaces ss -- Spaces followed by a newline character. spaces (_ : ss@(_ : _)) = spaces ss -- Spaces that are not followed by a newline character. spaces [ s ] = do col <- gets column let len = T.length s kind = if col /= 0 && len == 1 then Indentation else Alignment moveColumn len column <- registerColumn kind if col /= 0 then log' Spaces "col /= 0" else do columns <- gets columnsPrev codeBlock <- gets codeBlock log' Spaces $ "col == 0: " ++ show (len, columns) case filter ((<= len) . columnColumn) columns of c : _ | columnColumn c == len, isJust (columnKind c) -> do -- Align. (This happens automatically if the column is an -- alignment column, but c is an indentation column.) useColumn c output $ Text $ ptOpenBeginningOfLine output $ Text $ ptClose' c c : _ | columnColumn c < len -> do -- Indent. useColumn c output $ Text $ ptOpenIndent c (codeBlock - columnCodeBlock c) _ -> return () output $ MaybeColumn column -- | Split multi-lines string literals into multiple string literals -- Isolating leading spaces for the alignment machinery to work -- properly stringLiteral :: Token -> Tokens stringLiteral t | aspect (info t) == Just String = reverse $ foldl (\xs x -> t { text = x } : xs) [] $ concatMap leadingSpaces $ List.intersperse (T.pack "\n") $ T.lines (text t) where leadingSpaces :: Text -> [Text] leadingSpaces t = [pre, suf] where (pre , suf) = T.span isSpaceNotNewline t stringLiteral t = [t] ------------------------------------------------------------------------ -- * Main. defaultStyFile :: String defaultStyFile = "agda.sty" -- | Generates a LaTeX file for the given interface. -- -- The underlying source file is assumed to match the interface, but -- this is not checked. TODO: Fix this problem, perhaps by storing the -- source code in the interface. generateLaTeX :: Interface -> TCM () generateLaTeX i = do let mod = toTopLevelModuleName $ iModuleName i hi = iHighlighting i options <- TCM.commandLineOptions dir <- case O.optGHCiInteraction options of False -> return $ O.optLaTeXDir options True -> do sourceFile <- Find.findFile mod return $ filePath (projectRoot sourceFile mod) O.optLaTeXDir options liftIO $ createDirectoryIfMissing True dir (code, _, _) <- liftIO $ readProcessWithExitCode "kpsewhich" [ "--path=" ++ dir, defaultStyFile ] "" when (code /= ExitSuccess) $ do TCM.reportSLn "compile.latex" 1 $ unlines [ defaultStyFile ++ " was not found. Copying a default version of " ++ defaultStyFile , "into " ++ dir ++ "." ] liftIO $ do styFile <- getDataFileName defaultStyFile liftIO $ copyFile styFile (dir defaultStyFile) let outPath = modToFile mod inAbsPath <- liftM filePath (Find.findFile mod) liftIO $ do source <- UTF8.readTextFile inAbsPath latex <- E.encodeUtf8 `fmap` toLaTeX (O.optCountClusters $ O.optPragmaOptions options) (mkAbsolute inAbsPath) source hi createDirectoryIfMissing True $ dir takeDirectory outPath BS.writeFile (dir outPath) latex where modToFile :: TopLevelModuleName -> FilePath modToFile m = List.intercalate [pathSeparator] (moduleNameParts m) <.> "tex" groupByFst :: forall a b. Eq a => [(a,b)] -> [(a,[b])] groupByFst = map (\xs -> case xs of -- Float the grouping to the top level [] -> __IMPOSSIBLE__ (tag, _): _ -> (tag, map snd xs)) . List.groupBy ((==) `on` fst) -- Group together characters in the same -- role. -- | Transforms the source code into LaTeX. toLaTeX :: Bool -- ^ Count extended grapheme clusters? -> AbsolutePath -> String -> HighlightingInfo -> IO L.Text toLaTeX cc path source hi = processTokens cc . map (\(role, tokens) -> (role,) $ -- This bit fixes issue 954 (if L.isCode role then -- Remove trailing whitespace from the -- final line; the function spaces -- expects trailing whitespace to be -- followed by a newline character. whenMoreThanOne (withLast $ withTokenText $ \suf -> fromMaybe suf $ fmap (T.dropWhileEnd isSpaceNotNewline) $ T.stripSuffix (T.singleton '\n') suf) . (withLast $ withTokenText $ T.dropWhileEnd isSpaceNotNewline) . (withFirst $ withTokenText $ \pre -> fromMaybe pre $ T.stripPrefix (T.singleton '\n') $ T.dropWhile isSpaceNotNewline pre) else -- do nothing id) tokens) . map (second ( -- Split tokens at newlines concatMap stringLiteral -- Head the list (the grouped chars contain the same meta info) and -- collect the characters into a string. . map (\(mi, cs) -> Token { text = T.pack cs , info = fromMaybe mempty mi }) -- Characters which share the same meta info are the same token, so -- group them together. . groupByFst )) -- Characters in different layers belong to different tokens . groupByFst -- Look up the meta info at each position in the highlighting info. . map (\(pos, (role, char)) -> (role, (IntMap.lookup pos infoMap, char))) -- Add position in file to each character. . zip [1..] -- Map each character to its role . atomizeLayers . literateTeX (startPos (Just path)) $ source where infoMap = toMap (decompress hi) -- | This function preserves laziness of the list withLast :: (a -> a) -> [a] -> [a] withLast f [] = [] withLast f [a] = [f a] withLast f (a:as) = a:withLast f as -- | This function preserves laziness of the list withFirst :: (a -> a) -> [a] -> [a] withFirst _ [] = [] withFirst f (a:as) = f a:as whenMoreThanOne :: ([a] -> [a]) -> [a] -> [a] whenMoreThanOne f xs@(_:_:_) = f xs whenMoreThanOne _ xs = xs processTokens :: Bool -- ^ Count extended grapheme clusters? -> [(LayerRole, Tokens)] -> IO L.Text processTokens cc ts = do ((), s, os) <- runLaTeX (processLayers ts) () (emptyState cc) return $ L.fromChunks $ map (render s) os where render _ (Text s) = s render s (MaybeColumn c) | Just i <- columnKind c, not (Set.member i (usedColumns s)) = agdaSpace | otherwise = nl <+> ptOpen c