{-DHUN| This modules converts the parse tree to a latex document DHUN-} module LatexRenderer (treeToLaTeX3, shallowFlatten, prepateTemplate, replace, doUnicode, getGalleryNumbers, getTitle, initialState, getJ, urld, analyseNetloc, templateMap, getUserTemplateMap, urls, mUrlState, initialUrlState, makeLables, templateRegistry, baseUrl, deepFlatten) where import Data.String.HT (trim) import MyState import Data.List import qualified Data.Map as Map import Data.Map (Map) import Data.Char import Text.Printf import FontTool import MediaWikiParseTree import MagicStrings import Tools import Control.Monad.Trans.State (State, state, runState, StateT, runStateT, put, get) import Control.Monad.Trans.Class (lift) import Control.Monad (guard, mplus, msum) import TableHelper import GHC.Float import Text.Highlighting.Kate import WikiLinkHelper import WikiHelper import Data.List.Split import BaseFont import Data.Maybe import Data.Tuple (swap) {-DHUN| flattens a part of the parse tree, that is takes the characters found in the tree and turns them into a string dropping all other information in the tree DHUN-} deepFlatten :: [Anything t] -> [Anything t] deepFlatten ll = concat $ map go ll where go (Environment _ _ l) = (deepFlatten l) go x = [x] {-DHUN| the maximum width of lines for preformat and source code DHUN-} linewidth :: Int linewidth = 80 {-DHUN| The user can provide her own translation table for mediawiki templates to latex commands. this is done in the templates.user files. This function takes this file in list representation and converts it to the map representation to be able to look up the names of templates DHUN-} getUserTemplateMap :: [[String]] -> Map String [String] getUserTemplateMap input = Map.fromList (map (\ (x : xs) -> (x, xs)) input) {-DHUN| table may omit tailing columns in a row, but in latex they need to be written out, this function does so DHUN-} rowaddsym :: TableState -> [Char] rowaddsym st = if (currentColumn st) < ((numberOfColumnsInTable st) + 1) then (if (currentColumn st) == 1 then replicate (((numberOfColumnsInTable st)) - (currentColumn st)) '&' else replicate (((numberOfColumnsInTable st) + 1) - (currentColumn st)) '&') else [] {-DHUN| This function renders the inner parts of a table to latex it does so by calling tableContentToLaTeX and additionally removes superfluous newlines which might cause compilation problems in latex when used inside tables DHUN-} tableContentToLaTeX2 :: [Anything Char] -> (StateT TableState (State MyState) String) tableContentToLaTeX2 l = do r <- tableContentToLaTeX l return (killnl2 r) {-DHUN| This function renders the inner parts of a table to latex, please always use tableContentToLaTeX2 since this also removes superfluous newlines DHUN-} tableContentToLaTeX :: [Anything Char] -> (StateT TableState (State MyState) String) tableContentToLaTeX ((Environment TableRowSep _ _) : []) = do st <- get let cc = (currentColumn st) let c = cc + (multiRowCount cc (multiRowMap st)) return $ (headendsym (lastCellWasHeaderCell st)) ++ (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++ (rowaddsym st{currentColumn = c}) tableContentToLaTeX ((Environment TableRowSep _ l) : xs) = do sst <- lift get st <- get let cc = (currentColumn st) let c = cc + (multiRowCount cc (multiRowMap st)) let mycond = (not (currentRowIsHeaderRow st)) && (stillInTableHeader st) && (lastRowHadEmptyMultiRowMap st) && (not (isFirstRow st)) put (st{rowCounter = 1 + (rowCounter st), outputTableHasHeaderRows = (outputTableHasHeaderRows st) || (currentRowIsHeaderRow st), lastRowHadEmptyMultiRowMap = (myempty (multiRowMap st))}) st2 <- get put st2{outputLastRowOfHeader = (if mycond then rowCounter st2 else outputLastRowOfHeader st2)} st3 <- get put st3{lastCellWasNotFirstCellOfRow = False, lastCellWasMultiColumn = False, currentColumn = 1, multiRowMap = multiRowDictChange (currentColumn st) (multiRowMap st) l, lastCellWasMultiRow = False, isFirstRow = False, lastCellWasHeaderCell = False, currentRowIsHeaderRow = False, stillInTableHeader = if stillInTableHeader st then not mycond else False} xx <- tableContentToLaTeX xs return $ if (not (isFirstRow st)) then (headendsym (lastCellWasHeaderCell st)) ++ (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++ (multiRowEndSymbol (lastCellWasMultiRow st)) ++ (multiRowSymbolForRowSep (currentColumn st) (multiRowMap st) (seperatingLinesRequestedForTable st)) ++ (rowaddsym st{currentColumn = c}) ++ (rowendsymb ((getInTab sst) <= 1) ((rowCounter st) == (inputLastRowOfHeader st) - 2)) ++ (innerHorizontalLine (seperatingLinesRequestedForTable st) (multiRowMap st3) (numberOfColumnsInTable st)) ++ " \n" ++ xx else xx tableContentToLaTeX ((Environment TableColSep _ l) : xs) = do st <- get let cc = (currentColumn st) let c = cc + (multiRowCount cc (multiRowMap st)) put st{lastCellWasNotFirstCellOfRow = True, lastCellWasMultiColumn = ("" /= (multiColumnStartSymbol l (columnsWidthList st) c (seperatingLinesRequestedForTable st) st)), currentColumn = (c + (columnMultiplicityForCounting l)), multiRowMap = multiRowDictChange (currentColumn st) (multiRowMap st) l, lastCellWasMultiRow = (multiRowStartSymbol l (activeColumn st)) /= "", isFirstRow = False, lastCellWasHeaderCell = False} xx <- tableContentToLaTeX xxs return $ (headendsym (lastCellWasHeaderCell st)) ++ (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++ (multiRowEndSymbol (lastCellWasMultiRow st)) ++ (columnSeperator (lastCellWasNotFirstCellOfRow st)) ++ (multiRowSymbol (currentColumn st) (multiRowMap st) (seperatingLinesRequestedForTable st)) ++ (multiColumnStartSymbol l (columnsWidthList st) c (seperatingLinesRequestedForTable st) st) ++ (multiRowStartSymbol l (activeColumn st)) ++ (if rig then "\\RaggedLeft{}" else "") ++ (tablecolorsym l) ++ hypennothing ++ xx where rig = isInfixOf2 [Environment Attribute (Attr ("style", "text-align:right")) []] l xxs = if rig then (reverse . removesp . reverse . removesp) xs else xs removesp (C ' ' : as) = removesp as removesp a = a tableContentToLaTeX ((Environment TableHeadColSep _ l) : xs) = do st <- get let cc = currentColumn st let c = cc + (multiRowCount cc (multiRowMap st)) put st{lastCellWasNotFirstCellOfRow = True, lastCellWasMultiColumn = ("" /= (multiColumnStartSymbol l (columnsWidthList st) c (seperatingLinesRequestedForTable st) st)), currentColumn = (c + (columnMultiplicityForCounting l)), multiRowMap = multiRowDictChange (currentColumn st) (multiRowMap st) l, lastCellWasMultiRow = multiRowStartSymbol l (activeColumn st) /= "", isFirstRow = False, lastCellWasHeaderCell = True, currentRowIsHeaderRow = True} xx <- tableContentToLaTeX xs return $ (headendsym (lastCellWasHeaderCell st)) ++ (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++ (multiRowEndSymbol (lastCellWasMultiRow st)) ++ (columnSeperator (lastCellWasNotFirstCellOfRow st)) ++ (multiRowSymbol (currentColumn st) (multiRowMap st) (seperatingLinesRequestedForTable st)) ++ (multiColumnStartSymbol l (columnsWidthList st) c (seperatingLinesRequestedForTable st) st) ++ (multiRowStartSymbol l (activeColumn st)) ++ headstartsym ++ (tablecolorsym l) ++ hypennothing ++ xx tableContentToLaTeX (x : xs) = do st <- get ele <- case (activeColumn st) of Just n | (n /= fromIntegral (currentColumn st)) || (lastCellWasMultiColumn st) -> return [] _ -> lift $ treeToLaTeX2 [x] xx <- tableContentToLaTeX xs return $ ele ++ xx tableContentToLaTeX [] = do st <- get return $ (headendsym (lastCellWasHeaderCell st)) ++ (multiColumnEndSymbol (lastCellWasMultiColumn st)) ++ (multiRowSymbolForTableEnd (currentColumn st) (multiRowMap st) (seperatingLinesRequestedForTable st)) ++ (multiRowEndSymbol (lastCellWasMultiRow st)) ++ (rowaddsym st) {-DHUN| This string has to be added to each new cell in a latex table in order to allow for hyphenation of the first word in this cell DHUN-} hypennothing :: [Char] hypennothing = "\\hspace*{0pt}\\ignorespaces{}\\hspace*{0pt}" {-DHUN| color cell in latex if HTML attribute bgcolor is present in the parse tree for the cell DHUN-} tablecolorsym :: [Anything Char] -> [Char] tablecolorsym ll = case genLookup "bgcolor" ll of Just x -> case x of ('#' : ys) -> let (p, colname, col) = colinfo ('l' : 'l' : ys) in if p then "\\cellcolor[rgb]" ++ col else "\\cellcolor{" ++ colname ++ "}" _ -> "\\cellcolor{" ++ x ++ "}" Nothing -> "" {-DHUN| the caption of a table is given in |+ or