{-| helper functions to convert tables from the parse tree notation to the latex notation -} module TableHelper where import MediaWikiParseTree import Tools import Text.Printf import Data.Char import qualified Data.Map as Map import Data.Map.Strict (Map) import Data.Maybe import Control.Monad import MyState {-| the width of a columns as float wrapped in a Just value of the maybe monad if it could be determined. Return the value Nothing of the maybe monad otherwise. The only parameter is of part of the parse tree that describe the opening part of the table cell element. -} widthOfColumn :: [Anything Char] -> Maybe Float widthOfColumn = msum . (map f) where f (Environment Attribute (Attr (k, v)) _) = listToMaybe $ do (z, _) <- reads v guard (k == "width") guard ('%' `elem` v) return (1.0e-2 * z) f _ = Nothing columnMultiplicityForSimple :: [Anything Char] -> Int columnMultiplicityForSimple x = case columnMultiplicity x of Just a -> a _ -> -1 raggedArrayOfWidths :: [Anything Char] -> [Maybe Float] -> [[Maybe Float]] raggedArrayOfWidths ((Environment TableRowSep _ _) : xs) temp = if temp == [] then raggedArrayOfWidths xs [] else temp : (raggedArrayOfWidths xs []) raggedArrayOfWidths ((Environment TableColSep _ x) : xs) temp = (raggedArrayOfWidths xs (temp ++ [widthOfColumn x] ++ (concat (replicate ((columnMultiplicityForSimple x) - 1) [Nothing])))) raggedArrayOfWidths ((Environment TableHeadColSep _ x) : xs) temp = (raggedArrayOfWidths xs (temp ++ [widthOfColumn x] ++ (concat (replicate ((columnMultiplicityForSimple x) - 1) [Nothing])))) raggedArrayOfWidths (_ : xs) temp = (raggedArrayOfWidths xs temp) raggedArrayOfWidths [] temp = [temp] numberOfColumns :: [Anything Char] -> Int numberOfColumns a = (maximum ([length x | x <- (raggedArrayOfWidths a [])])) initialListofWidths :: [Anything Char] -> [Maybe Float] initialListofWidths x = replicate (numberOfColumns x) Nothing listMax :: [Maybe Float] -> [Maybe Float] -> [Maybe Float] listMax (Just x : xs) (Just y : ys) = Just (max x y) : listMax xs ys listMax (x : xs) (y : ys) = (x `mplus` y) : listMax xs ys listMax [] (y : ys) = y : listMax [] ys listMax (x : xs) [] = x : listMax xs [] listMax [] [] = [] preliminaryWidths :: [[Maybe Float]] -> [Maybe Float] -> [Maybe Float] preliminaryWidths l k = foldl (listMax) k l standardColumnWitdh :: [Anything Char] -> Maybe Float standardColumnWitdh a = if columns > columnsWithDefinedWidth then if sumOfDefinedWidths > 1.0 then Nothing else Just $ (1.0 - sumOfDefinedWidths) / fromIntegral (columns - columnsWithDefinedWidth) else if sumOfDefinedWidths > 1.001 then Nothing else Just 0.0 where l = rawWidths a columns = numberOfColumns a columnsWithDefinedWidth = length (filter isJust l) sumOfDefinedWidths = sum (map fromJust (filter isJust l)) rawWidths :: [Anything Char] -> [Maybe Float] rawWidths a = (preliminaryWidths (raggedArrayOfWidths a []) (initialListofWidths a)) {- fallback function for the width of columns if the precompilation procedure for the width of columns is not available yet. So particularly when the precompilation is just running. Takes the parse tree representation of the table as first input parameter. Returns the list of widths of the columns. I am not documenting its subfunctions since the final width of the columns don't have anything to do with the width calculated here, still these preliminary widths are needed for the precompilation procedure -} columnWidths :: [Anything Char] -> [Float] columnWidths a = w where l = rawWidths a m = numberOfColumns a mf = fromIntegral m f = 1.0 - (scalefactor mf) w = fromMaybe (concat (replicate m [f / mf])) $ do ww <- standardColumnWitdh a return [x * f | x <- map (fromMaybe ww) l] {-| part of the correction calculation for the space between columns inside a table. Takes the number of columns as first input parameter returns a scaling factor -} scalefactor :: (Fractional a, Ord a) => a -> a scalefactor n | n <= 10 = 12.8 * (n) / 448.0 scalefactor _ = 12.8 * (11.0) / 448.0 {-| part of the correction calculation for the space between columns inside a table. Takes the number of columns as first input parameter returns a scaling factor -} tableScale :: Int -> Float tableScale nColumns = (1.0 / n) * (1.0 - (scalefactor n)) where n = fromIntegral nColumns {-| returns the latex environment name for a table environments. It the float passes as first parameter it 1.0 the result is longtable, otherwise it is tabular. This function is usually called with the width of the current cell in units of the line width as first parameter. Outside any table this value is 1.0 inside a table it is always smaller than 1.0. So this function will return tabular in case of a nested table, which is necessary since longtables can not be nested inside other longtables, but tabulars can be nested within longtables as well as tabulars. -} tableEnvironment :: Float -> String tableEnvironment 1.0 = "longtable" tableEnvironment _ = "tabular" innerTableSpecifier :: [Float] -> String -> String innerTableSpecifier (f : xs) t = ">{\\RaggedRight}p{" ++ (printf "%0.5f" f) ++ "\\linewidth}" ++ t ++ (innerTableSpecifier xs t) innerTableSpecifier [] _ = [] {-| Returns the table header which represents the width of the columns of a table in units of the line width. It takes a list of width as second parameter. It is understood that necessary correction for the width to compensate for the space needed by separations of columns have already been applied. The is the first boolean parameter is true rules will be drawn in the table, otherwise they won't. See also documentation of the wdth3 function in the module LatexRenderer. -} tableSpecifier :: Bool -> [Float] -> String tableSpecifier True f = '|' : (innerTableSpecifier f "|") tableSpecifier False f = (innerTableSpecifier f "") {-| Takes the multirowmap as first input parameter. See documentation on the function multiRowDictChangeStart in this module for details on the multirowmap. It returns true if there are currently no multirow cells active in the given multirowdict -} myempty :: Map Int (Int, Int) -> Bool myempty d = [x | x <- Map.toList d, (fst (snd x)) /= 0] == [] {-| takes the string found in the header symbol of a table or the opening tag of the html table tag. That is the place where the attributes are, but only understood as string so without parsing the attributes as map, and returns a boolean. It this boolean is true the rules of the table need to be drawn, otherwise they must not be drawn -} seperatingLinesRequested :: String -> Bool seperatingLinesRequested s = (isInfixOf2 "Prettytable" (map toLower s)) || (isInfixOf2 "prettytable" (map toLower s)) || (isInfixOf2 "wikitable" (map toLower s)) {-| returns the latex symbol for a horizontal line on the last row of a table, that is a horizontal rule, if the first boolean parameter is true, otherwise the empty string is returned. This function is usually being called with the first parameter indicating whether or not rules should be drawn with the table -} rowDelimiter :: Bool -> String rowDelimiter True = "\\\\ \\hline" rowDelimiter False = "" {-| returns the latex symbol for a horizontal line in a table, that is a horizontal rule, if the first boolean parameter is true, otherwise the empty string is returned. This function is usually being called with the first parameter indicating whether or not rules should be drawn with the table -} horizontalLine :: Bool -> String horizontalLine True = " \\hline" horizontalLine False = "" {-| return the latex symbol for a partly drawn inner horizontal line in a table, that is a horizontal rule. It has to be drawn only partly since multirow cells intersect with it. The second parameter is the multirowmap (the documentation on the function multiRowDictChangeStart in this module for details). The third parameter is the total number of columns in the table. The first parameter is a and index that is incremented during the course of this function and has to be 1 when called this function from outside -} makeCLines :: Int -> Map Int (Int, Int) -> Int -> [Char] makeCLines m d t = if m <= t then fromMaybe def $ do (a, b) <- Map.lookup m d guard (a /= 0) return $ makeCLines (m + (if b > 0 then b else 1)) d t else "" where def = "\\cline{" ++ (show m) ++ "-" ++ (show m) ++ "}" ++ (makeCLines (m + 1) d t) {-| return the latex symbol for an inner horizontal line in a table, that is a horizontal rule. If the first boolean parameter is true the rule is drawn otherwise it is not. If multirow cells interfere with this rule the rule is only drawn in parts as required. The second parameter is the multirowmap (the documentation on the function multiRowDictChangeStart in this module for details). The third parameter is the total number of columns in the table -} innerHorizontalLine :: Bool -> Map Int (Int, Int) -> Int -> String innerHorizontalLine b d m = if b then if myempty d then horizontalLine True else ' ' : makeCLines 1 d m else "" {-| the symbol in latex for separating columns. It is returned if the fist boolean parameter is true otherwise the empty string is returned. This function is usually called with the first parameter being true if the current column was not the first column of a row since the symbol is not needed before the start of the first column of a row. This is a contrast to html where the first cell of a row has its own td or th tag. The mediawiki markup notation is similar to html in this respect. -} columnSeperator :: Bool -> String columnSeperator True = "&" columnSeperator False = "" {-| takes the parse result of the attributes of a th tag or td tag or a corresponding header column separator or column separator as second parameter. It takes a key for this map of attributes as string as first parameter. If the key is present in the map, and the value found in the map at that key can be parsed as an Integer that integer is returned. If no value for the key could be found in the map or it could not be parsed as an integer the value Nothing of the Maybe monad is returned. -} genMultiplicity :: String -> [Anything Char] -> Maybe Int genMultiplicity s = msum . (map f) where f (Environment Attribute (Attr (k, v)) _) = listToMaybe $ do (z, _) <- reads v guard (k == s) return z f _ = Nothing {-| takes the parse result of the attributes of a th tag or td tag or a corresponding header column separator or column separator as second parameter. It takes a key for this map of attributes as string as first parameter. It returns a result of the lookup of the key in the map (so the value). as string wrapped into the Maybe monad. If no value for the key could be found in the map it returns the value Nothing of the Maybe monad. -} genLookup :: String -> [Anything Char] -> Maybe String genLookup s = msum . (map f) where f (Environment Attribute (Attr (k, v)) _) = listToMaybe $ do return () guard (k == s) return v f _ = Nothing {-| the column multiplicity of the current cell. The first parameter is the parse result of the inner part of the column separator of header column separator, that corresponds to the attributes of the th or td html elements. The result is an integer wrapped into the maybe monad. The value Nothing of the Maybe monad is returned if the attribute colspan is not present (or did not have a value parseable as Integer) within the first parameter -} columnMultiplicity :: [Anything Char] -> Maybe Int columnMultiplicity = genMultiplicity "colspan" {-| the row multiplicity of the current cell. The first parameter is the parse result of the inner part of the column separator of header column separator, that corresponds to the attributes of the th or td html elements. The result is an integer wrapped into the maybe monad. The value Nothing of the Maybe monad is returned if the attribute rowspan is not present (or did not have a value parseable as Integer) within the first parameter -} rowMultiplicity :: [Anything Char] -> Maybe Int rowMultiplicity = genMultiplicity "rowspan" {-| the column multiplicity of the current cell. The first parameter is the parse result of the inner part of the column separator of header column separator, that corresponds to the attributes of the th or td html elements.The result is an integer that default to zero -} columnMultiplicityForCounting :: [Anything Char] -> Int columnMultiplicityForCounting = (fromMaybe 1) . columnMultiplicity {-| return the symbol for the start of a multicolumn cell in latex. The first parameter is the parse result of the inner part of the column separator of header column separator, that corresponds to the attributes of the th or td html elements. It takes the list of the final widths of all columns of the table as second parameter. It takes to the column index of the current column as third parameter. The fourth parameter is a boolean if it is true rules will be drawn in the table otherwise they won't. The fifth parameter is the table state. That is the mutable state that exists during rendering of a table. -} multiColumnStartSymbol :: [Anything Char] -> [Float] -> Int -> Bool -> TableState -> String multiColumnStartSymbol l f i t st = fromMaybe "" $ do n <- columnMultiplicity l return $ "\\multicolumn{" ++ (show n) ++ "}{" ++ (spec n) ++ "}{" where spec mm = case activeColumn st of Nothing -> tableSpecifier t (mylist mm) _ -> "l" mylist nn = [min 1.0 (((1.0 - (scalefactor 1)) * (sum (take nn (drop (i - 1) f)))) / (1.0 - (scalefactor (fromIntegral nn))))] {-| return the symbol for the end of a multicolumn cell in latex. The first boolean parameter tells if the cell is actually a multicolumn cell. If it is false the empty string is returned instead -} multiColumnEndSymbol :: Bool -> String multiColumnEndSymbol True = "}" multiColumnEndSymbol False = "" {-| return the symbol for the end of a multirow cell in latex. The first boolean parameter tells if the cell is actually a multirow cell. If it is false the empty string is returned instead -} multiRowEndSymbol :: Bool -> String multiRowEndSymbol True = "}" multiRowEndSymbol False = "" {-| This function takes a default value as first parameter. It takes a predicate as second parameter. It take a map from Int to a two tuple of Int as firth parameter. It takes a key for that map as fourth parameter. It takes a function mapping a two tuple of Int to the same type as the default value as third parameter. It tries to find a two tuple of Ints (that is value) in the map under the given key. If it finds one and the predicate returns true on the first element of that two tuple it returns the result of the function on the two tuple. In any other case it returns the default value -} withDefault :: t -> (Int -> Bool) -> (Int -> Int -> t) -> Int -> Map Int (Int, Int) -> t withDefault def p f i d = fromMaybe def $ do (a, b) <- Map.lookup i d guard $ p a return $ f a b {-| this function return the vertical separator for column for the table header in latex. The only parameter is a boolean. If it is true rules will be drawn in the table, otherwise they won't -} verticalSeperator :: Bool -> [Char] verticalSeperator True = "|" verticalSeperator False = "" {-| the function returns the a string to be inserted into a latex document for multirows at a when a new column (that is a new cell) starts, thats when a column separator, or header column separator is encountered. The first parameter it the index of the current column. The second parameter is the multirowdict (see documentation of the multiRowDictChangeStart function in this module). The third parameter is a boolean. If it is true rules will be drawn in the table, otherwise they won't -} multiRowSymbol :: Int -> Map Int (Int, Int) -> Bool -> String multiRowSymbol i d t = withDefault "" (> 0) (\ _ b -> "\\multicolumn{" ++ (show b) ++ "}{" ++ (verticalSeperator t) ++ "c" ++ (verticalSeperator t) ++ "}{}&" ++ (multiRowSymbol (i + b) d t)) i d {-| the function returns the a string to be inserted into a latex document for multirows at a when a new row starts, thats when a row separator is encountered. The first parameter it the index of the current column. The second parameter is the multirowdict (see documentation of the multiRowDictChangeStart function in this module). The third parameter is a boolean. If it is true rules will be drawn in the table, otherwise they won't -} multiRowSymbolForRowSep :: Int -> Map Int (Int, Int) -> Bool -> String multiRowSymbolForRowSep i d t = withDefault "" (> 0) (\ _ b -> "&\\multicolumn{" ++ (show b) ++ "}{" ++ (verticalSeperator t) ++ "c" ++ (verticalSeperator t) ++ "}{}" ++ (multiRowSymbolForRowSep (i + b) d) t) i d {-| the function returns the a string to be inserted into a latex document for multirows at the end of the table. The first parameter it the index of the current column. The second parameter is the multirowdict (see documentation of the multiRowDictChangeStart function in this module). The third parameter is a boolean. If it is true rules will be drawn in the table, otherwise they won't -} multiRowSymbolForTableEnd :: Int -> Map Int (Int, Int) -> Bool -> String multiRowSymbolForTableEnd i d t = withDefault "" (> 0) (\ _ b -> "&\\multicolumn{" ++ (show b) ++ "}{" ++ (verticalSeperator t) ++ "c" ++ (verticalSeperator t) ++ "}{}" ++ (multiRowSymbolForTableEnd (i + 1) d t)) i d {-| in case of a multirow, that cell has to be skipped further down. So if I got a multirow in row 1 column 2 with a rowspan of 2 or more I need to expand row 2 column 1 by 1 . So if I passed row 2 column one I am not in row 2 column 2 since that is where the multirow cell resides, I am rather in row 2 cell 3. And if there are more multicolumns involved I am more possible even further right. So this function just tell me how many cells I have to skip. The first parameter is the index of the current column. The second parameter is the multirowdict. See also documentation on the function multiRowDictChangeStart in this module -} multiRowCount :: Int -> Map Int (Int, Int) -> Int multiRowCount i d = withDefault 0 (/= 0) (\ _ b -> b + (multiRowCount (i + 1) d)) i d {-| see documentation on multiRowDictChangeStart. This function take the index of the current column as first parameter. This function takes the multiRowDict as first parameter and returns the modified version of it. -} multiRowDictChangeEnd :: Int -> Map Int (Int, Int) -> Map Int (Int, Int) multiRowDictChangeEnd i d = withDefault d (/= 0) (\ a b -> multiRowDictChangeEnd (i + 1) (Map.insert i (a - 1, b) d)) i d {-| The multiRowDict is a facility for keeping track of cells spanning multiple rows. It is stored as mutable state in the type TableState in the parameter multiRowMap. It is passed to this function as second parameter. This function return an updated version of it. It is a map mapping and Int to a tuple whose both elements are also ints. It the key is the column index and the value is a pair (rowmultiplicity, columnmultiplicity). The rowmultiplicity is the number of rows the cell spans. This number is decrease every time a row ends. So it actually says how many rows the columns spans further down from the current column. The column multiplicity is the number of columns the cell spans. This function take the index of the current column as first parameter. This function takes the parse result of the opening part of the cell environment of the current cell as third input parameter. This function calculates only the changes in the multirowdict for the opening environment of the cell. You should not use this function but rather use multiRowDictChange since this also considers the effect by ending of cells -} multiRowDictChangeStart :: Int -> Map Int (Int, Int) -> [Anything Char] -> Map Int (Int, Int) multiRowDictChangeStart i d l = fromMaybe d $ do n <- rowMultiplicity l return (Map.insert i ((n - 1), c) d) where c = (columnMultiplicityForCounting l) {-| calculate the full change to the multirowdict. See documentation on the function multiRowDictChangeStart in this module for more information on the multirowdict. The first parameter is the index of the current column. The second parameter is the current multirowdict. This function takes the parse result of the opening part of the cell environment of the current cell as third input parameter. This function returns the updated multirowdict. -} multiRowDictChange :: Int -> Map Int (Int, Int) -> [Anything Char] -> Map Int (Int, Int) multiRowDictChange i d l = multiRowDictChangeStart n (multiRowDictChangeEnd i d) l where n = i + (multiRowCount i d) {-| returns the latex symbol for the start of a multirow cell. That is a cell spanning multiple rows. The second parameter is activeColumn. This is an integer wrapped in the maybe monad. If it is has the value Just then the row is to be renderer in a special mode. This mode is needed to determine the width of the columns. In this special mode no line break occur and width of the paper is infinite, so that the width of the of each column is its respective natural width. So there is no limit on the width of the column. If the value is Nothing this means that the table is typeset in normal mode and the width is to be limited. The first parameter is the inner parse result of the row separator containing information on whether or not the cell is multirow -} multiRowStartSymbol :: [Anything Char] -> Maybe Int -> String multiRowStartSymbol l m = fromMaybe "" $ do n <- rowMultiplicity l return $ "\\multirow{" ++ (show n) ++ "}{" ++ (if isJust m then "*" else "\\linewidth") ++ "}{" {-| a symbol to be added at the end of header cell in order to make its content bold. The only boolean parameter is use to indicate whether the cell currently under consideration is a header cell. Otherwise the empty string is returned. -} headendsym :: Bool -> String headendsym False = "" headendsym True = "}" {-| a symbol to be added at the start of header cell in order to make its content bold -} headstartsym :: String headstartsym = "{\\bfseries " {-| the symbol to be inserted into the latex document, for the end of a row in a table. The first boolean parameter is should be true if the current table is not nested in an other one. The second boolean parameter should be true if the column was the last column of the header of the table. The header of the table is repeated by latex each time page wrapping inside the table occurs. See also documentation of the longtable latex package -} rowendsymb :: Bool -> Bool -> String rowendsymb True True = "\\endhead " rowendsymb _ _ = "\\\\"