{-# OPTIONS_GHC -fno-warn-deprecations #-} {- Copyright (C) 2006-8 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | Module : Text.Pandoc.Writers.HTML Copyright : Copyright (C) 2006-8 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane Stability : alpha Portability : portable Conversion of 'Pandoc' documents to HTML. -} module Text.Pandoc.Writers.HTML ( writeHtml , writeHtmlString ) where import Text.Pandoc.Definition import Text.Pandoc.LaTeXMathML import Text.Pandoc.CharacterReferences ( decodeCharacterReferences ) import Text.Pandoc.Shared import Text.Pandoc.Readers.TeXMath import Text.Pandoc.Highlighting ( highlightHtml, defaultHighlightingCss ) import Numeric ( showHex ) import Data.Char ( ord, toLower, isAlpha ) import Data.List ( isPrefixOf, intercalate ) import qualified Data.Set as S import Control.Monad.State import Text.XHtml.Transitional hiding ( stringToHtml ) data WriterState = WriterState { stNotes :: [Html] -- ^ List of notes , stIds :: [String] -- ^ List of header identifiers , stMath :: Bool -- ^ Math is used in document , stCSS :: S.Set String -- ^ CSS to include in header } deriving Show defaultWriterState :: WriterState defaultWriterState = WriterState {stNotes= [], stIds = [], stMath = False, stCSS = S.empty} -- Helpers to render HTML with the appropriate function. render :: (HTML html) => WriterOptions -> html -> String render opts = if writerWrapText opts then renderHtml else showHtml renderFragment :: (HTML html) => WriterOptions -> html -> String renderFragment opts = if writerWrapText opts then renderHtmlFragment else showHtmlFragment -- | Slightly modified version of Text.XHtml's stringToHtml. -- Only uses numerical entities for 0xff and greater. -- Adds  . stringToHtml :: String -> Html stringToHtml = primHtml . concatMap fixChar where fixChar '<' = "<" fixChar '>' = ">" fixChar '&' = "&" fixChar '"' = """ fixChar '\160' = " " fixChar c | ord c < 0xff = [c] fixChar c = "&#" ++ show (ord c) ++ ";" -- | Convert Pandoc document to Html string. writeHtmlString :: WriterOptions -> Pandoc -> String writeHtmlString opts = if writerStandalone opts then render opts . writeHtml opts else renderFragment opts . writeHtml opts -- | Convert Pandoc document to Html structure. writeHtml :: WriterOptions -> Pandoc -> Html writeHtml opts (Pandoc (Meta tit authors date) blocks) = let titlePrefix = writerTitlePrefix opts topTitle = evalState (inlineListToHtml opts tit) defaultWriterState topTitle' = if null titlePrefix then topTitle else if null tit then stringToHtml titlePrefix else titlePrefix +++ " - " +++ topTitle metadata = thetitle topTitle' +++ meta ! [httpequiv "Content-Type", content "text/html; charset=UTF-8"] +++ meta ! [name "generator", content "pandoc"] +++ (toHtmlFromList $ map (\a -> meta ! [name "author", content a]) authors) +++ (if null date then noHtml else meta ! [name "date", content date]) titleHeader = if writerStandalone opts && not (null tit) && not (writerS5 opts) then h1 ! [theclass "title"] $ topTitle else noHtml headerBlocks = filter isHeaderBlock blocks ids = uniqueIdentifiers $ map (\(Header _ lst) -> lst) headerBlocks toc = if writerTableOfContents opts then tableOfContents opts headerBlocks ids else noHtml (blocks', newstate) = runState (blockListToHtml opts blocks) (defaultWriterState {stIds = ids}) cssLines = stCSS newstate css = if S.null cssLines then noHtml else style ! [thetype "text/css"] $ primHtml $ '\n':(unlines $ S.toList cssLines) math = if stMath newstate then case writerHTMLMathMethod opts of LaTeXMathML Nothing -> primHtml latexMathMLScript LaTeXMathML (Just url) -> script ! [src url, thetype "text/javascript"] $ noHtml _ -> noHtml else noHtml head' = header $ metadata +++ math +++ css +++ primHtml (writerHeader opts) notes = reverse (stNotes newstate) before = primHtml $ writerIncludeBefore opts after = primHtml $ writerIncludeAfter opts thebody = before +++ titleHeader +++ toc +++ blocks' +++ footnoteSection notes +++ after in if writerStandalone opts then head' +++ body thebody else thebody -- | Construct table of contents from list of header blocks and identifiers. -- Assumes there are as many identifiers as header blocks. tableOfContents :: WriterOptions -> [Block] -> [String] -> Html tableOfContents _ [] _ = noHtml tableOfContents opts headers ids = let opts' = opts { writerIgnoreNotes = True } contentsTree = hierarchicalize headers contents = evalState (mapM (elementToListItem opts') contentsTree) (defaultWriterState {stIds = ids}) in thediv ! [identifier "toc"] $ unordList contents -- | Converts an Element to a list item for a table of contents, -- retrieving the appropriate identifier from state. elementToListItem :: WriterOptions -> Element -> State WriterState Html elementToListItem _ (Blk _) = return noHtml elementToListItem opts (Sec headerText subsecs) = do st <- get let ids = stIds st let (id', rest) = if null ids then ("", []) else (head ids, tail ids) put $ st {stIds = rest} txt <- inlineListToHtml opts headerText subHeads <- mapM (elementToListItem opts) subsecs let subList = if null subHeads then noHtml else unordList subHeads return $ (anchor ! [href ("#" ++ id'), identifier ("TOC-" ++ id')] $ txt) +++ subList -- | Convert list of Note blocks to a footnote
. -- Assumes notes are sorted. footnoteSection :: [Html] -> Html footnoteSection notes = if null notes then noHtml else thediv ! [theclass "footnotes"] $ hr +++ (olist << notes) -- | Parse a mailto link; return Just (name, domain) or Nothing. parseMailto :: String -> Maybe (String, String) parseMailto ('m':'a':'i':'l':'t':'o':':':addr) = let (name', rest) = span (/='@') addr domain = drop 1 rest in Just (name', domain) parseMailto _ = Nothing -- | Obfuscate a "mailto:" link using Javascript. obfuscateLink :: WriterOptions -> String -> String -> Html obfuscateLink opts txt s = let s' = map toLower s in case parseMailto s' of (Just (name', domain)) -> let domain' = substitute "." " dot " domain at' = obfuscateChar '@' (linkText, altText) = if txt == drop 7 s' -- autolink then ("''+e+''", name' ++ " at " ++ domain') else ("'" ++ txt ++ "'", txt ++ " (" ++ name' ++ " at " ++ domain' ++ ")") in if writerStrictMarkdown opts then -- need to use primHtml or &'s are escaped to & in URL primHtml $ "" ++ (obfuscateString txt) ++ "" else (script ! [thetype "text/javascript"] $ primHtml ("\n\n")) +++ noscript (primHtml $ obfuscateString altText) _ -> anchor ! [href s] $ primHtml txt -- malformed email -- | Obfuscate character as entity. obfuscateChar :: Char -> String obfuscateChar char = let num = ord char numstr = if even num then show num else "x" ++ showHex num "" in "&#" ++ numstr ++ ";" -- | Obfuscate string using entities. obfuscateString :: String -> String obfuscateString = concatMap obfuscateChar . decodeCharacterReferences -- | True if character is a punctuation character (unicode). isPunctuation :: Char -> Bool isPunctuation c = let c' = ord c in if c `elem` "!\"'()*,-./:;<>?[\\]`{|}~" || c' >= 0x2000 && c' <= 0x206F || c' >= 0xE000 && c' <= 0xE0FF then True else False -- | Add CSS for document header. addToCSS :: String -> State WriterState () addToCSS item = do st <- get let current = stCSS st put $ st {stCSS = S.insert item current} -- | Convert Pandoc inline list to plain text identifier. inlineListToIdentifier :: [Inline] -> String inlineListToIdentifier = dropWhile (not . isAlpha) . inlineListToIdentifier' inlineListToIdentifier' :: [Inline] -> [Char] inlineListToIdentifier' [] = "" inlineListToIdentifier' (x:xs) = xAsText ++ inlineListToIdentifier' xs where xAsText = case x of Str s -> filter (\c -> c == '-' || not (isPunctuation c)) $ intercalate "-" $ words $ map toLower s Emph lst -> inlineListToIdentifier' lst Strikeout lst -> inlineListToIdentifier' lst Superscript lst -> inlineListToIdentifier' lst SmallCaps lst -> inlineListToIdentifier' lst Subscript lst -> inlineListToIdentifier' lst Strong lst -> inlineListToIdentifier' lst Quoted _ lst -> inlineListToIdentifier' lst Cite _ lst -> inlineListToIdentifier' lst Code s -> s Space -> "-" EmDash -> "-" EnDash -> "-" Apostrophe -> "" Ellipses -> "" LineBreak -> "-" Math _ _ -> "" TeX _ -> "" HtmlInline _ -> "" Link lst _ -> inlineListToIdentifier' lst Image lst _ -> inlineListToIdentifier' lst Note _ -> "" -- | Return unique identifiers for list of inline lists. uniqueIdentifiers :: [[Inline]] -> [String] uniqueIdentifiers ls = let addIdentifier (nonuniqueIds, uniqueIds) l = let new = inlineListToIdentifier l matches = length $ filter (== new) nonuniqueIds new' = (if null new then "section" else new) ++ if matches > 0 then ("-" ++ show matches) else "" in (new:nonuniqueIds, new':uniqueIds) in reverse $ snd $ foldl addIdentifier ([],[]) ls -- | Convert Pandoc block element to HTML. blockToHtml :: WriterOptions -> Block -> State WriterState Html blockToHtml _ Null = return $ noHtml blockToHtml opts (Plain lst) = inlineListToHtml opts lst blockToHtml opts (Para lst) = inlineListToHtml opts lst >>= (return . paragraph) blockToHtml _ (RawHtml str) = return $ primHtml str blockToHtml _ (HorizontalRule) = return $ hr blockToHtml _ (CodeBlock attr@(_,classes,_) rawCode) = do case highlightHtml attr rawCode of Left _ -> -- change leading newlines into
tags, because some -- browsers ignore leading newlines in pre blocks let (leadingBreaks, rawCode') = span (=='\n') rawCode in return $ pre ! (if null classes then [] else [theclass $ unwords classes]) $ thecode << (replicate (length leadingBreaks) br +++ [stringToHtml $ rawCode' ++ "\n"]) Right h -> addToCSS defaultHighlightingCss >> return h blockToHtml opts (BlockQuote blocks) = -- in S5, treat list in blockquote specially -- if default is incremental, make it nonincremental; -- otherwise incremental if writerS5 opts then let inc = not (writerIncremental opts) in case blocks of [BulletList lst] -> blockToHtml (opts {writerIncremental = inc}) (BulletList lst) [OrderedList attribs lst] -> blockToHtml (opts {writerIncremental = inc}) (OrderedList attribs lst) _ -> blockListToHtml opts blocks >>= (return . blockquote) else blockListToHtml opts blocks >>= (return . blockquote) blockToHtml opts (Header level lst) = do contents <- inlineListToHtml opts lst st <- get let ids = stIds st let (id', rest) = if null ids then ("", []) else (head ids, tail ids) put $ st {stIds = rest} let attribs = if writerStrictMarkdown opts && not (writerTableOfContents opts) then [] else [identifier id'] let contents' = if writerTableOfContents opts then anchor ! [href ("#TOC-" ++ id')] $ contents else contents return $ case level of 1 -> h1 contents' ! attribs 2 -> h2 contents' ! attribs 3 -> h3 contents' ! attribs 4 -> h4 contents' ! attribs 5 -> h5 contents' ! attribs 6 -> h6 contents' ! attribs _ -> paragraph contents' ! attribs blockToHtml opts (BulletList lst) = do contents <- mapM (blockListToHtml opts) lst let attribs = if writerIncremental opts then [theclass "incremental"] else [] return $ unordList ! attribs $ contents blockToHtml opts (OrderedList (startnum, numstyle, _) lst) = do contents <- mapM (blockListToHtml opts) lst let numstyle' = camelCaseToHyphenated $ show numstyle let attribs = (if writerIncremental opts then [theclass "incremental"] else []) ++ (if startnum /= 1 then [start startnum] else []) ++ (if numstyle /= DefaultStyle then [thestyle $ "list-style-type: " ++ numstyle' ++ ";"] else []) return $ ordList ! attribs $ contents blockToHtml opts (DefinitionList lst) = do contents <- mapM (\(term, def) -> do term' <- inlineListToHtml opts term def' <- blockListToHtml opts def return $ (term', def')) lst let attribs = if writerIncremental opts then [theclass "incremental"] else [] return $ defList ! attribs $ contents blockToHtml opts (Table capt aligns widths headers rows') = do let alignStrings = map alignmentToString aligns captionDoc <- if null capt then return noHtml else inlineListToHtml opts capt >>= return . caption colHeads <- colHeadsToHtml opts alignStrings widths headers rows'' <- mapM (tableRowToHtml opts alignStrings) rows' return $ table $ captionDoc +++ colHeads +++ rows'' colHeadsToHtml :: WriterOptions -> [[Char]] -> [Double] -> [[Block]] -> State WriterState Html colHeadsToHtml opts alignStrings widths headers = do heads <- sequence $ zipWith3 (\alignment columnwidth item -> tableItemToHtml opts th alignment columnwidth item) alignStrings widths headers return $ tr $ toHtmlFromList heads alignmentToString :: Alignment -> [Char] alignmentToString alignment = case alignment of AlignLeft -> "left" AlignRight -> "right" AlignCenter -> "center" AlignDefault -> "left" tableRowToHtml :: WriterOptions -> [[Char]] -> [[Block]] -> State WriterState Html tableRowToHtml opts aligns columns = (sequence $ zipWith3 (tableItemToHtml opts td) aligns (repeat 0) columns) >>= return . tr . toHtmlFromList tableItemToHtml :: WriterOptions -> (Html -> Html) -> [Char] -> Double -> [Block] -> State WriterState Html tableItemToHtml opts tag' align' width' item = do contents <- blockListToHtml opts item let attrib = [align align'] ++ if width' /= 0 then [thestyle ("width: " ++ (show (truncate (100 * width') :: Integer)) ++ "%;")] else [] return $ tag' ! attrib $ contents blockListToHtml :: WriterOptions -> [Block] -> State WriterState Html blockListToHtml opts lst = mapM (blockToHtml opts) lst >>= return . toHtmlFromList -- | Convert list of Pandoc inline elements to HTML. inlineListToHtml :: WriterOptions -> [Inline] -> State WriterState Html inlineListToHtml opts lst = mapM (inlineToHtml opts) lst >>= return . toHtmlFromList -- | Convert Pandoc inline element to HTML. inlineToHtml :: WriterOptions -> Inline -> State WriterState Html inlineToHtml opts inline = case inline of (Str str) -> return $ stringToHtml str (Space) -> return $ stringToHtml " " (LineBreak) -> return $ br (EmDash) -> return $ primHtmlChar "mdash" (EnDash) -> return $ primHtmlChar "ndash" (Ellipses) -> return $ primHtmlChar "hellip" (Apostrophe) -> return $ primHtmlChar "rsquo" (Emph lst) -> inlineListToHtml opts lst >>= return . emphasize (Strong lst) -> inlineListToHtml opts lst >>= return . strong (Code str) -> return $ thecode << str (Strikeout lst) -> inlineListToHtml opts lst >>= return . (thespan ! [thestyle "text-decoration: line-through;"]) (SmallCaps lst) -> inlineListToHtml opts lst >>= return . (thespan ! [thestyle "font-variant: small-caps;"]) (Superscript lst) -> inlineListToHtml opts lst >>= return . sup (Subscript lst) -> inlineListToHtml opts lst >>= return . sub (Quoted quoteType lst) -> let (leftQuote, rightQuote) = case quoteType of SingleQuote -> (primHtmlChar "lsquo", primHtmlChar "rsquo") DoubleQuote -> (primHtmlChar "ldquo", primHtmlChar "rdquo") in do contents <- inlineListToHtml opts lst return $ leftQuote +++ contents +++ rightQuote (Math t str) -> modify (\st -> st {stMath = True}) >> (case writerHTMLMathMethod opts of LaTeXMathML _ -> return $ if t == InlineMath then primHtml ("$" ++ str ++ "$") else primHtml ("$$" ++ str ++ "$$") MimeTeX url -> return $ image ! [src (url ++ "?" ++ str), alt str, title str] GladTeX -> return $ primHtml $ "" ++ str ++ "" PlainMath -> inlineListToHtml opts (readTeXMath str) >>= return . (thespan ! [theclass "math"])) (TeX str) -> case writerHTMLMathMethod opts of LaTeXMathML _ -> do modify (\st -> st {stMath = True}) return $ primHtml str _ -> return noHtml (HtmlInline str) -> return $ primHtml str (Link [Code str] (s,_)) | "mailto:" `isPrefixOf` s -> return $ obfuscateLink opts str s (Link txt (s,_)) | "mailto:" `isPrefixOf` s -> do linkText <- inlineListToHtml opts txt return $ obfuscateLink opts (show linkText) s (Link txt (s,tit)) -> do linkText <- inlineListToHtml opts txt return $ anchor ! ([href s] ++ if null tit then [] else [title tit]) $ linkText (Image txt (s,tit)) -> do alternate <- inlineListToHtml opts txt let alternate' = renderFragment opts alternate let attributes = [src s] ++ (if null tit then [] else [title tit]) ++ if null txt then [] else [alt alternate'] return $ image ! attributes -- note: null title included, as in Markdown.pl (Note contents) -> do st <- get let notes = stNotes st let number = (length notes) + 1 let ref = show number htmlContents <- blockListToNote opts ref contents -- push contents onto front of notes put $ st {stNotes = (htmlContents:notes)} return $ anchor ! [href ("#fn" ++ ref), theclass "footnoteRef", identifier ("fnref" ++ ref)] << sup << ref (Cite _ il) -> inlineListToHtml opts il blockListToNote :: WriterOptions -> String -> [Block] -> State WriterState Html blockListToNote opts ref blocks = -- If last block is Para or Plain, include the backlink at the end of -- that block. Otherwise, insert a new Plain block with the backlink. let backlink = [HtmlInline $ " "] blocks' = if null blocks then [] else let lastBlock = last blocks otherBlocks = init blocks in case lastBlock of (Para lst) -> otherBlocks ++ [Para (lst ++ backlink)] (Plain lst) -> otherBlocks ++ [Plain (lst ++ backlink)] _ -> otherBlocks ++ [lastBlock, Plain backlink] in do contents <- blockListToHtml opts blocks' return $ li ! [identifier ("fn" ++ ref)] $ contents