{- Copyright (C) 2010 John MacFarlane This program is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free Software Foundation; either version 2 of the License, or (at your option) any later version. This program is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with this program; if not, write to the Free Software Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -} {- | Module : Text.Pandoc.Writers.EPUB Copyright : Copyright (C) 2010 John MacFarlane License : GNU GPL, version 2 or above Maintainer : John MacFarlane Stability : alpha Portability : portable Conversion of 'Pandoc' documents to EPUB. -} module Text.Pandoc.Writers.EPUB ( writeEPUB ) where import Data.IORef import Data.Maybe ( fromMaybe, isNothing ) import Data.List ( findIndices, isPrefixOf ) import System.Environment ( getEnv ) import System.FilePath ( (), takeBaseName, takeExtension ) import qualified Data.ByteString.Lazy as B import Data.ByteString.Lazy.UTF8 ( fromString ) import Codec.Archive.Zip import System.Time import Text.Pandoc.Shared hiding ( Element ) import Text.Pandoc.Definition import Text.Pandoc.Generic import Control.Monad (liftM) import Text.XML.Light hiding (ppTopElement) import Text.Pandoc.UUID import Text.Pandoc.Writers.HTML import Text.Pandoc.Writers.Markdown ( writePlain ) import Data.Char ( toLower ) -- | Produce an EPUB file from a Pandoc document. writeEPUB :: Maybe String -- ^ EPUB stylesheet specified at command line -> WriterOptions -- ^ Writer options -> Pandoc -- ^ Document to convert -> IO B.ByteString writeEPUB mbStylesheet opts doc@(Pandoc meta _) = do (TOD epochtime _) <- getClockTime let mkEntry path content = toEntry path epochtime content let opts' = opts{ writerEmailObfuscation = NoObfuscation , writerStandalone = True , writerWrapText = False } let sourceDir = writerSourceDirectory opts' -- title page let vars = writerVariables opts' let tpContent = fromString $ writeHtmlString opts'{writerTemplate = pageTemplate ,writerVariables = ("titlepage","yes"):vars} (Pandoc meta []) let tpEntry = mkEntry "title_page.xhtml" tpContent -- handle pictures picsRef <- newIORef [] Pandoc _ blocks <- liftM (bottomUp transformBlock) $ bottomUpM (transformInlines (writerHTMLMathMethod opts) sourceDir picsRef) doc pics <- readIORef picsRef let readPicEntry (oldsrc, newsrc) = readEntry [] oldsrc >>= \e -> return e{ eRelativePath = newsrc } picEntries <- mapM readPicEntry pics -- body pages let isH1 (Header 1 _) = True isH1 _ = False let h1Indices = dropWhile (== 0) $ findIndices isH1 blocks let chunks = splitByIndices h1Indices blocks let titleize (Header 1 xs : ys) = Pandoc meta{docTitle = xs} ys titleize xs = Pandoc meta xs let chapToHtml = writeHtmlString opts'{ writerTemplate = pageTemplate , writerHTMLMathMethod = PlainMath } let chapters = map titleize chunks let chapterToEntry :: Int -> Pandoc -> Entry chapterToEntry num chap = mkEntry ("ch" ++ show num ++ ".xhtml") $ fromString $ chapToHtml chap let chapterEntries = zipWith chapterToEntry [1..] chapters -- contents.opf lang <- catch (liftM (takeWhile (/='.')) $ getEnv "lang") (\_ -> return "en-US") uuid <- getRandomUUID let chapterNode ent = unode "item" ! [("id", takeBaseName $ eRelativePath ent), ("href", eRelativePath ent), ("media-type", "application/xhtml+xml")] $ () let chapterRefNode ent = unode "itemref" ! [("idref", takeBaseName $ eRelativePath ent)] $ () let pictureNode ent = unode "item" ! [("id", takeBaseName $ eRelativePath ent), ("href", eRelativePath ent), ("media-type", fromMaybe "application/octet-stream" $ imageTypeOf $ eRelativePath ent)] $ () let plainify t = removeTrailingSpace $ writePlain opts'{ writerStandalone = False } $ Pandoc meta [Plain t] let plainTitle = plainify $ docTitle meta let plainAuthors = map plainify $ docAuthors meta let contentsData = fromString $ ppTopElement $ unode "package" ! [("version","2.0") ,("xmlns","http://www.idpf.org/2007/opf") ,("unique-identifier","BookId")] $ [ metadataElement (writerEPUBMetadata opts') uuid lang plainTitle plainAuthors , unode "manifest" $ [ unode "item" ! [("id","ncx"), ("href","toc.ncx") ,("media-type","application/x-dtbncx+xml")] $ () , unode "item" ! [("id","style"), ("href","stylesheet.css") ,("media-type","text/css")] $ () ] ++ map chapterNode (tpEntry : chapterEntries) ++ map pictureNode picEntries , unode "spine" ! [("toc","ncx")] $ map chapterRefNode (tpEntry : chapterEntries) ] let contentsEntry = mkEntry "content.opf" contentsData -- toc.ncx let navPointNode ent n tit = unode "navPoint" ! [("id", "navPoint-" ++ show n) ,("playOrder", show n)] $ [ unode "navLabel" $ unode "text" tit , unode "content" ! [("src", eRelativePath ent)] $ () ] let tocData = fromString $ ppTopElement $ unode "ncx" ! [("version","2005-1") ,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $ [ unode "head" [ unode "meta" ! [("name","dtb:uid") ,("content", show uuid)] $ () , unode "meta" ! [("name","dtb:depth") ,("content", "1")] $ () , unode "meta" ! [("name","dtb:totalPageCount") ,("content", "0")] $ () , unode "meta" ! [("name","dtb:maxPageNumber") ,("content", "0")] $ () ] , unode "docTitle" $ unode "text" $ plainTitle , unode "navMap" $ zipWith3 navPointNode (tpEntry : chapterEntries) [1..(length chapterEntries + 1)] ("Title Page" : map (\(Pandoc m _) -> plainify $ docTitle m) chapters) ] let tocEntry = mkEntry "toc.ncx" tocData -- mimetype let mimetypeEntry = mkEntry "mimetype" $ fromString "application/epub+zip" -- container.xml let containerData = fromString $ ppTopElement $ unode "container" ! [("version","1.0") ,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $ unode "rootfiles" $ unode "rootfile" ! [("full-path","content.opf") ,("media-type","application/oebps-package+xml")] $ () let containerEntry = mkEntry "META-INF/container.xml" containerData -- stylesheet stylesheet <- case mbStylesheet of Just s -> return s Nothing -> readDataFile (writerUserDataDir opts) "epub.css" let stylesheetEntry = mkEntry "stylesheet.css" $ fromString stylesheet -- construct archive let archive = foldr addEntryToArchive emptyArchive (mimetypeEntry : containerEntry : stylesheetEntry : tpEntry : contentsEntry : tocEntry : (picEntries ++ chapterEntries) ) return $ fromArchive archive metadataElement :: String -> UUID -> String -> String -> [String] -> Element metadataElement metadataXML uuid lang title authors = let userNodes = parseXML metadataXML elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/") ,("xmlns:opf","http://www.idpf.org/2007/opf")] $ filter isDublinCoreElement $ onlyElems userNodes dublinElements = ["contributor","coverage","creator","date", "description","format","identifier","language","publisher", "relation","rights","source","subject","title","type"] isDublinCoreElement e = qPrefix (elName e) == Just "dc" && qName (elName e) `elem` dublinElements contains e n = not (null (findElements (QName n Nothing (Just "dc")) e)) newNodes = [ unode "dc:title" title | not (elt `contains` "title") ] ++ [ unode "dc:language" lang | not (elt `contains` "language") ] ++ [ unode "dc:identifier" ! [("id","BookId")] $ show uuid | not (elt `contains` "identifier") ] ++ [ unode "dc:creator" ! [("opf:role","aut")] $ a | a <- authors ] in elt{ elContent = elContent elt ++ map Elem newNodes } transformInlines :: HTMLMathMethod -> FilePath -> IORef [(FilePath, FilePath)] -- ^ (oldpath, newpath) images -> [Inline] -> IO [Inline] transformInlines _ _ _ (Image lab (src,_) : xs) | isNothing (imageTypeOf src) = return $ Emph lab : xs transformInlines _ sourceDir picsRef (Image lab (src,tit) : xs) = do pics <- readIORef picsRef let oldsrc = sourceDir src let ext = takeExtension src newsrc <- case lookup oldsrc pics of Just n -> return n Nothing -> do let new = "images/img" ++ show (length pics) ++ ext modifyIORef picsRef ( (oldsrc, new): ) return new return $ Image lab (newsrc, tit) : xs transformInlines (MathML _) _ _ (x@(Math _ _) : xs) = do let writeHtmlInline opts z = removeTrailingSpace $ writeHtmlString opts $ Pandoc (Meta [] [] []) [Plain [z]] mathml = writeHtmlInline defaultWriterOptions{ writerHTMLMathMethod = MathML Nothing } x fallback = writeHtmlInline defaultWriterOptions{ writerHTMLMathMethod = PlainMath } x inOps = "" ++ "" ++ mathml ++ "" ++ fallback ++ "" ++ "" result = if " Block transformBlock (RawBlock _ _) = Null transformBlock x = x (!) :: Node t => (t -> Element) -> [(String, String)] -> t -> Element (!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n) -- | Version of 'ppTopElement' that specifies UTF-8 encoding. ppTopElement :: Element -> String ppTopElement = ("\n" ++) . ppElement imageTypeOf :: FilePath -> Maybe String imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of "jpg" -> Just "image/jpeg" "jpeg" -> Just "image/jpeg" "jfif" -> Just "image/jpeg" "png" -> Just "image/png" "gif" -> Just "image/gif" "svg" -> Just "image/svg+xml" _ -> Nothing pageTemplate :: String pageTemplate = unlines [ "" , "" , "" , "" , "$title$" , "" , "" , "" , "$if(titlepage)$" , "

$title$

" , "$for(author)$" , "

$author$

" , "$endfor$" , "$else$" , "

$title$

" , "$body$" , "$endif$" , "" , "" ]