module Text.Pandoc.Writers.EPUB ( writeEPUB ) where
import Data.IORef
import Data.Maybe ( fromMaybe, isNothing )
import Data.List ( isInfixOf, intercalate )
import System.Environment ( getEnv )
import Text.Printf (printf)
import System.FilePath ( (</>), takeBaseName, takeExtension, takeFileName )
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Char8 as B8
import Text.Pandoc.UTF8 ( fromStringLazy, toString )
import Text.Pandoc.SelfContained ( makeSelfContained )
import Codec.Archive.Zip
import Data.Time.Clock.POSIX
import Data.Time
import System.Locale
import Text.Pandoc.Shared hiding ( Element )
import qualified Text.Pandoc.Shared as Shared
import Text.Pandoc.Builder (fromList, setMeta)
import Text.Pandoc.Options
import Text.Pandoc.Definition
import Text.Pandoc.Walk
import Control.Monad.State
import Text.XML.Light hiding (ppTopElement)
import Text.Pandoc.UUID
import Text.Pandoc.Writers.HTML
import Text.Pandoc.Writers.Markdown ( writePlain )
import Data.Char ( toLower )
import Network.URI ( unEscapeString )
import Text.Pandoc.MIME (getMimeType)
#if MIN_VERSION_base(4,6,0)
#else
import Prelude hiding (catch)
#endif
import Control.Exception (catch, SomeException)
import Text.Blaze.Html.Renderer.Utf8 (renderHtml)
data Chapter = Chapter (Maybe [Int]) [Block]
writeEPUB :: WriterOptions
-> Pandoc
-> IO B.ByteString
writeEPUB opts doc@(Pandoc meta _) = do
let version = maybe EPUB2 id (writerEpubVersion opts)
let epub3 = version == EPUB3
epochtime <- floor `fmap` getPOSIXTime
let mkEntry path content = toEntry path epochtime content
let vars = ("epub3", if epub3 then "true" else "false")
: ("css", "stylesheet.css")
: writerVariables opts
let opts' = opts{ writerEmailObfuscation = NoObfuscation
, writerStandalone = True
, writerSectionDivs = True
, writerHtml5 = epub3
, writerTableOfContents = False
, writerVariables = vars
, writerHTMLMathMethod =
if epub3
then MathML Nothing
else writerHTMLMathMethod opts
, writerWrapText = False }
let mbCoverImage = lookup "epub-cover-image" vars
(cpgEntry, cpicEntry) <-
case mbCoverImage of
Nothing -> return ([],[])
Just img -> do
let coverImage = "cover-image" ++ takeExtension img
let cpContent = renderHtml $ writeHtml opts'
(Pandoc meta [RawBlock (Format "html") $ "<div id=\"cover-image\">\n<img src=\"" ++ coverImage ++ "\" alt=\"cover image\" />\n</div>"])
imgContent <- B.readFile img
return ( [mkEntry "cover.xhtml" cpContent]
, [mkEntry coverImage imgContent] )
let tpContent = renderHtml $ writeHtml opts'{
writerVariables = ("titlepage","true"):vars }
(Pandoc meta [])
let tpEntry = mkEntry "title_page.xhtml" tpContent
picsRef <- newIORef []
Pandoc _ blocks <- walkM
(transformInline opts' picsRef) doc
pics <- readIORef picsRef
let readPicEntry entries (oldsrc, newsrc) = do
res <- fetchItem (writerSourceURL opts') oldsrc
case res of
Left _ -> do
warn $ "Could not find image `" ++ oldsrc ++ "', skipping..."
return entries
Right (img,_) -> return $
(toEntry newsrc epochtime $ B.fromChunks . (:[]) $ img) : entries
picEntries <- foldM readPicEntry [] pics
let mkFontEntry f = mkEntry (takeFileName f) `fmap` B.readFile f
fontEntries <- mapM mkFontEntry $ writerEpubFonts opts'
let blocks' = addIdentifiers
$ case blocks of
(Header 1 _ _ : _) -> blocks
_ -> Header 1 ("",["unnumbered"],[])
(docTitle meta) : blocks
let chapterHeaderLevel = writerEpubChapterLevel opts
let reftable = correlateRefs chapterHeaderLevel blocks'
let blocks'' = replaceRefs reftable blocks'
let isChapterHeader (Header n _ _) = n <= chapterHeaderLevel
isChapterHeader _ = False
let toChapters :: [Block] -> State [Int] [Chapter]
toChapters [] = return []
toChapters (Header n attr@(_,classes,_) ils : bs) = do
nums <- get
mbnum <- if "unnumbered" `elem` classes
then return Nothing
else case splitAt (n 1) nums of
(ks, (m:_)) -> do
let nums' = ks ++ [m+1]
put nums'
return $ Just (ks ++ [m])
(ks, []) -> do
let nums' = ks ++ [1]
put nums'
return $ Just ks
let (xs,ys) = break isChapterHeader bs
(Chapter mbnum (Header n attr ils : xs) :) `fmap` toChapters ys
toChapters (b:bs) = do
let (xs,ys) = break isChapterHeader bs
(Chapter Nothing (b:xs) :) `fmap` toChapters ys
let chapters = evalState (toChapters blocks'') []
let chapToEntry :: Int -> Chapter -> Entry
chapToEntry num (Chapter mbnum bs) = mkEntry (showChapter num)
$ renderHtml
$ writeHtml opts'{ writerNumberOffset =
maybe [] id mbnum }
$ case bs of
(Header _ _ xs : _) ->
Pandoc (setMeta "title" (fromList xs) nullMeta) bs
_ ->
Pandoc nullMeta bs
let chapterEntries = zipWith chapToEntry [1..] chapters
let containsMathML ent = "<math" `isInfixOf` (B8.unpack $ fromEntry ent)
localeLang <- catch (liftM (map (\c -> if c == '_' then '-' else c) .
takeWhile (/='.')) $ getEnv "LANG")
(\e -> let _ = (e :: SomeException) in return "en-US")
let lang = case lookup "lang" (writerVariables opts') of
Just x -> x
Nothing -> localeLang
uuid <- getRandomUUID
let chapterNode ent = unode "item" !
([("id", takeBaseName $ eRelativePath ent),
("href", eRelativePath ent),
("media-type", "application/xhtml+xml")]
++ [("properties","mathml") | epub3 &&
containsMathML ent]) $ ()
let chapterRefNode ent = unode "itemref" !
[("idref", takeBaseName $ eRelativePath ent)] $ ()
let pictureNode ent = unode "item" !
[("id", takeBaseName $ eRelativePath ent),
("href", eRelativePath ent),
("media-type", fromMaybe "application/octet-stream"
$ imageTypeOf $ eRelativePath ent)] $ ()
let fontNode ent = unode "item" !
[("id", takeBaseName $ eRelativePath ent),
("href", eRelativePath ent),
("media-type", maybe "" id $ getMimeType $ eRelativePath ent)] $ ()
let plainify t = trimr $
writePlain opts'{ writerStandalone = False } $
Pandoc meta [Plain t]
let plainTitle = plainify $ docTitle meta
let plainAuthors = map plainify $ docAuthors meta
currentTime <- getCurrentTime
let plainDate = maybe (showDateTimeISO8601 currentTime) id
$ normalizeDate $ stringify $ docDate meta
let contentsData = fromStringLazy $ ppTopElement $
unode "package" ! [("version", case version of
EPUB2 -> "2.0"
EPUB3 -> "3.0")
,("xmlns","http://www.idpf.org/2007/opf")
,("unique-identifier","BookId")] $
[ metadataElement version (writerEpubMetadata opts')
uuid lang plainTitle plainAuthors plainDate currentTime mbCoverImage
, unode "manifest" $
[ unode "item" ! [("id","ncx"), ("href","toc.ncx")
,("media-type","application/x-dtbncx+xml")] $ ()
, unode "item" ! [("id","style"), ("href","stylesheet.css")
,("media-type","text/css")] $ ()
, unode "item" ! ([("id","nav")
,("href","nav.xhtml")
,("media-type","application/xhtml+xml")] ++
[("properties","nav") | epub3 ]) $ ()
] ++
map chapterNode (cpgEntry ++ (tpEntry : chapterEntries)) ++
map pictureNode (cpicEntry ++ picEntries) ++
map fontNode fontEntries
, unode "spine" ! [("toc","ncx")] $
case mbCoverImage of
Nothing -> []
Just _ -> [ unode "itemref" !
[("idref", "cover"),("linear","no")] $ () ]
++ ((unode "itemref" ! [("idref", "title_page")
,("linear", if null (docTitle meta)
then "no"
else "yes")] $ ()) :
(unode "itemref" ! [("idref", "nav")
,("linear", if writerTableOfContents opts
then "yes"
else "no")] $ ()) :
map chapterRefNode chapterEntries)
, unode "guide" $
unode "reference" !
[("type","toc"),("title",plainTitle),("href","nav.xhtml")] $ ()
]
let contentsEntry = mkEntry "content.opf" contentsData
let secs = hierarchicalize blocks''
let tocLevel = writerTOCDepth opts
let navPointNode :: (Int -> String -> String -> [Element] -> Element)
-> Shared.Element -> State Int Element
navPointNode formatter (Sec _ nums (ident,_,_) ils children) = do
n <- get
modify (+1)
let showNums :: [Int] -> String
showNums = intercalate "." . map show
let tit' = plainify ils
let tit = if writerNumberSections opts && not (null nums)
then showNums nums ++ " " ++ tit'
else tit'
let src = case lookup ident reftable of
Just x -> x
Nothing -> error (ident ++ " not found in reftable")
let isSec (Sec lev _ _ _ _) = lev <= tocLevel
isSec _ = False
let subsecs = filter isSec children
subs <- mapM (navPointNode formatter) subsecs
return $ formatter n tit src subs
navPointNode _ (Blk _) = error "navPointNode encountered Blk"
let navMapFormatter :: Int -> String -> String -> [Element] -> Element
navMapFormatter n tit src subs = unode "navPoint" !
[("id", "navPoint-" ++ show n)
,("playOrder", show n)] $
[ unode "navLabel" $ unode "text" tit
, unode "content" ! [("src", src)] $ ()
] ++ subs
let tpNode = unode "navPoint" ! [("id", "navPoint-0")] $
[ unode "navLabel" $ unode "text" (plainify $ docTitle meta)
, unode "content" ! [("src","title_page.xhtml")] $ () ]
let tocData = fromStringLazy $ ppTopElement $
unode "ncx" ! [("version","2005-1")
,("xmlns","http://www.daisy.org/z3986/2005/ncx/")] $
[ unode "head" $
[ unode "meta" ! [("name","dtb:uid")
,("content", show uuid)] $ ()
, unode "meta" ! [("name","dtb:depth")
,("content", "1")] $ ()
, unode "meta" ! [("name","dtb:totalPageCount")
,("content", "0")] $ ()
, unode "meta" ! [("name","dtb:maxPageNumber")
,("content", "0")] $ ()
] ++ case mbCoverImage of
Nothing -> []
Just _ -> [unode "meta" ! [("name","cover"),
("content","cover-image")] $ ()]
, unode "docTitle" $ unode "text" $ plainTitle
, unode "navMap" $
tpNode : evalState (mapM (navPointNode navMapFormatter) secs) 1
]
let tocEntry = mkEntry "toc.ncx" tocData
let navXhtmlFormatter :: Int -> String -> String -> [Element] -> Element
navXhtmlFormatter n tit src subs = unode "li" !
[("id", "toc-li-" ++ show n)] $
(unode "a" ! [("href",src)]
$ (unode "span" tit))
: case subs of
[] -> []
(_:_) -> [unode "ol" ! [("class","toc")] $ subs]
let navtag = if epub3 then "nav" else "div"
let navData = fromStringLazy $ ppTopElement $
unode "html" ! [("xmlns","http://www.w3.org/1999/xhtml")
,("xmlns:epub","http://www.idpf.org/2007/ops")] $
[ unode "head" $
[ unode "title" plainTitle
, unode "link" ! [("rel","stylesheet"),("type","text/css"),("href","stylesheet.css")] $ () ]
, unode "body" $
unode navtag ! [("epub:type","toc") | epub3] $
[ unode "h1" ! [("id","toc-title")] $ plainTitle
, unode "ol" ! [("class","toc")] $ evalState (mapM (navPointNode navXhtmlFormatter) secs) 1]
]
let navEntry = mkEntry "nav.xhtml" navData
let mimetypeEntry = mkEntry "mimetype" $ fromStringLazy "application/epub+zip"
let containerData = fromStringLazy $ ppTopElement $
unode "container" ! [("version","1.0")
,("xmlns","urn:oasis:names:tc:opendocument:xmlns:container")] $
unode "rootfiles" $
unode "rootfile" ! [("full-path","content.opf")
,("media-type","application/oebps-package+xml")] $ ()
let containerEntry = mkEntry "META-INF/container.xml" containerData
let apple = fromStringLazy $ ppTopElement $
unode "display_options" $
unode "platform" ! [("name","*")] $
unode "option" ! [("name","specified-fonts")] $ "true"
let appleEntry = mkEntry "META-INF/com.apple.ibooks.display-options.xml" apple
stylesheet <- case writerEpubStylesheet opts of
Just s -> return s
Nothing -> toString `fmap`
readDataFile (writerUserDataDir opts) "epub.css"
let stylesheetEntry = mkEntry "stylesheet.css" $ fromStringLazy stylesheet
let archive = foldr addEntryToArchive emptyArchive
(mimetypeEntry : containerEntry : appleEntry : stylesheetEntry : tpEntry :
contentsEntry : tocEntry : navEntry :
(picEntries ++ cpicEntry ++ cpgEntry ++ chapterEntries ++ fontEntries))
return $ fromArchive archive
metadataElement :: EPUBVersion -> String -> UUID -> String -> String -> [String]
-> String -> UTCTime -> Maybe a -> Element
metadataElement version metadataXML uuid lang title authors date currentTime mbCoverImage =
let userNodes = parseXML metadataXML
elt = unode "metadata" ! [("xmlns:dc","http://purl.org/dc/elements/1.1/")
,("xmlns:opf","http://www.idpf.org/2007/opf")] $
filter isMetadataElement $ onlyElems userNodes
dublinElements = ["contributor","coverage","creator","date",
"description","format","identifier","language","publisher",
"relation","rights","source","subject","title","type"]
isMetadataElement e = (qPrefix (elName e) == Just "dc" &&
qName (elName e) `elem` dublinElements) ||
(qPrefix (elName e) == Nothing &&
qName (elName e) `elem` ["link","meta"])
contains e n = not (null (findElements (QName n Nothing (Just "dc")) e))
newNodes = [ unode "dc:title" title | not (elt `contains` "title") ] ++
[ unode "dc:language" lang | not (elt `contains` "language") ] ++
[ unode "dc:identifier" ! [("id","BookId")] $ show uuid |
not (elt `contains` "identifier") ] ++
[ unode "dc:creator" ! [("opf:role","aut") | version == EPUB2]
$ a | a <- authors, not (elt `contains` "creator") ] ++
[ unode "dc:date" date | not (elt `contains` "date") ] ++
[ unode "meta" ! [("property", "dcterms:modified")] $
(showDateTimeISO8601 currentTime) | version == EPUB3] ++
[ unode "meta" ! [("name","cover"), ("content","cover-image")] $ () |
not (isNothing mbCoverImage) ]
in elt{ elContent = elContent elt ++ map Elem newNodes }
showDateTimeISO8601 :: UTCTime -> String
showDateTimeISO8601 = formatTime defaultTimeLocale "%FT%TZ"
transformInline :: WriterOptions
-> IORef [(FilePath, FilePath)]
-> Inline
-> IO Inline
transformInline opts picsRef (Image lab (src,tit)) = do
let src' = unEscapeString src
pics <- readIORef picsRef
let oldsrc = maybe src' (</> src) $ writerSourceURL opts
let ext = takeExtension src'
newsrc <- case lookup oldsrc pics of
Just n -> return n
Nothing -> do
let new = "images/img" ++ show (length pics) ++ ext
modifyIORef picsRef ( (oldsrc, new): )
return new
return $ Image lab (newsrc, tit)
transformInline opts _ (x@(Math _ _))
| WebTeX _ <- writerHTMLMathMethod opts = do
raw <- makeSelfContained Nothing $ writeHtmlInline opts x
return $ RawInline (Format "html") raw
transformInline _ _ x = return x
writeHtmlInline :: WriterOptions -> Inline -> String
writeHtmlInline opts z = trimr $
writeHtmlString opts{ writerStandalone = False }
$ Pandoc nullMeta [Plain [z]]
(!) :: Node t => (t -> Element) -> [(String, String)] -> t -> Element
(!) f attrs n = add_attrs (map (\(k,v) -> Attr (unqual k) v) attrs) (f n)
ppTopElement :: Element -> String
ppTopElement = ("<?xml version=\"1.0\" encoding=\"UTF-8\"?>\n" ++) . unEntity . ppElement
where unEntity [] = ""
unEntity ('&':'#':xs) =
let (ds,ys) = break (==';') xs
rest = drop 1 ys
in case safeRead ('\'':'\\':ds ++ "'") of
Just x -> x : unEntity rest
Nothing -> '&':'#':unEntity xs
unEntity (x:xs) = x : unEntity xs
imageTypeOf :: FilePath -> Maybe String
imageTypeOf x = case drop 1 (map toLower (takeExtension x)) of
"jpg" -> Just "image/jpeg"
"jpeg" -> Just "image/jpeg"
"jfif" -> Just "image/jpeg"
"png" -> Just "image/png"
"gif" -> Just "image/gif"
"svg" -> Just "image/svg+xml"
_ -> Nothing
data IdentState = IdentState{
chapterNumber :: Int,
identTable :: [(String,String)]
} deriving (Read, Show)
showChapter :: Int -> String
showChapter = printf "ch%03d.xhtml"
addIdentifiers :: [Block] -> [Block]
addIdentifiers bs = evalState (mapM go bs) []
where go (Header n (ident,classes,kvs) ils) = do
ids <- get
let ident' = if null ident
then uniqueIdent ils ids
else ident
put $ ident' : ids
return $ Header n (ident',classes,kvs) ils
go x = return x
correlateRefs :: Int -> [Block] -> [(String,String)]
correlateRefs chapterHeaderLevel bs =
identTable $ execState (mapM_ go bs)
IdentState{ chapterNumber = 0
, identTable = [] }
where go :: Block -> State IdentState ()
go (Header n (ident,_,_) _) = do
when (n <= chapterHeaderLevel) $
modify $ \s -> s{ chapterNumber = chapterNumber s + 1 }
st <- get
let chapterid = showChapter (chapterNumber st) ++
if n <= chapterHeaderLevel
then ""
else '#' : ident
modify $ \s -> s{ identTable = (ident, chapterid) : identTable st }
go _ = return ()
replaceRefs :: [(String,String)] -> [Block] -> [Block]
replaceRefs refTable = walk replaceOneRef
where replaceOneRef x@(Link lab ('#':xs,tit)) =
case lookup xs refTable of
Just url -> Link lab (url,tit)
Nothing -> x
replaceOneRef x = x