module Codec.Epub.IO
( getPkgXmlFromZip
, getPkgPathXmlFromZip
, getPkgPathXmlFromBS
, getPkgPathXmlFromDir
, mkEpubArchive
, readArchive
, writeArchive
)
where
import Codec.Archive.Zip
import Control.Arrow.ListArrows ( (>>>), deep )
import Control.Exception
import Control.Monad.Except
import qualified Data.ByteString.Char8 as BS
import Data.ByteString.Lazy ( fromChunks )
import qualified Data.ByteString.Lazy as B
import qualified Data.ByteString.Lazy.Char8 as BL
import Data.List
import System.Directory
import System.FilePath
import Text.XML.HXT.Arrow.ReadDocument ( readString )
import Text.XML.HXT.Arrow.XmlArrow ( getAttrValue, hasName, isElem )
import Text.XML.HXT.Arrow.XmlState ( no, runX, withValidate )
import Codec.Epub.Util
locateRootFile :: (MonadIO m, MonadError String m) =>
FilePath -> String -> m FilePath
locateRootFile containerPath' containerDoc = do
result <- liftIO $ runX (
readString [withValidate no] containerDoc
>>> deep (isElem >>> hasName "rootfile")
>>> getAttrValue "full-path"
)
case result of
(p : []) -> return p
_ -> throwError $
"ERROR: rootfile full-path missing from " ++ containerPath'
fileFromArchive :: MonadError String m =>
FilePath -> Archive -> m String
fileFromArchive filePath archive = do
let mbEntry = findEntryByPath filePath archive
maybe
(throwError $ "Unable to locate file " ++ filePath)
(return . BL.unpack . fromEntry) mbEntry
containerPath :: FilePath
containerPath = "META-INF/container.xml"
getPkgPathXmlFromBS :: (MonadError String m, MonadIO m)
=> BS.ByteString
-> m (FilePath, String)
getPkgPathXmlFromBS strictBytes = do
let lazyBytes = fromChunks [strictBytes]
result <- liftIO $ ( try $ evaluate
(toArchive lazyBytes) :: IO (Either SomeException Archive) )
archive <- either (throwError . show) return result
containerDoc <- fileFromArchive containerPath archive
let cleanedContents = removeIllegalStartChars . removeEncoding
. removeDoctype $ containerDoc
rootPath <- locateRootFile containerPath cleanedContents
rootContents <- fileFromArchive rootPath archive
return (rootPath, rootContents)
getPkgPathXmlFromZip :: (MonadError String m, MonadIO m)
=> FilePath
-> m (FilePath, String)
getPkgPathXmlFromZip zipPath = do
zipFileBytes <- liftIO $ BS.readFile zipPath
getPkgPathXmlFromBS zipFileBytes
getPkgXmlFromZip :: (MonadError String m, MonadIO m)
=> FilePath
-> m String
getPkgXmlFromZip zipPath = snd `liftM` getPkgPathXmlFromZip zipPath
getPkgPathXmlFromDir :: (MonadError String m, MonadIO m)
=> FilePath
-> m (FilePath, String)
getPkgPathXmlFromDir dir = do
liftIO $ setCurrentDirectory dir
containerDoc <- liftIO $ readFile containerPath
rootPath <- locateRootFile (dir </> containerPath) containerDoc
rootContents <- liftIO $ readFile rootPath
return (rootPath, rootContents)
getRecursiveContents :: FilePath -> IO [FilePath]
getRecursiveContents parent = do
fullContents <- getDirectoryContents parent
let contents = filter (not . isPrefixOf ".") fullContents
paths <- forM contents $ \name -> do
let path = parent </> name
isDirectory <- doesDirectoryExist path
if isDirectory
then getRecursiveContents path
else return [path]
return $ concat paths
mkEpubArchive :: FilePath -> IO Archive
mkEpubArchive rootDir = do
setCurrentDirectory rootDir
let mimetype = ["mimetype"]
allFiles <- getRecursiveContents "."
let restFiles = allFiles \\ mimetype
flip (addFilesToArchive [OptRecursive]) restFiles >=>
flip (addFilesToArchive []) ["mimetype"]
$ emptyArchive
readArchive :: FilePath -> IO Archive
readArchive = fmap toArchive . B.readFile
writeArchive :: FilePath -> Archive -> IO ()
writeArchive zipPath = (B.writeFile zipPath) . fromArchive