module Codec.Epub.Parse
( getGuide
, getManifest
, getMetadata
, getPackage
, getSpine
)
where
import Control.Arrow.ListArrows
import Control.Monad.Error
import Text.Regex
import Text.XML.HXT.Arrow.Namespace ( propagateNamespaces )
import Text.XML.HXT.Arrow.XmlState ( no, runX, withValidate )
import Text.XML.HXT.Arrow.XmlState.TypeDefs
import Text.XML.HXT.Arrow.ReadDocument ( readString )
import Text.XML.HXT.DOM.TypeDefs
import Codec.Epub.Data.Guide
import Codec.Epub.Data.Manifest
import Codec.Epub.Data.Metadata
import Codec.Epub.Data.Package
import Codec.Epub.Data.Spine
import Codec.Epub.Parse.Guide
import Codec.Epub.Parse.Manifest
import Codec.Epub.Parse.Metadata
import Codec.Epub.Parse.Package
import Codec.Epub.Parse.Refinements
import Codec.Epub.Parse.Spine
removeIllegalStartChars :: String -> String
removeIllegalStartChars = dropWhile (/= '<')
removeEncoding :: String -> String
removeEncoding = flip (subRegex
(mkRegexWithOpts " +encoding=\"UTF-8\"" False False)) ""
removeDoctype :: String -> String
removeDoctype = flip (subRegex
(mkRegexWithOpts "<!DOCTYPE [^>]*>" False True)) ""
performParse :: (MonadIO m, MonadError String m) =>
IOSLA (XIOState ()) XmlTree b -> String -> m b
performParse parser contents = do
let cleanedContents = removeIllegalStartChars . removeEncoding
. removeDoctype $ contents
result <- liftIO $ runX (
readString [withValidate no] cleanedContents
>>> propagateNamespaces
>>> parser
)
case result of
(r : []) -> return r
_ -> throwError
"ERROR: FIXME with a better message"
getGuide :: (MonadIO m, MonadError String m) =>
String -> m [GuideRef]
getGuide = performParse guideP
getManifest :: (MonadIO m, MonadError String m) =>
String -> m Manifest
getManifest = performParse manifestP
getMetadata :: (MonadIO m, MonadError String m) =>
String -> m Metadata
getMetadata opfContents = do
refinements <- performParse refinementsP opfContents
rawMd <- performParse (metadataP refinements) opfContents
return rawMd
getPackage :: (MonadIO m, MonadError String m) =>
String -> m Package
getPackage = performParse packageP
getSpine :: (MonadIO m, MonadError String m) =>
String -> m Spine
getSpine = performParse spineP