module Codec.Epub.Opf.Metadata.Parse
( parseXmlToOpf
, parseEpubOpf
)
where
import Control.Applicative
import Control.Monad.Error
import Data.Tree.NTree.TypeDefs ( NTree )
import Prelude hiding ( cos )
import Text.XML.HXT.Arrow
import Codec.Epub.IO
import Codec.Epub.Opf.Metadata
atTag :: (ArrowXml a) => String -> a (NTree XNode) XmlTree
atTag tag = deep (isElem >>> hasName tag)
atQTag :: (ArrowXml a) => QName -> a (NTree XNode) XmlTree
atQTag tag = deep (isElem >>> hasQName tag)
text :: (ArrowXml a) => a (NTree XNode) String
text = getChildren >>> getText
notNullA :: (ArrowList a) => a [b] [b]
notNullA = isA $ not . null
mbQTagText :: (ArrowXml a) => QName -> a (NTree XNode) (Maybe String)
mbQTagText tag =
( atQTag tag >>>
text >>> notNullA >>> arr Just )
`orElse`
(constA Nothing)
mbGetAttrValue :: (ArrowXml a) =>
String -> a XmlTree (Maybe String)
mbGetAttrValue n =
(getAttrValue n >>> notNullA >>> arr Just)
`orElse` (constA Nothing)
mbGetQAttrValue :: (ArrowXml a) =>
QName -> a XmlTree (Maybe String)
mbGetQAttrValue qn =
(getQAttrValue qn >>> notNullA >>> arr Just)
`orElse` (constA Nothing)
dcName, opfName, xmlName :: String -> QName
dcName local = mkQName "dc" local "http://purl.org/dc/elements/1.1/"
opfName local = mkQName "opf" local "http://www.idpf.org/2007/opf"
xmlName local = mkQName "xml" local "http://www.w3.org/XML/1998/namespace"
getPackage :: (ArrowXml a) => a (NTree XNode) (String, String)
getPackage = atTag "package" >>>
proc x -> do
v <- getAttrValue "version" -< x
u <- getAttrValue "unique-identifier" -< x
returnA -< (v, u)
getTitle :: (ArrowXml a) => a (NTree XNode) EMTitle
getTitle = atQTag (dcName "title") >>>
proc x -> do
l <- mbGetQAttrValue (xmlName "lang") -< x
c <- text -< x
returnA -< EMTitle l c
getCreator :: (ArrowXml a) => String -> a (NTree XNode) EMCreator
getCreator tag = atQTag (dcName tag) >>> ( unwrapArrow $ EMCreator
<$> (WrapArrow $ mbGetQAttrValue (opfName "role"))
<*> (WrapArrow $ mbGetQAttrValue (opfName "file-as"))
<*> (WrapArrow $ text)
)
getSubject :: (ArrowXml a) => a (NTree XNode) String
getSubject = atQTag (dcName "subject") >>> text
getDescription :: (ArrowXml a) => a (NTree XNode) (Maybe String)
getDescription = mbQTagText $ dcName "description"
getPublisher :: (ArrowXml a) => a (NTree XNode) (Maybe String)
getPublisher = mbQTagText $ dcName "publisher"
getDate :: (ArrowXml a) => a (NTree XNode) EMDate
getDate = atQTag (dcName "date") >>>
proc x -> do
e <- mbGetQAttrValue (opfName "event") -< x
c <- text -< x
returnA -< EMDate e c
getType :: (ArrowXml a) => a (NTree XNode) (Maybe String)
getType = mbQTagText $ dcName "type"
getFormat :: (ArrowXml a) => a (NTree XNode) (Maybe String)
getFormat = mbQTagText $ dcName "format"
getId :: (ArrowXml a) => a (NTree XNode) EMId
getId = atQTag (dcName "identifier") >>>
proc x -> do
mbi <- mbGetAttrValue "id" -< x
s <- mbGetQAttrValue (opfName "scheme") -< x
c <- text -< x
let i = maybe "[WARNING: missing required id attribute]" id mbi
returnA -< EMId i s c
getSource :: (ArrowXml a) => a (NTree XNode) (Maybe String)
getSource = mbQTagText $ dcName "source"
getLang :: (ArrowXml a) => a (NTree XNode) String
getLang = atQTag (dcName "language") >>> text
getRelation :: (ArrowXml a) => a (NTree XNode) (Maybe String)
getRelation = mbQTagText $ dcName "relation"
getCoverage :: (ArrowXml a) => a (NTree XNode) (Maybe String)
getCoverage = mbQTagText $ dcName "coverage"
getRights :: (ArrowXml a) => a (NTree XNode) (Maybe String)
getRights = mbQTagText $ dcName "rights"
getMeta :: (ArrowXml a) => a (NTree XNode) EpubMeta
getMeta = atTag "metadata" >>> ( unwrapArrow $ EpubMeta
<$> (WrapArrow $ listA getTitle)
<*> (WrapArrow $ listA $ getCreator "creator")
<*> (WrapArrow $ listA $ getCreator "contributor")
<*> (WrapArrow $ listA getSubject)
<*> (WrapArrow $ getDescription)
<*> (WrapArrow $ getPublisher)
<*> (WrapArrow $ listA getDate)
<*> (WrapArrow $ getType)
<*> (WrapArrow $ getFormat)
<*> (WrapArrow $ listA getId)
<*> (WrapArrow $ getSource)
<*> (WrapArrow $ listA getLang)
<*> (WrapArrow $ getRelation)
<*> (WrapArrow $ getCoverage)
<*> (WrapArrow $ getRights)
)
getBookData :: (ArrowXml a) => a (NTree XNode) OPFPackage
getBookData =
proc x -> do
(v, u) <- getPackage -< x
m <- getMeta -< x
returnA -< OPFPackage v u m
parseXmlToOpf :: (MonadIO m) => String -> m [OPFPackage]
parseXmlToOpf opfContents =
liftIO $ runX (
readString [(a_validate, v_0)] opfContents
>>> propagateNamespaces
>>> getBookData
)
parseEpubOpf :: (MonadIO m, MonadError String m) =>
FilePath -> m OPFPackage
parseEpubOpf zipPath = do
opfContents <- extractFileFromZip zipPath =<< opfPath zipPath
result <- parseXmlToOpf opfContents
case result of
(em : []) -> return em
_ -> throwError
"ERROR: we didn't come up with a single EpubMeta"