{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RankNTypes #-} {-# LANGUAGE TemplateHaskell #-} -- | Streaming parsers for the RSS 1.0 standard. module Text.RSS1.Conduit.Parse ( -- * Top-level rss1Document -- * Elements , rss1ChannelItems , rss1Image , rss1Item , rss1TextInput ) where -- {{{ Imports import Text.RSS.Types import Conduit hiding (throwM) import Control.Exception.Safe as Exception import Control.Monad import Control.Monad.Fix import Data.Conduit import Data.Text as Text import Data.Text.Encoding import Data.Time.Clock import Data.Time.LocalTime import Data.Time.RFC3339 import Data.Version import Data.XML.Types import Lens.Simple import Text.XML.Stream.Parse import URI.ByteString -- }}} -- {{{ Util asDate :: (MonadThrow m) => Text -> m UTCTime asDate text = maybe (throw $ InvalidTime text) (return . zonedTimeToUTC) $ parseTimeRFC3339 text asRssURI :: (MonadThrow m) => Text -> m RssURI asRssURI t = case (parseURI' t, parseRelativeRef' t) of (Right u, _) -> return $ RssURI u (_, Right u) -> return $ RssURI u (_, Left e) -> throwM $ InvalidURI e where parseURI' = parseURI laxURIParserOptions . encodeUtf8 parseRelativeRef' = parseRelativeRef laxURIParserOptions . encodeUtf8 nullURI :: RssURI nullURI = RssURI $ RelativeRef Nothing "" (Query []) Nothing headRequiredC :: MonadThrow m => Text -> Consumer a m a headRequiredC e = maybe (throw $ MissingElement e) return =<< headC projectC :: Monad m => Fold a a' b b' -> Conduit a m b projectC prism = fix $ \recurse -> do item <- await case (item, item ^? (_Just . prism)) of (_, Just a) -> yield a >> recurse (Just _, _) -> recurse _ -> return () contentTag :: MonadThrow m => Text -> AttrParser a -> (a -> ConduitM Event o m b) -> ConduitM Event o m (Maybe b) contentTag string = tag' (matching (== contentName string)) dcTag :: MonadThrow m => Text -> AttrParser a -> (a -> ConduitM Event o m b) -> ConduitM Event o m (Maybe b) dcTag string = tag' (matching (== dcName string)) rdfTag :: MonadThrow m => Text -> AttrParser a -> (a -> ConduitM Event o m b) -> ConduitM Event o m (Maybe b) rdfTag string = tag' (matching (== rdfName string)) rss1Tag :: MonadThrow m => Text -> AttrParser a -> (a -> ConduitM Event o m b) -> ConduitM Event o m (Maybe b) rss1Tag string = tag' (matching (== rss1Name string)) contentName :: Text -> Name contentName string = Name string (Just "http://purl.org/rss/1.0/modules/content/") (Just "content") dcName :: Text -> Name dcName string = Name string (Just "http://purl.org/dc/elements/1.1/") (Just "dc") rdfName :: Text -> Name rdfName string = Name string (Just "http://www.w3.org/1999/02/22-rdf-syntax-ns#") (Just "rdf") rss1Name :: Text -> Name rss1Name string = Name string (Just "http://purl.org/rss/1.0/") Nothing -- }}} data TextInputPiece = TextInputTitle Text | TextInputDescription Text | TextInputName Text | TextInputLink RssURI makeTraversals ''TextInputPiece -- | Parse a @\@ element. rss1TextInput :: (MonadThrow m) => ConduitM Event o m (Maybe RssTextInput) rss1TextInput = rss1Tag "textinput" attributes $ \uri -> (manyYield' (choose piece) =$= parser uri) <* many ignoreAnyTreeContent where parser uri = getZipConduit $ RssTextInput <$> ZipConduit (projectC _TextInputTitle =$= headRequiredC "Missing element") <*> ZipConduit (projectC _TextInputDescription =$= headRequiredC "Missing <description> element") <*> ZipConduit (projectC _TextInputName =$= headRequiredC "Missing <name> element") <*> ZipConduit (projectC _TextInputLink =$= headDefC uri) -- Lenient piece = [ fmap TextInputTitle <$> rss1Tag "title" ignoreAttrs (const content) , fmap TextInputDescription <$> rss1Tag "description" ignoreAttrs (const content) , fmap TextInputName <$> rss1Tag "name" ignoreAttrs (const content) , fmap TextInputLink <$> rss1Tag "link" ignoreAttrs (const $ content >>= asRssURI) ] attributes = (requireAttr (rdfName "about") >>= asRssURI) <* ignoreAttrs data ItemPiece = ItemTitle Text | ItemLink RssURI | ItemDescription Text | ItemCreator Text | ItemDate UTCTime | ItemContent Text makeTraversals ''ItemPiece -- | Parse an @\<item\>@ element. rss1Item :: MonadThrow m => ConduitM Event o m (Maybe RssItem) rss1Item = rss1Tag "item" attributes $ \uri -> (manyYield' (choose piece) =$= parser uri) <* many ignoreAnyTreeContent where parser uri = getZipConduit $ RssItem <$> ZipConduit (projectC _ItemTitle =$= headDefC mempty) <*> (Just <$> ZipConduit (projectC _ItemLink =$= headDefC uri)) <*> ZipConduit (projectC _ItemDescription =$= headDefC mempty) <*> ZipConduit (projectC _ItemCreator =$= headDefC mempty) <*> pure mempty <*> pure mzero <*> pure mempty <*> pure mzero <*> ZipConduit (projectC _ItemDate =$= headC) <*> pure mzero piece = [ fmap ItemTitle <$> rss1Tag "title" ignoreAttrs (const content) , fmap ItemLink <$> rss1Tag "link" ignoreAttrs (const $ content >>= asRssURI) , fmap ItemDescription <$> (rss1Tag "description" ignoreAttrs (const content) `orE` contentTag "encoded" ignoreAttrs (const content)) , fmap ItemCreator <$> dcTag "creator" ignoreAttrs (const content) , fmap ItemDate <$> dcTag "date" ignoreAttrs (const $ content >>= asDate) ] attributes = (requireAttr (rdfName "about") >>= asRssURI) <* ignoreAttrs data ImagePiece = ImageUri RssURI | ImageTitle Text | ImageLink RssURI makeTraversals ''ImagePiece -- | Parse an @\<image\>@ element. rss1Image :: (MonadThrow m) => ConduitM Event o m (Maybe RssImage) rss1Image = rss1Tag "image" attributes $ \uri -> (manyYield' (choose piece) =$= parser uri) <* many ignoreAnyTreeContent where parser uri = getZipConduit $ RssImage <$> ZipConduit (projectC _ImageUri =$= headDefC uri) -- Lenient <*> ZipConduit (projectC _ImageTitle =$= headDefC "Unnamed image") -- Lenient <*> ZipConduit (projectC _ImageLink =$= headDefC nullURI) -- Lenient <*> pure mzero <*> pure mzero <*> pure mempty piece = [ fmap ImageUri <$> rss1Tag "url" ignoreAttrs (const $ content >>= asRssURI) , fmap ImageTitle <$> rss1Tag "title" ignoreAttrs (const content) , fmap ImageLink <$> rss1Tag "link" ignoreAttrs (const $ content >>= asRssURI) ] attributes = (requireAttr (rdfName "about") >>= asRssURI) <* ignoreAttrs -- | Parse an @\<items\>@ element. rss1ChannelItems :: MonadThrow m => ConduitM Event o m (Maybe [Text]) rss1ChannelItems = fmap join $ rss1Tag "items" ignoreAttrs $ const $ rdfTag "Seq" ignoreAttrs $ const $ many $ rdfTag "li" attributes return where attributes = requireAttr (rdfName "resource") <* ignoreAttrs data Rss1Channel = Rss1Channel { channelId' :: RssURI , channelTitle' :: Text , channelLink' :: RssURI , channelDescription' :: Text , channelItems' :: [Text] , channelImage' :: Maybe RssImage , channelTextInput' :: Maybe RssURI } data ChannelPiece = ChannelTitle Text | ChannelLink RssURI | ChannelDescription Text | ChannelImage RssImage | ChannelItems [Text] | ChannelTextInput RssURI makeTraversals ''ChannelPiece -- | Parse a @\<channel\>@ element. rss1Channel :: MonadThrow m => ConduitM Event o m (Maybe Rss1Channel) rss1Channel = rss1Tag "channel" attributes $ \channelId -> (manyYield' (choose piece) =$= parser channelId) <* many ignoreAnyTreeContent where parser channelId = getZipConduit $ Rss1Channel channelId <$> ZipConduit (projectC _ChannelTitle =$= headRequiredC "Missing <title> element") <*> ZipConduit (projectC _ChannelLink =$= headRequiredC "Missing <link> element") <*> ZipConduit (projectC _ChannelDescription =$= headDefC "") -- Lenient <*> ZipConduit (projectC _ChannelItems =$= concatC =$= sinkList) <*> ZipConduit (projectC _ChannelImage =$= headC) <*> ZipConduit (projectC _ChannelTextInput =$= headC) piece = [ fmap ChannelTitle <$> rss1Tag "title" ignoreAttrs (const content) , fmap ChannelLink <$> rss1Tag "link" ignoreAttrs (const $ content >>= asRssURI) , fmap ChannelDescription <$> rss1Tag "description" ignoreAttrs (const content) , fmap ChannelItems <$> rss1ChannelItems , fmap ChannelImage <$> rss1Image , fmap ChannelTextInput <$> rss1Tag "textinput" (requireAttr (rdfName "resource") >>= asRssURI) return ] attributes = (requireAttr (rdfName "about") >>= asRssURI) <* ignoreAttrs data Rss1Document = Rss1Document Rss1Channel (Maybe RssImage) [RssItem] (Maybe RssTextInput) rss1ToRss2 :: Rss1Document -> RssDocument rss1ToRss2 (Rss1Document channel image items textInput) = RssDocument (Version [1] []) (channelTitle' channel) (channelLink' channel) (channelDescription' channel) items mempty mempty mempty mempty mzero mzero mzero mempty mzero mzero mzero image mempty textInput mempty mempty data DocumentPiece = DocumentChannel Rss1Channel | DocumentImage RssImage | DocumentItem RssItem | DocumentTextInput RssTextInput makeTraversals ''DocumentPiece -- | Parse an @\<RDF\>@ element. rss1Document :: MonadThrow m => ConduitM Event o m (Maybe RssDocument) rss1Document = fmap (fmap rss1ToRss2) $ rdfTag "RDF" ignoreAttrs $ const $ (manyYield' (choose piece) =$= parser) <* many ignoreAnyTreeContent where parser = getZipConduit $ Rss1Document <$> ZipConduit (projectC _DocumentChannel =$= headRequiredC "Missing <channel> element") <*> ZipConduit (projectC _DocumentImage =$= headC) <*> ZipConduit (projectC _DocumentItem =$= sinkList) <*> ZipConduit (projectC _DocumentTextInput =$= headC) piece = [ fmap DocumentChannel <$> rss1Channel , fmap DocumentImage <$> rss1Image , fmap DocumentItem <$> rss1Item , fmap DocumentTextInput <$> rss1TextInput ]