module Text.Atom.Conduit.Parse
(
atomFeed
, atomEntry
, atomContent
, atomCategory
, atomLink
, atomGenerator
, atomSource
, atomPerson
, atomText
) where
import Text.Atom.Types
import Control.Applicative
import Control.Foldl hiding (mconcat, set)
import Control.Monad hiding (foldM)
import Control.Monad.Catch
import Data.Conduit.Parser
import Data.Conduit.Parser.XML
import Data.Maybe
import Data.Monoid
import Data.MonoTraversable
import Data.NonNull (NonNull, fromNullable, toNullable)
import Data.Text as Text (Text)
import Data.Text.Encoding
import Data.Time.Clock
import Data.Time.LocalTime
import Data.Time.RFC3339
import Data.XML.Types
import Lens.Simple
import Prelude hiding (last, lookup)
import Text.Parser.Combinators
import URI.ByteString
data AtomException = InvalidURI URIParseError
| NullElement
deriving instance Eq AtomException
deriving instance Show AtomException
instance Exception AtomException where
displayException (InvalidURI e) = "Invalid URI reference: " ++ show e
displayException NullElement = "Null element"
asURIReference :: (MonadThrow m) => Text -> m AtomURI
asURIReference t = case (parseURI' t, parseRelativeRef' t) of
(Right u, _) -> return $ AtomURI u
(_, Right u) -> return $ AtomURI u
(Left _, Left e) -> throwM $ InvalidURI e
where parseURI' = parseURI laxURIParserOptions . encodeUtf8
parseRelativeRef' = parseRelativeRef laxURIParserOptions . encodeUtf8
asNonNull :: (MonoFoldable a, MonadThrow m) => a -> m (NonNull a)
asNonNull = maybe (throwM NullElement) return . fromNullable
tagName' :: (MonadCatch m) => Text -> AttrParser a -> (a -> ConduitParser Event m b) -> ConduitParser Event m b
tagName' t = tagPredicate (\n -> nameLocalName n == t)
tagDate :: (MonadCatch m) => Text -> ConduitParser Event m UTCTime
tagDate name = tagIgnoreAttrs' name $ content (fmap zonedTimeToUTC . parseTimeRFC3339)
tagIgnoreAttrs' :: (MonadCatch m) => Text -> ConduitParser Event m a -> ConduitParser Event m a
tagIgnoreAttrs' name handler = tagName' name ignoreAttrs $ const handler
unknownTag :: (MonadCatch m) => ConduitParser Event m ()
unknownTag = anyTag $ \_ _ -> void $ many (void unknownTag <|> void textContent)
atomId :: (MonadCatch m) => ConduitParser Event m (NonNull Text)
atomId = tagIgnoreAttrs' "id" $ content asNonNull
atomIcon, atomLogo :: (MonadCatch m) => ConduitParser Event m AtomURI
atomIcon = tagIgnoreAttrs' "icon" $ content asURIReference
atomLogo = tagIgnoreAttrs' "logo" $ content asURIReference
lastRequired :: (Monad m, Parsing m) => String -> FoldM m a a
lastRequired e = FoldM (\_ a -> return $ Right a) (return $ Left e) (either unexpected return)
data PersonPiece = PersonName (NonNull Text)
| PersonEmail Text
| PersonUri AtomURI
| PersonUnknown
makeTraversals ''PersonPiece
atomPerson :: (MonadCatch m) => Text -> ConduitParser Event m AtomPerson
atomPerson name = named ("Atom person construct <" <> name <> ">") $ tagIgnoreAttrs' name $ do
p <- many piece
flip foldM p $ AtomPerson
<$> handlesM _PersonName (lastRequired "Missing or invalid <name> element.")
<*> generalize (handles _PersonEmail $ lastDef "")
<*> generalize (handles _PersonUri last)
where piece :: (MonadCatch m) => ConduitParser Event m PersonPiece
piece = choice [ PersonName <$> tagIgnoreAttrs' "name" (content asNonNull)
, PersonEmail <$> tagIgnoreAttrs' "email" textContent
, PersonUri <$> tagIgnoreAttrs' "uri" (content asURIReference)
, PersonUnknown <$ unknownTag
]
atomCategory :: (MonadCatch m) => ConduitParser Event m AtomCategory
atomCategory = tagName' "category" categoryAttrs $ \(t, s, l) -> do
term <- asNonNull t
return $ AtomCategory term s l
where categoryAttrs = (,,) <$> textAttr "term"
<*> (textAttr "scheme" <|> pure mempty)
<*> (textAttr "label" <|> pure mempty)
<* ignoreAttrs
atomContent :: (MonadCatch m) => ConduitParser Event m AtomContent
atomContent = tagName' "content" contentAttrs handler
where contentAttrs = (,) <$> optional (textAttr "type") <*> optional (attr "src" asURIReference) <* ignoreAttrs
handler (Just "xhtml", _) = AtomContentInlineXHTML <$> tagIgnoreAttrs' "div" textContent
handler (ctype, Just uri) = return $ AtomContentOutOfLine (fromMaybe mempty ctype) uri
handler (Just "html", _) = AtomContentInlineText TypeHTML <$> textContent
handler (Nothing, _) = AtomContentInlineText TypeText <$> textContent
handler (Just ctype, _) = AtomContentInlineOther ctype <$> textContent
atomLink :: (MonadCatch m) => ConduitParser Event m AtomLink
atomLink = tagName' "link" linkAttrs $ \(href, rel, ltype, lang, title, length') ->
return $ AtomLink href rel ltype lang title length'
where linkAttrs = (,,,,,) <$> attr "href" asURIReference
<*> (textAttr "rel" <|> pure mempty)
<*> (textAttr "type" <|> pure mempty)
<*> (textAttr "hreflang" <|> pure mempty)
<*> (textAttr "title" <|> pure mempty)
<*> (textAttr "length" <|> pure mempty)
<* ignoreAttrs
atomText :: (MonadCatch m) => Text -> ConduitParser Event m AtomText
atomText name = named ("Atom text construct <" <> name <> ">") $ tagName' name (optional (textAttr "type") <* ignoreAttrs) handler
where handler (Just "xhtml") = AtomXHTMLText <$> tagIgnoreAttrs' "div" xhtmlContent
handler (Just "html") = AtomPlainText TypeHTML <$> textContent
handler _ = AtomPlainText TypeText <$> textContent
xhtmlContent :: MonadCatch m => ConduitParser Event m Text
xhtmlContent = mconcat <$> many (textContent <|> anyTag (\name attrs -> renderTag name attrs <$> xhtmlContent))
renderTag name attrs content = "<" <> nameLocalName name <> renderAttrs attrs <> ">" <> content <> "</" <> nameLocalName name <> ">"
renderAttrs [] = ""
renderAttrs ((name, content):t) = " " <> nameLocalName name <> "=\"" <> mconcat (renderContent <$> content) <> "\"" <> renderAttrs t
renderContent (ContentText t) = t
renderContent (ContentEntity t) = t
atomGenerator :: (MonadCatch m) => ConduitParser Event m AtomGenerator
atomGenerator = tagName' "generator" generatorAttrs $ \(uri, version) -> AtomGenerator uri version <$> (asNonNull =<< textContent)
where generatorAttrs = (,) <$> optional (attr "uri" asURIReference) <*> (textAttr "version" <|> pure mempty) <* ignoreAttrs
data SourcePiece = SourceAuthor AtomPerson
| SourceCategory AtomCategory
| SourceContributor AtomPerson
| SourceGenerator AtomGenerator
| SourceIcon AtomURI
| SourceId Text
| SourceLink AtomLink
| SourceLogo AtomURI
| SourceRights AtomText
| SourceSubtitle AtomText
| SourceTitle AtomText
| SourceUpdated UTCTime
| SourceUnknown
makeTraversals ''SourcePiece
atomSource :: (MonadCatch m) => ConduitParser Event m AtomSource
atomSource = named "Atom <source> element" $ tagIgnoreAttrs' "source" $ do
p <- many piece
flip foldM p $ AtomSource
<$> generalize (handles _SourceAuthor list)
<*> generalize (handles _SourceCategory list)
<*> generalize (handles _SourceContributor list)
<*> generalize (handles _SourceGenerator last)
<*> generalize (handles _SourceIcon last)
<*> generalize (handles _SourceId $ lastDef "")
<*> generalize (handles _SourceLink list)
<*> generalize (handles _SourceLogo last)
<*> generalize (handles _SourceRights last)
<*> generalize (handles _SourceSubtitle last)
<*> generalize (handles _SourceTitle last)
<*> generalize (handles _SourceUpdated last)
where piece :: (MonadCatch m) => ConduitParser Event m SourcePiece
piece = choice [ SourceAuthor <$> atomPerson "author"
, SourceCategory <$> atomCategory
, SourceContributor <$> atomPerson "contributor"
, SourceGenerator <$> atomGenerator
, SourceIcon <$> atomIcon
, SourceId . toNullable <$> atomId
, SourceLink <$> atomLink
, SourceLogo <$> atomLogo
, SourceRights <$> atomText "rights"
, SourceSubtitle <$> atomText "subtitle"
, SourceTitle <$> atomText "title"
, SourceUpdated <$> tagDate "updated"
, SourceUnknown <$ unknownTag
]
data EntryPiece = EntryAuthor AtomPerson
| EntryCategory AtomCategory
| EntryContent AtomContent
| EntryContributor AtomPerson
| EntryId (NonNull Text)
| EntryLink AtomLink
| EntryPublished UTCTime
| EntryRights AtomText
| EntrySource AtomSource
| EntrySummary AtomText
| EntryTitle AtomText
| EntryUpdated UTCTime
| EntryUnknown
makeTraversals ''EntryPiece
atomEntry :: (MonadCatch m) => ConduitParser Event m AtomEntry
atomEntry = named "Atom <entry> element" $ tagIgnoreAttrs' "entry" $ do
p <- many piece
flip foldM p $ AtomEntry
<$> generalize (handles _EntryAuthor list)
<*> generalize (handles _EntryCategory list)
<*> generalize (handles _EntryContent last)
<*> generalize (handles _EntryContributor list)
<*> handlesM _EntryId (lastRequired "Missing or invalid <id> element.")
<*> generalize (handles _EntryLink list)
<*> generalize (handles _EntryPublished last)
<*> generalize (handles _EntryRights last)
<*> generalize (handles _EntrySource last)
<*> generalize (handles _EntrySummary last)
<*> handlesM _EntryTitle (lastRequired "Missing or invalid <title> element.")
<*> handlesM _EntryUpdated (lastRequired "Missing or invalid <updated> element.")
where piece :: (MonadCatch m) => ConduitParser Event m EntryPiece
piece = choice [ EntryAuthor <$> atomPerson "author"
, EntryCategory <$> atomCategory
, EntryContent <$> atomContent
, EntryContributor <$> atomPerson "contributor"
, EntryId <$> atomId
, EntryLink <$> atomLink
, EntryPublished <$> tagDate "published"
, EntryRights <$> atomText "rights"
, EntrySource <$> atomSource
, EntrySummary <$> atomText "summary"
, EntryTitle <$> atomText "title"
, EntryUpdated <$> tagDate "updated"
, EntryUnknown <$ unknownTag
]
data FeedPiece = FeedAuthor AtomPerson
| FeedCategory AtomCategory
| FeedContributor AtomPerson
| FeedEntry AtomEntry
| FeedGenerator AtomGenerator
| FeedIcon AtomURI
| FeedId (NonNull Text)
| FeedLink AtomLink
| FeedLogo AtomURI
| FeedRights AtomText
| FeedSubtitle AtomText
| FeedTitle AtomText
| FeedUpdated UTCTime
| FeedUnknown
makeTraversals ''FeedPiece
atomFeed :: (MonadCatch m) => ConduitParser Event m AtomFeed
atomFeed = named "Atom <feed> element" $ tagIgnoreAttrs' "feed" $ do
p <- many piece
flip foldM p $ AtomFeed
<$> generalize (handles _FeedAuthor list)
<*> generalize (handles _FeedCategory list)
<*> generalize (handles _FeedContributor list)
<*> generalize (handles _FeedEntry list)
<*> generalize (handles _FeedGenerator last)
<*> generalize (handles _FeedIcon last)
<*> handlesM _FeedId (lastRequired "Missing or empty <id> element.")
<*> generalize (handles _FeedLink list)
<*> generalize (handles _FeedLogo last)
<*> generalize (handles _FeedRights last)
<*> generalize (handles _FeedSubtitle last)
<*> handlesM _FeedTitle (lastRequired "Missing <title> element.")
<*> handlesM _FeedUpdated (lastRequired "Missing <updated> element.")
where piece :: MonadCatch m => ConduitParser Event m FeedPiece
piece = choice [ FeedAuthor <$> atomPerson "author"
, FeedCategory <$> atomCategory
, FeedContributor <$> atomPerson "contributor"
, FeedEntry <$> atomEntry
, FeedGenerator <$> atomGenerator
, FeedIcon <$> atomIcon
, FeedId <$> atomId
, FeedLink <$> atomLink
, FeedLogo <$> atomLogo
, FeedRights <$> atomText "rights"
, FeedSubtitle <$> atomText "subtitle"
, FeedTitle <$> atomText "title"
, FeedUpdated <$> tagDate "updated"
, FeedUnknown <$ unknownTag
]