{-# LANGUAGE DeriveGeneric #-} {-# LANGUAGE LambdaCase #-} {-# LANGUAGE OverloadedStrings #-} {-| The @Web.Sitemap.Gen@ module contains types & rendering functions to generate XML compliant with the sitemaps.org specification. For more information see https://www.sitemaps.org/protocol.html -} module Web.Sitemap.Gen ( -- * Sitemaps Sitemap(..) , renderSitemap , SitemapUrl(..) , renderSitemapUrl , ChangeFrequency(..) , renderChangeFrequency -- * Sitemap Indexes , SitemapIndex(..) , renderSitemapIndex , IndexEntry(..) , renderIndexEntry -- * Utilities , sitemapNamespace , formatSitemapTime , renderLastModified ) where import Data.Maybe ( catMaybes ) import Data.Time ( UTCTime , formatTime , defaultTimeLocale ) import GHC.Generics ( Generic ) import Text.XML.Generator ( Xml , Elem , XmlOutput , Namespace , xrender , doc , defaultDocInfo , xelemQ , namespace , xelems , xelemWithText , xtext , xelem ) import qualified Data.Text as T -- SITEMAPS -- | A 'Sitemap' contains multiple 'SitemapUrl' elements which describe -- crawlable locations for search engines. newtype Sitemap = Sitemap { sitemapUrls :: [SitemapUrl] } deriving (Show, Read, Eq, Generic) -- | Render a Sitemap into a output format supported by the @xmlgen@ package. -- -- In most cases you will want to generate a @ByteString@. renderSitemap :: XmlOutput x => Sitemap -> x renderSitemap sitemap = xrender $ doc defaultDocInfo $ xelemQ sitemapNamespace "urlset" $ xelems $ map renderSitemapUrl $ sitemapUrls sitemap -- | A 'SitemapUrl' describes a single URL in a 'Sitemap'. data SitemapUrl = SitemapUrl { sitemapLocation :: T.Text -- ^ The full URL of the page, including the protocol and -- domain name. , sitemapLastModified :: Maybe UTCTime -- ^ The time the page's content was last changed. , sitemapChangeFrequency :: Maybe ChangeFrequency -- ^ How often does the content at the URL change? , sitemapPriority :: Maybe Double -- ^ The relative priority of this URL compared to other URLs in -- the sitemap. } deriving (Show, Read, Eq, Generic) -- | Render a 'SitemapUrl' as a @url@ XML element. renderSitemapUrl :: SitemapUrl -> Xml Elem renderSitemapUrl url = xelem "url" $ xelems $ catMaybes [ Just $ xelemWithText "loc" $ sitemapLocation url , renderLastModified <$> sitemapLastModified url , xelem "changefreq" . renderChangeFrequency <$> sitemapChangeFrequency url , xelemWithText "priority" . T.pack . show <$> sitemapPriority url ] -- | Describes how often a SitemapUrl' is updated. This is considered -- a hint for crawlers and may or may not be respected. data ChangeFrequency = Always -- ^ The page changes every time it is visited. | Hourly | Daily | Weekly | Monthly | Yearly | Never -- ^ The page is archived and will never change from now on. deriving (Show, Read, Eq, Enum, Bounded, Generic) -- | Build the XML text content for a 'ChangeFrequency'. renderChangeFrequency :: ChangeFrequency -> Xml Elem renderChangeFrequency = xtext . \case Always -> "always" Hourly -> "hourly" Daily -> "daily" Weekly -> "weekly" Monthly -> "monthly" Yearly -> "yearly" Never -> "never" -- INDEXES -- | A 'SitemapIndex' allows informing crawlers of multiple sitemap files -- hosted on the same domain. -- -- See https://www.sitemaps.org/protocol.html#index newtype SitemapIndex = SitemapIndex { indexEntries :: [IndexEntry] } deriving (Show, Read, Eq, Generic) -- | Render a 'SitemapIndex' into an output format supported by the -- @xmlgen@ package. renderSitemapIndex :: XmlOutput x => SitemapIndex -> x renderSitemapIndex index = xrender $ doc defaultDocInfo $ xelemQ sitemapNamespace "sitemapindex" $ xelems $ map renderIndexEntry $ indexEntries index -- | A single sitemap entry for a sitemap index. data IndexEntry = IndexEntry { indexLocation :: T.Text -- ^ The Full URL of a Sitemap, including the protocol. -- -- E.g., @https://www.southernexposure.com/sitemap.xml@ , indexLastModified :: Maybe UTCTime -- ^ The time the sitemap was last changed. } deriving (Show, Read, Eq, Generic) -- | Render an 'IndexEntry' as a @sitemap@ element. renderIndexEntry :: IndexEntry -> Xml Elem renderIndexEntry entry = xelem "sitemap" $ xelems $ catMaybes [ Just $ xelemWithText "loc" $ indexLocation entry , renderLastModified <$> indexLastModified entry ] -- UTILS -- | An XML Namespace for the sitemaps.org @v0.9@ schema. sitemapNamespace :: Namespace sitemapNamespace = namespace "" "http://www.sitemaps.org/schemas/sitemap/0.9" -- | Render the 'UTCTime' in @YYYY-MM-DDTHH:MM:SS+00:00@ format. formatSitemapTime :: UTCTime -> T.Text formatSitemapTime = T.pack . formatTime defaultTimeLocale "%FT%T+00:00" -- | Render a 'UTCTime' in a @lastmod@ element. renderLastModified :: UTCTime -> Xml Elem renderLastModified = xelemWithText "lastmod" . formatSitemapTime