{-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE FlexibleContexts #-} {-# LANGUAGE FlexibleInstances #-} {-| Load, compose and render content. -} module Pencil.Content ( -- ** Page Page , load , load' , loadDir , loadDir' , loadAndRender , rename , to , move , useFilePath , escapeXml , getPageEnv, setPageEnv , filterByVar , sortByVar , groupByElements , insertPages -- ** Structure , Structure , struct , (<||) , (<|) , (<<|) , coll -- ** Resource , Resource , passthrough , loadResource , loadResources -- ** Render , Render(..) -- ** File paths and types , listDir , toExpected , toHtml , toCss , toDir , FileType , fileType , toExtension , HasFilePath(..) ) where import Pencil.App.Internal import Pencil.Config import Pencil.Content.Internal import Pencil.Content.Nodes import Pencil.Env import Pencil.Env.Internal import Pencil.Parser import Control.Monad (forM_, foldM, filterM) import Control.Monad.Except import Control.Monad.Reader import Data.List.NonEmpty (NonEmpty(..)) -- Import the NonEmpty data constructor, (:|) import qualified Data.HashMap.Strict as H import qualified Data.List as L import qualified Data.List.NonEmpty as NE import qualified Data.Maybe as M import qualified Data.Text as T import qualified Data.Text.IO as TIO import qualified System.Directory as D import qualified System.FilePath as FP import qualified Text.Pandoc.XML as XML -- | Lists files in given directory. The file paths returned is prefixed with the -- given directory. listDir :: Bool -- ^ Recursive if @True@. -> FilePath -> PencilApp [FilePath] listDir recursive dir = do let dir' = if null dir then dir else FP.addTrailingPathSeparator dir fps <- listDir_ recursive dir' return $ map (dir' ++) fps -- | 'listDir' helper. listDir_ :: Bool -> FilePath -> PencilApp [FilePath] listDir_ recursive dir = do sitePrefix <- asks getSourceDir -- List files (just the filename, without the fp directory prefix) listing <- liftIO $ D.listDirectory (sitePrefix ++ dir) -- Filter only for files (we have to add the right directory prefixes to the -- file check) files <- liftIO $ filterM (\f -> D.doesFileExist (sitePrefix ++ dir ++ f)) listing dirs <- liftIO $ filterM (\f -> D.doesDirectoryExist (sitePrefix ++ dir ++ f)) listing innerFiles <- if recursive then mapM (\d -> do ff <- listDir_ recursive (dir ++ d ++ "/") -- Add the inner directory as a prefix return (map (\f -> d ++ "/" ++ f) ff)) dirs else return [] return $ files ++ concat innerFiles ---------------------------------------------------------------------- -- Page ---------------------------------------------------------------------- -- | Applies the environment variables on the given pages. -- -- The 'Structure' is expected to be ordered by inner-most content first (such -- that the final, HTML structure layout is last in the list). -- -- The returned Page contains the fully-applied environment and the nodes of the -- fully rendered page in this.nodes. The `this.url` variable is set to the Page -- in the Structure that specified 'useFilePath'; if no Page specified this, -- then it defaults to the URL of the first (inner-most) Page. -- -- Variable application. The outer environment's variables are applied down into -- the inner environments. Once it hits the lowest environment, that page is -- rendered (and has access to all variables defined in the parent). The page's -- rendered content is now set as the @${body}@ variable in the environment. -- The parent page now gets rendered with this new environment, and so on. -- -- As an example, there is the common scenario where we have a default layout -- (e.g. @default.html@), with the full HTML structure, but no body. It has only -- a @${body}@ template variable inside. This is the parent layout. There is a -- child layout, the partial called "blog-post.html", which has HTML for -- rendering a blog post, like usage of ${postTitle} and ${postDate}. Inside -- this, there is another child layout, the blog post content itself, which -- defines the variables @postTitle@ and @postDate@, and may renderer parent -- variables such as @websiteTitle@. -- -- > +--------------+ -- > | | <--- default.html -- > | | Defines websiteTitle -- > | +---------+ | -- > | | |<+----- blog-post.html -- > | | +-----+ | | Renders ${postTitle}, ${postDate} -- > | | | | | | -- > | | | | | | -- > | | | |<+-+----- blog-article-content.markdown -- > | | | | | | Renders ${websiteTitle} -- > | | +-----+ | | Defines postTitle, postDate -- > | +---------+ | -- > +--------------+ -- -- In this case, we want to accumulate the environment variables, starting from -- default.html, to blog-post.html, and blog-article-content.markdown variables. -- Combine all of that, then render the blog post content. This content is then -- injected into the parent's environment as a @${body}@ variable, for use in -- blog-post.html. Now /that/ content is injected into the parent environment's -- @${body}@ variable, which is then used to render the full-blown HTML page. -- apply :: Structure -> PencilApp Page apply structure = do let reversed = NE.reverse (structureNodes structure) -- Collections cannot be the first element in the structure. It would be -- useless there, since nothing can reference it. let h = NE.head reversed when (isColl h) $ throwError (CollectionFirstInStructure (nodeName h)) setFilePath (getFilePath structure) <$> apply_ reversed -- | Apply list of @Page@s and convert to @Page@. -- -- It's simpler to implement if NonEmpty is ordered outer-structure first (e.g. -- HTML layout). -- apply_ :: NE.NonEmpty Node -> PencilApp Page apply_ (Node _ page :| []) = do env <- asks getEnv -- Evaluate this page's nodes using the combined env applyPage env page apply_ (Nodes name pages :| rest) = do -- Collection nodes must be the last element in the Structure. Before, you -- could attach more Pages after a collection (such that each page in the -- collection received the rest of the structure's env.) -- -- But we now disallow this for a couple of reasons: -- -- - Most use-cases of collections involve it being the last item anyways -- (e.g. list of blog posts) -- - Simplifes logic of choosing with Page becomes the default file path to be -- used in the Structure render. The rule is now: the last non-collection -- Page becomes the default file name. It was complicated with collections, -- since a collection had N pages to choose from. And most use-cases (e.g. -- list of blog posts) would then require you to call `useFilePath` for the -- last Page to specify. when (not (null rest)) $ throwError (CollectionNotLastInStructure name) env <- asks getEnv -- Apply the inner pages against the rest of the Structure. pages' <- mapM (\p -> apply_ (Node "body" p :| rest)) pages return $ Page ((H.insert name (VEnvList (map getPageEnv pages')) env)) "UNSPECIFIED_FILE_PATH" False False apply_ (Node name page :| (h : rest)) = do -- Apply the inner Pages to accumulate the inner environments and pages. Do it -- in a local env where this Page's env is merged with the current env. pageInner <- local (\c -> setEnv (merge (getPageEnv page) (getEnv c)) c) (apply_ (h :| rest)) -- At this point, pageInner's env is the accumulate of this page's env and all -- the inner pages' envs. -- -- Get the inner page's rendered content, and insert into a new env using the -- specified `name`. This assumes that the inner Page's content was rendered, -- above. Some pages don't have this.content (e.g. 'Nodes' pages). let env' = maybe (getPageEnv pageInner) (\content -> insertText name content (getPageEnv pageInner)) (getContent (getPageEnv pageInner)) -- Apply this current Page's nodes with the accumulated environment of all the -- inner Pages. applyPage env' page -- | Applies the Page by merging the given env with the Page's env, evaluating -- the nodes with the combined env, and generating a new env for the Page, -- containing @this.url@, @this.nodes@ and @this.content@ in the env. applyPage :: Env -> Page -> PencilApp Page applyPage env page = do let env' = merge (getPageEnv page) env -- Evaluate the Page's nodes with the specified environment. nodes <- evalNodes env' (getNodes (getPageEnv page)) -- Generate this Page's final file path. -- Insert the nodes and rendered content into the env. let env'' = (insertText "this.url" (T.pack ("/" ++ pageFilePath page)) . insert "this.nodes" (VNodes nodes) . insertText "this.content" (nodesToText (pageEscapeXml page) nodes)) env' -- Use inner-most Page's file path, as this will be the destination of the -- accumluated, final, rendered page. return $ page { pageEnv = env'' } -- | Render nodes. XML/HTML tags are escaped if @escpXml@ is True. nodesToText :: Bool -- ^ XML/HTML tags escaped if True. -> [PNode] -> T.Text nodesToText escXml nodes = (if escXml then escapeForXml else id) (renderNodes nodes) -- | Escape XML tags in the given Text. escapeForXml :: T.Text -> T.Text escapeForXml text = T.pack (XML.escapeStringForXML (T.unpack text)) -- | Sorts pages by an ordering function. sortByVar :: T.Text -- ^ Environment variable name. -> (Value -> Value -> Ordering) -- ^ Ordering function to compare Value against. If the variable is -- not in the Env, the Page will be placed at the bottom of the order. -> [Page] -> [Page] sortByVar var ordering = L.sortBy (\a b -> maybeOrdering ordering (H.lookup var (getPageEnv a)) (H.lookup var (getPageEnv b))) -- | Filters pages by a variable's value in the environment. filterByVar :: Bool -- ^ If true, include pages without the specified variable. -> T.Text -- ^ Environment variable name. -> (Value -> Bool) -> [Page] -> [Page] filterByVar includeMissing var f = L.filter (\p -> M.fromMaybe includeMissing (H.lookup var (getPageEnv p) >>= (Just . f))) -- | Given a variable (whose value is assumed to be an array of VText) and list -- of pages, groups the pages by the VText found in the variable. -- -- For example, say each Page has a variable "tags" that is a list of tags. The -- first Page has a "tags" variable that is an @VArray [VText "a"]@, and the -- second Page has a "tags" variable that is an @VArray [VText "a", VText "b"]@. -- The final output would be a map @fromList [("a", [page1, page2]), ("b", -- [page2])]@. groupByElements :: T.Text -- ^ Environment variable name. -> [Page] -> H.HashMap T.Text [Page] groupByElements var pages = -- This outer fold takes the list of pages, and accumulates the giant HashMap. L.foldl' (\acc page -> let x = H.lookup var (getPageEnv page) in case x of Just (VArray values) -> -- This fold takes each of the found values (each is a key in the -- hash map), and adds the current page (from the outer fold) into -- each of the key. L.foldl' (\hashmap envData -> case envData of -- Only insert Pages into the map if the variable is an VArray of -- VText. Alter the map to either (1) insert this current -- page into the existing list, or (2) create a new list (the -- key has never been seen) with just this page. VText val -> H.alter (\mv -> Just (page : M.fromMaybe [] mv)) val hashmap _ -> hashmap) acc values _ -> acc ) H.empty -- Reverse to keep ordering consistent inside hash map, since the fold -- prepends into accumulated list. (reverse pages) -- | Copy file from source to output dir. If both the input and output file -- paths are directories, recursively copy the contents from one to the other. copyFile :: FilePath -> FilePath -> PencilApp () copyFile fpIn fpOut = do sitePrefix <- asks getSourceDir outPrefix <- asks getOutputDir liftIO $ D.createDirectoryIfMissing True (FP.takeDirectory (outPrefix ++ fpOut)) if isDir fpIn && isDir fpOut -- Copying directories then do fps <- listDir True fpIn forM_ fps (\fp -> copyFile fp (fpOut ++ (FP.takeFileName fp))) else liftIO $ D.copyFile (sitePrefix ++ fpIn) (outPrefix ++ fpOut) -- | Loads a file as a 'Resource'. Use this for binary files (e.g. images) and -- for files without template directives that may still need conversion (e.g. -- Markdown to HTML, SASS to CSS). -- -- Generally, you can just use `loadAndRender` instead of this method. -- -- Loads and renders the image as-is. Underneath the hood this is just a file -- copy: -- -- > loadResource "images/profile.jpg" >>= render -- -- Loads and renders to @about.html@: -- -- > loadResource "about.markdown" >>= render -- loadResource :: FilePath -> PencilApp Resource loadResource fp = -- If we can load the Page as text file, convert to a Single. Otherwise if it -- wasn't a text file, then return a Passthroguh resource. This is where we -- finally handle the "checked" exception; that is, converting the Left error -- case (NotTextFile) into a Right case (Passthrough). fmap Single (load fp) `catchError` handle -- 'handle' requires FlexibleContexts where handle e = case e of NotTextFile _ -> return (Passthrough fp fp) _ -> throwError e -- | Loads file in given directory as 'Resource's. -- -- Generally, you can just use `loadAndRender` instead of this method. -- -- Load everything inside the @assets/@ folder, renaming converted files as -- expected (e.g. SCSS to CSS): -- -- > loadResources True True "assets/" -- loadResources :: Bool -- ^ Recursive if @True@. -> Bool -- ^ Handle as pass-throughs (file copy) if @True@. -> FilePath -> PencilApp [Resource] loadResources recursive pass dir = do fps <- listDir recursive dir if pass then return $ map (\fp -> Passthrough fp fp) fps else mapM loadResource fps -- | Loads file as a pass-through. There is no content conversion, and template -- directives are ignored. In essence this is a file copy. -- -- @ -- passthrough "robots.txt" >>= render -- -- render (move "images\/profile.jpg" \<$\> passthrough "images\/myProfile.jpg") -- @ -- passthrough :: FilePath -> PencilApp Resource passthrough fp = return $ Passthrough fp fp -- | Loads a file as a page, rendering the file (as determined by the file -- extension) into the proper output format (e.g. Markdown rendered to -- HTML, SCSS to CSS). Parses the template directives and preamble variables -- into its environment. -- -- This loads @index.markdown@ with the destination file path set to @index.html@: -- -- > load "index.markdown" -- -- Because this is already an HTML file, the file path is kept as @about.html@: -- -- > load "about.html" -- -- Using 'rename' and 'toDir', the destination file path becomes @pages\/about\/index.html@: -- -- @ -- 'render' $ 'rename' 'toDir' \<$\> load "pages/about.markdown" -- @ -- load :: FilePath -> PencilApp Page load fp = rename toExpected <$> load' fp -- | Like 'load', loads a file as a page. Unlike 'load', the source file path -- is used as the destination file path (i.e. the extension name is not changed). -- load' :: FilePath -> PencilApp Page load' fp = do (_, nodes) <- parseAndConvertTextFiles fp -- Filter out preamble nodes, since we've already injected preamble into the -- env. let env' = H.insert "this.nodes" (VNodes (filter (not . isPreamble) nodes)) (findEnv nodes) return $ Page env' fp False False -- | A version of 'load' for directories. -- -- @ -- layout <- load "layout.html" -- tutorials <- loadDir False "tutorials/" -- render $ fmap ((layout '<||') . 'rename' 'toDir') tutorials -- @ loadDir :: Bool -- ^ If True, recursively load files in the directory -> FilePath -> PencilApp [Page] loadDir = loadDirWith load -- | A version of load' for directories. Loads the files in the specified -- directory as pages. Keeps the original file path. -- -- @ -- tutorials <- loadDir' False "tutorials/" -- render $ fmap ((layout <||) . rename toDir) pages -- @ loadDir' :: Bool -- ^ Recursive if true -> FilePath -> PencilApp [Page] loadDir' = loadDirWith load' -- | Internal implemenation for loading directories to pgaes. loadDirWith :: (FilePath -> PencilApp Page) -> Bool -- ^ Recursive if true -> FilePath -> PencilApp [Page] loadDirWith loadF recur fp = do fps <- listDir recur fp foldM (\acc fpath -> do mp <- (loadF fpath >>= return . Just) `catchError` handle return (maybe acc (\p -> p : acc) mp)) [] (reverse fps) where handle e = case e of NotTextFile _ -> return Nothing _ -> throwError e -- | Loads and renders file, converting content if it's convertible (e.g. -- Markdown to HTML). The final file path is the "default conversion", if Pencil -- knows how to convert the file (e.g. .markdown to .html). Otherwise, the same -- file name is kept (e.g. .txt). -- -- Load @style.sass@, convert to CSS, and render as @style.css@: -- -- > loadAndRender "style.sass" -- -- Load, convert and render everything in the @assets/@ folder. Binary files are -- copied as-is without any further processing: -- -- > loadAndRender "assets/" -- loadAndRender :: FilePath -> PencilApp () loadAndRender fp = if isDir fp then loadResources True False fp >>= render else loadResource fp >>= render -- | Returns True if the given Node is a collection node. isColl :: Node -> Bool isColl (Nodes _ _) = True isColl _ = False -- | Get the node's name. nodeName :: Node -> T.Text nodeName (Node n _) = n nodeName (Nodes n _) = n -- | Creates a new structure from two pages. Pronounced "smash". -- -- @ -- layout <- load "layout.html" -- index <- load "index.markdown" -- render (layout <|| index) -- @ (<||) :: Page -> Page -> Structure (<||) x y = Structure { structureNodes = Node "body" y :| [Node "body" x] , structureFilePath = getFilePath y , structureFilePathFrozen = False } -- | Pushes @Page@ into @Structure@. Pronounced "push". -- -- @ -- layout <- load "layout.html" -- blogLayout <- load "blog-layout.html" -- blogPost <- load "myblogpost.markdown" -- render (layout <|| blogLayout <| blogPost) -- @ (<|) :: Structure -> Page -> Structure (<|) s p = s { structureNodes = NE.cons (Node "body" p) (structureNodes s) , structureFilePath = if structureFilePathFrozen s then structureFilePath s else getFilePath p , structureFilePathFrozen = structureFilePathFrozen s || pageUseFilePath p } -- | Pushes @Node@ into the @Structure@. Usually used in conjunction with 'coll'. -- -- @ -- blogLayout <- load "blog-layout.html" -- blogPosts <- 'Pencil.Blog.loadPosts' "posts/" -- render (struct blogLayout <<| coll "posts" blogPosts) -- @ (<<|) :: Structure -> Node -> Structure (<<|) s node = let (fp, frozen) = if structureFilePathFrozen s then (structureFilePath s, True) else case node of -- If collection, use the existing file path (ignore file paths in -- collection). This keeps with the rule that the default file path -- is the last non-collection page. Nodes _ _ -> (structureFilePath s, False) Node _ p -> (getFilePath p, pageUseFilePath p) in s { structureNodes = NE.cons node (structureNodes s) , structureFilePath = fp , structureFilePathFrozen = frozen } -- | Creates a collection 'Node'. Usually used in conjunction with '<<|'. coll :: T.Text -> [Page] -> Node coll = Nodes -- | Converts a @Page@ into a @Structure@. This is a "singleton" structure. struct :: Page -> Structure struct p = Structure { structureNodes = Node "body" p :| [] , structureFilePath = getFilePath p , structureFilePathFrozen = pageUseFilePath p } ---------------------------------------------------------------------- -- Render class ---------------------------------------------------------------------- -- | To render something is to create the output web pages, evaluating template -- directives into their final form using the current environment. class Render a where -- | Renders @a@ as web page(s). render :: a -> PencilApp () instance Render Resource where render (Single page) = render page render (Passthrough fpIn fpOut) = copyFile fpIn fpOut -- This requires FlexibleInstances. instance Render Structure where render s = apply s >>= render instance Render Page where render page = do let fpOut = pageFilePath page outPrefix <- asks getOutputDir let fpOut' = outPrefix ++ if isDir fpOut then fpOut ++ "index.html" else fpOut liftIO $ D.createDirectoryIfMissing True (FP.takeDirectory fpOut') liftIO $ TIO.writeFile fpOut' (renderNodes (getNodes (getPageEnv page))) -- This requires FlexibleInstances. instance Render r => Render [r] where render rs = forM_ rs render ---------------------------------------------------------------------- -- Environment modifications ---------------------------------------------------------------------- -- | Inserts pages into the environment. The pages are evaluated and applied before insertion. -- -- @ -- posts <- 'Pencil.Blog.loadPosts' "blog/" -- env <- asks 'getEnv' -- env' <- insertPages "posts" posts env -- @ insertPages :: T.Text -- ^ Environment variable name. -> [Page] -- ^ @Page@s to insert. -> Env -- ^ Environment to modify. -> PencilApp Env insertPages var pages env = do envs <- mapM (\p -> do p' <- apply (struct p) let text = renderNodes (getNodes (getPageEnv p')) let penv = insertText "this.content" text (getPageEnv p') return penv) pages return $ H.insert var (VEnvList envs) env