-------------------------------------------------------------------------------- {-# LANGUAGE CPP #-} {-# LANGUAGE OverloadedStrings #-} module Hakyll.Check ( Check (..) , check ) where -------------------------------------------------------------------------------- import Control.Concurrent.MVar (MVar, newEmptyMVar, putMVar, readMVar) import Control.Exception (SomeAsyncException (..), SomeException (..), throw, try) import Control.Monad (foldM, forM_) import Control.Monad.Reader (ReaderT, ask, runReaderT) import Control.Monad.State (StateT, get, modify, runStateT) import Control.Monad.Trans (liftIO) import Control.Monad.Trans.Resource (runResourceT) import Data.List (isPrefixOf) import qualified Data.Map.Lazy as Map #if MIN_VERSION_base(4,9,0) import Data.Semigroup (Semigroup (..)) #endif import Network.URI (unEscapeString) import System.Directory (doesDirectoryExist, doesFileExist) import System.Exit (ExitCode (..)) import System.FilePath (takeDirectory, takeExtension, ()) import qualified Text.HTML.TagSoup as TS -------------------------------------------------------------------------------- #ifdef CHECK_EXTERNAL import Data.List (intercalate) import Data.Typeable (cast) import Data.Version (versionBranch) import GHC.Exts (fromString) import qualified Network.HTTP.Conduit as Http import qualified Network.HTTP.Types as Http import qualified Paths_hakyll as Paths_hakyll #endif -------------------------------------------------------------------------------- import Hakyll.Core.Configuration import Hakyll.Core.Logger (Logger) import qualified Hakyll.Core.Logger as Logger import Hakyll.Core.Util.File import Hakyll.Web.Html -------------------------------------------------------------------------------- data Check = All | InternalLinks deriving (Eq, Ord, Show) -------------------------------------------------------------------------------- check :: Configuration -> Logger -> Check -> IO ExitCode check config logger check' = do ((), state) <- runChecker checkDestination config logger check' failed <- countFailedLinks state return $ if failed > 0 then ExitFailure 1 else ExitSuccess -------------------------------------------------------------------------------- countFailedLinks :: CheckerState -> IO Int countFailedLinks state = foldM addIfFailure 0 (Map.elems state) where addIfFailure failures mvar = do checkerWrite <- readMVar mvar return $ failures + checkerFaulty checkerWrite -------------------------------------------------------------------------------- data CheckerRead = CheckerRead { checkerConfig :: Configuration , checkerLogger :: Logger , checkerCheck :: Check } -------------------------------------------------------------------------------- data CheckerWrite = CheckerWrite { checkerFaulty :: Int , checkerOk :: Int } deriving (Show) -------------------------------------------------------------------------------- #if MIN_VERSION_base(4,9,0) instance Semigroup CheckerWrite where (<>) (CheckerWrite f1 o1) (CheckerWrite f2 o2) = CheckerWrite (f1 + f2) (o1 + o2) instance Monoid CheckerWrite where mempty = CheckerWrite 0 0 mappend = (<>) #else instance Monoid CheckerWrite where mempty = CheckerWrite 0 0 mappend (CheckerWrite f1 o1) (CheckerWrite f2 o2) = CheckerWrite (f1 + f2) (o1 + o2) #endif -------------------------------------------------------------------------------- type CheckerState = Map.Map URL (MVar CheckerWrite) -------------------------------------------------------------------------------- type Checker a = ReaderT CheckerRead (StateT CheckerState IO) a -------------------------------------------------------------------------------- type URL = String -------------------------------------------------------------------------------- runChecker :: Checker a -> Configuration -> Logger -> Check -> IO (a, CheckerState) runChecker checker config logger check' = do let read' = CheckerRead { checkerConfig = config , checkerLogger = logger , checkerCheck = check' } Logger.flush logger runStateT (runReaderT checker read') Map.empty -------------------------------------------------------------------------------- checkDestination :: Checker () checkDestination = do config <- checkerConfig <$> ask files <- liftIO $ getRecursiveContents (const $ return False) (destinationDirectory config) let htmls = [ destinationDirectory config file | file <- files , takeExtension file == ".html" ] forM_ htmls checkFile -------------------------------------------------------------------------------- checkFile :: FilePath -> Checker () checkFile filePath = do logger <- checkerLogger <$> ask contents <- liftIO $ readFile filePath Logger.header logger $ "Checking file " ++ filePath let urls = getUrls $ TS.parseTags contents forM_ urls $ \url -> do Logger.debug logger $ "Checking link " ++ url m <- liftIO newEmptyMVar checkUrlIfNeeded filePath (canonicalizeUrl url) m where -- Check scheme-relative links canonicalizeUrl url = if schemeRelative url then "http:" ++ url else url schemeRelative = isPrefixOf "//" -------------------------------------------------------------------------------- checkUrlIfNeeded :: FilePath -> URL -> MVar CheckerWrite -> Checker () checkUrlIfNeeded filepath url m = do logger <- checkerLogger <$> ask needsCheck <- (== All) . checkerCheck <$> ask checked <- (url `Map.member`) <$> get if not needsCheck || checked then Logger.debug logger "Already checked, skipping" else do modify $ Map.insert url m checkUrl filepath url -------------------------------------------------------------------------------- checkUrl :: FilePath -> URL -> Checker () checkUrl filePath url | isExternal url = checkExternalUrl url | hasProtocol url = skip url $ Just "Unknown protocol, skipping" | otherwise = checkInternalUrl filePath url where validProtoChars = ['A'..'Z'] ++ ['a'..'z'] ++ ['0'..'9'] ++ "+-." hasProtocol str = case break (== ':') str of (proto, ':' : _) -> all (`elem` validProtoChars) proto _ -> False -------------------------------------------------------------------------------- ok :: URL -> Checker () ok url = putCheckResult url mempty {checkerOk = 1} -------------------------------------------------------------------------------- skip :: URL -> Maybe String -> Checker () skip url maybeReason = do logger <- checkerLogger <$> ask case maybeReason of Nothing -> return () Just reason -> Logger.debug logger reason putCheckResult url mempty {checkerOk = 1} -------------------------------------------------------------------------------- faulty :: URL -> Maybe String -> Checker () faulty url reason = do logger <- checkerLogger <$> ask Logger.error logger $ "Broken link to " ++ show url ++ explanation putCheckResult url mempty {checkerFaulty = 1} where formatExplanation = (" (" ++) . (++ ")") explanation = maybe "" formatExplanation reason -------------------------------------------------------------------------------- putCheckResult :: URL -> CheckerWrite -> Checker () putCheckResult url result = do state <- get let maybeMVar = Map.lookup url state case maybeMVar of Just m -> liftIO $ putMVar m result Nothing -> do logger <- checkerLogger <$> ask Logger.debug logger "Failed to find existing entry for checked URL" -------------------------------------------------------------------------------- checkInternalUrl :: FilePath -> URL -> Checker () checkInternalUrl base url = case url' of "" -> ok url _ -> do config <- checkerConfig <$> ask let dest = destinationDirectory config dir = takeDirectory base filePath | "/" `isPrefixOf` url' = dest ++ url' | otherwise = dir url' exists <- checkFileExists filePath if exists then ok url else faulty url Nothing where url' = stripFragments $ unEscapeString url -------------------------------------------------------------------------------- checkExternalUrl :: URL -> Checker () #ifdef CHECK_EXTERNAL checkExternalUrl url = do result <- requestExternalUrl url case result of Left (SomeException e) -> case (cast e :: Maybe SomeAsyncException) of Just ae -> throw ae _ -> faulty url (Just $ showException e) Right _ -> ok url where -- Convert exception to a concise form showException e = case cast e of Just (Http.HttpExceptionRequest _ e') -> show e' _ -> head $ words $ show e requestExternalUrl :: URL -> Checker (Either SomeException Bool) requestExternalUrl url = liftIO $ try $ do mgr <- Http.newManager Http.tlsManagerSettings runResourceT $ do request <- Http.parseRequest url response <- Http.http (settings request) mgr let code = Http.statusCode (Http.responseStatus response) return $ code >= 200 && code < 300 where -- Add additional request info settings r = r { Http.method = "HEAD" , Http.redirectCount = 10 , Http.requestHeaders = ("User-Agent", ua) : Http.requestHeaders r } -- Nice user agent info ua = fromString $ "hakyll-check/" ++ (intercalate "." $ map show $ versionBranch Paths_hakyll.version) #else checkExternalUrl url = skip url Nothing #endif -------------------------------------------------------------------------------- -- | Wraps doesFileExist, also checks for index.html checkFileExists :: FilePath -> Checker Bool checkFileExists filePath = liftIO $ do file <- doesFileExist filePath dir <- doesDirectoryExist filePath case (file, dir) of (True, _) -> return True (_, True) -> doesFileExist $ filePath "index.html" _ -> return False -------------------------------------------------------------------------------- stripFragments :: String -> String stripFragments = takeWhile (not . flip elem ['?', '#'])