{-# LANGUAGE DeriveFunctor #-}
{-# LANGUAGE PartialTypeSignatures #-}
{-# OPTIONS_GHC -Wno-partial-type-signatures #-}
module Xrefcheck.Verify
(
VerifyResult (..)
, verifyOk
, verifyErrors
, verifying
, WithReferenceLoc (..)
, VerifyError (..)
, verifyRepo
, checkExternalResource
) where
import Control.Concurrent.Async (forConcurrently, withAsync)
import Control.Monad.Except (MonadError (..))
import qualified Data.Map as M
import qualified Data.Text as T
import Data.Text.Metrics (damerauLevenshteinNorm)
import Fmt (Buildable (..), blockListF', listF, (+|), (|+))
import qualified GHC.Exts as Exts
import Network.HTTP.Client (HttpException (..), HttpExceptionContent (..), responseStatus)
import Network.HTTP.Req (GET (..), HEAD (..), HttpException (..), NoReqBody (..), defaultHttpConfig,
ignoreResponse, req, runReq, useURI)
import Network.HTTP.Types.Status (Status, statusCode, statusMessage)
import System.Console.Pretty (Style (..), style)
import System.Directory (canonicalizePath, doesDirectoryExist, doesFileExist)
import System.FilePath (takeDirectory, (</>))
import qualified System.FilePath.Glob as Glob
import Text.URI (mkURI)
import Time (RatioNat, Second, Time (..), ms, threadDelay, timeout)
import Xrefcheck.Config
import Xrefcheck.Core
import Xrefcheck.Progress
import Xrefcheck.System
{-# ANN module ("HLint: ignore Use uncurry" :: Text) #-}
{-# ANN module ("HLint: ignore Use 'runExceptT' from Universum" :: Text) #-}
newtype VerifyResult e = VerifyResult [e]
deriving (Show, Functor)
deriving instance Semigroup (VerifyResult e)
deriving instance Monoid (VerifyResult e)
instance Buildable e => Buildable (VerifyResult e) where
build vr = case verifyErrors vr of
Nothing -> "ok"
Just errs -> listF errs
verifyOk :: VerifyResult e -> Bool
verifyOk (VerifyResult errors) = null errors
verifyErrors :: VerifyResult e -> Maybe (NonEmpty e)
verifyErrors (VerifyResult errors) = nonEmpty errors
verifying :: Monad m => ExceptT e m () -> m (VerifyResult e)
verifying (ExceptT action) = fmap toVerifyRes action
toVerifyRes :: Either e () -> VerifyResult e
toVerifyRes = VerifyResult . either one (\() -> [])
data WithReferenceLoc a = WithReferenceLoc
{ wrlFile :: FilePath
, wrlReference :: Reference
, wrlItem :: a
}
instance Buildable a => Buildable (WithReferenceLoc a) where
build WithReferenceLoc{..} =
"In file " +| style Faint (style Bold wrlFile) |+ "\nbad " +| wrlReference |+ "\n"
+| wrlItem |+ "\n\n"
data VerifyError
= FileDoesNotExist FilePath
| AnchorDoesNotExist Text [Anchor]
| AmbiguousAnchorRef FilePath Text (NonEmpty Anchor)
| ExternalResourceInvalidUri
| ExternalResourceUnknownProtocol
| ExternalResourceUnavailable Status
| ExternalResourceSomeError Text
deriving (Show)
instance Buildable VerifyError where
build = \case
FileDoesNotExist file ->
"⛀ File does not exist:\n " +| file |+ "\n"
AnchorDoesNotExist anchor similar ->
"⛀ Anchor '" +| anchor |+ "' is not present" +|
anchorHints similar
AmbiguousAnchorRef file anchor fileAnchors ->
"⛀ Ambiguous reference to anchor '" +| anchor |+ "'\n " +|
"In file " +| file |+ "\n " +|
"Similar anchors are:\n" +|
blockListF' " -" build fileAnchors |+ "" +|
" Use of such anchors is discouraged because referenced object\n\
\ can change silently whereas the document containing it evolves.\n"
ExternalResourceInvalidUri ->
"⛂ Invalid url\n"
ExternalResourceUnknownProtocol ->
"⛂ Bad url (expected 'http' or 'https')\n"
ExternalResourceUnavailable status ->
"⛂ Resource unavailable (" +| statusCode status |+ " " +|
decodeUtf8 @Text (statusMessage status) |+ ")\n"
ExternalResourceSomeError err ->
"⛂ " +| build err |+ "\n\n"
where
anchorHints = \case
[] -> "\n"
[h] -> ",\n did you mean " +| h |+ "?\n"
hs -> ", did you mean:\n" +| blockListF' " -" build hs
verifyRepo
:: Rewrite
-> VerifyConfig
-> VerifyMode
-> FilePath
-> RepoInfo
-> IO (VerifyResult $ WithReferenceLoc VerifyError)
verifyRepo rw config@VerifyConfig{..} mode root repoInfo'@(RepoInfo repoInfo) = do
let toScan = do
(file, fileInfo) <- M.toList repoInfo
guard . not $ any ((`isPrefixOf` file) . (root </>)) vcNotScanned
ref <- _fiReferences fileInfo
return (file, ref)
progressRef <- newIORef $ initVerifyProgress (map snd toScan)
withAsync (printer progressRef) $ \_ ->
fmap fold . forConcurrently toScan $ \(file, ref) ->
verifyReference config mode progressRef repoInfo' root file ref
where
printer progressRef = forever $ do
readIORef progressRef >>= reprintAnalyseProgress rw mode
threadDelay (ms 100)
shouldCheckLocType :: VerifyMode -> LocationType -> Bool
shouldCheckLocType mode locType
| isExternal locType = shouldCheckExternal mode
| isLocal locType = shouldCheckLocal mode
| otherwise = False
verifyReference
:: VerifyConfig
-> VerifyMode
-> IORef VerifyProgress
-> RepoInfo
-> FilePath
-> FilePath
-> Reference
-> IO (VerifyResult $ WithReferenceLoc VerifyError)
verifyReference config@VerifyConfig{..} mode progressRef (RepoInfo repoInfo)
root fileWithReference ref@Reference{..} = do
let locType = locationType rLink
if shouldCheckLocType mode locType
then do
res <- case locType of
LocalLoc -> checkRef rAnchor fileWithReference
RelativeLoc -> checkRef rAnchor
(takeDirectory fileWithReference
</> toString (canonizeLocalRef rLink))
AbsoluteLoc -> checkRef rAnchor (root <> toString rLink)
ExternalLoc -> checkExternalResource config rLink
OtherLoc -> verifying pass
let moveProgress =
incProgress .
(if verifyOk res then id else incProgressErrors)
atomicModifyIORef' progressRef $ \VerifyProgress{..} ->
( if isExternal locType
then VerifyProgress{ vrExternal = moveProgress vrExternal, .. }
else VerifyProgress{ vrLocal = moveProgress vrLocal, .. }
, ()
)
return $ fmap (WithReferenceLoc fileWithReference ref) res
else return mempty
where
checkRef mAnchor referredFile = verifying $ do
checkReferredFileExists referredFile
case M.lookup referredFile repoInfo of
Nothing -> pass
Just referredFileInfo ->
whenJust mAnchor $ checkAnchor referredFile (_fiAnchors referredFileInfo)
checkReferredFileExists file = do
let fileExists = readingSystem $ doesFileExist file
let dirExists = readingSystem $ doesDirectoryExist file
let cfile = readingSystem $ canonicalizePath file
let isVirtual = or
[ Glob.match pat cfile
| virtualFile <- vcVirtualFiles
, let pat = bindGlobPattern root virtualFile ]
unless (fileExists || dirExists || isVirtual) $
throwError (FileDoesNotExist file)
checkAnchor file fileAnchors anchor = do
checkAnchorReferenceAmbiguity file fileAnchors anchor
checkDeduplicatedAnchorReference file fileAnchors anchor
checkAnchorExists fileAnchors anchor
checkAnchorReferenceAmbiguity file fileAnchors anchor = do
let similarAnchors = filter ((== anchor) . aName) fileAnchors
when (length similarAnchors > 1) $
throwError $ AmbiguousAnchorRef file anchor (Exts.fromList similarAnchors)
checkDeduplicatedAnchorReference file fileAnchors anchor =
whenJust (stripAnchorDupNo anchor) $ \origAnchor ->
checkAnchorReferenceAmbiguity file fileAnchors origAnchor
checkAnchorExists givenAnchors anchor =
case find ((== anchor) . aName) givenAnchors of
Just _ -> pass
Nothing ->
let isSimilar = (>= vcAnchorSimilarityThreshold)
similarAnchors =
filter (isSimilar . realToFrac . damerauLevenshteinNorm anchor . aName)
givenAnchors
in throwError $ AnchorDoesNotExist anchor similarAnchors
checkExternalResource :: VerifyConfig
-> Text
-> IO (VerifyResult VerifyError)
checkExternalResource VerifyConfig{..} link
| doesReferLocalhost = return mempty
| otherwise = fmap toVerifyRes $ do
makeRequest HEAD 0.3 >>= \case
Right () -> return $ Right ()
Left _ -> makeRequest GET 0.7
where
doesReferLocalhost = any (`T.isInfixOf` link) ["://localhost", "://127.0.0.1"]
makeRequest :: _ => method -> RatioNat -> IO (Either VerifyError ())
makeRequest method timeoutFrac = runExceptT $ do
uri <- mkURI link
& maybe (throwError ExternalResourceInvalidUri) pure
parsedUrl <- useURI uri
& maybe (throwError ExternalResourceUnknownProtocol) pure
let reqLink = case parsedUrl of
Left (url, option) ->
runReq defaultHttpConfig $
req method url NoReqBody ignoreResponse option
Right (url, option) ->
runReq defaultHttpConfig $
req method url NoReqBody ignoreResponse option
let maxTime = Time @Second $ unTime vcExternalRefCheckTimeout * timeoutFrac
mres <- liftIO (timeout maxTime $ void reqLink)
`catch` (either throwError (\() -> return (Just ())) . interpretErrors)
maybe (throwError $ ExternalResourceSomeError "Response timeout") pure mres
isAllowedErrorCode = or . sequence
[ (403 ==)
, (405 ==)
]
interpretErrors = \case
JsonHttpException _ -> error "External link JSON parse exception"
VanillaHttpException err -> case err of
InvalidUrlException{} -> error "External link URL invalid exception"
HttpExceptionRequest _ exc -> case exc of
StatusCodeException resp _
| isAllowedErrorCode (statusCode $ responseStatus resp) -> Right ()
| otherwise -> Left $ ExternalResourceUnavailable (responseStatus resp)
other -> Left . ExternalResourceSomeError $ show other