{-# LANGUAGE LambdaCase #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RecordWildCards #-}
module SeoCheck
( seoCheck,
runSeoCheck,
)
where
import Control.Applicative
import Control.Concurrent
import Control.Monad
import Control.Monad.IO.Class
import Control.Monad.Logger
import qualified Data.ByteString as SB
import qualified Data.ByteString.Lazy as LB
import qualified Data.CaseInsensitive as CI
import Data.IntMap (IntMap)
import qualified Data.IntMap.Strict as IM
import Data.List
import Data.Map (Map)
import qualified Data.Map as M
import Data.Maybe
import Data.Set (Set)
import qualified Data.Set as S
import Data.Text (Text)
import qualified Data.Text as T
import Data.Validity
import Network.HTTP.Client as HTTP
import Network.HTTP.Client.TLS as HTTP
import Network.HTTP.Types as HTTP
import Network.URI
import Rainbow
import SeoCheck.OptParse
import System.Exit
import Text.HTML.DOM as HTML
import Text.Show.Pretty (ppShow)
import Text.XML as XML
import UnliftIO hiding (link)
seoCheck :: IO ()
seoCheck :: IO ()
seoCheck = IO Settings
getSettings IO Settings -> (Settings -> IO ()) -> IO ()
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Settings -> IO ()
runSeoCheck
runSeoCheck :: Settings -> IO ()
runSeoCheck :: Settings -> IO ()
runSeoCheck settings :: Settings
settings@Settings {Maybe Int
Maybe Word
URI
LogLevel
setMaxDepth :: Settings -> Maybe Word
setFetchers :: Settings -> Maybe Int
setLogLevel :: Settings -> LogLevel
setUri :: Settings -> URI
setMaxDepth :: Maybe Word
setFetchers :: Maybe Int
setLogLevel :: LogLevel
setUri :: URI
..} = do
Manager
man <- IO Manager
forall (m :: * -> *). MonadIO m => m Manager
HTTP.newTlsManager
TQueue Link
queue <- IO (TQueue Link)
forall (m :: * -> *) a. MonadIO m => m (TQueue a)
newTQueueIO
TVar (Set URI)
seen <- Set URI -> IO (TVar (Set URI))
forall (m :: * -> *) a. MonadIO m => a -> m (TVar a)
newTVarIO Set URI
forall a. Set a
S.empty
TVar (Map Link Result)
results <- Map Link Result -> IO (TVar (Map Link Result))
forall (m :: * -> *) a. MonadIO m => a -> m (TVar a)
newTVarIO Map Link Result
forall k a. Map k a
M.empty
let fetchers :: Int
fetchers = Int -> Maybe Int -> Int
forall a. a -> Maybe a -> a
fromMaybe Int
1 Maybe Int
setFetchers
indexes :: [Int]
indexes = [Int
0 .. Int
fetchers Int -> Int -> Int
forall a. Num a => a -> a -> a
- Int
1]
TVar (IntMap Bool)
fetcherStati <- IntMap Bool -> IO (TVar (IntMap Bool))
forall (m :: * -> *) a. MonadIO m => a -> m (TVar a)
newTVarIO (IntMap Bool -> IO (TVar (IntMap Bool)))
-> IntMap Bool -> IO (TVar (IntMap Bool))
forall a b. (a -> b) -> a -> b
$ [(Int, Bool)] -> IntMap Bool
forall a. [(Int, a)] -> IntMap a
IM.fromList ([(Int, Bool)] -> IntMap Bool) -> [(Int, Bool)] -> IntMap Bool
forall a b. (a -> b) -> a -> b
$ [Int] -> [Bool] -> [(Int, Bool)]
forall a b. [a] -> [b] -> [(a, b)]
zip [Int]
indexes (Bool -> [Bool]
forall a. a -> [a]
repeat Bool
True)
STM () -> IO ()
forall (m :: * -> *) a. MonadIO m => STM a -> m a
atomically (STM () -> IO ()) -> STM () -> IO ()
forall a b. (a -> b) -> a -> b
$ TQueue Link -> Link -> STM ()
forall a. TQueue a -> a -> STM ()
writeTQueue TQueue Link
queue (LinkType -> URI -> Word -> Link
Link LinkType
A URI
setUri Word
0)
LoggingT IO () -> IO ()
forall (m :: * -> *) a. MonadIO m => LoggingT m a -> m a
runStderrLoggingT (LoggingT IO () -> IO ()) -> LoggingT IO () -> IO ()
forall a b. (a -> b) -> a -> b
$
(LogSource -> LogLevel -> Bool) -> LoggingT IO () -> LoggingT IO ()
forall (m :: * -> *) a.
(LogSource -> LogLevel -> Bool) -> LoggingT m a -> LoggingT m a
filterLogger (\LogSource
_ LogLevel
ll -> LogLevel
ll LogLevel -> LogLevel -> Bool
forall a. Ord a => a -> a -> Bool
>= LogLevel
setLogLevel) (LoggingT IO () -> LoggingT IO ())
-> LoggingT IO () -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ do
LogSource -> LoggingT IO ()
forall (m :: * -> *). MonadLogger m => LogSource -> m ()
logDebugN (LogSource -> LoggingT IO ()) -> LogSource -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ String -> LogSource
T.pack (String -> LogSource) -> String -> LogSource
forall a b. (a -> b) -> a -> b
$ Settings -> String
forall a. Show a => a -> String
ppShow Settings
settings
LogSource -> LoggingT IO ()
forall (m :: * -> *). MonadLogger m => LogSource -> m ()
logInfoN (LogSource -> LoggingT IO ()) -> LogSource -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ LogSource
"Running with " LogSource -> LogSource -> LogSource
forall a. Semigroup a => a -> a -> a
<> String -> LogSource
T.pack (Int -> String
forall a. Show a => a -> String
show Int
fetchers) LogSource -> LogSource -> LogSource
forall a. Semigroup a => a -> a -> a
<> LogSource
" fetchers"
[Int] -> (Int -> LoggingT IO ()) -> LoggingT IO ()
forall (m :: * -> *) (f :: * -> *) a b.
(MonadUnliftIO m, Foldable f) =>
f a -> (a -> m b) -> m ()
forConcurrently_ [Int]
indexes ((Int -> LoggingT IO ()) -> LoggingT IO ())
-> (Int -> LoggingT IO ()) -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ \Int
ix ->
Maybe Word
-> Manager
-> TQueue Link
-> TVar (Set URI)
-> TVar (Map Link Result)
-> TVar (IntMap Bool)
-> Int
-> LoggingT IO ()
worker Maybe Word
setMaxDepth Manager
man TQueue Link
queue TVar (Set URI)
seen TVar (Map Link Result)
results TVar (IntMap Bool)
fetcherStati Int
ix
Map Link Result
resultsMap <- TVar (Map Link Result) -> IO (Map Link Result)
forall (m :: * -> *) a. MonadIO m => TVar a -> m a
readTVarIO TVar (Map Link Result)
results
Chunk -> [ByteString] -> [ByteString]
bytestringMaker <- IO (Chunk -> [ByteString] -> [ByteString])
byteStringMakerFromEnvironment
([Chunk] -> IO ()) -> [[Chunk]] -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ ((ByteString -> IO ()) -> [ByteString] -> IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ ByteString -> IO ()
SB.putStr ([ByteString] -> IO ())
-> ([Chunk] -> [ByteString]) -> [Chunk] -> IO ()
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Chunk -> [ByteString] -> [ByteString]) -> [Chunk] -> [ByteString]
chunksToByteStrings Chunk -> [ByteString] -> [ByteString]
bytestringMaker) ([[Chunk]] -> IO ()) -> [[Chunk]] -> IO ()
forall a b. (a -> b) -> a -> b
$ SEOResult -> [[Chunk]]
renderSEOResult (SEOResult -> [[Chunk]]) -> SEOResult -> [[Chunk]]
forall a b. (a -> b) -> a -> b
$ SEOResult :: Map Link Result -> SEOResult
SEOResult {seoResultPageResults :: Map Link Result
seoResultPageResults = Map Link Result
resultsMap}
Bool -> IO () -> IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when ((Result -> Bool) -> Map Link Result -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any Result -> Bool
resultBad Map Link Result
resultsMap) (IO () -> IO ()) -> IO () -> IO ()
forall a b. (a -> b) -> a -> b
$
ExitCode -> IO ()
forall a. ExitCode -> IO a
exitWith (ExitCode -> IO ()) -> ExitCode -> IO ()
forall a b. (a -> b) -> a -> b
$
Int -> ExitCode
ExitFailure Int
1
newtype SEOResult = SEOResult
{ :: Map Link Result
}
deriving (Int -> SEOResult -> ShowS
[SEOResult] -> ShowS
SEOResult -> String
(Int -> SEOResult -> ShowS)
-> (SEOResult -> String)
-> ([SEOResult] -> ShowS)
-> Show SEOResult
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [SEOResult] -> ShowS
$cshowList :: [SEOResult] -> ShowS
show :: SEOResult -> String
$cshow :: SEOResult -> String
showsPrec :: Int -> SEOResult -> ShowS
$cshowsPrec :: Int -> SEOResult -> ShowS
Show, SEOResult -> SEOResult -> Bool
(SEOResult -> SEOResult -> Bool)
-> (SEOResult -> SEOResult -> Bool) -> Eq SEOResult
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: SEOResult -> SEOResult -> Bool
$c/= :: SEOResult -> SEOResult -> Bool
== :: SEOResult -> SEOResult -> Bool
$c== :: SEOResult -> SEOResult -> Bool
Eq)
renderSEOResult :: SEOResult -> [[Chunk]]
renderSEOResult :: SEOResult -> [[Chunk]]
renderSEOResult SEOResult {Map Link Result
seoResultPageResults :: Map Link Result
seoResultPageResults :: SEOResult -> Map Link Result
..} = [[[Chunk]]] -> [[Chunk]]
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat ([[[Chunk]]] -> [[Chunk]]) -> [[[Chunk]]] -> [[Chunk]]
forall a b. (a -> b) -> a -> b
$ ((Link, Result) -> Maybe [[Chunk]])
-> [(Link, Result)] -> [[[Chunk]]]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe ((Link -> Result -> Maybe [[Chunk]])
-> (Link, Result) -> Maybe [[Chunk]]
forall a b c. (a -> b -> c) -> (a, b) -> c
uncurry Link -> Result -> Maybe [[Chunk]]
renderPageResult) (Map Link Result -> [(Link, Result)]
forall k a. Map k a -> [(k, a)]
M.toList Map Link Result
seoResultPageResults)
renderPageResult :: Link -> Result -> Maybe [[Chunk]]
Link
link r :: Result
r@Result {Maybe DocResult
Status
resultDocResult :: Result -> Maybe DocResult
resultStatus :: Result -> Status
resultDocResult :: Maybe DocResult
resultStatus :: Status
..} =
if Result -> Bool
resultBad Result
r
then [[Chunk]] -> Maybe [[Chunk]]
forall a. a -> Maybe a
Just [[Chunk]]
go
else Maybe [[Chunk]]
forall a. Maybe a
Nothing
where
go :: [[Chunk]]
go :: [[Chunk]]
go =
[Chunk] -> [[Chunk]] -> [[Chunk]]
forall a. a -> [a] -> [a]
intersperse [LogSource -> Chunk
chunk LogSource
"\n"] ([[Chunk]] -> [[Chunk]]) -> [[Chunk]] -> [[Chunk]]
forall a b. (a -> b) -> a -> b
$
[ [Radiant -> Chunk -> Chunk
fore Radiant
blue (Chunk -> Chunk) -> Chunk -> Chunk
forall a b. (a -> b) -> a -> b
$ LogSource -> Chunk
chunk (LogSource -> Chunk) -> LogSource -> Chunk
forall a b. (a -> b) -> a -> b
$ String -> LogSource
T.pack (String -> LogSource) -> String -> LogSource
forall a b. (a -> b) -> a -> b
$ URI -> String
forall a. Show a => a -> String
show (Link -> URI
linkUri Link
link)],
Status -> [Chunk]
renderStatusResult Status
resultStatus
]
[[Chunk]] -> [[Chunk]] -> [[Chunk]]
forall a. [a] -> [a] -> [a]
++ [[Chunk]]
-> (DocResult -> [[Chunk]]) -> Maybe DocResult -> [[Chunk]]
forall b a. b -> (a -> b) -> Maybe a -> b
maybe [] DocResult -> [[Chunk]]
renderDocResult Maybe DocResult
resultDocResult
renderStatusResult :: HTTP.Status -> [Chunk]
renderStatusResult :: Status -> [Chunk]
renderStatusResult Status
s =
[ LogSource -> Chunk
chunk LogSource
"Status: ",
Radiant -> Chunk -> Chunk
fore Radiant
col (Chunk -> Chunk) -> Chunk -> Chunk
forall a b. (a -> b) -> a -> b
$ LogSource -> Chunk
chunk (LogSource -> Chunk) -> LogSource -> Chunk
forall a b. (a -> b) -> a -> b
$ String -> LogSource
T.pack (String -> LogSource) -> String -> LogSource
forall a b. (a -> b) -> a -> b
$ Int -> String
forall a. Show a => a -> String
show Int
sci
]
where
sci :: Int
sci = Status -> Int
HTTP.statusCode Status
s
col :: Radiant
col = if Int
200 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
sci Bool -> Bool -> Bool
&& Int
sci Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
300 then Radiant
green else Radiant
red
renderDocResult :: DocResult -> [[Chunk]]
renderDocResult :: DocResult -> [[Chunk]]
renderDocResult DocResult {[Link]
Set LogSource
DescriptionResult
TitleResult
DocTypeResult
docResultImagesWithoutAlt :: DocResult -> Set LogSource
docResultDescription :: DocResult -> DescriptionResult
docResultTitle :: DocResult -> TitleResult
docResultDocType :: DocResult -> DocTypeResult
docResultLinks :: DocResult -> [Link]
docResultImagesWithoutAlt :: Set LogSource
docResultDescription :: DescriptionResult
docResultTitle :: TitleResult
docResultDocType :: DocTypeResult
docResultLinks :: [Link]
..} =
[ [ LogSource -> Chunk
chunk LogSource
"Doctype: ",
case DocTypeResult
docResultDocType of
DocTypeResult
HtmlDocType -> Radiant -> Chunk -> Chunk
fore Radiant
green (Chunk -> Chunk) -> Chunk -> Chunk
forall a b. (a -> b) -> a -> b
$ LogSource -> Chunk
chunk LogSource
"html"
DocTypeResult
UnknownDocType -> Radiant -> Chunk -> Chunk
fore Radiant
red (Chunk -> Chunk) -> Chunk -> Chunk
forall a b. (a -> b) -> a -> b
$ LogSource -> Chunk
chunk LogSource
"Unknown doctype"
DocTypeResult
NoDocType -> Radiant -> Chunk -> Chunk
fore Radiant
red (Chunk -> Chunk) -> Chunk -> Chunk
forall a b. (a -> b) -> a -> b
$ LogSource -> Chunk
chunk LogSource
"No doctype"
],
[ LogSource -> Chunk
chunk LogSource
"Title: ",
case TitleResult
docResultTitle of
TitleResult
NoTitleFound -> Radiant -> Chunk -> Chunk
fore Radiant
red (Chunk -> Chunk) -> Chunk -> Chunk
forall a b. (a -> b) -> a -> b
$ LogSource -> Chunk
chunk LogSource
"No title"
TitleResult
EmptyTitle -> Radiant -> Chunk -> Chunk
fore Radiant
red (Chunk -> Chunk) -> Chunk -> Chunk
forall a b. (a -> b) -> a -> b
$ LogSource -> Chunk
chunk LogSource
"Empty title"
NonStandardTitle Element
e -> Radiant -> Chunk -> Chunk
fore Radiant
red (Chunk -> Chunk) -> Chunk -> Chunk
forall a b. (a -> b) -> a -> b
$ LogSource -> Chunk
chunk (LogSource -> Chunk) -> LogSource -> Chunk
forall a b. (a -> b) -> a -> b
$ String -> LogSource
T.pack (String -> LogSource) -> String -> LogSource
forall a b. (a -> b) -> a -> b
$ String
"Non-standard title: " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Element -> String
forall a. Show a => a -> String
show Element
e
TitleFound LogSource
t -> Radiant -> Chunk -> Chunk
fore Radiant
green (Chunk -> Chunk) -> Chunk -> Chunk
forall a b. (a -> b) -> a -> b
$ LogSource -> Chunk
chunk LogSource
t
],
[ LogSource -> Chunk
chunk LogSource
"Description: ",
case DescriptionResult
docResultDescription of
Description LogSource
d -> Radiant -> Chunk -> Chunk
fore Radiant
green (Chunk -> Chunk) -> Chunk -> Chunk
forall a b. (a -> b) -> a -> b
$ LogSource -> Chunk
chunk LogSource
d
DescriptionResult
EmptyDescription -> Radiant -> Chunk -> Chunk
fore Radiant
red (Chunk -> Chunk) -> Chunk -> Chunk
forall a b. (a -> b) -> a -> b
$ LogSource -> Chunk
chunk LogSource
"Empty description"
DescriptionResult
NoDescription -> Radiant -> Chunk -> Chunk
fore Radiant
red (Chunk -> Chunk) -> Chunk -> Chunk
forall a b. (a -> b) -> a -> b
$ LogSource -> Chunk
chunk LogSource
"No description"
DescriptionResult
MultipleDescriptions -> Radiant -> Chunk -> Chunk
fore Radiant
red (Chunk -> Chunk) -> Chunk -> Chunk
forall a b. (a -> b) -> a -> b
$ LogSource -> Chunk
chunk LogSource
"Multiple descriptions"
NonStandardDescription Element
e -> Radiant -> Chunk -> Chunk
fore Radiant
red (Chunk -> Chunk) -> Chunk -> Chunk
forall a b. (a -> b) -> a -> b
$ LogSource -> Chunk
chunk (LogSource -> Chunk) -> LogSource -> Chunk
forall a b. (a -> b) -> a -> b
$ String -> LogSource
T.pack (String -> LogSource) -> String -> LogSource
forall a b. (a -> b) -> a -> b
$ String
"Non-standard description: " String -> ShowS
forall a. Semigroup a => a -> a -> a
<> Element -> String
forall a. Show a => a -> String
show Element
e
],
[ LogSource -> Chunk
chunk LogSource
"Images without Alt: ",
case Set LogSource -> [LogSource]
forall a. Set a -> [a]
S.toList Set LogSource
docResultImagesWithoutAlt of
[] -> Radiant -> Chunk -> Chunk
fore Radiant
green (Chunk -> Chunk) -> Chunk -> Chunk
forall a b. (a -> b) -> a -> b
$ LogSource -> Chunk
chunk LogSource
"None"
[LogSource]
is -> Radiant -> Chunk -> Chunk
fore Radiant
red (Chunk -> Chunk) -> Chunk -> Chunk
forall a b. (a -> b) -> a -> b
$ LogSource -> Chunk
chunk (LogSource -> Chunk) -> LogSource -> Chunk
forall a b. (a -> b) -> a -> b
$ String -> LogSource
T.pack (String -> LogSource) -> String -> LogSource
forall a b. (a -> b) -> a -> b
$ [LogSource] -> String
forall a. Show a => a -> String
show [LogSource]
is
],
[LogSource -> Chunk
chunk LogSource
"\n"]
]
worker ::
Maybe Word ->
HTTP.Manager ->
TQueue Link ->
TVar (Set URI) ->
TVar (Map Link Result) ->
TVar (IntMap Bool) ->
Int ->
LoggingT IO ()
worker :: Maybe Word
-> Manager
-> TQueue Link
-> TVar (Set URI)
-> TVar (Map Link Result)
-> TVar (IntMap Bool)
-> Int
-> LoggingT IO ()
worker Maybe Word
maxDepth Manager
man TQueue Link
queue TVar (Set URI)
seen TVar (Map Link Result)
results TVar (IntMap Bool)
stati Int
index = Bool -> LoggingT IO ()
go Bool
True
where
setStatus :: Bool -> m ()
setStatus Bool
b = STM () -> m ()
forall (m :: * -> *) a. MonadIO m => STM a -> m a
atomically (STM () -> m ()) -> STM () -> m ()
forall a b. (a -> b) -> a -> b
$ TVar (IntMap Bool) -> (IntMap Bool -> IntMap Bool) -> STM ()
forall a. TVar a -> (a -> a) -> STM ()
modifyTVar' TVar (IntMap Bool)
stati ((IntMap Bool -> IntMap Bool) -> STM ())
-> (IntMap Bool -> IntMap Bool) -> STM ()
forall a b. (a -> b) -> a -> b
$ Int -> Bool -> IntMap Bool -> IntMap Bool
forall a. Int -> a -> IntMap a -> IntMap a
IM.insert Int
index Bool
b
setBusy :: LoggingT IO ()
setBusy = Bool -> LoggingT IO ()
forall (m :: * -> *). MonadIO m => Bool -> m ()
setStatus Bool
True
setIdle :: LoggingT IO ()
setIdle = Bool -> LoggingT IO ()
forall (m :: * -> *). MonadIO m => Bool -> m ()
setStatus Bool
False
allDone :: MonadIO m => m Bool
allDone :: m Bool
allDone = (Bool -> Bool) -> IntMap Bool -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
all Bool -> Bool
not (IntMap Bool -> Bool) -> m (IntMap Bool) -> m Bool
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> TVar (IntMap Bool) -> m (IntMap Bool)
forall (m :: * -> *) a. MonadIO m => TVar a -> m a
readTVarIO TVar (IntMap Bool)
stati
go :: Bool -> LoggingT IO ()
go Bool
busy = do
Maybe Link
mv <- STM (Maybe Link) -> LoggingT IO (Maybe Link)
forall (m :: * -> *) a. MonadIO m => STM a -> m a
atomically (STM (Maybe Link) -> LoggingT IO (Maybe Link))
-> STM (Maybe Link) -> LoggingT IO (Maybe Link)
forall a b. (a -> b) -> a -> b
$ TQueue Link -> STM (Maybe Link)
forall a. TQueue a -> STM (Maybe a)
tryReadTQueue TQueue Link
queue
case Maybe Link
mv of
Maybe Link
Nothing -> do
LogSource -> LoggingT IO ()
forall (m :: * -> *). MonadLogger m => LogSource -> m ()
logDebugN (LogSource -> LoggingT IO ()) -> LogSource -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ LogSource
"Worker is idle: " LogSource -> LogSource -> LogSource
forall a. Semigroup a => a -> a -> a
<> String -> LogSource
T.pack (Int -> String
forall a. Show a => a -> String
show Int
index)
Bool -> LoggingT IO () -> LoggingT IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
busy LoggingT IO ()
setIdle
Bool
ad <- LoggingT IO Bool
forall (m :: * -> *). MonadIO m => m Bool
allDone
Bool -> LoggingT IO () -> LoggingT IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless Bool
ad (LoggingT IO () -> LoggingT IO ())
-> LoggingT IO () -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ do
IO () -> LoggingT IO ()
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO () -> LoggingT IO ()) -> IO () -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ Int -> IO ()
threadDelay Int
10000
Bool -> LoggingT IO ()
go Bool
False
Just Link
link -> do
LogSource -> LoggingT IO ()
forall (m :: * -> *). MonadLogger m => LogSource -> m ()
logDebugN (LogSource -> LoggingT IO ()) -> LogSource -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ LogSource
"Worker is busy: " LogSource -> LogSource -> LogSource
forall a. Semigroup a => a -> a -> a
<> String -> LogSource
T.pack (Int -> String
forall a. Show a => a -> String
show Int
index)
Bool -> LoggingT IO () -> LoggingT IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
unless Bool
busy LoggingT IO ()
setBusy
Bool
alreadySeen <- URI -> Set URI -> Bool
forall a. Ord a => a -> Set a -> Bool
S.member (Link -> URI
linkUri Link
link) (Set URI -> Bool) -> LoggingT IO (Set URI) -> LoggingT IO Bool
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> TVar (Set URI) -> LoggingT IO (Set URI)
forall (m :: * -> *) a. MonadIO m => TVar a -> m a
readTVarIO TVar (Set URI)
seen
if Bool
alreadySeen
then do
LogSource -> LoggingT IO ()
forall (m :: * -> *). MonadLogger m => LogSource -> m ()
logDebugN (LogSource -> LoggingT IO ()) -> LogSource -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ LogSource
"Not fetching again: " LogSource -> LogSource -> LogSource
forall a. Semigroup a => a -> a -> a
<> String -> LogSource
T.pack (URI -> String
forall a. Show a => a -> String
show (Link -> URI
linkUri Link
link))
() -> LoggingT IO ()
forall (f :: * -> *) a. Applicative f => a -> f a
pure ()
else do
STM () -> LoggingT IO ()
forall (m :: * -> *) a. MonadIO m => STM a -> m a
atomically (STM () -> LoggingT IO ()) -> STM () -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ TVar (Set URI) -> (Set URI -> Set URI) -> STM ()
forall a. TVar a -> (a -> a) -> STM ()
modifyTVar' TVar (Set URI)
seen ((Set URI -> Set URI) -> STM ()) -> (Set URI -> Set URI) -> STM ()
forall a b. (a -> b) -> a -> b
$ URI -> Set URI -> Set URI
forall a. Ord a => a -> Set a -> Set a
S.insert (Link -> URI
linkUri Link
link)
Maybe Result
mres <- Maybe Word -> Manager -> Link -> LoggingT IO (Maybe Result)
produceResult Maybe Word
maxDepth Manager
man Link
link
Maybe Result -> (Result -> LoggingT IO ()) -> LoggingT IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ Maybe Result
mres ((Result -> LoggingT IO ()) -> LoggingT IO ())
-> (Result -> LoggingT IO ()) -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ \Result
res -> do
STM () -> LoggingT IO ()
forall (m :: * -> *) a. MonadIO m => STM a -> m a
atomically (STM () -> LoggingT IO ()) -> STM () -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ TVar (Map Link Result)
-> (Map Link Result -> Map Link Result) -> STM ()
forall a. TVar a -> (a -> a) -> STM ()
modifyTVar' TVar (Map Link Result)
results ((Map Link Result -> Map Link Result) -> STM ())
-> (Map Link Result -> Map Link Result) -> STM ()
forall a b. (a -> b) -> a -> b
$ Link -> Result -> Map Link Result -> Map Link Result
forall k a. Ord k => k -> a -> Map k a -> Map k a
M.insert Link
link Result
res
let recurse :: Bool
recurse = case Maybe Word
maxDepth of
Maybe Word
Nothing -> Bool
True
Just Word
md -> Link -> Word
linkDepth Link
link Word -> Word -> Bool
forall a. Ord a => a -> a -> Bool
< Word
md
Bool -> LoggingT IO () -> LoggingT IO ()
forall (f :: * -> *). Applicative f => Bool -> f () -> f ()
when Bool
recurse (LoggingT IO () -> LoggingT IO ())
-> LoggingT IO () -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$
Maybe [Link] -> ([Link] -> LoggingT IO ()) -> LoggingT IO ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
t a -> (a -> m b) -> m ()
forM_ (DocResult -> [Link]
docResultLinks (DocResult -> [Link]) -> Maybe DocResult -> Maybe [Link]
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Result -> Maybe DocResult
resultDocResult Result
res) (([Link] -> LoggingT IO ()) -> LoggingT IO ())
-> ([Link] -> LoggingT IO ()) -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ \[Link]
uris ->
STM () -> LoggingT IO ()
forall (m :: * -> *) a. MonadIO m => STM a -> m a
atomically (STM () -> LoggingT IO ()) -> STM () -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ (Link -> STM ()) -> [Link] -> STM ()
forall (t :: * -> *) (m :: * -> *) a b.
(Foldable t, Monad m) =>
(a -> m b) -> t a -> m ()
mapM_ (TQueue Link -> Link -> STM ()
forall a. TQueue a -> a -> STM ()
writeTQueue TQueue Link
queue) [Link]
uris
Bool -> LoggingT IO ()
go Bool
True
data Link = Link
{ Link -> LinkType
linkType :: !LinkType,
Link -> URI
linkUri :: !URI,
Link -> Word
linkDepth :: !Word
}
deriving (Int -> Link -> ShowS
[Link] -> ShowS
Link -> String
(Int -> Link -> ShowS)
-> (Link -> String) -> ([Link] -> ShowS) -> Show Link
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Link] -> ShowS
$cshowList :: [Link] -> ShowS
show :: Link -> String
$cshow :: Link -> String
showsPrec :: Int -> Link -> ShowS
$cshowsPrec :: Int -> Link -> ShowS
Show, Link -> Link -> Bool
(Link -> Link -> Bool) -> (Link -> Link -> Bool) -> Eq Link
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Link -> Link -> Bool
$c/= :: Link -> Link -> Bool
== :: Link -> Link -> Bool
$c== :: Link -> Link -> Bool
Eq, Eq Link
Eq Link
-> (Link -> Link -> Ordering)
-> (Link -> Link -> Bool)
-> (Link -> Link -> Bool)
-> (Link -> Link -> Bool)
-> (Link -> Link -> Bool)
-> (Link -> Link -> Link)
-> (Link -> Link -> Link)
-> Ord Link
Link -> Link -> Bool
Link -> Link -> Ordering
Link -> Link -> Link
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: Link -> Link -> Link
$cmin :: Link -> Link -> Link
max :: Link -> Link -> Link
$cmax :: Link -> Link -> Link
>= :: Link -> Link -> Bool
$c>= :: Link -> Link -> Bool
> :: Link -> Link -> Bool
$c> :: Link -> Link -> Bool
<= :: Link -> Link -> Bool
$c<= :: Link -> Link -> Bool
< :: Link -> Link -> Bool
$c< :: Link -> Link -> Bool
compare :: Link -> Link -> Ordering
$ccompare :: Link -> Link -> Ordering
$cp1Ord :: Eq Link
Ord)
data LinkType
= A
| IMG
| LINK
deriving (Int -> LinkType -> ShowS
[LinkType] -> ShowS
LinkType -> String
(Int -> LinkType -> ShowS)
-> (LinkType -> String) -> ([LinkType] -> ShowS) -> Show LinkType
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [LinkType] -> ShowS
$cshowList :: [LinkType] -> ShowS
show :: LinkType -> String
$cshow :: LinkType -> String
showsPrec :: Int -> LinkType -> ShowS
$cshowsPrec :: Int -> LinkType -> ShowS
Show, LinkType -> LinkType -> Bool
(LinkType -> LinkType -> Bool)
-> (LinkType -> LinkType -> Bool) -> Eq LinkType
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: LinkType -> LinkType -> Bool
$c/= :: LinkType -> LinkType -> Bool
== :: LinkType -> LinkType -> Bool
$c== :: LinkType -> LinkType -> Bool
Eq, Eq LinkType
Eq LinkType
-> (LinkType -> LinkType -> Ordering)
-> (LinkType -> LinkType -> Bool)
-> (LinkType -> LinkType -> Bool)
-> (LinkType -> LinkType -> Bool)
-> (LinkType -> LinkType -> Bool)
-> (LinkType -> LinkType -> LinkType)
-> (LinkType -> LinkType -> LinkType)
-> Ord LinkType
LinkType -> LinkType -> Bool
LinkType -> LinkType -> Ordering
LinkType -> LinkType -> LinkType
forall a.
Eq a
-> (a -> a -> Ordering)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> Bool)
-> (a -> a -> a)
-> (a -> a -> a)
-> Ord a
min :: LinkType -> LinkType -> LinkType
$cmin :: LinkType -> LinkType -> LinkType
max :: LinkType -> LinkType -> LinkType
$cmax :: LinkType -> LinkType -> LinkType
>= :: LinkType -> LinkType -> Bool
$c>= :: LinkType -> LinkType -> Bool
> :: LinkType -> LinkType -> Bool
$c> :: LinkType -> LinkType -> Bool
<= :: LinkType -> LinkType -> Bool
$c<= :: LinkType -> LinkType -> Bool
< :: LinkType -> LinkType -> Bool
$c< :: LinkType -> LinkType -> Bool
compare :: LinkType -> LinkType -> Ordering
$ccompare :: LinkType -> LinkType -> Ordering
$cp1Ord :: Eq LinkType
Ord)
data Result = Result
{ Result -> Status
resultStatus :: !HTTP.Status,
Result -> Maybe DocResult
resultDocResult :: !(Maybe DocResult)
}
deriving (Int -> Result -> ShowS
[Result] -> ShowS
Result -> String
(Int -> Result -> ShowS)
-> (Result -> String) -> ([Result] -> ShowS) -> Show Result
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Result] -> ShowS
$cshowList :: [Result] -> ShowS
show :: Result -> String
$cshow :: Result -> String
showsPrec :: Int -> Result -> ShowS
$cshowsPrec :: Int -> Result -> ShowS
Show, Result -> Result -> Bool
(Result -> Result -> Bool)
-> (Result -> Result -> Bool) -> Eq Result
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Result -> Result -> Bool
$c/= :: Result -> Result -> Bool
== :: Result -> Result -> Bool
$c== :: Result -> Result -> Bool
Eq)
resultBad :: Result -> Bool
resultBad :: Result -> Bool
resultBad Result {Maybe DocResult
Status
resultDocResult :: Maybe DocResult
resultStatus :: Status
resultDocResult :: Result -> Maybe DocResult
resultStatus :: Result -> Status
..} =
Bool -> Bool
not (Bool -> Bool) -> Bool -> Bool
forall a b. (a -> b) -> a -> b
$
Validation -> Bool
validationIsValid (Validation -> Bool) -> Validation -> Bool
forall a b. (a -> b) -> a -> b
$
[Validation] -> Validation
forall a. Monoid a => [a] -> a
mconcat
[ String -> Bool -> Validation
declare String
"The status code is in the 200 range" (Bool -> Validation) -> Bool -> Validation
forall a b. (a -> b) -> a -> b
$
let sci :: Int
sci = Status -> Int
HTTP.statusCode Status
resultStatus
in Int
200 Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
<= Int
sci Bool -> Bool -> Bool
&& Int
sci Int -> Int -> Bool
forall a. Ord a => a -> a -> Bool
< Int
300,
String -> Validation -> Validation
decorate String
"Doc result" (Validation -> Validation) -> Validation -> Validation
forall a b. (a -> b) -> a -> b
$ Validation
-> (DocResult -> Validation) -> Maybe DocResult -> Validation
forall b a. b -> (a -> b) -> Maybe a -> b
maybe Validation
valid DocResult -> Validation
docResultValidation Maybe DocResult
resultDocResult
]
produceResult :: Maybe Word -> HTTP.Manager -> Link -> LoggingT IO (Maybe Result)
produceResult :: Maybe Word -> Manager -> Link -> LoggingT IO (Maybe Result)
produceResult Maybe Word
maxDepth Manager
man link :: Link
link@Link {Word
URI
LinkType
linkDepth :: Word
linkUri :: URI
linkType :: LinkType
linkType :: Link -> LinkType
linkDepth :: Link -> Word
linkUri :: Link -> URI
..} =
case URI -> Maybe Request
forall (m :: * -> *). MonadThrow m => URI -> m Request
requestFromURI URI
linkUri of
Maybe Request
Nothing -> do
LogSource -> LoggingT IO ()
forall (m :: * -> *). MonadLogger m => LogSource -> m ()
logErrorN (LogSource -> LoggingT IO ()) -> LogSource -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ LogSource
"Unable to construct a request from this uri: " LogSource -> LogSource -> LogSource
forall a. Semigroup a => a -> a -> a
<> String -> LogSource
T.pack (URI -> String
forall a. Show a => a -> String
show URI
linkUri)
Maybe Result -> LoggingT IO (Maybe Result)
forall (f :: * -> *) a. Applicative f => a -> f a
pure Maybe Result
forall a. Maybe a
Nothing
Just Request
req -> do
let fetchingLog :: [String]
fetchingLog = case Maybe Word
maxDepth of
Maybe Word
Nothing -> [String
"Fetching: ", URI -> String
forall a. Show a => a -> String
show URI
linkUri]
Just Word
md -> [String
"Depth ", Word -> String
forall a. Show a => a -> String
show Word
linkDepth, String
"/", Word -> String
forall a. Show a => a -> String
show Word
md, String
"; Fetching: ", URI -> String
forall a. Show a => a -> String
show URI
linkUri]
LogSource -> LoggingT IO ()
forall (m :: * -> *). MonadLogger m => LogSource -> m ()
logInfoN (LogSource -> LoggingT IO ()) -> LogSource -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ String -> LogSource
T.pack (String -> LogSource) -> String -> LogSource
forall a b. (a -> b) -> a -> b
$ [String] -> String
forall (t :: * -> *) a. Foldable t => t [a] -> [a]
concat [String]
fetchingLog
Response ByteString
resp <- IO (Response ByteString) -> LoggingT IO (Response ByteString)
forall (m :: * -> *) a. MonadIO m => IO a -> m a
liftIO (IO (Response ByteString) -> LoggingT IO (Response ByteString))
-> IO (Response ByteString) -> LoggingT IO (Response ByteString)
forall a b. (a -> b) -> a -> b
$ Request -> Manager -> IO (Response ByteString)
httpLbs Request
req Manager
man
let status :: Status
status = Response ByteString -> Status
forall body. Response body -> Status
responseStatus Response ByteString
resp
let sci :: Int
sci = Status -> Int
HTTP.statusCode Status
status
LogSource -> LoggingT IO ()
forall (m :: * -> *). MonadLogger m => LogSource -> m ()
logDebugN (LogSource -> LoggingT IO ()) -> LogSource -> LoggingT IO ()
forall a b. (a -> b) -> a -> b
$ LogSource
"Got response for " LogSource -> LogSource -> LogSource
forall a. Semigroup a => a -> a -> a
<> String -> LogSource
T.pack (URI -> String
forall a. Show a => a -> String
show URI
linkUri) LogSource -> LogSource -> LogSource
forall a. Semigroup a => a -> a -> a
<> LogSource
": " LogSource -> LogSource -> LogSource
forall a. Semigroup a => a -> a -> a
<> String -> LogSource
T.pack (Int -> String
forall a. Show a => a -> String
show Int
sci)
let body :: ByteString
body = Response ByteString -> ByteString
forall body. Response body -> body
responseBody Response ByteString
resp
let headers :: ResponseHeaders
headers = Response ByteString -> ResponseHeaders
forall body. Response body -> ResponseHeaders
responseHeaders Response ByteString
resp
contentType :: Maybe ByteString
contentType = HeaderName -> ResponseHeaders -> Maybe ByteString
forall a b. Eq a => a -> [(a, b)] -> Maybe b
lookup HeaderName
hContentType ResponseHeaders
headers
Maybe Result -> LoggingT IO (Maybe Result)
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Maybe Result -> LoggingT IO (Maybe Result))
-> Maybe Result -> LoggingT IO (Maybe Result)
forall a b. (a -> b) -> a -> b
$
Result -> Maybe Result
forall a. a -> Maybe a
Just (Result -> Maybe Result) -> Result -> Maybe Result
forall a b. (a -> b) -> a -> b
$
Result :: Status -> Maybe DocResult -> Result
Result
{ resultStatus :: Status
resultStatus = Response ByteString -> Status
forall body. Response body -> Status
responseStatus Response ByteString
resp,
resultDocResult :: Maybe DocResult
resultDocResult = case LinkType
linkType of
LinkType
A -> do
ByteString
ct <- Maybe ByteString
contentType
if ByteString
"text/html" ByteString -> ByteString -> Bool
`SB.isInfixOf` ByteString
ct
then DocResult -> Maybe DocResult
forall a. a -> Maybe a
Just (DocResult -> Maybe DocResult) -> DocResult -> Maybe DocResult
forall a b. (a -> b) -> a -> b
$ Link -> Response ByteString -> Document -> DocResult
produceDocResult Link
link Response ByteString
resp (Document -> DocResult) -> Document -> DocResult
forall a b. (a -> b) -> a -> b
$ ByteString -> Document
HTML.parseLBS ByteString
body
else Maybe DocResult
forall a. Maybe a
Nothing
LinkType
_ -> Maybe DocResult
forall a. Maybe a
Nothing
}
data DocResult = DocResult
{ DocResult -> [Link]
docResultLinks :: ![Link],
DocResult -> DocTypeResult
docResultDocType :: !DocTypeResult,
DocResult -> TitleResult
docResultTitle :: !TitleResult,
DocResult -> DescriptionResult
docResultDescription :: !DescriptionResult,
DocResult -> Set LogSource
docResultImagesWithoutAlt :: !(Set Text)
}
deriving (Int -> DocResult -> ShowS
[DocResult] -> ShowS
DocResult -> String
(Int -> DocResult -> ShowS)
-> (DocResult -> String)
-> ([DocResult] -> ShowS)
-> Show DocResult
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [DocResult] -> ShowS
$cshowList :: [DocResult] -> ShowS
show :: DocResult -> String
$cshow :: DocResult -> String
showsPrec :: Int -> DocResult -> ShowS
$cshowsPrec :: Int -> DocResult -> ShowS
Show, DocResult -> DocResult -> Bool
(DocResult -> DocResult -> Bool)
-> (DocResult -> DocResult -> Bool) -> Eq DocResult
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: DocResult -> DocResult -> Bool
$c/= :: DocResult -> DocResult -> Bool
== :: DocResult -> DocResult -> Bool
$c== :: DocResult -> DocResult -> Bool
Eq)
docResultValidation :: DocResult -> Validation
docResultValidation :: DocResult -> Validation
docResultValidation DocResult {[Link]
Set LogSource
DescriptionResult
TitleResult
DocTypeResult
docResultImagesWithoutAlt :: Set LogSource
docResultDescription :: DescriptionResult
docResultTitle :: TitleResult
docResultDocType :: DocTypeResult
docResultLinks :: [Link]
docResultImagesWithoutAlt :: DocResult -> Set LogSource
docResultDescription :: DocResult -> DescriptionResult
docResultTitle :: DocResult -> TitleResult
docResultDocType :: DocResult -> DocTypeResult
docResultLinks :: DocResult -> [Link]
..} =
[Validation] -> Validation
forall a. Monoid a => [a] -> a
mconcat
[ String -> Bool -> Validation
declare String
"There was a doctype" (Bool -> Validation) -> Bool -> Validation
forall a b. (a -> b) -> a -> b
$ case DocTypeResult
docResultDocType of
DocTypeResult
HtmlDocType -> Bool
True
DocTypeResult
NoDocType -> Bool
False
DocTypeResult
UnknownDocType -> Bool
False,
String -> Bool -> Validation
declare String
"There was exactly one title" (Bool -> Validation) -> Bool -> Validation
forall a b. (a -> b) -> a -> b
$ case TitleResult
docResultTitle of
TitleFound LogSource
_ -> Bool
True
TitleResult
_ -> Bool
False,
String -> Bool -> Validation
declare String
"There was exactly one description" (Bool -> Validation) -> Bool -> Validation
forall a b. (a -> b) -> a -> b
$
case DescriptionResult
docResultDescription of
Description LogSource
_ -> Bool
True
DescriptionResult
_ -> Bool
False,
String -> Bool -> Validation
declare String
"There are no pages without alt tags" (Bool -> Validation) -> Bool -> Validation
forall a b. (a -> b) -> a -> b
$ Set LogSource -> Bool
forall a. Set a -> Bool
S.null Set LogSource
docResultImagesWithoutAlt
]
produceDocResult :: Link -> Response LB.ByteString -> XML.Document -> DocResult
produceDocResult :: Link -> Response ByteString -> Document -> DocResult
produceDocResult Link
link Response ByteString
resp Document
d =
DocResult :: [Link]
-> DocTypeResult
-> TitleResult
-> DescriptionResult
-> Set LogSource
-> DocResult
DocResult
{ docResultLinks :: [Link]
docResultLinks = Link -> Document -> [Link]
documentLinks Link
link Document
d,
docResultDocType :: DocTypeResult
docResultDocType = Response ByteString -> DocTypeResult
documentDocType Response ByteString
resp,
docResultTitle :: TitleResult
docResultTitle = Document -> TitleResult
documentTitle Document
d,
docResultDescription :: DescriptionResult
docResultDescription = Document -> DescriptionResult
documentDescription Document
d,
docResultImagesWithoutAlt :: Set LogSource
docResultImagesWithoutAlt = Document -> Set LogSource
documentImagesWithoutAlt Document
d
}
documentLinks :: Link -> Document -> [Link]
documentLinks :: Link -> Document -> [Link]
documentLinks Link
link = Link -> Element -> [Link]
elementLinks Link
link (Element -> [Link]) -> (Document -> Element) -> Document -> [Link]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Document -> Element
documentRoot
elementLinks :: Link -> Element -> [Link]
elementLinks :: Link -> Element -> [Link]
elementLinks Link
link Element {[Node]
Map Name LogSource
Name
elementName :: Element -> Name
elementAttributes :: Element -> Map Name LogSource
elementNodes :: Element -> [Node]
elementNodes :: [Node]
elementAttributes :: Map Name LogSource
elementName :: Name
..} =
( case Link -> Name -> Map Name LogSource -> Maybe Link
singleElementLink Link
link Name
elementName Map Name LogSource
elementAttributes of
Maybe Link
Nothing -> [Link] -> [Link]
forall a. a -> a
id
Just Link
l -> (Link
l Link -> [Link] -> [Link]
forall a. a -> [a] -> [a]
:)
)
([Link] -> [Link]) -> [Link] -> [Link]
forall a b. (a -> b) -> a -> b
$ (Node -> [Link]) -> [Node] -> [Link]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap (Link -> Node -> [Link]
nodeLinks Link
link) [Node]
elementNodes
singleElementLink :: Link -> Name -> Map Name Text -> Maybe Link
singleElementLink :: Link -> Name -> Map Name LogSource -> Maybe Link
singleElementLink Link
link Name
name Map Name LogSource
attrs = do
(LinkType
typ, LogSource
t) <- case Name
name of
Name
"a" -> (,) LinkType
A (LogSource -> (LinkType, LogSource))
-> Maybe LogSource -> Maybe (LinkType, LogSource)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Name -> Map Name LogSource -> Maybe LogSource
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Name
"href" Map Name LogSource
attrs
Name
"link" -> (,) LinkType
LINK (LogSource -> (LinkType, LogSource))
-> Maybe LogSource -> Maybe (LinkType, LogSource)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Name -> Map Name LogSource -> Maybe LogSource
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Name
"href" Map Name LogSource
attrs
Name
"img" -> (,) LinkType
IMG (LogSource -> (LinkType, LogSource))
-> Maybe LogSource -> Maybe (LinkType, LogSource)
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Name -> Map Name LogSource -> Maybe LogSource
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Name
"src" Map Name LogSource
attrs
Name
_ -> Maybe (LinkType, LogSource)
forall a. Maybe a
Nothing
let root :: URI
root = Link -> URI
linkUri Link
link
URI
uri <- URI -> String -> Maybe URI
parseURIRelativeTo URI
root (String -> Maybe URI) -> String -> Maybe URI
forall a b. (a -> b) -> a -> b
$ LogSource -> String
T.unpack LogSource
t
let uri' :: URI
uri' = URI
uri {uriFragment :: String
uriFragment = String
""}
Bool -> Maybe ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Bool -> Maybe ()) -> Bool -> Maybe ()
forall a b. (a -> b) -> a -> b
$ URI -> Maybe URIAuth
uriAuthority URI
uri' Maybe URIAuth -> Maybe URIAuth -> Bool
forall a. Eq a => a -> a -> Bool
== URI -> Maybe URIAuth
uriAuthority URI
root
Link -> Maybe Link
forall (f :: * -> *) a. Applicative f => a -> f a
pure (Link -> Maybe Link) -> Link -> Maybe Link
forall a b. (a -> b) -> a -> b
$
Link :: LinkType -> URI -> Word -> Link
Link
{ linkType :: LinkType
linkType = LinkType
typ,
linkUri :: URI
linkUri = URI
uri',
linkDepth :: Word
linkDepth = Word -> Word
forall a. Enum a => a -> a
succ (Link -> Word
linkDepth Link
link)
}
nodeLinks :: Link -> Node -> [Link]
nodeLinks :: Link -> Node -> [Link]
nodeLinks Link
link = \case
NodeElement Element
e -> Link -> Element -> [Link]
elementLinks Link
link Element
e
NodeContent LogSource
_ -> []
NodeComment LogSource
_ -> []
NodeInstruction Instruction
_ -> []
parseURIRelativeTo :: URI -> String -> Maybe URI
parseURIRelativeTo :: URI -> String -> Maybe URI
parseURIRelativeTo URI
root String
s =
[Maybe URI] -> Maybe URI
forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, MonadPlus m) =>
t (m a) -> m a
msum
[ (URI -> URI -> URI
`relativeTo` URI
root) (URI -> URI) -> Maybe URI -> Maybe URI
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> String -> Maybe URI
parseRelativeReference String
s,
String -> Maybe URI
parseAbsoluteURI String
s
]
data DocTypeResult = HtmlDocType | NoDocType | UnknownDocType
deriving (Int -> DocTypeResult -> ShowS
[DocTypeResult] -> ShowS
DocTypeResult -> String
(Int -> DocTypeResult -> ShowS)
-> (DocTypeResult -> String)
-> ([DocTypeResult] -> ShowS)
-> Show DocTypeResult
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [DocTypeResult] -> ShowS
$cshowList :: [DocTypeResult] -> ShowS
show :: DocTypeResult -> String
$cshow :: DocTypeResult -> String
showsPrec :: Int -> DocTypeResult -> ShowS
$cshowsPrec :: Int -> DocTypeResult -> ShowS
Show, DocTypeResult -> DocTypeResult -> Bool
(DocTypeResult -> DocTypeResult -> Bool)
-> (DocTypeResult -> DocTypeResult -> Bool) -> Eq DocTypeResult
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: DocTypeResult -> DocTypeResult -> Bool
$c/= :: DocTypeResult -> DocTypeResult -> Bool
== :: DocTypeResult -> DocTypeResult -> Bool
$c== :: DocTypeResult -> DocTypeResult -> Bool
Eq)
documentDocType :: Response LB.ByteString -> DocTypeResult
documentDocType :: Response ByteString -> DocTypeResult
documentDocType Response ByteString
resp =
if ByteString -> CI ByteString
forall s. FoldCase s => s -> CI s
CI.mk (Int64 -> ByteString -> ByteString
LB.take (Int -> Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Int64) -> Int -> Int64
forall a b. (a -> b) -> a -> b
$ ByteString -> Int
SB.length ByteString
"<!DOCTYPE ") (Response ByteString -> ByteString
forall body. Response body -> body
responseBody Response ByteString
resp)) CI ByteString -> CI ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString -> CI ByteString
forall s. FoldCase s => s -> CI s
CI.mk ByteString
"<!DOCTYPE "
then
if ByteString -> CI ByteString
forall s. FoldCase s => s -> CI s
CI.mk (Int64 -> ByteString -> ByteString
LB.take (Int -> Int64
forall a b. (Integral a, Num b) => a -> b
fromIntegral (Int -> Int64) -> Int -> Int64
forall a b. (a -> b) -> a -> b
$ ByteString -> Int
SB.length ByteString
"<!DOCTYPE html>") (Response ByteString -> ByteString
forall body. Response body -> body
responseBody Response ByteString
resp)) CI ByteString -> CI ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ByteString -> CI ByteString
forall s. FoldCase s => s -> CI s
CI.mk ByteString
"<!DOCTYPE html>"
then DocTypeResult
HtmlDocType
else DocTypeResult
UnknownDocType
else DocTypeResult
NoDocType
data TitleResult
= NoTitleFound
| EmptyTitle
| TitleFound Text
| NonStandardTitle Element
deriving (Int -> TitleResult -> ShowS
[TitleResult] -> ShowS
TitleResult -> String
(Int -> TitleResult -> ShowS)
-> (TitleResult -> String)
-> ([TitleResult] -> ShowS)
-> Show TitleResult
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [TitleResult] -> ShowS
$cshowList :: [TitleResult] -> ShowS
show :: TitleResult -> String
$cshow :: TitleResult -> String
showsPrec :: Int -> TitleResult -> ShowS
$cshowsPrec :: Int -> TitleResult -> ShowS
Show, TitleResult -> TitleResult -> Bool
(TitleResult -> TitleResult -> Bool)
-> (TitleResult -> TitleResult -> Bool) -> Eq TitleResult
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: TitleResult -> TitleResult -> Bool
$c/= :: TitleResult -> TitleResult -> Bool
== :: TitleResult -> TitleResult -> Bool
$c== :: TitleResult -> TitleResult -> Bool
Eq)
documentTitle :: Document -> TitleResult
documentTitle :: Document -> TitleResult
documentTitle Document
d = case (Name -> Bool) -> Document -> Maybe Element
findDocumentTag (Name -> Name -> Bool
forall a. Eq a => a -> a -> Bool
== Name
"head") Document
d Maybe Element -> (Element -> Maybe Element) -> Maybe Element
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= (Name -> Bool) -> Element -> Maybe Element
findElementTag (Name -> Name -> Bool
forall a. Eq a => a -> a -> Bool
== Name
"title") of
Maybe Element
Nothing -> TitleResult
NoTitleFound
Just e :: Element
e@Element {[Node]
Map Name LogSource
Name
elementNodes :: [Node]
elementAttributes :: Map Name LogSource
elementName :: Name
elementName :: Element -> Name
elementAttributes :: Element -> Map Name LogSource
elementNodes :: Element -> [Node]
..} -> case [Node]
elementNodes of
[] -> TitleResult
EmptyTitle
[NodeContent LogSource
t] -> LogSource -> TitleResult
TitleFound LogSource
t
[Node]
_ -> Element -> TitleResult
NonStandardTitle Element
e
data DescriptionResult
= NoDescription
| EmptyDescription
| MultipleDescriptions
| Description Text
| NonStandardDescription Element
deriving (Int -> DescriptionResult -> ShowS
[DescriptionResult] -> ShowS
DescriptionResult -> String
(Int -> DescriptionResult -> ShowS)
-> (DescriptionResult -> String)
-> ([DescriptionResult] -> ShowS)
-> Show DescriptionResult
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [DescriptionResult] -> ShowS
$cshowList :: [DescriptionResult] -> ShowS
show :: DescriptionResult -> String
$cshow :: DescriptionResult -> String
showsPrec :: Int -> DescriptionResult -> ShowS
$cshowsPrec :: Int -> DescriptionResult -> ShowS
Show, DescriptionResult -> DescriptionResult -> Bool
(DescriptionResult -> DescriptionResult -> Bool)
-> (DescriptionResult -> DescriptionResult -> Bool)
-> Eq DescriptionResult
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: DescriptionResult -> DescriptionResult -> Bool
$c/= :: DescriptionResult -> DescriptionResult -> Bool
== :: DescriptionResult -> DescriptionResult -> Bool
$c== :: DescriptionResult -> DescriptionResult -> Bool
Eq)
documentDescription :: Document -> DescriptionResult
documentDescription :: Document -> DescriptionResult
documentDescription Document
d =
case (Name -> Bool) -> Document -> Maybe Element
findDocumentTag (Name -> Name -> Bool
forall a. Eq a => a -> a -> Bool
== Name
"head") Document
d of
Maybe Element
Nothing -> DescriptionResult
NoDescription
Just Element
headTag ->
let metaTags :: [Element]
metaTags = (Name -> Bool) -> Element -> [Element]
findElementTags (Name -> Name -> Bool
forall a. Eq a => a -> a -> Bool
== Name
"meta") Element
headTag
isMetaDescription :: Element -> Bool
isMetaDescription Element
e = Name -> Map Name LogSource -> Maybe LogSource
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Name
"name" (Element -> Map Name LogSource
elementAttributes Element
e) Maybe LogSource -> Maybe LogSource -> Bool
forall a. Eq a => a -> a -> Bool
== LogSource -> Maybe LogSource
forall a. a -> Maybe a
Just LogSource
"description"
in case (Element -> Bool) -> [Element] -> [Element]
forall a. (a -> Bool) -> [a] -> [a]
filter Element -> Bool
isMetaDescription [Element]
metaTags of
[] -> DescriptionResult
NoDescription
[Element
e] -> case Element -> [Node]
elementNodes Element
e of
[] -> DescriptionResult
-> (LogSource -> DescriptionResult)
-> Maybe LogSource
-> DescriptionResult
forall b a. b -> (a -> b) -> Maybe a -> b
maybe DescriptionResult
EmptyDescription LogSource -> DescriptionResult
Description (Maybe LogSource -> DescriptionResult)
-> Maybe LogSource -> DescriptionResult
forall a b. (a -> b) -> a -> b
$ Name -> Map Name LogSource -> Maybe LogSource
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Name
"content" (Element -> Map Name LogSource
elementAttributes Element
e)
[Node]
_ -> Element -> DescriptionResult
NonStandardDescription Element
e
[Element]
_ -> DescriptionResult
MultipleDescriptions
findDocumentTag :: (Name -> Bool) -> Document -> Maybe Element
findDocumentTag :: (Name -> Bool) -> Document -> Maybe Element
findDocumentTag Name -> Bool
p = (Name -> Bool) -> Element -> Maybe Element
findElementTag Name -> Bool
p (Element -> Maybe Element)
-> (Document -> Element) -> Document -> Maybe Element
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Document -> Element
documentRoot
documentImagesWithoutAlt :: Document -> Set Text
documentImagesWithoutAlt :: Document -> Set LogSource
documentImagesWithoutAlt Document
d = [LogSource] -> Set LogSource
forall a. Ord a => [a] -> Set a
S.fromList ([LogSource] -> Set LogSource) -> [LogSource] -> Set LogSource
forall a b. (a -> b) -> a -> b
$
((Element -> Maybe LogSource) -> [Element] -> [LogSource])
-> [Element] -> (Element -> Maybe LogSource) -> [LogSource]
forall a b c. (a -> b -> c) -> b -> a -> c
flip (Element -> Maybe LogSource) -> [Element] -> [LogSource]
forall a b. (a -> Maybe b) -> [a] -> [b]
mapMaybe ((Name -> Bool) -> Document -> [Element]
findDocumentTags (Name -> Name -> Bool
forall a. Eq a => a -> a -> Bool
== Name
"img") Document
d) ((Element -> Maybe LogSource) -> [LogSource])
-> (Element -> Maybe LogSource) -> [LogSource]
forall a b. (a -> b) -> a -> b
$
\Element
e -> do
LogSource
src <- Name -> Map Name LogSource -> Maybe LogSource
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Name
"src" (Element -> Map Name LogSource
elementAttributes Element
e)
case Name -> Map Name LogSource -> Maybe LogSource
forall k a. Ord k => k -> Map k a -> Maybe a
M.lookup Name
"alt" (Element -> Map Name LogSource
elementAttributes Element
e) of
Maybe LogSource
Nothing -> LogSource -> Maybe LogSource
forall a. a -> Maybe a
Just LogSource
src
Just LogSource
"" -> LogSource -> Maybe LogSource
forall a. a -> Maybe a
Just LogSource
src
Just LogSource
a -> if LogSource -> Bool
T.null (LogSource -> LogSource
T.strip LogSource
a) then LogSource -> Maybe LogSource
forall a. a -> Maybe a
Just LogSource
src else Maybe LogSource
forall a. Maybe a
Nothing
findElementTag :: (Name -> Bool) -> Element -> Maybe Element
findElementTag :: (Name -> Bool) -> Element -> Maybe Element
findElementTag Name -> Bool
p e :: Element
e@Element {[Node]
Map Name LogSource
Name
elementNodes :: [Node]
elementAttributes :: Map Name LogSource
elementName :: Name
elementName :: Element -> Name
elementAttributes :: Element -> Map Name LogSource
elementNodes :: Element -> [Node]
..} =
Maybe Element
go Maybe Element -> Maybe Element -> Maybe Element
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> [Maybe Element] -> Maybe Element
forall (t :: * -> *) (m :: * -> *) a.
(Foldable t, MonadPlus m) =>
t (m a) -> m a
msum ((Node -> Maybe Element) -> [Node] -> [Maybe Element]
forall a b. (a -> b) -> [a] -> [b]
map Node -> Maybe Element
goNode [Node]
elementNodes)
where
go :: Maybe Element
go = do
Bool -> Maybe ()
forall (f :: * -> *). Alternative f => Bool -> f ()
guard (Name -> Bool
p Name
elementName)
Element -> Maybe Element
forall (f :: * -> *) a. Applicative f => a -> f a
pure Element
e
goNode :: Node -> Maybe Element
goNode :: Node -> Maybe Element
goNode = \case
NodeElement Element
e' -> (Name -> Bool) -> Element -> Maybe Element
findElementTag Name -> Bool
p Element
e'
Node
_ -> Maybe Element
forall a. Maybe a
Nothing
findDocumentTags :: (Name -> Bool) -> Document -> [Element]
findDocumentTags :: (Name -> Bool) -> Document -> [Element]
findDocumentTags Name -> Bool
p = (Name -> Bool) -> Element -> [Element]
findElementTags Name -> Bool
p (Element -> [Element])
-> (Document -> Element) -> Document -> [Element]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Document -> Element
documentRoot
findElementTags :: (Name -> Bool) -> Element -> [Element]
findElementTags :: (Name -> Bool) -> Element -> [Element]
findElementTags Name -> Bool
p e :: Element
e@Element {[Node]
Map Name LogSource
Name
elementNodes :: [Node]
elementAttributes :: Map Name LogSource
elementName :: Name
elementName :: Element -> Name
elementAttributes :: Element -> Map Name LogSource
elementNodes :: Element -> [Node]
..} =
[Element] -> [Element]
go ((Node -> [Element]) -> [Node] -> [Element]
forall (t :: * -> *) a b. Foldable t => (a -> [b]) -> t a -> [b]
concatMap Node -> [Element]
goNode [Node]
elementNodes)
where
go :: [Element] -> [Element]
go = if Name -> Bool
p Name
elementName then (Element
e Element -> [Element] -> [Element]
forall a. a -> [a] -> [a]
:) else [Element] -> [Element]
forall a. a -> a
id
goNode :: Node -> [Element]
goNode :: Node -> [Element]
goNode = \case
NodeElement Element
e' -> (Name -> Bool) -> Element -> [Element]
findElementTags Name -> Bool
p Element
e'
Node
_ -> []