{-# LANGUAGE OverloadedStrings #-}
module Network.HTTP.Robots where
import Control.Applicative
import Control.Monad
import Data.Attoparsec.ByteString.Char8 hiding (skipSpace)
import qualified Data.Attoparsec.Text as AT (isEndOfLine)
import Data.ByteString.Char8 (ByteString)
import qualified Data.ByteString.Char8 as BS
import Data.Char (toUpper)
import Data.Either (partitionEithers)
import Data.List (find)
import Data.Maybe (catMaybes)
import Data.Ratio
import Data.Time.Clock
import Data.Time.LocalTime ()
type Robot = ([([UserAgent], [Directive])], [Unparsable])
type Unparsable = ByteString
data UserAgent = Wildcard | Literal ByteString
deriving (Int -> UserAgent -> ShowS
[UserAgent] -> ShowS
UserAgent -> String
(Int -> UserAgent -> ShowS)
-> (UserAgent -> String)
-> ([UserAgent] -> ShowS)
-> Show UserAgent
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [UserAgent] -> ShowS
$cshowList :: [UserAgent] -> ShowS
show :: UserAgent -> String
$cshow :: UserAgent -> String
showsPrec :: Int -> UserAgent -> ShowS
$cshowsPrec :: Int -> UserAgent -> ShowS
Show,UserAgent -> UserAgent -> Bool
(UserAgent -> UserAgent -> Bool)
-> (UserAgent -> UserAgent -> Bool) -> Eq UserAgent
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: UserAgent -> UserAgent -> Bool
$c/= :: UserAgent -> UserAgent -> Bool
== :: UserAgent -> UserAgent -> Bool
$c== :: UserAgent -> UserAgent -> Bool
Eq)
type Path = ByteString
type TimeInterval = (DiffTime, DiffTime)
data Directive = Allow Path
| Disallow Path
| CrawlDelay { Directive -> Rational
crawlDelay :: Rational
, Directive -> TimeInterval
timeInterval :: TimeInterval
}
| NoArchive Path
| NoSnippet Path
| NoTranslate Path
| NoIndex Path
deriving (Int -> Directive -> ShowS
[Directive] -> ShowS
Directive -> String
(Int -> Directive -> ShowS)
-> (Directive -> String)
-> ([Directive] -> ShowS)
-> Show Directive
forall a.
(Int -> a -> ShowS) -> (a -> String) -> ([a] -> ShowS) -> Show a
showList :: [Directive] -> ShowS
$cshowList :: [Directive] -> ShowS
show :: Directive -> String
$cshow :: Directive -> String
showsPrec :: Int -> Directive -> ShowS
$cshowsPrec :: Int -> Directive -> ShowS
Show,Directive -> Directive -> Bool
(Directive -> Directive -> Bool)
-> (Directive -> Directive -> Bool) -> Eq Directive
forall a. (a -> a -> Bool) -> (a -> a -> Bool) -> Eq a
/= :: Directive -> Directive -> Bool
$c/= :: Directive -> Directive -> Bool
== :: Directive -> Directive -> Bool
$c== :: Directive -> Directive -> Bool
Eq)
subParser :: Parser a -> ByteString -> Parser a
subParser :: Parser a -> ByteString -> Parser a
subParser Parser a
p = (String -> Parser a)
-> (a -> Parser a) -> Either String a -> Parser a
forall a c b. (a -> c) -> (b -> c) -> Either a b -> c
either (Parser a -> String -> Parser a
forall a b. a -> b -> a
const Parser a
forall (m :: * -> *) a. MonadPlus m => m a
mzero) a -> Parser a
forall (m :: * -> *) a. Monad m => a -> m a
return (Either String a -> Parser a)
-> (ByteString -> Either String a) -> ByteString -> Parser a
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Parser a -> ByteString -> Either String a
forall a. Parser a -> ByteString -> Either String a
parseOnly Parser a
p
safeParseRational :: Parser Rational
safeParseRational :: Parser Rational
safeParseRational = do
(ByteString
bs,Scientific
_) <- Parser Scientific -> Parser (ByteString, Scientific)
forall a. Parser a -> Parser (ByteString, a)
match Parser Scientific
scientific
if Char -> ByteString -> Bool
BS.elem Char
'e' ByteString
bs Bool -> Bool -> Bool
|| Char -> ByteString -> Bool
BS.elem Char
'E' ByteString
bs
then Parser Rational
forall (m :: * -> *) a. MonadPlus m => m a
mzero
else Parser Rational -> ByteString -> Parser Rational
forall a. Parser a -> ByteString -> Parser a
subParser Parser Rational
forall a. Fractional a => Parser a
rational ByteString
bs
dropUTF8BOM :: ByteString -> ByteString
dropUTF8BOM :: ByteString -> ByteString
dropUTF8BOM ByteString
bs = if Int -> ByteString -> ByteString
BS.take Int
3 ByteString
bs ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
== ( Char
'\239' Char -> ByteString -> ByteString
`BS.cons`
Char
'\187' Char -> ByteString -> ByteString
`BS.cons`
Char
'\191' Char -> ByteString -> ByteString
`BS.cons` ByteString
BS.empty)
then Int -> ByteString -> ByteString
BS.drop Int
3 ByteString
bs
else ByteString
bs
parseHourMinute :: Parser (Integer,Integer)
parseHourMinute :: Parser (Integer, Integer)
parseHourMinute = Parser (Integer, Integer)
parseWithColon Parser (Integer, Integer)
-> Parser (Integer, Integer) -> Parser (Integer, Integer)
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser (Integer, Integer)
parseWithoutColon
where
parseWithColon :: Parser (Integer, Integer)
parseWithColon = do
Integer
hours <- Parser ()
skipSpace Parser () -> Parser ByteString Integer -> Parser ByteString Integer
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString Integer
forall a. Integral a => Parser a
decimal
Parser ByteString Char -> Parser ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser ByteString Char -> Parser ())
-> Parser ByteString Char -> Parser ()
forall a b. (a -> b) -> a -> b
$ Parser ()
skipSpace Parser () -> Parser ByteString Char -> Parser ByteString Char
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Char -> Parser ByteString Char
char Char
':'
Integer
mins <- Parser ()
skipSpace Parser () -> Parser ByteString Integer -> Parser ByteString Integer
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString Integer
forall a. Integral a => Parser a
decimal
(Integer, Integer) -> Parser (Integer, Integer)
forall (m :: * -> *) a. Monad m => a -> m a
return (Integer
hours,Integer
mins)
parseWithoutColon :: Parser (Integer, Integer)
parseWithoutColon = do
Integer
h <- Int -> Parser ByteString
Data.Attoparsec.ByteString.Char8.take Int
2 Parser ByteString
-> (ByteString -> Parser ByteString Integer)
-> Parser ByteString Integer
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Parser ByteString Integer
-> ByteString -> Parser ByteString Integer
forall a. Parser a -> ByteString -> Parser a
subParser Parser ByteString Integer
forall a. Integral a => Parser a
decimal
Integer
m <- Int -> Parser ByteString
Data.Attoparsec.ByteString.Char8.take Int
2 Parser ByteString
-> (ByteString -> Parser ByteString Integer)
-> Parser ByteString Integer
forall (m :: * -> *) a b. Monad m => m a -> (a -> m b) -> m b
>>= Parser ByteString Integer
-> ByteString -> Parser ByteString Integer
forall a. Parser a -> ByteString -> Parser a
subParser Parser ByteString Integer
forall a. Integral a => Parser a
decimal
(Integer, Integer) -> Parser (Integer, Integer)
forall (m :: * -> *) a. Monad m => a -> m a
return (Integer
h,Integer
m)
parseTimeInterval :: Parser TimeInterval
parseTimeInterval :: Parser TimeInterval
parseTimeInterval = do
(Integer
hours_start, Integer
mins_start) <- Parser (Integer, Integer)
parseHourMinute
Parser () -> Parser ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser () -> Parser ()) -> Parser () -> Parser ()
forall a b. (a -> b) -> a -> b
$ (Parser ()
skipSpace Parser () -> Parser ByteString Char -> Parser ByteString Char
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Char -> Parser ByteString Char
char Char
'-' Parser ByteString Char -> Parser () -> Parser ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
skipSpace) Parser () -> Parser () -> Parser ()
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ()
skipSpace
(Integer
hours_end , Integer
mins_end ) <- Parser (Integer, Integer)
parseHourMinute
TimeInterval -> Parser TimeInterval
forall (m :: * -> *) a. Monad m => a -> m a
return ( Integer -> DiffTime
secondsToDiffTime (Integer
hours_start Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
* Integer
60 Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
* Integer
60 Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
+ Integer
mins_start Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
* Integer
60)
, Integer -> DiffTime
secondsToDiffTime (Integer
hours_end Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
* Integer
60 Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
* Integer
60 Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
+ Integer
mins_end Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
* Integer
60))
allDay :: TimeInterval
allDay :: TimeInterval
allDay = ( Integer -> DiffTime
secondsToDiffTime Integer
0
, Integer -> DiffTime
secondsToDiffTime (Integer
24Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
*Integer
60Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
*Integer
60)
)
parseRequestRate :: Parser Directive
parseRequestRate :: Parser Directive
parseRequestRate = do
Parser ByteString -> Parser ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser ByteString -> Parser ()) -> Parser ByteString -> Parser ()
forall a b. (a -> b) -> a -> b
$ ByteString -> Parser ByteString
stringCI ByteString
"Request-rate:"
Integer
docs <- Parser ()
skipSpace Parser () -> Parser ByteString Integer -> Parser ByteString Integer
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString Integer
forall a. Integral a => Parser a
decimal
Parser ByteString Char -> Parser ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser ByteString Char -> Parser ())
-> Parser ByteString Char -> Parser ()
forall a b. (a -> b) -> a -> b
$ Parser ()
skipSpace Parser () -> Parser ByteString Char -> Parser ByteString Char
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Char -> Parser ByteString Char
char Char
'/'
Integer
ptim <- Parser ()
skipSpace Parser () -> Parser ByteString Integer -> Parser ByteString Integer
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString Integer
forall a. Integral a => Parser a
decimal
Integer
units<- Parser ()
skipSpace Parser () -> Parser ByteString Integer -> Parser ByteString Integer
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ( (Char -> Parser ByteString Char
char Char
's' Parser ByteString Char
-> Parser ByteString Integer -> Parser ByteString Integer
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Integer -> Parser ByteString Integer
forall (m :: * -> *) a. Monad m => a -> m a
return ( Integer
1 :: Integer))
Parser ByteString Integer
-> Parser ByteString Integer -> Parser ByteString Integer
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser ByteString Char
char Char
'm' Parser ByteString Char
-> Parser ByteString Integer -> Parser ByteString Integer
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Integer -> Parser ByteString Integer
forall (m :: * -> *) a. Monad m => a -> m a
return ( Integer
60 :: Integer))
Parser ByteString Integer
-> Parser ByteString Integer -> Parser ByteString Integer
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> (Char -> Parser ByteString Char
char Char
'h' Parser ByteString Char
-> Parser ByteString Integer -> Parser ByteString Integer
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Integer -> Parser ByteString Integer
forall (m :: * -> *) a. Monad m => a -> m a
return (Integer
60Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
*Integer
60 :: Integer))
Parser ByteString Integer
-> Parser ByteString Integer -> Parser ByteString Integer
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Integer -> Parser ByteString Integer
forall (m :: * -> *) a. Monad m => a -> m a
return ( Integer
1 :: Integer)
)
TimeInterval
tint <- Parser ()
skipSpace Parser () -> Parser TimeInterval -> Parser TimeInterval
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> ( Parser TimeInterval
parseTimeInterval Parser TimeInterval -> Parser TimeInterval -> Parser TimeInterval
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> TimeInterval -> Parser TimeInterval
forall (m :: * -> *) a. Monad m => a -> m a
return TimeInterval
allDay)
Directive -> Parser Directive
forall (m :: * -> *) a. Monad m => a -> m a
return (Directive -> Parser Directive) -> Directive -> Parser Directive
forall a b. (a -> b) -> a -> b
$ Rational -> TimeInterval -> Directive
CrawlDelay ((Integer
ptim Integer -> Integer -> Integer
forall a. Num a => a -> a -> a
* Integer
units) Integer -> Integer -> Rational
forall a. Integral a => a -> a -> Ratio a
% Integer
docs) TimeInterval
tint
parseVisitTime :: Parser Directive
parseVisitTime :: Parser Directive
parseVisitTime = do
Parser ByteString -> Parser ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser ByteString -> Parser ()) -> Parser ByteString -> Parser ()
forall a b. (a -> b) -> a -> b
$ ByteString -> Parser ByteString
stringCI ByteString
"Visit-time:"
TimeInterval
tint <- Parser ()
skipSpace Parser () -> Parser TimeInterval -> Parser TimeInterval
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser TimeInterval
parseTimeInterval
Directive -> Parser Directive
forall (m :: * -> *) a. Monad m => a -> m a
return (Directive -> Parser Directive) -> Directive -> Parser Directive
forall a b. (a -> b) -> a -> b
$ Rational -> TimeInterval -> Directive
CrawlDelay ( Integer
0 Integer -> Integer -> Rational
forall a. Integral a => a -> a -> Ratio a
% Integer
1) TimeInterval
tint
parseCrawlDelay :: Parser Directive
parseCrawlDelay :: Parser Directive
parseCrawlDelay = do
Rational
delay <- ByteString -> Parser ByteString
stringCI ByteString
"Crawl-Delay:" Parser ByteString -> Parser () -> Parser ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
skipSpace Parser () -> Parser Rational -> Parser Rational
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser Rational
safeParseRational
Directive -> Parser Directive
forall (m :: * -> *) a. Monad m => a -> m a
return (Directive -> Parser Directive) -> Directive -> Parser Directive
forall a b. (a -> b) -> a -> b
$ Rational -> TimeInterval -> Directive
CrawlDelay Rational
delay TimeInterval
allDay
strip :: ByteString -> ByteString
strip :: ByteString -> ByteString
strip = ByteString -> ByteString
BS.reverse (ByteString -> ByteString)
-> (ByteString -> ByteString) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ByteString -> ByteString
BS.dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
' ') (ByteString -> ByteString)
-> (ByteString -> ByteString) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
BS.reverse (ByteString -> ByteString)
-> (ByteString -> ByteString) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ByteString -> ByteString
BS.dropWhile (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
' ')
parseRobots :: ByteString -> Either String Robot
parseRobots :: ByteString -> Either String Robot
parseRobots ByteString
input = case Either String Robot
parsed of
Right ([], out :: [ByteString]
out@(ByteString
_:[ByteString]
_)) ->
String -> Either String Robot
forall a b. a -> Either a b
Left (String
"no parsable lines: " String -> ShowS
forall a. [a] -> [a] -> [a]
++ [ByteString] -> String
forall a. Show a => a -> String
show [ByteString]
out)
Either String Robot
_ -> Either String Robot
parsed
where parsed :: Either String Robot
parsed = Parser Robot -> ByteString -> Either String Robot
forall a. Parser a -> ByteString -> Either String a
parseOnly Parser Robot
robotP
(ByteString -> Either String Robot)
-> (ByteString -> ByteString) -> ByteString -> Either String Robot
forall b c a. (b -> c) -> (a -> b) -> a -> c
. [ByteString] -> ByteString
BS.unlines
([ByteString] -> ByteString)
-> (ByteString -> [ByteString]) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString -> Bool) -> [ByteString] -> [ByteString]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (ByteString -> Bool) -> ByteString -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString -> Bool
BS.isPrefixOf ByteString
"SITEMAP:" (ByteString -> Bool)
-> (ByteString -> ByteString) -> ByteString -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char) -> ByteString -> ByteString
BS.map Char -> Char
toUpper)
([ByteString] -> [ByteString])
-> (ByteString -> [ByteString]) -> ByteString -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString -> Bool) -> [ByteString] -> [ByteString]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (ByteString -> Bool) -> ByteString -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString -> Bool
BS.isPrefixOf ByteString
"HOST:" (ByteString -> Bool)
-> (ByteString -> ByteString) -> ByteString -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char) -> ByteString -> ByteString
BS.map Char -> Char
toUpper)
([ByteString] -> [ByteString])
-> (ByteString -> [ByteString]) -> ByteString -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString -> Bool) -> [ByteString] -> [ByteString]
forall a. (a -> Bool) -> [a] -> [a]
filter (\ByteString
x -> ByteString -> Char
BS.head ByteString
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'#' )
([ByteString] -> [ByteString])
-> (ByteString -> [ByteString]) -> ByteString -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString -> Bool) -> [ByteString] -> [ByteString]
forall a. (a -> Bool) -> [a] -> [a]
filter (Bool -> Bool
not (Bool -> Bool) -> (ByteString -> Bool) -> ByteString -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> Bool
BS.null)
([ByteString] -> [ByteString])
-> (ByteString -> [ByteString]) -> ByteString -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (ByteString -> ByteString) -> [ByteString] -> [ByteString]
forall a b. (a -> b) -> [a] -> [b]
map ByteString -> ByteString
strip
([ByteString] -> [ByteString])
-> (ByteString -> [ByteString]) -> ByteString -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> [ByteString]
BS.lines
(ByteString -> [ByteString])
-> (ByteString -> ByteString) -> ByteString -> [ByteString]
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> ByteString -> ByteString
BS.filter (Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
/= Char
'\r')
(ByteString -> ByteString)
-> (ByteString -> ByteString) -> ByteString -> ByteString
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ByteString -> ByteString
dropUTF8BOM
(ByteString -> Either String Robot)
-> ByteString -> Either String Robot
forall a b. (a -> b) -> a -> b
$ ByteString
input
robotP :: Parser Robot
robotP :: Parser Robot
robotP = do
([([UserAgent], [Directive])]
dirs, [ByteString]
unparsable) <- [Either ([UserAgent], [Directive]) ByteString] -> Robot
forall a b. [Either a b] -> ([a], [b])
partitionEithers ([Either ([UserAgent], [Directive]) ByteString] -> Robot)
-> Parser ByteString [Either ([UserAgent], [Directive]) ByteString]
-> Parser Robot
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString (Either ([UserAgent], [Directive]) ByteString)
-> Parser ByteString [Either ([UserAgent], [Directive]) ByteString]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many (Parser ByteString ([UserAgent], [Directive])
-> Parser ByteString
-> Parser ByteString (Either ([UserAgent], [Directive]) ByteString)
forall (f :: * -> *) a b.
Alternative f =>
f a -> f b -> f (Either a b)
eitherP Parser ByteString ([UserAgent], [Directive])
agentDirectiveP Parser ByteString
unparsableP) Parser Robot -> String -> Parser Robot
forall i a. Parser i a -> String -> Parser i a
<?> String
"robot"
Robot -> Parser Robot
forall (m :: * -> *) a. Monad m => a -> m a
return ([([UserAgent], [Directive])]
dirs, (ByteString -> Bool) -> [ByteString] -> [ByteString]
forall a. (a -> Bool) -> [a] -> [a]
filter (ByteString -> ByteString -> Bool
forall a. Eq a => a -> a -> Bool
/= ByteString
"") [ByteString]
unparsable)
unparsableP :: Parser ByteString
unparsableP :: Parser ByteString
unparsableP = (Char -> Bool) -> Parser ByteString
takeTill Char -> Bool
AT.isEndOfLine Parser ByteString -> Parser () -> Parser ByteString
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
endOfLine
agentDirectiveP :: Parser ([UserAgent],[Directive])
agentDirectiveP :: Parser ByteString ([UserAgent], [Directive])
agentDirectiveP = (,) ([UserAgent] -> [Directive] -> ([UserAgent], [Directive]))
-> Parser ByteString [UserAgent]
-> Parser ByteString ([Directive] -> ([UserAgent], [Directive]))
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString UserAgent -> Parser ByteString [UserAgent]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many1 Parser ByteString UserAgent
agentP Parser ByteString ([Directive] -> ([UserAgent], [Directive]))
-> Parser ByteString [Directive]
-> Parser ByteString ([UserAgent], [Directive])
forall (f :: * -> *) a b. Applicative f => f (a -> b) -> f a -> f b
<*> Parser Directive -> Parser ByteString [Directive]
forall (f :: * -> *) a. Alternative f => f a -> f [a]
many1 Parser Directive
directiveP Parser ByteString ([UserAgent], [Directive])
-> String -> Parser ByteString ([UserAgent], [Directive])
forall i a. Parser i a -> String -> Parser i a
<?> String
"agentDirective"
skipSpace :: Parser ()
skipSpace :: Parser ()
skipSpace = (Char -> Bool) -> Parser ()
skipWhile (\Char
x -> Char
xChar -> Char -> Bool
forall a. Eq a => a -> a -> Bool
==Char
' ' Bool -> Bool -> Bool
|| Char
x Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'\t')
directiveP :: Parser Directive
directiveP :: Parser Directive
directiveP = do
Parser ()
skipSpace
[Parser Directive] -> Parser Directive
forall (f :: * -> *) a. Alternative f => [f a] -> f a
choice [ ByteString -> Parser ByteString
stringCI ByteString
"Disallow:" Parser ByteString -> Parser () -> Parser ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
skipSpace Parser () -> Parser Directive -> Parser Directive
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
((ByteString -> Directive
Disallow (ByteString -> Directive) -> Parser ByteString -> Parser Directive
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString
tokenP) Parser Directive -> Parser Directive -> Parser Directive
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
(Parser ()
endOfLine Parser () -> Parser Directive -> Parser Directive
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Directive -> Parser Directive
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> Directive
Allow ByteString
"/")))
, ByteString -> Parser ByteString
stringCI ByteString
"Allow:" Parser ByteString -> Parser () -> Parser ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
skipSpace Parser () -> Parser Directive -> Parser Directive
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
((ByteString -> Directive
Allow (ByteString -> Directive) -> Parser ByteString -> Parser Directive
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString
tokenP) Parser Directive -> Parser Directive -> Parser Directive
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
(Parser ()
endOfLine Parser () -> Parser Directive -> Parser Directive
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Directive -> Parser Directive
forall (m :: * -> *) a. Monad m => a -> m a
return (ByteString -> Directive
Disallow ByteString
"/")))
, Parser Directive
parseCrawlDelay
, Parser Directive
parseRequestRate
, Parser Directive
parseVisitTime
, ByteString -> Directive
NoArchive (ByteString -> Directive) -> Parser ByteString -> Parser Directive
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (ByteString -> Parser ByteString
stringCI ByteString
"Noarchive:" Parser ByteString -> Parser () -> Parser ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
skipSpace Parser () -> Parser ByteString -> Parser ByteString
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString
tokenP)
, ByteString -> Directive
NoSnippet (ByteString -> Directive) -> Parser ByteString -> Parser Directive
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (ByteString -> Parser ByteString
stringCI ByteString
"Nosnippet:" Parser ByteString -> Parser () -> Parser ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
skipSpace Parser () -> Parser ByteString -> Parser ByteString
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString
tokenP)
, ByteString -> Directive
NoTranslate (ByteString -> Directive) -> Parser ByteString -> Parser Directive
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (ByteString -> Parser ByteString
stringCI ByteString
"Notranslate:"Parser ByteString -> Parser () -> Parser ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
skipSpace Parser () -> Parser ByteString -> Parser ByteString
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString
tokenP)
, ByteString -> Directive
NoIndex (ByteString -> Directive) -> Parser ByteString -> Parser Directive
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> (ByteString -> Parser ByteString
stringCI ByteString
"Noindex:" Parser ByteString -> Parser () -> Parser ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
skipSpace Parser () -> Parser ByteString -> Parser ByteString
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ByteString
tokenP)
] Parser Directive -> Parser () -> Parser Directive
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
commentsP Parser Directive -> String -> Parser Directive
forall i a. Parser i a -> String -> Parser i a
<?> String
"directive"
agentP :: Parser UserAgent
agentP :: Parser ByteString UserAgent
agentP = do
Parser ByteString -> Parser ()
forall (f :: * -> *) a. Functor f => f a -> f ()
void (Parser ByteString -> Parser ()) -> Parser ByteString -> Parser ()
forall a b. (a -> b) -> a -> b
$ ByteString -> Parser ByteString
stringCI ByteString
"user-agent:"
Parser ()
skipSpace
((ByteString -> Parser ByteString
string ByteString
"*" Parser ByteString
-> Parser ByteString UserAgent -> Parser ByteString UserAgent
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> UserAgent -> Parser ByteString UserAgent
forall (m :: * -> *) a. Monad m => a -> m a
return UserAgent
Wildcard) Parser ByteString UserAgent
-> Parser ByteString UserAgent -> Parser ByteString UserAgent
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|>
(ByteString -> UserAgent
Literal (ByteString -> UserAgent)
-> Parser ByteString -> Parser ByteString UserAgent
forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> Parser ByteString
tokenWithSpacesP)) Parser ByteString UserAgent
-> Parser () -> Parser ByteString UserAgent
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
skipSpace Parser ByteString UserAgent
-> Parser () -> Parser ByteString UserAgent
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
endOfLine Parser ByteString UserAgent
-> String -> Parser ByteString UserAgent
forall i a. Parser i a -> String -> Parser i a
<?> String
"agent"
commentsP :: Parser ()
= Parser ()
skipSpace Parser () -> Parser () -> Parser ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>>
( (ByteString -> Parser ByteString
string ByteString
"#" Parser ByteString -> Parser ByteString -> Parser ByteString
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Char -> Bool) -> Parser ByteString
takeTill Char -> Bool
AT.isEndOfLine Parser ByteString -> Parser () -> Parser ()
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> Parser ()
endOfLine)
Parser () -> Parser () -> Parser ()
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> Parser ()
endOfLine
Parser () -> Parser () -> Parser ()
forall (f :: * -> *) a. Alternative f => f a -> f a -> f a
<|> () -> Parser ()
forall (m :: * -> *) a. Monad m => a -> m a
return ())
tokenP :: Parser ByteString
tokenP :: Parser ByteString
tokenP = Parser ()
skipSpace Parser () -> Parser ByteString -> Parser ByteString
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Char -> Bool) -> Parser ByteString
takeWhile1 (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. Char -> Bool
isSpace) Parser ByteString -> Parser () -> Parser ByteString
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* Parser ()
skipSpace
tokenWithSpacesP :: Parser ByteString
tokenWithSpacesP :: Parser ByteString
tokenWithSpacesP = Parser ()
skipSpace Parser () -> Parser ByteString -> Parser ByteString
forall (m :: * -> *) a b. Monad m => m a -> m b -> m b
>> (Char -> Bool) -> Parser ByteString
takeWhile1 (Bool -> Bool
not (Bool -> Bool) -> (Char -> Bool) -> Char -> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. (\Char
c -> Char
c Char -> Char -> Bool
forall a. Eq a => a -> a -> Bool
== Char
'#' Bool -> Bool -> Bool
|| Char -> Bool
AT.isEndOfLine Char
c))
Parser ByteString -> Parser ByteString -> Parser ByteString
forall (f :: * -> *) a b. Applicative f => f a -> f b -> f a
<* (Char -> Bool) -> Parser ByteString
takeTill Char -> Bool
AT.isEndOfLine
canAccess :: ByteString -> Robot -> Path -> Bool
canAccess :: ByteString -> Robot -> ByteString -> Bool
canAccess ByteString
_ Robot
_ ByteString
"/robots.txt" = Bool
True
canAccess ByteString
agent ([([UserAgent], [Directive])]
robot,[ByteString]
_) ByteString
path = case [([UserAgent], [Directive])]
stanzas of
[] -> Bool
True
(([UserAgent]
_,[Directive]
directives):[([UserAgent], [Directive])]
_) -> [Directive] -> Bool
matchingDirective [Directive]
directives
where stanzas :: [([UserAgent], [Directive])]
stanzas = [Maybe ([UserAgent], [Directive])] -> [([UserAgent], [Directive])]
forall a. [Maybe a] -> [a]
catMaybes [(([UserAgent], [Directive]) -> Bool)
-> [([UserAgent], [Directive])] -> Maybe ([UserAgent], [Directive])
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find ((UserAgent -> Bool) -> [UserAgent] -> Bool
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Bool
any (UserAgent -> ByteString -> Bool
`isLiteralSubstring` ByteString
agent) ([UserAgent] -> Bool)
-> (([UserAgent], [Directive]) -> [UserAgent])
-> ([UserAgent], [Directive])
-> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([UserAgent], [Directive]) -> [UserAgent]
forall a b. (a, b) -> a
fst) [([UserAgent], [Directive])]
robot,
(([UserAgent], [Directive]) -> Bool)
-> [([UserAgent], [Directive])] -> Maybe ([UserAgent], [Directive])
forall (t :: * -> *) a. Foldable t => (a -> Bool) -> t a -> Maybe a
find ((UserAgent
Wildcard UserAgent -> [UserAgent] -> Bool
forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem`) ([UserAgent] -> Bool)
-> (([UserAgent], [Directive]) -> [UserAgent])
-> ([UserAgent], [Directive])
-> Bool
forall b c a. (b -> c) -> (a -> b) -> a -> c
. ([UserAgent], [Directive]) -> [UserAgent]
forall a b. (a, b) -> a
fst) [([UserAgent], [Directive])]
robot]
isLiteralSubstring :: UserAgent -> ByteString -> Bool
isLiteralSubstring (Literal ByteString
a) ByteString
us = ByteString
a ByteString -> ByteString -> Bool
`BS.isInfixOf` ByteString
us
isLiteralSubstring UserAgent
_ ByteString
_ = Bool
False
matchingDirective :: [Directive] -> Bool
matchingDirective [] = Bool
True
matchingDirective (Directive
x:[Directive]
xs) = case Directive
x of
Allow ByteString
robot_path ->
ByteString
robot_path ByteString -> ByteString -> Bool
`BS.isPrefixOf` ByteString
path Bool -> Bool -> Bool
|| [Directive] -> Bool
matchingDirective [Directive]
xs
Disallow ByteString
robot_path ->
Bool -> Bool
not (ByteString
robot_path ByteString -> ByteString -> Bool
`BS.isPrefixOf` ByteString
path) Bool -> Bool -> Bool
&& [Directive] -> Bool
matchingDirective [Directive]
xs
Directive
_ -> [Directive] -> Bool
matchingDirective [Directive]
xs