module Heuristics.BanWords where
import Control.Applicative
import Data.Attoparsec.Text
import Data.Char
import Data.Foldable
import Data.Monoid
import Data.Text (Text)
import Data.Vector (Vector)
import qualified Data.Vector as V
newtype MatchPolicy = MatchPolicy { _unMatchPolicy :: Vector Text -> Parser Text }
instance Monoid MatchPolicy where
mempty = MatchPolicy . const $ empty
mappend f g = MatchPolicy $ \x -> _unMatchPolicy f x <|> _unMatchPolicy g x
passBlacklist :: MatchPolicy -> Vector Text -> Text -> Maybe Text
passBlacklist f blacklist t =
case parseOnly (_unMatchPolicy f blacklist) t of
Right _ -> Nothing
Left _ -> Just t
banExact :: MatchPolicy
banExact = MatchPolicy $ \blacklist -> asum $ (\x -> string x *> endOfInput *> return x) <$> blacklist
banAlmostExact :: MatchPolicy
banAlmostExact = MatchPolicy $ \blacklist ->
asum $ (\x -> skipNonAlphabetical *> asciiCI x *> skipNonAlphabetical *> endOfInput *> return x) <$> blacklist
where
skipNonAlphabetical :: Parser ()
skipNonAlphabetical = skipWhile $ not . isAlpha
exampleReserved :: Vector Text
exampleReserved = V.fromList
[ "accounts"
, "admin"
, "beta"
, "billing"
, "help"
, "jobs"
, "mail"
, "registration"
, "root"
, "security"
, "support"
]