module Heuristics.BanWords where

import Control.Applicative
import Data.Attoparsec.Text
import Data.Char
import Data.Foldable
import Data.Monoid
import Data.Text (Text)
import Data.Vector (Vector)
import qualified Data.Vector as V

newtype MatchPolicy = MatchPolicy { _unMatchPolicy :: Vector Text -> Parser Text }

instance Monoid MatchPolicy where
  mempty = MatchPolicy . const $ empty
  mappend f g = MatchPolicy $ \x -> _unMatchPolicy f x <|> _unMatchPolicy g x

passBlacklist :: MatchPolicy -> Vector Text -> Text -> Maybe Text
passBlacklist f blacklist t =
  case parseOnly (_unMatchPolicy f blacklist) t of
    Right _ -> Nothing
    Left _  -> Just t

-- | Parses sucessfully (which means the word gets blocked) if any of the Texts
-- in the blacklist are an exact match.
banExact :: MatchPolicy
banExact = MatchPolicy $ \blacklist -> asum $ (\x -> string x *> endOfInput *> return x) <$> blacklist

-- | Parses sucessfully (which means the word gets blocked) if any of the Texts
-- in the blacklist are a match.
--
-- Case is ignored for ASCII characters. Non-alphabetic Unicode characters at
-- either the start of the end of the word can be skipped.
--
-- E.g. " Admin" is banned against ["admin"], but "a admin" isn't.
banAlmostExact :: MatchPolicy
banAlmostExact = MatchPolicy $ \blacklist ->
  asum $ (\x -> skipNonAlphabetical *> asciiCI x *> skipNonAlphabetical *> endOfInput *> return x) <$> blacklist
  where
    skipNonAlphabetical :: Parser ()
    skipNonAlphabetical = skipWhile $ not . isAlpha

exampleReserved :: Vector Text
exampleReserved = V.fromList
  [ "accounts"
  , "admin"
  , "beta"
  , "billing"
  , "help"
  , "jobs"
  , "mail"
  , "registration"
  , "root"
  , "security"
  , "support"
  ]