{-# LANGUAGE OverloadedStrings #-} module Kurita.Prompt.Filter where {- The goal of this module is to filter input prompts. This is done by using a list of words read in from a file. The words are checked first and then the metaphone of a subset of the words is checked to see if it's contained in the sentance ****None of this was actually used. Commentary wasn't taken from users **** -} import Data.Text (Text) import qualified Data.Text as Text import qualified Data.Text.Encoding as Encoding import Data.ByteString.Char8 (ByteString) import qualified Data.ByteString.Char8 as Char8 import Data.Set (Set) import qualified Data.Set as Set import Language.Phonetic.DoubleMetaphone -- 3 symbols first competitor, second competitor, and random emoji -- It might be bad to order the text in a set.. text comparisons :( data BadEntry = BadEntry { badEntryWord :: Text , badEntryMetaphone :: Text } deriving (Eq, Ord, Show) unreasonableMetaphone :: Set BadEntry -> Text -> Set BadEntry unreasonableMetaphone = unreasonableMatches (\entry input -> badEntryMetaphone entry `Text.isInfixOf` input) unreasonableWord :: Set BadEntry -> Text -> Set BadEntry unreasonableWord = unreasonableMatches (\entry input -> badEntryWord entry `Text.isInfixOf` input) -- | Given a check to see if a value is bad, return all the matching bad entries for a single -- sentence unreasonableMatches :: (BadEntry -> Text -> Bool) -> Set BadEntry -> Text -> Set BadEntry unreasonableMatches p badTokens input = Set.foldr go Set.empty badTokens where go entry matches = if p entry input then Set.insert entry matches else matches -- | Read the word set file (line separated phrases) with the double metaphone applied to each word readWordSet :: String -> IO (Set BadEntry) readWordSet wordSetFile = do Set.fromList . filterEmptyEntries . fmap makeBadEntry <$> readWordFile where readWordFile = Char8.split '\n' <$> Char8.readFile wordSetFile filterEmptyEntries = filter (not . isEmptyEntry) makeBadEntry :: ByteString -> BadEntry makeBadEntry word = BadEntry (Encoding.decodeUtf8 word) (Encoding.decodeUtf8 . fst $ doubleMetaphone word) isEmptyEntry :: BadEntry -> Bool isEmptyEntry entry = Text.null (badEntryWord entry) || Text.null (badEntryMetaphone entry)