{-# LANGUAGE FlexibleInstances #-} {-# LANGUAGE OverloadedStrings #-} {-# LANGUAGE RankNTypes #-} -- | -- Module : Text.Eros.Phraselist -- Description : A module for dealing with Phraselists. -- Copyright : 2014, Peter Harpending. -- License : BSD3 -- Maintainer : Peter Harpending -- Stability : experimental -- Portability : archlinux -- -- If you want to make your own phraselist, you need to write a JSON -- file, in accordance with the -- . -- Once you do that, make a data type for your phraselist. -- Make your data type an instance of 'Phraselist', and you're good to -- go. -- -- For example, let's say your phraselist is @mylist.json@, and it's -- all in accordance with the schema. Your code would look something -- like this: -- -- @ -- data MyList = MyList -- -- instance Phraselist MyList where -- phraselistPath MyList = getDataFileName "mylist.json" -- phraselistPath _ = undefined -- @ -- -- Don't forget to add @mylist.json@ to @Data-Files@ in your @.cabal@ -- file. -- -- If you want to use one of the lists we already supply -- module Text.Eros.Phraselist where import Control.Applicative ((<$>), (<*>)) import Control.Monad (mzero, sequence) import Data.Aeson import qualified Data.ByteString.Lazy as B import qualified Data.Map as M import Data.Text.Lazy (Text) import qualified Data.Text.Lazy as L import Data.Tree import Paths_eros import System.Exit import Text.Eros.Phrase -- |Read a 'Phraselist', marshal it into a 'PhraseForest'. readPhraselist :: Phraselist t => t -> IO PhraseForest readPhraselist elist = do lpath <- phraselistPath elist ltext <- B.readFile lpath let ljson = (eitherDecode ltext) :: Either String [PAT] case ljson of Left msg -> fail msg Right pats -> return $ map fromPAT pats -- |Load a 'Phraselist' directly into a 'PhraseMap' readPhraseMap :: Phraselist t => t -> IO PhraseMap readPhraseMap plist = fmap mkMap $ readPhraselist plist -- |Read the phraselist from disk servePhraselist :: Phraselist t => t -> IO B.ByteString servePhraselist plist = B.readFile =<< phraselistPath plist -- This is the type for a Phraselist - it just needs to be provided -- with a filepath. class Phraselist t where phraselistPath :: t -> IO FilePath -- |A set of 'Phraselist's. Note that this is actually a list, and I'm -- calling it a "set" for purely lexical purposes. type PhraselistSet = Phraselist t => [t] -- |The phraselists in @res/@. Each of these constructors correspond -- to one of the files -- . -- -- Gitlab has a terrible interface, so I won't provide links to each -- one of them. data ErosList = Chat | Conspiracy | DrugAdvocacy | Forums | Gambling | Games | Gore | IdTheft | IllegalDrugs | Intolerance | LegalDrugs | Malware | Music | News | Nudism | Peer2Peer | Personals | Pornography | Proxies | SecretSocieties | SelfLabeling | Sport | Translation | UpstreamFilter | Violence | WarezHacking | Weapons | Webmail ------------------------------------ -- Okay, this is the boring stuff -- ------------------------------------ instance Eq ErosList where (==) Chat Chat = True (==) Conspiracy Conspiracy = True (==) DrugAdvocacy DrugAdvocacy = True (==) Forums Forums = True (==) Gambling Gambling = True (==) Games Games = True (==) Gore Gore = True (==) IdTheft IdTheft = True (==) IllegalDrugs IllegalDrugs = True (==) Intolerance Intolerance = True (==) LegalDrugs LegalDrugs = True (==) Malware Malware = True (==) Music Music = True (==) News News = True (==) Nudism Nudism = True (==) Peer2Peer Peer2Peer = True (==) Personals Personals = True (==) Pornography Pornography = True (==) Proxies Proxies = True (==) SecretSocieties SecretSocieties = True (==) SelfLabeling SelfLabeling = True (==) Sport Sport = True (==) Translation Translation = True (==) UpstreamFilter UpstreamFilter = True (==) Violence Violence = True (==) WarezHacking WarezHacking = True (==) Weapons Weapons = True (==) Webmail Webmail = True (==) Chat _ = False (==) Conspiracy _ = False (==) DrugAdvocacy _ = False (==) Forums _ = False (==) Gambling _ = False (==) Games _ = False (==) Gore _ = False (==) IdTheft _ = False (==) IllegalDrugs _ = False (==) Intolerance _ = False (==) LegalDrugs _ = False (==) Malware _ = False (==) Music _ = False (==) News _ = False (==) Nudism _ = False (==) Peer2Peer _ = False (==) Personals _ = False (==) Pornography _ = False (==) Proxies _ = False (==) SecretSocieties _ = False (==) SelfLabeling _ = False (==) Sport _ = False (==) Translation _ = False (==) UpstreamFilter _ = False (==) Violence _ = False (==) WarezHacking _ = False (==) Weapons _ = False (==) Webmail _ = False -- |A list of phraselists we provide. erosLists :: [ErosList] erosLists = [ Chat , Conspiracy , DrugAdvocacy , Forums , Gambling , Games , Gore , IdTheft , IllegalDrugs , Intolerance , LegalDrugs , Malware , Music , News , Nudism , Peer2Peer , Personals , Pornography , Proxies , SecretSocieties , SelfLabeling , Sport , Translation , UpstreamFilter , Violence , WarezHacking , Weapons , Webmail ] -- |A list of the paths to the phraselists we provide. erosListPaths :: IO [FilePath] erosListPaths = mapM phraselistPath erosLists erosListNames :: [L.Text] erosListNames = [ "chat" , "conspiracy" , "drug-advocacy" , "forums" , "gambling" , "games" , "gore" , "id-theft" , "illegal-drugs" , "intolerance" , "legal-drugs" , "malware" , "music" , "news" , "nudism" , "peer2peer" , "personals" , "pornography" , "proxies" , "secret-societies" , "self-labeling" , "sport" , "translation" , "upstream-filter" , "violence" , "warez-hacking" , "weapons" , "webmail" ] -- |You can't really order the lists, so we won't use 'M.Map' erosListNamePairs :: [(ErosList, L.Text)] erosListNamePairs = zip erosLists erosListNames erosNameByList :: ErosList -> Maybe L.Text erosNameByList key = lookup key erosListNamePairs erosNameListMap :: M.Map L.Text ErosList erosNameListMap = M.fromList $ zip erosListNames erosLists erosListByName :: L.Text -> Maybe ErosList erosListByName key = M.lookup key erosNameListMap -- These are the data paths for the various PhraseLists instance Phraselist ErosList where phraselistPath Chat = getDataFileName "res/phraselists-ugly/chat.json" phraselistPath Conspiracy = getDataFileName "res/phraselists-ugly/conspiracy.json" phraselistPath DrugAdvocacy = getDataFileName "res/phraselists-ugly/drug-advocacy.json" phraselistPath Forums = getDataFileName "res/phraselists-ugly/forums.json" phraselistPath Gambling = getDataFileName "res/phraselists-ugly/gambling.json" phraselistPath Games = getDataFileName "res/phraselists-ugly/games.json" phraselistPath Gore = getDataFileName "res/phraselists-ugly/gore.json" phraselistPath IdTheft = getDataFileName "res/phraselists-ugly/id-theft.json" phraselistPath IllegalDrugs = getDataFileName "res/phraselists-ugly/illegal-drugs.json" phraselistPath Intolerance = getDataFileName "res/phraselists-ugly/intolerance.json" phraselistPath LegalDrugs = getDataFileName "res/phraselists-ugly/legal-drugs.json" phraselistPath Malware = getDataFileName "res/phraselists-ugly/malware.json" phraselistPath Music = getDataFileName "res/phraselists-ugly/music.json" phraselistPath News = getDataFileName "res/phraselists-ugly/news.json" phraselistPath Nudism = getDataFileName "res/phraselists-ugly/nudism.json" phraselistPath Peer2Peer = getDataFileName "res/phraselists-ugly/peer2peer.json" phraselistPath Personals = getDataFileName "res/phraselists-ugly/personals.json" phraselistPath Pornography = getDataFileName "res/phraselists-ugly/pornography.json" phraselistPath Proxies = getDataFileName "res/phraselists-ugly/proxies.json" phraselistPath SecretSocieties = getDataFileName "res/phraselists-ugly/secret-societies.json" phraselistPath SelfLabeling = getDataFileName "res/phraselists-ugly/self-labeling.json" phraselistPath Sport = getDataFileName "res/phraselists-ugly/sport.json" phraselistPath Translation = getDataFileName "res/phraselists-ugly/translation.json" phraselistPath UpstreamFilter = getDataFileName "res/phraselists-ugly/upstream-filter.json" phraselistPath Violence = getDataFileName "res/phraselists-ugly/violence.json" phraselistPath WarezHacking = getDataFileName "res/phraselists-ugly/warez-hacking.json" phraselistPath Weapons = getDataFileName "res/phraselists-ugly/weapons.json" phraselistPath Webmail = getDataFileName "res/phraselists-ugly/webmail.json" -- |Placeholder type used to read JSON. The JSON schema (currently, at -- least) is such that one needs this type to read the JSON. You can -- use 'fromPAT' to convert this type into a 'PhraseTree' data PhraseAlmostTree = PhraseAlmostTree { patPhrase :: Text , patScore :: Int , patForest :: [PhraseAlmostTree] } deriving (Show, Read) -- |Alias for 'PhraseAlmostTree' type PAT = PhraseAlmostTree -- |You can read the -- -- to see how this works. instance FromJSON PAT where parseJSON (Object v) = PhraseAlmostTree <$> v .: "phrase" <*> v .: "score" <*> v .: "forest" parseJSON _ = fail "Object not a PhraseAlmostTree." -- |Convert a 'PAT' into a 'PhraseTree'. fromPAT :: PAT -> PhraseTree fromPAT (PhraseAlmostTree p s f) = Node (Phrase p s) $ map fromPAT f -- |I figure some people like to type a lot. fromPhraseAlmostTree :: PAT -> PhraseTree fromPhraseAlmostTree = fromPAT -- |Alias for 'readPhraselist' loadPhraselist :: Phraselist t => t -> IO PhraseForest loadPhraselist = readPhraselist -- |Alias for 'readPhraselist' readPhraseFile :: Phraselist t => t -> IO PhraseForest readPhraseFile = readPhraselist -- |Alias for 'readPhraselist' loadPhraseFile :: Phraselist t => t -> IO PhraseForest loadPhraseFile = readPhraselist -- |Alias for 'readPhraselist' readPhraseForest :: Phraselist t => t -> IO PhraseForest readPhraseForest = readPhraselist -- |Alias for 'readPhraselist' loadPhraseForest :: Phraselist t => t -> IO PhraseForest loadPhraseForest = readPhraselist -- |Alias for 'readPhraseMap' loadPhraseMap :: Phraselist t => t -> IO PhraseMap loadPhraseMap = readPhraseMap