{-# LANGUAGE FlexibleInstances #-}
{-# LANGUAGE OverloadedStrings #-}
{-# LANGUAGE RankNTypes #-}

-- |
-- Module       : Text.Eros.Phraselist
-- Description  : A module for dealing with Phraselists.
-- Copyright    : 2014, Peter Harpending.
-- License      : BSD3
-- Maintainer   : Peter Harpending <pharpend2@gmail.com>
-- Stability    : experimental
-- Portability  : archlinux
--
-- If you want to make your own phraselist, you need to write a JSON
-- file, in accordance with the
-- <https://raw.githubusercontent.com/pharpend/eros/master/res/phraselist-schema.json schema>.
-- Once you do that, make a data type for your phraselist.
-- Make your data type an instance of 'Phraselist', and you're good to
-- go.
-- 
-- For example, let's say your phraselist is @mylist.json@, and it's
-- all in accordance with the schema. Your code would look something
-- like this:
-- 
-- @
-- data MyList = MyList
-- 
-- instance Phraselist MyList where
--   phraselistPath MyList = getDataFileName "mylist.json"
--   phraselistPath _      = undefined
-- @
-- 
-- Don't forget to add @mylist.json@ to @Data-Files@ in your @.cabal@
-- file.
-- 
-- If you want to use one of the lists we already supply
-- 

module Text.Eros.Phraselist where

import           Control.Applicative  ((<$>), (<*>))
import           Control.Monad        (mzero, sequence)
import           Data.Aeson
import qualified Data.ByteString.Lazy as B
import qualified Data.Map             as M
import           Data.Text.Lazy       (Text)
import qualified Data.Text.Lazy       as L
import           Data.Tree
import           Paths_eros
import           System.Exit
import           Text.Eros.Phrase

-- |Read a 'Phraselist', marshal it into a 'PhraseForest'.
readPhraselist :: Phraselist t => t -> IO PhraseForest
readPhraselist elist = do
  lpath <- phraselistPath elist
  ltext <- B.readFile lpath
  let ljson = (eitherDecode ltext) :: Either String [PAT]
  case ljson of
    Left msg   -> fail msg
    Right pats -> return $ map fromPAT pats

-- |Load a 'Phraselist' directly into a 'PhraseMap'
readPhraseMap :: Phraselist t => t -> IO PhraseMap
readPhraseMap plist = mkMap <$> readPhraselist plist

-- |Read the phraselist from disk
servePhraselist :: Phraselist t => t -> IO B.ByteString
servePhraselist plist = B.readFile =<< phraselistPath plist

-- This is the type for a Phraselist - it just needs to be provided
-- with a filepath.
class Phraselist t where
  phraselistPath :: t -> IO FilePath

-- |A set of 'Phraselist's. Note that this is actually a list, and I'm
-- calling it a "set" for purely lexical purposes.
type PhraselistSet = Phraselist t => [t]

-- |The phraselists in @res/@. Each of these constructors correspond
-- to one of the files
-- <https://github.com/pharpend/eros/tree/master/res/phraselists-pretty here>.
-- 
-- Gitlab has a terrible interface, so I won't provide links to each
-- one of them.
data ErosList = Chat
              | Conspiracy
              | DrugAdvocacy
              | Forums
              | Gambling
              | Games
              | Gore
              | IdTheft
              | IllegalDrugs
              | Intolerance
              | LegalDrugs
              | Malware
              | Music
              | News
              | Nudism
              | Peer2Peer
              | Personals
              | Pornography
              | Proxies
              | SecretSocieties
              | SelfLabeling
              | Sport
              | Translation
              | UpstreamFilter
              | Violence
              | WarezHacking
              | Weapons
              | Webmail
  deriving (Eq)

-- |A list of phraselists we provide.
erosLists :: [ErosList]
erosLists = [ Chat
            , Conspiracy
            , DrugAdvocacy
            , Forums
            , Gambling
            , Games
            , Gore
            , IdTheft
            , IllegalDrugs
            , Intolerance
            , LegalDrugs
            , Malware
            , Music
            , News
            , Nudism
            , Peer2Peer
            , Personals
            , Pornography
            , Proxies
            , SecretSocieties
            , SelfLabeling
            , Sport
            , Translation
            , UpstreamFilter
            , Violence
            , WarezHacking
            , Weapons
            , Webmail
            ]

-- |A list of the paths to the phraselists we provide.
erosListPaths :: IO [FilePath]
erosListPaths = mapM phraselistPath erosLists

erosListNames :: [L.Text]
erosListNames = [ "chat"
                , "conspiracy"
                , "drug-advocacy"
                , "forums"
                , "gambling"
                , "games"
                , "gore"
                , "id-theft"
                , "illegal-drugs"
                , "intolerance"
                , "legal-drugs"
                , "malware"
                , "music"
                , "news"
                , "nudism"
                , "peer2peer"
                , "personals"
                , "pornography"
                , "proxies"
                , "secret-societies"
                , "self-labeling"
                , "sport"
                , "translation"
                , "upstream-filter"
                , "violence"
                , "warez-hacking"
                , "weapons"
                , "webmail"
                ]

-- |You can't really order the lists, so we won't use 'M.Map'
erosListNamePairs :: [(ErosList, L.Text)]
erosListNamePairs = zip erosLists erosListNames

erosNameByList :: ErosList -> Maybe L.Text
erosNameByList key = lookup key erosListNamePairs

erosNameListMap :: M.Map L.Text ErosList
erosNameListMap = M.fromList $ zip erosListNames erosLists

erosListByName :: L.Text -> Maybe ErosList
erosListByName key = M.lookup key erosNameListMap 

-- These are the data paths for the various PhraseLists
instance Phraselist ErosList where
  phraselistPath Chat            = getDataFileName "res/phraselists-ugly/chat.json"
  phraselistPath Conspiracy      = getDataFileName "res/phraselists-ugly/conspiracy.json"
  phraselistPath DrugAdvocacy    = getDataFileName "res/phraselists-ugly/drug-advocacy.json"
  phraselistPath Forums          = getDataFileName "res/phraselists-ugly/forums.json"
  phraselistPath Gambling        = getDataFileName "res/phraselists-ugly/gambling.json"
  phraselistPath Games           = getDataFileName "res/phraselists-ugly/games.json"
  phraselistPath Gore            = getDataFileName "res/phraselists-ugly/gore.json"
  phraselistPath IdTheft         = getDataFileName "res/phraselists-ugly/id-theft.json"
  phraselistPath IllegalDrugs    = getDataFileName "res/phraselists-ugly/illegal-drugs.json"
  phraselistPath Intolerance     = getDataFileName "res/phraselists-ugly/intolerance.json"
  phraselistPath LegalDrugs      = getDataFileName "res/phraselists-ugly/legal-drugs.json"
  phraselistPath Malware         = getDataFileName "res/phraselists-ugly/malware.json"
  phraselistPath Music           = getDataFileName "res/phraselists-ugly/music.json"
  phraselistPath News            = getDataFileName "res/phraselists-ugly/news.json"
  phraselistPath Nudism          = getDataFileName "res/phraselists-ugly/nudism.json"
  phraselistPath Peer2Peer       = getDataFileName "res/phraselists-ugly/peer2peer.json"
  phraselistPath Personals       = getDataFileName "res/phraselists-ugly/personals.json"
  phraselistPath Pornography     = getDataFileName "res/phraselists-ugly/pornography.json"
  phraselistPath Proxies         = getDataFileName "res/phraselists-ugly/proxies.json"
  phraselistPath SecretSocieties = getDataFileName "res/phraselists-ugly/secret-societies.json"
  phraselistPath SelfLabeling    = getDataFileName "res/phraselists-ugly/self-labeling.json"
  phraselistPath Sport           = getDataFileName "res/phraselists-ugly/sport.json"
  phraselistPath Translation     = getDataFileName "res/phraselists-ugly/translation.json"
  phraselistPath UpstreamFilter  = getDataFileName "res/phraselists-ugly/upstream-filter.json"
  phraselistPath Violence        = getDataFileName "res/phraselists-ugly/violence.json"
  phraselistPath WarezHacking    = getDataFileName "res/phraselists-ugly/warez-hacking.json"
  phraselistPath Weapons         = getDataFileName "res/phraselists-ugly/weapons.json"
  phraselistPath Webmail         = getDataFileName "res/phraselists-ugly/webmail.json"

-- |Placeholder type used to read JSON. The JSON schema (currently, at
-- least) is such that one needs this type to read the JSON. You can
-- use 'fromPAT' to convert this type into a 'PhraseTree'
data PhraseAlmostTree = PhraseAlmostTree { patPhrase :: Text
                                         , patScore  :: Int
                                         , patForest :: [PhraseAlmostTree]
                                         }
  deriving (Show, Read)

-- |Alias for 'PhraseAlmostTree'
type PAT = PhraseAlmostTree

-- |You can read the
-- <https://raw.githubusercontent.com/pharpend/eros/master/res/phraselist-schema.json JSON schema>
-- to see how this works.
instance FromJSON PAT where
  parseJSON (Object v) = PhraseAlmostTree
    <$> v .: "phrase"
    <*> v .: "score"
    <*> v .: "forest"
  parseJSON _          = fail "Object not a PhraseAlmostTree."

-- |Convert a 'PAT' into a 'PhraseTree'.
fromPAT :: PAT -> PhraseTree
fromPAT (PhraseAlmostTree p s f) = Node (Phrase p s) $ map fromPAT f

-- |I figure some people like to type a lot.
fromPhraseAlmostTree :: PAT -> PhraseTree
fromPhraseAlmostTree = fromPAT

-- |Alias for 'readPhraselist'
loadPhraselist :: Phraselist t => t -> IO PhraseForest
loadPhraselist = readPhraselist

-- |Alias for 'readPhraselist'
readPhraseFile :: Phraselist t => t -> IO PhraseForest
readPhraseFile = readPhraselist

-- |Alias for 'readPhraselist'
loadPhraseFile :: Phraselist t => t -> IO PhraseForest
loadPhraseFile = readPhraselist

-- |Alias for 'readPhraselist'
readPhraseForest :: Phraselist t => t -> IO PhraseForest
readPhraseForest = readPhraselist

-- |Alias for 'readPhraselist'
loadPhraseForest :: Phraselist t => t -> IO PhraseForest
loadPhraseForest = readPhraselist

-- |Alias for 'readPhraseMap'
loadPhraseMap :: Phraselist t => t -> IO PhraseMap
loadPhraseMap = readPhraseMap