module Evaluation (Evaluation.main) where

import Control.Monad
import qualified Data.ByteString.Char8 as BS
import qualified Data.ByteString.Lazy as L
import qualified Data.ByteString.Lazy.Char8 as BSL
import Data.Char
import Data.Csv
import Data.Either
import Data.List
import qualified Data.Map as M
import Data.Ord
import qualified Data.PSQueue as PS
import qualified Data.Text as T
import Data.Text.Encoding
import qualified Data.Text.IO as TI
import qualified Data.Vector as V
import Hag
import Helpers
import NLP.Tokenize
import Preprocess
import qualified System.Directory as S
import System.Environment
import qualified System.Exit as Exit
import qualified System.IO as IO
import Tweets

-- | Build a single dictionary from a 'V.Vector' of 'Tweet's.
--
-- Every tweet is turned into a 'FeatureMap' with 'extractFeatures' and the
-- per-tweet maps are merged with @'M.unionWith' (+)@, so the values of keys
-- that occur in several tweets are summed. An empty vector yields 'M.empty'.
createDictionary :: V.Vector Tweet -> FeatureMap
createDictionary tweets =
  -- Strict left fold: the accumulated map is forced at every step instead of
  -- building one chain of pending unions as long as the corpus.
  V.foldl' (M.unionWith (+)) M.empty (V.map extractFeatures tweets)

-- | Build a \'grand dictionary\' from a map of per-tweet \'mini
-- dictionaries\'.
--
-- The 'Tweet' keys are ignored; the 'FeatureMap' values are merged with
-- @'M.unionWith' (+)@, summing the values of shared keys.
createDictionaryFromMap :: M.Map Tweet FeatureMap -> FeatureMap
createDictionaryFromMap tweetMap =
  foldl' (M.unionWith (+)) M.empty (M.elems tweetMap)

-- | Program entry point.
--
-- Takes the directory to read as the first command-line argument, reads the
-- files returned by 'getFiles' in sorted order, preprocesses and parses each
-- one, and writes three CSV files to the current directory:
--
-- * @agg_words.csv@     - dictionary of the tweets labelled @aggressive@
-- * @non_agg_words.csv@ - dictionary of the tweets labelled @non_aggressive@
-- * @words.csv@         - dictionary of all parsed tweets
--
-- Each file starts with a @word,frequency@ header row, followed by the
-- dictionary entries sorted by ascending value. Finally the number of
-- aggressive tweets is printed to stdout.
--
-- Exits with a usage message when no directory argument is given. Files that
-- fail to parse are reported on stderr and skipped.
main :: IO ()
main = do
  args <- getArgs
  dir <- case args of
    (d : _) -> return d
    [] -> do
      prog <- getProgName
      Exit.die ("Usage: " ++ prog ++ " <directory-of-tweet-csvs>")

  files <- getFiles dir
  let sortedFiles = sort files
  csvs <- mapM TI.readFile sortedFiles

  let parsed = map (parseCsv . preprocess) csvs
      -- Names of the input files whose contents could not be parsed.
      failedFiles = [file | (file, Left _) <- zip sortedFiles parsed]
      listOfVectorOfTweets = rights parsed :: [V.Vector Tweet]

      -- Tweet vectors
      vTweets = V.concat listOfVectorOfTweets :: V.Vector Tweet
      aggTweets = filterByLabel vTweets "aggressive" :: V.Vector Tweet
      nonAggTweets = filterByLabel vTweets "non_aggressive" :: V.Vector Tweet

      -- Encoded dictionaries
      allWords = encodeByFrequency (createDictionary vTweets)
      aggWords = encodeByFrequency (createDictionary aggTweets)
      nonAggWords = encodeByFrequency (createDictionary nonAggTweets)

      -- 'encode' expects a list of records. Passing the two strings directly
      -- would make each string its own record with one field per character,
      -- so the header is given as a single two-field record instead.
      headerRow = encode [("word", "frequency") :: (String, String)]

  -- Report skipped inputs rather than dropping them silently.
  unless (null failedFiles) $
    mapM_
      (\file -> IO.hPutStrLn IO.stderr ("Warning: could not parse " ++ file ++ "; skipping"))
      failedFiles

  -- Write output to csv
  L.writeFile "agg_words.csv" $ headerRow `L.append` aggWords
  L.writeFile "non_agg_words.csv" $ headerRow `L.append` nonAggWords
  L.writeFile "words.csv" $ headerRow `L.append` allWords

  print $ V.length aggTweets
 where
  -- Encode a dictionary as CSV rows, ordered by ascending value.
  encodeByFrequency :: FeatureMap -> L.ByteString
  encodeByFrequency = encode . sortBy (comparing snd) . M.toList