module Data.Text.WordCount
( topN
, displayWords
, filterTop
, small
, common
, makeFile
, makeDistribution
, processFile
, globFile
, buildFreq
) where
import Control.Lens hiding (argument)
import Data.Char
import Data.List
import qualified Data.Map.Lazy as M
import Data.Map.Lens
import Data.Monoid
import Data.Ord
import qualified Data.Text as TL
import Data.Text.WordCount.FileRead
import Data.Tuple
import Graphics.Rendering.Chart.Backend.Diagrams
import Graphics.Rendering.Chart.Easy hiding (argument)
topN :: Int -> TL.Text -> [(Int,TL.Text)]
topN n = take n . order . buildFreq
filterTop :: Int -> (TL.Text -> Bool) -> TL.Text -> [(Int, TL.Text)]
filterTop n p = take n . filter (p . snd) . order . buildFreq
small :: TL.Text -> Bool
small = (> 5) . TL.length . TL.filter (/= '\'')
common :: TL.Text -> Bool
common = flip elem ["the","and","a","an","or","not","but","on","so","if","in","that","this","for"]
displayWords :: [(Int,TL.Text)] -> TL.Text
displayWords [] = ""
displayWords (pair:pairs) = display pair <> "\n" <> displayWords pairs
where display (n,str) = (TL.pack . show) n <> ": " <> str
buildFreq :: TL.Text -> M.Map TL.Text Int
buildFreq = count . TL.words . TL.map toLower
order :: M.Map TL.Text Int -> [(Int, TL.Text)]
order = sortBy (flip (comparing fst)) . fmap swap . M.toList
count :: [TL.Text] -> M.Map TL.Text Int
count words = foldr ((.) . wordFunction) id words M.empty
where wordFunction word map = case map ^. at word of
Nothing -> at word ?~ 1 $ map
_ -> ix word %~ (+1) $ map
makeFile :: [(Int,TL.Text)] -> FilePath -> IO ()
makeFile points out = toFile def out (makeDistribution points)
makeDistribution points = do
let values = addIndexes (fmap fst points)
let alabels = fmap (TL.unpack . snd) points
let fillStyle = solidFillStyle (opaque lightblue)
layout_title .= "Word Frequencies"
layout_x_axis . laxis_generate .= autoIndexAxis alabels
layout_y_axis . laxis_override .= axisGridHide
layout_left_axis_visibility . axis_show_ticks .= False
plot $ fmap plotBars $ liftEC $ do
plot_bars_values .= fmap (over _2 pure) values
plot_bars_item_styles .= pure (fillStyle, Nothing)