{-# LANGUAGE FlexibleContexts #-}

module Data.Text.WordCount
    ( topN
    , displayWords
    -- * For making graphs
    , makeFile
    , makeDistribution
    -- * File processing with pandoc
    , processFile
    , globFile
    -- * Low level-ish
    , buildFreq
    ) where

import qualified Data.Map.Lazy as M
import Data.Map.Lens
import Control.Lens hiding (argument)
import qualified Data.Text as TL -- .Lazy as TL
import Data.Tuple
import Data.Monoid
import Data.List
import Data.Ord
import Graphics.Rendering.Chart.Easy hiding (argument)
import Graphics.Rendering.Chart.Backend.Diagrams
import Data.Char
import Data.Text.WordCount.FileRead

-- | Return top n words and their frequencies
--
-- @
-- >>> topN 2 "hello hello goodbye it is time is it why why why it it"
-- [(4,"it"),(3,"why")]
-- @
topN :: Int -> TL.Text -> [(Int,TL.Text)]
topN n = take n . order . buildFreq

displayWords :: [(Int,TL.Text)] -> TL.Text
displayWords [] = ""
displayWords (pair:pairs) = display pair <> "\n" <> displayWords pairs
    where display (n,str) = (TL.pack . show) n <> ": " <> str

buildFreq :: TL.Text -> M.Map TL.Text Int
buildFreq = count . TL.words . TL.map toLower

order :: M.Map TL.Text Int -> [(Int, TL.Text)]
order = sortBy (flip (comparing fst)) . fmap swap . M.toList

count :: [TL.Text] -> M.Map TL.Text Int
count words = foldr ((.) . wordFunction) id words M.empty
    where wordFunction word map = case map ^. at word of
            Nothing -> at word ?~ 1 $ map
            _ -> ix word %~ (+1) $ map

-- | Make a bar graph from the word frequencies
-- 
-- @
-- makeFile :: IO ()
-- makeFile [(4,"it"),(3,"why")] "out.html"
-- @
makeFile :: [(Int,TL.Text)] -> FilePath -> IO ()
makeFile points out = toFile def out (makeDistribution points)

makeDistribution points = do
    let values = addIndexes (fmap fst points)
    let alabels = fmap (TL.unpack . snd) points
    let fillStyle = solidFillStyle (opaque lightblue)
    layout_title .= "Word Frequencies"
    layout_x_axis . laxis_generate .= autoIndexAxis alabels
    layout_y_axis . laxis_override .= axisGridHide
    layout_left_axis_visibility . axis_show_ticks .= False
    plot $ fmap plotBars $ liftEC $ do
        plot_bars_values .= fmap (over _2 pure) values
        plot_bars_item_styles .= pure (fillStyle, Nothing)