{-# LANGUAGE FlexibleContexts #-}

-- Have a look at: https://sparverius.github.io/tmplats-doc/docs/CountingWords
module Data.Text.WordCount
    ( topN
    , displayWords
    , filterTop
    -- * Filters
    , small
    , common
    -- * For making graphs
    , makeFile
    , makeDistribution
    -- * File processing with pandoc
    , processFile
    , globFile
    -- * Keyed maps
    , indexed
    ) where

import           Control.Arrow                             ((&&&))
import           Control.Composition                       (on)
import           Control.Lens                              hiding (indexed)
import           Data.Char
import qualified Data.IntMap                               as IM
import           Data.List
import qualified Data.Map.Lazy                             as M
import           Data.Ord
import qualified Data.Text                                 as T
import qualified Data.Text.Lazy                            as TL
import           Data.Text.WordCount.FileRead
import           Data.Tuple
import           Graphics.Rendering.Chart.Backend.Diagrams
import           Graphics.Rendering.Chart.Easy             hiding (indexed)

-- | Return an `IntMap` containing words indexed by their frequencies.
indexed :: TL.Text -> IM.IntMap [TL.Text]
indexed :: Text -> IntMap [Text]
indexed = Map Text Int -> IntMap [Text]
orderM forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Map Text Int
buildFreq

-- | Return top n words and their frequencies
--
-- @
-- >>> topN 2 "hello hello goodbye it is time is it why why why it it"
-- [(4,"it"),(3,"why")]
-- @
topN :: Int -> TL.Text -> [(Int,TL.Text)]
topN :: Int -> Text -> [(Int, Text)]
topN Int
n = forall a. Int -> [a] -> [a]
take Int
n forall b c a. (b -> c) -> (a -> b) -> a -> c
. Map Text Int -> [(Int, Text)]
order forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Map Text Int
buildFreq

-- | Return the top n words, with some filter applied.
filterTop :: Int -> (TL.Text -> Bool) -> TL.Text -> [(Int, TL.Text)]
filterTop :: Int -> (Text -> Bool) -> Text -> [(Int, Text)]
filterTop Int
n Text -> Bool
p = forall a. Int -> [a] -> [a]
take Int
n forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
filter (Text -> Bool
p forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> b
snd) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Map Text Int -> [(Int, Text)]
order forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Map Text Int
buildFreq

small :: TL.Text -> Bool
small :: Text -> Bool
small = (forall a. Ord a => a -> a -> Bool
> Int64
5) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Int64
TL.length forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> Text
TL.filter (forall a. Eq a => a -> a -> Bool
/= Char
'\'')

common :: TL.Text -> Bool
common :: Text -> Bool
common = forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem [Text
"the",Text
"and",Text
"a",Text
"an",Text
"or",Text
"not",Text
"but",Text
"on",Text
"so",Text
"if",Text
"in",Text
"that",Text
"this",Text
"for"]

displayWords :: [(Int,TL.Text)] -> T.Text
displayWords :: [(Int, Text)] -> Text
displayWords = forall a. Monoid a => [a] -> a
mconcat forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\(Int
i, Text
str) -> (String -> Text
T.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> String
show) Int
i forall a. Semigroup a => a -> a -> a
<> Text
": " forall a. Semigroup a => a -> a -> a
<> Text -> Text
TL.toStrict Text
str forall a. Semigroup a => a -> a -> a
<> Text
"\n")

buildFreq :: TL.Text -> M.Map TL.Text Int
buildFreq :: Text -> Map Text Int
buildFreq = [Text] -> Map Text Int
count forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
filter (forall a. Eq a => a -> a -> Bool
/=Text
" ") forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> [Text]
TL.split (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
" \n;:,./" :: String)) forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char) -> Text -> Text
TL.map Char -> Char
toLower

orderM :: M.Map TL.Text Int -> IM.IntMap [TL.Text]
orderM :: Map Text Int -> IntMap [Text]
orderM = forall a. [(Int, a)] -> IntMap a
IM.fromList forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((forall a b. (a, b) -> a
fst forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> a
head) forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a b. (a, b) -> b
snd) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy (Int, Text) -> (Int, Text) -> Bool
go forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall b a. Ord b => (a -> b) -> [a] -> [a]
sortOn (forall a. a -> Down a
Down forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a b. (a, b) -> (b, a)
swap forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall k a. Map k a -> [(k, a)]
M.toList
    where go :: (Int, TL.Text) -> (Int, TL.Text) -> Bool
          go :: (Int, Text) -> (Int, Text) -> Bool
go = forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
on forall a. Eq a => a -> a -> Bool
(==) forall a b. (a, b) -> a
fst

order :: M.Map TL.Text Int -> [(Int, TL.Text)]
order :: Map Text Int -> [(Int, Text)]
order = forall b a. Ord b => (a -> b) -> [a] -> [a]
sortOn (forall a. a -> Down a
Down forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a b. (a, b) -> (b, a)
swap forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall k a. Map k a -> [(k, a)]
M.toList

count :: [TL.Text] -> M.Map TL.Text Int
count :: [Text] -> Map Text Int
count = forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
M.fromListWith forall a. Num a => a -> a -> a
(+) forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a b. [a] -> [b] -> [(a, b)]
`zip` forall a. a -> [a]
repeat Int
1)

-- | Make a bar graph from the word frequencies
--
-- @
-- makeFile :: IO ()
-- makeFile [(4,"it"),(3,"why")] "out.html"
-- @
makeFile :: [(Int,TL.Text)] -> FilePath -> IO ()
makeFile :: [(Int, Text)] -> String -> IO ()
makeFile [(Int, Text)]
ps String
out = forall r.
(Default r, ToRenderable r) =>
FileOptions -> String -> EC r () -> IO ()
toFile forall a. Default a => a
def String
out (forall {y}.
BarsPlotValue y =>
[(y, Text)] -> StateT (Layout PlotIndex y) (State CState) ()
makeDistribution [(Int, Text)]
ps)

makeDistribution :: [(y, Text)] -> StateT (Layout PlotIndex y) (State CState) ()
makeDistribution [(y, Text)]
ps = do
    let values :: [(PlotIndex, y)]
values = forall a. [a] -> [(PlotIndex, a)]
addIndexes (forall a b. (a, b) -> a
fst forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [(y, Text)]
ps)
    let alabels :: [String]
alabels = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Text -> String
TL.unpack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> b
snd) [(y, Text)]
ps
    let fillStyle :: FillStyle
fillStyle = AlphaColour Double -> FillStyle
solidFillStyle (forall a. Num a => Colour a -> AlphaColour a
opaque forall a. (Ord a, Floating a) => Colour a
lightblue)
    forall x y. Lens' (Layout x y) String
layout_title forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= String
"Word Frequencies"
    forall x y. Lens' (Layout x y) (LayoutAxis x)
layout_x_axis forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall x. Lens' (LayoutAxis x) (AxisFn x)
laxis_generate forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= forall i. Integral i => [String] -> [i] -> AxisData i
autoIndexAxis [String]
alabels
    forall x y. Lens' (Layout x y) (LayoutAxis y)
layout_y_axis forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall x. Lens' (LayoutAxis x) (AxisData x -> AxisData x)
laxis_override forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= forall x. AxisData x -> AxisData x
axisGridHide
    forall x y. Lens' (Layout x y) AxisVisibility
layout_left_axis_visibility forall b c a. (b -> c) -> (a -> b) -> a -> c
. Lens' AxisVisibility Bool
axis_show_ticks forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= Bool
False
    forall (p :: * -> * -> *) x y.
ToPlot p =>
EC (Layout x y) (p x y) -> EC (Layout x y) ()
plot forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall y x. BarsPlotValue y => PlotBars x y -> Plot x y
plotBars forall a b. (a -> b) -> a -> b
$ forall l1 a l2. Default l1 => EC l1 a -> EC l2 l1
liftEC forall a b. (a -> b) -> a -> b
$ do
        forall x1 y x2.
Lens (PlotBars x1 y) (PlotBars x2 y) [(x1, [y])] [(x2, [y])]
plot_bars_values forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall s t a b. ASetter s t a b -> (a -> b) -> s -> t
over forall s t a b. Field2 s t a b => Lens s t a b
_2 forall (f :: * -> *) a. Applicative f => a -> f a
pure) [(PlotIndex, y)]
values
        forall x y. Lens' (PlotBars x y) [(FillStyle, Maybe LineStyle)]
plot_bars_item_styles forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= forall (f :: * -> *) a. Applicative f => a -> f a
pure (FillStyle
fillStyle, forall a. Maybe a
Nothing)