{-# LANGUAGE FlexibleContexts #-}
module Data.Text.WordCount
( topN
, displayWords
, filterTop
, small
, common
, makeFile
, makeDistribution
, processFile
, globFile
, indexed
) where
import Control.Arrow ((&&&))
import Control.Composition (on)
import Control.Lens hiding (indexed)
import Data.Char
import qualified Data.IntMap as IM
import Data.List
import qualified Data.Map.Lazy as M
import Data.Ord
import qualified Data.Text as T
import qualified Data.Text.Lazy as TL
import Data.Text.WordCount.FileRead
import Data.Tuple
import Graphics.Rendering.Chart.Backend.Diagrams
import Graphics.Rendering.Chart.Easy hiding (indexed)
indexed :: TL.Text -> IM.IntMap [TL.Text]
indexed :: Text -> IntMap [Text]
indexed = Map Text Int -> IntMap [Text]
orderM forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Map Text Int
buildFreq
topN :: Int -> TL.Text -> [(Int,TL.Text)]
topN :: Int -> Text -> [(Int, Text)]
topN Int
n = forall a. Int -> [a] -> [a]
take Int
n forall b c a. (b -> c) -> (a -> b) -> a -> c
. Map Text Int -> [(Int, Text)]
order forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Map Text Int
buildFreq
filterTop :: Int -> (TL.Text -> Bool) -> TL.Text -> [(Int, TL.Text)]
filterTop :: Int -> (Text -> Bool) -> Text -> [(Int, Text)]
filterTop Int
n Text -> Bool
p = forall a. Int -> [a] -> [a]
take Int
n forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
filter (Text -> Bool
p forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> b
snd) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Map Text Int -> [(Int, Text)]
order forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Map Text Int
buildFreq
small :: TL.Text -> Bool
small :: Text -> Bool
small = (forall a. Ord a => a -> a -> Bool
> Int64
5) forall b c a. (b -> c) -> (a -> b) -> a -> c
. Text -> Int64
TL.length forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> Text
TL.filter (forall a. Eq a => a -> a -> Bool
/= Char
'\'')
common :: TL.Text -> Bool
common :: Text -> Bool
common = forall a b c. (a -> b -> c) -> b -> a -> c
flip forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
elem [Text
"the",Text
"and",Text
"a",Text
"an",Text
"or",Text
"not",Text
"but",Text
"on",Text
"so",Text
"if",Text
"in",Text
"that",Text
"this",Text
"for"]
displayWords :: [(Int,TL.Text)] -> T.Text
displayWords :: [(Int, Text)] -> Text
displayWords = forall a. Monoid a => [a] -> a
mconcat forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (\(Int
i, Text
str) -> (String -> Text
T.pack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. Show a => a -> String
show) Int
i forall a. Semigroup a => a -> a -> a
<> Text
": " forall a. Semigroup a => a -> a -> a
<> Text -> Text
TL.toStrict Text
str forall a. Semigroup a => a -> a -> a
<> Text
"\n")
buildFreq :: TL.Text -> M.Map TL.Text Int
buildFreq :: Text -> Map Text Int
buildFreq = [Text] -> Map Text Int
count forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> Bool) -> [a] -> [a]
filter (forall a. Eq a => a -> a -> Bool
/=Text
" ") forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Bool) -> Text -> [Text]
TL.split (forall (t :: * -> *) a. (Foldable t, Eq a) => a -> t a -> Bool
`elem` (String
" \n;:,./" :: String)) forall b c a. (b -> c) -> (a -> b) -> a -> c
. (Char -> Char) -> Text -> Text
TL.map Char -> Char
toLower
orderM :: M.Map TL.Text Int -> IM.IntMap [TL.Text]
orderM :: Map Text Int -> IntMap [Text]
orderM = forall a. [(Int, a)] -> IntMap a
IM.fromList forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap ((forall a b. (a, b) -> a
fst forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. [a] -> a
head) forall (a :: * -> * -> *) b c c'.
Arrow a =>
a b c -> a b c' -> a b (c, c')
&&& forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a b. (a, b) -> b
snd) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a. (a -> a -> Bool) -> [a] -> [[a]]
groupBy (Int, Text) -> (Int, Text) -> Bool
go forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall b a. Ord b => (a -> b) -> [a] -> [a]
sortOn (forall a. a -> Down a
Down forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a b. (a, b) -> (b, a)
swap forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall k a. Map k a -> [(k, a)]
M.toList
where go :: (Int, TL.Text) -> (Int, TL.Text) -> Bool
go :: (Int, Text) -> (Int, Text) -> Bool
go = forall b c a. (b -> b -> c) -> (a -> b) -> a -> a -> c
on forall a. Eq a => a -> a -> Bool
(==) forall a b. (a, b) -> a
fst
order :: M.Map TL.Text Int -> [(Int, TL.Text)]
order :: Map Text Int -> [(Int, Text)]
order = forall b a. Ord b => (a -> b) -> [a] -> [a]
sortOn (forall a. a -> Down a
Down forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> a
fst) forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall a b. (a, b) -> (b, a)
swap forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall k a. Map k a -> [(k, a)]
M.toList
count :: [TL.Text] -> M.Map TL.Text Int
count :: [Text] -> Map Text Int
count = forall k a. Ord k => (a -> a -> a) -> [(k, a)] -> Map k a
M.fromListWith forall a. Num a => a -> a -> a
(+) forall b c a. (b -> c) -> (a -> b) -> a -> c
. (forall a b. [a] -> [b] -> [(a, b)]
`zip` forall a. a -> [a]
repeat Int
1)
makeFile :: [(Int,TL.Text)] -> FilePath -> IO ()
makeFile :: [(Int, Text)] -> String -> IO ()
makeFile [(Int, Text)]
ps String
out = forall r.
(Default r, ToRenderable r) =>
FileOptions -> String -> EC r () -> IO ()
toFile forall a. Default a => a
def String
out (forall {y}.
BarsPlotValue y =>
[(y, Text)] -> StateT (Layout PlotIndex y) (State CState) ()
makeDistribution [(Int, Text)]
ps)
makeDistribution :: [(y, Text)] -> StateT (Layout PlotIndex y) (State CState) ()
makeDistribution [(y, Text)]
ps = do
let values :: [(PlotIndex, y)]
values = forall a. [a] -> [(PlotIndex, a)]
addIndexes (forall a b. (a, b) -> a
fst forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
<$> [(y, Text)]
ps)
let alabels :: [String]
alabels = forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (Text -> String
TL.unpack forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall a b. (a, b) -> b
snd) [(y, Text)]
ps
let fillStyle :: FillStyle
fillStyle = AlphaColour Double -> FillStyle
solidFillStyle (forall a. Num a => Colour a -> AlphaColour a
opaque forall a. (Ord a, Floating a) => Colour a
lightblue)
forall x y. Lens' (Layout x y) String
layout_title forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= String
"Word Frequencies"
forall x y. Lens' (Layout x y) (LayoutAxis x)
layout_x_axis forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall x. Lens' (LayoutAxis x) (AxisFn x)
laxis_generate forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= forall i. Integral i => [String] -> [i] -> AxisData i
autoIndexAxis [String]
alabels
forall x y. Lens' (Layout x y) (LayoutAxis y)
layout_y_axis forall b c a. (b -> c) -> (a -> b) -> a -> c
. forall x. Lens' (LayoutAxis x) (AxisData x -> AxisData x)
laxis_override forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= forall x. AxisData x -> AxisData x
axisGridHide
forall x y. Lens' (Layout x y) AxisVisibility
layout_left_axis_visibility forall b c a. (b -> c) -> (a -> b) -> a -> c
. Lens' AxisVisibility Bool
axis_show_ticks forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= Bool
False
forall (p :: * -> * -> *) x y.
ToPlot p =>
EC (Layout x y) (p x y) -> EC (Layout x y) ()
plot forall a b. (a -> b) -> a -> b
$ forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap forall y x. BarsPlotValue y => PlotBars x y -> Plot x y
plotBars forall a b. (a -> b) -> a -> b
$ forall l1 a l2. Default l1 => EC l1 a -> EC l2 l1
liftEC forall a b. (a -> b) -> a -> b
$ do
forall x1 y x2.
Lens (PlotBars x1 y) (PlotBars x2 y) [(x1, [y])] [(x2, [y])]
plot_bars_values forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= forall (f :: * -> *) a b. Functor f => (a -> b) -> f a -> f b
fmap (forall s t a b. ASetter s t a b -> (a -> b) -> s -> t
over forall s t a b. Field2 s t a b => Lens s t a b
_2 forall (f :: * -> *) a. Applicative f => a -> f a
pure) [(PlotIndex, y)]
values
forall x y. Lens' (PlotBars x y) [(FillStyle, Maybe LineStyle)]
plot_bars_item_styles forall s (m :: * -> *) a b.
MonadState s m =>
ASetter s s a b -> b -> m ()
.= forall (f :: * -> *) a. Applicative f => a -> f a
pure (FillStyle
fillStyle, forall a. Maybe a
Nothing)