module Main where

import Control.Arrow ((&&&))
import Control.Lens (over, _2)
import Control.Monad (when)
import Data.Binary (decode, encode)
import qualified Data.ByteString as BS
import qualified Data.ByteString.Lazy as BSL
import qualified Data.IntMap as IM
import Data.Maybe
import Data.Monoid
import qualified Data.Text.IO as TIO
import qualified Data.Text.Lazy as TL
import Data.Text.WordCount
import Data.Version
import Options.Applicative
import Paths_wordchoice
import System.Directory (doesFileExist)

-- | Program datatype to be parsed
data Program = Program
    { file         :: FilePath        -- ^ File to analyze
    , num          :: Maybe Int       -- ^ How many top words to list, if given
    , output       :: Maybe FilePath  -- ^ Filepath for the output graph, if given
    , filterOutput :: Bool            -- ^ Filter common English words from output
    , cacheIndex   :: Bool            -- ^ Cache word frequency indices
    }

-- TODO add option for separators

-- | Command line argument parser
program :: Parser Program
program = Program
    <$> argument str
        (metavar "FILEPATH"
        <> completer (bashCompleter "file -o plusdirs")
        <> help "File to analyze")
    -- 'option auto' validates NUM while parsing, so a non-numeric argument
    -- yields a usage error instead of a deferred @Prelude.read: no parse@.
    <*> optional (option auto
        (short 'n'
        <> long "number"
        <> metavar "NUM"
        <> help "Top NUM words will be listed"))
    <*> optional (strOption
        (short 'o'
        <> long "output"
        <> metavar "OUTPUT"
        <> help "Filepath for output graph"))
    <*> switch
        (short 'f'
        <> long "filter"
        <> help "Filter common English words from output.")
    <*> switch
        (short 'd'
        <> long "dump"
        <> help "Cache word frequency indices")

-- | Parse for version info
versionInfo :: Parser (a -> a)
versionInfo = infoOption
    ("wordchoice version: " <> showVersion version)
    (short 'v' <> long "version" <> help "Show version")

-- | Wraps parser with help parser
wrapper :: ParserInfo Program
wrapper = info (helper <*> versionInfo <*> program)
    (fullDesc
    <> progDesc "Word choice is a command-line tool meant to help you improve your writing. Simply point it to a file containing text and it will list your most frequently used words and their frequencies."
    <> header "Word choice command-line utility")

-- | Actual executable
main :: IO ()
main = execParser wrapper >>= pick

-- | Run parsed record.
--
-- Reads the input file, prints a word/frequency listing, optionally writes
-- the index to @index.bin@ (@--dump@), and optionally renders a graph
-- (@--output@).
--
-- Which listing is printed depends on the flags:
--
-- * with @--filter@: 'filterTop' over the top @n@ words (@n@ defaults to 25);
-- * with only @--number x@: 'topN' @x@;
-- * with neither: the whole index, read from @index.bin@ when that file
--   exists and computed from the input otherwise.
pick :: Program -> IO ()
pick opts = do
    contents <- TL.fromStrict <$> globFile (file opts)
    ranked <- case (filterOutput &&& num) opts of
        (True, _)        -> pure (filterTop n small contents)
        (False, Just x)  -> pure (topN x contents)
        (False, Nothing) -> do
            -- NOTE(review): the cache is not keyed by input file, so an
            -- index.bin left over from another run is displayed as-is.
            cacheExists <- doesFileExist indexPath
            if cacheExists
                then flatten . decodeIndex . BSL.fromStrict <$> BS.readFile indexPath
                else pure (flatten (indexed contents))
    -- The cache is (re)written before the listing is printed; the strict
    -- read above has already completed by this point.
    when (cacheIndex opts) $ do
        BS.writeFile indexPath . BSL.toStrict . encode $ indexed contents
        putStrLn "...finished indexing"
    TIO.putStrLn (displayWords ranked)
    case output opts of
        Just out -> makeFile (topN n contents) out
        Nothing  -> pure ()
  where
    -- Number of words used for filtering and for the graph.
    n :: Int
    n = fromMaybe 25 (num opts)

    -- Location of the on-disk index cache, relative to the working directory.
    indexPath :: FilePath
    indexPath = "index.bin"

    -- Pins the type that 'decode' should produce.
    decodeIndex :: BSL.ByteString -> IM.IntMap [TL.Text]
    decodeIndex = decode

    -- Pair every word with the key of the bucket it is stored under.
    flatten :: IM.IntMap [a] -> [(Int, a)]
    flatten ix = [(i, w) | (i, ws) <- IM.toList ix, w <- ws]