-- | Functions for reading files with pandoc and discarding everything superfluous.
module Data.Text.WordCount.FileRead ( processFile
                                    , globFile
                                    ) where

import           Control.Monad             ((<=<))
import           Control.Monad.IO.Class    (liftIO)
import qualified Data.ByteString.Lazy      as BSL
import qualified Data.Text                 as T
import qualified Data.Text.IO              as TIO
import           Filesystem.Path.CurrentOS as F
import           System.FilePath.Glob
import           Text.Pandoc               hiding (glob)

-- | Process every file matched by a filename glob.
--
-- Each matched path is run through 'processFile' (evaluated with
-- 'runIOorExplode', so pandoc errors are handled there rather than returned
-- to the caller), and the per-file results are joined in the order in which
-- 'glob' returned the paths.
--
-- A single newline is inserted between consecutive files. Without a
-- separator the last word of one file would fuse with the first word of the
-- next whenever the former does not end in whitespace. For a pattern that
-- matches exactly one file the result is that file's text, unchanged; for a
-- pattern that matches nothing the result is empty.
globFile :: String -> IO T.Text
globFile str = do
    files <- glob str
    texts <- traverse (runIOorExplode . processFile) files
    pure (T.intercalate (T.singleton '\n') texts)

-- | Process a file given a filename. Return text only, discarding superfluous
-- material.
--
-- The pandoc reader is selected from the file extension, compared
-- case-insensitively (so @notes.MD@ is treated like @notes.md@):
--
-- * @md@ -> 'readMarkdown', @dbk@ -> 'readDocBook', @html@ -> 'readHtml',
--   @tex@ -> 'readLaTeX', @xml@ -> 'readOPML', @rst@ -> 'readRST',
--   @textile@ -> 'readTextile' (file read as text);
-- * @docx@ -> 'readDocx', @epub@ -> 'readEPUB', @odt@ -> 'readODT'
--   (file read as a lazy 'BSL.ByteString').
--
-- A recognised document is parsed with default reader options, passed through
-- 'filterCode', and rendered with 'writePlain' using default writer options.
-- A file with any other extension, or none, is read verbatim as text.
--
-- In every case the characters rejected by 'goodChar' are removed from the
-- result.
--
-- NOTE(review): the string-literal patterns below match against 'T.Text', so
-- this relies on @OverloadedStrings@ being enabled for the module (presumably
-- via the build configuration; confirm).
processFile :: String -> PandocIO T.Text
processFile filepath = T.filter goodChar <$> contents
  where
    -- Lower-case the extension so the dispatch is not defeated by upper-case
    -- file names.
    contents :: PandocIO T.Text
    contents = case T.toLower <$> extension (decodeString filepath) of
        Just "md"      -> viaTextReader readMarkdown
        Just "dbk"     -> viaTextReader readDocBook
        Just "docx"    -> viaBinaryReader readDocx
        Just "epub"    -> viaBinaryReader readEPUB
        Just "html"    -> viaTextReader readHtml
        Just "tex"     -> viaTextReader readLaTeX
        Just "xml"     -> viaTextReader readOPML
        Just "odt"     -> viaBinaryReader readODT
        Just "rst"     -> viaTextReader readRST
        Just "textile" -> viaTextReader readTextile
        _              -> liftIO (TIO.readFile filepath)

    -- Read the file as text, parse it with the given reader, render as plain.
    viaTextReader :: (ReaderOptions -> T.Text -> PandocIO Pandoc) -> PandocIO T.Text
    viaTextReader reader =
        liftIO (TIO.readFile filepath) >>= reader def >>= renderPlain

    -- Read the file as lazy bytes, parse it with the given reader, render as plain.
    viaBinaryReader :: (ReaderOptions -> BSL.ByteString -> PandocIO Pandoc) -> PandocIO T.Text
    viaBinaryReader reader =
        liftIO (BSL.readFile filepath) >>= reader def >>= renderPlain

    -- Drop unwanted blocks, then render the document as plain text.
    renderPlain :: Pandoc -> PandocIO T.Text
    renderPlain = writePlain def . filterCode

-- | Decide whether a character is kept in the processed text: 'True' for
-- every character except the punctuation and markup symbols listed below.
goodChar :: Char -> Bool
goodChar c = c `notElem` unwanted
  where
    -- The explicit 'String' annotation keeps the literal unambiguous should
    -- overloaded string literals be enabled for this module.
    unwanted = ".,?_()![]{}*&$#" :: String

-- | Filter out code blocks and tables from the document.
--
-- The filter is applied to every list of blocks in the document, not only the
-- top-level one, so a 'CodeBlock' or 'Table' nested inside another block
-- (a list item, a block quote, a div, ...) is removed as well. All other
-- blocks are kept; inline elements are not touched.
filterCode :: Pandoc -> Pandoc
filterCode = bottomUp (filter keepBlock)
  where
    -- 'True' for every block that should stay in the document.
    keepBlock :: Block -> Bool
    keepBlock CodeBlock {} = False
    keepBlock Table {}     = False
    keepBlock _            = True