module Data.Text.WordCount.FileRead where
import Control.Monad ((<=<))
import Control.Monad.IO.Class (liftIO)
import qualified Data.ByteString.Lazy as BSL
import qualified Data.Text as T
import qualified Data.Text.IO as TIO
import Filesystem.Path.CurrentOS as F
import System.FilePath.Glob
import Text.Pandoc hiding (glob)
-- | Expand a glob pattern and return the text of every matching file.
--
-- Each path produced by 'glob' is converted with 'processFile' and the
-- resulting 'PandocIO' action is run with 'runIOorExplode'. The per-file
-- results are joined with 'T.concat', i.e. with no separator inserted
-- between files, in the order in which 'glob' returned the paths.
globFile :: String -> IO T.Text
globFile str = do
  paths <- glob str
  -- One traversal instead of two 'fmap's followed by 'sequence'.
  T.concat <$> traverse (runIOorExplode . processFile) paths
-- | Read a single file and return its text with punctuation removed.
--
-- The input format is picked from the file extension, compared
-- case-insensitively (so @README.MD@ is handled like @README.md@).
-- Recognised formats are parsed with the matching Pandoc reader, passed
-- through 'filterCode', and rendered with 'writePlain'. A file with any
-- other extension, or with none, is read verbatim as text.
--
-- In every case the characters rejected by 'goodChar' are filtered out of
-- the result.
processFile :: String -> PandocIO T.Text
processFile filepath = T.filter goodChar <$> contents
  where
    contents :: PandocIO T.Text
    contents = case T.toLower <$> extension (decodeString filepath) of
      Just "md"      -> fromTextFile readMarkdown
      Just "dbk"     -> fromTextFile readDocBook
      Just "docx"    -> fromBinaryFile readDocx
      Just "epub"    -> fromBinaryFile readEPUB
      Just "html"    -> fromTextFile readHtml
      Just "tex"     -> fromTextFile readLaTeX
      -- NOTE: every @.xml@ file is handed to the OPML reader.
      Just "xml"     -> fromTextFile readOPML
      Just "odt"     -> fromBinaryFile readOdt
      Just "rst"     -> fromTextFile readRST
      Just "textile" -> fromTextFile readTextile
      _              -> liftIO (TIO.readFile filepath)

    -- Formats whose reader consumes 'T.Text'.
    fromTextFile :: (ReaderOptions -> T.Text -> PandocIO Pandoc) -> PandocIO T.Text
    fromTextFile parseDoc = viaPandoc parseDoc (TIO.readFile filepath)

    -- Formats whose reader consumes a lazy 'BSL.ByteString'.
    fromBinaryFile :: (ReaderOptions -> BSL.ByteString -> PandocIO Pandoc) -> PandocIO T.Text
    fromBinaryFile parseDoc = viaPandoc parseDoc (BSL.readFile filepath)

    -- Shared pipeline: load the input, parse it with default reader options,
    -- apply 'filterCode', then render as plain text with default writer
    -- options.
    viaPandoc :: (ReaderOptions -> a -> PandocIO Pandoc) -> IO a -> PandocIO T.Text
    viaPandoc parseDoc readInput =
      (writePlain def . filterCode <=< parseDoc def) =<< liftIO readInput
-- | Decide whether a character is kept.
--
-- Returns 'False' for each of the punctuation characters listed in
-- @stripped@ and 'True' for every other character.
goodChar :: Char -> Bool
goodChar c = c `notElem` stripped
  where
    -- The annotation pins the literal to 'String'; without it the literal
    -- would be ambiguous if OverloadedStrings is enabled.
    stripped = ".,?_()![]{}*&$#" :: String
-- | Drop code blocks and tables from a document.
--
-- The metadata is passed through unchanged. Only the document's top-level
-- block list is inspected: every 'CodeBlock' and 'Table' in that list is
-- removed and all other blocks are kept in their original order.
filterCode :: Pandoc -> Pandoc
filterCode (Pandoc meta topBlocks) =
  Pandoc meta [blk | blk <- topBlocks, keepBlock blk]
  where
    keepBlock blk = case blk of
      CodeBlock {} -> False
      Table {}     -> False
      _            -> True