import Text.HTML.TagSoup
import System.Environment
import System.Directory
import System.FilePath
import Data.Maybe (catMaybes)
import Control.Monad (liftM)
import System.IO
import Network.URL
import Wiki4e.Commands
import Network.Wikipedia (isArticleURL)
-- | Book name passed to 'cache2epub' when the program is started without
-- any command-line argument.
defaultEbookName :: String
defaultEbookName = "Wikipedia_Articles_From_Cache"
-- | Program entry point: pick an action from the command-line arguments.
--
-- * no argument: build the book under 'defaultEbookName';
-- * exactly one argument: use it as the book name, unless it begins with
--   @-@ or @/@, in which case the usage text is printed instead;
-- * anything else: print the usage text.
main :: IO ()
main = do
  progName <- getProgName
  getArgs >>= dispatch progName
  where
    dispatch _ [] = cache2epub defaultEbookName
    dispatch prog [arg]
      | looksLikeOption arg = usageHelp prog
      | otherwise = cache2epub arg
    dispatch prog _ = usageHelp prog

    -- A lone argument with a leading '-' or '/' is never taken as a book name.
    looksLikeOption (c : _) = c == '-' || c == '/'
    looksLikeOption [] = False
-- | Print a one-line usage summary to standard output.
--
-- @name@ is the program name to show in the message (the caller passes the
-- result of 'getProgName').
--
-- NOTE(review): the original string literal was split across two physical
-- lines, which Haskell does not allow, and the text between the brackets
-- was missing. The placeholder below is a reconstruction - confirm the
-- intended wording.
usageHelp :: String -> IO ()
usageHelp name = putStrLn $ "Usage: " ++ name ++ " [EBOOK_NAME]"
-- | Run the five-stage pipeline over the cached articles and build an EPUB
-- named @bookName@.
--
-- Standard output is first switched to binary mode. The configuration is
-- then initialised, the cached article URLs are listed, and each stage
-- prints its banner before running: fetch articles, sanitize articles,
-- list the articles' images, fetch the images, create the EPUB. A final
-- @Done.@ line is printed at the end.
cache2epub :: String -> IO ()
cache2epub bookName = do
  hSetBinaryMode stdout True
  cfg <- wiki4e_initConfig
  articles <- wiki4e_listCacheURLs cfg
  stage "# STAGE 1/5 - Verify Articles..." $
    wiki4e_fetchArticles cfg articles
  stage "# STAGE 2/5 - Sanitize Articles..." $
    wiki4e_sanitizeArticles cfg articles
  images <-
    stage "# STAGE 3/5 - Inspect Articles for Images..." $
      wiki4e_listArticlesImages cfg articles
  stage "# STAGE 4/5 - Download Images..." $
    wiki4e_fetchImages cfg images
  stage "# STAGE 5/5 - Constructing EPUB..." $
    wiki4e_createEpub cfg bookName articles images
  putStrLn "Done."
  where
    -- Print a stage banner, then run the stage and hand back its result.
    stage banner action = putStrLn banner >> action
-- | List the cached articles as English-Wikipedia URLs.
--
-- Reads the entries of the directory @w4confDirFetch config@, ignores every
-- entry whose name starts with a dot (this covers @.@ and @..@), prefixes
-- each remaining name with @http://en.wikipedia.org/wiki/@ and parses the
-- result with 'importURL'. Names that do not parse as a URL are dropped.
--
-- It is expected that all articles are from the English Wikipedia.
wiki4e_listCacheURLs :: Wiki4eConfig -> IO [URL]
wiki4e_listCacheURLs config = do
  entries <- liftM (filter isVisible) $ getDirectoryContents (w4confDirFetch config)
  return $ catMaybes $ map (importURL . (wikiPrefix ++)) entries
  where
    wikiPrefix = "http://en.wikipedia.org/wiki/"

    -- Total predicate: the previous lambda @\(c:_) -> ...@ had no case for
    -- an empty name and would raise a pattern-match failure on one. An
    -- empty name cannot denote an article, so it is skipped as well.
    isVisible [] = False
    isVisible (c : _) = c /= '.'