module GetImages where import ImperativeState import UrlAnalyse import qualified Data.ByteString as BStr import Network.URL as URL import qualified Data.ByteString.UTF8 as UTF8Str import Data.List.Split (splitOn) import Control.Monad import Data.Maybe import Control.Monad.State import Data.List import Tools import System.FilePath import Control.Concurrent.MVar import Parallel modpath2 :: String -> URL -> URL modpath2 s u = u{url_path = if p /= [] then p ++ "/File:" ++ s else "/File:" ++ s} where pp = (url_path u) p = case reverse pp of ('/' : xs) -> (reverse xs) xs -> (reverse xs) conv :: URL -> String -> String conv u s = if take 5 s == "http:" then s else if take 6 s == "https:" then "http:" ++ (drop 6 s) else if (take 2 s) == "//" then "http:" ++ s else replace2 (exportURL u{url_path = case s of ('/' : xs) -> xs _ -> s}) "%25" "%" getImageUrl2 :: (String, URL) -> Maybe String getImageUrl2 (s, u) = (getImageUrl "fullImageLink" u s) `mplus` (getImageUrl "fullMedia" u s) getImageUrl3 :: String -> Maybe String getImageUrl3 s = return s getImageUrl :: String -> URL -> String -> Maybe String getImageUrl fi u ss = if isInfixOf fil s then case splitOn fil s of (_ : (y : _)) -> case splitOn theHref y of (_ : (yy : _)) -> case splitOn q yy of (z : _) -> Just ((conv u) . UTF8Str.toString $ (BStr.pack z)) _ -> Nothing _ -> Nothing _ -> Nothing else Nothing where s = BStr.unpack (UTF8Str.fromString ss) fil = BStr.unpack (UTF8Str.fromString fi) theHref = BStr.unpack (UTF8Str.fromString "href=\"") q = BStr.unpack (UTF8Str.fromString "\"") getImagePage :: String -> WikiUrl -> (Integer, String) -> IO (Maybe ([String], Integer)) getImagePage dir u (i, ss) = do l <- (mapM (geturl . unify . exportURL . modpath2 ss) (parses u)) :: IO [String] let xx = (map (getImageUrl2) (zip l (parses u))) :: [Maybe String] let gg = (zip (parses u) xx) :: [(URL, Maybe String)] let yy = (map go gg) :: [[(URL, String)]] let zz = (listToMaybe (concat yy)) :: Maybe (URL, String) case zz of Just (_, x) -> do img <- (geturl2 x) :: (IO BStr.ByteString) BStr.writeFile (dir (show i)) img return (Just (map (unify . exportURL . (modpath2 ss)) (parses u), i)) _ -> return Nothing where go :: (URL, Maybe String) -> [(URL, String)] go (uu, Just x) = [(uu, x)] go _ = [] doImage :: String -> WikiUrl -> (Integer, String) -> IO [Maybe (String, Integer, [String])] doImage dir theWikiUrl img = do p <- getImagePage dir theWikiUrl (fst img, theName) case p of Just (u, pp) -> return [Just (theName, pp, u)] _ -> return [Nothing] where theName = case dropWhile (/= ':') (takeWhile (/= '|') (snd img)) of (_ : xs) -> xs _ -> [] getImages :: String -> [String] -> WikiUrl -> ImperativeMonad [MVar [(Maybe (String, Integer, [String]))]] getImages dir images theWikiUrl = liftIO $ do ddir <- base dir thetheWikiUrl <- base theWikiUrl iimages <- mapM base ((zip [1 ..] images)) (mapM (liftA3 doImage ddir thetheWikiUrl) iimages)