{-| module to download the wiki source text from the wiki website -}
module Load where

import ImperativeState
import Tools
import Hex
import UrlAnalyse
import Control.Monad.State
import System.IO.Temp
import System.Directory
import System.FilePath.Posix
import Text.ParserCombinators.Parsec hiding (try)
import Text.Parsec.Prim
import Codec.Binary.UTF8.String
import Data.String.HT
import Data.ByteString hiding (takeWhile, isInfixOf, intercalate, concat, map, sort)
import Data.List.Split
import Data.Map as Map hiding (map)
import Data.List hiding (lookup)
import MagicStrings
import SimpleContributors
import WikiHelper
import MediaWikiParseTree
import MediaWikiParser
import Network.URL
import Control.Monad.Except
import System.Process

-- | Parser used while inside a start/end delimited region.
--
-- @aku@ holds the characters consumed since the start delimiter.
--
-- * At end of input the accumulated text is returned as is; @action@ is
--   not run.
-- * When @eend@ matches, the remaining input is first processed with
--   'startToEnd', then @action@ is run on the accumulated text; the
--   result is the output of @action@ followed by the processed rest.
-- * Otherwise one character is consumed, appended to @aku@, and parsing
--   continues.
--
-- NOTE(review): @aku ++ [a]@ is quadratic in the length of the region.
notendyet ::
  (String -> ImperativeMonad String) ->
  ParsecT String () ImperativeMonad String ->
  ParsecT String () ImperativeMonad String ->
  String ->
  ParsecT String () ImperativeMonad String
notendyet action sstart eend aku =
  try
    ( do
        eof
        return aku
    )
    <|> try
      ( do
          _ <- eend
          r <- startToEnd action sstart eend
          a <- lift (action aku)
          return (a ++ r)
      )
    <|> do
      a <- anyChar
      notendyet action sstart eend (aku ++ [a])

-- | Parser for the start of a delimited region.
--
-- Yields the empty string at end of input. Otherwise @sstart@ has to
-- match, after which 'notendyet' takes over with an empty accumulator.
beginning ::
  (String -> ImperativeMonad String) ->
  ParsecT String () ImperativeMonad String ->
  ParsecT String () ImperativeMonad String ->
  ParsecT String () ImperativeMonad [Char]
beginning action sstart eend =
  try
    ( do
        eof
        return []
    )
    <|> (sstart >> notendyet action sstart eend [])

-- | Top level parser: copies input through unchanged until a region
-- opened by @sstart@ is found, which is then handled by 'beginning'.
--
-- Yields the empty string at end of input. If 'beginning' does not
-- match at the current position, one character is passed through to
-- the output and parsing continues with the rest.
startToEnd ::
  (String -> ImperativeMonad String) ->
  ParsecT String () ImperativeMonad String ->
  ParsecT String () ImperativeMonad String ->
  ParsecT String () ImperativeMonad String
startToEnd action sstart eend =
  try
    ( do
        eof
        return []
    )
    <|> try (beginning action sstart eend)
    <|> do
      a <- anyChar
      s <- startToEnd action sstart eend
      return (a : s)

-- | Action that ignores both of its arguments and returns the empty
-- string.
zeroAction :: (Monad m) => t -> t1 -> m [Char]
zeroAction _ _ = return ""

-- | Run 'startToEnd' over @text@, using the literal strings @sstart@ and
-- @eend@ as delimiters and @action@ as the handler for the text between
-- them.
--
-- A parse error is not reported; the empty string is returned instead.
runAction ::
  String ->
  String ->
  (String -> ImperativeMonad String) ->
  String ->
  ImperativeMonad String
runAction sstart eend action text = do
  x <- runParserT (startToEnd action (string sstart) (string eend)) () "" text
  case x of
    Left _ -> return ""
    Right xs -> return xs

-- | Load the page named by @text@ (the part before the first @|@,
-- trimmed) from @wurl@.
--
-- When the page is found, 'addContributors' is called for it and the
-- page text, prefixed with a @dhunparserurl@ marker line, is passed
-- through 'noinclude'. When it is not found the empty string is
-- returned.
chapterAction :: WikiUrl -> String -> ImperativeMonad String
chapterAction wurl text = do
  pp <- liftIO (getpage d (wurl))
  case pp of
    Just p -> do
      _ <- addContributors d Nothing
      noinclude wurl ("\n\ndhunparserurl " ++ d ++ "\n\n" ++ p)
    _ -> return ""
  where
    d = (trim (takeWhile (/= '|') text))

-- | Like 'chapterAction', but the page is looked up as a subpage:
-- the lemma of @fu@ (after 'removePrintVersion') followed by @"/"@ and
-- the name taken from @text@. A level one heading with that name is
-- inserted in front of the page text.
chapterAction2 :: FullWikiUrl -> String -> ImperativeMonad String
chapterAction2 fu text = do
  pp <- liftIO (getpage d (wurl))
  case pp of
    Just p -> do
      _ <- addContributors d Nothing
      noinclude
        wurl
        ("\n\ndhunparserurl " ++ d ++ "\n\n" ++ "= " ++ e ++ " =\n" ++ p)
    _ -> return ""
  where
    e = (trim (takeWhile (/= '|') text))
    d = (removePrintVersion (lemma fu)) ++ "/" ++ e
    wurl = wikiUrl fu

-- | Same as 'chapterAction2', except that the subpage separator is
-- @"/ "@ (slash followed by a space) instead of @"/"@.
chapterAction3 :: FullWikiUrl -> String -> ImperativeMonad String
chapterAction3 fu text = do
  pp <- liftIO (getpage d (wurl))
  case pp of
    Just p -> do
      _ <- addContributors d Nothing
      noinclude
        wurl
        ("\n\ndhunparserurl " ++ d ++ "\n\n" ++ "= " ++ e ++ " =\n" ++ p)
    _ -> return ""
  where
    e = (trim (takeWhile (/= '|') text))
    d = (removePrintVersion (lemma fu)) ++ "/ " ++ e
    wurl = wikiUrl fu

-- | Synonym for 'qIncludeAction'.
includeAction :: WikiUrl -> String -> ImperativeMonad String
includeAction = qIncludeAction

-- | Handler for a template inclusion.
--
-- If @text@ contains @"Vorlage"@ the inclusion is put back unchanged,
-- wrapped in double curly braces. Otherwise the page name (the part of
-- @text@ before the first @|@, trimmed) is printed to stdout and the
-- page is loaded and processed in the same way as in 'chapterAction'.
qIncludeAction :: WikiUrl -> String -> ImperativeMonad String
qIncludeAction wurl text =
  if isInfixOf "Vorlage" text
    then return ("{{" ++ text ++ "}}")
    else do
      pp <- (liftIO (print d)) >> liftIO (getpage d (wurl))
      case pp of
        Just p -> do
          _ <- addContributors d Nothing
          noinclude wurl ("\n\ndhunparserurl " ++ d ++ "\n\n" ++ p)
        _ -> return ""
  where
    d = (trim (takeWhile (/= '|') text))

-- | Variant of 'qIncludeAction' that loads the page with 'getBookpage'
-- and additionally parses it in a separate @mediawiki2latex -x@
-- process.
--
-- The processed page text is written to a file @input@ in a freshly
-- created temporary directory, the external process is started with a
-- hex encoded configuration whose @compile@ field points at that
-- directory, and the file @output@ is read back from the same
-- directory. Its contents are parsed with 'read' and prepended to
-- @loadacu@ in the state. The processed page text is returned.
qBookIncludeAction :: WikiUrl -> String -> ImperativeMonad String
qBookIncludeAction wurl text =
  if isInfixOf "Vorlage" text
    then return ("{{" ++ text ++ "}}")
    else do
      pp <- (liftIO (print d)) >> liftIO (getBookpage d (wurl))
      case pp of
        Just p -> do
          _ <- addContributors d Nothing
          x <- noinclude wurl ("\n\ndhunparserurl " ++ d ++ "\n\n" ++ p)
          st <- get
          systempdir <- liftIO getTemporaryDirectory
          -- NOTE(review): the temporary directory is not removed here.
          tempdir <-
            liftIO $ createTempDirectory systempdir "MediaWiki2LaTeXParser"
          -- Paths inside the temporary directory are built with '</>';
          -- comparing with '>' would yield a Bool instead of a path.
          liftIO $ Tools.writeFile (tempdir </> "input") x
          -- NOTE(review): the exit code of the child process is ignored.
          _ <-
            liftIO $
              system
                ( "mediawiki2latex -x "
                    ++ (Hex.hex (show (fullconfigbase {compile = Just tempdir})))
                )
          t <- liftIO $ Tools.readFile (tempdir </> "output")
          -- NOTE(review): 'read' is partial and will raise an error if the
          -- output file does not contain a valid value.
          put st {loadacu = ((read t) ++ (loadacu st) :: [Anything Char])}
          return x
        _ -> return ""
  where
    d = (trim (takeWhile (/= '|') text))

-- | Build the URL of the contributors listing for a page.
--
-- If @lang@ contains @"commons"@ the query addresses
-- @commons.wikimedia.org@ and neither @lang@ nor @theFam@ are sent;
-- otherwise both are sent as @wikilang@ and @wikifam@. All remaining
-- query parameters are the same in both cases. The exported URL is
-- passed through 'unify'.
makeUrl :: String -> String -> String -> [Char]
makeUrl lang theFam thePage =
  (unify . exportURL)
    ( URL
        { url_path = "~daniel/WikiSense/Contributors.php",
          url_params = wikiParams ++ commonParams,
          url_type =
            Absolute
              (Host {protocol = HTTP True, host = "toolserver.org", port = Nothing})
        }
    )
  where
    -- Parameters selecting the wiki; these come first in the query.
    wikiParams =
      if isInfixOf "commons" lang
        then [("wikifam", "commons.wikimedia.org")]
        else [("wikilang", lang), ("wikifam", theFam)]
    -- Parameters shared by both kinds of request.
    commonParams =
      [ ("page", thePage),
        ("since", ""),
        ("until", ""),
        ("grouped", "on"),
        ("hideanons", "on"),
        ("max", "100000"),
        ("format", "html")
      ]

-- | Heading of the contributors chapter, keyed by language code.
langau :: Map String String
langau =
  fromList
    [ ("hi", "\2354\2375\2326\2325"),
      ("ja", "\33879\32773"),
      ("pl", "Autorzy"),
      ("lo", "\3737\3761\3713\3739\3760\3742\3761\3737"),
      ("fi", "Tekij\228"),
      ("sv", "F\246rfattare"),
      ("pt", "Autores"),
      ("ru", "\1040\769\1074\1090\1086\1088\1099"),
      ("ko", "\51200\51088"),
      ("tr", "Yazar"),
      ("sk", "Avtor"),
      ("hy", "\1344\1381\1394\1387\1398\1377\1391"),
      ("lt", "Autorius"),
      ("ta", "\2986\2975\3016\2986\3021\2986\3006\2995\2992\3021"),
      ("en", "Contributors"),
      ("ro", "Autor"),
      ("it", "Autori"),
      ("hr", "\192utor"),
      ("vo", "Lautan"),
      ("eo", "Verkinto"),
      ("hu", "Szerz\337"),
      ("is", "H\246fundur"),
      ("gd", "\217ghdar"),
      ("de", "Autoren"),
      ("ca", "Autor"),
      ("el", "\931\965\947\947\961\945\966\941\945\962"),
      ("bg", "\1040\1074\1090\1086\1088"),
      ("ce", "\1071\1079\1076\1072\1088\1093\1086"),
      ("nl", "Auteurs"),
      ("es", "Autores"),
      ("eu", "Egile"),
      ("fr", "Auteurs"),
      ("cs", "Autor"),
      ("br", "Aozer")
    ]

-- | LaTeX source opening the contributors chapter and its table.
--
-- The chapter title is looked up in 'langau', trying in this order:
-- the language given in @m@, the first dot separated component of the
-- host name of @fullurl@, and finally @"en"@. If none of them yields a
-- title, @"Contributors"@ is used.
makeHeader :: FullWikiUrl -> Maybe String -> [Char]
makeHeader fullurl m =
  "\\chapter{"
    ++ title
    ++ "}\n"
    ++ "\\label{Contributors}\n"
    ++ "\\begin{longtable}{rp{0.6\\linewidth}}\n"
    ++ "\\textbf{Edits}&\\textbf{User}\\\\\n"
  where
    -- Title for the explicitly requested language, if any.
    requested = m >>= (\yy -> Map.lookup yy langau)
    -- Title for the language prefix of the host name, if any.
    fromHost =
      case splitOn "." (hostname fullurl) of
        (x : _) -> Map.lookup x langau
        _ -> Nothing
    title =
      case requested `mplus` fromHost `mplus` (Map.lookup "en" langau) of
        Just x -> x
        _ -> "Contributors"

-- NOTE(review): the definition below continues past this section and the
-- following text looks damaged; it is left exactly as found.
makeHeaderHTML :: FullWikiUrl -> Maybe String -> [Char]
makeHeaderHTML fullurl m = let mmm = m >>= (\ yy -> Map.lookup yy langau) in "
Edits | User |
" ++ (show (edits v)) ++ " | " ++ (concat (map chartrans (name v))) ++ " |