{-# LANGUAGE TemplateHaskell, QuasiQuotes, NamedFieldPuns #-} module CodeMushu.Repo where import CodeMushu.Env import Prelude () import System.Process import Data.List hiding (length) import qualified Data.ByteString.Char8 as B import Control.Monad hiding (join) import System.Posix hiding (sleep) import Data.Map (Map) import System.FilePath import Database.HDBC import Database.HDBC.Sqlite3 import MPS.TH import MPS.Extra import MPSUTF8 (mkdir_p, rm_rf) import System.Directory import Control.Concurrent.STM import GHC.IO.Exception import qualified Prelude as P import CodeMushu.Project import Network.Curl hiding (content) is_text :: String -> IO Bool is_text x = do r <- readProcess "file" ["-b", x] "" return - "text" `isInfixOf` r is_image :: String -> Bool is_image = let image_extensions = [".png", ".jpg"] in takeExtension > belongs_to image_extensions not_too_big :: String -> IO Bool not_too_big = getFileStatus >=> fileSize > (<= limit) > return where limit = _500k _1m = 1000000 _500k = 500000 find_source :: StatusCallback -> String -> IO ([String], [String]) find_source cb path = do cb - DBStage - DBAnalysing r <- readProcess "find" [path] "" let clean_files = reject (splitPath > has ".svn/") paths = r.lines .clean_files files <- paths.filterM (getFileStatus >=> isRegularFile > return) dirs <- paths.filterM (getFileStatus >=> isDirectory > return) fs <- files.filterM is_text >>= filterM not_too_big let imgs = files.filter is_image return - (dirs, fs) load_code :: String -> String -> IO Code load_code _ [] = error "code path can not be empty" load_code base x = do let relative_path = makeRelative base x content <- B.readFile x ^ (B.unpack > b2u) let tokens = relative_path.splitPath path = relative_path depth = length tokens .from_i parent = if depth.is 1 then "/" else tokens.init.concat filesize <- getFileStatus x ^ fileSize ^ from_i return - def { path , parent , depth , filesize , content } load_dir :: String -> String -> IO Code load_dir _ [] = error "code path can not be empty" load_dir base x = do let relative_path = makeRelative base x let tokens = relative_path.splitPath path = relative_path depth = length tokens .from_i parent = if depth.is 1 then "/" else tokens.init.concat return - def { path , parent , depth , is_directory = 1 } insert_code :: (IConnection d) => d -> Code -> IO () insert_code db code = do let { vals = [ toSql - code.path , toSql - code.parent , toSql - code.depth , toSql - code.filesize , toSql - code.content , toSql - code.is_directory ] } -- withTransaction db - \d -> run d insert_sql vals run db insert_sql vals -- log - "inserted " + code.path -- [[uid]] <- quickQuery db "select last_insert_rowid()" [] -- return (fromSql uid) return () insert_code_from_path :: (IConnection d) => d -> String -> String -> IO () insert_code_from_path db base x = load_code base x >>= insert_code db insert_dir_from_path :: (IConnection d) => d -> String -> String -> IO () insert_dir_from_path db base x = load_dir base x >>= insert_code db init_db :: StatusCallback -> Project -> IO Connection init_db cb p = do cb - DBStage - DBInit mkdir_p - p.project_db_path db <- handleSqlError - connectSqlite3 - p.project_db_file create_table_if_missing db return db insert_project :: StatusCallback -> Connection -> Project -> IO () insert_project cb db p = do (dirs, source_files) <- find_source cb (p.project_source_path) dirs.mapM_ (insert_dir_from_path db (p.project_source_path)) let total = source_files.length int_counter_ref <- newIORef 0 source_files.indexed.mapM_ ( \(i, path) -> do let progress = i.realToFrac P./ total.realToFrac int_counter <- readIORef int_counter_ref let int_percent = floor - progress * 100 when (int_counter P.< int_percent) - do cb - DBStage - DBImporting int_percent writeIORef int_counter_ref int_percent insert_code_from_path db (p.project_source_path) path ) create_db :: StatusCallback -> Project -> IO () create_db cb p = do db <- init_db cb p insert_project cb db p index_db cb db p commit db release_db cb db p return () index_db :: StatusCallback -> Connection -> Project -> IO () index_db cb db _ = do cb - DBStage - DBIndexing sql_indecies.lines.reject null.mapM_ (\sql_index -> run db sql_index []) commit db release_db :: StatusCallback -> Connection -> Project -> IO () release_db cb db _ = do cb - DBStage - DBComplete disconnect db create_table_if_missing :: (IConnection a) => a -> IO () create_table_if_missing db = do tables <- handleSqlError $ getTables db when (not - "code" `elem` tables) - handleSqlError - do run db sql_schema [] commit db -- repo / project_id / [src|db] | public/repo/project_id.sqlite init_project :: Integer -> String -> Repo -> IO Project init_project job_id project_url repo = do let project_id = job_id let project_name = project_url.takeFileName return - def { project_name , project_id , repo , project_url } make_project_id :: IO String make_project_id = now_in_micro_seconds ^ show -- get_project_name_from_url = split "/" .last unpack_git_repo :: StatusCallback -> Project -> IO ExitCode unpack_git_repo cb p = do mkdir_p - p.project_root_path cb - UnpackStage - Unpacking exit_code <- rawSystem "git" ["clone", "-q", p.project_url, p.project_source_path] when ( exit_code .is ExitSuccess ) - do rm_rf - p.project_source_path / ".git" return exit_code unpack_hg_repo :: StatusCallback -> Project -> IO ExitCode unpack_hg_repo cb p = do mkdir_p - p.project_root_path cb - UnpackStage - Unpacking exit_code <- rawSystem "hg" ["clone", p.project_url, p.project_source_path] when ( exit_code .is ExitSuccess ) - do rm_rf - p.project_source_path / ".hg" return exit_code unpack_darcs_repo :: StatusCallback -> Project -> IO ExitCode unpack_darcs_repo cb p = do mkdir_p - p.project_root_path cb - UnpackStage - Unpacking exit_code <- rawSystem "darcs" ["get", "--lazy", p.project_url, p.project_source_path] log - "exit code: " + show exit_code when ( exit_code .is ExitSuccess ) - do rm_rf - p.project_source_path / "_darcs" return exit_code validate_repo_url :: String -> IO (Maybe String) validate_repo_url url = do (r, xs) <- curlHead url [CurlFollowLocation True] return - if r.null.not then case xs.reverse.lookup "Content-Length" >>= readMay of Just i -> -- archive less then 50 MB is valid if i `div` (1024 * 1024) <= 50 then Nothing else Just "repo too big" Nothing -> Nothing else Just "invalid url" with_valid_repo :: StatusCallback -> String -> IO ExitCode -> IO ExitCode with_valid_repo cb url io = do status <- validate_repo_url - url case status of Nothing -> do io Just err -> do cb - Failed - Exception err return - ExitFailure 0 unpack_raw_repo :: StatusCallback -> Project -> IO ExitCode unpack_raw_repo cb p = do mkdir_p - p.project_source_path cb - UnpackStage - Unpacking with_valid_repo cb (p.project_url) - do exit_code <- rawSystem "wget" [p.project_url, "--directory-prefix", p.project_source_path] log - "exit code: " + show exit_code return exit_code unpack_archive_repo :: StatusCallback -> Project -> IO ExitCode unpack_archive_repo cb p = do mkdir_p - p.project_source_path cb - UnpackStage - Unpacking with_valid_repo cb (p.project_url) - do exit_code <- rawSystem "wget" [p.project_url, "--directory-prefix", p.project_source_path] downloads <- ls - p.project_source_path case downloads of [] -> log "could not found downloaded archives" x:_ -> do system - "cd " + p.project_source_path + "; unp " + x removeFile - p.project_source_path / x log - "exit code: " + show exit_code return exit_code unpack_svn_repo :: StatusCallback -> Project -> IO ExitCode unpack_svn_repo cb p = do mkdir_p - p.project_root_path cb - UnpackStage - Unpacking exit_code <- rawSystem "svn" ["co", "--non-interactive", "-q", p.project_url, p.project_source_path] log - "exit code: " + show exit_code return exit_code unpack_repo :: Repo -> StatusCallback -> Project -> IO ExitCode unpack_repo Git = unpack_git_repo unpack_repo Darcs = unpack_darcs_repo unpack_repo Raw = unpack_raw_repo unpack_repo Archive = unpack_archive_repo unpack_repo SVN = unpack_svn_repo unpack_repo Hg = unpack_hg_repo bounce_repo :: StatusCallback -> Project -> IO () bounce_repo cb p = do cb Bouncing copyFile (p.project_db_file) (p.project_bounce_file) cleanup_repo :: StatusCallback -> Project -> IO () cleanup_repo cb p = do cb Cleanup -- hack -- fork - do -- sleep 10 rm_rf - p.project_root_path create_project :: StatusCallback -> Integer -> Repo -> String -> IO () create_project cb job_id repo x = do p <- init_project job_id x repo exit_code <- return_after 30 - unpack_repo repo cb p case exit_code of ExitSuccess -> do cb - UnpackStage - UnpackComplete create_db cb p bounce_repo cb p cleanup_repo cb p cb - Completed p ExitFailure error_code -> do cleanup_repo cb p cb - Failed FailedToFetchRepo -- sql_schema :: String sql_schema = [$here| create table code ( id INTEGER PRIMARY KEY , path TEXT , parent TEXT , depth INTEGER , filesize INTEGER , content TEXT , is_directory INTEGER ); |] sql_indecies = [$here| create index path_index on code (path); create index parent_index on code (parent); create index depth_index on code (depth); create index is_directory_index on code (is_directory); |] insert_sql = [$here| INSERT INTO code(path, parent, depth, filesize, content, is_directory) VALUES(?, ?, ?, ?, ?, ?) |]