From daae9e4a9f6723b191886461c23c71b799138f0a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Hannes=20Rosen=C3=B6gger?= <123haynes@gmail.com>
Date: Thu, 11 Aug 2016 16:29:37 +0200
Subject: [PATCH] Use pigz for increased performance on multicore machines if
 available

This commit changes the backup and restore tasks to use pigz if it is
installed on the system. If it isn't, the tasks fall back to the slower
gzip.
---
 CHANGELOG                       |  1 +
 doc/raketasks/backup_restore.md | 13 +++++++++++++
 lib/backup/database.rb          |  5 +++--
 lib/backup/files.rb             |  5 +++--
 lib/backup/helper.rb            | 30 ++++++++++++++++++++++++++++++
 lib/backup/manager.rb           |  6 ++++++
 6 files changed, 56 insertions(+), 4 deletions(-)
 create mode 100644 lib/backup/helper.rb

diff --git a/CHANGELOG b/CHANGELOG
index ef38d3e29f5d..5ff5893ad415 100644
--- a/CHANGELOG
+++ b/CHANGELOG
@@ -2,6 +2,7 @@ Please view this file on the master branch, on stable branches it's out of date.
 
 v 8.12.0 (unreleased)
   - Change merge_error column from string to text type
+  - Take advantage of pigz for backups if it is available on the system. !5775 (Hannes Rosenögger)
   - Optimistic locking for Issues and Merge Requests (title and description overriding prevention)
   - Added tests for diff notes

diff --git a/doc/raketasks/backup_restore.md b/doc/raketasks/backup_restore.md
index 835af5443a32..9360f4427770 100644
--- a/doc/raketasks/backup_restore.md
+++ b/doc/raketasks/backup_restore.md
@@ -64,6 +64,19 @@ Deleting tmp directories...[DONE]
 Deleting old backups... [SKIPPING]
 ```
 
+## Speed up the backup/restore process
+If you have a multicore machine, you can speed up the backup process considerably by installing [pigz](http://zlib.net/pigz/).
+It is a parallel implementation of gzip that offers better performance than gzip because it uses all cores of your CPU.
+Pigz is available in the repositories of most distributions.
+On Ubuntu, installing it is as easy as typing:
+`sudo apt-get install pigz`
+
+GitLab detects whether pigz is installed on the machine and automatically tries to use it for the backup/restore process.
+
+**Warning:**
+The extra speed comes at the expense of more RAM and CPU time.
+This means your system may become slow during the backup/restore, because all CPU cores are in use.
+
 ## Upload backups to remote (cloud) storage
 
 Starting with GitLab 7.4 you can let the backup script upload the '.tar' file it creates.
diff --git a/lib/backup/database.rb b/lib/backup/database.rb
index 22319ec6623f..da14eaae05bb 100644
--- a/lib/backup/database.rb
+++ b/lib/backup/database.rb
@@ -7,13 +7,14 @@ class Database
     def initialize
       @config = YAML.load_file(File.join(Rails.root,'config','database.yml'))[Rails.env]
       @db_file_name = File.join(Gitlab.config.backup.path, 'db', 'database.sql.gz')
+      @compression_command = Backup::Helper.compression_command
     end
 
     def dump
       FileUtils.mkdir_p(File.dirname(db_file_name))
       FileUtils.rm_f(db_file_name)
       compress_rd, compress_wr = IO.pipe
-      compress_pid = spawn(*%W(gzip -1 -c), in: compress_rd, out: [db_file_name, 'w', 0600])
+      compress_pid = spawn(*%W(#{@compression_command} -1 -c), in: compress_rd, out: [db_file_name, 'w', 0600])
       compress_rd.close
 
       dump_pid = case config["adapter"]
@@ -42,7 +43,7 @@ def dump
 
     def restore
       decompress_rd, decompress_wr = IO.pipe
-      decompress_pid = spawn(*%W(gzip -cd), out: decompress_wr, in: db_file_name)
+      decompress_pid = spawn(*%W(#{@compression_command} -cd), out: decompress_wr, in: db_file_name)
       decompress_wr.close
 
       restore_pid = case config["adapter"]
diff --git a/lib/backup/files.rb b/lib/backup/files.rb
index cedbb289f6a8..c6fe8fce68e3 100644
--- a/lib/backup/files.rb
+++ b/lib/backup/files.rb
@@ -9,20 +9,21 @@ def initialize(name, app_files_dir)
       @app_files_dir = File.realpath(app_files_dir)
       @files_parent_dir = File.realpath(File.join(@app_files_dir, '..'))
       @backup_tarball = File.join(Gitlab.config.backup.path, name + '.tar.gz')
+      @compression_command = Backup::Helper.compression_command
     end
 
     # Copy files from public/files to backup/files
     def dump
       FileUtils.mkdir_p(Gitlab.config.backup.path)
       FileUtils.rm_f(backup_tarball)
-      run_pipeline!([%W(tar -C #{app_files_dir} -cf - .), %W(gzip -c -1)], out: [backup_tarball, 'w', 0600])
+      run_pipeline!([%W(tar -C #{app_files_dir} -cf - .), %W(#{@compression_command} -c -1)], out: [backup_tarball, 'w', 0600])
     end
 
     def restore
       backup_existing_files_dir
       create_files_dir
 
-      run_pipeline!([%W(gzip -cd), %W(tar -C #{app_files_dir} -xf -)], in: backup_tarball)
+      run_pipeline!([%W(#{@compression_command} -cd), %W(tar -C #{app_files_dir} -xf -)], in: backup_tarball)
     end
 
     def backup_existing_files_dir
diff --git a/lib/backup/helper.rb b/lib/backup/helper.rb
new file mode 100644
index 000000000000..f75c4de1a7a6
--- /dev/null
+++ b/lib/backup/helper.rb
@@ -0,0 +1,30 @@
+module Backup
+  module Helper
+    # Cross-platform way of finding an executable in the $PATH.
+    #
+    #   which('ruby') #=> /usr/bin/ruby
+    def self.which(cmd)
+      exts = ENV['PATHEXT'] ? ENV['PATHEXT'].split(';') : ['']
+
+      ENV['PATH'].split(File::PATH_SEPARATOR).each do |path|
+        exts.each do |ext|
+          exe = File.join(path, "#{cmd}#{ext}")
+          return exe if File.executable?(exe) && !File.directory?(exe)
+        end
+      end
+
+      nil
+    end
+
+    # If available, use pigz (http://zlib.net/pigz/) as the compression command instead of gzip.
+    # pigz is "A parallel implementation of gzip for modern multi-processor, multi-core machines",
+    # which makes backup creation and restoration on these machines a lot faster.
+    def self.compression_command
+      if self.which('pigz')
+        'pigz'
+      else
+        'gzip'
+      end
+    end
+  end
+end
diff --git a/lib/backup/manager.rb b/lib/backup/manager.rb
index 0dfffaf0bc6d..21a0a7389ae2 100644
--- a/lib/backup/manager.rb
+++ b/lib/backup/manager.rb
@@ -22,8 +22,11 @@ def pack
         file << s.to_yaml.gsub(/^---\n/,'')
       end
 
+      # display compression tool
+      $progress.puts "Detected #{Backup::Helper.compression_command} as the fastest compression command."
+
       # create archive
       $progress.print "Creating backup archive: #{tar_file} ... "
       # Set file permissions on open to prevent chmod races.
       tar_system_options = {out: [tar_file, 'w', Gitlab.config.backup.archive_permissions]}
       if Kernel.system('tar', '-cf', '-', *backup_contents, tar_system_options)
@@ -119,6 +122,9 @@ def unpack
         exit 1
       end
 
+      # display extraction tool
+      $progress.puts "Detected #{Backup::Helper.compression_command} as the fastest extraction command."
+
       $progress.print "Unpacking backup ... "
 
       unless Kernel.system(*%W(tar -xf #{tar_file}))
-- 
GitLab
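Reviewer note (not part of the patch): a minimal sketch of how the new `Backup::Helper.compression_command` fallback can be exercised outside of Rails. It assumes the snippet is run from the repository root so that `lib/backup/helper.rb` resolves; the temporary `PATH` override is purely illustrative and simulates a machine without pigz installed.

```ruby
# Load only the new helper; it has no Rails dependencies.
require_relative 'lib/backup/helper'
require 'tmpdir'

# On a machine with pigz on the PATH this prints "pigz", otherwise "gzip".
puts Backup::Helper.compression_command

# Simulate a machine without pigz by pointing PATH at an empty directory;
# the helper should then fall back to plain gzip.
Dir.mktmpdir do |empty_dir|
  original_path = ENV['PATH']
  begin
    ENV['PATH'] = empty_dir
    puts Backup::Helper.compression_command # => "gzip"
  ensure
    ENV['PATH'] = original_path
  end
end
```

The bare command swap in the backup tasks works because pigz accepts the same flags the tasks already pass to gzip (`-1`, `-c`, `-cd`), so only the command name needs to change.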