diff --git a/managed/devops/bin/yb_backup.py b/managed/devops/bin/yb_backup.py index fa47f0cc2437..c30dbeb7d3f8 100755 --- a/managed/devops/bin/yb_backup.py +++ b/managed/devops/bin/yb_backup.py @@ -67,6 +67,9 @@ STARTED_SNAPSHOT_CREATION_RE = re.compile(r'[\S\s]*Started snapshot creation: (?P.*)') YSQL_CATALOG_VERSION_RE = re.compile(r'[\S\s]*Version: (?P.*)') +YSQL_SHELL_ERROR_RE = re.compile('.*ERROR.*') +ENABLE_STOP_ON_YSQL_DUMP_RESTORE_ERROR = False + ROCKSDB_PATH_PREFIX = '/yb-data/tserver/data/rocksdb' SNAPSHOT_DIR_GLOB = '*/table-*/tablet-*.snapshots/*' @@ -148,6 +151,11 @@ class YbAdminOpNotSupportedException(BackupException): pass +class YsqlDumpApplyFailedException(BackupException): + """Exception raised if 'ysqlsh' tool failed to apply the YSQL dump file.""" + pass + + def split_by_tab(line): return [item.replace(' ', '') for item in line.split("\t")] @@ -1208,8 +1216,14 @@ def parse_arguments(self): '--remote_ysql_shell_binary', default=DEFAULT_REMOTE_YSQL_SHELL_PATH, help="Path to the remote ysql shell binary") parser.add_argument( - '--pg_based_backup', action='store_true', default=False, help="Use it to trigger " - "pg based backup.") + '--pg_based_backup', action='store_true', default=False, + help="Use it to trigger pg based backup.") + parser.add_argument( + '--ysql_ignore_restore_errors', action='store_true', default=False, + help="Do NOT use ON_ERROR_STOP mode when applying YSQL dumps in backup-restore.") + parser.add_argument( + '--ysql_disable_db_drop_on_restore_errors', action='store_true', default=False, + help="Do NOT drop just created (or existing) YSQL DB if YSQL dump apply failed.") parser.add_argument( '--disable_xxhash_checksum', action='store_true', default=False, help="Disables xxhash algorithm for checksum computation.") @@ -1525,7 +1539,7 @@ def post_process_arguments(self): xxh64_bin = XXH64_X86_BIN self.xxhash_checksum_path = os.path.join(xxhash_tool_path, xxh64_bin) except Exception: - logging.warn("[app] xxhsum tool missing on the host, continuing with sha256") + logging.warning("[app] xxhsum tool missing on the host, continuing with sha256") else: raise BackupException("No Live TServer exists. " "Check the TServer nodes status & try again.") @@ -3429,8 +3443,12 @@ def import_ysql_dump(self, dump_file_path): """ Import the YSQL dump using the provided file. """ - new_db_name = None - if self.args.keyspace: + on_error_stop = (not self.args.ysql_ignore_restore_errors and + ENABLE_STOP_ON_YSQL_DUMP_RESTORE_ERROR) + + old_db_name = None + if self.args.keyspace or on_error_stop: + # Get YSQL DB name from the dump file. cmd = get_db_name_cmd(dump_file_path) if self.args.local_yb_admin_binary: @@ -3443,6 +3461,8 @@ def import_ysql_dump(self, dump_file_path): # ssh_librabry(yugabyte-db/managed/devops/opscli/ybops/utils/ssh.py). old_db_name = old_db_name.splitlines()[-1].strip() + new_db_name = None + if self.args.keyspace: if old_db_name: new_db_name = keyspace_name(self.args.keyspace[0]) if new_db_name == old_db_name: @@ -3472,7 +3492,45 @@ def import_ysql_dump(self, dump_file_path): else: self.run_ssh_cmd(cmd, self.get_main_host_ip()) - self.run_ysql_shell(['--echo-all', '--file=' + dump_file_path]) + ysql_shell_args = ['--echo-all', '--file=' + dump_file_path] + + if on_error_stop: + logging.info( + "[app] Enable ON_ERROR_STOP mode when applying '{}'".format(dump_file_path)) + ysql_shell_args.append('--variable=ON_ERROR_STOP=1') + error_msg = None + try: + self.run_ysql_shell(ysql_shell_args) + except subprocess.CalledProcessError as ex: + output = str(ex.output.decode('utf-8', errors='replace') + .encode("ascii", "ignore") + .decode("ascii")) + errors = [] + for line in output.splitlines(): + if YSQL_SHELL_ERROR_RE.match(line): + errors.append(line) + + error_msg = "Failed to apply YSQL dump '{}' with RC={}: {}".format( + dump_file_path, ex.returncode, '; '.join(errors)) + except Exception as ex: + error_msg = "Failed to apply YSQL dump '{}': {}".format(dump_file_path, ex) + + if error_msg: + logging.error("[app] {}".format(error_msg)) + db_name_to_drop = new_db_name if new_db_name else old_db_name + if db_name_to_drop: + logging.info("[app] YSQL DB '{}' will be deleted at exit...".format( + db_name_to_drop)) + atexit.register(self.delete_created_ysql_db, db_name_to_drop) + else: + logging.warning("[app] Skip new YSQL DB drop because YSQL DB name " + "was not found in file '{}'".format(dump_file_path)) + + raise YsqlDumpApplyFailedException(error_msg) + else: + logging.warning("[app] Ignoring YSQL errors when applying '{}'".format(dump_file_path)) + self.run_ysql_shell(ysql_shell_args) + return new_db_name def import_snapshot(self, metadata_file_path): @@ -3872,6 +3930,21 @@ def delete_created_snapshot(self, snapshot_id): return self.run_yb_admin(['delete_snapshot', snapshot_id]) + def delete_created_ysql_db(self, db_name): + """ + Callback run on exit to delete incomplete/partly created YSQL DB. + """ + if db_name: + if self.args.ysql_disable_db_drop_on_restore_errors: + logging.warning("[app] Skip YSQL DB '{}' drop because " + "the clean-up is disabled".format(db_name)) + else: + ysql_shell_args = ['-c', 'DROP DATABASE {};'.format(db_name)] + logging.info("[app] Do clean-up due to previous error: " + "'{}'".format(' '.join(ysql_shell_args))) + output = self.run_ysql_shell(ysql_shell_args) + logging.info("YSQL DB drop result: {}".format(output.replace('\n', ' '))) + def TEST_yb_admin_unsupported_commands(self): try: self.run_yb_admin(["fake_command"])