diff --git a/CHANGELOG.md b/CHANGELOG.md
index a73d09b..231a8ea 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -4,6 +4,10 @@ The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
 and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
 
 ## [Unreleased]
+### Added
+- sat_import now checks for exports that have not been imported (missed/skipped)
+- --fixhistory option in sat_import to align import/export histories
+
 ### Changed
 - --notar export saved in /cdn_export dir rather than /export to prevent it being deleted
 
diff --git a/README.md b/README.md
index ba389e5..6b8dce1 100644
--- a/README.md
+++ b/README.md
@@ -28,7 +28,7 @@ hammer user create --login svc-api-user --firstname API --lastname User \
 --organization-ids 1 --default-organization-id 1 --admin true
 ```
 
-Foreman needs to be configured to export content to the location you require. By default the path is
+Foreman needs to be configured to export content to the location you require. By default the path is
 /var/lib/pulp/katello-export - this will result in you probably filling your /var/lib/pulp partition!
 The configs in these scripts assume that the exports are going to /var/sat-export - this should be a
 dedicated partition or even better dedicated disk just for export content.
@@ -223,11 +223,14 @@ This companion script to sat_export, running on the Disconnected Satellite
 performs a sha256sum verification of each part of the specified archive prior
 to extracting the transferred content to disk.
 
-Once the content has been extracted, a sync is triggered of each repository
-in the import set. Note that repositories MUST be enabled on the disconnected
-satellite prior to the sync working - for this reason a `nosync` option (-n)
-exists so that the repos can be extracted to disk and then enabled before the
-sync occurs. In order to not overload the Satellite during the sync, the
+Once the content has been extracted, a check is performed to see if any exports
+performed have not yet been imported. This is to assist with data integrity on
+the disconnected Satellite system. Any missing imports will be displayed and the
+option to continue or abort will be presented. Upon continuing, a sync is triggered
+of each repository in the import set. Note that repositories MUST be enabled on the
+disconnected satellite prior to the sync working - for this reason a `nosync`
+option (-n) exists so that the repos can be extracted to disk and then enabled
+before the sync occurs. In order to not overload the Satellite during the sync, the
 repositories will be synced in smaller batches, the number of repos in a batch
 being defined in the config.yml file. (It has been observed on systems with a
 large number of repos that triggering a sync on all repos at once pretty much
@@ -245,6 +248,13 @@ All previously imported datasets can be shown with the (-L) flag.
 Note that a dataset can normally only be imported ONCE. To force an import of an already
 completed dataset, use the (-f) flag.
 
+In the event that missing import datasets are detected, they should be imported to
+ensure data integrity and consistency. There may however be cases that result in
+the missing imports being included by other means, or no longer required at all.
+In these cases, the --fixhistory flag can be used to 'reset' the import history
+so that it matches the export history of the current import dataset, clearing
+these warnings.
+
 ### Help Output
 ```
 usage: sat_import.py [-h] [-o ORG] -d DATE [-n] [-r] [-l] [-L] [-f]
@@ -260,7 +270,8 @@ optional arguments:
   -l, --last            Show the last successfully completed import date
   -L, --list            List all successfully completed imports
   -c, --count           Display all package counts after import
-  -f, --force           Force import of data if it has previously been done
+  -f, --force           Force import of data if it has previously been done
+  --fixhistory          Force import history to match export history
 ```
 
 ### Examples
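
Editor's note: the README paragraphs above describe the new missing-import check in prose. As a rough sketch only (not part of the patch), the check amounts to comparing the export history shipped with the dataset against the import history recorded on the disconnected Satellite and reporting anything exported but never imported. The pickle file names are taken from the sat_import.py changes further down; the paths and dataset names below are made-up placeholders.

```
# Illustrative sketch of the history comparison described above (not part of the patch).
import pickle

# Placeholder paths - the real script derives these from helpers.IMPORTDIR and its var directory.
exports = pickle.load(open('/import/exporthistory_DoV.pkl', 'rb'))   # history written by sat_export
imports = pickle.load(open('/var/sat6_scripts/imports.pkl', 'rb'))   # history recorded by sat_import

# Anything exported but never imported here is a candidate missed/skipped dataset.
for ds in exports:
    if ds not in imports:
        print("Export " + ds + " has not been imported on this Satellite")
```
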
diff --git a/man/sat_import.8 b/man/sat_import.8
index 174579f..c8762e7 100644
--- a/man/sat_import.8
+++ b/man/sat_import.8
@@ -30,6 +30,12 @@ Any repositories found in the import that do not exist in Satellite will be indi
 .RE
 .RE
 .RS 3
+- A check is performed to see if any exports performed have not yet been imported.
+.RS 2
+Any missing imports will be displayed and the option to continue or abort will be presented.
+.RE
+.RE
+.RS 3
 - Satellite will perform a bulk repository sync of the repositories within the import dataset.
 .RE
 .RS 3
@@ -107,6 +113,16 @@ repositories to be shown, even if there is no mis-match.
 Normally the script will prevent the importing of a dataset that has already been imported.
 However, using this option will force an import of the dataset to be performed.
 .RE
+.PP
+.BR "--fixhistory"
+.RS 3
+In the event that missing import datasets are detected, they should be imported to
+ensure data integrity and consistency. There may however be cases that result in
+the missing imports being included by other means, or no longer required at all.
+In these cases, this flag can be used to 'reset' the import history
+so that it matches the export history of the current import dataset, clearing
+these warnings.
+.RE
 .SH EXAMPLES
 
 Check when the last import was performed:
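
Editor's note: to make the --fixhistory description above concrete, here is a minimal sketch of what the 'reset' amounts to, mirroring the fixhistory branch of check_missing() in the sat_import.py changes below: the export history (minus the current, not-yet-imported dataset) is simply saved as the import history. The paths are placeholders, not the script's real configuration.

```
# Illustrative sketch of the --fixhistory 'reset' (not part of the patch).
import pickle

vardir = '/var/lib/sat6_scripts'   # placeholder for the script's var directory
exports = pickle.load(open('/import/exporthistory_DoV.pkl', 'rb'))   # placeholder path

# Drop the current dataset (it has not been imported yet), then record the
# export history as the import history so the missing-import warnings clear.
exports = exports[:-1]
pickle.dump(exports, open(vardir + '/imports.pkl', 'wb'))
```
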
diff --git a/sat_import.py b/sat_import.py
index 297b042..fc40a1c 100644
--- a/sat_import.py
+++ b/sat_import.py
@@ -264,6 +264,34 @@ def check_counts(org_id, package_count, count):
     print '\n'
 
 
+def check_missing(imports, exports, dataset, fixhistory, vardir):
+    """
+    Compare export history with import history to find any datasets that have not been imported
+    """
+    missing = False
+
+    if fixhistory:
+        # Remove the last element (this import) before saving - we haven't imported yet!
+        exports = exports[:-1]
+        pickle.dump(exports, open(vardir + '/imports.pkl', "wb"))
+
+        # Copy the current 'exporthistory' over the 'importhistory' to 'fix' current mismatches
+        msg = "Saved export history as import history. Please re-run this import."
+        helpers.log_msg(msg, 'INFO')
+        print msg
+        sys.exit(2)
+
+    else:
+        for ds in exports:
+            if not ds in imports:
+                if not dataset in ds:
+                    msg = "Import dataset " + ds + " has not been imported"
+                    helpers.log_msg(msg, 'WARNING')
+                    missing = True
+
+    return(missing)
+
+
 def main(args):
     """
     Main Routine
@@ -307,6 +335,8 @@ def main(args):
                         required=False, action="store_true")
     parser.add_argument('-f', '--force', help='Force import of data if it has previously been done',
                         required=False, action="store_true")
+    parser.add_argument('--fixhistory', help='Force import history to match export history',
+                        required=False, action="store_true")
     args = parser.parse_args()
 
     # Set our script variables from the input args
@@ -316,6 +346,11 @@
     org_name = helpers.ORG_NAME
     dataset = args.dataset
 
+    if args.fixhistory:
+        fixhistory = True
+    else:
+        fixhistory = False
+
     # Record where we are running from
     script_dir = str(os.getcwd())
 
@@ -367,6 +402,24 @@
     # Extract the input files
     extract_content(basename)
 
+    # Read in the export history from the input dataset
+    dsname = dataset.split('_')[1]
+    exports = pickle.load(open(helpers.IMPORTDIR + '/exporthistory_' + dsname + '.pkl', 'rb'))
+
+    # Check for and let the user decide if they want to continue with missing imports
+    missing_imports = check_missing(imports, exports, dataset, fixhistory, vardir)
+    if missing_imports:
+        print "Run sat_import with the --fixhistory flag to reset the import history to this export"
+        answer = helpers.query_yes_no("Continue with import?", "no")
+        if not answer:
+            msg = "Import Aborted"
+            helpers.log_msg(msg, 'ERROR')
+            sys.exit(1)
+        else:
+            msg = "Import continued by user"
+            helpers.log_msg(msg, 'INFO')
+
+
     # Trigger a sync of the content into the Library
     if args.nosync:
         #print helpers.GREEN + "Import complete.\n" + helpers.ENDC
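
Editor's note: finally, a toy, self-contained example (not from the repository) of how the comparison in check_missing() behaves: datasets present in the export history but absent from the import history are flagged, while the dataset currently being imported is skipped. The dataset names are invented for illustration.

```
# Toy demonstration of the check_missing() comparison logic (illustrative only).
exports = ['DoV_2017-01-01', 'DoV_2017-02-01', 'DoV_2017-03-01']   # invented names
imports = ['DoV_2017-01-01']                                        # invented names
dataset = 'DoV_2017-03-01'   # the dataset currently being imported

for ds in exports:
    if ds not in imports and dataset not in ds:
        # Only DoV_2017-02-01 is reported; the current dataset is excluded.
        print("Import dataset " + ds + " has not been imported")
```
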