From e76a94894841c6ce2b306aea8c6a9a7e17c7e9d2 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 25 May 2020 13:06:40 +0200 Subject: [PATCH 01/11] De-duplicate builds When a build is triggered, it could be marked as to be skipped by builders if: * there is already a build running/queued for the same commit Multiple builds of the same commit will lead to the same results. So, we skip it if there is one already running/queued. * there is already a build queued for the same version When building a version without specifying the commit, the last commit is built. In this case, we can only skip the new triggered build if there is one build queued because both builds will just pick the same commit. --- readthedocs/core/utils/__init__.py | 38 +++++++++++++++++++++++++-- readthedocs/doc_builder/exceptions.py | 4 +++ readthedocs/projects/tasks.py | 11 ++++++++ 3 files changed, 51 insertions(+), 2 deletions(-) diff --git a/readthedocs/core/utils/__init__.py b/readthedocs/core/utils/__init__.py index c257470406b..4267f975469 100644 --- a/readthedocs/core/utils/__init__.py +++ b/readthedocs/core/utils/__init__.py @@ -19,7 +19,7 @@ ) from readthedocs.doc_builder.constants import DOCKER_LIMITS from readthedocs.projects.constants import CELERY_LOW, CELERY_MEDIUM, CELERY_HIGH -from readthedocs.doc_builder.exceptions import BuildMaxConcurrencyError +from readthedocs.doc_builder.exceptions import BuildMaxConcurrencyError, DuplicatedBuildError log = logging.getLogger(__name__) @@ -165,8 +165,42 @@ def prepare_build( # External builds should be lower priority. options['priority'] = CELERY_LOW + skip_build = False + if commit: + skip_build = ( + Build.objects + .filter( + project=project, + version=version, + commit=commit, + ).exclude( + state=BUILD_STATE_FINISHED, + ).exists() + ) + else: + skip_build = Build.objects.filter( + project=project, + version=version, + state=BUILD_STATE_TRIGGERED, + ).count() > 1 + if skip_build: + # TODO: we could mark the old build as duplicated, however we reset our + # position in the queue and go back to the end of it --penalization + log.warning( + 'Marking build to be skipped by builder. project=%s version=%s build=%s commit=%s', + project.slug, + version.slug, + build.pk, + commit, + ) + build.error = DuplicatedBuildError.message + build.success = False + build.exit_code = 1 + build.state = BUILD_STATE_FINISHED + build.save() + # Start the build in X minutes and mark it as limited - if project.has_feature(Feature.LIMIT_CONCURRENT_BUILDS): + if not skip_build and project.has_feature(Feature.LIMIT_CONCURRENT_BUILDS): running_builds = ( Build.objects .filter(project__slug=project.slug) diff --git a/readthedocs/doc_builder/exceptions.py b/readthedocs/doc_builder/exceptions.py index d7a511430a4..32239828434 100644 --- a/readthedocs/doc_builder/exceptions.py +++ b/readthedocs/doc_builder/exceptions.py @@ -57,6 +57,10 @@ class BuildMaxConcurrencyError(BuildEnvironmentError): message = ugettext_noop('Concurrency limit reached ({limit}), retrying in 5 minutes.') +class DuplicatedBuildError(BuildEnvironmentError): + message = ugettext_noop('Duplicated build.') + + class BuildEnvironmentWarning(BuildEnvironmentException): pass diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py index 79df64cfb6d..2607f76ebc1 100644 --- a/readthedocs/projects/tasks.py +++ b/readthedocs/projects/tasks.py @@ -58,6 +58,7 @@ BuildEnvironmentWarning, BuildMaxConcurrencyError, BuildTimeoutError, + DuplicatedBuildError, MkDocsYAMLParseError, ProjectBuildsSkippedError, VersionLockedError, @@ -542,6 +543,16 @@ def run( self.commit = commit self.config = None + if self.build['state'] == BUILD_STATE_FINISHED and self.build['error'] == DuplicatedBuildError.message: + log.warning( + 'NOOP: build is marked as duplicated. project=%s version=%s build=%s commit=%s', + self.project.slug, + self.version.slug, + build_pk, + self.commit, + ) + return True + if self.project.has_feature(Feature.LIMIT_CONCURRENT_BUILDS): response = api_v2.build.running.get(project__slug=self.project.slug) builds_running = response.get('count', 0) From 95350aac852850747222c721afd1b38d403b6808 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 27 May 2020 11:46:06 +0200 Subject: [PATCH 02/11] Lint --- readthedocs/projects/tasks.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py index 2607f76ebc1..07ab8c7e667 100644 --- a/readthedocs/projects/tasks.py +++ b/readthedocs/projects/tasks.py @@ -543,7 +543,10 @@ def run( self.commit = commit self.config = None - if self.build['state'] == BUILD_STATE_FINISHED and self.build['error'] == DuplicatedBuildError.message: + if all([ + self.build['state'] == BUILD_STATE_FINISHED, + self.build['error'] == DuplicatedBuildError.message, + ]): log.warning( 'NOOP: build is marked as duplicated. project=%s version=%s build=%s commit=%s', self.project.slug, From 030b0a5b3a7b3d46d9ff45e1d93a30bb32b95e9f Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 3 Jun 2020 17:19:06 +0200 Subject: [PATCH 03/11] Add Build.status_code to show different user's messages --- .../builds/migrations/0023_add_status_code.py | 18 ++++++++++++++++++ readthedocs/builds/models.py | 1 + readthedocs/core/utils/__init__.py | 3 ++- readthedocs/doc_builder/exceptions.py | 2 ++ 4 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 readthedocs/builds/migrations/0023_add_status_code.py diff --git a/readthedocs/builds/migrations/0023_add_status_code.py b/readthedocs/builds/migrations/0023_add_status_code.py new file mode 100644 index 00000000000..5eb94244dd7 --- /dev/null +++ b/readthedocs/builds/migrations/0023_add_status_code.py @@ -0,0 +1,18 @@ +# Generated by Django 2.2.12 on 2020-06-03 15:17 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ('builds', '0022_migrate_protected_versions'), + ] + + operations = [ + migrations.AddField( + model_name='build', + name='status_code', + field=models.BooleanField(blank=True, default=None, null=True, verbose_name='Status code'), + ), + ] diff --git a/readthedocs/builds/models.py b/readthedocs/builds/models.py index 99ae7824de9..53466aa3e7c 100644 --- a/readthedocs/builds/models.py +++ b/readthedocs/builds/models.py @@ -615,6 +615,7 @@ class Build(models.Model): choices=BUILD_STATE, default='finished', ) + status_code = models.BooleanField(_('Status code'), null=True, default=None, blank=True) date = models.DateTimeField(_('Date'), auto_now_add=True) success = models.BooleanField(_('Success'), default=True) diff --git a/readthedocs/core/utils/__init__.py b/readthedocs/core/utils/__init__.py index 4267f975469..59359b0a52d 100644 --- a/readthedocs/core/utils/__init__.py +++ b/readthedocs/core/utils/__init__.py @@ -194,8 +194,9 @@ def prepare_build( commit, ) build.error = DuplicatedBuildError.message + build.status_code = DuplicatedBuildError.status_code + build.exit_code = DuplicatedBuildError.exit_code build.success = False - build.exit_code = 1 build.state = BUILD_STATE_FINISHED build.save() diff --git a/readthedocs/doc_builder/exceptions.py b/readthedocs/doc_builder/exceptions.py index 32239828434..4abf606f584 100644 --- a/readthedocs/doc_builder/exceptions.py +++ b/readthedocs/doc_builder/exceptions.py @@ -59,6 +59,8 @@ class BuildMaxConcurrencyError(BuildEnvironmentError): class DuplicatedBuildError(BuildEnvironmentError): message = ugettext_noop('Duplicated build.') + exit_code = 1 + status_code = 0 class BuildEnvironmentWarning(BuildEnvironmentException): From 9fcf10dd3cb04eeb11b2a0f8354ec3fe2d96d4e4 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 3 Jun 2020 17:19:41 +0200 Subject: [PATCH 04/11] Exclude the build triggered itself from the query --- readthedocs/core/utils/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/readthedocs/core/utils/__init__.py b/readthedocs/core/utils/__init__.py index 59359b0a52d..e30e0c539be 100644 --- a/readthedocs/core/utils/__init__.py +++ b/readthedocs/core/utils/__init__.py @@ -175,6 +175,8 @@ def prepare_build( commit=commit, ).exclude( state=BUILD_STATE_FINISHED, + ).exclude( + pk=build.pk, ).exists() ) else: From 141bbf591685af27920a23fdb48ea2a0a85da2ef Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 3 Jun 2020 17:19:56 +0200 Subject: [PATCH 05/11] Add test cases for de-duplicate builds --- readthedocs/rtd_tests/tests/test_builds.py | 105 +++++++++++++++++++++ 1 file changed, 105 insertions(+) diff --git a/readthedocs/rtd_tests/tests/test_builds.py b/readthedocs/rtd_tests/tests/test_builds.py index b3280d17290..928a602bb25 100644 --- a/readthedocs/rtd_tests/tests/test_builds.py +++ b/readthedocs/rtd_tests/tests/test_builds.py @@ -18,8 +18,10 @@ GENERIC_EXTERNAL_VERSION_NAME ) from readthedocs.builds.models import Build, Version +from readthedocs.core.utils import trigger_build from readthedocs.doc_builder.config import load_yaml_config from readthedocs.doc_builder.environments import LocalBuildEnvironment +from readthedocs.doc_builder.exceptions import DuplicatedBuildError from readthedocs.doc_builder.python_environments import Virtualenv from readthedocs.oauth.models import RemoteRepository from readthedocs.projects.models import EnvironmentVariable, Project @@ -747,3 +749,106 @@ def test_get_commit_url_internal_version(self): sha=build.commit ) self.assertEqual(build.get_commit_url(), expected_url) + + + +@mock.patch('readthedocs.projects.tasks.update_docs_task') +class DeDuplicateBuildTests(TestCase): + + def setUp(self): + self.project = get(Project) + self.version = get( + Version, + project=self.project + ) + + def test_trigger_duplicated_build_by_commit(self, update_docs_task): + """ + Trigger a build for the same commit twice. + + The second build should be marked as NOOP. + """ + self.assertEqual(Build.objects.count(), 0) + trigger_build(self.project, self.version, commit='a1b2c3') + self.assertEqual(Build.objects.count(), 1) + build = Build.objects.first() + self.assertEqual(build.state, 'triggered') + + trigger_build(self.project, self.version, commit='a1b2c3') + self.assertEqual(Build.objects.count(), 2) + build = Build.objects.first() + self.assertEqual(build.error, DuplicatedBuildError.message) + self.assertEqual(build.success, False) + self.assertEqual(build.exit_code, DuplicatedBuildError.exit_code) + self.assertEqual(build.status_code, DuplicatedBuildError.status_code) + self.assertEqual(build.state, 'finished') + + def test_trigger_duplicated_finshed_build_by_commit(self, update_docs_task): + """ + Trigger a build for the same commit twice. + + The second build should not be marked as NOOP if the previous + duplicated builds are in 'finished' state. + """ + self.assertEqual(Build.objects.count(), 0) + trigger_build(self.project, self.version, commit='a1b2c3') + self.assertEqual(Build.objects.count(), 1) + + # Mark the build as finished + build = Build.objects.first() + build.state = 'finished' + build.save() + build.refresh_from_db() + + trigger_build(self.project, self.version, commit='a1b2c3') + self.assertEqual(Build.objects.count(), 2) + build = Build.objects.first() + self.assertEqual(build.state, 'triggered') + self.assertIsNone(build.status_code) + + def test_trigger_duplicated_build_by_version(self, update_docs_task): + """ + Trigger a build for the same version. + + The second build should be marked as NOOP if there is already a build + for the same project and version on 'triggered' state. + """ + self.assertEqual(Build.objects.count(), 0) + trigger_build(self.project, self.version, commit=None) + self.assertEqual(Build.objects.count(), 1) + build = Build.objects.first() + self.assertEqual(build.state, 'triggered') + + trigger_build(self.project, self.version, commit=None) + self.assertEqual(Build.objects.count(), 2) + build = Build.objects.first() + self.assertEqual(build.error, DuplicatedBuildError.message) + self.assertEqual(build.success, False) + self.assertEqual(build.exit_code, DuplicatedBuildError.exit_code) + self.assertEqual(build.status_code, DuplicatedBuildError.status_code) + self.assertEqual(build.state, 'finished') + + def test_trigger_duplicated_non_triggered_build_by_version(self, update_docs_task): + """ + Trigger a build for the same version. + + The second build should not be marked as NOOP because the previous build + for the same project and version is on 'building' state (any non 'triggered') + """ + self.assertEqual(Build.objects.count(), 0) + trigger_build(self.project, self.version, commit=None) + self.assertEqual(Build.objects.count(), 1) + build = Build.objects.first() + self.assertEqual(build.state, 'triggered') + + # Mark the build as building + build = Build.objects.first() + build.state = 'building' + build.save() + build.refresh_from_db() + + trigger_build(self.project, self.version, commit=None) + self.assertEqual(Build.objects.count(), 2) + build = Build.objects.first() + self.assertEqual(build.state, 'triggered') + self.assertIsNone(build.status_code) From 6ce7617993c6447828b244ec2ff142d9917a0403 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Wed, 3 Jun 2020 17:24:14 +0200 Subject: [PATCH 06/11] Check Build.status_code from the task --- readthedocs/projects/tasks.py | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py index 07ab8c7e667..61fb08c0da4 100644 --- a/readthedocs/projects/tasks.py +++ b/readthedocs/projects/tasks.py @@ -543,10 +543,7 @@ def run( self.commit = commit self.config = None - if all([ - self.build['state'] == BUILD_STATE_FINISHED, - self.build['error'] == DuplicatedBuildError.message, - ]): + if self.build['status_code'] == DuplicatedBuildError.status_code: log.warning( 'NOOP: build is marked as duplicated. project=%s version=%s build=%s commit=%s', self.project.slug, From 2802da96897684c048ad313fd573429089d10943 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 8 Jun 2020 12:55:37 +0200 Subject: [PATCH 07/11] Make StatusCode an Enum --- readthedocs/builds/constants.py | 6 ++++++ readthedocs/doc_builder/exceptions.py | 3 ++- 2 files changed, 8 insertions(+), 1 deletion(-) diff --git a/readthedocs/builds/constants.py b/readthedocs/builds/constants.py index 343ef2bff05..759b64ba731 100644 --- a/readthedocs/builds/constants.py +++ b/readthedocs/builds/constants.py @@ -1,5 +1,7 @@ """Constants for the builds app.""" +from enum import Enum + from django.conf import settings from django.utils.translation import ugettext_lazy as _ @@ -126,3 +128,7 @@ ALL_VERSIONS: ALL_VERSIONS_REGEX, SEMVER_VERSIONS: SEMVER_VERSIONS_REGEX, } + + +class StatusCode(Enum): + DUPLICATED_BUILD = 0 diff --git a/readthedocs/doc_builder/exceptions.py b/readthedocs/doc_builder/exceptions.py index 4abf606f584..0c2f11831b5 100644 --- a/readthedocs/doc_builder/exceptions.py +++ b/readthedocs/doc_builder/exceptions.py @@ -3,6 +3,7 @@ """Exceptions raised when building documentation.""" from django.utils.translation import ugettext_noop +from readthedocs.builds.constants import StatusCode class BuildEnvironmentException(Exception): @@ -60,7 +61,7 @@ class BuildMaxConcurrencyError(BuildEnvironmentError): class DuplicatedBuildError(BuildEnvironmentError): message = ugettext_noop('Duplicated build.') exit_code = 1 - status_code = 0 + status_code = StatusCode.DUPLICATED_BUILD class BuildEnvironmentWarning(BuildEnvironmentException): From e9767f1041e414c6d3564ad004bbb3f4c130549a Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 8 Jun 2020 13:05:08 +0200 Subject: [PATCH 08/11] Be more permissive when checking `Build.status_code` --- readthedocs/projects/tasks.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/readthedocs/projects/tasks.py b/readthedocs/projects/tasks.py index 61fb08c0da4..a9b5f5d8907 100644 --- a/readthedocs/projects/tasks.py +++ b/readthedocs/projects/tasks.py @@ -543,7 +543,7 @@ def run( self.commit = commit self.config = None - if self.build['status_code'] == DuplicatedBuildError.status_code: + if self.build.get('status_code') == DuplicatedBuildError.status_code: log.warning( 'NOOP: build is marked as duplicated. project=%s version=%s build=%s commit=%s', self.project.slug, From 30dde399027278bc4ed5c183701d9dd7eba4a016 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 8 Jun 2020 13:10:42 +0200 Subject: [PATCH 09/11] De-duplicate builds under a Feature flag --- readthedocs/core/utils/__init__.py | 5 +++++ readthedocs/projects/models.py | 5 +++++ 2 files changed, 10 insertions(+) diff --git a/readthedocs/core/utils/__init__.py b/readthedocs/core/utils/__init__.py index e30e0c539be..95207b1f2c1 100644 --- a/readthedocs/core/utils/__init__.py +++ b/readthedocs/core/utils/__init__.py @@ -185,6 +185,11 @@ def prepare_build( version=version, state=BUILD_STATE_TRIGGERED, ).count() > 1 + + if not project.has_feature(Feature.DEDUPLICATE_BUILDS): + log.debug('Skipping deduplication of builds. Feature not enabled. project=%s', project.slug) + skip_build = False + if skip_build: # TODO: we could mark the old build as duplicated, however we reset our # position in the queue and go back to the end of it --penalization diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py index 40af8e6fc44..088e577398d 100644 --- a/readthedocs/projects/models.py +++ b/readthedocs/projects/models.py @@ -1469,6 +1469,7 @@ def add_features(sender, **kwargs): LIST_PACKAGES_INSTALLED_ENV = 'list_packages_installed_env' VCS_REMOTE_LISTING = 'vcs_remote_listing' STORE_PAGEVIEWS = 'store_pageviews' + DEDUPLICATE_BUILDS = 'deduplicate_builds' FEATURES = ( (USE_SPHINX_LATEST, _('Use latest version of Sphinx')), @@ -1562,6 +1563,10 @@ def add_features(sender, **kwargs): STORE_PAGEVIEWS, _('Store pageviews for this project'), ), + ( + DEDUPLICATE_BUILDS, + _('Mark duplicated builds as NOOP to be skipped by builders'), + ), ) projects = models.ManyToManyField( From 954e004c3c82d15d5a03258d60601dbbd21d42b4 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 8 Jun 2020 14:07:05 +0200 Subject: [PATCH 10/11] Enable feature on tests --- readthedocs/rtd_tests/tests/test_builds.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/readthedocs/rtd_tests/tests/test_builds.py b/readthedocs/rtd_tests/tests/test_builds.py index 328b06160c0..48895ebc391 100644 --- a/readthedocs/rtd_tests/tests/test_builds.py +++ b/readthedocs/rtd_tests/tests/test_builds.py @@ -24,7 +24,7 @@ from readthedocs.doc_builder.exceptions import DuplicatedBuildError from readthedocs.doc_builder.python_environments import Virtualenv from readthedocs.oauth.models import RemoteRepository -from readthedocs.projects.models import EnvironmentVariable, Project +from readthedocs.projects.models import EnvironmentVariable, Feature, Project from readthedocs.projects.tasks import UpdateDocsTaskStep from readthedocs.rtd_tests.tests.test_config_integration import create_load @@ -763,6 +763,12 @@ def setUp(self): project=self.project ) + get( + Feature, + feature_id=Feature.DEDUPLICATE_BUILDS, + projects=[self.project], + ) + def test_trigger_duplicated_build_by_commit(self, update_docs_task): """ Trigger a build for the same commit twice. From 190a62812bc02a39a5d3d614a0b4822657aa6702 Mon Sep 17 00:00:00 2001 From: Manuel Kaufmann Date: Mon, 8 Jun 2020 14:07:18 +0200 Subject: [PATCH 11/11] Use a simple constant instead of an Enum Django does not convert Enum automatically to its value :/ --- readthedocs/builds/constants.py | 6 +----- readthedocs/doc_builder/exceptions.py | 4 ++-- 2 files changed, 3 insertions(+), 7 deletions(-) diff --git a/readthedocs/builds/constants.py b/readthedocs/builds/constants.py index 759b64ba731..c8d44ad4e41 100644 --- a/readthedocs/builds/constants.py +++ b/readthedocs/builds/constants.py @@ -1,7 +1,5 @@ """Constants for the builds app.""" -from enum import Enum - from django.conf import settings from django.utils.translation import ugettext_lazy as _ @@ -129,6 +127,4 @@ SEMVER_VERSIONS: SEMVER_VERSIONS_REGEX, } - -class StatusCode(Enum): - DUPLICATED_BUILD = 0 +BUILD_STATUS_CODE_DUPLICATED_BUILD = 0 diff --git a/readthedocs/doc_builder/exceptions.py b/readthedocs/doc_builder/exceptions.py index 0c2f11831b5..68dd5e10476 100644 --- a/readthedocs/doc_builder/exceptions.py +++ b/readthedocs/doc_builder/exceptions.py @@ -3,7 +3,7 @@ """Exceptions raised when building documentation.""" from django.utils.translation import ugettext_noop -from readthedocs.builds.constants import StatusCode +from readthedocs.builds.constants import BUILD_STATUS_CODE_DUPLICATED_BUILD class BuildEnvironmentException(Exception): @@ -61,7 +61,7 @@ class BuildMaxConcurrencyError(BuildEnvironmentError): class DuplicatedBuildError(BuildEnvironmentError): message = ugettext_noop('Duplicated build.') exit_code = 1 - status_code = StatusCode.DUPLICATED_BUILD + status_code = BUILD_STATUS_CODE_DUPLICATED_BUILD class BuildEnvironmentWarning(BuildEnvironmentException):