Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

De-duplicate builds #7123

Merged
merged 12 commits into from
Jun 8, 2020
18 changes: 18 additions & 0 deletions readthedocs/builds/migrations/0023_add_status_code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 2.2.12 on 2020-06-03 15:17

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('builds', '0022_migrate_protected_versions'),
]

operations = [
migrations.AddField(
model_name='build',
name='status_code',
field=models.BooleanField(blank=True, default=None, null=True, verbose_name='Status code'),
),
]
1 change: 1 addition & 0 deletions readthedocs/builds/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,7 @@ class Build(models.Model):
choices=BUILD_STATE,
default='finished',
)
status_code = models.BooleanField(_('Status code'), null=True, default=None, blank=True)
date = models.DateTimeField(_('Date'), auto_now_add=True)
success = models.BooleanField(_('Success'), default=True)

Expand Down
41 changes: 39 additions & 2 deletions readthedocs/core/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
)
from readthedocs.doc_builder.constants import DOCKER_LIMITS
from readthedocs.projects.constants import CELERY_LOW, CELERY_MEDIUM, CELERY_HIGH
from readthedocs.doc_builder.exceptions import BuildMaxConcurrencyError
from readthedocs.doc_builder.exceptions import BuildMaxConcurrencyError, DuplicatedBuildError


log = logging.getLogger(__name__)
Expand Down Expand Up @@ -165,8 +165,45 @@ def prepare_build(
# External builds should be lower priority.
options['priority'] = CELERY_LOW

skip_build = False
if commit:
skip_build = (
Build.objects
.filter(
project=project,
version=version,
commit=commit,
).exclude(
state=BUILD_STATE_FINISHED,
).exclude(
pk=build.pk,
).exists()
)
else:
skip_build = Build.objects.filter(
project=project,
version=version,
state=BUILD_STATE_TRIGGERED,
).count() > 1
if skip_build:
# TODO: we could mark the old build as duplicated, however we reset our
# position in the queue and go back to the end of it --penalization
humitos marked this conversation as resolved.
Show resolved Hide resolved
log.warning(
'Marking build to be skipped by builder. project=%s version=%s build=%s commit=%s',
project.slug,
version.slug,
build.pk,
commit,
)
build.error = DuplicatedBuildError.message
build.status_code = DuplicatedBuildError.status_code
build.exit_code = DuplicatedBuildError.exit_code
build.success = False
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we should make this nullable and set it to None here? It didn't succeed or fail, so it kinda makes sense.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It makes sense to me. However, I don't think we can change it now without changing the UI (knockout code) that renders in the frontend; which I'm not sure if we want to touch at this point.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea, I just worry this will break the badge display.

build.state = BUILD_STATE_FINISHED
build.save()

# Start the build in X minutes and mark it as limited
if project.has_feature(Feature.LIMIT_CONCURRENT_BUILDS):
if not skip_build and project.has_feature(Feature.LIMIT_CONCURRENT_BUILDS):
running_builds = (
Build.objects
.filter(project__slug=project.slug)
Expand Down
6 changes: 6 additions & 0 deletions readthedocs/doc_builder/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -57,6 +57,12 @@ class BuildMaxConcurrencyError(BuildEnvironmentError):
message = ugettext_noop('Concurrency limit reached ({limit}), retrying in 5 minutes.')


class DuplicatedBuildError(BuildEnvironmentError):
message = ugettext_noop('Duplicated build.')
exit_code = 1
status_code = 0
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This should be a constant probably, and something other than 0?

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I made an Enum for this. I don't think it's a problem if it's 0. Why did you think it's better to have it different than 0? and what value?

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Because 0 to me means "normal" status -- we want a status that represents the issue of "duplicated build", which should be assigned a specific number.



class BuildEnvironmentWarning(BuildEnvironmentException):
pass

Expand Down
11 changes: 11 additions & 0 deletions readthedocs/projects/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
BuildEnvironmentWarning,
BuildMaxConcurrencyError,
BuildTimeoutError,
DuplicatedBuildError,
MkDocsYAMLParseError,
ProjectBuildsSkippedError,
VersionLockedError,
Expand Down Expand Up @@ -542,6 +543,16 @@ def run(
self.commit = commit
self.config = None

if self.build['status_code'] == DuplicatedBuildError.status_code:
log.warning(
'NOOP: build is marked as duplicated. project=%s version=%s build=%s commit=%s',
self.project.slug,
self.version.slug,
build_pk,
self.commit,
)
return True

if self.project.has_feature(Feature.LIMIT_CONCURRENT_BUILDS):
response = api_v2.build.running.get(project__slug=self.project.slug)
builds_running = response.get('count', 0)
Expand Down
105 changes: 105 additions & 0 deletions readthedocs/rtd_tests/tests/test_builds.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,10 @@
GENERIC_EXTERNAL_VERSION_NAME
)
from readthedocs.builds.models import Build, Version
from readthedocs.core.utils import trigger_build
from readthedocs.doc_builder.config import load_yaml_config
from readthedocs.doc_builder.environments import LocalBuildEnvironment
from readthedocs.doc_builder.exceptions import DuplicatedBuildError
from readthedocs.doc_builder.python_environments import Virtualenv
from readthedocs.oauth.models import RemoteRepository
from readthedocs.projects.models import EnvironmentVariable, Project
Expand Down Expand Up @@ -747,3 +749,106 @@ def test_get_commit_url_internal_version(self):
sha=build.commit
)
self.assertEqual(build.get_commit_url(), expected_url)



@mock.patch('readthedocs.projects.tasks.update_docs_task')
class DeDuplicateBuildTests(TestCase):

def setUp(self):
self.project = get(Project)
self.version = get(
Version,
project=self.project
)

def test_trigger_duplicated_build_by_commit(self, update_docs_task):
"""
Trigger a build for the same commit twice.

The second build should be marked as NOOP.
"""
self.assertEqual(Build.objects.count(), 0)
trigger_build(self.project, self.version, commit='a1b2c3')
self.assertEqual(Build.objects.count(), 1)
build = Build.objects.first()
self.assertEqual(build.state, 'triggered')

trigger_build(self.project, self.version, commit='a1b2c3')
self.assertEqual(Build.objects.count(), 2)
build = Build.objects.first()
self.assertEqual(build.error, DuplicatedBuildError.message)
self.assertEqual(build.success, False)
self.assertEqual(build.exit_code, DuplicatedBuildError.exit_code)
self.assertEqual(build.status_code, DuplicatedBuildError.status_code)
self.assertEqual(build.state, 'finished')

def test_trigger_duplicated_finshed_build_by_commit(self, update_docs_task):
"""
Trigger a build for the same commit twice.

The second build should not be marked as NOOP if the previous
duplicated builds are in 'finished' state.
"""
self.assertEqual(Build.objects.count(), 0)
trigger_build(self.project, self.version, commit='a1b2c3')
self.assertEqual(Build.objects.count(), 1)

# Mark the build as finished
build = Build.objects.first()
build.state = 'finished'
build.save()
build.refresh_from_db()

trigger_build(self.project, self.version, commit='a1b2c3')
self.assertEqual(Build.objects.count(), 2)
build = Build.objects.first()
self.assertEqual(build.state, 'triggered')
self.assertIsNone(build.status_code)

def test_trigger_duplicated_build_by_version(self, update_docs_task):
"""
Trigger a build for the same version.

The second build should be marked as NOOP if there is already a build
for the same project and version on 'triggered' state.
"""
self.assertEqual(Build.objects.count(), 0)
trigger_build(self.project, self.version, commit=None)
self.assertEqual(Build.objects.count(), 1)
build = Build.objects.first()
self.assertEqual(build.state, 'triggered')

trigger_build(self.project, self.version, commit=None)
self.assertEqual(Build.objects.count(), 2)
build = Build.objects.first()
self.assertEqual(build.error, DuplicatedBuildError.message)
self.assertEqual(build.success, False)
self.assertEqual(build.exit_code, DuplicatedBuildError.exit_code)
self.assertEqual(build.status_code, DuplicatedBuildError.status_code)
self.assertEqual(build.state, 'finished')

def test_trigger_duplicated_non_triggered_build_by_version(self, update_docs_task):
"""
Trigger a build for the same version.

The second build should not be marked as NOOP because the previous build
for the same project and version is on 'building' state (any non 'triggered')
"""
self.assertEqual(Build.objects.count(), 0)
trigger_build(self.project, self.version, commit=None)
self.assertEqual(Build.objects.count(), 1)
build = Build.objects.first()
self.assertEqual(build.state, 'triggered')

# Mark the build as building
build = Build.objects.first()
build.state = 'building'
build.save()
build.refresh_from_db()

trigger_build(self.project, self.version, commit=None)
self.assertEqual(Build.objects.count(), 2)
build = Build.objects.first()
self.assertEqual(build.state, 'triggered')
self.assertIsNone(build.status_code)