Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

De-duplicate builds #7123

Merged
merged 12 commits into from
Jun 8, 2020
2 changes: 2 additions & 0 deletions readthedocs/builds/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -126,3 +126,5 @@
ALL_VERSIONS: ALL_VERSIONS_REGEX,
SEMVER_VERSIONS: SEMVER_VERSIONS_REGEX,
}

BUILD_STATUS_CODE_DUPLICATED_BUILD = 0
18 changes: 18 additions & 0 deletions readthedocs/builds/migrations/0023_add_status_code.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
# Generated by Django 2.2.12 on 2020-06-03 15:17

from django.db import migrations, models


class Migration(migrations.Migration):

dependencies = [
('builds', '0022_migrate_protected_versions'),
]

operations = [
migrations.AddField(
model_name='build',
name='status_code',
field=models.BooleanField(blank=True, default=None, null=True, verbose_name='Status code'),
),
]
1 change: 1 addition & 0 deletions readthedocs/builds/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -615,6 +615,7 @@ class Build(models.Model):
choices=BUILD_STATE,
default='finished',
)
status_code = models.BooleanField(_('Status code'), null=True, default=None, blank=True)
date = models.DateTimeField(_('Date'), auto_now_add=True)
success = models.BooleanField(_('Success'), default=True)

Expand Down
46 changes: 44 additions & 2 deletions readthedocs/core/utils/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@
)
from readthedocs.doc_builder.constants import DOCKER_LIMITS
from readthedocs.projects.constants import CELERY_LOW, CELERY_MEDIUM, CELERY_HIGH
from readthedocs.doc_builder.exceptions import BuildMaxConcurrencyError
from readthedocs.doc_builder.exceptions import BuildMaxConcurrencyError, DuplicatedBuildError


log = logging.getLogger(__name__)
Expand Down Expand Up @@ -165,8 +165,50 @@ def prepare_build(
# External builds should be lower priority.
options['priority'] = CELERY_LOW

skip_build = False
if commit:
skip_build = (
Build.objects
.filter(
project=project,
version=version,
commit=commit,
).exclude(
state=BUILD_STATE_FINISHED,
).exclude(
pk=build.pk,
).exists()
)
else:
skip_build = Build.objects.filter(
project=project,
version=version,
state=BUILD_STATE_TRIGGERED,
).count() > 1

if not project.has_feature(Feature.DEDUPLICATE_BUILDS):
log.debug('Skipping deduplication of builds. Feature not enabled. project=%s', project.slug)
skip_build = False

if skip_build:
# TODO: we could mark the old build as duplicated, however we reset our
# position in the queue and go back to the end of it --penalization
humitos marked this conversation as resolved.
Show resolved Hide resolved
log.warning(
'Marking build to be skipped by builder. project=%s version=%s build=%s commit=%s',
project.slug,
version.slug,
build.pk,
commit,
)
build.error = DuplicatedBuildError.message
build.status_code = DuplicatedBuildError.status_code
build.exit_code = DuplicatedBuildError.exit_code
build.success = False
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I wonder if we should make this nullable and set it to None here? It didn't succeed or fail, so it kinda makes sense.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

It makes sense to me. However, I don't think we can change it now without changing the UI (knockout code) that renders in the frontend; which I'm not sure if we want to touch at this point.

Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yea, I just worry this will break the badge display.

build.state = BUILD_STATE_FINISHED
build.save()

# Start the build in X minutes and mark it as limited
if project.has_feature(Feature.LIMIT_CONCURRENT_BUILDS):
if not skip_build and project.has_feature(Feature.LIMIT_CONCURRENT_BUILDS):
running_builds = (
Build.objects
.filter(project__slug=project.slug)
Expand Down
7 changes: 7 additions & 0 deletions readthedocs/doc_builder/exceptions.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
"""Exceptions raised when building documentation."""

from django.utils.translation import ugettext_noop
from readthedocs.builds.constants import BUILD_STATUS_CODE_DUPLICATED_BUILD


class BuildEnvironmentException(Exception):
Expand Down Expand Up @@ -57,6 +58,12 @@ class BuildMaxConcurrencyError(BuildEnvironmentError):
message = ugettext_noop('Concurrency limit reached ({limit}), retrying in 5 minutes.')


class DuplicatedBuildError(BuildEnvironmentError):
message = ugettext_noop('Duplicated build.')
exit_code = 1
status_code = BUILD_STATUS_CODE_DUPLICATED_BUILD


class BuildEnvironmentWarning(BuildEnvironmentException):
pass

Expand Down
7 changes: 6 additions & 1 deletion readthedocs/projects/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -1597,6 +1597,7 @@ def add_features(sender, **kwargs):
STORE_PAGEVIEWS = 'store_pageviews'
SPHINX_PARALLEL = 'sphinx_parallel'
USE_SPHINX_BUILDERS = 'use_sphinx_builders'
DEDUPLICATE_BUILDS = 'deduplicate_builds'

FEATURES = (
(USE_SPHINX_LATEST, _('Use latest version of Sphinx')),
Expand Down Expand Up @@ -1705,7 +1706,11 @@ def add_features(sender, **kwargs):
(
USE_SPHINX_BUILDERS,
_('Use regular sphinx builders instead of custom RTD builders'),
)
),
(
DEDUPLICATE_BUILDS,
_('Mark duplicated builds as NOOP to be skipped by builders'),
),
)

projects = models.ManyToManyField(
Expand Down
11 changes: 11 additions & 0 deletions readthedocs/projects/tasks.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,7 @@
BuildEnvironmentWarning,
BuildMaxConcurrencyError,
BuildTimeoutError,
DuplicatedBuildError,
MkDocsYAMLParseError,
ProjectBuildsSkippedError,
VersionLockedError,
Expand Down Expand Up @@ -542,6 +543,16 @@ def run(
self.commit = commit
self.config = None

if self.build.get('status_code') == DuplicatedBuildError.status_code:
log.warning(
'NOOP: build is marked as duplicated. project=%s version=%s build=%s commit=%s',
self.project.slug,
self.version.slug,
build_pk,
self.commit,
)
return True

if self.project.has_feature(Feature.LIMIT_CONCURRENT_BUILDS):
response = api_v2.build.running.get(project__slug=self.project.slug)
builds_running = response.get('count', 0)
Expand Down
113 changes: 112 additions & 1 deletion readthedocs/rtd_tests/tests/test_builds.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,13 @@
GENERIC_EXTERNAL_VERSION_NAME
)
from readthedocs.builds.models import Build, Version
from readthedocs.core.utils import trigger_build
from readthedocs.doc_builder.config import load_yaml_config
from readthedocs.doc_builder.environments import LocalBuildEnvironment
from readthedocs.doc_builder.exceptions import DuplicatedBuildError
from readthedocs.doc_builder.python_environments import Virtualenv
from readthedocs.oauth.models import RemoteRepository
from readthedocs.projects.models import EnvironmentVariable, Project
from readthedocs.projects.models import EnvironmentVariable, Feature, Project
from readthedocs.projects.tasks import UpdateDocsTaskStep
from readthedocs.rtd_tests.tests.test_config_integration import create_load

Expand Down Expand Up @@ -748,3 +750,112 @@ def test_get_commit_url_internal_version(self):
sha=build.commit
)
self.assertEqual(build.get_commit_url(), expected_url)



@mock.patch('readthedocs.projects.tasks.update_docs_task')
class DeDuplicateBuildTests(TestCase):

def setUp(self):
self.project = get(Project)
self.version = get(
Version,
project=self.project
)

get(
Feature,
feature_id=Feature.DEDUPLICATE_BUILDS,
projects=[self.project],
)

def test_trigger_duplicated_build_by_commit(self, update_docs_task):
"""
Trigger a build for the same commit twice.

The second build should be marked as NOOP.
"""
self.assertEqual(Build.objects.count(), 0)
trigger_build(self.project, self.version, commit='a1b2c3')
self.assertEqual(Build.objects.count(), 1)
build = Build.objects.first()
self.assertEqual(build.state, 'triggered')

trigger_build(self.project, self.version, commit='a1b2c3')
self.assertEqual(Build.objects.count(), 2)
build = Build.objects.first()
self.assertEqual(build.error, DuplicatedBuildError.message)
self.assertEqual(build.success, False)
self.assertEqual(build.exit_code, DuplicatedBuildError.exit_code)
self.assertEqual(build.status_code, DuplicatedBuildError.status_code)
self.assertEqual(build.state, 'finished')

def test_trigger_duplicated_finshed_build_by_commit(self, update_docs_task):
"""
Trigger a build for the same commit twice.

The second build should not be marked as NOOP if the previous
duplicated builds are in 'finished' state.
"""
self.assertEqual(Build.objects.count(), 0)
trigger_build(self.project, self.version, commit='a1b2c3')
self.assertEqual(Build.objects.count(), 1)

# Mark the build as finished
build = Build.objects.first()
build.state = 'finished'
build.save()
build.refresh_from_db()

trigger_build(self.project, self.version, commit='a1b2c3')
self.assertEqual(Build.objects.count(), 2)
build = Build.objects.first()
self.assertEqual(build.state, 'triggered')
self.assertIsNone(build.status_code)

def test_trigger_duplicated_build_by_version(self, update_docs_task):
"""
Trigger a build for the same version.

The second build should be marked as NOOP if there is already a build
for the same project and version on 'triggered' state.
"""
self.assertEqual(Build.objects.count(), 0)
trigger_build(self.project, self.version, commit=None)
self.assertEqual(Build.objects.count(), 1)
build = Build.objects.first()
self.assertEqual(build.state, 'triggered')

trigger_build(self.project, self.version, commit=None)
self.assertEqual(Build.objects.count(), 2)
build = Build.objects.first()
self.assertEqual(build.error, DuplicatedBuildError.message)
self.assertEqual(build.success, False)
self.assertEqual(build.exit_code, DuplicatedBuildError.exit_code)
self.assertEqual(build.status_code, DuplicatedBuildError.status_code)
self.assertEqual(build.state, 'finished')

def test_trigger_duplicated_non_triggered_build_by_version(self, update_docs_task):
"""
Trigger a build for the same version.

The second build should not be marked as NOOP because the previous build
for the same project and version is on 'building' state (any non 'triggered')
"""
self.assertEqual(Build.objects.count(), 0)
trigger_build(self.project, self.version, commit=None)
self.assertEqual(Build.objects.count(), 1)
build = Build.objects.first()
self.assertEqual(build.state, 'triggered')

# Mark the build as building
build = Build.objects.first()
build.state = 'building'
build.save()
build.refresh_from_db()

trigger_build(self.project, self.version, commit=None)
self.assertEqual(Build.objects.count(), 2)
build = Build.objects.first()
self.assertEqual(build.state, 'triggered')
self.assertIsNone(build.status_code)