diff --git a/docs/user/user-defined-redirects.rst b/docs/user/user-defined-redirects.rst index 1930b4a71ee..8cc6648bcca 100644 --- a/docs/user/user-defined-redirects.rst +++ b/docs/user/user-defined-redirects.rst @@ -3,28 +3,30 @@ User-defined Redirects You can set up redirects for a project in your project dashboard's :guilabel:`Redirects` page. +.. contents:: Table of contents + :local: + Quick Summary ------------- -* Log into your readthedocs.org account. -* From your dashboard, select the project on which you wish to add redirects. -* From the project's top navigation bar, select the :guilabel:`Admin` tab. +* Go to the :guilabel:`Admin` tab of your project. * From the left navigation menu, select :guilabel:`Redirects`. -* In the form box "Redirect Type" select the type of redirect you want. See below for detail. -* Depending on the redirect type you select, enter FROM and/or TO URL as needed. +* In the form box "Redirect Type" select the type of redirect you want. + :ref:`See below ` for detail. +* Depending on the redirect type you select, enter ``From URL`` and/or ``To URL`` as needed. * When finished, click the :guilabel:`Add` button. Your redirects will be effective immediately. -Limitations -~~~~~~~~~~~ - -Redirects are only implemented in case of a *404 File Not Found* error. -If you need to redirect a large number of files that still exist, -please reach out to :doc:`/support`. +Features +-------- -Page & Exact Redirects can redirect to URLs outside Read the Docs. -Define the `To URL` as the absolute URL you want to redirect to. +- By default, redirects are followed only if the requested page doesn't exist + (*404 File Not Found* error), if you need to apply a redirect for files that exist, + mark the :guilabel:`Force redirect` option. +- :ref:`user-defined-redirects:page redirects` and :ref:`user-defined-redirects:exact redirects` + can redirect to URLs outside Read the Docs, + just include the protocol in ``To URL``, e.g ``https://example.com``. Redirect Types -------------- @@ -82,7 +84,7 @@ You would set the following configuration:: Because of this, the ``/`` at the start of the ``From URL`` doesn't include the ``/$lang/$version`` prefix (e.g. ``/en/latest``), but just the version-specific part of the URL. -If you want to set directs only for some languages or some versions, you should use +If you want to set redirects only for some languages or some versions, you should use :ref:`user-defined-redirects:exact redirects` with the fully-specified path. Exact Redirects @@ -128,6 +130,17 @@ Similarly, if you maintain several branches of your documentation (e.g. ``3.0`` ``latest``) and decide to move pages in ``latest`` but not the older branches, you can use *Exact Redirects* to do so. +You can use an exact redirect to migrate your documentation to another domain, +for example:: + + Type: Exact Redirect + From URL: /$rest + To URL: https://newdocs.example.com/ + Force Redirect: True + +Then all pages will redirect to the new domain, for example +``https://docs.example.com/en/latest/install.html`` will redirect to +``https://newdocs.example.com/en/latest/install.html``. Sphinx Redirects ~~~~~~~~~~~~~~~~ diff --git a/readthedocs/projects/forms.py b/readthedocs/projects/forms.py index a2d2bd087f9..c0d06053370 100644 --- a/readthedocs/projects/forms.py +++ b/readthedocs/projects/forms.py @@ -585,21 +585,30 @@ class RedirectForm(forms.ModelForm): class Meta: model = Redirect - fields = ['redirect_type', 'from_url', 'to_url'] + fields = ["redirect_type", "from_url", "to_url", "force"] def __init__(self, *args, **kwargs): self.project = kwargs.pop('project', None) super().__init__(*args, **kwargs) + if self.project.has_feature(Feature.ALLOW_FORCED_REDIRECTS): + # Remove the nullable option from the form. + # TODO: remove after migration. + self.fields["force"].widget = forms.CheckboxInput() + self.fields["force"].empty_value = False + else: + self.fields.pop("force") + def save(self, **_): # pylint: disable=arguments-differ # TODO this should respect the unused argument `commit`. It's not clear # why this needs to be a call to `create`, instead of relying on the # super `save()` call. redirect = Redirect.objects.create( project=self.project, - redirect_type=self.cleaned_data['redirect_type'], - from_url=self.cleaned_data['from_url'], - to_url=self.cleaned_data['to_url'], + redirect_type=self.cleaned_data["redirect_type"], + from_url=self.cleaned_data["from_url"], + to_url=self.cleaned_data["to_url"], + force=self.cleaned_data.get("force", False), ) return redirect diff --git a/readthedocs/projects/models.py b/readthedocs/projects/models.py index 67bee25656e..01e54d3c6b1 100644 --- a/readthedocs/projects/models.py +++ b/readthedocs/projects/models.py @@ -1770,6 +1770,7 @@ def add_features(sender, **kwargs): CDN_ENABLED = "cdn_enabled" DOCKER_GVISOR_RUNTIME = "gvisor_runtime" RECORD_404_PAGE_VIEWS = "record_404_page_views" + ALLOW_FORCED_REDIRECTS = "allow_forced_redirects" # Versions sync related features SKIP_SYNC_TAGS = 'skip_sync_tags' @@ -1861,6 +1862,10 @@ def add_features(sender, **kwargs): RECORD_404_PAGE_VIEWS, _("Record 404s page views."), ), + ( + ALLOW_FORCED_REDIRECTS, + _("Allow forced redirects."), + ), # Versions sync related features ( diff --git a/readthedocs/proxito/tests/test_full.py b/readthedocs/proxito/tests/test_full.py index ad14294ba90..65b8d3d5bf6 100644 --- a/readthedocs/proxito/tests/test_full.py +++ b/readthedocs/proxito/tests/test_full.py @@ -26,6 +26,7 @@ ) from readthedocs.projects.models import Domain, Feature, Project from readthedocs.proxito.views.mixins import ServeDocsMixin +from readthedocs.redirects.models import Redirect from readthedocs.rtd_tests.storage import BuildMediaFileSystemStorageTest from readthedocs.subscriptions.models import Plan, PlanFeature, Subscription @@ -1223,6 +1224,23 @@ def _test_cache_control_header_project(self, expected_value, host=None): self.assertEqual(resp.headers['CDN-Cache-Control'], 'private', url) self.assertNotIn('Cache-Tag', resp.headers, url) + # Forced redirects will be cached only if the version is public. + get( + Redirect, + project=self.project, + redirect_type="exact", + from_url="/en/latest/install.html", + to_url="/en/latest/tutorial/install.html", + force=True, + ) + url = "/en/latest/install.html" + resp = self.client.get(url, secure=True, HTTP_HOST=host) + self.assertEqual( + resp["Location"], f"https://{host}/en/latest/tutorial/install.html", url + ) + self.assertEqual(resp.headers["CDN-Cache-Control"], expected_value, url) + self.assertEqual(resp.headers["Cache-Tag"], "project,project:latest", url) + def _test_cache_control_header_subproject(self, expected_value, host=None): """ Test the CDN-Cache-Control header on requests for `self.subproject`. diff --git a/readthedocs/proxito/tests/test_old_redirects.py b/readthedocs/proxito/tests/test_old_redirects.py index 4f6b7e5e59a..8f86f6ef05d 100644 --- a/readthedocs/proxito/tests/test_old_redirects.py +++ b/readthedocs/proxito/tests/test_old_redirects.py @@ -164,6 +164,24 @@ def test_root_redirect_with_query_params(self): ROOT_URLCONF='readthedocs.proxito.tests.handler_404_urls', ) class UserRedirectTests(MockStorageMixin, BaseDocServing): + def test_forced_redirect(self): + fixture.get( + Redirect, + project=self.project, + redirect_type="exact", + from_url="/en/latest/install.html", + to_url="/en/latest/tutorial/install.html", + force=True, + ) + r = self.client.get( + "/en/latest/install.html", + HTTP_HOST="project.dev.readthedocs.io", + ) + self.assertEqual(r.status_code, 302) + self.assertEqual( + r["Location"], + "http://project.dev.readthedocs.io/en/latest/tutorial/install.html", + ) def test_redirect_prefix_infinite(self): """ @@ -533,6 +551,267 @@ def test_not_found_page_without_trailing_slash(self): ) +@override_settings(PUBLIC_DOMAIN="dev.readthedocs.io") +class UserForcedRedirectTests(BaseDocServing): + def test_no_forced_redirect(self): + fixture.get( + Redirect, + project=self.project, + redirect_type="exact", + from_url="/en/latest/install.html", + to_url="/en/latest/tutorial/install.html", + force=False, + ) + r = self.client.get( + "/en/latest/install.html", + HTTP_HOST="project.dev.readthedocs.io", + ) + self.assertEqual(r.status_code, 200) + + def test_prefix_redirect(self): + """ + Test prefix redirect. + + Prefix redirects don't match a version, + so they will return 404, and the redirect will + be handled there. + """ + fixture.get( + Redirect, + project=self.project, + redirect_type="prefix", + from_url="/woot/", + force=True, + ) + r = self.client.get( + "/woot/install.html", + HTTP_HOST="project.dev.readthedocs.io", + ) + self.assertEqual(r.status_code, 404) + + def test_infinite_redirect(self): + fixture.get( + Redirect, + project=self.project, + redirect_type="page", + from_url="/install.html", + to_url="/install.html", + force=True, + ) + r = self.client.get( + "/en/latest/install.html", + HTTP_HOST="project.dev.readthedocs.io", + ) + self.assertEqual(r.status_code, 200) + + def test_redirect_page(self): + fixture.get( + Redirect, + project=self.project, + redirect_type="page", + from_url="/install.html", + to_url="/tutorial/install.html", + force=True, + ) + r = self.client.get( + "/en/latest/install.html", + HTTP_HOST="project.dev.readthedocs.io", + ) + self.assertEqual(r.status_code, 302) + self.assertEqual( + r["Location"], + "http://project.dev.readthedocs.io/en/latest/tutorial/install.html", + ) + + def test_redirect_with_query_params(self): + fixture.get( + Redirect, + project=self.project, + redirect_type="page", + from_url="/install.html", + to_url="/tutorial/install.html", + force=True, + ) + r = self.client.get( + "/en/latest/install.html?foo=bar", + HTTP_HOST="project.dev.readthedocs.io", + ) + self.assertEqual(r.status_code, 302) + self.assertEqual( + r["Location"], + "http://project.dev.readthedocs.io/en/latest/tutorial/install.html?foo=bar", + ) + + def test_redirect_exact(self): + fixture.get( + Redirect, + project=self.project, + redirect_type="exact", + from_url="/en/latest/install.html", + to_url="/en/latest/tutorial/install.html", + force=True, + ) + r = self.client.get( + "/en/latest/install.html", + HTTP_HOST="project.dev.readthedocs.io", + ) + self.assertEqual(r.status_code, 302) + self.assertEqual( + r["Location"], + "http://project.dev.readthedocs.io/en/latest/tutorial/install.html", + ) + + def test_redirect_exact_with_rest(self): + fixture.get( + Redirect, + project=self.project, + redirect_type="exact", + from_url="/en/latest/$rest", + to_url="/en/version/", + force=True, + ) + self.assertEqual(self.project.redirects.count(), 1) + r = self.client.get( + "/en/latest/guides/install.html", + HTTP_HOST="project.dev.readthedocs.io", + ) + self.assertEqual(r.status_code, 302) + self.assertEqual( + r["Location"], + "http://project.dev.readthedocs.io/en/version/guides/install.html", + ) + + fixture.get( + Redirect, + project=self.translation, + redirect_type="exact", + from_url="/es/latest/$rest", + to_url="/en/master/", + force=True, + ) + r = self.client.get( + "/es/latest/guides/install.html", + HTTP_HOST="project.dev.readthedocs.io", + ) + self.assertEqual(r.status_code, 302) + self.assertEqual( + r["Location"], + "http://project.dev.readthedocs.io/en/master/guides/install.html", + ) + + def test_redirect_keeps_language(self): + self.project.language = "de" + self.project.save() + fixture.get( + Redirect, + project=self.project, + redirect_type="page", + from_url="/how_to_install.html", + to_url="/install.html", + force=True, + ) + r = self.client.get( + "/de/latest/how_to_install.html", + HTTP_HOST="project.dev.readthedocs.io", + ) + self.assertEqual(r.status_code, 302) + self.assertEqual( + r["Location"], + "http://project.dev.readthedocs.io/de/latest/install.html", + ) + + def test_redirect_recognizes_custom_cname(self): + fixture.get( + Redirect, + project=self.project, + redirect_type="page", + from_url="/install.html", + to_url="/tutorial/install.html", + force=True, + ) + r = self.client.get( + "/en/latest/install.html", + HTTP_HOST="docs.project.io", + HTTP_X_RTD_SLUG="project", + ) + self.assertEqual(r.status_code, 302) + self.assertEqual( + r["Location"], + "http://docs.project.io/en/latest/tutorial/install.html", + ) + + def test_redirect_html(self): + fixture.get( + Redirect, + project=self.project, + redirect_type="sphinx_html", + force=True, + ) + r = self.client.get( + "/en/latest/faq/", + HTTP_HOST="project.dev.readthedocs.io", + ) + self.assertEqual(r.status_code, 302) + self.assertEqual( + r["Location"], + "http://project.dev.readthedocs.io/en/latest/faq.html", + ) + + def test_redirect_html_index(self): + fixture.get( + Redirect, + project=self.project, + redirect_type="sphinx_html", + force=True, + ) + r = self.client.get( + "/en/latest/faq/index.html", + HTTP_HOST="project.dev.readthedocs.io", + ) + self.assertEqual(r.status_code, 302) + self.assertEqual( + r["Location"], + "http://project.dev.readthedocs.io/en/latest/faq.html", + ) + + def test_redirect_htmldir(self): + fixture.get( + Redirect, + project=self.project, + redirect_type="sphinx_htmldir", + force=True, + ) + r = self.client.get( + "/en/latest/faq.html", + HTTP_HOST="project.dev.readthedocs.io", + ) + self.assertEqual(r.status_code, 302) + self.assertEqual( + r["Location"], + "http://project.dev.readthedocs.io/en/latest/faq/", + ) + + def test_redirect_with_301_status(self): + fixture.get( + Redirect, + project=self.project, + redirect_type="exact", + from_url="/en/latest/install.html", + to_url="/en/latest/tutorial/install.html", + http_status=301, + force=True, + ) + r = self.client.get( + "/en/latest/install.html", + HTTP_HOST="project.dev.readthedocs.io", + ) + self.assertEqual(r.status_code, 301) + self.assertEqual( + r["Location"], + "http://project.dev.readthedocs.io/en/latest/tutorial/install.html", + ) + + @override_settings( PYTHON_MEDIA=True, PUBLIC_DOMAIN="dev.readthedocs.io", diff --git a/readthedocs/proxito/views/mixins.py b/readthedocs/proxito/views/mixins.py index e60ee488dcc..54548f68efc 100644 --- a/readthedocs/proxito/views/mixins.py +++ b/readthedocs/proxito/views/mixins.py @@ -258,7 +258,9 @@ def canonical_redirect(self, request, final_project, version_slug, filename): resp['X-RTD-Redirect'] = getattr(request, 'canonicalize', 'unknown') return resp - def get_redirect(self, project, lang_slug, version_slug, filename, full_path): + def get_redirect( + self, project, lang_slug, version_slug, filename, full_path, forced_only=False + ): """ Check for a redirect for this project that matches ``full_path``. @@ -270,6 +272,7 @@ def get_redirect(self, project, lang_slug, version_slug, filename, full_path): version_slug=version_slug, path=filename, full_path=full_path, + forced_only=forced_only, ) return redirect_path, http_status diff --git a/readthedocs/proxito/views/serve.py b/readthedocs/proxito/views/serve.py index 5bb3aadb2b8..527edc97782 100644 --- a/readthedocs/proxito/views/serve.py +++ b/readthedocs/proxito/views/serve.py @@ -176,16 +176,25 @@ def get(self, ) raise Http404('Invalid URL for project with versions') - # TODO: un-comment when ready to perform redirect here - # redirect_path, http_status = self.get_redirect( - # final_project, - # lang_slug, - # version_slug, - # filename, - # request.path, - # ) - # if redirect_path and http_status: - # return self.get_redirect_response(request, redirect_path, http_status) + redirect_path, http_status = self.get_redirect( + project=final_project, + lang_slug=lang_slug, + version_slug=version_slug, + filename=filename, + full_path=request.path, + forced_only=True, + ) + if redirect_path and http_status: + try: + return self.get_redirect_response( + request=request, + redirect_path=redirect_path, + proxito_path=request.path, + http_status=http_status, + ) + except InfiniteRedirectException: + # Continue with our normal serve. + pass # Check user permissions and return an unauthed response if needed if not self.allowed_user(request, final_project, version_slug): @@ -304,26 +313,6 @@ def get(self, request, proxito_path, template_name='404.html'): # (from URL == to URL) return HttpResponseRedirect(redirect_url) - # ``redirect_filename`` is the path without ``//`` and - # without query, starting with a ``/``. This matches our old logic: - # https://github.com/readthedocs/readthedocs.org/blob/4b09c7a0ab45cd894c3373f7f07bad7161e4b223/readthedocs/redirects/utils.py#L60 # noqa - # - # We parse ``filename`` to: - # - Remove the query params (probably it doesn't contain any query params at this point) - # - Remove any invalid URL chars (\r, \n, \t). - # - # We don't use ``.path`` to avoid parsing the filename as a full url. - # For example if the filename is ``http://example.com/my-path``, - # ``.path`` would return ``my-path``. - parsed = urlparse(filename) - redirect_filename = parsed._replace(query="").geturl() - - # We can't check for lang and version here to decide if we need to add - # the ``/`` or not because ``/install.html`` is a valid path to use as - # redirect and does not include lang and version on it. It should be - # fine always adding the ``/`` to the beginning. - redirect_filename = '/' + redirect_filename.lstrip('/') - # Check and perform redirects on 404 handler # NOTE: this redirect check must be done after trying files like # ``index.html`` and ``README.html`` to emulate the behavior we had when @@ -332,7 +321,7 @@ def get(self, request, proxito_path, template_name='404.html'): project=final_project, lang_slug=lang_slug, version_slug=version_slug, - filename=redirect_filename, + filename=filename, full_path=proxito_path, ) if redirect_path and http_status: @@ -389,7 +378,7 @@ def get(self, request, proxito_path, template_name='404.html'): self._register_broken_link( project=final_project, version=version, - path=redirect_filename, + path=filename, full_path=proxito_path, ) return resp @@ -397,7 +386,7 @@ def get(self, request, proxito_path, template_name='404.html'): self._register_broken_link( project=final_project, version=version, - path=redirect_filename, + path=filename, full_path=proxito_path, ) raise Http404('No custom 404 page found.') diff --git a/readthedocs/redirects/migrations/0005_allow_to_force_redirects.py b/readthedocs/redirects/migrations/0005_allow_to_force_redirects.py new file mode 100644 index 00000000000..7ddb50c0ba1 --- /dev/null +++ b/readthedocs/redirects/migrations/0005_allow_to_force_redirects.py @@ -0,0 +1,23 @@ +# Generated by Django 3.2.13 on 2022-05-18 19:41 + +from django.db import migrations, models + + +class Migration(migrations.Migration): + + dependencies = [ + ("redirects", "0004_denormalize-from-url"), + ] + + operations = [ + migrations.AddField( + model_name="redirect", + name="force", + field=models.BooleanField( + default=False, + help_text="Apply the redirect even if the page exists.", + null=True, + verbose_name="Force redirect", + ), + ), + ] diff --git a/readthedocs/redirects/models.py b/readthedocs/redirects/models.py index cc23a98e60c..8bd9193d0e6 100644 --- a/readthedocs/redirects/models.py +++ b/readthedocs/redirects/models.py @@ -91,6 +91,12 @@ class Redirect(models.Model): help_text=to_url_helptext, blank=True, ) + force = models.BooleanField( + _("Force redirect"), + null=True, + default=False, + help_text=_("Apply the redirect even if the page exists."), + ) http_status = models.SmallIntegerField( _('HTTP Status'), diff --git a/readthedocs/redirects/querysets.py b/readthedocs/redirects/querysets.py index 968644ec7dc..9ca00ab7e77 100644 --- a/readthedocs/redirects/querysets.py +++ b/readthedocs/redirects/querysets.py @@ -1,6 +1,7 @@ """Queryset for the redirects app.""" -import structlog +from urllib.parse import urlparse +import structlog from django.db import models from django.db.models import CharField, F, Q, Value @@ -35,16 +36,52 @@ def api(self, user=None): queryset = self._add_from_user_projects(queryset, user) return queryset - def get_redirect_path_with_status(self, path, full_path=None, language=None, version_slug=None): + def get_redirect_path_with_status( + self, path, full_path=None, language=None, version_slug=None, forced_only=False + ): + """ + Get the final redirect with its status code. + + :param path: Is the path without the language and version parts. + :param full_path: Is the full path including the language and version parts. + :param forced_only: Include only forced redirects in the results. + """ + + # Small optimization to skip executing the big query below. + if forced_only and not self.filter(force=True).exists(): + return None, None + + def normalize_path(path): + """ + Normalize path. + + We normalize ``path`` to: + + - Remove the query params. + - Remove any invalid URL chars (\r, \n, \t). + - Always start the path with ``/``. + + We don't use ``.path`` to avoid parsing the filename as a full url. + For example if the path is ``http://example.com/my-path``, + ``.path`` would return ``my-path``. + """ + parsed_path = urlparse(path) + normalized_path = parsed_path._replace(query="").geturl() + normalized_path = "/" + normalized_path.lstrip("/") + return normalized_path + + normalized_path = normalize_path(path) + normalized_full_path = normalize_path(full_path) + # add extra fields with the ``path`` and ``full_path`` to perform a - # filter at db level instead with Python + # filter at db level instead with Python. queryset = self.annotate( path=Value( - path, + normalized_path, output_field=CharField(), ), full_path=Value( - full_path, + normalized_full_path, output_field=CharField(), ), ) @@ -81,13 +118,15 @@ def get_redirect_path_with_status(self, path, full_path=None, language=None, ver ) queryset = queryset.filter(prefix | page | exact | sphinx_html | sphinx_htmldir) + if forced_only: + queryset = queryset.filter(force=True) # There should be one and only one redirect returned by this query. I # can't think in a case where there can be more at this point. I'm # leaving the loop just in case for now for redirect in queryset.select_related('project'): new_path = redirect.get_redirect_path( - path=path, + path=normalized_path, language=language, version_slug=version_slug, )