Templatize robots.txt #478

Merged 2 commits on Jan 29, 2023
core/views.py (18 additions, 0 deletions)
@@ -2,6 +2,7 @@
from typing import ClassVar

import markdown_it
from django.conf import settings
from django.http import HttpResponse
from django.shortcuts import redirect
from django.templatetags.static import static
@@ -69,6 +70,23 @@ def get_static_content(self) -> str | bytes:
raise NotImplementedError()


@method_decorator(cache_page(60 * 60), name="dispatch")
class RobotsTxt(TemplateView):
"""
Serves the robots.txt for Takahē

To specify additional user-agents to disallow, use TAKAHE_ROBOTS_TXT_DISALLOWED_USER_AGENTS
"""

template_name = "robots.txt"
content_type = "text/plain"

def get_context_data(self):
return {
"user_agents": getattr(settings, "ROBOTS_TXT_DISALLOWED_USER_AGENTS", []),
}


@method_decorator(cache_control(max_age=60 * 15), name="dispatch")
class AppManifest(StaticContentView):
"""
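Not part of this PR, but as a hedged sketch of how the new RobotsTxt view above could be exercised in a Django test or shell session (the agent name is a placeholder, and the settings override mirrors the new ROBOTS_TXT_DISALLOWED_USER_AGENTS setting):

    # Hedged sketch; names other than RobotsTxt and the setting are placeholders.
    from django.test import RequestFactory, override_settings
    from core.views import RobotsTxt

    with override_settings(ROBOTS_TXT_DISALLOWED_USER_AGENTS=["ExampleBot"]):
        response = RobotsTxt.as_view()(RequestFactory().get("/robots.txt"))
        response.render()  # TemplateView returns a lazy TemplateResponse
        assert response["Content-Type"].startswith("text/plain")
        assert b"ExampleBot" in response.content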
docker/nginx.conf.d/default.conf.tpl (0 additions, 5 deletions)
@@ -29,11 +29,6 @@ server {
proxy_hide_header X-Takahe-User;
proxy_hide_header X-Takahe-Identity;

# Serve robots.txt from the non-collected dir as a special case.
location /robots.txt {
alias /takahe/static/robots.txt;
}

# Serves static files from the collected dir
location /static/ {
# Files in static have cache-busting hashes in the name, thus can be cached forever
takahe/settings.py (6 additions, 0 deletions)
@@ -105,6 +105,10 @@ class Settings(BaseSettings):
AUTO_ADMIN_EMAIL: EmailStr | None = None
ERROR_EMAILS: list[EmailStr] | None = None

#: If set, a list of user agents to completely disallow in robots.txt
#: List formatting must be a valid JSON list, such as `["Agent1", "Agent2"]`
ROBOTS_TXT_DISALLOWED_USER_AGENTS: list[str] = Field(default_factory=list)

MEDIA_URL: str = "/media/"
MEDIA_ROOT: str = str(BASE_DIR / "media")
MEDIA_BACKEND: MediaBackendUrl | None = None
@@ -313,6 +317,8 @@ class Config:
STATOR_CONCURRENCY = SETUP.STATOR_CONCURRENCY
STATOR_CONCURRENCY_PER_MODEL = SETUP.STATOR_CONCURRENCY_PER_MODEL

ROBOTS_TXT_DISALLOWED_USER_AGENTS = SETUP.ROBOTS_TXT_DISALLOWED_USER_AGENTS

CORS_ORIGIN_ALLOW_ALL = True # Temporary
CORS_ORIGIN_WHITELIST = SETUP.CORS_HOSTS
CORS_ALLOW_CREDENTIALS = True
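Since the Settings class reads TAKAHE_-prefixed environment variables (as the view docstring above notes), the new setting would be supplied as a JSON list in the environment; for example, with placeholder agent names:

    TAKAHE_ROBOTS_TXT_DISALLOWED_USER_AGENTS='["ExampleBot", "OtherBot"]'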
takahe/urls.py (1 addition, 0 deletions)
@@ -19,6 +19,7 @@

urlpatterns = [
path("", core.homepage),
path("robots.txt", core.RobotsTxt.as_view()),
path("manifest.json", core.AppManifest.as_view()),
# Activity views
path("notifications/", timelines.Notifications.as_view(), name="notifications"),
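A hedged end-to-end check of the new route (also not part of this PR) could go through Django's test client; the asserted Disallow line comes from the template added below:

    from django.test import Client

    response = Client().get("/robots.txt")
    assert response.status_code == 200
    assert response["Content-Type"].startswith("text/plain")
    assert b"Disallow: /tags/" in response.content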
templates/robots.txt (13 additions, 0 deletions)
@@ -0,0 +1,13 @@
User-Agent: *

# Don't allow any bot to crawl tags.
Disallow: /tags/
Disallow: /tags/*

# Don't allow bots to crawl through the proxy
Disallow: /proxy/*

{% for user_agent in user_agents %}
User-agent: {{user_agent}}
Disallow: /
{% endfor %}
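For illustration only: with ROBOTS_TXT_DISALLOWED_USER_AGENTS set to ["ExampleBot"] (a placeholder name), the rendered robots.txt would look roughly like this, with the loop appending one block per listed agent:

    User-Agent: *

    # Don't allow any bot to crawl tags.
    Disallow: /tags/
    Disallow: /tags/*

    # Don't allow bots to crawl through the proxy
    Disallow: /proxy/*

    User-agent: ExampleBot
    Disallow: /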