Commit 163244f: Add option to allow all URLs to be crawlable via robots.txt

acelaya committed Apr 22, 2024 (1 parent: a89b53a)

Showing 8 changed files with 72 additions and 14 deletions.
17 changes: 17 additions & 0 deletions CHANGELOG.md
@@ -4,6 +4,23 @@ All notable changes to this project will be documented in this file.

The format is based on [Keep a Changelog](https://keepachangelog.com), and this project adheres to [Semantic Versioning](https://semver.org).

+## [Unreleased]
+### Added
+* [#2018](https://github.com/shlinkio/shlink/issues/2018) Add option to allow all short URLs to be unconditionally crawlable in robots.txt, via the `ROBOTS_ALLOW_ALL_SHORT_URLS=true` env var or config options.
+
+### Changed
+* *Nothing*
+
+### Deprecated
+* *Nothing*
+
+### Removed
+* *Nothing*
+
+### Fixed
+* *Nothing*
+
+
## [4.1.0] - 2024-04-14
### Added
* [#1330](https://github.com/shlinkio/shlink/issues/1330) All visit-related endpoints now expose the `visitedUrl` prop for any visit.
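The changelog entry above mentions that the new behavior can also be enabled through config options rather than the env var. Purely as a hypothetical illustration (the installer normally generates this config), a local override using the same key that config/autoload/robots.global.php introduces further down could look like this:

<?php

declare(strict_types=1);

// Hypothetical local config override enabling the new behavior without the
// ROBOTS_ALLOW_ALL_SHORT_URLS env var. The key mirrors the one added in
// config/autoload/robots.global.php in this commit.
return [
    'robots' => [
        'allow-all-short-urls' => true,
    ],
];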
2 changes: 1 addition & 1 deletion composer.json
@@ -47,7 +47,7 @@
"shlinkio/shlink-config": "^3.0",
"shlinkio/shlink-event-dispatcher": "^4.1",
"shlinkio/shlink-importer": "^5.3.2",
"shlinkio/shlink-installer": "^9.1",
"shlinkio/shlink-installer": "dev-develop#11e66d8 as 9.2",
"shlinkio/shlink-ip-geolocation": "^4.0",
"shlinkio/shlink-json": "^1.1",
"spiral/roadrunner": "^2023.3",
1 change: 1 addition & 0 deletions config/autoload/installer.global.php
@@ -45,6 +45,7 @@
Option\UrlShortener\EnableMultiSegmentSlugsConfigOption::class,
Option\UrlShortener\EnableTrailingSlashConfigOption::class,
Option\UrlShortener\ShortUrlModeConfigOption::class,
+Option\UrlShortener\RobotsAllowAllShortUrlsConfigOption::class,
Option\Tracking\IpAnonymizationConfigOption::class,
Option\Tracking\OrphanVisitsTrackingConfigOption::class,
Option\Tracking\DisableTrackParamConfigOption::class,
13 changes: 13 additions & 0 deletions config/autoload/robots.global.php
@@ -0,0 +1,13 @@
+<?php
+
+declare(strict_types=1);
+
+namespace Shlinkio\Shlink\Core;
+
+return [
+
+    'robots' => [
+        'allow-all-short-urls' => (bool) Config\EnvVars::ROBOTS_ALLOW_ALL_SHORT_URLS->loadFromEnv(false),
+    ],
+
+];
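For context, EnvVars::ROBOTS_ALLOW_ALL_SHORT_URLS->loadFromEnv(false) should yield the parsed value of the env var, falling back to the provided default when it is unset. A minimal standalone sketch of that resolution logic, assuming 'true'/'false' strings are parsed as booleans (Shlink's real implementation lives in the EnvVars enum and shlinkio/shlink-config, not here):

<?php

declare(strict_types=1);

// Sketch only: resolve a boolean flag from the environment with a default.
function loadBoolFromEnv(string $name, bool $default): bool
{
    $value = getenv($name);
    if ($value === false) {
        return $default; // env var not set: use the provided default
    }

    // 'true', '1', 'on', 'yes' become true; everything else becomes false
    return filter_var($value, FILTER_VALIDATE_BOOLEAN);
}

var_dump(loadBoolFromEnv('ROBOTS_ALLOW_ALL_SHORT_URLS', false)); // bool(false) while unset
putenv('ROBOTS_ALLOW_ALL_SHORT_URLS=true');
var_dump(loadBoolFromEnv('ROBOTS_ALLOW_ALL_SHORT_URLS', false)); // bool(true)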
2 changes: 1 addition & 1 deletion module/Core/config/dependencies.config.php
@@ -189,7 +189,7 @@
'Logger_Shlink',
Options\QrCodeOptions::class,
],
-Action\RobotsAction::class => [Crawling\CrawlingHelper::class],
+Action\RobotsAction::class => [Crawling\CrawlingHelper::class, 'config.robots.allow-all-short-urls'],

ShortUrl\Resolver\PersistenceShortUrlRelationResolver::class => [
'em',
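The new 'config.robots.allow-all-short-urls' dependency tells the container to inject that nested config value as RobotsAction's second constructor argument. The actual wiring is provided by Shlink's config/DI packages; the helper below is only an illustration of how a dotted service name can be resolved against the merged config array:

<?php

declare(strict_types=1);

// Illustrative only: walk the merged config array following a dotted name
// such as 'config.robots.allow-all-short-urls'. Shlink's container does the
// equivalent of this internally.
function resolveDottedConfig(array $config, string $name): mixed
{
    $path = preg_replace('/^config\./', '', $name); // strip the 'config.' prefix
    $value = $config;
    foreach (explode('.', $path) as $key) {
        $value = $value[$key] ?? null; // descend one level per dot-separated key
    }

    return $value;
}

$config = ['robots' => ['allow-all-short-urls' => true]];
var_dump(resolveDottedConfig($config, 'config.robots.allow-all-short-urls')); // bool(true)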
10 changes: 8 additions & 2 deletions module/Core/src/Action/RobotsAction.php
@@ -15,9 +15,9 @@

use const PHP_EOL;

-class RobotsAction implements RequestHandlerInterface, StatusCodeInterface
+readonly class RobotsAction implements RequestHandlerInterface, StatusCodeInterface
{
-public function __construct(private readonly CrawlingHelperInterface $crawlingHelper)
+public function __construct(private CrawlingHelperInterface $crawlingHelper, private bool $allowAllShortUrls)
{
}

@@ -37,6 +37,12 @@ private function buildRobots(): iterable
ROBOTS;

+if ($this->allowAllShortUrls) {
+    // Disallow rest URLs, but allow all short codes
+    yield 'Disallow: /rest/';
+    return;
+}
+
$shortCodes = $this->crawlingHelper->listCrawlableShortCodes();
foreach ($shortCodes as $shortCode) {
yield sprintf('Allow: /%s%s', $shortCode, PHP_EOL);
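Pieced together from the fragments above, the updated buildRobots() generator plausibly reads as follows. This is a sketch: the header lines collapsed out of the diff are inferred from the expected robots.txt output in the tests further down, so details may differ slightly from the actual file.

private function buildRobots(): iterable
{
    yield <<<ROBOTS
    # For more information about the robots.txt standard, see:
    # https://www.robotstxt.org/orig.html

    User-agent: *

    ROBOTS;

    if ($this->allowAllShortUrls) {
        // Disallow rest URLs, but allow all short codes
        yield 'Disallow: /rest/';
        return;
    }

    // Otherwise, allow only the explicitly crawlable short codes
    $shortCodes = $this->crawlingHelper->listCrawlableShortCodes();
    foreach ($shortCodes as $shortCode) {
        yield sprintf('Allow: /%s%s', $shortCode, PHP_EOL);
    }

    yield 'Disallow: /';
}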
3 changes: 2 additions & 1 deletion module/Core/src/Config/EnvVars.php
@@ -69,8 +69,9 @@ enum EnvVars: string
case DEFAULT_DOMAIN = 'DEFAULT_DOMAIN';
case AUTO_RESOLVE_TITLES = 'AUTO_RESOLVE_TITLES';
case REDIRECT_APPEND_EXTRA_PATH = 'REDIRECT_APPEND_EXTRA_PATH';
-case TIMEZONE = 'TIMEZONE';
case MULTI_SEGMENT_SLUGS_ENABLED = 'MULTI_SEGMENT_SLUGS_ENABLED';
+case ROBOTS_ALLOW_ALL_SHORT_URLS = 'ROBOTS_ALLOW_ALL_SHORT_URLS';
+case TIMEZONE = 'TIMEZONE';
case MEMORY_LIMIT = 'MEMORY_LIMIT';

public function loadFromEnv(mixed $default = null): mixed
38 changes: 29 additions & 9 deletions module/Core/test/Action/RobotsActionTest.php
@@ -14,24 +14,25 @@

class RobotsActionTest extends TestCase
{
-private RobotsAction $action;
private MockObject & CrawlingHelperInterface $helper;

protected function setUp(): void
{
$this->helper = $this->createMock(CrawlingHelperInterface::class);
-$this->action = new RobotsAction($this->helper);
}

#[Test, DataProvider('provideShortCodes')]
-public function buildsRobotsLinesFromCrawlableShortCodes(array $shortCodes, string $expected): void
-{
+public function buildsRobotsLinesFromCrawlableShortCodes(
+    array $shortCodes,
+    bool $allowAllShortUrls,
+    string $expected,
+): void {
$this->helper
-->expects($this->once())
+->expects($allowAllShortUrls ? $this->never() : $this->once())
->method('listCrawlableShortCodes')
->willReturn($shortCodes);

-$response = $this->action->handle(ServerRequestFactory::fromGlobals());
+$response = $this->action($allowAllShortUrls)->handle(ServerRequestFactory::fromGlobals());

self::assertEquals(200, $response->getStatusCode());
self::assertEquals($expected, $response->getBody()->__toString());
@@ -40,7 +41,7 @@

public static function provideShortCodes(): iterable
{
-yield 'three short codes' => [['foo', 'bar', 'baz'], <<<ROBOTS
+yield 'three short codes' => [['foo', 'bar', 'baz'], false, <<<ROBOTS
# For more information about the robots.txt standard, see:
# https://www.robotstxt.org/orig.html

@@ -50,7 +51,7 @@ public static function provideShortCodes(): iterable
Allow: /baz
Disallow: /
ROBOTS];
-yield 'five short codes' => [['foo', 'bar', 'some', 'thing', 'baz'], <<<ROBOTS
+yield 'five short codes' => [['foo', 'bar', 'some', 'thing', 'baz'], false, <<<ROBOTS
# For more information about the robots.txt standard, see:
# https://www.robotstxt.org/orig.html

@@ -62,12 +63,31 @@ public static function provideShortCodes(): iterable
Allow: /baz
Disallow: /
ROBOTS];
-yield 'no short codes' => [[], <<<ROBOTS
+yield 'no short codes' => [[], false, <<<ROBOTS
# For more information about the robots.txt standard, see:
# https://www.robotstxt.org/orig.html

User-agent: *
Disallow: /
ROBOTS];
+yield 'three short codes and allow all short urls' => [['foo', 'bar', 'some'], true, <<<ROBOTS
+# For more information about the robots.txt standard, see:
+# https://www.robotstxt.org/orig.html
+
+User-agent: *
+Disallow: /rest/
+ROBOTS];
+yield 'no short codes and allow all short urls' => [[], true, <<<ROBOTS
+# For more information about the robots.txt standard, see:
+# https://www.robotstxt.org/orig.html
+
+User-agent: *
+Disallow: /rest/
+ROBOTS];
}

+private function action(bool $allowAllShortUrls = false): RobotsAction
+{
+    return new RobotsAction($this->helper, allowAllShortUrls: $allowAllShortUrls);
+}
}
