From 03516dc3a7b0a4ade527b93c8dd7dc5649be8b80 Mon Sep 17 00:00:00 2001
From: Joshua Chen
Date: Thu, 14 Apr 2022 17:31:09 +0800
Subject: [PATCH] fix(sitemap): exclude pages with robots noindex from sitemap
(#7143)
---
packages/docusaurus-plugin-debug/src/index.ts | 16 ++++-----
.../src/plugin-debug.d.ts | 4 ---
.../src/theme/DebugLayout/index.tsx | 1 +
.../src/__tests__/createSitemap.test.ts | 35 +++++++++++++++++-
.../src/createSitemap.ts | 36 +++++++++++++------
.../docusaurus-plugin-sitemap/src/index.ts | 3 +-
.../docusaurus-preset-classic/src/index.ts | 12 ++-----
packages/docusaurus-types/package.json | 1 +
packages/docusaurus-types/src/index.d.ts | 8 ++++-
.../docusaurus/src/client/serverEntry.tsx | 2 ++
packages/docusaurus/src/commands/build.ts | 11 +++++-
packages/docusaurus/src/deps.d.ts | 6 ++++
packages/docusaurus/src/webpack/server.ts | 11 +++---
13 files changed, 105 insertions(+), 41 deletions(-)
diff --git a/packages/docusaurus-plugin-debug/src/index.ts b/packages/docusaurus-plugin-debug/src/index.ts
index 26fcc297a9d3..01a9e0a4e03f 100644
--- a/packages/docusaurus-plugin-debug/src/index.ts
+++ b/packages/docusaurus-plugin-debug/src/index.ts
@@ -9,8 +9,6 @@ import type {LoadContext, Plugin} from '@docusaurus/types';
import {docuHash, normalizeUrl, posixPath} from '@docusaurus/utils';
import path from 'path';
-export const routeBasePath = '__docusaurus/debug';
-
export default function pluginDebug({
siteConfig: {baseUrl},
generatedFilesDir,
@@ -42,37 +40,37 @@ export default function pluginDebug({
// Home is config (duplicate for now)
addRoute({
- path: normalizeUrl([baseUrl, routeBasePath]),
+ path: normalizeUrl([baseUrl, '__docusaurus/debug']),
component: '@theme/DebugConfig',
exact: true,
});
addRoute({
- path: normalizeUrl([baseUrl, routeBasePath, 'config']),
+ path: normalizeUrl([baseUrl, '__docusaurus/debug/config']),
component: '@theme/DebugConfig',
exact: true,
});
addRoute({
- path: normalizeUrl([baseUrl, routeBasePath, 'metadata']),
+ path: normalizeUrl([baseUrl, '__docusaurus/debug/metadata']),
component: '@theme/DebugSiteMetadata',
exact: true,
});
addRoute({
- path: normalizeUrl([baseUrl, routeBasePath, 'registry']),
+ path: normalizeUrl([baseUrl, '__docusaurus/debug/registry']),
component: '@theme/DebugRegistry',
exact: true,
});
addRoute({
- path: normalizeUrl([baseUrl, routeBasePath, 'routes']),
+ path: normalizeUrl([baseUrl, '__docusaurus/debug/routes']),
component: '@theme/DebugRoutes',
exact: true,
});
addRoute({
- path: normalizeUrl([baseUrl, routeBasePath, 'content']),
+ path: normalizeUrl([baseUrl, '__docusaurus/debug/content']),
component: '@theme/DebugContent',
exact: true,
modules: {
@@ -81,7 +79,7 @@ export default function pluginDebug({
});
addRoute({
- path: normalizeUrl([baseUrl, routeBasePath, 'globalData']),
+ path: normalizeUrl([baseUrl, '__docusaurus/debug/globalData']),
component: '@theme/DebugGlobalData',
exact: true,
});
diff --git a/packages/docusaurus-plugin-debug/src/plugin-debug.d.ts b/packages/docusaurus-plugin-debug/src/plugin-debug.d.ts
index 6bde34dab9bc..93b666dd4f31 100644
--- a/packages/docusaurus-plugin-debug/src/plugin-debug.d.ts
+++ b/packages/docusaurus-plugin-debug/src/plugin-debug.d.ts
@@ -7,10 +7,6 @@
///
-declare module '@docusaurus/plugin-debug' {
- export const routeBasePath: string;
-}
-
declare module '@theme/DebugConfig' {
export default function DebugMetadata(): JSX.Element;
}
diff --git a/packages/docusaurus-plugin-debug/src/theme/DebugLayout/index.tsx b/packages/docusaurus-plugin-debug/src/theme/DebugLayout/index.tsx
index e8e57fde16de..6285f7effb95 100644
--- a/packages/docusaurus-plugin-debug/src/theme/DebugLayout/index.tsx
+++ b/packages/docusaurus-plugin-debug/src/theme/DebugLayout/index.tsx
@@ -35,6 +35,7 @@ export default function DebugLayout({
Docusaurus debug panel
+
diff --git a/packages/docusaurus-plugin-sitemap/src/__tests__/createSitemap.test.ts b/packages/docusaurus-plugin-sitemap/src/__tests__/createSitemap.test.ts
index 193002e4bef1..c9008bd31004 100644
--- a/packages/docusaurus-plugin-sitemap/src/__tests__/createSitemap.test.ts
+++ b/packages/docusaurus-plugin-sitemap/src/__tests__/createSitemap.test.ts
@@ -5,6 +5,7 @@
* LICENSE file in the root directory of this source tree.
*/
+import React from 'react';
import createSitemap from '../createSitemap';
import type {DocusaurusConfig} from '@docusaurus/types';
import {EnumChangefreq} from 'sitemap';
@@ -16,6 +17,7 @@ describe('createSitemap', () => {
url: 'https://example.com',
} as DocusaurusConfig,
['/', '/test'],
+ {},
{
changefreq: EnumChangefreq.DAILY,
priority: 0.7,
@@ -29,7 +31,7 @@ describe('createSitemap', () => {
it('empty site', () =>
expect(async () => {
- await createSitemap({} as DocusaurusConfig, [], {});
+ await createSitemap({} as DocusaurusConfig, [], {}, {});
}).rejects.toThrow(
'URL in docusaurus.config.js cannot be empty/undefined.',
));
@@ -40,6 +42,7 @@ describe('createSitemap', () => {
url: 'https://example.com',
} as DocusaurusConfig,
['/', '/404.html', '/my-page'],
+ {},
{
changefreq: EnumChangefreq.DAILY,
priority: 0.7,
@@ -55,6 +58,7 @@ describe('createSitemap', () => {
url: 'https://example.com',
} as DocusaurusConfig,
['/', '/search/', '/tags/', '/search/foo', '/tags/foo/bar'],
+ {},
{
changefreq: EnumChangefreq.DAILY,
priority: 0.7,
@@ -78,6 +82,7 @@ describe('createSitemap', () => {
trailingSlash: undefined,
} as DocusaurusConfig,
['/', '/test', '/nested/test', '/nested/test2/'],
+ {},
{
changefreq: EnumChangefreq.DAILY,
priority: 0.7,
@@ -98,6 +103,7 @@ describe('createSitemap', () => {
trailingSlash: true,
} as DocusaurusConfig,
['/', '/test', '/nested/test', '/nested/test2/'],
+ {},
{
changefreq: EnumChangefreq.DAILY,
priority: 0.7,
@@ -118,6 +124,7 @@ describe('createSitemap', () => {
trailingSlash: false,
} as DocusaurusConfig,
['/', '/test', '/nested/test', '/nested/test2/'],
+ {},
{
changefreq: EnumChangefreq.DAILY,
priority: 0.7,
@@ -130,4 +137,35 @@ describe('createSitemap', () => {
expect(sitemap).toContain('https://example.com/nested/test');
expect(sitemap).toContain('https://example.com/nested/test2');
});
+
+ // A route whose collected head contains a robots "noindex" meta tag must be
+ // left out of the sitemap, while the remaining routes stay listed.
+ it('filters pages with noindex', async () => {
+ const sitemap = await createSitemap(
+ {
+ url: 'https://example.com',
+ trailingSlash: false,
+ } as DocusaurusConfig,
+ ['/', '/noindex', '/nested/test', '/nested/test2/'],
+ {
+ '/noindex': {
+ meta: {
+ toComponent: () => [
+ React.createElement('meta', {name: 'robots', content: 'noindex'}),
+ ],
+ },
+ },
+ },
+ {
+ changefreq: EnumChangefreq.DAILY,
+ priority: 0.7,
+ ignorePatterns: [],
+ },
+ );
+
+ expect(sitemap).not.toContain('/noindex');
+ // Guard against a vacuous pass: an empty sitemap would satisfy the check above
+ expect(sitemap).toContain('https://example.com/nested/test');
+ expect(sitemap).toContain('https://example.com/nested/test2');
+ });
});
diff --git a/packages/docusaurus-plugin-sitemap/src/createSitemap.ts b/packages/docusaurus-plugin-sitemap/src/createSitemap.ts
index 8df3581badd1..fa4aefb75e52 100644
--- a/packages/docusaurus-plugin-sitemap/src/createSitemap.ts
+++ b/packages/docusaurus-plugin-sitemap/src/createSitemap.ts
@@ -10,10 +10,13 @@ import type {PluginOptions} from '@docusaurus/plugin-sitemap';
import type {DocusaurusConfig} from '@docusaurus/types';
import {applyTrailingSlash} from '@docusaurus/utils-common';
import {createMatcher} from '@docusaurus/utils';
+import type {HelmetServerState} from 'react-helmet-async';
+import type {ReactElement} from 'react';
export default async function createSitemap(
siteConfig: DocusaurusConfig,
routesPaths: string[],
+ head: {[location: string]: HelmetServerState},
options: PluginOptions,
): Promise {
const {url: hostname} = siteConfig;
@@ -26,18 +29,37 @@ export default async function createSitemap(
const sitemapStream = new SitemapStream({hostname});
- routesPaths
- .filter((route) => !route.endsWith('404.html') && !ignoreMatcher(route))
- .forEach((routePath) =>
- sitemapStream.write({
- url: applyTrailingSlash(routePath, {
- trailingSlash: siteConfig.trailingSlash,
- baseUrl: siteConfig.baseUrl,
- }),
- changefreq,
- priority,
- }),
+ // A route stays in the sitemap unless it ends with 404.html, is matched by
+ // ignoreMatcher, or its collected head has a robots meta tag with noindex.
+ function routeShouldBeIncluded(route: string) {
+ if (route.endsWith('404.html') || ignoreMatcher(route)) {
+ return false;
+ }
+ // https://github.com/staylor/react-helmet-async/pull/167
+ const meta = head[route]?.meta.toComponent() as unknown as
+ | ReactElement[]
+ | undefined;
+ // The robots content is a comma-separated, case-insensitive directive list,
+ // so "noindex, nofollow" must be treated the same as a bare "noindex".
+ return !meta?.some(
+ (tag) =>
+ tag.props.name === 'robots' &&
+ String(tag.props.content ?? '')
+ .split(',')
+ .some((directive) => directive.trim().toLowerCase() === 'noindex'),
);
+ }
+
+ routesPaths.filter(routeShouldBeIncluded).forEach((routePath) =>
+ sitemapStream.write({
+ url: applyTrailingSlash(routePath, {
+ trailingSlash: siteConfig.trailingSlash,
+ baseUrl: siteConfig.baseUrl,
+ }),
+ changefreq,
+ priority,
+ }),
+ );
sitemapStream.end();
diff --git a/packages/docusaurus-plugin-sitemap/src/index.ts b/packages/docusaurus-plugin-sitemap/src/index.ts
index 2b77da6e0b24..5f66eeaa3e5a 100644
--- a/packages/docusaurus-plugin-sitemap/src/index.ts
+++ b/packages/docusaurus-plugin-sitemap/src/index.ts
@@ -18,7 +18,7 @@ export default function pluginSitemap(
return {
name: 'docusaurus-plugin-sitemap',
- async postBuild({siteConfig, routesPaths, outDir}) {
+ async postBuild({siteConfig, routesPaths, outDir, head}) {
if (siteConfig.noIndex) {
return;
}
@@ -26,6 +26,7 @@ export default function pluginSitemap(
const generatedSitemap = await createSitemap(
siteConfig,
routesPaths,
+ head,
options,
);
diff --git a/packages/docusaurus-preset-classic/src/index.ts b/packages/docusaurus-preset-classic/src/index.ts
index 79a60b65c9c7..e3059a520665 100644
--- a/packages/docusaurus-preset-classic/src/index.ts
+++ b/packages/docusaurus-preset-classic/src/index.ts
@@ -5,7 +5,6 @@
* LICENSE file in the root directory of this source tree.
*/
-import {routeBasePath as debugPluginRouteBasePath} from '@docusaurus/plugin-debug';
import type {
Preset,
LoadContext,
@@ -29,7 +28,7 @@ export default function preset(
opts: Options = {},
): Preset {
const {siteConfig} = context;
- const {themeConfig, baseUrl} = siteConfig;
+ const {themeConfig} = siteConfig;
const {algolia} = themeConfig as Partial;
const isProd = process.env.NODE_ENV === 'production';
const {
@@ -37,13 +36,12 @@ export default function preset(
docs,
blog,
pages,
- sitemap = {},
+ sitemap,
theme,
googleAnalytics,
gtag,
...rest
} = opts;
- const isDebugEnabled = debug || (debug === undefined && !isProd);
const themes: PluginConfig[] = [];
themes.push(makePluginConfig('@docusaurus/theme-classic', theme));
@@ -76,17 +74,13 @@ export default function preset(
makePluginConfig('@docusaurus/plugin-google-analytics', googleAnalytics),
);
}
- if (isDebugEnabled) {
+ if (debug || (debug === undefined && !isProd)) {
plugins.push(require.resolve('@docusaurus/plugin-debug'));
}
if (gtag) {
plugins.push(makePluginConfig('@docusaurus/plugin-google-gtag', gtag));
}
if (isProd && sitemap !== false) {
- if (isDebugEnabled) {
- sitemap.ignorePatterns ??= [];
- sitemap.ignorePatterns.push(`${baseUrl}${debugPluginRouteBasePath}/**`);
- }
plugins.push(makePluginConfig('@docusaurus/plugin-sitemap', sitemap));
}
if (Object.keys(rest).length > 0) {
diff --git a/packages/docusaurus-types/package.json b/packages/docusaurus-types/package.json
index 932e14154524..1fc67b649de6 100644
--- a/packages/docusaurus-types/package.json
+++ b/packages/docusaurus-types/package.json
@@ -19,6 +19,7 @@
"commander": "^5.1.0",
"history": "^4.9.0",
"joi": "^17.6.0",
+ "react-helmet-async": "^1.2.3",
"utility-types": "^3.10.0",
"webpack": "^5.72.0",
"webpack-merge": "^5.8.0"
diff --git a/packages/docusaurus-types/src/index.d.ts b/packages/docusaurus-types/src/index.d.ts
index 348a3ded439e..189c5ef3ebeb 100644
--- a/packages/docusaurus-types/src/index.d.ts
+++ b/packages/docusaurus-types/src/index.d.ts
@@ -10,6 +10,7 @@ import type {CustomizeRuleString} from 'webpack-merge/dist/types';
import type {CommanderStatic} from 'commander';
import type {ParsedUrlQueryInput} from 'querystring';
import type Joi from 'joi';
+import type {HelmetServerState} from 'react-helmet-async';
import type {
DeepRequired,
Required as RequireKeys,
@@ -319,7 +320,12 @@ export type Plugin = {
actions: PluginContentLoadedActions;
}) => Promise | void;
routesLoaded?: (routes: RouteConfig[]) => void; // TODO remove soon, deprecated (alpha-60)
- postBuild?: (props: Props & {content: Content}) => Promise | void;
+ postBuild?: (
+ props: Props & {
+ content: Content;
+ head: {[location: string]: HelmetServerState};
+ },
+ ) => Promise | void;
// TODO refactor the configureWebpack API surface: use an object instead of
// multiple params (requires breaking change)
configureWebpack?: (
diff --git a/packages/docusaurus/src/client/serverEntry.tsx b/packages/docusaurus/src/client/serverEntry.tsx
index 78694932d089..c2c57e9348aa 100644
--- a/packages/docusaurus/src/client/serverEntry.tsx
+++ b/packages/docusaurus/src/client/serverEntry.tsx
@@ -70,6 +70,7 @@ async function doRender(locals: Locals & {path: string}) {
preBodyTags,
postBodyTags,
onLinksCollected,
+ onHeadTagsCollected,
baseUrl,
ssrTemplate,
noIndex,
@@ -105,6 +106,7 @@ async function doRender(locals: Locals & {path: string}) {
helmet.link.toString(),
helmet.script.toString(),
];
+ onHeadTagsCollected(location, helmet);
const metaAttributes = metaStrings.filter(Boolean);
const {generatedFilesDir} = locals;
diff --git a/packages/docusaurus/src/commands/build.ts b/packages/docusaurus/src/commands/build.ts
index 252c797d6a09..98f7d2bf85fc 100644
--- a/packages/docusaurus/src/commands/build.ts
+++ b/packages/docusaurus/src/commands/build.ts
@@ -27,6 +27,7 @@ import {
import CleanWebpackPlugin from '../webpack/plugins/CleanWebpackPlugin';
import {loadI18n} from '../server/i18n';
import {mapAsyncSequential} from '@docusaurus/utils';
+import type {HelmetServerState} from 'react-helmet-async';
export async function build(
siteDir: string,
@@ -149,12 +150,16 @@ async function buildLocale({
);
const allCollectedLinks: {[location: string]: string[]} = {};
+ const headTags: {[location: string]: HelmetServerState} = {};
let serverConfig: Configuration = await createServerConfig({
props,
onLinksCollected: (staticPagePath, links) => {
allCollectedLinks[staticPagePath] = links;
},
+ onHeadTagsCollected: (staticPagePath, tags) => {
+ headTags[staticPagePath] = tags;
+ },
});
if (staticDirectories.length > 0) {
@@ -224,7 +229,11 @@ async function buildLocale({
if (!plugin.postBuild) {
return;
}
- await plugin.postBuild({...props, content: plugin.content});
+ await plugin.postBuild({
+ ...props,
+ head: headTags,
+ content: plugin.content,
+ });
}),
);
diff --git a/packages/docusaurus/src/deps.d.ts b/packages/docusaurus/src/deps.d.ts
index acaccfcebb0c..fe48ab85493b 100644
--- a/packages/docusaurus/src/deps.d.ts
+++ b/packages/docusaurus/src/deps.d.ts
@@ -35,6 +35,8 @@ declare module 'react-loadable-ssr-addon-v5-slorber' {
}
declare module '@slorber/static-site-generator-webpack-plugin' {
+ import type {HelmetServerState} from 'react-helmet-async';
+
export type Locals = {
routesLocation: {[filePath: string]: string};
generatedFilesDir: string;
@@ -42,6 +44,10 @@ declare module '@slorber/static-site-generator-webpack-plugin' {
preBodyTags: string;
postBodyTags: string;
onLinksCollected: (staticPagePath: string, links: string[]) => void;
+ onHeadTagsCollected: (
+ staticPagePath: string,
+ tags: HelmetServerState,
+ ) => void;
baseUrl: string;
ssrTemplate: string;
noIndex: boolean;
diff --git a/packages/docusaurus/src/webpack/server.ts b/packages/docusaurus/src/webpack/server.ts
index 93cb258c3489..fa2472828ed4 100644
--- a/packages/docusaurus/src/webpack/server.ts
+++ b/packages/docusaurus/src/webpack/server.ts
@@ -17,14 +17,16 @@ import {NODE_MAJOR_VERSION, NODE_MINOR_VERSION} from '@docusaurus/utils';
import ssrDefaultTemplate from './templates/ssr.html.template';
// Forked for Docusaurus: https://github.com/slorber/static-site-generator-webpack-plugin
-import StaticSiteGeneratorPlugin from '@slorber/static-site-generator-webpack-plugin';
+import StaticSiteGeneratorPlugin, {
+ type Locals,
+} from '@slorber/static-site-generator-webpack-plugin';
export default async function createServerConfig({
props,
- onLinksCollected = () => {},
-}: {
+ onLinksCollected,
+ onHeadTagsCollected,
+}: Pick & {
props: Props;
- onLinksCollected?: (staticPagePath: string, links: string[]) => void;
}): Promise {
const {
baseUrl,
@@ -73,6 +75,7 @@ export default async function createServerConfig({
preBodyTags,
postBodyTags,
onLinksCollected,
+ onHeadTagsCollected,
ssrTemplate: ssrTemplate ?? ssrDefaultTemplate,
noIndex,
},