Skip to content

Commit

Permalink
Sitemap should only exclude 404 and 500 pages (#7655)
Browse files Browse the repository at this point in the history
* fix(#7472): sitemap should only exclude 404 and 500 pages

* chore: refactor logic, add test
  • Loading branch information
natemoo-re authored Jul 14, 2023
1 parent 795d598 commit c258492
Show file tree
Hide file tree
Showing 7 changed files with 60 additions and 29 deletions.
5 changes: 5 additions & 0 deletions .changeset/chatty-pigs-film.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
'@astrojs/sitemap': minor
---

Ensure sitemap only excludes numerical pages matching `/404` and `/500` exactly
4 changes: 1 addition & 3 deletions packages/integrations/sitemap/src/generate-sitemap.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,13 +2,11 @@ import type { EnumChangefreq } from 'sitemap';
import type { SitemapItem, SitemapOptions } from './index.js';
import { parseUrl } from './utils/parse-url.js';

const STATUS_CODE_PAGE_REGEXP = /\/[0-9]{3}\/?$/;

/** Construct sitemap.xml given a set of URLs */
export function generateSitemap(pages: string[], finalSiteUrl: string, opts: SitemapOptions) {
const { changefreq, priority, lastmod: lastmodSrc, i18n } = opts!;
// TODO: find way to respect <link rel="canonical"> URLs here
const urls = [...pages].filter((url) => !STATUS_CODE_PAGE_REGEXP.test(url));
const urls = [...pages];
urls.sort((a, b) => a.localeCompare(b, 'en', { numeric: true })); // sort alphabetically so sitemap is same each time

const lastmod = lastmodSrc?.toISOString();
Expand Down
40 changes: 21 additions & 19 deletions packages/integrations/sitemap/src/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -22,24 +22,24 @@ export type LinkItem = LinkItemBase;

export type SitemapOptions =
| {
filter?(page: string): boolean;
customPages?: string[];

i18n?: {
defaultLocale: string;
locales: Record<string, string>;
};
// number of entries per sitemap file
entryLimit?: number;

// sitemap specific
changefreq?: ChangeFreq;
lastmod?: Date;
priority?: number;

// called for each sitemap item just before to save them on disk, sync or async
serialize?(item: SitemapItem): SitemapItem | Promise<SitemapItem | undefined> | undefined;
}
filter?(page: string): boolean;
customPages?: string[];

i18n?: {
defaultLocale: string;
locales: Record<string, string>;
};
// number of entries per sitemap file
entryLimit?: number;

// sitemap specific
changefreq?: ChangeFreq;
lastmod?: Date;
priority?: number;

// called for each sitemap item just before to save them on disk, sync or async
serialize?(item: SitemapItem): SitemapItem | Promise<SitemapItem | undefined> | undefined;
}
| undefined;

function formatConfigErrorMessage(err: ZodError) {
Expand All @@ -49,6 +49,7 @@ function formatConfigErrorMessage(err: ZodError) {

const PKG_NAME = '@astrojs/sitemap';
const OUTFILE = 'sitemap-index.xml';
const STATUS_CODE_PAGES = new Set(['/404', '/500']);

const createPlugin = (options?: SitemapOptions): AstroIntegration => {
let config: AstroConfig;
Expand Down Expand Up @@ -85,7 +86,7 @@ const createPlugin = (options?: SitemapOptions): AstroIntegration => {
return;
}

let pageUrls = pages.map((p) => {
let pageUrls = pages.filter((p) => !STATUS_CODE_PAGES.has('/' + p.pathname.slice(0, -1))).map((p) => {
if (p.pathname !== '' && !finalSiteUrl.pathname.endsWith('/'))
finalSiteUrl.pathname += '/';
const path = finalSiteUrl.pathname + p.pathname;
Expand All @@ -97,6 +98,7 @@ const createPlugin = (options?: SitemapOptions): AstroIntegration => {
* Dynamic URLs have entries with `undefined` pathnames
*/
if (r.pathname) {
if (STATUS_CODE_PAGES.has(r.pathname)) return urls;
/**
* remove the initial slash from relative pathname
* because `finalSiteUrl` always has trailing slash
Expand Down
4 changes: 2 additions & 2 deletions packages/integrations/sitemap/test/filter.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ describe('Filter support', () => {
root: './fixtures/static/',
integrations: [
sitemap({
filter: (page) => page !== 'http://example.com/two/',
filter: (page) => page === 'http://example.com/one/',
}),
],
});
Expand All @@ -32,7 +32,7 @@ describe('Filter support', () => {
root: './fixtures/ssr/',
integrations: [
sitemap({
filter: (page) => page !== 'http://example.com/two/',
filter: (page) => page === 'http://example.com/one/',
}),
],
});
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<html>
<head>
<title>123</title>
</head>
<body>
<h1>123</h1>
</body>
</html>
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
<html>
<head>
<title>404</title>
</head>
<body>
<h1>404</h1>
</body>
</html>
20 changes: 15 additions & 5 deletions packages/integrations/sitemap/test/staticPaths.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,19 +4,29 @@ import { expect } from 'chai';
describe('getStaticPaths support', () => {
/** @type {import('./test-utils.js').Fixture} */
let fixture;
/** @type {string[]} */
let urls;

before(async () => {
fixture = await loadFixture({
root: './fixtures/static/',
});
await fixture.build();
});

it('getStaticPath pages require zero config', async () => {
const data = await readXML(fixture.readFile('/sitemap-0.xml'));
const urls = data.urlset.url;
urls = data.urlset.url.map(url => url.loc[0]);
});

expect(urls[0].loc[0]).to.equal('http://example.com/one/');
expect(urls[1].loc[0]).to.equal('http://example.com/two/');
it('requires zero config for getStaticPaths', async () => {
expect(urls).to.include('http://example.com/one/');
expect(urls).to.include('http://example.com/two/');
});

it('does not include 404 pages', () => {
expect(urls).to.not.include('http://example.com/404/');
});

it('includes numerical pages', () => {
expect(urls).to.include('http://example.com/123/');
})
});

0 comments on commit c258492

Please sign in to comment.