Skip to content

Commit

Permalink
Improve sitemap generate performance (#10795)
Browse files Browse the repository at this point in the history
  • Loading branch information
bluwy authored Apr 16, 2024
1 parent b2ef6ab commit 1ce2288
Show file tree
Hide file tree
Showing 5 changed files with 254 additions and 73 deletions.
5 changes: 5 additions & 0 deletions .changeset/famous-seals-camp.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"@astrojs/sitemap": patch
---

Improves performance when generating the sitemap data
94 changes: 60 additions & 34 deletions packages/integrations/sitemap/src/generate-sitemap.ts
Original file line number Diff line number Diff line change
@@ -1,51 +1,77 @@
import type { EnumChangefreq } from 'sitemap';
import type { SitemapItem, SitemapOptions } from './index.js';
import { parseUrl } from './utils/parse-url.js';
import { parseI18nUrl } from './utils/parse-i18n-url.js';

/** Construct sitemap.xml given a set of URLs */
export function generateSitemap(pages: string[], finalSiteUrl: string, opts: SitemapOptions) {
const { changefreq, priority, lastmod: lastmodSrc, i18n } = opts!;
export function generateSitemap(pages: string[], finalSiteUrl: string, opts?: SitemapOptions) {
const { changefreq, priority, lastmod: lastmodSrc, i18n } = opts ?? {};
// TODO: find way to respect <link rel="canonical"> URLs here
const urls = [...pages];
urls.sort((a, b) => a.localeCompare(b, 'en', { numeric: true })); // sort alphabetically so sitemap is same each time

const lastmod = lastmodSrc?.toISOString();

const { locales, defaultLocale } = i18n || {};
const localeCodes = Object.keys(locales || {});
// Parse URLs for i18n matching later
const { defaultLocale, locales } = i18n ?? {};
let getI18nLinks: GetI18nLinks | undefined;
if (defaultLocale && locales) {
getI18nLinks = createGetI18nLinks(urls, defaultLocale, locales, finalSiteUrl);
}

const getPath = (url: string) => {
const result = parseUrl(url, i18n?.defaultLocale || '', localeCodes, finalSiteUrl);
return result?.path;
};
const getLocale = (url: string) => {
const result = parseUrl(url, i18n?.defaultLocale || '', localeCodes, finalSiteUrl);
return result?.locale;
};
const urlData: SitemapItem[] = urls.map((url, i) => ({
url,
links: getI18nLinks?.(i),
lastmod,
priority,
changefreq: changefreq as EnumChangefreq,
}));

return urlData;
}

type GetI18nLinks = (urlIndex: number) => SitemapItem['links'] | undefined;

function createGetI18nLinks(
urls: string[],
defaultLocale: string,
locales: Record<string, string>,
finalSiteUrl: string
): GetI18nLinks {
// `parsedI18nUrls` will have the same length as `urls`, matching correspondingly
const parsedI18nUrls = urls.map((url) => parseI18nUrl(url, defaultLocale, locales, finalSiteUrl));
// Cache as multiple i18n URLs with the same path will have the same links
const i18nPathToLinksCache = new Map<string, SitemapItem['links']>();

const urlData: SitemapItem[] = urls.map((url) => {
let links;
if (defaultLocale && locales) {
const currentPath = getPath(url);
if (currentPath) {
const filtered = urls.filter((subUrl) => getPath(subUrl) === currentPath);
if (filtered.length > 1) {
links = filtered.map((subUrl) => ({
url: subUrl,
lang: locales[getLocale(subUrl)!],
}));
}
return (urlIndex) => {
const i18nUrl = parsedI18nUrls[urlIndex];
if (!i18nUrl) {
return undefined;
}

const cached = i18nPathToLinksCache.get(i18nUrl.path);
if (cached) {
return cached;
}

// Find all URLs with the same path (without the locale part), e.g. /en/foo and /es/foo
const links: NonNullable<SitemapItem['links']> = [];
for (let i = 0; i < parsedI18nUrls.length; i++) {
const parsed = parsedI18nUrls[i];
if (parsed?.path === i18nUrl.path) {
links.push({
url: urls[i],
lang: locales[parsed.locale],
});
}
}

return {
url,
links,
lastmod,
priority,
changefreq: changefreq as EnumChangefreq,
};
});
// If 0 or 1 (which is itself), return undefined to not create any links.
// We also don't need to cache this as we know there's no other URLs that would've match this.
if (links.length <= 1) {
return undefined;
}

return urlData;
i18nPathToLinksCache.set(i18nUrl.path, links);
return links;
};
}
42 changes: 42 additions & 0 deletions packages/integrations/sitemap/src/utils/parse-i18n-url.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
interface ParsedI18nUrl {
locale: string;
path: string;
}

// NOTE: The parameters have been schema-validated with Zod
export function parseI18nUrl(
url: string,
defaultLocale: string,
locales: Record<string, string>,
base: string
): ParsedI18nUrl | undefined {
if (!url.startsWith(base)) {
return undefined;
}

let s = url.slice(base.length);

// Handle root URL
if (!s || s === '/') {
return { locale: defaultLocale, path: '/' };
}

if (s[0] !== '/') {
s = '/' + s;
}

// Get locale from path, e.g.
// "/en-US/" -> "en-US"
// "/en-US/foo" -> "en-US"
const locale = s.split('/')[1];
if (locale in locales) {
// "/en-US/foo" -> "/foo"
let path = s.slice(1 + locale.length);
if (!path) {
path = '/';
}
return { locale, path };
}

return { locale: defaultLocale, path: s };
}
39 changes: 0 additions & 39 deletions packages/integrations/sitemap/src/utils/parse-url.ts

This file was deleted.

147 changes: 147 additions & 0 deletions packages/integrations/sitemap/test/units/generate-sitemap.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,147 @@
import assert from 'node:assert/strict';
import { describe, it } from 'node:test';
import { generateSitemap } from '../../dist/generate-sitemap.js';

const site = 'http://example.com';

describe('generateSitemap', () => {
describe('basic', () => {
it('works', () => {
const items = generateSitemap(
[
// All pages
`${site}/a`,
`${site}/b`,
`${site}/c`,
],
site
);
assert.equal(items.length, 3);
assert.equal(items[0].url, `${site}/a`);
assert.equal(items[1].url, `${site}/b`);
assert.equal(items[2].url, `${site}/c`);
});

it('sorts the items', () => {
const items = generateSitemap(
[
// All pages
`${site}/c`,
`${site}/a`,
`${site}/b`,
],
site
);
assert.equal(items.length, 3);
assert.equal(items[0].url, `${site}/a`);
assert.equal(items[1].url, `${site}/b`);
assert.equal(items[2].url, `${site}/c`);
});

it('sitemap props are passed to items', () => {
const now = new Date();
const items = generateSitemap(
[
// All pages
`${site}/a`,
`${site}/b`,
`${site}/c`,
],
site,
{
changefreq: 'monthly',
lastmod: now,
priority: 0.5,
}
);

assert.equal(items.length, 3);

assert.equal(items[0].url, `${site}/a`);
assert.equal(items[0].changefreq, 'monthly');
assert.equal(items[0].lastmod, now.toISOString());
assert.equal(items[0].priority, 0.5);

assert.equal(items[1].url, `${site}/b`);
assert.equal(items[1].changefreq, 'monthly');
assert.equal(items[1].lastmod, now.toISOString());
assert.equal(items[1].priority, 0.5);

assert.equal(items[2].url, `${site}/c`);
assert.equal(items[2].changefreq, 'monthly');
assert.equal(items[2].lastmod, now.toISOString());
assert.equal(items[2].priority, 0.5);
});
});

describe('i18n', () => {
it('works', () => {
const items = generateSitemap(
[
// All pages
`${site}/a`,
`${site}/b`,
`${site}/c`,
`${site}/es/a`,
`${site}/es/b`,
`${site}/es/c`,
`${site}/fr/a`,
`${site}/fr/b`,
// `${site}/fr-CA/c`, (intentionally missing for testing)
],
site,
{
i18n: {
defaultLocale: 'en',
locales: {
en: 'en-US',
es: 'es-ES',
fr: 'fr-CA',
},
},
}
);

assert.equal(items.length, 8);

const aLinks = [
{ url: `${site}/a`, lang: 'en-US' },
{ url: `${site}/es/a`, lang: 'es-ES' },
{ url: `${site}/fr/a`, lang: 'fr-CA' },
];
const bLinks = [
{ url: `${site}/b`, lang: 'en-US' },
{ url: `${site}/es/b`, lang: 'es-ES' },
{ url: `${site}/fr/b`, lang: 'fr-CA' },
];
const cLinks = [
{ url: `${site}/c`, lang: 'en-US' },
{ url: `${site}/es/c`, lang: 'es-ES' },
];

assert.equal(items[0].url, `${site}/a`);
assert.deepEqual(items[0].links, aLinks);

assert.equal(items[1].url, `${site}/b`);
assert.deepEqual(items[1].links, bLinks);

assert.equal(items[2].url, `${site}/c`);
assert.deepEqual(items[2].links, cLinks);

assert.equal(items[3].url, `${site}/es/a`);
assert.deepEqual(items[3].links, aLinks);

assert.equal(items[4].url, `${site}/es/b`);
assert.deepEqual(items[4].links, bLinks);

assert.equal(items[5].url, `${site}/es/c`);
assert.deepEqual(items[5].links, cLinks);

assert.equal(items[6].url, `${site}/fr/a`);
assert.deepEqual(items[6].links, aLinks);

assert.equal(items[7].url, `${site}/fr/b`);
assert.deepEqual(items[7].links, bLinks);
});
});
});

0 comments on commit 1ce2288

Please sign in to comment.