Skip to content

Commit

Permalink
Sitemap rewrite first commit (experimental)
Browse files Browse the repository at this point in the history
  • Loading branch information
NJKode committed Apr 8, 2021
1 parent 9ad6d57 commit 2ec6f5b
Show file tree
Hide file tree
Showing 6 changed files with 384 additions and 137 deletions.
7 changes: 5 additions & 2 deletions cli.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,9 @@ const commands = {
'clean': createCommand(runner.clean, ['dest']),
'clone-assets': createCommand(runner.clone_assets, ['baseurl', 'dest']),
'reseed': createCommand(runner.build, ['baseurl', 'dest']),
'rewrite_css': createCommand(runner.rewrite_css, ['baseurl', 'dest']),
'rewrite-css': createCommand(runner.rewrite_css, ['baseurl', 'dest']),
'rewrite-html': createCommand(runner.rewrite_html, ['baseurl', 'dest']),
'rewrite-sitemap': createCommand(runner.rewrite_sitemap, ['baseurl', 'dest']),
'serve': createCommand(runner.buildAndServe, ['baseurl', 'dest']),
'watch': createCommand(runner.watch, ['baseurl', 'dest'])
/* eslint-enable quote-props */
Expand Down Expand Up @@ -93,6 +94,7 @@ module.exports = {
const source = flags.source || defaultSrc;
const destination = flags.dest;
const baseurl = flags.baseurl || '';
const sitemap = flags.sitemap || 'sitemap.xml';
const port = this.checkPortNumber(flags.port) || defaultPort;
const split = flags.split || 1;
const partition = flags.partition || 1;
Expand All @@ -104,7 +106,8 @@ module.exports = {
paths: {
src: source,
dest: destination,
baseurl: baseurl
baseurl: baseurl,
sitemap: sitemap
},
serve: {
port: port,
Expand Down
6 changes: 6 additions & 0 deletions index.js
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ Commands:
clone-assets Clones non CSS and HTML files from src to dest
rewrite-css Clones CSS files from src to dest and rewrites urls to include baseurl
rewrite-html Clones HTML files from src to dest and rewrites attributes to include baseurl
rewrite-sitemap Finds all sitemap files based on index sitemap, and rewrites links to include baseurl
serve Runs 'build' then a local webserver on the dest folder
watch Watches the src folder and triggers builds
Expand All @@ -23,6 +24,7 @@ Options:
-b | --baseurl The base-URL to prepend to the files once copied
-p | --port The portnumber to serve the cloned site on
-e | --extrasrc A list of extra src attributes to be rewritten
-m | --sitemap A path to a valid sitemap file
-o | --overwrite When cleaning --dest, don't prompt for confirmation
--split The number of partitions to divide files into
--partition The partition number to process
Expand Down Expand Up @@ -57,6 +59,10 @@ const inputs = meow(
alias: 'e',
isMultiple: true
},
sitemap: {
type: 'string',
alias: 'm'
},
overwrite: {
type: 'boolean',
alias: 'o'
Expand Down
76 changes: 76 additions & 0 deletions lib/processors/sitemap.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
const fs = require('fs-extra');
const cheerio = require('cheerio');
const log = require('fancy-log');
const { URL } = require('url');

// Original <loc> values of child sitemaps collected while rewriting a sitemap index.
// Module-level state: rewriteXML() appends to it and plugin() resets it on every call.
let extraPaths = [];

/**
 * Prefixes the path of an absolute URL with the given baseurl.
 *
 * The origin, query string and fragment of the link are preserved, and exactly
 * one slash separates the origin, the baseurl and the original path.
 *
 * @param {string} baseurl the baseurl to prepend; leading and trailing slashes are optional.
 * @param {string} link the absolute URL to rewrite.
 * @returns {string} the rewritten URL, or `link` unchanged when it cannot be
 * parsed as an absolute URL or has no usable origin.
 */
function rewritePath(baseurl, link) {
    let url;
    try {
        url = new URL(link);
    } catch (urlError) {
        // Relative or malformed links are deliberately left untouched.
        return link;
    }

    // Schemes without a tuple origin (mailto:, data:, ...) serialise their
    // origin as the string 'null'; there is no host to rebuild the link on.
    if (url.origin === 'null') {
        return link;
    }

    // Normalise the baseurl so '/base', 'base/' and 'base' behave identically
    // and never introduce a double slash after the origin.
    const prefix = String(baseurl || '').replace(/^\/+|\/+$/g, '');
    // url.pathname always starts with '/', so it can be appended directly.
    const newPath = prefix ? `/${prefix}${url.pathname}` : url.pathname;

    return `${url.origin}${newPath}${url.search}${url.hash}`;
}

/**
 * Rewrites every URL in a sitemap (or sitemap index) document to include the baseurl.
 *
 * Both the text of each <loc> element and the href attribute of each
 * <xhtml:link> element are passed through rewritePath(). When the document's
 * first root element is <sitemapindex>, the original (pre-rewrite) text of each
 * <loc> whose parent is a <sitemap> element is appended to the module-level
 * extraPaths list.
 *
 * @param {string} xml the sitemap XML source.
 * @param {string} baseurl the baseurl to prepend to each URL path.
 * @returns {string} the serialised, rewritten XML.
 */
function rewriteXML(xml, baseurl) {
    const $ = cheerio.load(xml, { xmlMode: true });

    const topLevel = $.root().children()[0];
    const isIndex = topLevel && topLevel.name === 'sitemapindex';

    $('loc').each((index, node) => {
        const $loc = $(node);
        const before = $loc.text();
        $loc.text(rewritePath(baseurl, before));

        if (!isIndex) {
            return;
        }
        // Only <loc> entries directly under <sitemap> refer to child sitemaps.
        if ($loc.parent()[0].name === 'sitemap') {
            extraPaths.push(before);
        }
    });

    // The colon in the namespaced tag name must be escaped for the selector.
    $('xhtml\\:link').each((index, node) => {
        const $link = $(node);
        $link.attr('href', rewritePath(baseurl, $link.attr('href')));
    });

    return $.xml();
}

module.exports = {
rewrite: rewriteXML,

/** Handles rewriting urls in sitemap(s)
*
* @param {string} file the absolute path to the sitemap file.
* @param {string} destination the absolute path to the destination directory.
* @param {string} baseurl the baseurl to prepend to the source files.
*/
plugin: function (file, destination, baseurl) {
extraPaths = [];
if (!file) {
log.error('Error rewriting XML: Invalid file specified.');
return 1;
}
if (!destination || !baseurl) {
log.error('Error rewriting XML: No destination specified.');
return 1;
}
const contents = fs.readFileSync(file);
const xml = contents.toString('utf-8');

if (!xml) {
return 0;
}
const rewritten = rewriteXML(xml, baseurl);

fs.writeFileSync(file, rewritten);
return extraPaths.length ? extraPaths : 0;
}
};
Loading

0 comments on commit 2ec6f5b

Please sign in to comment.