Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

core(duplicated-javascript): new audit #10314

Merged
merged 25 commits into from
Mar 6, 2020
Merged
Show file tree
Hide file tree
Changes from 18 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
224 changes: 224 additions & 0 deletions lighthouse-core/audits/byte-efficiency/bundle-duplication.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,224 @@
/**
* @license Copyright 2020 Google Inc. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
*/
'use strict';

const ByteEfficiencyAudit = require('./byte-efficiency-audit.js');
const JavascriptDuplication = require('../../computed/javascript-duplication.js');
const i18n = require('../../lib/i18n/i18n.js');

const UIStrings = {
/** Imperative title of a Lighthouse audit that tells the user to remove duplicate JavaScript from their code. This is displayed in a list of audit titles that Lighthouse generates. */
title: 'Remove duplicate modules in JavaScript bundles',
/** Description of a Lighthouse audit that tells the user *why* they should defer loading any content in CSS that isn’t needed at page load. This is displayed after a user expands the section to see more. No word length limits. 'Learn More' becomes link text to additional documentation. */
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
/** Description of a Lighthouse audit that tells the user *why* they should defer loading any content in CSS that isn’t needed at page load. This is displayed after a user expands the section to see more. No word length limits. 'Learn More' becomes link text to additional documentation. */
/** Description of a Lighthouse audit that tells the user *why* they should remote duplicate JavaScript from their scripts. This is displayed after a user expands the section to see more. No word length limits. 'Learn More' becomes link text to additional documentation. */

description: 'Remove large, duplicate JavaScript modules from bundles ' +
'to reduce unnecessary bytes consumed by network activity. ', // +
// TODO: we need docs.
// '[Learn more](https://web.dev/bundle-duplication).',
};

const str_ = i18n.createMessageInstanceIdFn(__filename, UIStrings);

const IGNORE_THRESHOLD_IN_BYTES = 1024;

/**
* @param {string} haystack
* @param {string} needle
* @param {number} startPosition
*/
function indexOfOrEnd(haystack, needle, startPosition = 0) {
const index = haystack.indexOf(needle, startPosition);
return index === -1 ? haystack.length : index;
}

class BundleDuplication extends ByteEfficiencyAudit {
/**
* @return {LH.Audit.Meta}
*/
static get meta() {
return {
id: 'bundle-duplication',
title: str_(UIStrings.title),
description: str_(UIStrings.description),
scoreDisplayMode: ByteEfficiencyAudit.SCORING_MODES.NUMERIC,
requiredArtifacts: ['devtoolsLogs', 'traces', 'SourceMaps', 'ScriptElements'],
};
}

/**
* @param {string} source
*/
static _getNodeModuleName(source) {
const sourceSplit = source.split('node_modules/');
source = sourceSplit[sourceSplit.length - 1];

const indexFirstSlash = indexOfOrEnd(source, '/');
if (source[0] === '@') {
return source.slice(0, indexOfOrEnd(source, '/', indexFirstSlash + 1));
}

return source.slice(0, indexFirstSlash);
}

/**
* @param {LH.Artifacts} artifacts
* @param {LH.Audit.Context} context
*/
static async _getDuplicationGroupedByNodeModules(artifacts, context) {
const duplication = await JavascriptDuplication.request(artifacts, context);

/** @type {typeof duplication} */
const groupedDuplication = new Map();
for (const [source, sourceDatas] of duplication.entries()) {
if (!source.includes('node_modules')) {
groupedDuplication.set(source, sourceDatas);
continue;
}

const normalizedSource = 'node_modules/' + BundleDuplication._getNodeModuleName(source);
const aggregatedSourceDatas = groupedDuplication.get(normalizedSource) || [];
for (const {scriptUrl, size} of sourceDatas) {
let sourceData = aggregatedSourceDatas.find(d => d.scriptUrl === scriptUrl);
if (!sourceData) {
sourceData = {scriptUrl, size: 0};
aggregatedSourceDatas.push(sourceData);
}
sourceData.size += size;
}
groupedDuplication.set(normalizedSource, aggregatedSourceDatas);
}

for (const sourceDatas of duplication.values()) {
sourceDatas.sort((a, b) => b.size - a.size);
}

return groupedDuplication;
}

/**
* This audit highlights JavaScript modules that appear to be duplicated across all resources,
* either within the same bundle or between different bundles. Each details item returned is
* a module with subrows for each resource that includes it. The wastedBytes for the details
* item is the number of bytes occupied by the sum of all but the largest copy of the module.
* wastedBytesByUrl attributes the cost of the bytes to a specific resource, for use by lantern.
* @param {LH.Artifacts} artifacts
* @param {Array<LH.Artifacts.NetworkRequest>} networkRecords
* @param {LH.Audit.Context} context
* @return {Promise<ByteEfficiencyAudit.ByteEfficiencyProduct>}
*/
static async audit_(artifacts, networkRecords, context) {
connorjclark marked this conversation as resolved.
Show resolved Hide resolved
const ignoreThresholdInBytes =
context.options && context.options.ignoreThresholdInBytes || IGNORE_THRESHOLD_IN_BYTES;

const duplication =
await BundleDuplication._getDuplicationGroupedByNodeModules(artifacts, context);

/**
* @typedef ItemSubrows
* @property {string[]} urls
* @property {number[]} sourceBytes
*/

/**
* @typedef {LH.Audit.ByteEfficiencyItem & ItemSubrows} Item
*/

/** @type {Item[]} */
const items = [];

/** @type {Map<string, number>} */
const wastedBytesByUrl = new Map();
for (const [source, sourceDatas] of duplication.entries()) {
// One copy of this module is treated as the canonical version - the rest will have
// non-zero `wastedBytes`. In the case of all copies being the same version, all sizes are
// equal and the selection doesn't matter. When the copies are different versions, it does
// matter. Ideally the newest version would be the canonical copy, but version information
// is not present. Instead, size is used as a heuristic for latest version. This makes the
// audit conserative in its estimation.

const urls = [];
const bytesValues = [];
let wastedBytesTotal = 0;
for (let i = 0; i < sourceDatas.length; i++) {
const sourceData = sourceDatas[i];
const url = sourceData.scriptUrl;
urls.push(url);
bytesValues.push(sourceData.size);
if (i === 0) continue;
wastedBytesTotal += sourceData.size;
wastedBytesByUrl.set(url, (wastedBytesByUrl.get(url) || 0) + sourceData.size);
}

items.push({
source,
wastedBytes: wastedBytesTotal,
// Not needed, but keeps typescript happy.
url: '',
// Not needed, but keeps typescript happy.
totalBytes: 0,
urls,
sourceBytes: bytesValues,
});
}

/** @type {Item} */
const otherItem = {
source: 'Other',
wastedBytes: 0,
url: '',
totalBytes: 0,
urls: [],
sourceBytes: [],
};
for (const item of items.filter(item => item.wastedBytes <= ignoreThresholdInBytes)) {
otherItem.wastedBytes += item.wastedBytes;
for (let i = 0; i < item.urls.length; i++) {
const url = item.urls[i];
if (!otherItem.urls.includes(url)) {
otherItem.urls.push(url);
}
}
items.splice(items.indexOf(item), 1);
}
if (otherItem.wastedBytes > ignoreThresholdInBytes) {
items.push(otherItem);
}

// Convert bytes to transfer size estimation.
for (const [url, bytes] of wastedBytesByUrl.entries()) {
patrickhulce marked this conversation as resolved.
Show resolved Hide resolved
const networkRecord = networkRecords.find(n => n.url === url);
const script = artifacts.ScriptElements.find(script => script.src === url);
if (!networkRecord || !script || script.content === null) {
// ?
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Suggested change
// ?
// this should never happen because we found the content from scripts that were bundles with source maps

could they have been inline scripts with a source map comment though? maybe we should fallback to the main resource for estimation which is what we did normally for estimateTransferSize (in unused css for example)

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah that's exactly what we should do for inline bundles.

continue;
}

const contentLength = script.content.length;
const transferSize =
ByteEfficiencyAudit.estimateTransferSize(networkRecord, contentLength, 'Script');
const transferRatio = transferSize / contentLength;
wastedBytesByUrl.set(url, bytes * transferRatio);
}

/** @type {LH.Audit.Details.OpportunityColumnHeading[]} */
const headings = [
/* eslint-disable max-len */
{key: 'source', valueType: 'code', subRows: {key: 'urls', valueType: 'url'}, label: str_(i18n.UIStrings.columnName)}, // TODO: or 'Source'?
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

let's decide this Source bit now? I'm fine with either, personally I don't think any sufficiently short name can completely capture what we're surfacing here, but source probably edges out a little bit

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I would actually propose module today.

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I guess source is better since this can be a module or a package name

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

yeah source now that we've got the awesome node_module business, sgtm 👍

{key: '_', valueType: 'bytes', subRows: {key: 'sourceBytes'}, granularity: 0.05, label: str_(i18n.UIStrings.columnSize)},
{key: 'wastedBytes', valueType: 'bytes', granularity: 0.05, label: str_(i18n.UIStrings.columnWastedBytes)},
/* eslint-enable max-len */
];

// TODO: show warning somewhere if no source maps.
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

or maybe notApplicable if nothing looks bundleish?

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

that seems fine, but gotta wire that to the parent class ...

we good with that? n/a if bundles.length === 0

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

ah if we gotta wire it let's just punt then, this has grown enough :)

return {
items,
headings,
wastedBytesByUrl,
};
}
}

module.exports = BundleDuplication;
module.exports.UIStrings = UIStrings;
22 changes: 13 additions & 9 deletions lighthouse-core/audits/byte-efficiency/byte-efficiency-audit.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ const WASTED_MS_FOR_SCORE_OF_ZERO = 5000;
/**
* @typedef {object} ByteEfficiencyProduct
* @property {Array<LH.Audit.ByteEfficiencyItem>} items
* @property {Map<string, number>=} wastedBytesByUrl
* @property {LH.Audit.Details.Opportunity['headings']} headings
* @property {string} [displayValue]
* @property {string} [explanation]
Expand Down Expand Up @@ -135,7 +136,7 @@ class UnusedBytes extends Audit {
* @param {Array<LH.Audit.ByteEfficiencyItem>} results The array of byte savings results per resource
* @param {Node} graph
* @param {Simulator} simulator
* @param {{includeLoad?: boolean, label?: string}=} options
* @param {{includeLoad?: boolean, label?: string, providedWastedBytesByUrl?: Map<string, number>}=} options
* @return {number}
*/
static computeWasteWithTTIGraph(results, graph, simulator, options) {
Expand All @@ -144,24 +145,25 @@ class UnusedBytes extends Audit {
const afterLabel = `${options.label}-after`;

const simulationBeforeChanges = simulator.simulate(graph, {label: beforeLabel});
/** @type {Map<string, LH.Audit.ByteEfficiencyItem>} */
const resultsByUrl = new Map();
for (const result of results) {
resultsByUrl.set(result.url, result);

const wastedBytesByUrl = options.providedWastedBytesByUrl || new Map();
if (!options.providedWastedBytesByUrl) {
for (const {url, wastedBytes} of results) {
wastedBytesByUrl.set(url, (wastedBytesByUrl.get(url) || 0) + wastedBytes);
}
}

// Update all the transfer sizes to reflect implementing our recommendations
/** @type {Map<string, number>} */
const originalTransferSizes = new Map();
graph.traverse(node => {
if (node.type !== 'network') return;
const result = resultsByUrl.get(node.record.url);
if (!result) return;
const wastedBytes = wastedBytesByUrl.get(node.record.url);
if (!wastedBytes) return;

const original = node.record.transferSize;
originalTransferSizes.set(node.record.requestId, original);

const wastedBytes = result.wastedBytes;
node.record.transferSize = Math.max(original - wastedBytes, 0);
});

Expand Down Expand Up @@ -197,7 +199,9 @@ class UnusedBytes extends Audit {

const wastedBytes = results.reduce((sum, item) => sum + item.wastedBytes, 0);
const wastedKb = Math.round(wastedBytes / KB_IN_BYTES);
const wastedMs = this.computeWasteWithTTIGraph(results, graph, simulator);
const wastedMs = this.computeWasteWithTTIGraph(results, graph, simulator, {
providedWastedBytesByUrl: result.wastedBytesByUrl,
});

let displayValue = result.displayValue || '';
if (typeof result.displayValue === 'undefined' && wastedBytes) {
Expand Down
104 changes: 104 additions & 0 deletions lighthouse-core/computed/javascript-duplication.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
/**
* @license Copyright 2020 Google Inc. All Rights Reserved.
* Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except in compliance with the License. You may obtain a copy of the License at http://www.apache.org/licenses/LICENSE-2.0
* Unless required by applicable law or agreed to in writing, software distributed under the License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License.
*/
'use strict';

const makeComputedArtifact = require('./computed-artifact.js');
const JsBundles = require('./js-bundles.js');

class JavascriptDuplication {
/**
* @param {string} source
*/
static _normalizeSource(source) {
// Trim trailing question mark - b/c webpack.
source = source.replace(/\?$/, '');

// Normalize paths for dependencies by only keeping everything after the last `node_modules`.
const lastNodeModulesIndex = source.lastIndexOf('node_modules');
if (lastNodeModulesIndex !== -1) {
source = source.substring(lastNodeModulesIndex);
}

return source;
}

/**
* @param {string} source
*/
static _shouldIgnoreSource(source) {
// Ignore bundle overhead.
if (source.includes('webpack/bootstrap')) return true;
if (source.includes('(webpack)/buildin')) return true;
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

haha they really call this buildin instead of builtin? I'm sure there's a story there but that's funny 😆

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

just curious did you take a look at any sites built with parcel for their overhead names?

https://lhci-canary.herokuapp.com/app/projects/d1e4b15c-e644-4552-b136-e975f486a2ce/dashboard should have sourcemaps and multiple bundles for future examples if you need one

Copy link
Collaborator Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  JSBundles:error https://lhci-canary.herokuapp.com/app/entry.b99b2aba.js.map mapping for last column out of bounds: 1:1070 +21ms

:o

Copy link
Collaborator Author

@connorjclark connorjclark Mar 5, 2020

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

  1. js-bundles doesn't propagate these errors to consumers so the bundle dupe audit just says everything is green. boo.
  2. here are all the parcel modules that i found in maps on that page
"../../../../node_modules/parcel-bundler/src/builtins/bundle-url.js",
"../../../../node_modules/parcel-bundler/src/builtins/bundle-loader.js",
"../../../../node_modules/parcel-bundler/src/builtins/loaders/browser/css-loader.js",
"../../../../node_modules/parcel-bundler/src/builtins/loaders/browser/js-loader.js"
"../../../../node_modules/parcel-bundler/src/builtins/bundle-url.js",
"../../../../node_modules/parcel-bundler/src/builtins/bundle-loader.js",
"../../../../node_modules/parcel-bundler/src/builtins/loaders/browser/css-loader.js",
"../../../../node_modules/parcel-bundler/src/builtins/loaders/browser/js-loader.js"

FWIW, I audited my own project that uses parcel and got none of these

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

interesting thanks for looking into ti! we can followup here if we end up seeing this in practice later


// Ignore shims.
connorjclark marked this conversation as resolved.
Show resolved Hide resolved
if (source.includes('external ')) return true;

return false;
}

/**
* @param {LH.Artifacts} artifacts
* @param {LH.Audit.Context} context
*/
static async compute_(artifacts, context) {
const bundles = await JsBundles.request(artifacts, context);

/**
* @typedef SourceData
* @property {string} source
* @property {number} size
*/

/** @type {Map<LH.Artifacts.RawSourceMap, SourceData[]>} */
const sourceDatasMap = new Map();

// Determine size of each `sources` entry.
for (const {rawMap, sizes} of bundles) {
/** @type {SourceData[]} */
const sourceDataArray = [];
sourceDatasMap.set(rawMap, sourceDataArray);

for (let i = 0; i < rawMap.sources.length; i++) {
if (this._shouldIgnoreSource(rawMap.sources[i])) continue;

const sourceKey = (rawMap.sourceRoot || '') + rawMap.sources[i];
const sourceSize = sizes.files[sourceKey];
sourceDataArray.push({
source: JavascriptDuplication._normalizeSource(rawMap.sources[i]),
size: sourceSize,
});
}
}

/** @type {Map<string, Array<{scriptUrl: string, size: number}>>} */
const sourceDataAggregated = new Map();
for (const {rawMap, script} of bundles) {
const sourceDataArray = sourceDatasMap.get(rawMap);
if (!sourceDataArray) continue;

for (const sourceData of sourceDataArray) {
let data = sourceDataAggregated.get(sourceData.source);
if (!data) {
data = [];
sourceDataAggregated.set(sourceData.source, data);
}
data.push({
scriptUrl: script.src || '',
size: sourceData.size,
});
}
}

for (const [key, value] of sourceDataAggregated.entries()) {
patrickhulce marked this conversation as resolved.
Show resolved Hide resolved
if (value.length === 1) sourceDataAggregated.delete(key);
else value.sort((a, b) => b.size - a.size);
}

return sourceDataAggregated;
}
}

module.exports = makeComputedArtifact(JavascriptDuplication);
Loading