Skip to content

Commit

Permalink
[CRX] Add fallback PDF detection for Chrome 127-
Browse files Browse the repository at this point in the history
  • Loading branch information
Rob--W committed Sep 8, 2024
1 parent 3fe256c commit 4327502
Show file tree
Hide file tree
Showing 3 changed files with 58 additions and 2 deletions.
22 changes: 22 additions & 0 deletions extensions/chromium/contentscript.js
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,9 @@ function getViewerURL(pdf_url) {
}

// Listen in the capture phase for CSS animation starts; onAnimationStart
// reacts to the "pdfjs-detected-object-or-embed" animation name.
document.addEventListener("animationstart", onAnimationStart, true);
// The document itself has the PDF content type. Ask the background script
// (the "canRequestBody" message) whether this frame is known to have been
// loaded through a non-GET request; maybeRenderPdfDoc receives the answer and
// decides whether to open the viewer.
if (document.contentType === "application/pdf") {
chrome.runtime.sendMessage({ action: "canRequestBody" }, maybeRenderPdfDoc);
}

function onAnimationStart(event) {
if (event.animationName === "pdfjs-detected-object-or-embed") {
Expand Down Expand Up @@ -221,3 +224,22 @@ function getEmbeddedViewerURL(path) {
path = a.href;
return getViewerURL(path) + fragment;
}

/**
 * Response callback for the "canRequestBody" message: navigates the current
 * document to the PDF viewer unless the document came from a POST request.
 *
 * @param {boolean} isNotPOST - Truthy when the background script does not know
 *   of a POST (non-GET) request for this frame. Any falsy value, including a
 *   missing response, leaves the page untouched.
 */
function maybeRenderPdfDoc(isNotPOST) {
  if (isNotPOST) {
    // Detected PDF that was not redirected by the declarativeNetRequest rules.
    // Maybe because this was served without Content-Type and sniffed as PDF.
    // Or because this is Chrome 127-, which does not support responseHeaders
    // condition in declarativeNetRequest (DNR), and PDF requests are therefore
    // not redirected via DNR.

    // In any case, load the viewer.
    const pdfUrl = document.URL;
    console.log(`Detected PDF via document, opening viewer for ${pdfUrl}`);
    location.href = getEmbeddedViewerURL(pdfUrl);
  }
  // Otherwise: the document was loaded through a POST request, but we cannot
  // access the original response body, nor safely send a new request to fetch
  // the PDF. Until #4483 is fixed, POST requests should be ignored.
}
9 changes: 8 additions & 1 deletion extensions/chromium/pdfHandler.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,8 @@ See the License for the specific language governing permissions and
limitations under the License.
*/

/* globals canRequestBody */ // From preserve-referer.js

"use strict";

var VIEWER_URL = chrome.runtime.getURL("content/web/viewer.html");
Expand Down Expand Up @@ -202,7 +204,7 @@ async function registerPdfRedirectRule() {
// https://github.com/w3c/webextensions/issues/638#issuecomment-2181124486
//
// We do not bother with detecting that because we fall back to catching
// PDF documents via the content script.
// PDF documents via maybeRenderPdfDoc in contentscript.js.
} catch (e) {
console.error("Failed to register rules to redirect PDF requests.");
console.error(e);
Expand Down Expand Up @@ -303,6 +305,11 @@ chrome.runtime.onMessage.addListener(function (message, sender, sendResponse) {
url,
});
}
return undefined;
}
if (message && message.action === "canRequestBody") {
sendResponse(canRequestBody(sender.tab.id, sender.frameId));
return undefined;
}
return undefined;
});
29 changes: 28 additions & 1 deletion extensions/chromium/preserve-referer.js
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,8 @@ limitations under the License.
* See setReferer in chromecom.js for more explanation of this logic.
*/

/* exported canRequestBody */ // Used in pdfHandler.js

// g_referrers[tabId][frameId] = referrer of PDF frame.
var g_referrers = {};
var g_referrerTimers = {};
Expand All @@ -38,14 +40,18 @@ var g_referrerTimers = {};
// from being kept in memory for too long, cap the data duration to 5 minutes.
var REFERRER_IN_MEMORY_TIME = 300000;

// g_postRequests[tabId] = Set of frameIds loaded via a non-GET request (POST).
var g_postRequests = {};

var rIsReferer = /^referer$/i;
chrome.webRequest.onSendHeaders.addListener(
function saveReferer(details) {
const { tabId, frameId, requestHeaders } = details;
const { tabId, frameId, requestHeaders, method } = details;
g_referrers[tabId] ??= {};
g_referrers[tabId][frameId] = requestHeaders.find(h =>
rIsReferer.test(h.name)
)?.value;
setCanRequestBody(tabId, frameId, method !== "GET");
forgetReferrerEventually(tabId);
},
{ urls: ["*://*/*"], types: ["main_frame", "sub_frame"] },
Expand All @@ -59,9 +65,30 @@ function forgetReferrerEventually(tabId) {
g_referrerTimers[tabId] = setTimeout(() => {
delete g_referrers[tabId];
delete g_referrerTimers[tabId];
delete g_postRequests[tabId];
}, REFERRER_IN_MEMORY_TIME);
}

/**
 * Records whether the document in tabId + frameId was loaded through a POST
 * form submission. This is unrelated to referrer tracking; it lives here only
 * so that the webRequest listener above can be re-used.
 *
 * @param {number} tabId - Tab that owns the frame.
 * @param {number} frameId - Frame whose latest request is being recorded.
 * @param {boolean} isPOST - Whether the request should be treated as a POST
 *   (the webRequest listener passes `method !== "GET"`).
 */
function setCanRequestBody(tabId, frameId, isPOST) {
  if (!isPOST) {
    // A later GET load of the same frame clears any earlier POST marker.
    g_postRequests[tabId]?.delete(frameId);
    return;
  }
  const postFrames = (g_postRequests[tabId] ??= new Set());
  postFrames.add(frameId);
}

/**
 * Tells whether the document in tabId + frameId may be requested again.
 *
 * Returns true unless the frame is known to be loaded through a POST request.
 * If the background suspends, the information may be lost. This is acceptable
 * because the information is only potentially needed shortly after document
 * load, by contentscript.js.
 *
 * @param {number} tabId - Tab that owns the frame.
 * @param {number} frameId - Frame to look up.
 * @returns {boolean} false only when a POST was recorded for the frame.
 */
function canRequestBody(tabId, frameId) {
  const postFrames = g_postRequests[tabId];
  if (postFrames == null) {
    // Nothing recorded for this tab.
    return true;
  }
  return !postFrames.has(frameId);
}

// This method binds a webRequest event handler which adds the Referer header
// to matching PDF resource requests (only if the Referer is non-empty). The
// handler is removed as soon as the PDF viewer frame is unloaded.
Expand Down

0 comments on commit 4327502

Please sign in to comment.