Skip to content

Commit

Permalink
Remove pixel cookie sharing detection for now
Browse files Browse the repository at this point in the history
  • Loading branch information
ghostwords committed Jul 21, 2020
1 parent 236e304 commit de00f9e
Show file tree
Hide file tree
Showing 4 changed files with 3 additions and 276 deletions.
165 changes: 3 additions & 162 deletions src/js/heuristicblocking.js
Original file line number Diff line number Diff line change
Expand Up @@ -102,9 +102,8 @@ HeuristicBlocker.prototype = {
* Use updateTrackerPrevalence for non-webRequest initiated bookkeeping.
*
* @param {Object} details request/response details
* @param {Boolean} check_for_cookie_share whether to check for cookie sharing
*/
heuristicBlockingAccounting: function (details, check_for_cookie_share) {
heuristicBlockingAccounting: function (details) {
// ignore requests that are outside a tabbed window
if (details.tabId < 0 || !incognito.learningEnabled(details.tabId)) {
return {};
Expand Down Expand Up @@ -145,119 +144,6 @@ HeuristicBlocker.prototype = {
self._recordPrevalence(request_host, request_origin, tab_origin);
return {};
}

// check for cookie sharing iff this is an image in the top-level frame, and the request URL has parameters
if (check_for_cookie_share && details.type == 'image' && details.frameId === 0 && details.url.indexOf('?') > -1) {
// get all non-HttpOnly cookies for the top-level frame
// and pass those to the cookie-share accounting function
let tab_url = self.tabUrls[details.tabId];

let config = {
url: tab_url
};
if (badger.firstPartyDomainPotentiallyRequired) {
config.firstPartyDomain = null;
}

chrome.cookies.getAll(config, function (cookies) {
cookies = cookies.filter(cookie => !cookie.httpOnly);
if (cookies.length >= 1) {
self.pixelCookieShareAccounting(tab_url, tab_origin, details.url, request_host, request_origin, cookies);
}
});
}
},

/**
* Checks for cookie sharing: requests to third-party domains that include
* high entropy data from first-party cookies (associated with the top-level
* frame). Only catches plain-text verbatim sharing (b64 encoding + the like
* defeat it). Assumes any long string that doesn't contain URL fragments or
* stopwords is an identifier. Doesn't catch cookie syncing (3rd party -> 3rd
* party), but most of those tracking cookies should be blocked anyway.
*
* @param details are those from onBeforeSendHeaders
* @param cookies are the result of chrome.cookies.getAll()
* @returns {*}
*/
pixelCookieShareAccounting: function (tab_url, tab_origin, request_url, request_host, request_origin, cookies) {
let params = (new URL(request_url)).searchParams,
TRACKER_ENTROPY_THRESHOLD = 33,
MIN_STR_LEN = 8;

for (let p of params) {
let key = p[0],
value = p[1];

// the argument must be sufficiently long
if (!value || value.length < MIN_STR_LEN) {
continue;
}

// check if this argument is derived from a high-entropy first-party cookie
for (let cookie of cookies) {
// the cookie value must be sufficiently long
if (!cookie.value || cookie.value.length < MIN_STR_LEN) {
continue;
}

// find the longest common substring between this arg and the cookies
// associated with the document
let substrings = utils.findCommonSubstrings(cookie.value, value) || [];
for (let s of substrings) {
// ignore the substring if it's part of the first-party URL. sometimes
// content servers take the url of the page they're hosting content
// for as an argument. e.g.
// https://example-cdn.com/content?u=http://example.com/index.html
if (tab_url.indexOf(s) != -1) {
continue;
}

// elements of the user agent string are also commonly included in
// both cookies and arguments; e.g. "Mozilla/5.0" might be in both.
// This is not a special tracking risk since third parties can see
// this info anyway.
if (navigator.userAgent.indexOf(s) != -1) {
continue;
}

// Sometimes the entire url and then some is included in the
// substring -- the common string might be "https://example.com/:true"
// In that case, we only care about the information around the URL.
if (s.indexOf(tab_url) != -1) {
s = s.replace(tab_url, "");
}

// During testing we found lots of common values like "homepage",
// "referrer", etc. were being flagged as high entropy. This searches
// for a few of those and removes them before we go further.
let lower = s.toLowerCase();
lowEntropyQueryValues.forEach(function (qv) {
let start = lower.indexOf(qv);
if (start != -1) {
s = s.replace(s.substring(start, start + qv.length), "");
}
});

// at this point, since we might have removed things, make sure the
// string is still long enough to bother with
if (s.length < MIN_STR_LEN) {
continue;
}

// compute the entropy of this common substring. if it's greater than
// our threshold, record the tracking action and exit the function.
let entropy = utils.estimateMaxEntropy(s);
if (entropy > TRACKER_ENTROPY_THRESHOLD) {
log("Found high-entropy cookie share from", tab_origin, "to", request_host,
":", entropy, "bits\n cookie:", cookie.name, '=', cookie.value,
"\n arg:", key, "=", value, "\n substring:", s);
this._recordPrevalence(request_host, request_origin, tab_origin);
return;
}
}
}
}
},

/**
Expand Down Expand Up @@ -548,51 +434,6 @@ var lowEntropyCookieValues = {
"zu":8
};

// Common low-entropy fragments that get stripped out of candidate strings
// before their entropy is estimated by the pixel cookie sharing heuristic.
//
// Entries must be lowercase: the heuristic looks them up in a lowercased copy
// of the candidate string, so an entry containing uppercase characters could
// never match. Each entry should appear only once.
const lowEntropyQueryValues = [
  "https",
  "http",
  "://",
  "%3a%2f%2f",
  "www",
  "url",
  "undefined",
  "impression",
  "session",
  "homepage",
  "client",
  "version",
  "business",
  "title",
  "get",
  "site",
  "name",
  "category",
  "account_id",
  "smartadserver",
  "front",
  "page",
  "view",
  "first",
  "visit",
  "platform",
  "language",
  "automatic",
  "disabled",
  "landing",
  "entertainment",
  "amazon",
  "official",
  "webvisor",
  "anonymous",
  "across",
  "narrative",
  "\":null",
  "\":false",
  "\":\"",
  "\",\"",
];

/**
* Extract cookies from onBeforeSendHeaders
*
Expand Down Expand Up @@ -682,7 +523,7 @@ function startListeners() {
extraInfoSpec.push('extraHeaders');
}
chrome.webRequest.onBeforeSendHeaders.addListener(function(details) {
return badger.heuristicBlocking.heuristicBlockingAccounting(details, true);
return badger.heuristicBlocking.heuristicBlockingAccounting(details);
}, {urls: ["<all_urls>"]}, extraInfoSpec);

/**
Expand All @@ -701,7 +542,7 @@ function startListeners() {
}
}
if (hasSetCookie) {
return badger.heuristicBlocking.heuristicBlockingAccounting(details, false);
return badger.heuristicBlocking.heuristicBlockingAccounting(details);
}
},
{urls: ["<all_urls>"]}, extraInfoSpec);
Expand Down
43 changes: 0 additions & 43 deletions src/js/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -200,48 +200,6 @@ function estimateMaxEntropy(str) {
return max_bits;
}

// Adapted from https://gist.github.com/jaewook77/cd1e3aa9449d7ea4fb4f
// Find all maximal common substrings at least 8 characters long, using
// DYNAMIC PROGRAMMING
/**
 * Finds the substrings that two strings have in common.
 *
 * Every maximal run of matching characters (a run that cannot be extended
 * any further to the right) of at least 8 characters is reported once, in
 * the order in which the runs end while scanning str1 left to right. The
 * same text may be reported more than once if it lines up with several
 * places in str2.
 *
 * @param {String} str1 first string; returned substrings are slices of it
 * @param {String} str2 second string
 * @returns {Array} the common substrings (empty if there are none)
 */
function findCommonSubstrings(str1, str2) {
  /*
    Let D[i,j] be the length of the longest matching string suffix between
    str1[1]..str1[i] and a segment of str2 between str2[1]..str2[j].
    If the ith character in str1 doesn't match the jth character in str2, then
    D[i,j] is zero to indicate that there is no matching suffix.

    D[i,j] only depends on D[i-1,j-1], so only the previous row is kept.
  */

  // we only care about strings >= 8 chars
  const LCS_MIN = 8;
  const LCS = [];
  let prev_row = [];

  // runs in O(M x N) time!
  for (let i = 0; i < str1.length; i++) {
    const row = [];

    for (let j = 0; j < str2.length; j++) {
      if (str1[i] !== str2[j]) {
        row[j] = 0;
        continue;
      }

      const run = (i === 0 || j === 0) ? 1 : prev_row[j - 1] + 1;
      row[j] = run;

      // Only record a match once it can no longer grow. Recording on every
      // step and replacing "the last entry" is wrong when two matches
      // overlap on the same rows of the table: each one would evict the
      // other's entry instead of its own shorter prefix.
      const run_ends = (
        i + 1 >= str1.length ||
        j + 1 >= str2.length ||
        str1[i + 1] !== str2[j + 1]
      );

      if (run_ends && run >= LCS_MIN) {
        LCS.push(str1.substring(i - run + 1, i + 1));
      }
    }

    prev_row = row;
  }

  return LCS;
}

/**
 * Returns the constant 1000 (presumably one second expressed in
 * milliseconds -- confirm against callers).
 *
 * @returns {Number} 1000
 */
function oneSecond() {
  const ONE_SECOND = 1000;
  return ONE_SECOND;
}
Expand Down Expand Up @@ -468,7 +426,6 @@ let exports = {
arrayBufferToBase64,
estimateMaxEntropy,
explodeSubdomains,
findCommonSubstrings,
getHostFromDomainInput,
isRestrictedUrl,
isThirdPartyDomain,
Expand Down
24 changes: 0 additions & 24 deletions src/tests/tests/utils.js
Original file line number Diff line number Diff line change
Expand Up @@ -487,30 +487,6 @@ QUnit.test("getHostFromDomainInput", assert => {
);
});

// Exercises utils.findCommonSubstrings, the substring matcher used by the
// pixel tracking heuristic: given two strings it should return the
// substrings they share.
QUnit.test("findCommonSubstrings", (assert) => {
  // the shared prefix here is only seven characters long
  const too_short = utils.findCommonSubstrings("www.foo.bar", "www.foob.ar");
  assert.deepEqual(
    too_short,
    [],
    "substrings under the length threshold of 8 are ignored"
  );

  // the two URLs diverge after a long shared prefix
  const url_matches = utils.findCommonSubstrings(
    "foobar.com/foo/fizz/buzz/bar",
    "foobar.com/foo/bizz/fuzz/bar"
  );
  assert.equal(
    url_matches[0],
    "foobar.com/foo/",
    "returns longest matching value from the pair of URLs"
  );

  // a shared prefix and a shared suffix, separated by an insertion
  const multiple_matches = utils.findCommonSubstrings(
    "foobar.com/fizz/buzz/bar/foo",
    "foobar.com/fizzbuzz/buzz/bar/foo"
  );
  assert.deepEqual(
    multiple_matches,
    ["foobar.com/fizz", "zz/buzz/bar/foo"],
    "returns multiple substrings if multiple are present in comparison"
  );
});

// used in pixel tracking heuristic, given a string the estimateMaxEntropy function
// will return the estimated entropy value from it, based on logic parsing the string's length,
// and classes of character complication included in the string
Expand Down
47 changes: 0 additions & 47 deletions tests/selenium/cookie_sharing_test.py

This file was deleted.

0 comments on commit de00f9e

Please sign in to comment.