From a654af574754c98ad0b0b00cfc528c4671126f52 Mon Sep 17 00:00:00 2001 From: uBlock <35694050+uBlockAdmin@users.noreply.github.com> Date: Thu, 4 Apr 2019 10:15:41 +0530 Subject: [PATCH 1/2] Convert string-based processing into typed array-based processing of element hiding specific filters only --- platform/chromium/manifest.json | 2 +- src/js/background.js | 7 +- src/js/cosmetic-filtering.js | 172 ++++++++++++++++++---- src/js/start.js | 3 - src/js/storage.js | 3 - src/js/ublock.js | 246 ++++++++++++++++++++++++++++++++ 6 files changed, 394 insertions(+), 39 deletions(-) diff --git a/platform/chromium/manifest.json b/platform/chromium/manifest.json index 7d3c5cc49..be6c012f7 100644 --- a/platform/chromium/manifest.json +++ b/platform/chromium/manifest.json @@ -2,7 +2,7 @@ "manifest_version": 2, "name": "uBlock", - "version": "0.9.5.14", + "version": "0.9.5.15", "default_locale": "en", "description": "__MSG_extShortDesc__", diff --git a/src/js/background.js b/src/js/background.js index 63664f4ca..84652b40e 100644 --- a/src/js/background.js +++ b/src/js/background.js @@ -91,8 +91,8 @@ return { // read-only systemSettings: { - compiledMagic: 'eopszukpsabe', - selfieMagic: 'menhiasrsabe' + compiledMagic: 'eopszukpsddd', + selfieMagic: 'menhiasrsddd' }, restoreBackupSettings: { lastRestoreFile: '', @@ -147,8 +147,7 @@ return { // so that I don't have to care for last comma dummy: 0, - turnOffAA: true, - versionUpdateTo13: false + turnOffAA: true }; /******************************************************************************/ diff --git a/src/js/cosmetic-filtering.js b/src/js/cosmetic-filtering.js index 9329c69e1..caf444ce4 100644 --- a/src/js/cosmetic-filtering.js +++ b/src/js/cosmetic-filtering.js @@ -448,7 +448,8 @@ if(proceduremask != "") { hval |= proceduremask; } - return hval.toString(36); + //return hval.toString(36); + return hval; }; /******************************************************************************/ @@ -525,6 +526,8 @@ // hostname, entity-based filters this.hostnameFilters = {}; + this.hostnameFilterDataView = new this.hostnameFilterDataViewWrapper(); + this.hostnameFilterByteLength = 0; this.entityFilters = {}; }; @@ -534,7 +537,109 @@ // Detect and report invalid CSS selectors. FilterContainer.prototype.div = document.createElement('div'); - + /* + I took an idea from the technique used here: + https://github.com/cliqz-oss/adblocker/blob/master/src/engine/reverse-index.ts + */ + FilterContainer.prototype.hostnameFilterDataViewWrapper = function() { + this.objView; + this.tokenBucket; + this.tokenIndex = {}; + this.lru = new µb.LRUCache(16); + } + FilterContainer.prototype.hostnameFilterDataViewWrapper.prototype = { + pushToBuffer: function(hostnameFilters, bufferLength) { + let hostCssOffset = new Map(); + let computedIdSet = new Map(); + let tokenLength = {}; + let totalTokens = 0; + let totalHostnames = 0; + let tokenBucketIndex = 0; + let additionalBufferSpace = 50; + this.objView = new µb.dataView(bufferLength); + for (var token in hostnameFilters) { + totalTokens++; + tokenLength[token] = hostnameFilters[token].size; + if(hostCssOffset[token] === undefined) { + hostCssOffset[token] = new Map(); + } + let ob = this.objView; + hostnameFilters[token].forEach(function(value, key, map) { + totalHostnames++; + let computeId = µb.computeSelectorsId(value); + let cssOffset = computedIdSet.get(computeId); + if(cssOffset === undefined) { + cssOffset = ob.getPos(); + computedIdSet.set(computeId, cssOffset); + let cssString; + if(Array.isArray(value)) + cssString = value.join('\n'); + else + cssString = value; + ob.pushUTF8(cssString); + } + hostCssOffset[token].set(key, cssOffset); + }); + } + this.objView.buffer = this.objView.buffer.slice(0, this.objView.pos + (((totalTokens * 2) + (totalHostnames * 2)) * 4) + additionalBufferSpace); + let tokenBucketSize = totalTokens * 2; + this.tokenBucket = this.objView.getUint32ArrayView((totalTokens * 2) + (totalHostnames * 2)); + for (var token in hostnameFilters) { + this.tokenIndex[token] = tokenBucketIndex; + this.tokenBucket[tokenBucketIndex++] = token; + this.tokenBucket[tokenBucketIndex++] = tokenLength[token]; + let tb = this.tokenBucket; + hostnameFilters[token].forEach(function(value, key, map) { + tb[tokenBucketIndex++] = key; + tb[tokenBucketIndex++] = hostCssOffset[token].get(key); + }); + } + }, + retrieve: function(hosthashes, tokenHash, out) { + let loop; + let str = this.lru.get(tokenHash); + if(str != undefined) { + if(hosthashes.indexOf(str.k) != -1) { + out.push(...str.v); + } + } else { + loop = this.tokenIndex[tokenHash]; + if(loop !== undefined) { + loop++; + let hostLen = this.tokenBucket[loop]; + let next = loop + 1; + let ln = next + (hostLen * 2); + while(next < ln) { + let hostHash = this.tokenBucket[next]; + if(hosthashes.includes(hostHash)) { + let cssOffset = this.tokenBucket[next + 1]; + this.objView.setPos(cssOffset); + let cssDataString = this.objView.getUTF8(); + out.push(...cssDataString.split('\n')); + this.lru.set(tokenHash,{'k': hostHash,'v':cssDataString.split('\n')}); + break; + } + next += 2; + } + } + } + }, + toSelfie: function() { + return JSON.stringify({ + "buffer": Array.from(this.objView.buffer), + "tokenBucket": {"offset": this.tokenBucket.byteOffset, "length": this.tokenBucket.length}, + "tokenIndex": this.tokenIndex + }); + }, + fromSelfie: function(serializeObj) { + let arr = JSON.parse(serializeObj); + this.objView = new µb.dataView(arr["buffer"].length); + this.objView.buffer.set(arr["buffer"]); + this.tokenBucket = new Uint32Array(this.objView.buffer.buffer, arr["tokenBucket"].offset, arr["tokenBucket"].length); + this.tokenIndex = arr["tokenIndex"]; + } + } + // Not all browsers support `Element.matches`: // http://caniuse.com/#feat=matchesselector @@ -692,7 +797,7 @@ let hash; let domain = this.µburi.domainFromHostname(hostname); if ( domain === '' ) { - hash = unhide === 0 ? this.type0NoDomainHash : this.type1NoDomainHash; + hash = unhide === 0 ? makeHash(0, this.type0NoDomainHash, this.domainHashMask) : makeHash(0, this.type1NoDomainHash, this.domainHashMask); } else { hash = abpSelectorRegexp.test(parsed.suffix) ? makeHash(unhide, domain, this.domainHashMask, this.procedureMask) : makeHash(unhide, domain, this.domainHashMask); } @@ -713,6 +818,9 @@ return; } var line, fields, filter, bucket; + const BUCKET_TOKEN_SIZE = 8; //Size of Token Hash [4 bytes] inside TokenBucket + Size of Token's hostnames length [4 bytes] inside TokenBucket + const BUCKET_HOST_SIZE = 12; //Size of Token's Hashname Hash [4 bytes] inside TokenBucket + Size of Hostname's Css Length Offset [4 bytes] inside TokenBucket + Size of Hostname's Css Length inside CssBucket [2 bytes] + Size of Css Length [2 bytes] inside CssBucket + const BUCKET_SEPARATOR_SIZE = 1; //Size of separator '\n' for(let i = 0; i < text.length; i++) { @@ -726,14 +834,25 @@ this.duplicateBuster[line] = true; // h ir twitter.com .promoted-tweet if ( fields[0] === 'h' ) { - bucket = this.hostnameFilters[fields[1]]; - if ( bucket === undefined ) { - this.hostnameFilters[fields[1]] = new FilterHostname(fields[3], fields[2]); - } else if ( bucket instanceof FilterHostnamesSelectors) { - bucket.add(fields[2], fields[3]); + let hshash = µb.tokenHash(fields[2]); + if(this.hostnameFilters[fields[1]] === undefined) { + this.hostnameFilters[fields[1]] = new Map(); + this.hostnameFilters[fields[1]].set(hshash,[fields[3]]) + this.hostnameFilterByteLength += BUCKET_TOKEN_SIZE + BUCKET_HOST_SIZE; } else { - this.hostnameFilters[fields[1]] = bucket.add(fields[2], fields[3]); + let selectors = this.hostnameFilters[fields[1]].get(hshash); + if ( selectors === undefined ) { + this.hostnameFilters[fields[1]].set(hshash, fields[3]); + this.hostnameFilterByteLength += BUCKET_HOST_SIZE; + } else if ( typeof selectors === 'string' ) { + this.hostnameFilters[fields[1]].set(hshash, [ selectors, fields[3] ]); + this.hostnameFilterByteLength += BUCKET_SEPARATOR_SIZE; + } else { + selectors.push(fields[3]); + this.hostnameFilterByteLength += BUCKET_SEPARATOR_SIZE; + } } + this.hostnameFilterByteLength += fields[3].length; //Size of Css Data inside CssBucket continue; } @@ -805,6 +924,8 @@ /******************************************************************************/ FilterContainer.prototype.freeze = function() { + this.hostnameFilterDataView.pushToBuffer(this.hostnameFilters, this.hostnameFilterByteLength); + this.hostnameFilters = {}; this.duplicateBuster = {}; if ( this.highHighGenericHide !== '' ) { @@ -831,7 +952,7 @@ return { acceptedCount: this.acceptedCount, duplicateCount: this.duplicateCount, - hostnameSpecificFilters: stringify(this.hostnameFilters), + hostnameFilterDataView: this.hostnameFilterDataView.toSelfie(), entitySpecificFilters: this.entityFilters, lowGenericHide: {"lg": Array.from(this.lowGenericHide.lg), "lgm": Array.from(this.lowGenericHide.lgm)}, highLowGenericHide: this.highLowGenericHide, @@ -874,10 +995,10 @@ } return categoriesDict; } - + this.acceptedCount = selfie.acceptedCount; this.duplicateCount = selfie.duplicateCount; - this.hostnameFilters = filterFromSelfie(selfie.hostnameSpecificFilters); + this.hostnameFilterDataView.fromSelfie(selfie.hostnameFilterDataView); this.entityFilters = selfie.entitySpecificFilters; this.lowGenericHide = {"lg": new Set(selfie.lowGenericHide.lg),"lgm": new Map(selfie.lowGenericHide.lgm)}; this.highLowGenericHide = selfie.highLowGenericHide; @@ -1060,7 +1181,7 @@ if ( !request.locationURL ) { return; } - + //quickProfiler.start('FilterContainer.retrieve()'); var hostname = µb.URI.hostnameFromURI(request.locationURL); @@ -1086,15 +1207,15 @@ return r; } var hash, bucket; + let hosthashes = µb.getHostnameHashesFromLabelsBackward(hostname, domain); + hash = makeHash(0, domain, this.domainHashMask); - if ( bucket = this.hostnameFilters[hash] ) { - bucket.retrieve(hostname, r.cosmeticHide); - } + this.hostnameFilterDataView.retrieve(hosthashes, hash, r.cosmeticHide); + // https://github.com/uBlockAdmin/uBlock/issues/188 // Special bucket for those filters without a valid domain name as per PSL - if ( bucket = this.hostnameFilters[this.type0NoDomainHash] ) { - bucket.retrieve(hostname, r.cosmeticHide); - } + hash = makeHash(0, this.type0NoDomainHash, this.domainHashMask); + this.hostnameFilterDataView.retrieve(hosthashes, hash, r.cosmeticHide); // entity filter buckets are always plain js array if ( bucket = this.entityFilters[r.entity] ) { @@ -1103,23 +1224,18 @@ // No entity exceptions as of now hash = makeHash(1, domain, this.domainHashMask); - if ( bucket = this.hostnameFilters[hash] ) { - bucket.retrieve(hostname, r.cosmeticDonthide); - } + this.hostnameFilterDataView.retrieve(hosthashes, hash, r.cosmeticDonthide); hash = makeHash(0, domain, this.domainHashMask, this.procedureMask); - if ( bucket = this.hostnameFilters[hash] ) { - bucket.retrieve(hostname, r.procedureHide); - } + this.hostnameFilterDataView.retrieve(hosthashes, hash, r.procedureHide); if(request.procedureSelectorsOnly) { return r.procedureHide; } // https://github.com/uBlockAdmin/uBlock/issues/188 // Special bucket for those filters without a valid domain name as per PSL - if ( bucket = this.hostnameFilters[this.type1NoDomainHash] ) { - bucket.retrieve(hostname, r.cosmeticDonthide); - } + hash = makeHash(0, this.type1NoDomainHash, this.domainHashMask); + this.hostnameFilterDataView.retrieve(hosthashes, hash, r.cosmeticDonthide); this.retrieveFromSelectorCache(hostname, 'cosmetic', r.cosmeticHide); this.retrieveFromSelectorCache(hostname, 'net', r.netHide); diff --git a/src/js/start.js b/src/js/start.js index be7a12e0e..51926d430 100644 --- a/src/js/start.js +++ b/src/js/start.js @@ -220,9 +220,6 @@ var onInstalled = function() { } if(!firstInstall) { - if(lastVersion == "0.9.5.12" || (lastVersion == "0.9.5.13" && vAPI.browserInfo.flavor == "Firefox")) { - µb.versionUpdateTo13 = true; - } return; } else { µb.turnOffAA = false; diff --git a/src/js/storage.js b/src/js/storage.js index 6703a7d93..29442cf12 100644 --- a/src/js/storage.js +++ b/src/js/storage.js @@ -206,9 +206,6 @@ var onSelectedListsLoaded = function(store) { var µb = µBlock; var lists = store.remoteBlacklists; - if(lists.hasOwnProperty(locationOfAA) && µb.versionUpdateTo13) { - delete lists[locationOfAA]; - } var locations = Object.keys(lists); var location, availableEntry, storedEntry; var off; diff --git a/src/js/ublock.js b/src/js/ublock.js index 5d7195eaa..1585ef2b9 100644 --- a/src/js/ublock.js +++ b/src/js/ublock.js @@ -439,6 +439,252 @@ https://github.com/darkskyapp/string-hash/blob/master/index.js }; })(); +/* + The code below is taken from here: https://github.com/sindresorhus/quick-lru/blob/master/index.js + Author: https://github.com/sindresorhus + benchmark: https://github.com/dominictarr/bench-lru +*/ +µBlock.LRUCache = function(maxSize) { + this.maxSize = maxSize; + this.cache = new Map(); + this.oldCache = new Map(); + this._size = 0; +}; +µBlock.LRUCache.prototype = { + _set: function(key, value) { + this.cache.set(key, value); + this._size++; + + if (this._size >= this.maxSize) { + this._size = 0; + this.oldCache = this.cache; + this.cache = new Map(); + } + }, + get: function(key) { + if (this.cache.has(key)) { + return this.cache.get(key); + } + + if (this.oldCache.has(key)) { + const value = this.oldCache.get(key); + this._set(key, value); + return value; + } + }, + set: function(key, value) { + if (this.cache.has(key)) { + this.cache.set(key, value); + } else { + this._set(key, value); + } + + return this; + }, + has: function(key){ + return this.cache.has(key) || this.oldCache.has(key); + }, + peek: function(key){ + if (this.cache.has(key)) { + return this.cache.get(key); + } + + if (this.oldCache.has(key)) { + return this.oldCache.get(key); + } + }, + delete: function(key) { + const deleted = this.cache.delete(key); + if (deleted) { + this._size--; + } + return this.oldCache.delete(key) || deleted; + }, + clear: function() { + this.cache.clear(); + this.oldCache.clear(); + this._size = 0; + }, + keys: function* (){ + for (const [key] of this) { + yield key; + } + }, + values: function* (){ + for (const [, value] of this) { + yield value; + } + }, + [Symbol.iterator]: function* (){ + for (const item of this.cache) { + yield item; + } + + for (const item of this.oldCache) { + const [key] = item; + if (!this.cache.has(key)) { + yield item; + } + } + } +}; + +/* + The code below is taken from here: https://github.com/cliqz-oss/adblocker/blob/master/src/data-view.ts + License: https://github.com/cliqz-oss/adblocker/blob/master/LICENSE +*/ +µBlock.dataView = function(length) { + this.pos = 0; + this.buffer = new Uint8Array(length); + this.reHasUnicode = /[^\x00-\x7F]/; + this.puny_encoded = 1 << 15; +} +µBlock.dataView.prototype = { + getPos: function() { + return this.pos; + }, + seekZero: function() { + this.pos = 0; + }, + setByte: function(pos, byte) { + this.buffer[pos] = byte; + }, + getUint8: function() { + return this.buffer[this.pos++]; + }, + align: function(alignement) { + this.pos = + this.pos % alignement === 0 + ? this.pos + : Math.floor(this.pos / alignement) * alignement + alignement; + }, + pushUint8: function(uint8) { + this.buffer[this.pos++] = uint8; + }, + pushUint32: function(uint32) { + this.buffer[this.pos++] = uint32 >>> 24; + this.buffer[this.pos++] = uint32 >>> 16; + this.buffer[this.pos++] = uint32 >>> 8; + this.buffer[this.pos++] = uint32; + }, + pushUint32Array: function(arr) { + this.pushUint32(arr.length); + for (let i = 0; i < arr.length; i += 1) { + this.pushUint32(arr[i]); + } + }, + pushUTF8: function(raw) { + let str = raw; + if ( this.reHasUnicode.test(raw) ) { + str = punycode.encode(raw); + this.pushUint16(this.setBit(str.length, this.puny_encoded)); + } else { + this.pushUint16(str.length); + } + for (let i = 0; i < str.length; i += 1) { + this.buffer[this.pos++] = str.charCodeAt(i); + } + }, + slice: function() { + this.checkSize(); + return this.buffer.slice(0, this.pos); + }, + checkSize: function() { + if (this.pos !== 0 && this.pos > this.buffer.byteLength) { + throw new Error( + `StaticDataView too small: ${this.buffer.byteLength}, but required ${this.pos - 1} bytes`, + ); + } + }, + setPos: function(pos){ + this.pos = pos; + }, + getUTF8: function() { + const lengthAndMask = this.getUint16(); + const byteLength = this.clearBit(lengthAndMask, this.puny_encoded); + const punyEncoded = this.getBit(lengthAndMask, this.puny_encoded); + this.pos += byteLength; + const str = String.fromCharCode.apply( + null, + this.buffer.subarray(this.pos - byteLength, this.pos), + ); + if (punyEncoded) { + return punycode.decode(str); + } + return str; + }, + getUint32ArrayView: function(desiredSize) { + this.align(4); + const view = new Uint32Array( + this.buffer.buffer, + this.pos + this.buffer.byteOffset, + desiredSize, + ); + this.pos += desiredSize * 4; + return view; + }, + getUint16: function() { + return ((this.buffer[this.pos++] << 8) | this.buffer[this.pos++]) >>> 0; + }, + pushUint16: function(uint16) { + this.buffer[this.pos++] = uint16 >>> 8; + this.buffer[this.pos++] = uint16; + }, + setBit: function(n, mask) { + return n | mask; + }, + getBit: function(n, mask) { + return !!(n & mask); + }, + clearBit: function(n, mask) { + return n & ~mask; + } +} +/* + The code below is taken from here: https://github.com/cliqz-oss/adblocker/blob/32397857de8c439fb4c961f12d7e17c750b3fc98/src/filters/cosmetic.ts#L114 + License: https://github.com/cliqz-oss/adblocker/blob/master/LICENSE +*/ +µBlock.computeSelectorsId = function(selectors) { + let hash = (5408 * 33); + for(let i = 0; i < selectors.length; i++) { + for (let j = 0; j < selectors[i].length; j += 1) { + hash = (hash * 33) ^ selectors[i].charCodeAt(j); + } + } + return hash >>> 0; +} +/* + The code below is taken from here: https://github.com/cliqz-oss/adblocker/blob/32397857de8c439fb4c961f12d7e17c750b3fc98/src/filters/cosmetic.ts#L51 + License: https://github.com/cliqz-oss/adblocker/blob/master/LICENSE +*/ +µBlock.getHostnameHashesFromLabelsBackward = function(hostname, domain) { + if(hostname == domain && hostname.indexOf('www.') !== -1) { + domain = hostname.slice(hostname.indexOf('.') + 1); + } + return µBlock.getHashesFromLabelsBackward(hostname, hostname.length, hostname.length - domain.length); +} +/* + The code below is taken from here: https://github.com/cliqz-oss/adblocker/blob/32397857de8c439fb4c961f12d7e17c750b3fc98/src/filters/cosmetic.ts#L16 + License: https://github.com/cliqz-oss/adblocker/blob/master/LICENSE +*/ +µBlock.getHashesFromLabelsBackward = function(hostname, end, startOfDomain) { + const hashes = []; + let hash = 5381; + + // Compute hash backward, label per label + for (let i = end - 1; i >= 0; i -= 1) { + // Process label + if (hostname[i] === '.' && i < startOfDomain) { + hashes.push(hash >>> 0); + } + + // Update hash + hash = (hash * 33) ^ hostname.charCodeAt(i); + } + + hashes.push(hash >>> 0); + return hashes; +} µBlock.logCosmeticFilters = (function() { var tabIdToTimerMap = {}; From 6aa703920bd124d7bdc55493f190508b5f5d483d Mon Sep 17 00:00:00 2001 From: uBlock <35694050+uBlockAdmin@users.noreply.github.com> Date: Fri, 19 Apr 2019 13:45:32 +0530 Subject: [PATCH 2/2] - Converted string-based processing into typed array-based processing of blocking by domain name rule and domains listed for $domain option in blocking rules - Removed support for $rewrite option --- src/js/contentscript-end.js | 4 +- src/js/cosmetic-filtering.js | 267 ++++++------------------ src/js/start.js | 1 - src/js/static-net-filtering.js | 368 +++++++++++++++++++-------------- src/js/storage.js | 5 +- src/js/traffic.js | 9 - src/js/ublock.js | 201 ++++-------------- 7 files changed, 321 insertions(+), 534 deletions(-) diff --git a/src/js/contentscript-end.js b/src/js/contentscript-end.js index c14009324..7b8f6dc7f 100644 --- a/src/js/contentscript-end.js +++ b/src/js/contentscript-end.js @@ -1306,8 +1306,8 @@ var uBlockCollapser = (function() { } else { var localMessager = vAPI.messaging.channel('contentscript-start.js'); var proceduresHandler = function(details) { - if(details.procedureHide.length > 0) { - vAPI.hideProcedureFilters = details.procedureHide; + if(details.length > 0) { + vAPI.hideProcedureFilters = details; vAPI.proceduralCosmeticFiltering.applyPatterns(vAPI.hideProcedureFilters); } localMessager.close(); diff --git a/src/js/cosmetic-filtering.js b/src/js/cosmetic-filtering.js index caf444ce4..d76415218 100644 --- a/src/js/cosmetic-filtering.js +++ b/src/js/cosmetic-filtering.js @@ -51,141 +51,6 @@ // lindaikeji.blogspot.com##a > img[height="600"] // japantimes.co.jp##table[align="right"][width="250"] // mobilephonetalk.com##[align="center"] > b > a[href^="http://tinyurl.com/"] - - /* - I took the idea to store specific filters from here: - - Commits: - https://github.com/gorhill/uBlock/blob/8a616bcafb8c5143f76b5ff4b0d94a78c11786b2/src/js/cosmetic-filtering.js#L64 - https://github.com/gorhill/uBlock/blob/8a616bcafb8c5143f76b5ff4b0d94a78c11786b2/src/js/cosmetic-filtering.js#L110 - https://github.com/gorhill/uBlock/blob/8a616bcafb8c5143f76b5ff4b0d94a78c11786b2/src/js/cosmetic-filtering.js#L156 - - Author: https://github.com/gorhill - - License is GPL3: https://github.com/gorhill/uBlock/blob/master/README.md - */ - - let FilterHostnamesSelectors = function(mapping){ - this.hs = new Map(mapping); - } - FilterHostnamesSelectors.prototype.add = function(hostname, selector) { - let selectors = this.hs.get(hostname); - if ( selectors === undefined ) { - this.hs.set(hostname, selector); - } else if ( typeof selectors === 'string' ) { - this.hs.set(hostname, [ selectors, selector ]); - } else { - selectors.push(selector); - } - } - FilterHostnamesSelectors.prototype.retrieve = function(hostname, out) { - this.hs.forEach(function(value, key) { - if ( hostname.slice(-key.length) === key ) { - let selectors = value; - if ( typeof selectors === 'string' ) { - out.push(selectors); - } - else { - out.push(...selectors); - } - } - }); - }; - FilterHostnamesSelectors.prototype.fid = 'hmm'; - - FilterHostnamesSelectors.prototype.toSelfie = function() { - return JSON.stringify(Array.from(this.hs)); - }; - FilterHostnamesSelectors.fromSelfie = function(s) { - let f = new FilterHostnamesSelectors(); - let o = JSON.parse(s); - f.hs = new Map(o); - return f; - }; - FilterHostnamesSelectors.prototype.toJSON = function() { - return {[this.fid]: this.toSelfie()}; - } - - /*************************************************************/ - - let FilterHostnameSeletors = function(hostname, selectors) { - this.hostname = hostname; - this.selectors = selectors; - } - FilterHostnameSeletors.prototype.add = function(hostname, selector) { - if(this.hostname == hostname){ - this.selectors.push(selector); - return this; - } else { - return new FilterHostnamesSelectors([[this.hostname,this.selectors],[hostname, selector]]); - } - } - FilterHostnameSeletors.prototype.retrieve = function(hostname, out) { - if ( hostname.slice(-this.hostname.length) === this.hostname ) { - out.push(...this.selectors); - } - }; - - FilterHostnameSeletors.prototype.fid = 'hm'; - - FilterHostnameSeletors.prototype.toSelfie = function() { - let str = this.selectors.map( - function(x){ - return encode(x); - } - ) - return JSON.stringify(str) + '\t' + this.hostname; - }; - - FilterHostnameSeletors.fromSelfie = function(s) { - let opts = s.split('\t'); - let selectors = JSON.parse(opts[0]).map( - function(x) { - return decode(x); - } - ) - return new FilterHostnameSeletors(opts[1], selectors); - }; - FilterHostnameSeletors.prototype.toJSON = function() { - return {[this.fid]: this.toSelfie()}; - } - - /*************************************************************/ - - let FilterHostname = function(s, hostname) { - this.s = s; - this.hostname = hostname; - }; - - FilterHostname.prototype.add = function(hostname, selector) { - if ( hostname === this.hostname ) { - return new FilterHostnameSeletors(this.hostname,[ this.s, selector ]); - } - else { - return new FilterHostnamesSelectors([[this.hostname, this.s],[ hostname, selector ]]); - } - } - - FilterHostname.prototype.retrieve = function(hostname, out) { - if ( hostname.slice(-this.hostname.length) === this.hostname ) { - out.push(this.s); - } - }; - - FilterHostname.prototype.fid = 'h'; - - FilterHostname.prototype.toSelfie = function() { - return encode(this.s) + '\t' + this.hostname; - }; - - FilterHostname.fromSelfie = function(s) { - let pos = s.indexOf('\t'); - return new FilterHostname(decode(s.slice(0, pos)), s.slice(pos + 1)); - }; - FilterHostname.prototype.toJSON = function() { - return {[this.fid]:this.toSelfie()}; - }; - /******************************************************************************/ // Any selector specific to an entity @@ -470,6 +335,10 @@ // Generic filters can only be enforced once the main document is loaded. // Specific filers can be enforced before the main document is loaded. + const BUCKET_TOKEN_SIZE = 8; //Size of Token Hash [4 bytes] inside TokenBucket + Size of Token's hostnames length [4 bytes] inside TokenBucket + const BUCKET_HOST_SIZE = 12; //Size of Token's Hashname Hash [4 bytes] inside TokenBucket + Size of Hostname's Css Length Offset [4 bytes] inside TokenBucket + Size of Hostname's Css Length inside CssBucket [2 bytes] + Size of Css Length [2 bytes] inside CssBucket + const BUCKET_SEPARATOR_SIZE = 1; //Size of separator '\n' + let FilterContainer = function() { this.domainHashMask = (1 << 10) - 1; // 10 bits this.genericHashMask = (1 << 15) - 1; // 15 bits @@ -582,7 +451,6 @@ }); } this.objView.buffer = this.objView.buffer.slice(0, this.objView.pos + (((totalTokens * 2) + (totalHostnames * 2)) * 4) + additionalBufferSpace); - let tokenBucketSize = totalTokens * 2; this.tokenBucket = this.objView.getUint32ArrayView((totalTokens * 2) + (totalHostnames * 2)); for (var token in hostnameFilters) { this.tokenIndex[token] = tokenBucketIndex; @@ -617,7 +485,6 @@ let cssDataString = this.objView.getUTF8(); out.push(...cssDataString.split('\n')); this.lru.set(tokenHash,{'k': hostHash,'v':cssDataString.split('\n')}); - break; } next += 2; } @@ -637,6 +504,35 @@ this.objView.buffer.set(arr["buffer"]); this.tokenBucket = new Uint32Array(this.objView.buffer.buffer, arr["tokenBucket"].offset, arr["tokenBucket"].length); this.tokenIndex = arr["tokenIndex"]; + }, + rebuildHostnameFilters: function() { + let hostnameFilters = {}; + let loop = 0; + let hn; + let selector; + let entry; + + while(loop < this.tokenBucket.length) { + let tokenHash = this.tokenBucket[loop]; + if(hostnameFilters[tokenHash] === undefined) { + hostnameFilters[tokenHash] = new Map(); + } + entry = hostnameFilters[tokenHash]; + loop++; + let hostLen = this.tokenBucket[loop]; + let next = loop + 1; + let ln = next + (hostLen * 2); + while(next < ln) { + let hostHash = this.tokenBucket[next]; + let cssOffset = this.tokenBucket[next + 1]; + this.objView.setPos(cssOffset); + let selectors = this.objView.getUTF8().split('\n'); + entry.set(hostHash, selectors); + next += 2; + } + loop += (this.tokenBucket[loop] * 2) + 1; + } + return hostnameFilters; } } @@ -801,7 +697,8 @@ } else { hash = abpSelectorRegexp.test(parsed.suffix) ? makeHash(unhide, domain, this.domainHashMask, this.procedureMask) : makeHash(unhide, domain, this.domainHashMask); } - out.push(['h', hash, hostname, parsed.suffix]); + let hshash = µb.tokenHash(hostname); + out.push(['h', hash, hshash, parsed.suffix]); }; /******************************************************************************/ @@ -810,17 +707,42 @@ let entity = hostname.slice(0, -2); out.push(['e',entity, parsed.suffix]); }; - + + FilterContainer.prototype.appendHostnameFilters = function(compiledFilters) { + this.hostnameFilters = this.hostnameFilterDataView.rebuildHostnameFilters(); + let fc = this; + compiledFilters.forEach(function(fields) { + fc.addHostnameFilters(fields); + }); + this.hostnameFilterDataView = new this.hostnameFilterDataViewWrapper(); + } /******************************************************************************/ - + FilterContainer.prototype.addHostnameFilters = function(fields) { + let hshash = fields[2]; + if(this.hostnameFilters[fields[1]] === undefined) { + this.hostnameFilters[fields[1]] = new Map(); + this.hostnameFilters[fields[1]].set(hshash,[fields[3]]); + this.hostnameFilterByteLength += BUCKET_TOKEN_SIZE + BUCKET_HOST_SIZE; + } else { + let selectors = this.hostnameFilters[fields[1]].get(hshash); + if ( selectors === undefined ) { + this.hostnameFilters[fields[1]].set(hshash, fields[3]); + this.hostnameFilterByteLength += BUCKET_HOST_SIZE; + } else if ( typeof selectors === 'string' ) { + this.hostnameFilters[fields[1]].set(hshash, [ selectors, fields[3] ]); + this.hostnameFilterByteLength += BUCKET_SEPARATOR_SIZE; + } else { + selectors.push(fields[3]); + this.hostnameFilterByteLength += BUCKET_SEPARATOR_SIZE; + } + } + this.hostnameFilterByteLength += fields[3].length; //Size of Css Data inside CssBucket + } FilterContainer.prototype.fromCompiledContent = function(text, skip) { if ( skip ) { return; } var line, fields, filter, bucket; - const BUCKET_TOKEN_SIZE = 8; //Size of Token Hash [4 bytes] inside TokenBucket + Size of Token's hostnames length [4 bytes] inside TokenBucket - const BUCKET_HOST_SIZE = 12; //Size of Token's Hashname Hash [4 bytes] inside TokenBucket + Size of Hostname's Css Length Offset [4 bytes] inside TokenBucket + Size of Hostname's Css Length inside CssBucket [2 bytes] + Size of Css Length [2 bytes] inside CssBucket - const BUCKET_SEPARATOR_SIZE = 1; //Size of separator '\n' for(let i = 0; i < text.length; i++) { @@ -834,25 +756,7 @@ this.duplicateBuster[line] = true; // h ir twitter.com .promoted-tweet if ( fields[0] === 'h' ) { - let hshash = µb.tokenHash(fields[2]); - if(this.hostnameFilters[fields[1]] === undefined) { - this.hostnameFilters[fields[1]] = new Map(); - this.hostnameFilters[fields[1]].set(hshash,[fields[3]]) - this.hostnameFilterByteLength += BUCKET_TOKEN_SIZE + BUCKET_HOST_SIZE; - } else { - let selectors = this.hostnameFilters[fields[1]].get(hshash); - if ( selectors === undefined ) { - this.hostnameFilters[fields[1]].set(hshash, fields[3]); - this.hostnameFilterByteLength += BUCKET_HOST_SIZE; - } else if ( typeof selectors === 'string' ) { - this.hostnameFilters[fields[1]].set(hshash, [ selectors, fields[3] ]); - this.hostnameFilterByteLength += BUCKET_SEPARATOR_SIZE; - } else { - selectors.push(fields[3]); - this.hostnameFilterByteLength += BUCKET_SEPARATOR_SIZE; - } - } - this.hostnameFilterByteLength += fields[3].length; //Size of Css Data inside CssBucket + this.addHostnameFilters(fields); continue; } @@ -924,7 +828,8 @@ /******************************************************************************/ FilterContainer.prototype.freeze = function() { - this.hostnameFilterDataView.pushToBuffer(this.hostnameFilters, this.hostnameFilterByteLength); + if(Object.entries(this.hostnameFilters).length > 0) + this.hostnameFilterDataView.pushToBuffer(this.hostnameFilters, this.hostnameFilterByteLength); this.hostnameFilters = {}; this.duplicateBuster = {}; @@ -941,14 +846,6 @@ /******************************************************************************/ FilterContainer.prototype.toSelfie = function() { - - let stringify = function(hostnameFilters) { - let arr = []; - for (const [key, value] of Object.entries(hostnameFilters)) { - arr.push({[key]: JSON.stringify(value)}); - } - return JSON.stringify(arr); - } return { acceptedCount: this.acceptedCount, duplicateCount: this.duplicateCount, @@ -965,37 +862,9 @@ }; }; - FilterContainer.factories = { - 'h': FilterHostname, - 'hmm': FilterHostnamesSelectors, - 'hm' : FilterHostnameSeletors - }; /******************************************************************************/ FilterContainer.prototype.fromSelfie = function(selfie) { - - - var filterFromSelfie = function(s) { - - function getSelfie(tokenEntries) { - let selfie; - for(let prop in tokenEntries) { - var item = tokenEntries[prop]; - selfie = FilterContainer.factories[prop].fromSelfie(item); - } - return selfie; - } - - let categories = JSON.parse(s); - let categoriesDict = {}; - for(let item of categories) { - for (const [key, value] of Object.entries(item)) { - categoriesDict[key] = getSelfie(JSON.parse(value)); - } - } - return categoriesDict; - } - this.acceptedCount = selfie.acceptedCount; this.duplicateCount = selfie.duplicateCount; this.hostnameFilterDataView.fromSelfie(selfie.hostnameFilterDataView); diff --git a/src/js/start.js b/src/js/start.js index 51926d430..cde690e3f 100644 --- a/src/js/start.js +++ b/src/js/start.js @@ -107,7 +107,6 @@ var onSelfieReady = function(selfie) { } //console.log('start.js/onSelfieReady: selfie looks good'); µb.remoteBlacklists = selfie.filterLists; - µb.domainHolder.fromSelfie(selfie.domainList); µb.staticNetFilteringEngine.fromSelfie(selfie.staticNetFilteringEngine); µb.cosmeticFilteringEngine.fromSelfie(selfie.cosmeticFilteringEngine); return true; diff --git a/src/js/static-net-filtering.js b/src/js/static-net-filtering.js index 3ab9e1a17..f16396845 100644 --- a/src/js/static-net-filtering.js +++ b/src/js/static-net-filtering.js @@ -70,7 +70,6 @@ 'popup': 15 << 4, 'csp' : 16 << 4, 'webrtc' : 17 << 4, - 'rewrite' : 18 << 4, 'generichide' : 19 << 4, 'genericblock': 20 << 4 }; @@ -102,8 +101,13 @@ // valid until the next evaluation. var pageHostnameRegister = ''; + var pageDomainRegister = ''; + var pageHostnameHashes; var requestHostnameRegister = ''; + var requestDomainRegister = ''; + var requestHostnameHashes = ''; var skipGenericBlocking = false; + var objDataView; //var filterRegister = null; //var categoryRegister = ''; @@ -261,34 +265,143 @@ } return false; } - /**************************************************/ - var FilterDomain = function(index) { - this.index = index; + + const PAGE_SIZE = 65000; + let hostnameFilterDataViewWrapper = function() { + this.objView = new µb.dataView(6000000); + this.computedIds = new Map(); } + hostnameFilterDataViewWrapper.prototype = { + pushToBuffer: function(hostnames, notHostnames, allHostnames) { + let computedId = this.computeUniqueId(new Uint32Array(allHostnames).sort()); + if(!this.computedIds.has(computedId)) { + if((this.objView.buffer.length - this.objView.pos) < (allHostnames.length * 4)) { + let len; + if(PAGE_SIZE < (allHostnames.length * 4)) + len = PAGE_SIZE + ((allHostnames.length * 4) + this.objView.pos); + else + len = this.objView.pos + PAGE_SIZE; + this.growBuffer(len); + } + let hostnamesView = this.objView.getUint32ArrayView(hostnames.length); + let notHostnamesView = this.objView.getUint32ArrayView(notHostnames.length); + hostnamesView.set(new Uint32Array(hostnames).sort()); + notHostnamesView.set(new Uint32Array(notHostnames).sort()); + let details = { + '+': {"offset": hostnamesView.byteOffset, "length": hostnamesView.length}, + '-': {"offset": notHostnamesView.byteOffset, "length": notHostnamesView.length} + }; + this.computedIds.set(computedId, details); + return details; + } else { + return this.computedIds.get(computedId); + } + }, + growBuffer: function(bufferLength) { + const newBuffer = new Uint8Array(bufferLength); + newBuffer.set(this.objView.buffer); + this.objView.buffer = newBuffer; + }, + toSelfie: function() { + return JSON.stringify({ + "buffer": Array.from(this.objView.buffer) + }); + }, + fromSelfie: function(serializeObj) { + let arr = JSON.parse(serializeObj); + this.objView = new µb.dataView(arr["buffer"].length); + this.objView.buffer.set(arr["buffer"]); + }, + parseOptHostnames: function(domainStr) { + let hostnames = []; + let notHostnames = []; + let allHostnames = []; + domainStr.split("|").forEach( + function(hostname) { + let tokenHash = µb.tokenHash(hostname); + if ( hostname.charAt(0) === '~' ) { + notHostnames.push(µb.tokenHash(hostname.slice(1))); + } else { + hostnames.push(tokenHash); + } + allHostnames.push(tokenHash); + } + ); + return [hostnames, notHostnames, allHostnames]; + }, + computeUniqueId: function(hostnames) { + let hash = (5408 * 33); + if (hostnames !== undefined) { + for (let i = 0; i < hostnames.length; i += 1) { + hash = (hash * 33) ^ hostnames[i]; + } + } + return hash >>> 0; + }, + match: function(details, hostnameOnlyFilter) { + let hostnamesView = new Uint32Array(this.objView.buffer.buffer, details['+'].offset, details['+'].length); + let notHostnamesView = new Uint32Array(this.objView.buffer.buffer, details['-'].offset, details['-'].length); + let hostHashes; + let blnStatus = false; + let matchHostname = ''; + if(hostnameOnlyFilter) { + hostHashes = requestHostnameHashes; + } else { + hostHashes = pageHostnameHashes; + } + if(hostnamesView.length == 0) { + blnStatus = true; + } else { + Array.from(hostHashes).some(function(element) { + if(µb.binSearch(hostnamesView, element[0]) !== -1) { + matchHostname = element[1]; + blnStatus = true; + return true; + } else { + return false; + } + }); + } + if(blnStatus) + blnStatus = (blnStatus && (Array.from(hostHashes).some(element => µb.binSearch(notHostnamesView, element[0]) !== -1) === false)); + + return blnStatus +'|'+ matchHostname; + } + }; + + var FilterDomain = function(offsets, hostnameOnlyFilter) { + this.offsets = offsets; + this.hostnameOnlyFilter = hostnameOnlyFilter; + this.h = ''; // short-lived register + } FilterDomain.fid = FilterDomain.prototype.fid = 'd'; - - FilterDomain.prototype.toString = function() { - return µb.domainHolder.toString(this.index); - }; - FilterDomain.prototype.toSelfie = function() { - return this.index; - }; + FilterDomain.prototype.match = function() { - return µb.domainHolder.match(this.index, pageHostnameRegister); + let result = objDataView.match(this.offsets, this.hostnameOnlyFilter); + let pos = result.indexOf('|'); + let matchHostname = result.slice(pos + 1); + let blnMatch = (result.slice(0, pos) == 'true'); + if(this.hostnameOnlyFilter) { + this.h = '||' + matchHostname + '^'; + } else { + this.h = matchHostname != '' ? '$domain=' + matchHostname : ''; + } + return blnMatch; } - FilterDomain.compile = function(domainList) { - if(domainList != "") - return domainList; - else return ""; - }; - FilterDomain.fromSelfie = function(index) { - return new FilterDomain(index); + FilterDomain.prototype.toSelfie = function() { + return JSON.stringify(this.offsets) + '\t' + this.hostnameOnlyFilter; + } + FilterDomain.prototype.toString = function() { + return this.h; }; FilterDomain.prototype.toJSON = function() { return {[this.fid]: this.toSelfie()}; - } - + }; + FilterDomain.fromSelfie = function(s) { + let pos = s.indexOf('\t'); + return new FilterDomain(JSON.parse(s.slice(0, pos)), (s.slice(pos + 1) == 'true')); + }; /******************************************************************************/ var FilterPlain = function(s, tokenBeg) { @@ -603,74 +716,7 @@ FilterRegex.prototype.toJSON = function() { return {[this.fid]: this.toSelfie()}; } - - var FilterRegexRewrite = function(s,rewrite){ - FilterRegex.call(this,s); - this.rewrite = rewrite; - } - FilterRegexRewrite.prototype = Object.create(FilterRegex.prototype); - FilterRegexRewrite.prototype.constructor = FilterRegexRewrite; - - FilterRegexRewrite.prototype.match = function(url) { - return FilterRegex.prototype.match.call(this, url); - }; - - FilterRegexRewrite.fid = FilterRegexRewrite.prototype.fid = '//r'; - - FilterRegexRewrite.prototype.toString = function() { - return '/' + this.re.source + '/' + '$rewrite=' + this.rewrite; - }; - - FilterRegexRewrite.prototype.toSelfie = function() { - return this.re.source + '\t' + this.rewrite; - }; - - FilterRegexRewrite.compile = function(details) { - return details.f + '\t' + details.rewrite; - }; - - FilterRegexRewrite.fromSelfie = function(s) { - let pos = s.indexOf('\t'); - return new FilterRegexRewrite(s.slice(0, pos), s.slice(pos + 1)); - }; - FilterRegexRewrite.prototype.toJSON = function() { - return {[this.fid]: this.toSelfie()}; - } - - var FilterRewrite = function(s,rewrite) { - this.s = s; - this.rewrite = rewrite; - } - FilterRewrite.prototype = Object.create(FilterPlainHnAnchored.prototype); - FilterRewrite.prototype.constructor = FilterRewrite; - - FilterRewrite.prototype.match = function(url,tokenBeg) { - return FilterPlainHnAnchored.prototype.match.call(this, url,tokenBeg); - }; - - FilterRewrite.fid = FilterRewrite.prototype.fid = '||r'; - - FilterRewrite.prototype.toString = function() { - return '||' + this.s + '$rewrite=' + this.rewrite; - }; - - FilterRewrite.prototype.toSelfie = function() { - return this.s + '\t' + this.rewrite; - }; - - FilterRewrite.compile = function(details) { - return details.f + '\t' + details.rewrite; - }; - - FilterRewrite.fromSelfie = function(s) { - let pos = s.indexOf('\t'); - return new FilterRewrite(s.slice(0, pos), s.slice(pos + 1)); - }; - FilterRewrite.prototype.toJSON = function() { - return {[this.fid]: this.toSelfie()}; - } - /******************************************************************************/ - + /******************************************************************************/ /******************************************************************************/ @@ -700,7 +746,7 @@ FilterHostnameDict.prototype.match = function() { // TODO: mind IP addresses - + let pos, hostname = requestHostnameRegister; while ( this.dict.has(hostname) === false ) { @@ -714,27 +760,6 @@ this.h = '||' + hostname + '^'; return this; }; - - FilterHostnameDict.fid = FilterHostnameDict.prototype.fid = '{h}'; - - FilterHostnameDict.prototype.toString = function() { - return this.h; - }; - - FilterHostnameDict.prototype.toSelfie = function() { - return JSON.stringify(Array.from(this.dict)); - }; - FilterHostnameDict.prototype.toJSON = function() { - return {[this.fid]:this.toSelfie()}; - }; - - FilterHostnameDict.fromSelfie = function(s) { - let f = new FilterHostnameDict(); - let o = JSON.parse(s); - f.dict = new Set(o); - return f; - }; - /******************************************************************************/ /******************************************************************************/ @@ -845,7 +870,11 @@ FilterBucket.prototype.toString = function() { if ( this.f !== null ) { - return this.f.toString(); + if(Array.isArray(this.f)) { + return this.f[0].toString() + this.f[1].toString(); + } else { + return this.f.toString(); + } } return ''; }; @@ -897,14 +926,8 @@ var getFilterClass = function(details) { if ( details.isRegex ) { - if(details.rewrite != '') - return FilterRegexRewrite; - else - return FilterRegex; + return FilterRegex; } - if(details.rewrite != '') - return FilterRewrite; - var s = details.f; if ( s.indexOf('*') !== -1 || details.token === '*' ) { if ( details.hostnameAnchored ) { @@ -964,7 +987,6 @@ this.notHostnames = []; this.dataType = ''; this.dataStr = ''; - this.rewrite = ''; this.reset(); }; @@ -988,7 +1010,6 @@ 'csp' : 'csp', 'websocket': 'websocket', 'webrtc': 'webrtc', - 'rewrite': 'rewrite', 'generichide': 'generichide', 'genericblock': 'genericblock' }; @@ -1116,11 +1137,6 @@ this.dataStr = ''; continue; } - if ( opt.slice(0,8) === 'rewrite=') { - this.parseOptType('rewrite', not); - this.rewrite = opt.slice(8); - continue; - } if ( this.toNormalizedType.hasOwnProperty(opt) ) { this.parseOptType(opt, not); continue; @@ -1378,7 +1394,7 @@ this.filterParser.reset(); this.filterCounts = {}; this.cspSubsets = new Map(); - µb.domainHolder.reset(); + objDataView = new hostnameFilterDataViewWrapper(); }; /******************************************************************************/ @@ -1387,6 +1403,19 @@ this.duplicateBuster = {}; this.filterParser.reset(); this.frozen = true; + let pushToBuffer = false; + for(let category in this.categories) { + let notHostnames = []; + if(this.categories[category]['.'] !== undefined && this.categories[category]['.'].hasOwnProperty('dict')) { + let details = objDataView.pushToBuffer(Array.from(this.categories[category]['.'].dict), notHostnames, Array.from(this.categories[category]['.'].dict)); + this.categories[category]['.'] = FilterDomain.fromSelfie(JSON.stringify(details) + '\t' + true); + pushToBuffer = true; + } + } + if(pushToBuffer) { + objDataView.objView.buffer = objDataView.objView.slice(); + objDataView.computedIds = new Map(); + } }; /******************************************************************************/ @@ -1399,9 +1428,7 @@ '|a': FilterPlainLeftAnchored, 'a|': FilterPlainRightAnchored, '||a': FilterPlainHnAnchored, - '||r': FilterRewrite, '//': FilterRegex, - '//r': FilterRegexRewrite, '{h}': FilterHostnameDict, '_': FilterGeneric, '||_': FilterGenericHnAnchored, @@ -1419,6 +1446,7 @@ allowFilterCount: this.allowFilterCount, blockFilterCount: this.blockFilterCount, duplicateCount: this.duplicateCount, + hostnameFilterDataView: objDataView.toSelfie(), categories: JSON.stringify(this.categories), cspFilters: JSON.stringify(this.cspFilters) }; @@ -1473,6 +1501,7 @@ } return categoriesDict; } + objDataView.fromSelfie(selfie.hostnameFilterDataView); this.categories = filterFromSelfie(selfie.categories); this.cspFilters = filterFromSelfie(selfie.cspFilters); }; @@ -1535,7 +1564,7 @@ FilterContainer.prototype.compileHostnameOnlyFilter = function(parsed, out) { // Can't fit the filter in a pure hostname dictionary. - if ( parsed.hostnames.length !== 0 || parsed.notHostnames.length !== 0 || parsed.dataType == 'csp' || parsed.rewrite != '' || parsed.domainList != '') { + if ( parsed.hostnames.length !== 0 || parsed.notHostnames.length !== 0 || parsed.dataType == 'csp' || parsed.domainList != '') { return; } @@ -1630,9 +1659,9 @@ let line, fields; fields = text[i]; - + line = fields.join("\v"); - + this.acceptedCount += 1; let blnCspMatch = false; @@ -1663,7 +1692,8 @@ if ( entry === undefined ) { entry = bucket['.'] = new FilterHostnameDict(); } - if ( entry.add(fields[2]) === false ) { + let hshash = µb.tokenHash(fields[2]); + if ( entry.add(hshash) === false ) { this.duplicateCount += 1; } continue; @@ -1681,8 +1711,9 @@ fields[3] = JSON.parse(fields[3]); if(fields[3].domains != undefined) { - let domainIndex = µb.domainHolder.getIndex(fields[3].domains); - filter = [factory.fromSelfie(fields[3].compiled), FilterDomain.fromSelfie(domainIndex)]; + let [hostnames , notHostnames, allHostnames] = objDataView.parseOptHostnames(fields[3].domains); + let details = objDataView.pushToBuffer(hostnames, notHostnames, allHostnames); + filter = [factory.fromSelfie(fields[3].compiled), FilterDomain.fromSelfie(JSON.stringify(details) + '\t' + false)]; } else { filter = factory.fromSelfie(fields[3].compiled); } @@ -1747,6 +1778,7 @@ /******************************************************************************/ FilterContainer.prototype.matchTokens = function(bucket, url) { + // Hostname-only filters let f = bucket['.']; if ( f !== undefined && !skipGenericBlocking && f.match() !== false) { @@ -1771,7 +1803,7 @@ continue; } if ( f0 !== undefined && f0.match(url, tokenEntry.beg) !== false && f1.match()) { - return f0; + return f; } } else { if(f !== undefined && f.fid.indexOf('h') === -1 && f.fid != "[]" && skipGenericBlocking) { @@ -1789,7 +1821,7 @@ let f0 = f[0]; let f1 = f[1]; if (f0.match(url) !== false && f1.match()) { - return f0; + return f; } } else { if ( f !== undefined && f.match(url) !== false ) { @@ -1813,7 +1845,13 @@ // These registers will be used by various filters pageHostnameRegister = context.pageHostname || ''; + pageDomainRegister = µb.URI.domainFromHostname(pageHostnameRegister) || pageHostnameRegister; + pageHostnameHashes = µb.getHostnameHashesFromLabelsBackward(pageHostnameRegister, pageDomainRegister, false); + requestHostnameRegister = decode(encode(µb.URI.hostnameFromURI(requestURL).trim())); + requestDomainRegister = µb.URI.domainFromHostname(requestHostnameRegister) || requestHostnameRegister; + requestHostnameHashes = µb.getHostnameHashesFromLabelsBackward(requestHostnameRegister, requestDomainRegister, false); + skipGenericBlocking = context.skipGenericBlocking; let party = isFirstParty(context.pageDomain, requestHostnameRegister) ? FirstParty : ThirdParty; @@ -1835,13 +1873,13 @@ if ( bucket = categories[this.makeCategoryKey(BlockAnyParty | Important | type)] ) { bf = this.matchTokens(bucket, url); if ( bf !== false ) { - return 'sb:' + bf.toString(); + return Array.isArray(bf) ? 'sb:' + bf[0].toString() + '$important' + bf[1].toString() : 'sb:' + bf.toString() + '$important'; } } if ( bucket = categories[this.makeCategoryKey(BlockAction | Important | type | party)] ) { bf = this.matchTokens(bucket, url); if ( bf !== false ) { - return 'sb:' + bf.toString(); + return Array.isArray(bf) ? 'sb:' + bf[0].toString() + '$important' + bf[1].toString() : 'sb:' + bf.toString() + '$important'; } } @@ -1865,22 +1903,26 @@ if ( bucket = categories[this.makeCategoryKey(AllowAnyParty | type)] ) { af = this.matchTokens(bucket, url); if ( af !== false ) { - return 'sa:' + af.toString(); + return Array.isArray(af) ? 'sa:' + af[0].toString() + af[1].toString() : 'sa:' + af.toString(); } } if ( bucket = categories[this.makeCategoryKey(AllowAction | type | party)] ) { af = this.matchTokens(bucket, url); if ( af !== false ) { - return 'sa:' + af.toString(); + return Array.isArray(af) ? 'sa:' + af[0].toString() + af[1].toString() : 'sa:' + af.toString(); } } - - return 'sb:' + bf.toString(); + return Array.isArray(bf) ? 'sb:' + bf[0].toString() + bf[1].toString() : 'sb:' + bf.toString(); }; FilterContainer.prototype.matchStringExceptionOnlyRule = function(url,requestType) { pageHostnameRegister = µb.URI.hostnameFromURI(url) || ''; + pageDomainRegister = µb.URI.domainFromHostname(pageHostnameRegister) || pageHostnameRegister; + pageHostnameHashes = µb.getHostnameHashesFromLabelsBackward(pageHostnameRegister, pageDomainRegister, false); + requestHostnameRegister = µb.URI.hostnameFromURI(url); + requestDomainRegister = µb.URI.domainFromHostname(requestHostnameRegister) || requestHostnameRegister; + requestHostnameHashes = µb.getHostnameHashesFromLabelsBackward(requestHostnameRegister, requestDomainRegister, false); let categories = this.categories; let af = false,bf = false, bucket; skipGenericBlocking = false; @@ -1895,19 +1937,19 @@ if ( bucket = categories[this.makeCategoryKey(BlockAction | AnyParty | type | Important)] ) { bf = this.matchTokens(bucket, url); if ( bf !== false ) { - return 'sb:' + bf.toString(); + return Array.isArray(bf) ? 'sb:' + bf[0].toString() + bf[1].toString() : 'sb:' + bf.toString(); } } if ( bucket = categories[this.makeCategoryKey(AllowAnyParty | type)] ) { af = this.matchTokens(bucket, url); if ( af !== false ) { - return 'sa:' + af.toString(); + return Array.isArray(af) ? 'sa:' + af[0].toString() + af[1].toString() : 'sa:' + af.toString(); } } if ( bucket = categories[this.makeCategoryKey(AllowAction | type | party)] ) { af = this.matchTokens(bucket, url); if ( af !== false ) { - return 'sa:' + af.toString(); + return Array.isArray(af) ? 'sa:' + af[0].toString() + af[1].toString() : 'sa:' + af.toString(); } } return ''; @@ -1941,7 +1983,11 @@ FilterContainer.prototype.matchAndFetchCspData = function(context) { pageHostnameRegister = context.pageHostname || ''; + pageDomainRegister = µb.URI.domainFromHostname(pageHostnameRegister) || pageHostnameRegister; + pageHostnameHashes = µb.getHostnameHashesFromLabelsBackward(pageHostnameRegister, pageDomainRegister, false); requestHostnameRegister = context.requestHostname; + requestDomainRegister = µb.URI.domainFromHostname(requestHostnameRegister) || requestHostnameRegister; + requestHostnameHashes = µb.getHostnameHashesFromLabelsBackward(requestHostnameRegister, requestDomainRegister, false); let bucket; let type = typeNameToTypeValue["csp"]; let toBlockCSP = new Map(); @@ -1987,7 +2033,7 @@ // `match-case` option not supported, but then, I saw only one // occurrence of it in all the supported lists (bulgaria list). let url = context.requestURL.toLowerCase(); - + // The logic here is simple: // // block = !whitelisted && blacklisted @@ -2012,16 +2058,20 @@ // These registers will be used by various filters pageHostnameRegister = context.pageHostname || ''; + pageDomainRegister = µb.URI.domainFromHostname(pageHostnameRegister) || pageHostnameRegister; + pageHostnameHashes = µb.getHostnameHashesFromLabelsBackward(pageHostnameRegister, pageDomainRegister, false); requestHostnameRegister = context.requestHostname; + requestDomainRegister = µb.URI.domainFromHostname(requestHostnameRegister) || requestHostnameRegister; + requestHostnameHashes = µb.getHostnameHashesFromLabelsBackward(requestHostnameRegister, requestDomainRegister, false); skipGenericBlocking = context.skipGenericBlocking; let party = isFirstParty(context.pageDomain, context.requestHostname) ? FirstParty : ThirdParty; let filterClasses = this.categories; let bucket; - + // Tokenize only once this.tokenize(url); - + let bf = false; // https://github.com/uBlockAdmin/uBlock/issues/139 @@ -2032,25 +2082,25 @@ if ( bucket = filterClasses[this.makeCategoryKey(BlockAnyTypeAnyParty | Important)] ) { bf = this.matchTokens(bucket, url); if ( bf !== false ) { - return 'sb:' + bf.toString() + '$important'; + return Array.isArray(bf) ? 'sb:' + bf[0].toString() + '$important' + bf[1].toString() : 'sb:' + bf.toString() + '$important'; } } if ( bucket = filterClasses[this.makeCategoryKey(BlockAnyType | Important | party)] ) { bf = this.matchTokens(bucket, url); if ( bf !== false ) { - return 'sb:' + bf.toString() + '$important'; + return Array.isArray(bf) ? 'sb:' + bf[0].toString() + '$important' + bf[1].toString() : 'sb:' + bf.toString() + '$important'; } } if ( bucket = filterClasses[this.makeCategoryKey(BlockAnyParty | Important | type)] ) { bf = this.matchTokens(bucket, url); if ( bf !== false ) { - return 'sb:' + bf.toString() + '$important'; + return Array.isArray(bf) ? 'sb:' + bf[0].toString() + '$important' + bf[1].toString() : 'sb:' + bf.toString() + '$important'; } } if ( bucket = filterClasses[this.makeCategoryKey(BlockAction | Important | type | party)] ) { bf = this.matchTokens(bucket, url); if ( bf !== false ) { - return 'sb:' + bf.toString() + '$important'; + return Array.isArray(bf) ? 'sb:' + bf[0].toString() + '$important' + bf[1].toString() : 'sb:' + bf.toString() + '$important'; } } @@ -2088,29 +2138,29 @@ if ( bucket = filterClasses[this.makeCategoryKey(AllowAnyTypeAnyParty)] ) { af = this.matchTokens(bucket, url); if ( af !== false ) { - return 'sa:' + af.toString(); + return Array.isArray(af) ? 'sa:' + af[0].toString() + af[1].toString() : 'sa:' + af.toString(); } } if ( bucket = filterClasses[this.makeCategoryKey(AllowAnyType | party)] ) { af = this.matchTokens(bucket, url); if ( af !== false ) { - return 'sa:' + af.toString(); + return Array.isArray(af) ? 'sa:' + af[0].toString() + af[1].toString() : 'sa:' + af.toString(); } } if ( bucket = filterClasses[this.makeCategoryKey(AllowAnyParty | type)] ) { af = this.matchTokens(bucket, url); if ( af !== false ) { - return 'sa:' + af.toString(); + return Array.isArray(af) ? 'sa:' + af[0].toString() + af[1].toString() : 'sa:' + af.toString(); } } if ( bucket = filterClasses[this.makeCategoryKey(AllowAction | type | party)] ) { af = this.matchTokens(bucket, url); if ( af !== false ) { - return 'sa:' + af.toString(); + return Array.isArray(af) ? 'sa:' + af[0].toString() + af[1].toString() : 'sa:' + af.toString(); } } - - return 'sb:' + bf.toString(); + + return Array.isArray(bf) ? 'sb:' + bf[0].toString() + bf[1].toString() : 'sb:' + bf.toString(); }; /******************************************************************************/ diff --git a/src/js/storage.js b/src/js/storage.js index 29442cf12..ee19a6cec 100644 --- a/src/js/storage.js +++ b/src/js/storage.js @@ -163,6 +163,10 @@ var compiledFilters = µb.compileFilters(filters); var snfe = µb.staticNetFilteringEngine; var cfe = µb.cosmeticFilteringEngine; + if(compiledFilters.get("c").length > 0) { + cfe.appendHostnameFilters(compiledFilters.get("c")); + compiledFilters.set("c", []); + } var acceptedCount = snfe.acceptedCount + cfe.acceptedCount; var duplicateCount = snfe.duplicateCount + cfe.duplicateCount; µb.applyCompiledFilters(compiledFilters); @@ -664,7 +668,6 @@ magic: this.systemSettings.selfieMagic, publicSuffixList: publicSuffixList.toSelfie(), filterLists: this.remoteBlacklists, - domainList: this.domainHolder.toSelfie(), staticNetFilteringEngine: this.staticNetFilteringEngine.toSelfie(), cosmeticFilteringEngine: this.cosmeticFilteringEngine.toSelfie() }; diff --git a/src/js/traffic.js b/src/js/traffic.js index d8de7978c..1bbbbc265 100644 --- a/src/js/traffic.js +++ b/src/js/traffic.js @@ -102,15 +102,6 @@ var onBeforeRequest = function(details) { if ( µb.isAllowResult(result) ) { //console.debug('traffic.js > onBeforeRequest(): ALLOW "%s" (%o) because "%s"', details.url, details, result); - requestContext.requestType = 'rewrite'; - var result = pageStore.filterRequest(requestContext); - - if ( µb.isBlockResult(result) ) { - let rewrittenUrl = µBlock.rewriteEngine.rewriteUrl(requestContext.requestURL,result.replace("sb:","")); - if (rewrittenUrl != details.url) - return {redirectUrl: rewrittenUrl}; - } - // https://github.com/uBlockAdmin/uBlock/issues/114 frameId = details.frameId; if ( frameId > 0 ) { diff --git a/src/js/ublock.js b/src/js/ublock.js index 1585ef2b9..280c237a5 100644 --- a/src/js/ublock.js +++ b/src/js/ublock.js @@ -342,103 +342,6 @@ https://github.com/darkskyapp/string-hash/blob/master/index.js }; /******************************************************************************/ - -µBlock.domainHolder = (function() { - var domains = []; - var parseOptHostnames = function(raw) { - let ihostnames = []; - let notHostnames = [] - let hostnames = raw.split('|'); - let hostname; - for ( let i = 0; i < hostnames.length; i++ ) { - hostname = hostnames[i]; - if ( hostname.charAt(0) === '~' ) { - notHostnames.push(hostname.slice(1)); - } else { - ihostnames.push(hostname); - } - } - return [ihostnames, notHostnames]; - }; - - var FilterDomain = function(domainList) { - this.domainList = domainList; - } - FilterDomain.prototype.toString = function() { - return '$domain='+ this.domainList; - }; - FilterDomain.prototype.match = function(pageHostnameRegister) { - let [hostnames , notHostnames] = parseOptHostnames(this.domainList); - return ( - (hostnames.length == 0 ? true : hostnames.some(hostname => pageHostnameRegister.slice(-hostname.length) === hostname)) && - (notHostnames.some(hostname => pageHostnameRegister.slice(-hostname.length) === hostname) === false) - ); - } - FilterDomain.prototype.toSelfie = function() { - return this.domainList; - } - FilterDomain.fromSelfie = function(domainList) { - return FilterDomain(domainList); - } - FilterDomain.prototype.toJSON = function() { - return this.toSelfie(); - } - - var getIndex = function(domainStr) { - let objHostname; - var finddomain = function(element) { - return element.domainList === this; - } - let index = domains.findIndex(finddomain, domainStr); - if(index != -1) { - return index; - } else { - domains.push(new FilterDomain(domainStr)); - return domains.findIndex(finddomain, domainStr); - } - } - var reset = function(){ - domains.length = 0; - } - var getData = function(index) { - return domains[index]; - } - var match = function(index, hostname) { - let objFilterDomain = domains[index]; - if(objFilterDomain === undefined) { - console.error("match: missing Index: " + index); - return false; - } - return objFilterDomain.match(hostname); - } - var toSelfie = function() { - return JSON.stringify(domains); - } - var fromSelfie = function(domainslst) { - let arr = JSON.parse(domainslst); - arr.forEach(function(value, key) { - domains[key] = new FilterDomain(value); - }); - } - var toString = function(index) { - let objFilterDomain = domains[index]; - if(objFilterDomain === undefined) { - console.error("toString: missing Index: " + index); - return ''; - } - return objFilterDomain.toString(); - } - return { - "getIndex": getIndex, - "getData": getData, - "match": match, - "toString": toString, - "toSelfie": toSelfie, - "fromSelfie": fromSelfie, - "reset": reset - }; -})(); - /* The code below is taken from here: https://github.com/sindresorhus/quick-lru/blob/master/index.js Author: https://github.com/sindresorhus @@ -657,33 +560,63 @@ https://github.com/darkskyapp/string-hash/blob/master/index.js The code below is taken from here: https://github.com/cliqz-oss/adblocker/blob/32397857de8c439fb4c961f12d7e17c750b3fc98/src/filters/cosmetic.ts#L51 License: https://github.com/cliqz-oss/adblocker/blob/master/LICENSE */ -µBlock.getHostnameHashesFromLabelsBackward = function(hostname, domain) { +µBlock.getHostnameHashesFromLabelsBackward = function(hostname, domain, hashesOnly = true) { if(hostname == domain && hostname.indexOf('www.') !== -1) { domain = hostname.slice(hostname.indexOf('.') + 1); } - return µBlock.getHashesFromLabelsBackward(hostname, hostname.length, hostname.length - domain.length); + return µBlock.getDomainHashesFromBackward(hostname, hostname.length, hostname.length - domain.length, hashesOnly); } + /* The code below is taken from here: https://github.com/cliqz-oss/adblocker/blob/32397857de8c439fb4c961f12d7e17c750b3fc98/src/filters/cosmetic.ts#L16 License: https://github.com/cliqz-oss/adblocker/blob/master/LICENSE */ -µBlock.getHashesFromLabelsBackward = function(hostname, end, startOfDomain) { - const hashes = []; +µBlock.getDomainHashesFromBackward = function(hostname, end, startOfDomain, hashesOnly) { + const hashes = new Map(); let hash = 5381; // Compute hash backward, label per label for (let i = end - 1; i >= 0; i -= 1) { // Process label if (hostname[i] === '.' && i < startOfDomain) { - hashes.push(hash >>> 0); + hashes.set(hash >>> 0,hostname.slice(-(end - i - 1))); } - + // Update hash hash = (hash * 33) ^ hostname.charCodeAt(i); } + hashes.set(hash >>> 0, hostname); + + if(hashesOnly) { + return [ ...hashes.keys() ]; + } else { + return hashes; + } +} +/* + The code below is taken from here: https://github.com/cliqz-oss/adblocker/blob/675755584a2c9b45f66ab26e7683f513e1253b01/src/utils.ts#L225 + License: https://github.com/cliqz-oss/adblocker/blob/master/LICENSE +*/ +µBlock.binSearch = function(arr, elt) { + if (arr.length === 0) { + return -1; + } - hashes.push(hash >>> 0); - return hashes; + let low = 0; + let high = arr.length - 1; + + while (low <= high) { + const mid = (low + high) >>> 1; + const midVal = arr[mid]; + if (midVal < elt) { + low = mid + 1; + } else if (midVal > elt) { + high = mid - 1; + } else { + return mid; + } + } + return -1; } µBlock.logCosmeticFilters = (function() { var tabIdToTimerMap = {}; @@ -705,64 +638,6 @@ https://github.com/darkskyapp/string-hash/blob/master/index.js return injectAsync; })(); - -µBlock.rewriteEngine = (function (){ - - var parseResult = function(result) { - let rewrite = ''; - let pos = result.indexOf('$'); - let text = result.slice(0, pos); - if ( pos !== -1 ) { - rewrite = result.slice(pos + 1).slice(8); - } - return [text,rewrite]; - } - - var convertTextToRexExp = function (text){ - // remove multiple wildcards - if (text.length >= 2 && text[0] == "/" && text[text.length - 1] == "/") { - text = text.substr(1, text.length - 2); - } else { - text = text.replace(/\*+/g, "*"); - - text = text - // remove anchors following separator placeholder - .replace(/\^\|$/, "^") - // escape special symbols - .replace(/\W/g, "\\$&") - // replace wildcards by .* - .replace(/\\\*/g, ".*") - // process separator placeholders (all ANSI characters but alphanumeric - // characters and _%.-) - .replace(/\\\^/g, "(?:[\\x00-\\x24\\x26-\\x2C\\x2F\\x3A-\\x40\\x5B-\\x5E\\x60\\x7B-\\x7F]|$)") - // process extended anchor at expression start - .replace(/^\\\|\\\|/, "^[\\w\\-]+:\\/+(?!\\/)(?:[^\\/]+\\.)?") - // process anchor at expression start - .replace(/^\\\|/, "^") - // process anchor at expression end - .replace(/\\\|$/, "$"); - } - let regexp = new RegExp(text,false ? "" : "i"); - return regexp; - } - - var rewriteUrl = function(url,result) { - let [text,rewrite] = parseResult(result); - let regexp = convertTextToRexExp(text); - try - { - let rewrittenUrl = new URL(url.replace(regexp, rewrite), url); - if (rewrittenUrl.origin == new URL(url).origin) - return rewrittenUrl.href; - } - catch (e) - { - } - return url; - } - return {rewriteUrl : rewriteUrl}; -})(); - /* The below code is borrowed from: https://github.com/gorhill/uBlock/blob/13f2b6b86ff00827650ee2e70ea5f4779845ce4a/src/js/scriptlet-filtering.js#L61