From 260b53581e0a233e2b56690cef09792dbc5c23ca Mon Sep 17 00:00:00 2001 From: Anton Lazarev Date: Sun, 30 Jul 2023 17:06:13 -0700 Subject: [PATCH] initial plumbing for resource dependency support --- benches/bench_redirect_performance.rs | 1 + src/blocker.rs | 42 ++++------------ src/cosmetic_filter_cache.rs | 16 ++----- src/engine.rs | 45 +++++------------ src/resources/mod.rs | 69 ++++++++++++++++++++++++++- src/resources/resource_assembler.rs | 2 + src/resources/resource_storage.rs | 43 ++++++++++++----- tests/matching.rs | 1 + 8 files changed, 128 insertions(+), 91 deletions(-) diff --git a/benches/bench_redirect_performance.rs b/benches/bench_redirect_performance.rs index 9cd28797..7b389533 100644 --- a/benches/bench_redirect_performance.rs +++ b/benches/bench_redirect_performance.rs @@ -142,6 +142,7 @@ fn build_resources_for_filters(#[allow(unused)] filters: &[NetworkFilter]) -> Re aliases: vec![], kind: ResourceType::Mime(MimeType::from_extension(&redirect)), content: base64::encode(redirect), + dependencies: vec![], } }) .for_each(|resource| { diff --git a/src/blocker.rs b/src/blocker.rs index 98b763a8..f4d5caef 100644 --- a/src/blocker.rs +++ b/src/blocker.rs @@ -1401,12 +1401,9 @@ mod blocker_tests { let blocker = Blocker::new(network_filters, &blocker_options); let mut resources = ResourceStorage::default(); - resources.add_resource(Resource { - name: "noop-0.1s.mp3".to_string(), - aliases: vec![], - kind: crate::resources::ResourceType::Mime(crate::resources::MimeType::AudioMp3), - content: base64::encode("mp3"), - }).unwrap(); + resources.add_resource( + Resource::simple("noop-0.1s.mp3", crate::resources::MimeType::AudioMp3, "mp3"), + ).unwrap(); let matched_rule = blocker.check(&request, &resources); assert_eq!(matched_rule.matched, false); @@ -1433,12 +1430,9 @@ mod blocker_tests { let blocker = Blocker::new(network_filters, &blocker_options); let mut resources = ResourceStorage::default(); - resources.add_resource(Resource { - name: "noop-0.1s.mp3".to_string(), - aliases: vec![], - kind: crate::resources::ResourceType::Mime(crate::resources::MimeType::AudioMp3), - content: base64::encode("mp3"), - }).unwrap(); + resources.add_resource( + Resource::simple("noop-0.1s.mp3", crate::resources::MimeType::AudioMp3, "mp3"), + ).unwrap(); let matched_rule = blocker.check(&request, &resources); assert_eq!(matched_rule.matched, false); @@ -1465,12 +1459,7 @@ mod blocker_tests { let blocker = Blocker::new(network_filters, &blocker_options); let mut resources = ResourceStorage::default(); - resources.add_resource(Resource { - name: "noop.txt".to_string(), - aliases: vec![], - kind: crate::resources::ResourceType::Mime(crate::resources::MimeType::TextPlain), - content: base64::encode("noop"), - }).unwrap(); + resources.add_resource(Resource::simple("noop.txt", crate::resources::MimeType::TextPlain, "noop")).unwrap(); let matched_rule = blocker.check(&request, &resources); assert_eq!(matched_rule.matched, true); @@ -1645,12 +1634,7 @@ mod blocker_tests { let blocker = Blocker::new(network_filters, &blocker_options); let mut resources = ResourceStorage::default(); - resources.add_resource(Resource { - name: "noopjs".into(), - aliases: vec![], - kind: crate::resources::ResourceType::Mime(crate::resources::MimeType::ApplicationJavascript), - content: base64::encode("(() => {})()"), - }).unwrap(); + resources.add_resource(Resource::simple("noopjs", crate::resources::MimeType::ApplicationJavascript, "(() => {})()")).unwrap(); let result = blocker.check(&Request::new("https://example.com?q=1&test=2#blue", "https://antonok.com", "script").unwrap(), &resources); assert_eq!(result.rewritten_url, Some("https://example.com?q=1#blue".into())); @@ -1846,14 +1830,8 @@ fn test_removeparam_same_tokens() { let blocker = Blocker::new(network_filters, &blocker_options); let mut resources = ResourceStorage::default(); fn add_simple_resource(resources: &mut ResourceStorage, identifier: &str) -> Option { - let b64 = base64::encode(identifier); - resources.add_resource(Resource { - name: identifier.into(), - aliases: vec![], - kind: crate::resources::ResourceType::Mime(crate::resources::MimeType::TextPlain), - content: base64::encode(identifier), - }).unwrap(); - return Some(format!("data:text/plain;base64,{}", b64)); + resources.add_resource(Resource::simple(identifier, crate::resources::MimeType::TextPlain, identifier)).unwrap(); + Some(format!("data:text/plain;base64,{}", base64::encode(identifier))) } let a_redirect = add_simple_resource(&mut resources, "a"); let b_redirect = add_simple_resource(&mut resources, "b"); diff --git a/src/cosmetic_filter_cache.rs b/src/cosmetic_filter_cache.rs index ed3c2d05..1b994c84 100644 --- a/src/cosmetic_filter_cache.rs +++ b/src/cosmetic_filter_cache.rs @@ -662,19 +662,10 @@ mod cosmetic_cache_tests { aliases: vec![], kind: ResourceType::Template, content: base64::encode("set-constant.js, {{1}}, {{2}}"), + dependencies: vec![], }, - Resource { - name: "nowebrtc.js".into(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::ApplicationJavascript), - content: base64::encode("nowebrtc.js"), - }, - Resource { - name: "window.open-defuser.js".into(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::ApplicationJavascript), - content: base64::encode("window.open-defuser.js"), - }, + Resource::simple("nowebrtc.js", MimeType::ApplicationJavascript, "nowebrtc.js"), + Resource::simple("window.open-defuser.js", MimeType::ApplicationJavascript, "window.open-defuser.js"), ]); let out = cfcache.hostname_cosmetic_resources(&resources, "sub.example.com", false); @@ -1040,6 +1031,7 @@ mod cosmetic_cache_tests { aliases: vec!["aopr".to_string()], kind: ResourceType::Template, content: base64::encode("abort-on-property-read.js, {{1}}"), + dependencies: vec![], } ]); diff --git a/src/engine.rs b/src/engine.rs index 1927cb26..4f119a4a 100644 --- a/src/engine.rs +++ b/src/engine.rs @@ -298,7 +298,7 @@ fn _assertions() { #[cfg(test)] mod tests { use super::*; - use crate::resources::{ResourceType, MimeType}; + use crate::resources::MimeType; use crate::lists::FilterFormat; #[test] @@ -523,18 +523,8 @@ mod tests { ], Default::default()); engine.use_resources([ - Resource { - name: "nooptext".to_string(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::TextPlain), - content: base64::encode(""), - }, - Resource { - name: "noopcss".to_string(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::TextPlain), - content: base64::encode(""), - }, + Resource::simple("nooptext", MimeType::TextPlain, ""), + Resource::simple("noopcss", MimeType::TextCss, ""), ]); let serialized = engine.serialize_compressed().unwrap(); @@ -555,20 +545,12 @@ mod tests { })(); "#; - engine.use_resources([ - Resource { - name: "nooptext".to_owned(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::TextPlain), - content: "".to_owned(), - }, - Resource { - name: "noopjs".to_owned(), - aliases: vec!["noop.js".to_owned()], - kind: ResourceType::Mime(MimeType::ApplicationJavascript), - content: base64::encode(script), - }, - ]); + let mut resources = [ + Resource::simple("nooptext", MimeType::TextPlain, ""), + Resource::simple("noopjs", MimeType::ApplicationJavascript, script), + ]; + resources[1].aliases.push("noop.js".to_string()); + engine.use_resources(resources); let url = "http://example.com/ad-banner.gif"; let request = Request::new(url, "", "").unwrap(); @@ -670,12 +652,9 @@ mod tests { ], Default::default()); let mut engine = Engine::from_filter_set(filter_set, false); - engine.add_resource(Resource { - name: "addthis.com/addthis_widget.js".to_owned(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::ApplicationJavascript), - content: base64::encode("window.addthis = undefined"), - }).unwrap(); + engine.add_resource( + Resource::simple("addthis.com/addthis_widget.js", MimeType::ApplicationJavascript, "window.addthis = undefined"), + ).unwrap(); let request = Request::new("https://s7.addthis.com/js/250/addthis_widget.js?pub=resto", "https://www.rhmodern.com/catalog/product/product.jsp?productId=prod14970086&categoryId=cat7150028", "script").unwrap(); let result = engine.check_network_request(&request); diff --git a/src/resources/mod.rs b/src/resources/mod.rs index d8ce76cf..f4673f15 100644 --- a/src/resources/mod.rs +++ b/src/resources/mod.rs @@ -25,11 +25,37 @@ pub struct Resource { /// Represents the primary name of the resource, often a filename pub name: String, /// Represents secondary names that can be used to access the resource + #[serde(default)] pub aliases: Vec, /// How to interpret the resource data within `content` pub kind: ResourceType, /// The resource data, encoded using standard base64 configuration pub content: String, + /// Optionally contains the name of any dependencies used by this resource. Currently, this + /// only applies to `application/javascript` and `fn/javascript` MIME types. + /// + /// Aliases should never be added to this list. It should only contain primary/canonical + /// resource names. + /// + /// Currently ignored, but will be respected in a future release. Bundle any required + /// dependencies inside the resource for now. + #[serde(default, skip_serializing_if = "Vec::is_empty")] + pub dependencies: Vec, +} + +impl Resource { + /// Convenience constructor for tests. Creates a new [`Resource`] with no aliases or + /// dependencies. Content will be automatically base64-encoded by the constructor. + #[cfg(test)] + pub fn simple(name: &str, kind: MimeType, content: &str) -> Self { + Self { + name: name.to_string(), + aliases: vec![], + kind: ResourceType::Mime(kind), + content: base64::encode(content), + dependencies: vec![], + } + } } /// Different ways that the data within the `content` field of a `Resource` can be interpreted. @@ -44,20 +70,46 @@ pub enum ResourceType { Template, } +impl ResourceType { + /// Can resources of this type be used as network redirects? + pub fn supports_redirect(&self) -> bool { + !matches!(self, ResourceType::Template | ResourceType::Mime(MimeType::FnJavascript)) + } + + /// Can resources of this type be used for scriptlet injections? + pub fn supports_scriptlet_injection(&self) -> bool { + matches!(self, ResourceType::Template | ResourceType::Mime(MimeType::ApplicationJavascript)) + } +} + /// Acceptable MIME types for resources used by `$redirect` and `+js(...)` adblock rules. #[derive(Serialize, Deserialize, Debug, Clone, PartialEq)] #[serde(into = "&str")] #[serde(from = "&str")] pub enum MimeType { + /// `"text/css"` TextCss, + /// `"image/gif"` ImageGif, + /// `"text/html"` TextHtml, + /// `"application/javascript"` ApplicationJavascript, + /// `"audio/mp3"` AudioMp3, + /// `"video/mp4"` VideoMp4, + /// `"image/png"` ImagePng, + /// `"text/plain"` TextPlain, + /// `"text/xml"` TextXml, + /// Custom MIME type invented for the uBlock Origin project. Represented by `"fn/javascript"`. + /// Used to describe JavaScript functions that can be used as dependencies of other JavaScript + /// resources. + FnJavascript, + /// Any other unhandled MIME type. Maps to `"application/octet-stream"` when re-serialized. Unknown, } @@ -88,7 +140,20 @@ impl MimeType { /// Should the MIME type decode as valid UTF8? pub fn is_textual(&self) -> bool { - matches!(self, MimeType::ApplicationJavascript | MimeType::TextCss | MimeType::TextPlain | MimeType::TextHtml | MimeType::TextXml) + matches!( + self, + Self::ApplicationJavascript + | Self::FnJavascript + | Self::TextCss + | Self::TextPlain + | Self::TextHtml + | Self::TextXml + ) + } + + /// Can the MIME type have dependencies on other resources? + pub fn supports_dependencies(&self) -> bool { + matches!(self, Self::ApplicationJavascript | Self::FnJavascript) } } @@ -104,6 +169,7 @@ impl From<&str> for MimeType { "image/png" => MimeType::ImagePng, "text/plain" => MimeType::TextPlain, "text/xml" => MimeType::TextXml, + "fn/javascript" => MimeType::FnJavascript, _ => MimeType::Unknown, } } @@ -121,6 +187,7 @@ impl From<&MimeType> for &str { MimeType::ImagePng => "image/png", MimeType::TextPlain => "text/plain", MimeType::TextXml => "text/xml", + MimeType::FnJavascript => "fn/javascript", MimeType::Unknown => "application/octet-stream", } } diff --git a/src/resources/resource_assembler.rs b/src/resources/resource_assembler.rs index b6b66e47..3572e11e 100644 --- a/src/resources/resource_assembler.rs +++ b/src/resources/resource_assembler.rs @@ -194,6 +194,7 @@ fn read_template_resources(scriptlets_data: &str) -> Vec { .unwrap_or_default(), kind, content: base64::encode(&script), + dependencies: vec![], }); name = None; @@ -230,6 +231,7 @@ fn build_resource_from_file_contents( aliases, kind: ResourceType::Mime(mimetype), content, + dependencies: vec![], } } diff --git a/src/resources/resource_storage.rs b/src/resources/resource_storage.rs index 9dc37eeb..45b6108d 100644 --- a/src/resources/resource_storage.rs +++ b/src/resources/resource_storage.rs @@ -38,6 +38,14 @@ impl ResourceStorage { /// Adds a resource to storage so that it can be retrieved later. pub fn add_resource(&mut self, resource: Resource) -> Result<(), AddResourceError> { if let ResourceType::Mime(content_type) = &resource.kind { + if matches!(content_type, MimeType::FnJavascript) { + return Err(AddResourceError::FnJavascriptNotSupported); + } + + if !resource.dependencies.is_empty() && !content_type.supports_dependencies() { + return Err(AddResourceError::ContentTypeDoesNotSupportDependencies); + } + // Ensure the resource contents are valid base64 (and utf8 if applicable) let decoded = base64::decode(&resource.content)?; if content_type.is_textual() { @@ -78,7 +86,7 @@ impl ResourceStorage { .get_internal_resource(&scriptlet_name) .ok_or(ScriptletResourceError::NoMatchingScriptlet)?; - if !matches!(resource.kind, ResourceType::Template | ResourceType::Mime(MimeType::ApplicationJavascript)) { + if !resource.kind.supports_scriptlet_injection() { return Err(ScriptletResourceError::ContentTypeNotInjectable); } @@ -99,6 +107,9 @@ impl ResourceStorage { let resource = self.get_internal_resource(resource_ident); resource.and_then(|resource| { + if !resource.kind.supports_redirect() { + return None; + } if let ResourceType::Mime(mime) = &resource.kind { Some(format!("data:{};base64,{}", mime, &resource.content)) } else { @@ -130,6 +141,10 @@ pub enum AddResourceError { InvalidUtf8Content, #[error("resource name already added")] NameAlreadyAdded, + #[error("fn/javascript mime type is not yet supported")] + FnJavascriptNotSupported, + #[error("resource content type does not support dependencies")] + ContentTypeDoesNotSupportDependencies, } impl From for AddResourceError { @@ -265,12 +280,9 @@ mod redirect_storage_tests { fn get_resource_by_name() { let mut storage = ResourceStorage::default(); storage - .add_resource(Resource { - name: "name.js".to_owned(), - aliases: vec![], - kind: ResourceType::Mime(MimeType::ApplicationJavascript), - content: base64::encode("resource data"), - }) + .add_resource( + Resource::simple("name.js", MimeType::ApplicationJavascript, "resource data"), + ) .unwrap(); assert_eq!( @@ -282,13 +294,10 @@ mod redirect_storage_tests { #[test] fn get_resource_by_alias() { let mut storage = ResourceStorage::default(); + let mut r = Resource::simple("name.js", MimeType::ApplicationJavascript, "resource data"); + r.aliases.push("alias.js".to_string()); storage - .add_resource(Resource { - name: "name.js".to_owned(), - aliases: vec!["alias.js".to_owned()], - kind: ResourceType::Mime(MimeType::ApplicationJavascript), - content: base64::encode("resource data"), - }) + .add_resource(r) .unwrap(); assert_eq!( @@ -351,30 +360,35 @@ mod scriptlet_storage_tests { aliases: vec![], kind: ResourceType::Template, content: base64::encode("console.log('Hello {{1}}, my name is {{2}}')"), + dependencies: vec![], }, Resource { name: "alert.js".to_owned(), aliases: vec![], kind: ResourceType::Template, content: base64::encode("alert('{{1}}')"), + dependencies: vec![], }, Resource { name: "blocktimer.js".to_owned(), aliases: vec![], kind: ResourceType::Template, content: base64::encode("setTimeout(blockAds, {{1}})"), + dependencies: vec![], }, Resource { name: "null.js".to_owned(), aliases: vec![], kind: ResourceType::Template, content: base64::encode("(()=>{})()"), + dependencies: vec![], }, Resource { name: "set-local-storage-item.js".to_owned(), aliases: vec![], kind: ResourceType::Template, content: base64::encode(r#"{{1}} that dollar signs in {{2}} are untouched"#), + dependencies: vec![], }, ]); @@ -431,18 +445,21 @@ mod scriptlet_storage_tests { aliases: vec!["acis.js".into()], kind: ResourceType::Mime(MimeType::ApplicationJavascript), content: base64::encode("(function() {alert(\"hi\");})();"), + dependencies: vec![], }, Resource { name: "abort-on-property-read.js".into(), aliases: vec!["aopr.js".into()], kind: ResourceType::Template, content: base64::encode("(function() {confirm(\"Do you want to {{1}}?\");})();"), + dependencies: vec![], }, Resource { name: "googletagservices_gpt.js".into(), aliases: vec!["googletagservices.com/gpt.js".into(), "googletagservices-gpt".into()], kind: ResourceType::Template, content: base64::encode("function(a1 = '', a2 = '') {console.log(a1, a2)}"), + dependencies: vec![], }, ]); diff --git a/tests/matching.rs b/tests/matching.rs index 8f851d21..9f10d354 100644 --- a/tests/matching.rs +++ b/tests/matching.rs @@ -45,6 +45,7 @@ fn build_resources_from_filters(filters: &[String]) -> Vec { aliases: vec![], kind: ResourceType::Mime(MimeType::from_extension(&redirect)), content: base64::encode(redirect), + dependencies: vec![], } }) .collect()