diff --git a/.github/workflows/CI.yml b/.github/workflows/CI.yml index 0b2a8ce..efb3970 100644 --- a/.github/workflows/CI.yml +++ b/.github/workflows/CI.yml @@ -117,7 +117,7 @@ jobs: manylinux: auto args: --release --out dist sccache: true - - uses: uraimo/run-on-arch-action@v2.3.0 + - uses: uraimo/run-on-arch-action@v2.8.1 if: matrix.target != 'ppc64' name: Install built wheel with: @@ -197,7 +197,7 @@ jobs: manylinux: musllinux_1_2 args: --release --out dist sccache: true - - uses: uraimo/run-on-arch-action@v2.3.0 + - uses: uraimo/run-on-arch-action@v2.8.1 name: Install built wheel with: arch: ${{ matrix.platform.arch }} diff --git a/Cargo.lock b/Cargo.lock index d0cb9fb..8a404cb 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -17,9 +17,9 @@ dependencies = [ [[package]] name = "autocfg" -version = "1.3.0" +version = "1.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0c4b4d0bd25bd0b74681c0ad21497610ce1b7c91b1022cd21c80c6fbdd9476b0" +checksum = "ace50bade8e6234aa140d9a2f552bbee1db4d353f69b8217bc503490fc1a9f26" [[package]] name = "bitflags" @@ -27,12 +27,29 @@ version = "2.6.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b048fb63fd8b5923fc5aa7b340d8e156aec7ec02f0c78fa8a6ddc2613f6f71de" +[[package]] +name = "byteorder" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1fd0f2584146f6f2ef48085050886acf353beff7305ebd1ae69500e27c67f64b" + [[package]] name = "cfg-if" version = "1.0.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "baf1de4339761588bc0619e3cbc0120ee582ebb74b53b4efbf79117bd2da40fd" +[[package]] +name = "displaydoc" +version = "0.2.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "form_urlencoded" version = "1.2.1" @@ -83,14 +100,143 @@ dependencies = [ "syn", ] +[[package]] +name = "icu_collections" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "db2fa452206ebee18c4b5c2274dbf1de17008e874b4dc4f0aea9d01ca79e4526" +dependencies = [ + "displaydoc", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_locid" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "13acbb8371917fc971be86fc8057c41a64b521c184808a698c02acc242dbf637" +dependencies = [ + "displaydoc", + "litemap", + "tinystr", + "writeable", + "zerovec", +] + +[[package]] +name = "icu_locid_transform" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "01d11ac35de8e40fdeda00d9e1e9d92525f3f9d887cdd7aa81d727596788b54e" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_locid_transform_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_locid_transform_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fdc8ff3388f852bede6b579ad4e978ab004f139284d7b28715f773507b946f6e" + +[[package]] +name = "icu_normalizer" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "19ce3e0da2ec68599d193c93d088142efd7f9c5d6fc9b803774855747dc6a84f" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_normalizer_data", + "icu_properties", + "icu_provider", + "smallvec", + "utf16_iter", + "utf8_iter", + "write16", + "zerovec", +] + +[[package]] +name = "icu_normalizer_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8cafbf7aa791e9b22bec55a167906f9e1215fd475cd22adfcf660e03e989516" + +[[package]] +name = "icu_properties" +version = "1.5.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "93d6020766cfc6302c15dbbc9c8778c37e62c14427cb7f6e601d849e092aeef5" +dependencies = [ + "displaydoc", + "icu_collections", + "icu_locid_transform", + "icu_properties_data", + "icu_provider", + "tinystr", + "zerovec", +] + +[[package]] +name = "icu_properties_data" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67a8effbc3dd3e4ba1afa8ad918d5684b8868b3b26500753effea8d2eed19569" + +[[package]] +name = "icu_provider" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6ed421c8a8ef78d3e2dbc98a973be2f3770cb42b606e3ab18d6237c4dfde68d9" +dependencies = [ + "displaydoc", + "icu_locid", + "icu_provider_macros", + "stable_deref_trait", + "tinystr", + "writeable", + "yoke", + "zerofrom", + "zerovec", +] + +[[package]] +name = "icu_provider_macros" +version = "1.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ec89e9337638ecdc08744df490b221a7399bf8d164eb52a665454e60e075ad6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "idna" -version = "0.5.0" +version = "1.0.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "634d9b1461af396cad843f47fdba5597a4f9e6ddd4bfb6ff5d85028c25cb12f6" +checksum = "686f825264d630750a544639377bae737628043f20d38bbc029e8f29ea968a7e" dependencies = [ - "unicode-bidi", - "unicode-normalization", + "idna_adapter", + "smallvec", + "utf8_iter", +] + +[[package]] +name = "idna_adapter" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "daca1df1c957320b2cf139ac61e7bd64fed304c5040df000a745aa1de3b4ef71" +dependencies = [ + "icu_normalizer", + "icu_properties", ] [[package]] @@ -101,9 +247,15 @@ checksum = "b248f5224d1d606005e02c97f5aa4e88eeb230488bcc03bc9ca4d7991399f2b5" [[package]] name = "libc" -version = "0.2.155" +version = "0.2.166" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c2ccc108bbc0b1331bd061864e7cd823c0cab660bbe6970e66e2c0614decde36" + +[[package]] +name = "litemap" +version = "0.7.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "97b3888a4aecf77e811145cadf6eef5901f4782c53886191b2f693f24761847c" +checksum = "4ee93343901ab17bd981295f2cf0026d4ad018c7c31ba84549a4ddbb47a45104" [[package]] name = "lock_api" @@ -172,9 +324,9 @@ dependencies = [ [[package]] name = "once_cell" -version = "1.19.0" +version = "1.20.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3fdb12b2476b595f9358c5161aa467c2438859caa136dec86c26fdd2efe17b92" +checksum = "1261fe7e33c73b354eab43b1273a57c8f967d0391e80353e51f764ac02cf6775" [[package]] name = "parking_lot" @@ -264,15 +416,18 @@ dependencies = [ [[package]] name = "portable-atomic" -version = "1.6.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "7170ef9988bc169ba16dd36a7fa041e5c4cbeb6a35b76d4c03daded371eae7c0" +checksum = "280dc24453071f1b63954171985a0b0d30058d287960968b9b2aca264c8d4ee6" [[package]] name = "ppv-lite86" -version = "0.2.17" +version = "0.2.20" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5b40af805b3121feab8a3c29f04d8ad262fa8e0561883e7653e024ae4479e6de" +checksum = "77957b295656769bb8ad2b6a6b09d897d94f05c41b069aede1fcdaa675eaea04" +dependencies = [ + "zerocopy", +] [[package]] name = "precomputed-hash" @@ -282,18 +437,18 @@ checksum = "925383efa346730478fb4838dbe9137d2a47675ad789c546d150a6e1dd4ab31c" [[package]] name = "proc-macro2" -version = "1.0.86" +version = "1.0.92" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5e719e8df665df0d1c8fbfd238015744736151d4445ec0836b8e628aae103b77" +checksum = "37d3544b3f2748c54e147655edb5025752e2303145b5aefb3c3ea2c78b973bb0" dependencies = [ "unicode-ident", ] [[package]] name = "pyo3" -version = "0.22.6" +version = "0.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "f402062616ab18202ae8319da13fa4279883a2b8a9d9f83f20dbade813ce1884" +checksum = "f54b3d09cbdd1f8c20650b28e7b09e338881482f4aa908a5f61a00c98fba2690" dependencies = [ "cfg-if", "indoc", @@ -309,9 +464,9 @@ dependencies = [ [[package]] name = "pyo3-build-config" -version = "0.22.6" +version = "0.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "b14b5775b5ff446dd1056212d778012cbe8a0fbffd368029fd9e25b514479c38" +checksum = "3015cf985888fe66cfb63ce0e321c603706cd541b7aec7ddd35c281390af45d8" dependencies = [ "once_cell", "target-lexicon", @@ -319,9 +474,9 @@ dependencies = [ [[package]] name = "pyo3-ffi" -version = "0.22.6" +version = "0.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9ab5bcf04a2cdcbb50c7d6105de943f543f9ed92af55818fd17b660390fc8636" +checksum = "6fca7cd8fd809b5ac4eefb89c1f98f7a7651d3739dfb341ca6980090f554c270" dependencies = [ "libc", "pyo3-build-config", @@ -329,9 +484,9 @@ dependencies = [ [[package]] name = "pyo3-macros" -version = "0.22.6" +version = "0.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fd24d897903a9e6d80b968368a34e1525aeb719d568dba8b3d4bfa5dc67d453" +checksum = "34e657fa5379a79151b6ff5328d9216a84f55dc93b17b08e7c3609a969b73aa0" dependencies = [ "proc-macro2", "pyo3-macros-backend", @@ -341,9 +496,9 @@ dependencies = [ [[package]] name = "pyo3-macros-backend" -version = "0.22.6" +version = "0.23.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "36c011a03ba1e50152b4b394b479826cad97e7a21eb52df179cd91ac411cbfbe" +checksum = "295548d5ffd95fd1981d2d3cf4458831b21d60af046b729b6fd143b0ba7aee2f" dependencies = [ "heck", "proc-macro2", @@ -354,9 +509,9 @@ dependencies = [ [[package]] name = "quote" -version = "1.0.36" +version = "1.0.37" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0fa76aaf39101c457836aec0ce2316dbdc3ab723cdda1c6bd4e6ad4208acaca7" +checksum = "b5b9d34b8991d19d98081b46eacdd8eb58c6f2b201139f7c5f643cc155a633af" dependencies = [ "proc-macro2", ] @@ -393,9 +548,9 @@ dependencies = [ [[package]] name = "redox_syscall" -version = "0.5.2" +version = "0.5.7" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "c82cf8cff14456045f55ec4241383baeff27af886adb72ffb2162f99911de0fd" +checksum = "9b6dfecf2c74bce2466cabf93f6664d6998a69eb21e39f4207930065b27b771f" dependencies = [ "bitflags", ] @@ -408,18 +563,18 @@ checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49" [[package]] name = "serde" -version = "1.0.204" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bc76f558e0cbb2a839d37354c575f1dc3fdc6546b5be373ba43d95f231bf7c12" +checksum = "6513c1ad0b11a9376da888e3e0baa0077f1aed55c17f50e7b2397136129fb88f" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.204" +version = "1.0.215" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e0cd7e117be63d3c3678776753929474f3b04a43a080c744d6b0ae2a8c28e222" +checksum = "ad1e866f866923f252f05c889987993144fb74e722403468a4ebd70c3cd756c0" dependencies = [ "proc-macro2", "quote", @@ -438,6 +593,12 @@ version = "1.13.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3c5e1a9a646d36c3599cd173a41282daf47c44583ad367b8e6837255952e5c67" +[[package]] +name = "stable_deref_trait" +version = "1.2.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a8f112729512f8e442d81f95a8a7ddf2b7c6b8a1a6f509a95864142b30cab2d3" + [[package]] name = "string_cache" version = "0.8.7" @@ -466,20 +627,31 @@ dependencies = [ [[package]] name = "syn" -version = "2.0.69" +version = "2.0.89" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "201fcda3845c23e8212cd466bfebf0bd20694490fc0356ae8e428e0824a915a6" +checksum = "44d46482f1c1c87acd84dea20c1bf5ebff4c757009ed6bf19cfd36fb10e92c4e" dependencies = [ "proc-macro2", "quote", "unicode-ident", ] +[[package]] +name = "synstructure" +version = "0.13.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8af7666ab7b6390ab78131fb5b0fce11d6b7a6951602017c35fa82800708971" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + [[package]] name = "target-lexicon" -version = "0.12.14" +version = "0.12.16" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e1fc403891a21bcfb7c37834ba66a547a8f402146eba7265b5a6d88059c9ff2f" +checksum = "61c41af27dd6d1e27b1b16b489db798443478cef1f06a660c96db617ba5de3b1" [[package]] name = "tendril" @@ -493,40 +665,20 @@ dependencies = [ ] [[package]] -name = "tinyvec" -version = "1.7.0" +name = "tinystr" +version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ce6b6a2fb3a985e99cebfaefa9faa3024743da73304ca1c683a36429613d3d22" +checksum = "9117f5d4db391c1cf6927e7bea3db74b9a1c1add8f7eda9ffd5364f40f57b82f" dependencies = [ - "tinyvec_macros", + "displaydoc", + "zerovec", ] -[[package]] -name = "tinyvec_macros" -version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20" - -[[package]] -name = "unicode-bidi" -version = "0.3.15" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "08f95100a766bf4f8f28f90d77e0a5461bbdb219042e7679bebe79004fed8d75" - [[package]] name = "unicode-ident" -version = "1.0.12" +version = "1.0.14" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "3354b9ac3fae1ff6755cb6db53683adb661634f67557942dea4facebec0fee4b" - -[[package]] -name = "unicode-normalization" -version = "0.1.23" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "a56d1686db2308d901306f92a263857ef59ea39678a5458e7cb17f01415101f5" -dependencies = [ - "tinyvec", -] +checksum = "adb9e6ca4f869e1180728b7950e35922a7fc6397f7b641499e8f3ef06e50dc83" [[package]] name = "unindent" @@ -536,9 +688,9 @@ checksum = "c7de7d73e1754487cb58364ee906a499937a0dfabd86bcb980fa99ec8c8fa2ce" [[package]] name = "url" -version = "2.5.2" +version = "2.5.4" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "22784dbdf76fdde8af1aeda5622b546b422b6fc585325248a2bf9f5e41e94d6c" +checksum = "32f8b686cadd1473f4bd0117a5d28d36b1ade384ea9b5069a1c40aefed7fda60" dependencies = [ "form_urlencoded", "idna", @@ -551,6 +703,18 @@ version = "0.7.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "09cc8ee72d2a9becf2f2febe0205bbed8fc6615b7cb429ad062dc7b7ddd036a9" +[[package]] +name = "utf16_iter" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8232dd3cdaed5356e0f716d285e4b40b932ac434100fe9b7e0e8e935b9e6246" + +[[package]] +name = "utf8_iter" +version = "1.0.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be" + [[package]] name = "wasi" version = "0.11.0+wasi-snapshot-preview1" @@ -620,3 +784,103 @@ name = "windows_x86_64_msvc" version = "0.52.6" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec" + +[[package]] +name = "write16" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "d1890f4022759daae28ed4fe62859b1236caebfc61ede2f63ed4e695f3f6d936" + +[[package]] +name = "writeable" +version = "0.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1e9df38ee2d2c3c5948ea468a8406ff0db0b29ae1ffde1bcf20ef305bcc95c51" + +[[package]] +name = "yoke" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "120e6aef9aa629e3d4f52dc8cc43a015c7724194c97dfaf45180d2daf2b77f40" +dependencies = [ + "serde", + "stable_deref_trait", + "yoke-derive", + "zerofrom", +] + +[[package]] +name = "yoke-derive" +version = "0.7.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2380878cad4ac9aac1e2435f3eb4020e8374b5f13c296cb75b4620ff8e229154" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerocopy" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1b9b4fd18abc82b8136838da5d50bae7bdea537c574d8dc1a34ed098d6c166f0" +dependencies = [ + "byteorder", + "zerocopy-derive", +] + +[[package]] +name = "zerocopy-derive" +version = "0.7.35" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa4f8080344d4671fb4e831a13ad1e68092748387dfc4f55e356242fae12ce3e" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] + +[[package]] +name = "zerofrom" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "cff3ee08c995dee1859d998dea82f7374f2826091dd9cd47def953cae446cd2e" +dependencies = [ + "zerofrom-derive", +] + +[[package]] +name = "zerofrom-derive" +version = "0.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "595eed982f7d355beb85837f651fa22e90b3c044842dc7f2c2842c086f295808" +dependencies = [ + "proc-macro2", + "quote", + "syn", + "synstructure", +] + +[[package]] +name = "zerovec" +version = "0.10.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" +dependencies = [ + "yoke", + "zerofrom", + "zerovec-derive", +] + +[[package]] +name = "zerovec-derive" +version = "0.10.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6eafa6dfb17584ea3e2bd6e76e0cc15ad7af12b09abdd1ca55961bed9b1063c6" +dependencies = [ + "proc-macro2", + "quote", + "syn", +] diff --git a/Cargo.toml b/Cargo.toml index 099809b..2366598 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -15,4 +15,4 @@ crate-type = ["cdylib"] [dependencies] ammonia = "4.0.0" -pyo3 = { version = "0.22.6", features = ["abi3-py37", "gil-refs"] } +pyo3 = { version = "0.23.2", features = ["abi3-py38"] } diff --git a/src/lib.rs b/src/lib.rs index 056747f..8ee667b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -71,19 +71,19 @@ use pyo3::types::{PyString, PyTuple}; fn clean( py: Python, html: &str, - tags: Option>, - clean_content_tags: Option>, - attributes: Option>>, + tags: Option>, + clean_content_tags: Option>, + mut attributes: Option>>, attribute_filter: Option, strip_comments: bool, link_rel: Option<&str>, - generic_attribute_prefixes: Option>, - tag_attribute_values: Option>>>, - set_tag_attribute_values: Option>>, - url_schemes: Option>, + generic_attribute_prefixes: Option>, + tag_attribute_values: Option>>>, + set_tag_attribute_values: Option>>, + url_schemes: Option>, ) -> PyResult { if let Some(callback) = attribute_filter.as_ref() { - if !callback.as_ref(py).is_callable() { + if !callback.bind(py).is_callable() { return Err(PyTypeError::new_err("attribute_filter must be callable")); } } @@ -94,58 +94,88 @@ fn clean( || attributes.is_some() || attribute_filter.is_some() || !strip_comments - || link_rel != Some("noopener noreferrer") + || link_rel.as_deref() != Some("noopener noreferrer") || generic_attribute_prefixes.is_some() || tag_attribute_values.is_some() || set_tag_attribute_values.is_some() || url_schemes.is_some() { + let generic_attrs = attributes.as_mut().and_then(|attrs| attrs.remove("*")); let mut cleaner = ammonia::Builder::default(); - if let Some(tags) = tags { + if let Some(tags) = tags.as_ref() { + let tags: HashSet<&str> = tags.iter().map(|s| s.as_str()).collect(); cleaner.tags(tags); } - if let Some(tags) = clean_content_tags { + if let Some(tags) = clean_content_tags.as_ref() { + let tags: HashSet<&str> = tags.iter().map(|s| s.as_str()).collect(); cleaner.clean_content_tags(tags); } - if let Some(mut attrs) = attributes { - if let Some(generic_attrs) = attrs.remove("*") { + if let Some(attrs) = attributes.as_ref() { + let attrs: HashMap<&str, HashSet<&str>> = attrs + .iter() + .map(|(k, v)| (k.as_str(), v.iter().map(|s| s.as_str()).collect())) + .collect(); + cleaner.tag_attributes(attrs); + if let Some(generic_attrs) = generic_attrs.as_ref() { + let generic_attrs: HashSet<&str> = + generic_attrs.iter().map(|s| s.as_str()).collect(); cleaner.generic_attributes(generic_attrs); } - cleaner.tag_attributes(attrs); } - if let Some(prefixes) = generic_attribute_prefixes { + if let Some(prefixes) = generic_attribute_prefixes.as_ref() { + let prefixes: HashSet<&str> = prefixes.iter().map(|s| s.as_str()).collect(); cleaner.generic_attribute_prefixes(prefixes); } - if let Some(values) = tag_attribute_values { + if let Some(values) = tag_attribute_values.as_ref() { + let values: HashMap<&str, HashMap<&str, HashSet<&str>>> = values + .iter() + .map(|(tag, attrs)| { + let inner: HashMap<&str, HashSet<&str>> = attrs + .iter() + .map(|(attr, vals)| { + (attr.as_str(), vals.iter().map(|v| v.as_str()).collect()) + }) + .collect(); + (tag.as_str(), inner) + }) + .collect(); cleaner.tag_attribute_values(values); } - if let Some(values) = set_tag_attribute_values { + if let Some(values) = set_tag_attribute_values.as_ref() { + let values: HashMap<&str, HashMap<&str, &str>> = values + .iter() + .map(|(tag, attrs)| { + let inner: HashMap<&str, &str> = attrs + .iter() + .map(|(attr, val)| (attr.as_str(), val.as_str())) + .collect(); + (tag.as_str(), inner) + }) + .collect(); cleaner.set_tag_attribute_values(values); } if let Some(callback) = attribute_filter { cleaner.attribute_filter(move |element, attribute, value| { Python::with_gil(|py| { - let res = callback.call_bound( + let res = callback.call( py, - PyTuple::new_bound( + PyTuple::new( py, [ - PyString::new_bound(py, element), - PyString::new_bound(py, attribute), - PyString::new_bound(py, value), + PyString::new(py, element), + PyString::new(py, attribute), + PyString::new(py, value), ], - ), + ) + .unwrap(), None, ); let err = match res { Ok(val) => { if val.is_none(py) { return None; - } else if let Ok(s) = val.downcast::(py) { - match s.to_str() { - Ok(s) => return Some(Cow::::Owned(s.to_string())), - Err(err) => err, - } + } else if let Ok(s) = val.extract::(py) { + return Some(Cow::::Owned(s)); } else { PyTypeError::new_err( "expected attribute_filter to return str or None", @@ -154,24 +184,28 @@ fn clean( } Err(err) => err, }; - err.write_unraisable_bound( + err.write_unraisable( py, - Some(&PyTuple::new_bound( - py, - [ - PyString::new_bound(py, element), - PyString::new_bound(py, attribute), - PyString::new_bound(py, value), - ], - )), + Some( + &PyTuple::new( + py, + [ + PyString::new(py, element), + PyString::new(py, attribute), + PyString::new(py, value), + ], + ) + .unwrap(), + ), ); Some(value.into()) }) }); } cleaner.strip_comments(strip_comments); - cleaner.link_rel(link_rel); - if let Some(url_schemes) = url_schemes { + cleaner.link_rel(link_rel.as_deref()); + if let Some(url_schemes) = url_schemes.as_ref() { + let url_schemes: HashSet<_> = url_schemes.iter().map(|s| s.as_str()).collect(); cleaner.url_schemes(url_schemes); } cleaner.clean(html).to_string() @@ -198,6 +232,7 @@ fn clean( /// /// .. code-block:: pycon /// +/// >>> import nh3 /// >>> nh3.clean_text('Robert"); abuse();//') /// 'Robert"); abuse();//' #[pyfunction]