From d4d96c6acc5b1b2ae771c6208832b015db0d4c01 Mon Sep 17 00:00:00 2001 From: Jeff Muizelaar Date: Tue, 10 Oct 2017 13:35:07 -0400 Subject: [PATCH 1/2] Update bincode so that read_exact will work. --- Cargo.lock | 12 ++++++------ webrender_api/Cargo.toml | 2 +- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 9912ff7420..1289ec67d3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4,7 +4,7 @@ version = "0.2.3" dependencies = [ "app_units 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)", "base64 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "bincode 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "bincode 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", "byteorder 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "clap 2.25.0 (registry+https://github.com/rust-lang/crates.io-index)", "crossbeam 0.2.10 (registry+https://github.com/rust-lang/crates.io-index)", @@ -93,7 +93,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "bincode" -version = "0.8.0" +version = "0.8.1" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "byteorder 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", @@ -460,7 +460,7 @@ name = "ipc-channel" version = "0.8.0" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "bincode 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "bincode 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", "fnv 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)", "lazy_static 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)", "libc 0.2.20 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1041,7 +1041,7 @@ version = "0.52.1" dependencies = [ "angle 0.5.0 (git+https://github.com/servo/angle?branch=servo)", "app_units 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)", - "bincode 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "bincode 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", "bitflags 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)", "byteorder 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "core-foundation 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1075,7 +1075,7 @@ name = "webrender_api" version = "0.52.1" dependencies = [ "app_units 0.5.6 (registry+https://github.com/rust-lang/crates.io-index)", - "bincode 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)", + "bincode 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)", "bitflags 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)", "byteorder 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "core-foundation 0.4.4 (registry+https://github.com/rust-lang/crates.io-index)", @@ -1164,7 +1164,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" "checksum atty 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "d912da0db7fa85514874458ca3651fe2cddace8d0b0505571dbdcd41ab490159" "checksum base64 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d156a04ec694d726e92ea3c13e4a62949b4f0488a9344f04341d679ec6b127b" "checksum binary-space-partition 0.1.2 (registry+https://github.com/rust-lang/crates.io-index)" = "88ceb0d16c4fd0e42876e298d7d3ce3780dd9ebdcbe4199816a32c77e08597ff" -"checksum bincode 0.8.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e103c8b299b28a9c6990458b7013dc4a8356a9b854c51b9883241f5866fac36e" +"checksum bincode 0.8.1 (registry+https://github.com/rust-lang/crates.io-index)" = "6e4a2d3bab374f96192eade8f41914373d7b5fcf030ca2f45141f1e939057259" "checksum bitflags 0.9.1 (registry+https://github.com/rust-lang/crates.io-index)" = "4efd02e230a02e18f92fc2735f44597385ed02ad8f831e7c1c1156ee5e1ab3a5" "checksum block 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "0d8c1fef690941d3e7788d328517591fecc684c084084702d6ff1641e993699a" "checksum byteorder 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "c40977b0ee6b9885c9013cd41d9feffdd22deb3bb4dc3a71d901cc7a77de18c8" diff --git a/webrender_api/Cargo.toml b/webrender_api/Cargo.toml index d48d086d2d..4804f3b5b4 100644 --- a/webrender_api/Cargo.toml +++ b/webrender_api/Cargo.toml @@ -11,7 +11,7 @@ ipc = ["ipc-channel"] [dependencies] app_units = "0.5.6" -bincode = "0.8" +bincode = "0.8.1" bitflags = "0.9" byteorder = "1.0" euclid = "0.15" From 9818e4d4e133d641392c503de13b2041cf458a81 Mon Sep 17 00:00:00 2001 From: Jeff Muizelaar Date: Mon, 9 Oct 2017 22:18:39 -0400 Subject: [PATCH 2/2] Improve deserialization performance with a custom Read implementation With this change I go from 140fps to 200fps on my gmail yaml. --- webrender_api/src/display_list.rs | 75 +++++++++++++++++++++++++++++-- 1 file changed, 71 insertions(+), 4 deletions(-) diff --git a/webrender_api/src/display_list.rs b/webrender_api/src/display_list.rs index 5a1c5b6520..97fc0e8c7e 100644 --- a/webrender_api/src/display_list.rs +++ b/webrender_api/src/display_list.rs @@ -17,9 +17,10 @@ use YuvImageDisplayItem; use bincode; use serde::{Deserialize, Serialize, Serializer}; use serde::ser::{SerializeMap, SerializeSeq}; -use std::io::Write; +use std::io::{Read, Write}; use std::{io, ptr}; use std::marker::PhantomData; +use std::slice; use time::precise_time_ns; // We don't want to push a long text-run. If a text-run is too long, split it into several parts. @@ -171,6 +172,7 @@ fn skip_slice Deserialize<'de>>( (range, count) } + impl<'a> BuiltDisplayListIter<'a> { pub fn new(list: &'a BuiltDisplayList) -> Self { Self::new_with_list_and_data(list, list.item_slice()) @@ -221,7 +223,7 @@ impl<'a> BuiltDisplayListIter<'a> { return None; } - self.cur_item = bincode::deserialize_from(&mut self.data, bincode::Infinite) + self.cur_item = bincode::deserialize_from(&mut UnsafeReader::new(&mut self.data), bincode::Infinite) .expect("MEH: malicious process?"); match self.cur_item.item { @@ -363,7 +365,7 @@ impl<'de, 'a, T: Deserialize<'de>> AuxIter<'a, T> { let size: usize = if data.len() == 0 { 0 // Accept empty ItemRanges pointing anywhere } else { - bincode::deserialize_from(&mut data, bincode::Infinite).expect("MEH: malicious input?") + bincode::deserialize_from(&mut UnsafeReader::new(&mut data), bincode::Infinite).expect("MEH: malicious input?") }; AuxIter { @@ -383,7 +385,7 @@ impl<'a, T: for<'de> Deserialize<'de>> Iterator for AuxIter<'a, T> { } else { self.size -= 1; Some( - bincode::deserialize_from(&mut self.data, bincode::Infinite) + bincode::deserialize_from(&mut UnsafeReader::new(&mut self.data), bincode::Infinite) .expect("MEH: malicious input?"), ) } @@ -525,6 +527,71 @@ fn serialize_fast(vec: &mut Vec, e: &T) { debug_assert!(((w.0 as usize) - (vec.as_ptr() as usize)) == vec.len()); } +// This uses a (start, end) representation instead of (start, len) so that +// only need to update a single field as we read through it. This +// makes it easier for llvm to understand what's going on. (https://github.com/rust-lang/rust/issues/45068) +// We update the slice only once we're done reading +struct UnsafeReader<'a: 'b, 'b> { + start: *const u8, + end: *const u8, + slice: &'b mut &'a [u8], +} + +impl<'a, 'b> UnsafeReader<'a, 'b> { + fn new(buf: &'b mut &'a [u8]) -> UnsafeReader<'a, 'b> { + unsafe { + let end = buf.as_ptr().offset(buf.len() as isize); + let start = buf.as_ptr(); + UnsafeReader { start: start, end, slice: buf } + } + } + + // This read implementation is significantly faster than the standard &[u8] one. + // + // First, it only supports reading exactly buf.len() bytes. This ensures that + // the argument to memcpy is always buf.len() and will allow a constant buf.len() + // to be propagated through to memcpy which LLVM will turn into explicit loads and + // stores. The standard implementation does a len = min(slice.len(), buf.len()) + // + // Second, we only need to adjust 'start' after reading and it's only adjusted by a + // constant. This allows LLVM to avoid adjusting the length field after ever read + // and lets it be aggregated into a single adjustment. + #[inline(always)] + fn read_internal(&mut self, buf: &mut [u8]) { + // this is safe because we panic if start + buf.len() > end + unsafe { + assert!(self.start.offset(buf.len() as isize) <= self.end, "UnsafeReader: read past end of target"); + ptr::copy_nonoverlapping(self.start, buf.as_mut_ptr(), buf.len()); + self.start = self.start.offset(buf.len() as isize); + } + } +} + +impl<'a, 'b> Drop for UnsafeReader<'a, 'b> { + // this adjusts input slice so that it properly represents the amount that's left. + fn drop(&mut self) { + // this is safe because we know that start and end are contained inside the original slice + unsafe { + *self.slice = slice::from_raw_parts(self.start, (self.end as usize) - (self.start as usize)); + } + } +} + +impl<'a, 'b> Read for UnsafeReader<'a, 'b> { + // These methods were not being inlined and we need them to be so that the memcpy + // is for a constant size + #[inline(always)] + fn read(&mut self, buf: &mut [u8]) -> io::Result { + self.read_internal(buf); + Ok(buf.len()) + } + #[inline(always)] + fn read_exact(&mut self, buf: &mut [u8]) -> io::Result<()> { + self.read_internal(buf); + Ok(()) + } +} + #[derive(Clone, Debug)] pub struct SaveState { dl_len: usize,