diff --git a/library/core/benches/ascii.rs b/library/core/benches/ascii.rs
index 64938745a4a16..71ec9fed2fe75 100644
--- a/library/core/benches/ascii.rs
+++ b/library/core/benches/ascii.rs
@@ -63,6 +63,7 @@ macro_rules! benches {
     }
 }
 
+use std::fmt::Write;
 use test::black_box;
 use test::Bencher;
 
@@ -351,3 +352,34 @@ static ASCII_CHARACTER_CLASS: [AsciiCharacterClass; 256] = [
     N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
     N, N, N, N, N, N, N, N, N, N, N, N, N, N, N, N,
 ];
+
+/// Printable ASCII only: no byte in this input needs escaping.
+const ASCII_PATH: &[u8] = b"home/kyubey/rust/build/x86_64-unknown-linux-gnu/stage0/lib:/home/kyubey/workspace/rust/build/x86_64-unknown-linux-gnu/stage0-tools/release/deps";
+/// Printable runs interleaved with `"` and `\` bytes, which do need escaping.
+const RUST_INCANTATION: &[u8] = br#"AR_x86_64_unknown_linux_gnu="ar" CARGO_INCREMENTAL="0" CARGO_PROFILE_RELEASE_DEBUG="1" CARGO_PROFILE_RELEASE_DEBUG_ASSERTIONS="false" CARGO_PROFILE_RELEASE_OVERFLOW_CHECKS="false" CARGO_TARGET_DIR="/home/kyubey/workspace/rust/build/x86_64-unknown-linux-gnu/stage0-std" CC_x86_64_unknown_linux_gnu="cc" CFG_COMPILER_HOST_TRIPLE="x86_64-unknown-linux-gnu" CFG_RELEASE_CHANNEL="dev" CFLAGS_x86_64_unknown_linux_gnu="-ffunction-sections -fdata-sections -fPIC -m64" CXXFLAGS_x86_64_unknown_linux_gnu="-ffunction-sections -fdata-sections -fPIC -m64" CXX_x86_64_unknown_linux_gnu="c++" LD_LIBRARY_PATH="/home/kyubey/workspace/rust/build/x86_64-unknown-linux-gnu/stage0-sysroot/lib/rustlib/x86_64-unknown-linux-gnu/lib" LIBC_CHECK_CFG="1" RANLIB_x86_64_unknown_linux_gnu="ar s" REAL_LIBRARY_PATH_VAR="LD_LIBRARY_PATH" RUSTBUILD_NATIVE_DIR="/home/kyubey/workspace/rust/build/x86_64-unknown-linux-gnu/native" RUSTC="/home/kyubey/workspace/rust/build/bootstrap/debug/rustc" RUSTC_BOOTSTRAP="1" RUSTC_BREAK_ON_ICE="1" RUSTC_ERROR_METADATA_DST="/home/kyubey/workspace/rust/build/tmp/extended-error-metadata" RUSTC_FORCE_UNSTABLE="1" RUSTC_HOST_FUSE_LD_LLD="1" RUSTC_INSTALL_BINDIR="bin" RUSTC_LIBDIR="/home/kyubey/workspace/rust/build/x86_64-unknown-linux-gnu/stage0/lib" RUSTC_LINT_FLAGS="-Wrust_2018_idioms -Wunused_lifetimes -Wsemicolon_in_expressions_from_macros" RUSTC_REAL="/home/kyubey/workspace/rust/build/x86_64-unknown-linux-gnu/stage0/bin/rustc" RUSTC_SNAPSHOT="/home/kyubey/workspace/rust/build/x86_64-unknown-linux-gnu/stage0/bin/rustc" RUSTC_SNAPSHOT_LIBDIR="/home/kyubey/workspace/rust/build/x86_64-unknown-linux-gnu/stage0/lib" RUSTC_STAGE="0" RUSTC_SYSROOT="/home/kyubey/workspace/rust/build/x86_64-unknown-linux-gnu/stage0-sysroot" RUSTC_VERBOSE="0" RUSTDOC="/home/kyubey/workspace/rust/build/bootstrap/debug/rustdoc" RUSTDOCFLAGS="-C target-cpu=native --cfg=bootstrap -Csymbol-mangling-version=legacy -Zunstable-options -Zunstable-options --check-cfg=values(bootstrap) --check-cfg=values(stdarch_intel_sde) --check-cfg=values(no_fp_fmt_parse) --check-cfg=values(no_global_oom_handling) --check-cfg=values(no_rc) --check-cfg=values(no_sync) --check-cfg=values(freebsd12) --check-cfg=values(freebsd13) --check-cfg=values(backtrace_in_libstd) --check-cfg=values(target_env,\"libnx\") --check-cfg=values(target_arch,\"asmjs\",\"spirv\",\"nvptx\",\"xtensa\") -Clink-arg=-fuse-ld=lld -Clink-arg=-Wl,--threads=1 -Wrustdoc::invalid_codeblock_attributes --crate-version 1.72.0-dev -Zcrate-attr=doc(html_root_url=\"https://doc.rust-lang.org/nightly/\") -Zcrate-attr=warn(rust_2018_idioms)" RUSTDOC_FUSE_LD_LLD="1" RUSTDOC_LIBDIR="/home/kyubey/workspace/rust/build/x86_64-unknown-linux-gnu/stage0/lib" RUSTDOC_REAL="/path/to/nowhere/rustdoc/not/required" RUSTFLAGS="-C target-cpu=native --cfg=bootstrap -Csymbol-mangling-version=legacy -Zunstable-options -Zunstable-options --check-cfg=values(bootstrap) --check-cfg=values(stdarch_intel_sde) --check-cfg=values(no_fp_fmt_parse) --check-cfg=values(no_global_oom_handling) --check-cfg=values(no_rc) --check-cfg=values(no_sync) --check-cfg=values(freebsd12) --check-cfg=values(freebsd13) --check-cfg=values(backtrace_in_libstd) --check-cfg=values(target_env,\"libnx\") --check-cfg=values(target_arch,\"asmjs\",\"spirv\",\"nvptx\",\"xtensa\") -Zmacro-backtrace -Clink-args=-Wl,-z,origin -Clink-args=-Wl,-rpath,$ORIGIN/../lib -Clink-args=-fuse-ld=lld -Csplit-debuginfo=off -Cprefer-dynamic -Zinline-mir -Clto=off -Zcrate-attr=doc(html_root_url=\"https://doc.rust-lang.org/nightly/\")" RUST_COMPILER_RT_ROOT="/home/kyubey/workspace/rust/src/llvm-project/compiler-rt" RUST_TEST_THREADS="48" WINAPI_NO_BUNDLED_LIBRARIES="1" __CARGO_DEFAULT_LIB_METADATA="bootstrapstd" "/home/kyubey/workspace/rust/build/x86_64-unknown-linux-gnu/stage0/bin/cargo" "bench" "--target" "x86_64-unknown-linux-gnu" "-Zcheck-cfg=names,values,output" "-Zbinary-dep-depinfo" "-j" "48" "--features" " panic-unwind backtrace compiler-builtins-c" "--manifest-path" "/home/kyubey/workspace/rust/library/sysroot/Cargo.toml" "-p" "core" "--" "bench_ascii_escape_display" "--quiet" "-Z" "unstable-options" "--format" "json""#;
+
+/// Formats the escape-free input through `Display` into a reused, pre-allocated `String`.
+#[bench]
+fn bench_ascii_escape_display_no_escape(b: &mut Bencher) {
+    let mut writer = String::with_capacity(8 * 1024);
+
+    b.iter(move || {
+        writer.clear();
+        let iter = black_box(ASCII_PATH).escape_ascii();
+        write!(writer, "{}", iter).unwrap();
+        writer.len()
+    })
+}
+
+/// Formats the mixed input through `Display` into a reused, pre-allocated `String`.
+#[bench]
+fn bench_ascii_escape_display_mixed(b: &mut Bencher) {
+    let mut writer = String::with_capacity(8 * 1024);
+
+    b.iter(move || {
+        writer.clear();
+        let iter = black_box(RUST_INCANTATION).escape_ascii();
+        write!(writer, "{}", iter).unwrap();
+        writer.len()
+    })
+}
diff --git a/library/core/src/ascii.rs b/library/core/src/ascii.rs
index ef8e4d098ed95..02867789b79dd 100644
--- a/library/core/src/ascii.rs
+++ b/library/core/src/ascii.rs
@@ -96,6 +96,19 @@ pub fn escape_default(c: u8) -> EscapeDefault {
     EscapeDefault(escape::EscapeIterInner::new(data, range))
 }
 
+impl EscapeDefault {
+    /// Builds an `EscapeDefault` over the empty range `0..0` of a dummy buffer.
+    pub(crate) fn empty() -> Self {
+        let data = [Char::Null; 4];
+        EscapeDefault(escape::EscapeIterInner::new(data, 0..0))
+    }
+
+    /// Borrows the escape sequence as a `&str` by delegating to the inner iterator.
+    pub(crate) fn as_str(&self) -> &str {
+        self.0.as_str()
+    }
+}
+
 #[stable(feature = "rust1", since = "1.0.0")]
 impl Iterator for EscapeDefault {
     type Item = u8;
diff 
--git a/library/core/src/iter/adapters/flatten.rs b/library/core/src/iter/adapters/flatten.rs
index 2568aaf34f3fb..f3992b500adf8 100644
--- a/library/core/src/iter/adapters/flatten.rs
+++ b/library/core/src/iter/adapters/flatten.rs
@@ -18,6 +18,15 @@ impl<I: Iterator, U: IntoIterator, F: FnMut(I::Item) -> U> FlatMap<I, U, F> {
     pub(in crate::iter) fn new(iter: I, f: F) -> FlatMap<I, U, F> {
         FlatMap { inner: FlattenCompat::new(iter.map(f)) }
     }
+
+    /// Takes the adapter apart into `(frontiter, underlying iterator, backiter)`.
+    pub(crate) fn into_parts(self) -> (Option<U::IntoIter>, Option<I>, Option<U::IntoIter>) {
+        (
+            self.inner.frontiter,
+            self.inner.iter.into_inner().map(Map::into_inner),
+            self.inner.backiter,
+        )
+    }
 }
 
 #[stable(feature = "rust1", since = "1.0.0")]
diff --git a/library/core/src/iter/adapters/fuse.rs b/library/core/src/iter/adapters/fuse.rs
index b1fa4f92117b1..e38234eae4990 100644
--- a/library/core/src/iter/adapters/fuse.rs
+++ b/library/core/src/iter/adapters/fuse.rs
@@ -24,6 +24,11 @@ impl<I> Fuse<I> {
     pub(in crate::iter) fn new(iter: I) -> Fuse<I> {
         Fuse { iter: Some(iter) }
     }
+
+    /// Returns the inner `Option<I>` field unchanged.
+    pub(crate) fn into_inner(self) -> Option<I> {
+        self.iter
+    }
 }
 
 #[stable(feature = "fused", since = "1.26.0")]
diff --git a/library/core/src/iter/adapters/map.rs b/library/core/src/iter/adapters/map.rs
index 31d02a4da6ea5..2563f27d16933 100644
--- a/library/core/src/iter/adapters/map.rs
+++ b/library/core/src/iter/adapters/map.rs
@@ -68,6 +68,11 @@ impl<I, F> Map<I, F> {
     pub(in crate::iter) fn new(iter: I, f: F) -> Map<I, F> {
         Map { iter, f }
     }
+
+    /// Drops the closure and hands back the iterator being mapped.
+    pub(crate) fn into_inner(self) -> I {
+        self.iter
+    }
 }
 
 #[stable(feature = "core_impl_debug", since = "1.9.0")]
diff --git a/library/core/src/slice/ascii.rs b/library/core/src/slice/ascii.rs
index f3311f76a7f06..324bf5c05af20 100644
--- a/library/core/src/slice/ascii.rs
+++ b/library/core/src/slice/ascii.rs
@@ -5,6 +5,7 @@ use crate::fmt::{self, Write};
 use crate::iter;
 use crate::mem;
 use crate::ops;
+use core::ascii::EscapeDefault;
 
 #[cfg(not(test))]
 impl [u8] {
@@ -250,7 +251,47 @@ impl<'a> iter::FusedIterator for EscapeAscii<'a> {}
 #[stable(feature = "inherent_ascii_escape", since = "1.60.0")]
 impl<'a> fmt::Display for EscapeAscii<'a> {
     fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
-        self.clone().try_for_each(|b| f.write_char(b as char))
+        // disassemble iterator, including front/back parts of flatmap in case it has been
+        // partially consumed
+        let (front, slice, back) = self.clone().inner.into_parts();
+        let front = front.unwrap_or(EscapeDefault::empty());
+        let mut bytes = slice.unwrap_or_default().as_slice();
+        let back = back.unwrap_or(EscapeDefault::empty());
+
+        // usually empty, so the formatter won't have to do any work
+        for byte in front {
+            f.write_char(byte as char)?;
+        }
+
+        fn needs_escape(b: u8) -> bool {
+            b > 0x7E || b < 0x20 || b == b'\\' || b == b'\'' || b == b'"'
+        }
+
+        while !bytes.is_empty() {
+            // fast path for the printable, non-escaped subset of ascii
+            let prefix = bytes.iter().take_while(|&&b| !needs_escape(b)).count();
+            // SAFETY: prefix length was derived by counting bytes in the same slice, so it's
+            // in-bounds
+            let (prefix, remainder) = unsafe { bytes.split_at_unchecked(prefix) };
+            // SAFETY: prefix is a valid utf8 sequence, as it's a subset of ASCII
+            let prefix = unsafe { crate::str::from_utf8_unchecked(prefix) };
+
+            f.write_str(prefix)?; // the fast part
+
+            bytes = remainder;
+
+            if let Some(&b) = bytes.first() {
+                // guaranteed to be non-empty, better to write it as a str
+                f.write_str(ascii::escape_default(b).as_str())?;
+                bytes = &bytes[1..];
+            }
+        }
+
+        // also usually empty
+        for byte in back {
+            f.write_char(byte as char)?;
+        }
+        Ok(())
     }
 }
 #[stable(feature = "inherent_ascii_escape", since = "1.60.0")]
diff --git a/library/core/tests/ascii.rs b/library/core/tests/ascii.rs
index f5f2dd0477885..3d3f8ac10c603 100644
--- a/library/core/tests/ascii.rs
+++ b/library/core/tests/ascii.rs
@@ -479,3 +479,19 @@ fn ascii_ctype_const() {
         is_ascii_control => [false, false, false, false, false];
     }
 }
+
+#[test]
+fn test_ascii_display() {
+    assert_eq!(b"foo'bar".escape_ascii().to_string(), r#"foo\'bar"#);
+    assert_eq!(b"\0\xff".escape_ascii().to_string(), r#"\x00\xff"#);
+    // skip exactly the leading `\x00` and the trailing `\xff` escape (4 bytes each)
+    let mut it = b"\0fastpath\xffremainder\xff".escape_ascii();
+    let _ = it.advance_by(4);
+    let _ = it.advance_back_by(4);
+    assert_eq!(it.to_string(), r#"fastpath\xffremainder"#);
+    // stop in the middle of the first and last escape so both partial ends get printed
+    let mut it = b"\0fastpath\xffremainder\xff".escape_ascii();
+    let _ = it.advance_by(2);
+    let _ = it.advance_back_by(2);
+    assert_eq!(it.to_string(), r#"00fastpath\xffremainder\x"#);
+}