Skip to content

Commit

Permalink
Auto merge of #94079 - petrochenkov:cstr, r=joshtriplett
Browse files Browse the repository at this point in the history
library: Move `CStr` to libcore, and `CString` to liballoc

Closes rust-lang/rust#46736

Interesting points:
- Stability:
    - To make `CStr(ing)` from libcore/liballoc unusable without enabling features I had to make these structures unstable, and reexport them from libstd using stable type aliases instead of `pub use` reexports. (Because stability of `use` items is not checked.)
- Relying on target ABI in libcore is ok:
    - rust-lang/rust#94079 (comment)
- `trait CStrExt` (UPDATE: used only in `cfg(bootstrap)` mode, otherwise lang items are used instead)
    - rust-lang/rust#94079 (comment)
- `strlen`
    - rust-lang/rust#94079 (comment)

Otherwise it's just a code move + some minor hackery usual for liballoc in `cfg(test)` mode.
  • Loading branch information
bors committed Apr 15, 2022
2 parents ac7235e + 51893cd commit b39e6f1
Show file tree
Hide file tree
Showing 21 changed files with 908 additions and 681 deletions.
743 changes: 132 additions & 611 deletions std/src/ffi/c_str.rs → alloc/src/ffi/c_str.rs

Large diffs are not rendered by default.

32 changes: 10 additions & 22 deletions std/src/ffi/c_str/tests.rs → alloc/src/ffi/c_str/tests.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,12 @@
use super::*;
use crate::borrow::Cow::{Borrowed, Owned};
use crate::collections::hash_map::DefaultHasher;
use crate::hash::{Hash, Hasher};
use crate::os::raw::c_char;
use crate::rc::Rc;
use crate::sync::Arc;
use core::assert_matches::assert_matches;
use core::ffi::FromBytesUntilNulError;
use core::hash::{Hash, Hasher};

#[allow(deprecated)]
use core::hash::SipHasher13 as DefaultHasher;

#[test]
fn c_to_rust() {
Expand Down Expand Up @@ -47,22 +49,6 @@ fn borrowed() {
}
}

#[test]
fn to_str() {
let data = b"123\xE2\x80\xA6\0";
let ptr = data.as_ptr() as *const c_char;
unsafe {
assert_eq!(CStr::from_ptr(ptr).to_str(), Ok("123…"));
assert_eq!(CStr::from_ptr(ptr).to_string_lossy(), Borrowed("123…"));
}
let data = b"123\xE2\0";
let ptr = data.as_ptr() as *const c_char;
unsafe {
assert!(CStr::from_ptr(ptr).to_str().is_err());
assert_eq!(CStr::from_ptr(ptr).to_string_lossy(), Owned::<str>(format!("123\u{FFFD}")));
}
}

#[test]
fn to_owned() {
let data = b"123\0";
Expand All @@ -78,9 +64,11 @@ fn equal_hash() {
let ptr = data.as_ptr() as *const c_char;
let cstr: &'static CStr = unsafe { CStr::from_ptr(ptr) };

#[allow(deprecated)]
let mut s = DefaultHasher::new();
cstr.hash(&mut s);
let cstr_hash = s.finish();
#[allow(deprecated)]
let mut s = DefaultHasher::new();
CString::new(&data[..data.len() - 1]).unwrap().hash(&mut s);
let cstring_hash = s.finish();
Expand Down Expand Up @@ -122,11 +110,11 @@ fn cstr_from_bytes_until_nul() {
// Test an empty slice. This should fail because it
// does not contain a nul byte.
let b = b"";
assert_eq!(CStr::from_bytes_until_nul(&b[..]), Err(FromBytesUntilNulError(())));
assert_matches!(CStr::from_bytes_until_nul(&b[..]), Err(FromBytesUntilNulError { .. }));

// Test a non-empty slice, that does not contain a nul byte.
let b = b"hello";
assert_eq!(CStr::from_bytes_until_nul(&b[..]), Err(FromBytesUntilNulError(())));
assert_matches!(CStr::from_bytes_until_nul(&b[..]), Err(FromBytesUntilNulError { .. }));

// Test an empty nul-terminated string
let b = b"\0";
Expand Down
91 changes: 91 additions & 0 deletions alloc/src/ffi/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,91 @@
//! Utilities related to FFI bindings.
//!
//! This module provides utilities to handle data across non-Rust
//! interfaces, like other programming languages and the underlying
//! operating system. It is mainly of use for FFI (Foreign Function
//! Interface) bindings and code that needs to exchange C-like strings
//! with other languages.
//!
//! # Overview
//!
//! Rust represents owned strings with the [`String`] type, and
//! borrowed slices of strings with the [`str`] primitive. Both are
//! always in UTF-8 encoding, and may contain nul bytes in the middle,
//! i.e., if you look at the bytes that make up the string, there may
//! be a `\0` among them. Both `String` and `str` store their length
//! explicitly; there are no nul terminators at the end of strings
//! like in C.
//!
//! C strings are different from Rust strings:
//!
//! * **Encodings** - Rust strings are UTF-8, but C strings may use
//! other encodings. If you are using a string from C, you should
//! check its encoding explicitly, rather than just assuming that it
//! is UTF-8 like you can do in Rust.
//!
//! * **Character size** - C strings may use `char` or `wchar_t`-sized
//! characters; please **note** that C's `char` is different from Rust's.
//! The C standard leaves the actual sizes of those types open to
//! interpretation, but defines different APIs for strings made up of
//! each character type. Rust strings are always UTF-8, so different
//! Unicode characters will be encoded in a variable number of bytes
//! each. The Rust type [`char`] represents a '[Unicode scalar
//! value]', which is similar to, but not the same as, a '[Unicode
//! code point]'.
//!
//! * **Nul terminators and implicit string lengths** - Often, C
//! strings are nul-terminated, i.e., they have a `\0` character at the
//! end. The length of a string buffer is not stored, but has to be
//! calculated; to compute the length of a string, C code must
//! manually call a function like `strlen()` for `char`-based strings,
//! or `wcslen()` for `wchar_t`-based ones. Those functions return
//! the number of characters in the string excluding the nul
//! terminator, so the buffer length is really `len+1` characters.
//! Rust strings don't have a nul terminator; their length is always
//! stored and does not need to be calculated. While in Rust
//! accessing a string's length is an *O*(1) operation (because the
//! length is stored); in C it is an *O*(*n*) operation because the
//! length needs to be computed by scanning the string for the nul
//! terminator.
//!
//! * **Internal nul characters** - When C strings have a nul
//! terminator character, this usually means that they cannot have nul
//! characters in the middle — a nul character would essentially
//! truncate the string. Rust strings *can* have nul characters in
//! the middle, because nul does not have to mark the end of the
//! string in Rust.
//!
//! # Representations of non-Rust strings
//!
//! [`CString`] and [`CStr`] are useful when you need to transfer
//! UTF-8 strings to and from languages with a C ABI, like Python.
//!
//! * **From Rust to C:** [`CString`] represents an owned, C-friendly
//! string: it is nul-terminated, and has no internal nul characters.
//! Rust code can create a [`CString`] out of a normal string (provided
//! that the string doesn't have nul characters in the middle), and
//! then use a variety of methods to obtain a raw <code>\*mut [u8]</code> that can
//! then be passed as an argument to functions which use the C
//! conventions for strings.
//!
//! * **From C to Rust:** [`CStr`] represents a borrowed C string; it
//! is what you would use to wrap a raw <code>\*const [u8]</code> that you got from
//! a C function. A [`CStr`] is guaranteed to be a nul-terminated array
//! of bytes. Once you have a [`CStr`], you can convert it to a Rust
//! <code>&[str]</code> if it's valid UTF-8, or lossily convert it by adding
//! replacement characters.
//!
//! [`String`]: crate::string::String
//! [`CStr`]: core::ffi::CStr

#![unstable(feature = "alloc_ffi", issue = "94079")]

#[cfg(bootstrap)]
#[unstable(feature = "cstr_internals", issue = "none")]
pub use self::c_str::CStrExt;
#[unstable(feature = "alloc_c_string", issue = "94079")]
pub use self::c_str::FromVecWithNulError;
#[unstable(feature = "alloc_c_string", issue = "94079")]
pub use self::c_str::{CString, IntoStringError, NulError};

mod c_str;
9 changes: 9 additions & 0 deletions alloc/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -86,11 +86,13 @@
#![allow(explicit_outlives_requirements)]
//
// Library features:
#![cfg_attr(not(no_global_oom_handling), feature(alloc_c_string))]
#![feature(alloc_layout_extra)]
#![feature(allocator_api)]
#![feature(array_chunks)]
#![feature(array_methods)]
#![feature(array_windows)]
#![feature(assert_matches)]
#![feature(async_iterator)]
#![feature(coerce_unsized)]
#![cfg_attr(not(no_global_oom_handling), feature(const_alloc_error))]
Expand All @@ -104,9 +106,12 @@
#![feature(const_maybe_uninit_write)]
#![feature(const_maybe_uninit_as_mut_ptr)]
#![feature(const_refs_to_cell)]
#![feature(core_c_str)]
#![feature(core_intrinsics)]
#![feature(core_ffi_c)]
#![feature(const_eval_select)]
#![feature(const_pin)]
#![feature(cstr_from_bytes_until_nul)]
#![feature(dispatch_from_dyn)]
#![feature(exact_size_is_empty)]
#![feature(extend_one)]
Expand Down Expand Up @@ -152,6 +157,7 @@
#![feature(exclusive_range_pattern)]
#![feature(fundamental)]
#![cfg_attr(not(test), feature(generator_trait))]
#![feature(hashmap_internals)]
#![feature(lang_items)]
#![feature(let_else)]
#![feature(min_specialization)]
Expand All @@ -160,6 +166,7 @@
#![feature(nll)] // Not necessary, but here to test the `nll` feature.
#![feature(rustc_allow_const_fn_unstable)]
#![feature(rustc_attrs)]
#![feature(slice_internals)]
#![feature(staged_api)]
#![cfg_attr(test, feature(test))]
#![feature(unboxed_closures)]
Expand Down Expand Up @@ -205,6 +212,8 @@ mod boxed {
}
pub mod borrow;
pub mod collections;
#[cfg(not(no_global_oom_handling))]
pub mod ffi;
pub mod fmt;
pub mod rc;
pub mod slice;
Expand Down
2 changes: 1 addition & 1 deletion alloc/src/slice.rs
Original file line number Diff line number Diff line change
Expand Up @@ -153,7 +153,7 @@ pub use hack::to_vec;
// functions are actually methods that are in `impl [T]` but not in
// `core::slice::SliceExt` - we need to supply these functions for the
// `test_permutations` test
mod hack {
pub(crate) mod hack {
use core::alloc::Allocator;

use crate::boxed::Box;
Expand Down
18 changes: 18 additions & 0 deletions alloc/tests/c_str.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
use std::borrow::Cow::{Borrowed, Owned};
use std::ffi::{c_char, CStr};

#[test]
fn to_str() {
let data = b"123\xE2\x80\xA6\0";
let ptr = data.as_ptr() as *const c_char;
unsafe {
assert_eq!(CStr::from_ptr(ptr).to_str(), Ok("123…"));
assert_eq!(CStr::from_ptr(ptr).to_string_lossy(), Borrowed("123…"));
}
let data = b"123\xE2\0";
let ptr = data.as_ptr() as *const c_char;
unsafe {
assert!(CStr::from_ptr(ptr).to_str().is_err());
assert_eq!(CStr::from_ptr(ptr).to_string_lossy(), Owned::<str>(format!("123\u{FFFD}")));
}
}
3 changes: 3 additions & 0 deletions alloc/tests/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
#![feature(const_nonnull_slice_from_raw_parts)]
#![feature(const_ptr_write)]
#![feature(const_try)]
#![feature(core_c_str)]
#![feature(core_ffi_c)]
#![feature(core_intrinsics)]
#![feature(drain_filter)]
#![feature(exact_size_is_empty)]
Expand Down Expand Up @@ -49,6 +51,7 @@ mod binary_heap;
mod borrow;
mod boxed;
mod btree_set_hash;
mod c_str;
mod const_fns;
mod cow_str;
mod fmt;
Expand Down
Loading

0 comments on commit b39e6f1

Please sign in to comment.