Skip to content

Commit

Permalink
fix(extract): unicode files on Linux
Browse files Browse the repository at this point in the history
Unicode files being extracted to directories still caused issues on Linux.
For example, if a file `❤️.txt` was part of a RAR archive, extracting it to
CWD or by providing a parent dir would fail. This commit adds extensive tests
and fixes all those issues definitively.

Relates-to #44
  • Loading branch information
muja committed Sep 8, 2024
1 parent 8ce991c commit 5e6b83d
Show file tree
Hide file tree
Showing 7 changed files with 161 additions and 37 deletions.
Binary file added data/unicode-entry.rar
Binary file not shown.
1 change: 1 addition & 0 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ pub use archive::Archive;
use unrar_sys as native;
mod archive;
pub mod error;
mod pathed;
mod open_archive;
pub use error::UnrarResult;
pub use open_archive::{
Expand Down
72 changes: 36 additions & 36 deletions src/open_archive.rs
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
use super::error::*;
use super::*;
use std::ffi::CString;
use std::fmt;
use std::os::raw::{c_int, c_uint};
use std::path::{Path, PathBuf};
use std::ptr::NonNull;
use widestring::WideCString;

bitflags::bitflags! {
#[derive(Default)]
Expand Down Expand Up @@ -53,7 +51,7 @@ pub struct OpenArchive<M: OpenMode, C: Cursor> {
extra: C,
marker: std::marker::PhantomData<M>,
}
type Userdata<T> = (T, Option<WideCString>);
type Userdata<T> = (T, Option<widestring::WideCString>);

mod private {
use super::native;
Expand Down Expand Up @@ -204,7 +202,7 @@ impl<Mode: OpenMode, C: Cursor> OpenArchive<Mode, C> {
///
/// ```no_run
/// use unrar::{Archive, error::{When, Code}};
///
///
/// let mut archive = Archive::new("corrupt.rar").open_for_listing().expect("archive error");
/// loop {
/// let mut error = None;
Expand Down Expand Up @@ -233,12 +231,7 @@ impl<Mode: OpenMode> OpenArchive<Mode, CursorBeforeHeader> {
password: Option<&[u8]>,
recover: Option<&mut Option<Self>>,
) -> UnrarResult<Self> {
#[cfg(target_os = "linux")]
// on Linux there is an issue with unicode filenames when using wide strings
// so we use non-wide strings. See https://github.com/muja/unrar.rs/issues/44
let filename = CString::new(filename.as_os_str().as_encoded_bytes()).unwrap();
#[cfg(not(target_os = "linux"))]
let filename = WideCString::from_os_str(&filename).unwrap();
let filename = pathed::construct(filename);

let mut data =
native::OpenArchiveDataEx::new(filename.as_ptr() as *const _, Mode::VALUE as u32);
Expand All @@ -247,7 +240,7 @@ impl<Mode: OpenMode> OpenArchive<Mode, CursorBeforeHeader> {

let arc = handle.and_then(|handle| {
if let Some(pw) = password {
let cpw = CString::new(pw).unwrap();
let cpw = std::ffi::CString::new(pw).unwrap();
unsafe { native::RARSetPassword(handle.as_ptr(), cpw.as_ptr() as *const _) }
}
Some(OpenArchive {
Expand Down Expand Up @@ -360,16 +353,16 @@ impl<M: OpenMode> OpenArchive<M, CursorBeforeFile> {

fn process_file<PM: ProcessMode>(
self,
path: Option<&WideCString>,
file: Option<&WideCString>,
path: Option<&pathed::RarStr>,
file: Option<&pathed::RarStr>,
) -> UnrarResult<OpenArchive<M, CursorBeforeHeader>> {
Ok(self.process_file_x::<PM>(path, file)?.1)
}

fn process_file_x<PM: ProcessMode>(
self,
path: Option<&WideCString>,
file: Option<&WideCString>,
path: Option<&pathed::RarStr>,
file: Option<&pathed::RarStr>,
) -> UnrarResult<(PM::Output, OpenArchive<M, CursorBeforeHeader>)> {
let result = Ok((
Internal::<PM>::process_file_raw(&self.handle, path, file)?,
Expand Down Expand Up @@ -400,7 +393,7 @@ impl OpenArchive<Process, CursorBeforeFile> {
/// Extracts the file into the current working directory
/// Returns the OpenArchive for further processing
pub fn extract(self) -> UnrarResult<OpenArchive<Process, CursorBeforeHeader>> {
self.process_file::<Extract>(None, None)
self.dir_extract(None)
}

/// Extracts the file into the specified directory.
Expand All @@ -413,8 +406,7 @@ impl OpenArchive<Process, CursorBeforeFile> {
self,
base: P,
) -> UnrarResult<OpenArchive<Process, CursorBeforeHeader>> {
let wdest = WideCString::from_os_str(base.as_ref()).expect("Unexpected nul in destination");
self.process_file::<Extract>(Some(&wdest), None)
self.dir_extract(Some(base.as_ref()))
}

/// Extracts the file into the specified file.
Expand All @@ -425,10 +417,20 @@ impl OpenArchive<Process, CursorBeforeFile> {
/// This function will panic if `dest` contains nul characters.
pub fn extract_to<P: AsRef<Path>>(
self,
dest: P,
file: P,
) -> UnrarResult<OpenArchive<Process, CursorBeforeHeader>> {
let wdest = WideCString::from_os_str(dest.as_ref()).expect("Unexpected nul in destination");
self.process_file::<Extract>(None, Some(&wdest))
let dest = pathed::construct(file.as_ref());
self.process_file::<Extract>(None, Some(&dest))
}

/// extracting into a directory if the filename has unicode characters
/// does not work on Linux, so we must specify the full path for Linux
fn dir_extract(
self,
base: Option<&Path>,
) -> UnrarResult<OpenArchive<Process, CursorBeforeHeader>> {
let (path, file) = pathed::preprocess_extract(base, &self.entry().filename);
self.process_file::<Extract>(path.as_deref(), file.as_deref())
}
}

Expand Down Expand Up @@ -513,7 +515,8 @@ impl<M: ProcessMode> Internal<M> {
native::UCM_CHANGEVOLUMEW => {
// 2048 seems to be the buffer size in unrar,
// also it's the maximum path length since 5.00.
let next = unsafe { WideCString::from_ptr_truncate(p1 as *const _, 2048) };
let next =
unsafe { widestring::WideCString::from_ptr_truncate(p1 as *const _, 2048) };
user_data.1 = Some(next);
match p2 {
// Next volume not found. -1 means stop
Expand All @@ -533,8 +536,8 @@ impl<M: ProcessMode> Internal<M> {

fn process_file_raw(
handle: &Handle,
path: Option<&WideCString>,
file: Option<&WideCString>,
path: Option<&pathed::RarStr>,
file: Option<&pathed::RarStr>,
) -> UnrarResult<M::Output> {
let mut user_data: Userdata<M::Output> = Default::default();
unsafe {
Expand All @@ -544,16 +547,12 @@ impl<M: ProcessMode> Internal<M> {
&mut user_data as *mut _ as native::LPARAM,
);
}
let process_result = Code::from(unsafe {
native::RARProcessFileW(
handle.0.as_ptr(),
M::OPERATION as i32,
path.map(|path| path.as_ptr() as *const _)
.unwrap_or(std::ptr::null()),
file.map(|file| file.as_ptr() as *const _)
.unwrap_or(std::ptr::null()),
)
})
let process_result = Code::from(pathed::process_file(
handle.0.as_ptr(),
M::OPERATION as i32,
path,
file,
))
.unwrap();
match process_result {
Code::Success => Ok(user_data.0),
Expand Down Expand Up @@ -646,8 +645,9 @@ impl fmt::Display for FileHeader {

impl From<native::HeaderDataEx> for FileHeader {
fn from(header: native::HeaderDataEx) -> Self {
let filename =
unsafe { WideCString::from_ptr_truncate(header.filename_w.as_ptr() as *const _, 1024) };
let filename = unsafe {
widestring::WideCString::from_ptr_truncate(header.filename_w.as_ptr() as *const _, 1024)
};

FileHeader {
filename: PathBuf::from(filename.to_os_string()),
Expand Down
32 changes: 32 additions & 0 deletions src/pathed/all.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
use std::path::{Path, PathBuf};
use widestring::{WideCString, WideCStr};

pub(crate) type RarString = WideCString;
pub(crate) type RarStr = WideCStr;

pub(crate) fn construct(path: &Path) -> RarString {
WideCString::from_os_str(path).expect("Unexpected nul in path")
}

pub(crate) fn process_file(
handle: *const unrar_sys::Handle,
operation: i32,
dest_path: Option<&RarStr>,
dest_name: Option<&RarStr>,
) -> i32 {
unsafe {
unrar_sys::RARProcessFileW(
handle,
operation,
dest_path.map(|path| path.as_ptr().cast()).unwrap_or(std::ptr::null()),
dest_name.map(|file| file.as_ptr().cast()).unwrap_or(std::ptr::null()),
)
}
}

pub(crate) fn preprocess_extract(
base: Option<&Path>,
_filename: &PathBuf,
) -> (Option<RarString>, Option<RarString>) {
(base.map(construct), None)
}
32 changes: 32 additions & 0 deletions src/pathed/linux.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,32 @@
use std::ffi::{CString, CStr};
use std::path::{Path, PathBuf};

pub(crate) type RarString = CString;
pub(crate) type RarStr = CStr;

pub(crate) fn construct<P: AsRef<std::path::Path>>(path: P) -> RarString {
CString::new(path.as_ref().as_os_str().as_encoded_bytes()).unwrap()
}

pub(crate) fn process_file(
handle: *const unrar_sys::Handle,
operation: i32,
dest_path: Option<&RarStr>,
dest_name: Option<&RarStr>,
) -> i32 {
unsafe {
unrar_sys::RARProcessFile(
handle,
operation,
dest_path.map(|path| path.as_ptr().cast()).unwrap_or(std::ptr::null()),
dest_name.map(|file| file.as_ptr().cast()).unwrap_or(std::ptr::null()),
)
}
}

pub(crate) fn preprocess_extract(
base: Option<&Path>,
filename: &PathBuf,
) -> (Option<RarString>, Option<RarString>) {
(None, Some(construct(base.unwrap_or(".".as_ref()).join(filename))))
}
5 changes: 5 additions & 0 deletions src/pathed/mod.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
#[cfg_attr(target_os = "linux", path = "linux.rs")]
#[cfg_attr(not(target_os = "linux"), path = "all.rs")]
mod os;

pub(crate) use os::*;
56 changes: 55 additions & 1 deletion tests/utf8.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use unrar::Archive;
use std::path::PathBuf;
use unrar::Archive;

#[test]
fn unicode_list() {
Expand All @@ -12,3 +12,57 @@ fn unicode_file() {
let mut entries = Archive::new("data/unicodefilename❤️.rar").open_for_listing().unwrap();
assert_eq!(entries.next().unwrap().unwrap().filename, PathBuf::from(".gitignore"));
}

#[test]
fn unicode_extract_to() {
let parent = tempfile::tempdir().unwrap();
let unicode_file = parent.path().join("unicodefilename❤️.txt");
let archive = Archive::new("data/version.rar").open_for_processing().unwrap();
let archive = archive.read_header().unwrap().unwrap();
archive.extract_to(&unicode_file).expect("extraction failed");
assert_eq!("unrar-0.4.0", std::fs::read_to_string(unicode_file).expect("read failed"));
}

#[test]
fn extract_with_unicode_base() {
let parent = tempfile::tempdir().unwrap();
let unicode_dir = parent.path().join("unicodefilename❤️");
std::fs::create_dir(&unicode_dir).expect("create dir");
let archive = Archive::new("data/version.rar")
.open_for_processing()
.unwrap()
.read_header()
.unwrap()
.unwrap();
archive.extract_with_base(&unicode_dir).expect("extraction failed");
assert_eq!(
"unrar-0.4.0",
std::fs::read_to_string(unicode_dir.join("VERSION")).expect("read failed")
);
}

#[test]
fn unicode_entry() {
let archive = Archive::new("data/unicode-entry.rar").open_for_listing().unwrap();
let archive = archive.read_header().unwrap().unwrap();
assert_eq!(archive.entry().filename.as_os_str(), "unicodefilename❤️.txt");
}

#[test]
fn unicode_entry_process_mode() {
let archive = Archive::new("data/unicode-entry.rar").open_for_processing().unwrap();
let archive = archive.read_header().unwrap().unwrap();
assert_eq!(archive.entry().filename.as_os_str(), "unicodefilename❤️.txt");
assert_eq!(&String::from_utf8(archive.read().unwrap().0).unwrap(), "foobar\n");
}

#[test]
fn unicode_entry_extract() {
let parent = tempfile::tempdir().unwrap();
let archive = Archive::new("data/unicode-entry.rar").open_for_processing().unwrap();
let archive = archive.read_header().unwrap().unwrap();
archive.extract_with_base(&parent).expect("extract");
let entries = std::fs::read_dir(&parent).expect("read_dir").collect::<Result<Vec<_>, _>>().expect("read_dir[0]");
assert_eq!(entries.len(), 1);
assert_eq!(&entries[0].file_name(), "unicodefilename❤️.txt");
}

1 comment on commit 5e6b83d

@ttys3
Copy link
Contributor

@ttys3 ttys3 commented on 5e6b83d Nov 30, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

can you release a patch version ? since this commit is very important

Please sign in to comment.