Skip to content

Commit

Permalink
Add support for building GNU archives (issue #10)
Browse files Browse the repository at this point in the history
  • Loading branch information
mdsteele committed Apr 11, 2019
1 parent 0362a68 commit 1cfa68d
Showing 1 changed file with 254 additions and 19 deletions.
273 changes: 254 additions & 19 deletions src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,7 @@
//! stores filenames in a slightly different, incompatible way, and has its
//! own strategy for supporting long filenames.
//!
//! Currently, this crate supports reading all three of these variants, but
//! only supports writing the BSD/common variant.
//! This crate supports reading and writing all three of these variants.
//!
//! # Example usage
//!
Expand Down Expand Up @@ -71,6 +70,7 @@ extern crate byteorder;

use byteorder::{BigEndian, LittleEndian, ReadBytesExt};
use std::cmp;
use std::collections::{HashMap, HashSet};
use std::ffi::OsStr;
use std::fs::{File, Metadata};
use std::io::{self, BufRead, BufReader, Error, ErrorKind, Read, Result, Seek,
Expand All @@ -97,7 +97,7 @@ const ENTRY_HEADER_LEN: usize = 60;
const BSD_SYMBOL_LOOKUP_TABLE_ID: &[u8] = b"__.SYMDEF";
const BSD_SORTED_SYMBOL_LOOKUP_TABLE_ID: &[u8] = b"__.SYMDEF SORTED";

const GNU_NAME_TABLE_ID: &[u8] = b"//";
const GNU_NAME_TABLE_ID: &str = "//";
const GNU_SYMBOL_LOOKUP_TABLE_ID: &[u8] = b"/";

// ========================================================================= //
Expand Down Expand Up @@ -130,12 +130,12 @@ impl Header {
/// other fields set to zero.
pub fn new(identifier: Vec<u8>, size: u64) -> Header {
Header {
identifier: identifier,
identifier,
mtime: 0,
uid: 0,
gid: 0,
mode: 0,
size: size,
size,
}
}

Expand All @@ -144,7 +144,7 @@ impl Header {
#[cfg(unix)]
pub fn from_metadata(identifier: Vec<u8>, meta: &Metadata) -> Header {
Header {
identifier: identifier,
identifier,
mtime: meta.mtime() as u64,
uid: meta.uid(),
gid: meta.gid(),
Expand Down Expand Up @@ -230,7 +230,7 @@ impl Header {
if identifier == GNU_SYMBOL_LOOKUP_TABLE_ID {
io::copy(&mut reader.by_ref().take(size), &mut io::sink())?;
return Ok(Some((Header::new(identifier, size), header_len)));
} else if identifier == GNU_NAME_TABLE_ID {
} else if identifier == GNU_NAME_TABLE_ID.as_bytes() {
*name_table = vec![0; size as usize];
reader.read_exact(name_table as &mut [u8]).map_err(|err| {
annotate(err, "failed to read name table")
Expand Down Expand Up @@ -308,12 +308,12 @@ impl Header {
}
Ok(Some((
Header {
identifier: identifier,
mtime: mtime,
uid: uid,
gid: gid,
mode: mode,
size: size,
identifier,
mtime,
uid,
gid,
mode,
size,
},
header_len,
)))
Expand Down Expand Up @@ -350,6 +350,31 @@ impl Header {
}
Ok(())
}

fn write_gnu<W>(&self, writer: &mut W, names: &HashMap<Vec<u8>, usize>)
-> Result<()>
where
W: Write,
{
if self.identifier.len() > 15 {
let offset = names[&self.identifier];
write!(writer, "/{:<15}", offset)?;
} else {
writer.write_all(&self.identifier)?;
writer.write_all(b"/")?;
writer.write_all(&vec![b' '; 15 - self.identifier.len()])?;
}
write!(
writer,
"{:<12}{:<6}{:<6}{:<8o}{:<10}`\n",
self.mtime,
self.uid,
self.gid,
self.mode,
self.size
)?;
Ok(())
}
}

fn parse_number(field_name: &str, bytes: &[u8], radix: u32) -> Result<u64> {
Expand Down Expand Up @@ -419,7 +444,7 @@ impl<R: Read> Archive<R> {
/// source of all data read.
pub fn new(reader: R) -> Archive<R> {
Archive {
reader: reader,
reader,
variant: Variant::Common,
name_table: Vec::new(),
entry_headers: Vec::new(),
Expand All @@ -446,7 +471,8 @@ impl<R: Read> Archive<R> {
pub fn into_inner(self) -> Result<R> { Ok(self.reader) }

fn is_name_table_id(&self, identifier: &[u8]) -> bool {
self.variant == Variant::GNU && identifier == GNU_NAME_TABLE_ID
self.variant == Variant::GNU &&
identifier == GNU_NAME_TABLE_ID.as_bytes()
}

fn is_symbol_lookup_table_id(&self, identifier: &[u8]) -> bool {
Expand Down Expand Up @@ -650,7 +676,7 @@ impl<R: Read + Seek> Archive<R> {
}
self.next_entry_index = index + 1;
Ok(Entry {
header: header,
header,
reader: self.reader.by_ref(),
length: size,
position: 0,
Expand Down Expand Up @@ -851,7 +877,8 @@ impl<'a, R: Read> ExactSizeIterator for Symbols<'a, R> {}

// ========================================================================= //

/// A structure for building archives.
/// A structure for building Common or BSD-variant archives (the archive format
/// typically used on e.g. BSD and Mac OS X systems).
///
/// This structure has methods for building up an archive from scratch into any
/// arbitrary writer.
Expand All @@ -865,7 +892,7 @@ impl<W: Write> Builder<W> {
/// destination of all data written.
pub fn new(writer: W) -> Builder<W> {
Builder {
writer: writer,
writer,
started: false,
}
}
Expand Down Expand Up @@ -921,6 +948,135 @@ impl<W: Write> Builder<W> {
}
}

// ========================================================================= //

/// A structure for building GNU-variant archives (the archive format typically
/// used on e.g. GNU/Linux and Windows systems).
///
/// This structure has methods for building up an archive from scratch into any
/// arbitrary writer.
///
/// Unlike `Builder`, the full list of entry identifiers must be supplied up
/// front (see `GnuBuilder::new`), because identifiers longer than 15 bytes are
/// stored in a name table that is written before the first entry.
pub struct GnuBuilder<W: Write> {
// Destination for all archive bytes.
writer: W,
// Declared identifiers of at most 15 bytes; these are written inline in each
// entry header.
short_names: HashSet<Vec<u8>>,
// Declared identifiers longer than 15 bytes, mapped to their byte offset
// within the name table.
long_names: HashMap<Vec<u8>, usize>,
// Total size in bytes of the name table (each long name plus its two-byte
// "/\n" terminator).
name_table_size: usize,
// Whether the global header (and name table, if any) has been written yet.
started: bool,
}

impl<W: Write> GnuBuilder<W> {
    /// Create a new archive builder with the underlying writer object as the
    /// destination of all data written.  The `identifiers` parameter must give
    /// the complete list of entry identifiers that will be included in this
    /// archive.
    ///
    /// Identifiers of at most 15 bytes are stored inline in their entry
    /// headers; longer ones are assigned an offset in the GNU name table.
    /// Repeated identifiers are accepted and only occupy one name-table slot.
    pub fn new(writer: W, identifiers: Vec<Vec<u8>>) -> GnuBuilder<W> {
        let mut short_names = HashSet::<Vec<u8>>::new();
        let mut long_names = HashMap::<Vec<u8>, usize>::new();
        let mut name_table_size: usize = 0;
        for identifier in identifiers {
            let length = identifier.len();
            if length <= 15 {
                short_names.insert(identifier);
            } else if !long_names.contains_key(&identifier) {
                // Only the first occurrence of a long name reserves space.
                // The table is emitted once per distinct name, so counting
                // duplicates would corrupt both the recorded size and the
                // offsets of the names that follow.
                long_names.insert(identifier, name_table_size);
                // Each table entry is the name followed by "/\n".
                name_table_size += length + 2;
            }
        }
        GnuBuilder {
            writer,
            short_names,
            long_names,
            name_table_size,
            started: false,
        }
    }

    /// Unwrap this archive builder, returning the underlying writer object.
    pub fn into_inner(self) -> Result<W> { Ok(self.writer) }

    /// Adds a new entry to this archive.
    ///
    /// On the first call this also writes the global archive header and, if
    /// any long identifiers were declared, the GNU name table.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidInput` error if the header's identifier was not in
    /// the list passed to `GnuBuilder::new()`, an `InvalidData` error if the
    /// number of bytes read from `data` differs from `header.size()` (the
    /// data has already been written by then), and propagates any I/O error.
    pub fn append<R: Read>(&mut self, header: &Header, mut data: R)
        -> Result<()> {
        let is_long_name = header.identifier().len() > 15;
        let has_name = if is_long_name {
            self.long_names.contains_key(header.identifier())
        } else {
            self.short_names.contains(header.identifier())
        };
        if !has_name {
            let msg = format!(
                "Identifier {:?} was not in the list of \
                 identifiers passed to GnuBuilder::new()",
                String::from_utf8_lossy(header.identifier())
            );
            return Err(Error::new(ErrorKind::InvalidInput, msg));
        }

        if !self.started {
            self.writer.write_all(GLOBAL_HEADER)?;
            if !self.long_names.is_empty() {
                write!(
                    self.writer,
                    "{:<48}{:<10}`\n",
                    GNU_NAME_TABLE_ID,
                    self.name_table_size
                )?;
                // Emit the names in offset order so that the offsets handed
                // out in `new()` match their positions in the table.
                let mut entries: Vec<(usize, &[u8])> = self.long_names
                    .iter()
                    .map(|(id, &start)| (start, id.as_slice()))
                    .collect();
                entries.sort();
                for (_, id) in entries {
                    self.writer.write_all(id)?;
                    self.writer.write_all(b"/\n")?;
                }
                // Archive members must start on an even byte boundary, and
                // the name table is a member like any other: pad it with a
                // newline that is not counted in its size field.
                if self.name_table_size % 2 != 0 {
                    self.writer.write_all(b"\n")?;
                }
            }
            self.started = true;
        }

        header.write_gnu(&mut self.writer, &self.long_names)?;
        let actual_size = io::copy(&mut data, &mut self.writer)?;
        if actual_size != header.size() {
            let msg = format!(
                "Wrong file size (header.size() = {}, actual \
                 size was {})",
                header.size(),
                actual_size
            );
            return Err(Error::new(ErrorKind::InvalidData, msg));
        }
        // Keep the next entry header aligned to an even offset.
        if actual_size % 2 != 0 {
            self.writer.write_all(b"\n")?;
        }

        Ok(())
    }

    /// Adds a file on the local filesystem to this archive, using the file
    /// name as its identifier.
    ///
    /// # Errors
    ///
    /// Returns an `InvalidInput` error if `path` has no file name component,
    /// and otherwise fails for the same reasons as `append`.
    pub fn append_path<P: AsRef<Path>>(&mut self, path: P) -> Result<()> {
        let name: &OsStr = path.as_ref().file_name().ok_or_else(|| {
            let msg = "Given path doesn't have a file name";
            Error::new(ErrorKind::InvalidInput, msg)
        })?;
        let identifier = osstr_to_bytes(name)?;
        let mut file = File::open(&path)?;
        self.append_file_id(identifier, &mut file)
    }

    /// Adds a file to this archive, with the given name as its identifier.
    pub fn append_file(&mut self, name: &[u8], file: &mut File) -> Result<()> {
        self.append_file_id(name.to_vec(), file)
    }

    /// Builds a header for `file` from its metadata and appends the file's
    /// contents under the identifier `id`.
    fn append_file_id(&mut self, id: Vec<u8>, file: &mut File) -> Result<()> {
        let metadata = file.metadata()?;
        let header = Header::from_metadata(id, &metadata);
        self.append(&header, file)
    }
}

// ========================================================================= //

#[cfg(unix)]
fn osstr_to_bytes(string: &OsStr) -> Result<Vec<u8>> {
Ok(string.as_bytes().to_vec())
Expand Down Expand Up @@ -960,7 +1116,7 @@ fn annotate(error: io::Error, msg: &str) -> io::Error {

#[cfg(test)]
mod tests {
use super::{Archive, Builder, Header, Variant};
use super::{Archive, Builder, GnuBuilder, Header, Variant};
use std::io::{Cursor, Read, Result, Seek, SeekFrom};
use std::str;

Expand Down Expand Up @@ -1040,6 +1196,85 @@ mod tests {
assert_eq!(str::from_utf8(&actual).unwrap(), expected);
}

#[test]
fn build_gnu_archive() {
    // Two short identifiers: no name table is written, and each header is
    // the fixed-width 60-byte GNU layout (16/12/6/6/8/10 columns + "`\n").
    let names = vec![b"baz.txt".to_vec(), b"foo.txt".to_vec()];
    let mut builder = GnuBuilder::new(Vec::new(), names);
    let mut header1 = Header::new(b"foo.txt".to_vec(), 7);
    header1.set_mtime(1487552916);
    header1.set_uid(501);
    header1.set_gid(20);
    header1.set_mode(0o100644);
    builder.append(&header1, "foobar\n".as_bytes()).unwrap();
    let header2 = Header::new(b"baz.txt".to_vec(), 4);
    builder.append(&header2, "baz\n".as_bytes()).unwrap();
    let actual = builder.into_inner().unwrap();
    // The 7-byte first entry is followed by one padding newline.
    let expected = "\
    !<arch>\n\
    foo.txt/        1487552916  501   20    100644  7         `\n\
    foobar\n\n\
    baz.txt/        0           0     0     0       4         `\n\
    baz\n";
    assert_eq!(str::from_utf8(&actual).unwrap(), expected);
}

#[test]
fn build_gnu_archive_with_long_filenames() {
    // Both identifiers exceed 15 bytes, so they go into the "//" name table
    // (34 + 44 = 78 bytes) and the entry headers refer to them by offset.
    let names = vec![
        b"this_is_a_very_long_filename.txt".to_vec(),
        b"and_this_is_another_very_long_filename.txt".to_vec(),
    ];
    let mut builder = GnuBuilder::new(Vec::new(), names);
    let mut header1 = Header::new(b"short".to_vec(), 1);
    header1.set_identifier(b"this_is_a_very_long_filename.txt".to_vec());
    header1.set_mtime(1487552916);
    header1.set_uid(501);
    header1.set_gid(20);
    header1.set_mode(0o100644);
    header1.set_size(7);
    builder.append(&header1, "foobar\n".as_bytes()).unwrap();
    let header2 = Header::new(
        b"and_this_is_another_very_long_filename.txt".to_vec(),
        4,
    );
    builder.append(&header2, "baz\n".as_bytes()).unwrap();
    let actual = builder.into_inner().unwrap();
    // Name-table header: 48-column identifier field, 10-column size field.
    let expected = "\
    !<arch>\n\
    //                                              78        `\n\
    this_is_a_very_long_filename.txt/\n\
    and_this_is_another_very_long_filename.txt/\n\
    /0              1487552916  501   20    100644  7         `\n\
    foobar\n\n\
    /34             0           0     0     0       4         `\n\
    baz\n";
    assert_eq!(str::from_utf8(&actual).unwrap(), expected);
}

#[test]
fn build_gnu_archive_with_space_in_filename() {
    // A space inside a short identifier is written verbatim; the trailing
    // "/" marks where the name ends within the 16-byte field.
    let names = vec![b"foo bar".to_vec()];
    let mut builder = GnuBuilder::new(Vec::new(), names);
    let header = Header::new(b"foo bar".to_vec(), 4);
    builder.append(&header, "baz\n".as_bytes()).unwrap();
    let actual = builder.into_inner().unwrap();
    let expected = "\
    !<arch>\n\
    foo bar/        0           0     0     0       4         `\n\
    baz\n";
    assert_eq!(str::from_utf8(&actual).unwrap(), expected);
}

#[test]
#[should_panic(expected = "Identifier \\\"bar\\\" was not in the list of \
identifiers passed to GnuBuilder::new()")]
fn build_gnu_archive_with_unexpected_identifier() {
    // Only "foo" is declared up front, so appending "bar" must be rejected;
    // unwrapping the resulting error produces the expected panic message.
    let declared = vec![b"foo".to_vec()];
    let mut gnu_builder = GnuBuilder::new(Vec::new(), declared);
    let undeclared = Header::new(b"bar".to_vec(), 4);
    let outcome = gnu_builder.append(&undeclared, "baz\n".as_bytes());
    outcome.unwrap();
}

#[test]
fn read_common_archive() {
let input = "\
Expand Down

0 comments on commit 1cfa68d

Please sign in to comment.