Skip to content

Commit

Permalink
ref: Be incredibly lenient when parsing ELFs (#434)
Browse files Browse the repository at this point in the history
This changes ELF parsing behaviour so that malformed ELFs
will not immediately throw an error unless their metadata 
(program and section headers) are unreadable. The parser 
will now return an object containing everything that could be 
read up until the malformed sections as opposed to erroring 
out.

Co-authored-by: Arpad Borsos <[email protected]>
  • Loading branch information
relaxolotl and Swatinem authored Sep 28, 2021
1 parent cdffd4f commit 9d308ef
Show file tree
Hide file tree
Showing 11 changed files with 354 additions and 5 deletions.
1 change: 1 addition & 0 deletions examples/object_debug/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ fn inspect_object<P: AsRef<Path>>(path: P) -> Result<(), Box<dyn std::error::Err
println!(" symbol table: {}", object.has_symbols());
println!(" debug info: {}", object.has_debug_info());
println!(" unwind info: {}", object.has_unwind_info());
println!(" is malformed: {}", object.is_malformed());
}
Err(e) => {
print!(" - ");
Expand Down
3 changes: 3 additions & 0 deletions symbolic-debuginfo/src/base.rs
Original file line number Diff line number Diff line change
Expand Up @@ -694,6 +694,9 @@ pub trait ObjectLike<'data, 'object> {

/// Determines whether this object contains embedded sources.
fn has_sources(&self) -> bool;

/// Determines whether this object is malformed and was only partially parsed
fn is_malformed(&self) -> bool;
}

mod derive_serde {
Expand Down
10 changes: 10 additions & 0 deletions symbolic-debuginfo/src/breakpad.rs
Original file line number Diff line number Diff line change
Expand Up @@ -902,6 +902,11 @@ impl<'data> BreakpadObject<'data> {
false
}

/// Determines whether this object is malformed and was only partially parsed
pub fn is_malformed(&self) -> bool {
false
}

/// Returns an iterator over info records.
pub fn info_records(&self) -> BreakpadInfoRecords<'data> {
BreakpadInfoRecords {
Expand Down Expand Up @@ -966,6 +971,7 @@ impl fmt::Debug for BreakpadObject<'_> {
.field("has_symbols", &self.has_symbols())
.field("has_debug_info", &self.has_debug_info())
.field("has_unwind_info", &self.has_unwind_info())
.field("is_malformed", &self.is_malformed())
.finish()
}
}
Expand Down Expand Up @@ -1046,6 +1052,10 @@ impl<'data: 'object, 'object> ObjectLike<'data, 'object> for BreakpadObject<'dat
fn has_sources(&self) -> bool {
self.has_sources()
}

fn is_malformed(&self) -> bool {
self.is_malformed()
}
}

/// An iterator over symbols in the Breakpad object.
Expand Down
278 changes: 273 additions & 5 deletions symbolic-debuginfo/src/elf.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,17 @@ use std::error::Error;
use std::fmt;
use std::io::Cursor;

use core::cmp;
use flate2::{Decompress, FlushDecompress};
use goblin::elf::compression_header::{CompressionHeader, ELFCOMPRESS_ZLIB};
use goblin::elf::SectionHeader;
use goblin::elf64::sym::SymIterator;
use goblin::strtab::Strtab;
use goblin::{container::Ctx, elf, strtab};
use goblin::{
container::{Container, Ctx},
elf, strtab,
};
use scroll::Pread;
use thiserror::Error;

use symbolic_common::{Arch, AsSelf, CodeId, DebugId, Uuid};
Expand Down Expand Up @@ -62,6 +67,7 @@ impl ElfError {
pub struct ElfObject<'data> {
elf: elf::Elf<'data>,
data: &'data [u8],
is_malformed: bool,
}

impl<'data> ElfObject<'data> {
Expand All @@ -73,11 +79,263 @@ impl<'data> ElfObject<'data> {
)
}

/// Tries to parse an ELF object from the given slice.
// Pulled from https://github.com/m4b/goblin/blob/master/src/elf/mod.rs#L393-L424 as it
// currently isn't public, but we need this to parse an ELF.
fn gnu_hash_len(bytes: &[u8], offset: usize, ctx: Ctx) -> goblin::error::Result<usize> {
let buckets_num = bytes.pread_with::<u32>(offset, ctx.le)? as usize;
let min_chain = bytes.pread_with::<u32>(offset + 4, ctx.le)? as usize;
let bloom_size = bytes.pread_with::<u32>(offset + 8, ctx.le)? as usize;
// We could handle min_chain==0 if we really had to, but it shouldn't happen.
if buckets_num == 0 || min_chain == 0 || bloom_size == 0 {
return Err(goblin::error::Error::Malformed(format!(
"Invalid DT_GNU_HASH: buckets_num={} min_chain={} bloom_size={}",
buckets_num, min_chain, bloom_size
)));
}
// Find the last bucket.
let buckets_offset = offset + 16 + bloom_size * if ctx.container.is_big() { 8 } else { 4 };
let mut max_chain = 0;
for bucket in 0..buckets_num {
let chain = bytes.pread_with::<u32>(buckets_offset + bucket * 4, ctx.le)? as usize;
if max_chain < chain {
max_chain = chain;
}
}
if max_chain < min_chain {
return Ok(0);
}
// Find the last chain within the bucket.
let mut chain_offset = buckets_offset + buckets_num * 4 + (max_chain - min_chain) * 4;
loop {
let hash = bytes.pread_with::<u32>(chain_offset, ctx.le)?;
max_chain += 1;
chain_offset += 4;
if hash & 1 != 0 {
return Ok(max_chain);
}
}
}

// Pulled from https://github.com/m4b/goblin/blob/master/src/elf/mod.rs#L426-L434 as it
// currently isn't public, but we need this to parse an ELF.
fn hash_len(
bytes: &[u8],
offset: usize,
machine: u16,
ctx: Ctx,
) -> goblin::error::Result<usize> {
// Based on readelf code.
let nchain = if (machine == elf::header::EM_FAKE_ALPHA || machine == elf::header::EM_S390)
&& ctx.container.is_big()
{
bytes.pread_with::<u64>(offset.saturating_add(4), ctx.le)? as usize
} else {
bytes.pread_with::<u32>(offset.saturating_add(4), ctx.le)? as usize
};
Ok(nchain)
}

/// Tries to parse an ELF object from the given slice. Will return a partially parsed ELF object
/// if at least the program and section headers can be parsed.
pub fn parse(data: &'data [u8]) -> Result<Self, ElfError> {
elf::Elf::parse(data)
.map(|elf| ElfObject { elf, data })
.map_err(ElfError::new)
let header =
elf::Elf::parse_header(data).map_err(|_| ElfError::new("ELF header unreadable"))?;
// dummy Elf with only header
let mut obj =
elf::Elf::lazy_parse(header).map_err(|_| ElfError::new("cannot parse ELF header"))?;

let ctx = Ctx {
container: if obj.is_64 {
Container::Big
} else {
Container::Little
},
le: if obj.little_endian {
scroll::Endian::Little
} else {
scroll::Endian::Big
},
};

macro_rules! return_partial_on_err {
($parse_func:expr) => {
if let Ok(expected) = $parse_func() {
expected
} else {
// does this snapshot?
return Ok(ElfObject {
elf: obj,
data,
is_malformed: true,
});
}
};
}

obj.program_headers =
elf::ProgramHeader::parse(data, header.e_phoff as usize, header.e_phnum as usize, ctx)
.map_err(|_| ElfError::new("unable to parse program headers"))?;

for ph in &obj.program_headers {
if ph.p_type == elf::program_header::PT_INTERP && ph.p_filesz != 0 {
let count = (ph.p_filesz - 1) as usize;
let offset = ph.p_offset as usize;
obj.interpreter = data
.pread_with::<&str>(offset, ::scroll::ctx::StrCtx::Length(count))
.ok();
}
}

obj.section_headers =
SectionHeader::parse(data, header.e_shoff as usize, header.e_shnum as usize, ctx)
.map_err(|_| ElfError::new("unable to parse section headers"))?;

let get_strtab = |section_headers: &[SectionHeader], section_idx: usize| {
if section_idx >= section_headers.len() {
// FIXME: warn! here
Ok(Strtab::default())
} else {
let shdr = &section_headers[section_idx];
shdr.check_size(data.len())?;
Strtab::parse(data, shdr.sh_offset as usize, shdr.sh_size as usize, 0x0)
}
};

let strtab_idx = header.e_shstrndx as usize;
obj.shdr_strtab = return_partial_on_err!(|| get_strtab(&obj.section_headers, strtab_idx));

obj.syms = elf::Symtab::default();
obj.strtab = Strtab::default();
for shdr in &obj.section_headers {
if shdr.sh_type as u32 == elf::section_header::SHT_SYMTAB {
let size = shdr.sh_entsize;
let count = if size == 0 { 0 } else { shdr.sh_size / size };
obj.syms = return_partial_on_err!(|| elf::Symtab::parse(
data,
shdr.sh_offset as usize,
count as usize,
ctx
));

obj.strtab = return_partial_on_err!(|| get_strtab(
&obj.section_headers,
shdr.sh_link as usize
));
}
}

obj.soname = None;
obj.libraries = vec![];
obj.dynsyms = elf::Symtab::default();
obj.dynrelas = elf::RelocSection::default();
obj.dynrels = elf::RelocSection::default();
obj.pltrelocs = elf::RelocSection::default();
obj.dynstrtab = Strtab::default();
let dynamic =
return_partial_on_err!(|| elf::Dynamic::parse(data, &obj.program_headers, ctx));
if let Some(ref dynamic) = dynamic {
let dyn_info = &dynamic.info;
obj.dynstrtab = return_partial_on_err!(|| Strtab::parse(
data,
dyn_info.strtab,
dyn_info.strsz,
0x0
));

if dyn_info.soname != 0 {
// FIXME: warn! here
obj.soname = obj.dynstrtab.get_at(dyn_info.soname);
}
if dyn_info.needed_count > 0 {
obj.libraries = dynamic.get_libraries(&obj.dynstrtab);
}
// parse the dynamic relocations
obj.dynrelas = return_partial_on_err!(|| elf::RelocSection::parse(
data,
dyn_info.rela,
dyn_info.relasz,
true,
ctx
));
obj.dynrels = return_partial_on_err!(|| elf::RelocSection::parse(
data,
dyn_info.rel,
dyn_info.relsz,
false,
ctx
));
let is_rela = dyn_info.pltrel as u64 == elf::dynamic::DT_RELA;
obj.pltrelocs = return_partial_on_err!(|| elf::RelocSection::parse(
data,
dyn_info.jmprel,
dyn_info.pltrelsz,
is_rela,
ctx
));

let mut num_syms = if let Some(gnu_hash) = dyn_info.gnu_hash {
return_partial_on_err!(|| ElfObject::gnu_hash_len(data, gnu_hash as usize, ctx))
} else if let Some(hash) = dyn_info.hash {
return_partial_on_err!(|| ElfObject::hash_len(
data,
hash as usize,
header.e_machine,
ctx
))
} else {
0
};
let max_reloc_sym = obj
.dynrelas
.iter()
.chain(obj.dynrels.iter())
.chain(obj.pltrelocs.iter())
.fold(0, |num, reloc| cmp::max(num, reloc.r_sym));
if max_reloc_sym != 0 {
num_syms = cmp::max(num_syms, max_reloc_sym + 1);
}

obj.dynsyms =
return_partial_on_err!(|| elf::Symtab::parse(data, dyn_info.symtab, num_syms, ctx));
}

obj.shdr_relocs = vec![];
for (idx, section) in obj.section_headers.iter().enumerate() {
let is_rela = section.sh_type == elf::section_header::SHT_RELA;
if is_rela || section.sh_type == elf::section_header::SHT_REL {
return_partial_on_err!(|| section.check_size(data.len()));
let sh_relocs = return_partial_on_err!(|| elf::RelocSection::parse(
data,
section.sh_offset as usize,
section.sh_size as usize,
is_rela,
ctx,
));
obj.shdr_relocs.push((idx, sh_relocs));
}
}

obj.versym = return_partial_on_err!(|| elf::symver::VersymSection::parse(
data,
&obj.section_headers,
ctx
));
obj.verdef = return_partial_on_err!(|| elf::symver::VerdefSection::parse(
data,
&obj.section_headers,
ctx
));
obj.verneed = return_partial_on_err!(|| elf::symver::VerneedSection::parse(
data,
&obj.section_headers,
ctx
));

Ok(ElfObject {
elf: obj,
data,
is_malformed: false,
})
}

/// The container file format, which is always `FileFormat::Elf`.
Expand Down Expand Up @@ -269,6 +527,11 @@ impl<'data> ElfObject<'data> {
false
}

/// Determines whether this object is malformed and was only partially parsed
pub fn is_malformed(&self) -> bool {
self.is_malformed
}

/// Returns the raw data of the ELF file.
pub fn data(&self) -> &'data [u8] {
self.data
Expand Down Expand Up @@ -433,6 +696,7 @@ impl fmt::Debug for ElfObject<'_> {
.field("has_symbols", &self.has_symbols())
.field("has_debug_info", &self.has_debug_info())
.field("has_unwind_info", &self.has_unwind_info())
.field("is_malformed", &self.is_malformed())
.finish()
}
}
Expand Down Expand Up @@ -513,6 +777,10 @@ impl<'data: 'object, 'object> ObjectLike<'data, 'object> for ElfObject<'data> {
fn has_sources(&self) -> bool {
self.has_sources()
}

fn is_malformed(&self) -> bool {
self.is_malformed()
}
}

impl<'data> Dwarf<'data> for ElfObject<'data> {
Expand Down
Loading

0 comments on commit 9d308ef

Please sign in to comment.