diff --git a/src/asm_files.cpp b/src/asm_files.cpp index 6045be64c..874156290 100644 --- a/src/asm_files.cpp +++ b/src/asm_files.cpp @@ -60,7 +60,7 @@ bool is_map_section(const std::string& name) { return name == "maps" || (name.length() > 5 && name.compare(0, maps_prefix.length(), maps_prefix) == 0); } -std::tuple get_symbol_name_and_section_index(ELFIO::const_symbol_section_accessor& symbols, +std::tuple get_symbol_name_and_section_index(const ELFIO::const_symbol_section_accessor& symbols, ELFIO::Elf_Xword index) { string symbol_name; ELFIO::Elf64_Addr value{}; @@ -74,7 +74,7 @@ std::tuple get_symbol_name_and_section_index(ELFIO::con } std::tuple get_value(const ELFIO::const_symbol_section_accessor& symbols, - ELFIO::Elf_Word index) { + ELFIO::Elf_Xword index) { string symbol_name; ELFIO::Elf64_Addr value{}; ELFIO::Elf_Xword size{}; @@ -162,12 +162,6 @@ std::tuple get_program_name_and_size(ELFIO::section& s return {program_name, size}; } -void relocate_function(ebpf_inst& inst, ELFIO::Elf64_Addr offset, ELFIO::Elf_Word index, - const ELFIO::const_symbol_section_accessor& symbols) { - auto [relocation_offset, relocation_type] = get_value(symbols, index); - inst.imm = ((relocation_offset - offset) / sizeof(ebpf_inst)) - 1; -} - void relocate_map(ebpf_inst& inst, const std::string& symbol_name, const std::variant>& map_record_size_or_map_offsets, const program_info& info, ELFIO::Elf64_Addr offset, ELFIO::Elf_Word index, @@ -205,6 +199,69 @@ void relocate_map(ebpf_inst& inst, const std::string& symbol_name, } } +// Structure used to keep track of subprogram relocation data until any subprograms +// are loaded and can be appended to the calling program. +struct function_relocation { + size_t prog_index{}; // Index of source program in vector of raw programs. + ELFIO::Elf_Xword source_offset{}; // Instruction offset in source section of source instruction; also used as the index into prog.prog. NOTE(review): confirm both agree when the program does not start at section offset 0. 
+ ELFIO::Elf_Xword relocation_entry_index{}; + std::string target_function_name; +}; + +static void append_subprogram(raw_program& prog, ELFIO::section& subprogram_section, + ELFIO::const_symbol_section_accessor& symbols, const std::string& symbol_name) { + // Find subprogram by name. + for (ELFIO::Elf_Xword subprogram_offset = 0; subprogram_offset < subprogram_section.get_size();) { + auto [subprogram_name, subprogram_size] = + get_program_name_and_size(subprogram_section, subprogram_offset, symbols); + if (subprogram_size == 0) { + throw std::runtime_error("Zero-size subprogram '" + subprogram_name + "' in section '" + + subprogram_section.get_name() + "'"); + } + if (subprogram_name == symbol_name) { + // Append subprogram instructions to the main program. + auto subprogram = vector_of(subprogram_section.get_data() + subprogram_offset, subprogram_size); + prog.prog.insert(prog.prog.end(), subprogram.begin(), subprogram.end()); + return; + } + subprogram_offset += subprogram_size; + } + throw std::runtime_error("Subprogram '" + symbol_name + "' not found in section '" + + subprogram_section.get_name() + "'"); +} + +static void append_subprograms(raw_program& prog, vector& res, const vector& function_relocations, ELFIO::elfio& reader, + ELFIO::const_symbol_section_accessor& symbols) { + // Perform function relocations and fill in the inst.imm values of CallLocal instructions. + std::map subprogram_offsets; + for (auto& reloc : function_relocations) { + if (reloc.prog_index >= res.size()) { + continue; + } + if (res[reloc.prog_index].function_name != prog.function_name) { + continue; + } + + // Check whether we already appended the target program, and append it if not. 
+ if (subprogram_offsets.find(reloc.target_function_name) == subprogram_offsets.end()) { + subprogram_offsets[reloc.target_function_name] = prog.prog.size(); + + auto [symbol_name, section_index] = get_symbol_name_and_section_index(symbols, reloc.relocation_entry_index); + ELFIO::section& subprogram_section = *reader.sections[section_index]; + append_subprogram(prog, subprogram_section, symbols, symbol_name); + } + + // Store the PC-relative offset of the appended target in the imm field of the CallLocal instruction. + ELFIO::Elf_Xword target_offset = subprogram_offsets[reloc.target_function_name]; + int64_t offset_diff = (int64_t)(target_offset - reloc.source_offset - 1); + if (offset_diff < INT32_MIN || offset_diff > INT32_MAX) { + throw std::runtime_error("Offset difference out of int32_t range for instruction at source offset " + + std::to_string(reloc.source_offset)); + } + prog.prog[reloc.source_offset].imm = (int32_t)offset_diff; + } +} + vector read_elf(std::istream& input_stream, const std::string& path, const std::string& desired_section, const ebpf_verifier_options_t* options, const ebpf_platform_t* platform) { if (options == nullptr) { @@ -298,6 +355,7 @@ vector read_elf(std::istream& input_stream, const std::string& path vector res; vector unresolved_symbols; + vector function_relocations; for (const auto& section : reader.sections) { const string name = section->get_name(); if (!desired_section.empty() && name != desired_section) { @@ -322,6 +380,7 @@ vector read_elf(std::istream& input_stream, const std::string& path program_name, vector_of(section->get_data() + program_offset, program_size), info}; + auto prelocs = reader.sections[string(".rel") + name]; if (!prelocs) { prelocs = reader.sections[string(".rela") + name]; @@ -357,10 +416,14 @@ vector read_elf(std::istream& input_stream, const std::string& path auto [symbol_name, symbol_section_index] = get_symbol_name_and_section_index(symbols, index); - // Perform relocation for function symbols. 
+ // Queue up relocation for function symbols. if ((inst.opcode == INST_OP_CALL) && (inst.src == INST_CALL_LOCAL) && (reader.sections[symbol_section_index] == section.get())) { - relocate_function(inst, offset, index, symbols); + function_relocation fr{.prog_index = res.size(), + .source_offset = offset / sizeof(ebpf_inst), + .relocation_entry_index = index, + .target_function_name = symbol_name}; + function_relocations.push_back(fr); continue; } @@ -381,6 +444,14 @@ vector read_elf(std::istream& input_stream, const std::string& path } } + // Now that we have all programs in the list, we can append any directly called subprograms + // to the calling programs. We have to keep them as programs themselves in case the caller + // wants to verify them separately, but we also have to append them if used as subprograms to + // allow the caller to be fully verified since inst.imm can only point into the same program. + for (auto& prog : res) { + append_subprograms(prog, res, function_relocations, reader, symbols); + } + // Below, only relocations of symbols located in the map sections are allowed, // so if there are relocations there needs to be a maps section. 
if (!unresolved_symbols.empty()) { diff --git a/src/test/test_verify.cpp b/src/test/test_verify.cpp index a41ec685c..810ea82f8 100644 --- a/src/test/test_verify.cpp +++ b/src/test/test_verify.cpp @@ -76,6 +76,11 @@ FAIL_UNMARSHAL("invalid", "invalid-lddw.o", ".text") VERIFY_PROGRAM(project, filename, section_name, program_name, nullptr, &g_ebpf_platform_linux, true); \ } +#define TEST_PROGRAM_REJECT(project, filename, section_name, program_name) \ + TEST_CASE("./check ebpf-samples/" project "/" filename " " program_name, "[verify][samples][" project "]") { \ + VERIFY_PROGRAM(project, filename, section_name, program_name, nullptr, &g_ebpf_platform_linux, false); \ + } + #define TEST_SECTION_REJECT(project, filename, section) \ TEST_CASE("./check ebpf-samples/" project "/" filename " " section, "[verify][samples][" project "]") { \ VERIFY_SECTION(project, filename, section, nullptr, &g_ebpf_platform_linux, false); \ @@ -497,6 +502,8 @@ TEST_SECTION("raw_tracepoint/filler/sys_sendmsg_x") TEST_SECTION("raw_tracepoint/filler/proc_startupdate_2") TEST_SECTION("raw_tracepoint/filler/sys_recvfrom_x") */ +TEST_PROGRAM_REJECT("build", "bpf2bpf.o", ".text", "plus1"); // Subprogram will fail verification. +TEST_PROGRAM("build", "bpf2bpf.o", ".text", "func"); // Subprogram can be called from main program. TEST_SECTION("build", "byteswap.o", ".text") TEST_SECTION("build", "stackok.o", ".text") TEST_SECTION("build", "packet_start_ok.o", "xdp")