Skip to content

Commit

Permalink
Store sample inputs as testvector::SampleInputsProto
Browse files Browse the repository at this point in the history
There are various serializations to represent sample inputs: for
functions, it is args.txt, for proc it is some other ad-hoc serialization,
and internal to the fuzzer datastructure it is serialized as
testvector::SampleInputsProto.

The tools have been prepared to take a single protobuffer. This CL
writes the protobuffer (as testvector.pbtxt) but still alongside
the 'old' serialization formats.

Part of the refactoring to universally use testvector::SampleInputsProto
(follow-up steps: use this format exclusively and remove old
serialization formats).

Issues: #1645
PiperOrigin-RevId: 694671946
  • Loading branch information
hzeller authored and copybara-github committed Nov 8, 2024
1 parent eedb4e3 commit abc11e1
Show file tree
Hide file tree
Showing 7 changed files with 107 additions and 45 deletions.
2 changes: 2 additions & 0 deletions xls/fuzzer/BUILD
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ cc_library(
"//xls/common/file:get_runfile_path",
"//xls/common/status:status_macros",
"//xls/dslx/frontend:pos",
"//xls/tests:testvector_cc_proto",
"@boringssl//:crypto",
"@com_google_absl//absl/flags:flag",
"@com_google_absl//absl/log",
Expand Down Expand Up @@ -324,6 +325,7 @@ cc_library(
"//xls/ir:value",
"//xls/public:runtime_build_actions",
"//xls/simulation:check_simulator",
"//xls/tests:testvector_cc_proto",
"//xls/tools:eval_utils",
"@com_google_absl//absl/algorithm:container",
"@com_google_absl//absl/container:btree",
Expand Down
9 changes: 9 additions & 0 deletions xls/fuzzer/run_fuzz.cc
Original file line number Diff line number Diff line change
Expand Up @@ -51,6 +51,7 @@
#include "xls/fuzzer/sample_generator.h"
#include "xls/fuzzer/sample_runner.h"
#include "xls/fuzzer/sample_summary.pb.h"
#include "xls/tests/testvector.pb.h"

ABSL_DECLARE_FLAG(int32_t, v);
ABSL_DECLARE_FLAG(std::string, vmodule);
Expand Down Expand Up @@ -205,6 +206,14 @@ absl::Status RunSample(const Sample& smp, const std::filesystem::path& run_dir,
SetTextProtoFile(options_file_name, smp.options().proto()));
argv.push_back("--options_file=options.pbtxt");

std::filesystem::path testvector_path = run_dir / "testvector.pbtxt";
testvector::SampleInputsProto testvector;
XLS_RETURN_IF_ERROR(smp.FillSampleInputs(&testvector));
XLS_RETURN_IF_ERROR(SetTextProtoFile(testvector_path, testvector));
// TODO(hzeller): This is a preparation, but testvector.pbtxt is not yet
// passed to tools. This is the egg, chicken follows in next change.
// argv.push_back("--testvector_textproto=testvector.pbtxt");

std::filesystem::path args_file_name = run_dir / "args.txt";
XLS_RETURN_IF_ERROR(
SetFileContents(args_file_name, ArgsBatchToText(smp.args_batch())));
Expand Down
113 changes: 70 additions & 43 deletions xls/fuzzer/sample.cc
Original file line number Diff line number Diff line change
Expand Up @@ -202,6 +202,47 @@ bool Sample::ArgsBatchEqual(const Sample& other) const {
return true;
}

// Converts a testvector::SampleInputsProto into the fuzzer's in-memory
// argument representation, appending the results to `args_batch`.
//
// Args:
//   is_proc_samples: if true, `testvector` is read as channel inputs of a
//     proc sample; otherwise it is read as function arguments.
//   testvector: the serialized inputs. Must not contain function_args when
//     `is_proc_samples` is true, and must not contain channel_inputs when it
//     is false.
//   args_batch: output. Existing content is kept; new entries are appended.
//   ir_channel_names: output, only used for proc samples, where it must not
//     be nullptr. One name is appended per channel found in `testvector`.
//
// Returns an error status if one of the checks above fails or if a value
// cannot be parsed. On error the output containers may already hold the
// entries that were converted before the failure.
/* static */ absl::Status Sample::ExtractArgsBatch(
    bool is_proc_samples, const testvector::SampleInputsProto& testvector,
    std::vector<std::vector<InterpValue>>& args_batch,
    std::vector<std::string>* ir_channel_names) {
  // In the serialization channel inputs are grouped by channel, but the
  // fuzzer expects inputs to be grouped by input number.
  // TODO(meheff): Change the fuzzer to accept inputs grouped by channel. This
  // would enable a different number of inputs per channel.
  if (is_proc_samples) {
    XLS_RET_CHECK(!testvector.has_function_args());  // proc samples expected
    XLS_RET_CHECK(ir_channel_names != nullptr);
    for (const testvector::ChannelInputProto& channel_input :
         testvector.channel_inputs().inputs()) {
      ir_channel_names->push_back(channel_input.channel_name());
      // The n-th value of every channel goes into the n-th row of args_batch.
      // NOTE(review): nothing here verifies that all channels carry the same
      // number of values; differing counts yield rows of differing length.
      // Confirm that callers guarantee equal counts.
      size_t input_index = 0;
      for (const std::string& value_str : channel_input.values()) {
        XLS_ASSIGN_OR_RETURN(Value value, Parser::ParseTypedValue(value_str));
        XLS_ASSIGN_OR_RETURN(InterpValue interp_value,
                             dslx::ValueToInterpValue(value));
        if (args_batch.size() <= input_index) {
          args_batch.resize(input_index + 1);
        }
        // The parsed value is not needed afterwards: move instead of copy.
        args_batch[input_index].push_back(std::move(interp_value));
        ++input_index;
      }
    }
    return absl::OkStatus();
  }

  // Otherwise just extract function information: every entry of
  // function_args is parsed into one row of args_batch.
  XLS_RET_CHECK(!testvector.has_channel_inputs());  // function samples expected
  for (const std::string& arg : testvector.function_args().args()) {
    XLS_ASSIGN_OR_RETURN(std::vector<InterpValue> args, dslx::ParseArgs(arg));
    args_batch.push_back(std::move(args));
  }

  return absl::OkStatus();
}

/* static */ absl::StatusOr<Sample> Sample::Deserialize(std::string_view s) {
bool in_config = false;
std::vector<std::string_view> config_lines;
Expand Down Expand Up @@ -238,37 +279,39 @@ bool Sample::ArgsBatchEqual(const Sample& other) const {
XLS_ASSIGN_OR_RETURN(SampleOptions options,
SampleOptions::FromProto(proto.sample_options()));

std::string dslx_code = absl::StrJoin(dslx_lines, "\n");
// Make sure we see the kind of inputs we expect.
XLS_RET_CHECK_EQ(proto.inputs().has_function_args(),
options.IsFunctionSample());

// In the serialization channel inputs are grouped by channel, but the
// fuzzer expects inputs to be grouped by input number.
// TODO(meheff): Change the fuzzer to accept inputs grouped by channel. This
// would enable a different number of inputs per channel.
std::vector<std::string> ir_channel_names;
std::vector<std::vector<InterpValue>> args_batch;
if (proto.sample_options().sample_type() == fuzzer::SAMPLE_TYPE_PROC) {
for (const testvector::ChannelInputProto& channel_input :
proto.inputs().channel_inputs().inputs()) {
ir_channel_names.push_back(channel_input.channel_name());
for (int i = 0; i < channel_input.values().size(); ++i) {
const std::string& value_str = channel_input.values(i);
XLS_ASSIGN_OR_RETURN(Value value, Parser::ParseTypedValue(value_str));
XLS_ASSIGN_OR_RETURN(InterpValue interp_value,
dslx::ValueToInterpValue(value));
if (args_batch.size() <= i) {
args_batch.resize(i + 1);
}
args_batch[i].push_back(interp_value);
}
std::vector<std::string> ir_channel_names;
XLS_RETURN_IF_ERROR(ExtractArgsBatch(options.IsProcSample(), proto.inputs(),
args_batch, &ir_channel_names));

std::string dslx_code = absl::StrJoin(dslx_lines, "\n");
return Sample(dslx_code, options, args_batch, ir_channel_names);
}

absl::Status Sample::FillSampleInputs(
testvector::SampleInputsProto* proto) const {
if (options().IsFunctionSample()) {
testvector::FunctionArgsProto* args_proto = proto->mutable_function_args();
for (const std::vector<InterpValue>& args : args_batch_) {
args_proto->add_args(InterpValueListToString(args));
}
} else {
XLS_RET_CHECK(proto.inputs().has_function_args());
for (const std::string& arg : proto.inputs().function_args().args()) {
XLS_ASSIGN_OR_RETURN(std::vector<InterpValue> args, dslx::ParseArgs(arg));
args_batch.push_back(args);
XLS_RET_CHECK(options().IsProcSample());
testvector::ChannelInputsProto* inputs_proto =
proto->mutable_channel_inputs();
for (int64_t i = 0; i < ir_channel_names_.size(); ++i) {
testvector::ChannelInputProto* input_proto = inputs_proto->add_inputs();
input_proto->set_channel_name(ir_channel_names_[i]);
for (const std::vector<InterpValue>& args : args_batch_) {
input_proto->add_values(ToArgString(args[i]));
}
}
}
return Sample(dslx_code, options, args_batch, ir_channel_names);
return absl::OkStatus();
}

std::string Sample::Serialize(
Expand All @@ -285,24 +328,8 @@ std::string Sample::Serialize(
config.set_issue(std::string("DO NOT ") +
"SUBMIT Insert link to GitHub issue here.");
*config.mutable_sample_options() = options().proto();
if (options().IsFunctionSample()) {
testvector::FunctionArgsProto* args_proto =
config.mutable_inputs()->mutable_function_args();
for (const std::vector<InterpValue>& args : args_batch_) {
args_proto->add_args(InterpValueListToString(args));
}
} else {
CHECK(options().IsProcSample());
testvector::ChannelInputsProto* inputs_proto =
config.mutable_inputs()->mutable_channel_inputs();
for (int64_t i = 0; i < ir_channel_names_.size(); ++i) {
testvector::ChannelInputProto* input_proto = inputs_proto->add_inputs();
input_proto->set_channel_name(ir_channel_names_[i]);
for (const std::vector<InterpValue>& args : args_batch_) {
input_proto->add_values(ToArgString(args[i]));
}
}
}
CHECK_OK(FillSampleInputs(config.mutable_inputs()));

std::string config_text;
CHECK(google::protobuf::TextFormat::PrintToString(config, &config_text));
for (std::string_view line : absl::StrSplit(config_text, '\n')) {
Expand Down
13 changes: 13 additions & 0 deletions xls/fuzzer/sample.h
Original file line number Diff line number Diff line change
Expand Up @@ -24,11 +24,13 @@
#include <utility>
#include <vector>

#include "absl/status/status.h"
#include "absl/status/statusor.h"
#include "absl/types/span.h"
#include "xls/common/proto_adaptor_utils.h"
#include "xls/dslx/interp_value.h"
#include "xls/fuzzer/sample.pb.h"
#include "xls/tests/testvector.pb.h"
#include "re2/re2.h"

namespace xls {
Expand Down Expand Up @@ -213,6 +215,14 @@ class Sample {
// // END_CONFIG
// <code sample>
static absl::StatusOr<Sample> Deserialize(std::string_view s);

// Utility function to convert a testvector::SampleInputsProto into the
// args batch and channel names representation used by Sample.
//
// If `is_proc_samples` is true, `testvector` is read as channel inputs:
// `ir_channel_names` must then be non-null and receives one name per
// channel, while the n-th value of each channel is appended to the n-th
// row of `args_batch`. Otherwise `testvector` is read as function arguments
// and each entry is parsed into one new row of `args_batch`;
// `ir_channel_names` is not used in that case.
//
// Results are appended to the output containers. Returns an error status if
// `testvector` holds the other kind of inputs than requested or if a value
// fails to parse.
static absl::Status ExtractArgsBatch(
bool is_proc_samples, const testvector::SampleInputsProto& testvector,
std::vector<std::vector<dslx::InterpValue>>& args_batch,
std::vector<std::string>* ir_channel_names = nullptr);

std::string Serialize(
std::optional<std::string_view> error_message = std::nullopt) const;

Expand Down Expand Up @@ -247,6 +257,9 @@ class Sample {
}
bool operator!=(const Sample& other) const { return !((*this) == other); }

// Convert internal argument representation to a sample inputs proto.
//
// For function samples, one function_args entry is added per row of the
// args batch. For proc samples, one channel input is added per IR channel
// name, holding that channel's value from every row of the args batch.
// Returns an error status if the sample is neither a function sample nor a
// proc sample.
absl::Status FillSampleInputs(testvector::SampleInputsProto* proto) const;

private:
// Returns whether the argument batch is the same as in "other".
bool ArgsBatchEqual(const Sample& other) const;
Expand Down
3 changes: 3 additions & 0 deletions xls/fuzzer/sample_cc_test.cc
Original file line number Diff line number Diff line change
Expand Up @@ -74,6 +74,9 @@ TEST(SampleCcTest, DeserializationCanHandleNewlinesInStringLiterals) {
// BEGIN_CONFIG
// # proto-message: xls.fuzzer.CrasherConfigurationProto
// issue: "Foo"
// sample_options {
// sample_type: SAMPLE_TYPE_FUNCTION
// }
// inputs {
// function_args {
// args: "(bits[32]:0x01,
Expand Down
8 changes: 7 additions & 1 deletion xls/fuzzer/sample_runner.cc
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,7 @@
#include "xls/ir/value.h"
#include "xls/public/runtime_build_actions.h"
#include "xls/simulation/check_simulator.h"
#include "xls/tests/testvector.pb.h"
#include "xls/tools/eval_utils.h"
#include "re2/re2.h"

Expand Down Expand Up @@ -828,12 +829,17 @@ absl::Status SampleRunner::Run(const Sample& sample) {
} else {
input_path /= "sample.ir";
}

XLS_RETURN_IF_ERROR(SetFileContents(input_path, sample.input_text()));

std::filesystem::path options_path = run_dir_ / "options.pbtxt";
XLS_RETURN_IF_ERROR(SetTextProtoFile(options_path, sample.options().proto()));

std::filesystem::path testvector_path = run_dir_ / "testvector.pbtxt";
testvector::SampleInputsProto testvector;
XLS_RETURN_IF_ERROR(sample.FillSampleInputs(&testvector));
XLS_RETURN_IF_ERROR(SetTextProtoFile(testvector_path, testvector));

// TODO(hzeller): retire the following files and use testvector
std::filesystem::path args_path = run_dir_ / "args.txt";
XLS_RETURN_IF_ERROR(
SetFileContents(args_path, ArgsBatchToText(sample.args_batch())));
Expand Down
4 changes: 3 additions & 1 deletion xls/tests/testvector.proto
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,8 @@ package xls.testvector;

// TODO(google/xls#1645) As a first step, this is lifted out of
// fuzzer/sample.proto without change; more refactoring steps follow here.
// The channel data should possibly even be a xls::ProcChannelValuesProto
// but requires to re-write existing crasher*.x

// Inputs fed to a single input channel of the sample proc.
message ChannelInputProto {
Expand All @@ -37,7 +39,7 @@ message FunctionArgsProto {
// Each entry is a semicolon-separated list of xls::Values with one Value per
// function parameter. Example entry: "bits[1]:0; bits[32]:0x42"
// TODO(google/xls#1645) instead of the semicolon-separation, this should be
// repeated field of values.
// repeated field of ValueProtos
repeated string args = 1;
}

Expand Down

0 comments on commit abc11e1

Please sign in to comment.