Skip to content

Commit

Permalink
Reorganizing IND mining algorithms tests
Browse files Browse the repository at this point in the history
The code for testing IND algorithms has been completely refactored. Thanks to
this, we were able to reuse the Faida tests for other dependency mining algorithms.
Add tests for the Mind algorithm.
  • Loading branch information
vs9h authored and chernishev committed May 11, 2024
1 parent 7d8f95b commit 6af5c4a
Show file tree
Hide file tree
Showing 4 changed files with 240 additions and 172 deletions.
164 changes: 31 additions & 133 deletions src/tests/test_faida.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -6,158 +6,56 @@
#include "config/names.h"
#include "test_ind_util.h"

namespace fs = std::filesystem;

namespace tests {

using INDTestSet = std::set<INDTest>;

/// Asserts that the mined dependencies in `actual` coincide with the `expected` set.
///
/// The sizes are compared first; afterwards every mined dependency, converted with
/// ToINDTest, has to be found in `expected`. The first dependency that is not found
/// produces a fatal failure that names it.
void CheckINDsListsEquality(std::list<model::IND> const& actual, INDTestSet const& expected) {
    ASSERT_EQ(actual.size(), expected.size())
            << "count of generated dependencies does not match: expected " << expected.size()
            << ", got " << actual.size();

    for (model::IND const& ind : actual) {
        bool const is_expected = expected.count(ToINDTest(ind)) != 0;
        if (is_expected) {
            continue;
        }
        FAIL() << "generated dep '" << ind.ToShortString() << "' is not present in expected";
    }
    SUCCEED();
}

/// Asserts that every dependency of `expected_subset` occurs in `actual`.
///
/// `expected_subset` is taken by value on purpose: entries that are found are erased from the
/// local copy, so whatever is left at the end is exactly the set of expected dependencies that
/// were not mined. Equal sizes are rejected up front (presumably an exact match is expected to
/// go through CheckINDsListsEquality instead).
void CheckResultContainsINDs(std::list<model::IND> const& actual, INDTestSet expected_subset) {
    ASSERT_NE(actual.size(), expected_subset.size())
            << "count of generated dependencies must not be equal to the subset size: got "
            << actual.size();

    for (auto const& dep : actual) {
        // Erasing by key does nothing for dependencies that are not part of the subset.
        expected_subset.erase(ToINDTest(dep));
    }
    // empty() avoids comparing an unsigned size with a signed literal.
    ASSERT_TRUE(expected_subset.empty())
            << expected_subset.size() << " expected dependencies are missing from the result";
}

namespace {
// Since Faida is an approximate algorithm, its results may differ from all the other IND discovery
// algorithms, which are exact. Of course, on these small examples the results of both exact and
// approximate algos must be the same, but in the future the set of test datasets will grow and
// large tests may be added. Therefore the decision was made to move Faida tests to a separate
// class.
// Fixture shared by the Faida tests; provides helpers that create a loaded algorithm instance.
// NOTE(review): the member section below appears to interleave removed and added diff lines
// (two signatures are opened back to back and two return statements follow each other), so it
// does not compile as shown — confirm against the repository before relying on it.
class FaidaINDAlgorithmTest : public ::testing::Test {
public:
// Option values that CreateFaidaInstance(csv_configs, config) copies into the parameter map.
struct Config {
int sample_size;
double hll_accuracy;
unsigned short num_threads;
};

protected:
// Builds the parameter map from individual option values.
static algos::StdParamsMap GetParamMap(CSVConfigs const& csv_configs, int sample_size,
double hll_accuracy, bool find_nary,
unsigned short num_threads) {
// Creates and loads an algorithm instance for `csv_configs` using the options in `config`.
static std::unique_ptr<algos::INDAlgorithm> CreateFaidaInstance(CSVConfigs const& csv_configs,
Config const& config) {
using namespace config::names;
return {{kCsvConfigs, csv_configs},
{kFindNary, find_nary},
{kSampleSize, sample_size},
{kHllAccuracy, hll_accuracy},
{kThreads, num_threads}};
}

// Forwards its arguments to GetParamMap and creates a Faida instance from the result.
template <typename... Args>
static std::unique_ptr<algos::INDAlgorithm> CreateFaidaInstance(Args&&... args) {
return algos::CreateAndLoadAlgorithm<algos::Faida>(
GetParamMap(std::forward<Args>(args)...));
// The Faida fixture has to instantiate Faida here, matching the overload above; the sample
// size and HLL accuracy options are the ones the Faida helpers pass everywhere else.
return algos::CreateAndLoadAlgorithm<algos::Faida>(algos::StdParamsMap{
{kCsvConfigs, csv_configs},
{kSampleSize, config.sample_size},
{kHllAccuracy, config.hll_accuracy},
{kThreads, config.num_threads},
});
}
};

// Executes the instance created for kIndTestWide2 and requires its result to be exactly
// the three dependencies listed below.
TEST_F(FaidaINDAlgorithmTest, TestWide2) {
    // Algorithm settings handed to CreateFaidaInstance.
    unsigned short thread_count = 4;
    bool mine_nary = true;
    double accuracy = 0.001;
    int sample_rows = 500;

    INDTestSet wide2_inds{
            {{0, {2}}, {0, {0}}},
            {{0, {3}}, {0, {1}}},
            {{0, {2, 3}}, {0, {0, 1}}},
    };

    auto faida = CreateFaidaInstance(CSVConfigs{kIndTestWide2}, sample_rows, accuracy, mine_nary,
                                     thread_count);
    faida->Execute();
    CheckINDsListsEquality(faida->INDList(), wide2_inds);
}

// Creating an instance for the kIndTestEmpty dataset has to throw std::runtime_error.
TEST_F(FaidaINDAlgorithmTest, TestEmpty) {
    unsigned short thread_count = 4;
    bool mine_nary = true;
    double accuracy = 0.001;
    int sample_rows = 500;

    // The creation is wrapped so that the assertion below stays a one-liner.
    auto const create_on_empty_table = [&] {
        return CreateFaidaInstance(CSVConfigs{kIndTestEmpty}, sample_rows, accuracy, mine_nary,
                                   thread_count);
    };
    ASSERT_THROW(create_on_empty_table(), std::runtime_error);
}

// Creating an instance without any input table has to throw config::ConfigurationError.
TEST_F(FaidaINDAlgorithmTest, TestEmptyInput) {
    unsigned short thread_count = 4;
    bool mine_nary = true;
    double accuracy = 0.001;
    int sample_rows = 500;

    // The creation is wrapped so that the assertion below stays a one-liner.
    auto const create_without_tables = [&] {
        return CreateFaidaInstance(CSVConfigs{}, sample_rows, accuracy, mine_nary, thread_count);
    };
    ASSERT_THROW(create_without_tables(), config::ConfigurationError);
}

// Executes the instance created for kIndTestPlanets and requires its result to be exactly the
// eight dependencies below: four with one-element column lists and four with two-element ones.
TEST_F(FaidaINDAlgorithmTest, TestPlanets) {
INDTestSet expected_inds{{{0, {0}}, {0, {1}}}, {{0, {1}}, {0, {0}}},
{{0, {2}}, {0, {3}}}, {{0, {3}}, {0, {2}}},
{{0, {1, 3}}, {0, {0, 2}}}, {{0, {0, 2}}, {0, {1, 3}}},
{{0, {0, 3}}, {0, {1, 2}}}, {{0, {1, 2}}, {0, {0, 3}}}};

int sample_size = 500;
double hll_accuracy = 0.001;
bool find_nary = true;
unsigned short num_threads = 4;
// NOTE(review): this alias is not used inside the test; it looks like an added diff line that
// landed in the removed test body and presumably belongs at namespace scope — confirm.
using FaidaTestConfig = FaidaINDAlgorithmTest::Config;

auto algorithm = CreateFaidaInstance(CSVConfigs{kIndTestPlanets}, sample_size, hll_accuracy,
find_nary, num_threads);
algorithm->Execute();
CheckINDsListsEquality(algorithm->INDList(), expected_inds);
}

// Executes the instance created for kIndTest3aryInds and compares its result with the
// dependencies below, whose column lists have one to three elements.
// NOTE(review): this span appears to interleave the removed test with added lines (the shared
// config and the namespace-closing brace), so the braces do not pair up the way the test
// intends — confirm against the repository.
TEST_F(FaidaINDAlgorithmTest, Test3ary) {
INDTestSet expected_inds{{{0, {3}}, {0, {0}}},
{{0, {4}}, {0, {1}}},
{{0, {5}}, {0, {2}}},
{{0, {2}}, {0, {5}}},
{{0, {3, 4}}, {0, {0, 1}}},
{{0, {4, 5}}, {0, {1, 2}}},
{{0, {3, 5}}, {0, {0, 2}}},
{{0, {3, 4, 5}}, {0, {0, 1, 2}}},
// NOTE(review): identical to the previous initializer; INDTestSet is a std::set, so only one
// copy is kept.
{{0, {3, 4, 5}}, {0, {0, 1, 2}}}};

int sample_size = 500;
double hll_accuracy = 0.001;
bool find_nary = true;
unsigned short num_threads = 4;
// Option values passed to CreateFaidaInstance by EqualityTest and TestTwoTables.
static FaidaTestConfig parallel_test_config{
.sample_size = 500,
.hll_accuracy = 0.001,
.num_threads = 4,
};
} // namespace

auto algorithm = CreateFaidaInstance(CSVConfigs{kIndTest3aryInds}, sample_size, hll_accuracy,
find_nary, num_threads);
algorithm->Execute();
CheckINDsListsEquality(algorithm->INDList(), expected_inds);
// Runs CheckINDsListsEqualityTest for every (datasets, expected dependencies) pair of
// kINDEqualityTestConfigs, each time with a fresh instance built from parallel_test_config.
TEST_F(FaidaINDAlgorithmTest, EqualityTest) {
    for (auto& [datasets, inds] : kINDEqualityTestConfigs) {
        CheckINDsListsEqualityTest(CreateFaidaInstance(datasets, parallel_test_config), inds);
    }
}

// Two-table case (kIndTestTableFirst and kIndTestTableSecond): the result has to hold 47
// dependencies and contain both four-column dependencies listed below.
TEST_F(FaidaINDAlgorithmTest, TestTwoTables) {
INDTestSet expected_inds_subset{{{0, {0, 1, 2, 3}}, {1, {0, 1, 3, 4}}},
{{1, {0, 1, 3, 4}}, {0, {0, 1, 2, 3}}}};
size_t constexpr expected_result_size = 47;

int sample_size = 500;
double hll_accuracy = 0.001;
bool find_nary = true;
unsigned short num_threads = 4;

auto algorithm = CreateFaidaInstance(CSVConfigs{kIndTestTableFirst, kIndTestTableSecond},
sample_size, hll_accuracy, find_nary, num_threads);
algorithm->Execute();
auto result = algorithm->INDList();

ASSERT_EQ(result.size(), expected_result_size);
CheckResultContainsINDs(result, expected_inds_subset);
// NOTE(review): the call below repeats the same check with the same datasets, subset and
// size (47); it looks like the added-side replacement for the statements above — confirm
// which of the two variants is meant to stay.
CheckINDsResultContainsINDsTest(
CreateFaidaInstance({kIndTestTableFirst, kIndTestTableSecond}, parallel_test_config),
expected_inds_subset, 47);
}

} // namespace tests
Loading

0 comments on commit 6af5c4a

Please sign in to comment.