Skip to content

Commit

Permalink
Add a disjoint sets (union-find) utility (#6797)
Browse files Browse the repository at this point in the history
This will be used in an upcoming type optimization pass and may be
generally useful.
  • Loading branch information
tlively authored Aug 1, 2024
1 parent 705c28d commit 2a7c093
Show file tree
Hide file tree
Showing 3 changed files with 194 additions and 0 deletions.
87 changes: 87 additions & 0 deletions src/support/disjoint_sets.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,87 @@
/*
* Copyright 2024 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#ifndef wasm_support_disjoint_sets_h
#define wasm_support_disjoint_sets_h

#include <cassert>
#include <cstddef>
#include <vector>

namespace wasm {

// A disjoint set forest (a.k.a. union-find) implementation. See
// https://en.wikipedia.org/wiki/Disjoint-set_data_structure.
struct DisjointSets {
struct ElemInfo {
// The index of the parent element, or the index of the element itself if it
// has no parent.
size_t parent;
// An upper bound on the height of the tree rooted at this element.
size_t rank;
};
std::vector<ElemInfo> info;

// Add an element and return its index.
size_t addSet() {
size_t ret = info.size();
info.push_back({ret, 0});
return ret;
}

// Get the representative element of the set to which `elem` belongs.
size_t getRoot(size_t elem) {
assert(elem < info.size());
size_t root = elem;
// Follow parent pointers up to the root.
for (; info[root].parent != root; root = info[root].parent) {
}
// Compress the path to make subsequent getRoots of this set faster.
while (elem != root) {
size_t parent = info[elem].parent;
info[elem].parent = root;
elem = parent;
}
return root;
}

// Join the sets to which the elements belong and return the representative
// element of the union.
size_t getUnion(size_t elem1, size_t elem2) {
assert(elem1 < info.size() && elem2 < info.size());
size_t root1 = getRoot(elem1);
size_t root2 = getRoot(elem2);
if (root1 == root2) {
// Already in the same set.
return root1;
}
// Canonicalize so that root1 has the greater rank.
if (info[root1].rank < info[root2].rank) {
std::swap(root1, root2);
}
// Merge the trees, smaller into larger.
info[root2].parent = root1;
// If the ranks were equal, the new root has a larger rank.
if (info[root1].rank == info[root2].rank) {
++info[root1].rank;
}
return root1;
}
};

} // namespace wasm

#endif // wasm_support_disjoint_sets_h
1 change: 1 addition & 0 deletions test/gtest/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ include_directories(../../src/wasm)
set(unittest_SOURCES
cfg.cpp
dfa_minimization.cpp
disjoint_sets.cpp
json.cpp
lattices.cpp
possible-contents.cpp
Expand Down
106 changes: 106 additions & 0 deletions test/gtest/disjoint_sets.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
/*
* Copyright 2024 WebAssembly Community Group participants
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include "support/disjoint_sets.h"
#include "gtest/gtest.h"

using namespace wasm;

TEST(DisjointSetsTest, NewSets) {
DisjointSets sets;
auto elem1 = sets.addSet();
auto elem2 = sets.addSet();
EXPECT_NE(elem1, elem2);

auto root1 = sets.getRoot(elem1);
EXPECT_EQ(elem1, root1);

auto root2 = sets.getRoot(elem2);
EXPECT_EQ(elem2, root2);
}

TEST(DisjointSetsTest, Union) {
DisjointSets sets;
auto elem1 = sets.addSet();
auto elem2 = sets.addSet();
auto root = sets.getUnion(elem1, elem2);
EXPECT_TRUE(root == elem1 || root == elem2);

auto root1 = sets.getRoot(elem1);
auto root2 = sets.getRoot(elem2);
EXPECT_EQ(root1, root);
EXPECT_EQ(root2, root);
}

TEST(DisjointSetsTest, TwoUnions) {
DisjointSets sets;
auto elem1 = sets.addSet();
auto elem2 = sets.addSet();
auto elem3 = sets.addSet();
auto elem4 = sets.addSet();

auto rootA = sets.getUnion(elem1, elem3);
auto rootB = sets.getUnion(elem2, elem4);
EXPECT_EQ(sets.getRoot(elem1), rootA);
EXPECT_EQ(sets.getRoot(elem2), rootB);
EXPECT_EQ(sets.getRoot(elem3), rootA);
EXPECT_EQ(sets.getRoot(elem4), rootB);
EXPECT_NE(rootA, rootB);
}

TEST(DisjointSetsTest, UnionList) {
constexpr size_t count = 16;
DisjointSets sets;
size_t elems[count];
for (size_t i = 0; i < count; ++i) {
elems[i] = sets.addSet();
}

for (size_t i = 1; i < count; ++i) {
sets.getUnion(elems[i], elems[i - 1]);
}

auto root = sets.getRoot(elems[0]);
for (size_t rep = 0; rep < 2; ++rep) {
for (size_t i = 0; i < count; ++i) {
auto currRoot = sets.getRoot(elems[i]);
EXPECT_EQ(currRoot, root);
}
}
}

TEST(DisjointSetsTest, UnionTree) {
constexpr size_t count = 16;
DisjointSets sets;
size_t elems[count];
for (size_t i = 0; i < count; ++i) {
elems[i] = sets.addSet();
}

for (size_t stride = 2; stride <= count; stride *= 2) {
for (size_t i = 0; i < count; i += stride) {
sets.getUnion(elems[i], elems[i + stride / 2]);
}
}

auto root = sets.getRoot(elems[0]);
for (size_t rep = 0; rep < 2; ++rep) {
for (size_t i = 0; i < count; ++i) {
auto currRoot = sets.getRoot(elems[i]);
EXPECT_EQ(currRoot, root);
}
}
}

0 comments on commit 2a7c093

Please sign in to comment.