Skip to content
This repository has been archived by the owner on Aug 1, 2024. It is now read-only.

Commit

Permalink
rgw/sfs: honor retry_raced_bucket_write mechanism
Browse files Browse the repository at this point in the history
Updating bucket's metadata concurrently by two or more threads is allowed in radosgw.
There is a retry mechanism: retry_raced_bucket_write(), that expects the bucket references to fetch the latest data from the persistent store.
rgw/sfs driver didn't implement try_refresh_info() in its bucket class definition; this could cause two references to the same bucket to potentially lead to partial metadata updates.

Fixes: https://github.com/aquarist-labs/s3gw/issues/637
Signed-off-by: Giuseppe Baccini <[email protected]>
  • Loading branch information
Giuseppe Baccini committed Nov 2, 2023
1 parent 4976c75 commit 1846c13
Show file tree
Hide file tree
Showing 3 changed files with 266 additions and 2 deletions.
20 changes: 18 additions & 2 deletions src/rgw/driver/sfs/bucket.cc
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,11 @@ namespace rgw::sal {

SFSBucket::SFSBucket(SFStore* _store, sfs::BucketRef _bucket)
: StoreBucket(_bucket->get_info()), store(_store), bucket(_bucket) {
update_views();
}

void SFSBucket::update_views() {
get_info() = bucket->get_info();
set_attrs(bucket->get_attrs());

auto it = attrs.find(RGW_ATTR_ACL);
Expand Down Expand Up @@ -534,11 +539,22 @@ int SFSBucket::abort_multiparts(
return sfs::SFSMultipartUploadV2::abort_multiparts(dpp, store, this);
}

/**
* @brief Refresh this bucket object with the state obtained from the store.
Indeed it can happen that the state of this bucket is obsolete due to
concurrent threads updating metadata using their own SFSBucket instance.
*/
int SFSBucket::try_refresh_info(
const DoutPrefixProvider* dpp, ceph::real_time* /*pmtime*/
) {
ldpp_dout(dpp, 10) << __func__ << ": TODO" << dendl;
return -ENOTSUP;
auto bref = store->get_bucket_ref(get_name());
if (!bref) {
lsfs_dout(dpp, 0) << fmt::format("no such bucket! {}", get_name()) << dendl;
return -ERR_NO_SUCH_BUCKET;
}
bucket = bref;
update_views();
return 0;
}

int SFSBucket::read_usage(
Expand Down
14 changes: 14 additions & 0 deletions src/rgw/driver/sfs/bucket.h
Original file line number Diff line number Diff line change
Expand Up @@ -60,6 +60,20 @@ class SFSBucket : public StoreBucket {
SFSBucket(SFStore* _store, sfs::BucketRef _bucket);
SFSBucket& operator=(const SFSBucket&) = delete;

/**
* This method updates the in-memory views of this object fetching
* from this.bucket.
* This method should be called every time this.bucket is updated
* from the backing storage.
*
* Views updated:
*
* - get_info()
* - get_attrs()
* - acls
*/
void update_views();

virtual std::unique_ptr<Bucket> clone() override {
return std::unique_ptr<Bucket>(new SFSBucket{*this});
}
Expand Down
234 changes: 234 additions & 0 deletions src/test/rgw/sfs/test_rgw_sfs_sfs_bucket.cc
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,49 @@
#include "rgw/driver/sfs/sqlite/sqlite_users.h"
#include "rgw/rgw_sal_sfs.h"

/*
These structs are in-memory mockable versions of actual structs/classes
that have a private rep.
Real types normally populate their rep via encode/decode methods.
For the sake of convenience, we define binary equivalent types with
public editable members.
*/
namespace mockable {
struct DefaultRetention {
std::string mode;
int days;
int years;

bool operator==(const DefaultRetention& other) const {
return this->mode == other.mode && this->days == other.days &&
this->years == other.years;
}
};

struct ObjectLockRule {
mockable::DefaultRetention defaultRetention;

bool operator==(const ObjectLockRule& other) const {
return this->defaultRetention == other.defaultRetention;
}
};

struct RGWObjectLock {
bool enabled;
bool rule_exist;
mockable::ObjectLockRule rule;

bool operator==(const RGWObjectLock& other) const {
return this->enabled == other.enabled &&
this->rule_exist == other.rule_exist && this->rule == other.rule;
}
};

mockable::RGWObjectLock& actual2mock(::RGWObjectLock& actual) {
return (mockable::RGWObjectLock&)actual;
}
} // namespace mockable

/*
HINT
s3gw.db will create here: /tmp/rgw_sfs_tests
Expand Down Expand Up @@ -1438,6 +1481,7 @@ TEST_F(TestSFSBucket, ListNamespaceMultipartsBasics) {
.path_uuid = uuid,
.meta_str = "metastr",
.mtime = now};

int id = multipart.insert(mpop);
ASSERT_GE(id, 0);

Expand All @@ -1453,3 +1497,193 @@ TEST_F(TestSFSBucket, ListNamespaceMultipartsBasics) {
EXPECT_EQ(results.objs[0].key.name, std::to_string(id));
EXPECT_EQ(results.objs[0].meta.mtime, now);
}

TEST_F(TestSFSBucket, RacedBucketMetadataWriteOperations) {
auto ceph_context = std::make_shared<CephContext>(CEPH_ENTITY_TYPE_CLIENT);
ceph_context->_conf.set_val("rgw_sfs_data_path", getTestDir());
ceph_context->_log->start();
auto store = new rgw::sal::SFStore(ceph_context.get(), getTestDir());

NoDoutPrefix ndp(ceph_context.get(), 1);
RGWEnv env;
env.init(ceph_context.get());
createUser("usr_id", store->db_conn);

rgw_user arg_user("", "usr_id", "");
auto user = store->get_user(arg_user);

rgw_bucket arg_bucket("t_id", "b_name", "");
rgw_placement_rule arg_pl_rule("default", "STANDARD");
std::string arg_swift_ver_location;
RGWQuotaInfo arg_quota_info;
RGWAccessControlPolicy arg_aclp_d = get_aclp_default();
rgw::sal::Attrs arg_attrs;

RGWBucketInfo arg_info = get_binfo();
obj_version arg_objv;
bool existed = false;
req_info arg_req_info(ceph_context.get(), &env);

std::unique_ptr<rgw::sal::Bucket> bucket_from_create;

EXPECT_EQ(
user->create_bucket(
&ndp, //dpp
arg_bucket, //b
"zg1", //zonegroup_id
arg_pl_rule, //placement_rule
arg_swift_ver_location, //swift_ver_location
&arg_quota_info, //pquota_info
arg_aclp_d, //policy
arg_attrs, //attrs
arg_info, //info
arg_objv, //ep_objv
false, //exclusive
false, //obj_lock_enabled
&existed, //existed
arg_req_info, //req_info
&bucket_from_create, //bucket
null_yield //optional_yield
),
0
);

std::unique_ptr<rgw::sal::Bucket> bucket_from_store_1;

EXPECT_EQ(
store->get_bucket(
&ndp, user.get(), arg_info.bucket, &bucket_from_store_1, null_yield
),
0
);

std::unique_ptr<rgw::sal::Bucket> bucket_from_store_2;

EXPECT_EQ(
store->get_bucket(
&ndp, user.get(), arg_info.bucket, &bucket_from_store_2, null_yield
),
0
);

EXPECT_EQ(*bucket_from_store_1, *bucket_from_store_2);

// merge_and_store_attrs

rgw::sal::Attrs new_attrs;
RGWAccessControlPolicy arg_aclp = get_aclp_1();
{
bufferlist acl_bl;
arg_aclp.encode(acl_bl);
new_attrs[RGW_ATTR_ACL] = acl_bl;
}

EXPECT_EQ(
bucket_from_store_1->merge_and_store_attrs(&ndp, new_attrs, null_yield), 0
);

// assert bucket_from_store_1 contains the RGW_ATTR_ACL attribute
auto acl_bl_1 = bucket_from_store_1->get_attrs().find(RGW_ATTR_ACL);
EXPECT_NE(bucket_from_store_1->get_attrs().end(), acl_bl_1);

// assert bucket_from_store_2 does not contain the RGW_ATTR_ACL attribute
auto acl_bl_2 = bucket_from_store_2->get_attrs().find(RGW_ATTR_ACL);
EXPECT_EQ(bucket_from_store_2->get_attrs().end(), acl_bl_2);

// put_info

RGWObjectLock obj_lock;
mockable::RGWObjectLock& ol = mockable::actual2mock(obj_lock);
ol.enabled = true;
ol.rule.defaultRetention.years = 12;
ol.rule.defaultRetention.days = 31;
ol.rule.defaultRetention.mode = "GOVERNANCE";
ol.rule_exist = true;

bucket_from_store_2->get_info().obj_lock = obj_lock;
EXPECT_EQ(bucket_from_store_2->put_info(&ndp, false, real_time()), 0);

auto& ol1 = mockable::actual2mock(bucket_from_store_1->get_info().obj_lock);
auto& ol2 = mockable::actual2mock(bucket_from_store_2->get_info().obj_lock);

// obj lock structure in the respective memory cannot be equal at this point for the
// two bucket_from_store_1 and bucket_from_store_2 references; this simulates two threads updating
// the metadata over the same bucket using their own bucket reference (as it happens actually when 2
// concurrent calls are issued from one or more S3 clients).
EXPECT_NE(ol1, ol2);

// Getting now a third reference from the backing store should fetch an image equal to
// bucket_from_store_2 since that reference is the latest one that did a put_info().
// merge_and_store_attrs() done with bucket_from_store_1 should now be lost due to
// bucket_from_store_2.put_info().
std::unique_ptr<rgw::sal::Bucket> bucket_from_store_3;
EXPECT_EQ(
store->get_bucket(
&ndp, user.get(), arg_info.bucket, &bucket_from_store_3, null_yield
),
0
);

// ol2 and ol3 should be the same.
auto& ol3 = mockable::actual2mock(bucket_from_store_3->get_info().obj_lock);
EXPECT_EQ(ol2, ol3);

// We expect to have lost RGW_ATTR_ACL attribute in the backing store.
auto acl_bl_3 = bucket_from_store_3->get_attrs().find(RGW_ATTR_ACL);
EXPECT_EQ(bucket_from_store_3->get_attrs().end(), acl_bl_3);

// Now we repeat the updates interposing the try_refresh_info() on bucket_from_store_2.
// try_refresh_info() refreshes bucket_from_store_2's memory with the state obtained
// from the store.
EXPECT_EQ(
bucket_from_store_1->merge_and_store_attrs(&ndp, new_attrs, null_yield), 0
);
EXPECT_EQ(bucket_from_store_2->try_refresh_info(&ndp, nullptr), 0);
EXPECT_EQ(bucket_from_store_2->put_info(&ndp, false, real_time()), 0);

// let's refetch bucket_from_store_3 from store.
EXPECT_EQ(
store->get_bucket(
&ndp, user.get(), arg_info.bucket, &bucket_from_store_3, null_yield
),
0
);

// Now all the views over bucket_from_store_2, bucket_from_store_2 and
// bucket_from_store_3 should be the same, given that the
// underlying sfs::BucketRef are (hopefully) the same.

// get_info() view
ol1 = mockable::actual2mock(bucket_from_store_1->get_info().obj_lock);
ol2 = mockable::actual2mock(bucket_from_store_2->get_info().obj_lock);
ol3 = mockable::actual2mock(bucket_from_store_3->get_info().obj_lock);
EXPECT_EQ(ol1, ol2);
EXPECT_EQ(ol2, ol3);

// get_attrs() view and acls views
acl_bl_1 = bucket_from_store_1->get_attrs().find(RGW_ATTR_ACL);
EXPECT_NE(bucket_from_store_1->get_attrs().end(), acl_bl_1);
acl_bl_2 = bucket_from_store_2->get_attrs().find(RGW_ATTR_ACL);
EXPECT_NE(bucket_from_store_2->get_attrs().end(), acl_bl_2);
acl_bl_3 = bucket_from_store_3->get_attrs().find(RGW_ATTR_ACL);
EXPECT_NE(bucket_from_store_3->get_attrs().end(), acl_bl_3);

{
RGWAccessControlPolicy aclp;
auto ci_lval = acl_bl_1->second.cbegin();
aclp.decode(ci_lval);
EXPECT_EQ(aclp, arg_aclp);
}
{
RGWAccessControlPolicy aclp;
auto ci_lval = acl_bl_2->second.cbegin();
aclp.decode(ci_lval);
EXPECT_EQ(aclp, arg_aclp);
}
{
RGWAccessControlPolicy aclp;
auto ci_lval = acl_bl_3->second.cbegin();
aclp.decode(ci_lval);
EXPECT_EQ(aclp, arg_aclp);
}
}

0 comments on commit 1846c13

Please sign in to comment.