Skip to content

Commit

Permalink
curvebs: support 512 aligned IO
Browse files Browse the repository at this point in the history
Signed-off-by: Hanqing Wu <[email protected]>
  • Loading branch information
wu-hanqing committed Jul 26, 2022
1 parent b8cccd2 commit 89ca60c
Show file tree
Hide file tree
Showing 137 changed files with 2,077 additions and 1,386 deletions.
7 changes: 5 additions & 2 deletions conf/chunkserver.conf
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,13 @@ global.external_subnet=127.0.0.0/24
global.chunk_size=16777216
# chunk 元数据页大小,一般4KB
global.meta_page_size=4096
# chunk's block size, IO requests must align with it, supported values are |512| and |4096|
# it should be consistent with `block_size` in chunkfilepool.meta_path and `mds.curvefs.blockSize` in MDS's configurations
# for clone chunk and snapshot chunk, it's also the minimum granularity that each bit represents
# if set to |512|, a 16 MiB chunk needs a 4096-byte bitmap (16777216 / 512 / 8), so meta_page_size should be 8192 or larger.
global.block_size=4096
# clone chunk允许的最长location长度
global.location_limit=3000
# minimum alignment for io request
global.min_io_alignment=512

#
# MDS settings
Expand Down
5 changes: 5 additions & 0 deletions conf/chunkserver.conf.example
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,11 @@ global.chunk_size=16777216
# chunk 元数据页大小,一般4KB
global.meta_page_size=4096
# clone chunk允许的最长location长度
# chunk's block size, IO requests must align with it, supported values are |512| and |4096|
# it should be consistent with `block_size` in chunkfilepool.meta_path and `mds.curvefs.blockSize` in MDS's configurations
# for clone chunk and snapshot chunk, it's also the minimum granularity that each bit represents
# if set to |512|, a 16 MiB chunk needs a 4096-byte bitmap (16777216 / 512 / 8), so meta_page_size should be 8192 or larger.
global.block_size=4096
global.location_limit=3000

#
Expand Down
2 changes: 2 additions & 0 deletions conf/mds.conf
Original file line number Diff line number Diff line change
Expand Up @@ -195,6 +195,8 @@ mds.curvefs.defaultSegmentSize=1073741824
mds.curvefs.minFileLength=10737418240
# curvefs的默认最大文件大小,20TB = 20*1024*1024*1024*1024 = 21990232555520
mds.curvefs.maxFileLength=21990232555520
# smallest read/write unit for a volume, supported values are |512| and |4096|
mds.curvefs.blockSize=4096

#
# chunkseverclient config
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@ global.external_subnet={{ chunkserver_external_subnet }}
global.chunk_size={{ chunk_size }}
# chunk 元数据页大小,一般4KB
global.meta_page_size={{ chunkserver_meta_page_size }}
# chunk block size, typically 4KB
global.block_size={{ chunkserver_block_size }}
# clone chunk允许的最长location长度
global.location_limit={{ chunkserver_location_limit }}

Expand Down Expand Up @@ -108,9 +110,9 @@ copyset.scan_rpc_timeout_ms={{ chunkserver_copyset_scan_rpc_timeout_ms }}
copyset.scan_rpc_retry_times={{ chunkserver_copyset_scan_rpc_retry_times }}
# the follower send scanmap to leader rpc retry interval
copyset.scan_rpc_retry_interval_us={{ chunkserver_copyset_scan_rpc_retry_interval_us }}
copyset.copyset_enable_odsync_when_open_chunkfile={{ chunkserver_copyset_enable_odsync_when_open_chunkfile }}
copyset.copyset_synctimer_interval_ms={{ chunkserver_copyset_synctimer_interval_ms }}
copyset.copyset_check_syncing_interval_ms={{ chunkserver_copyset_check_syncing_interval_ms }}
copyset.enable_odsync_when_open_chunkfile={{ chunkserver_copyset_enable_odsync_when_open_chunkfile }}
copyset.synctimer_interval_ms={{ chunkserver_copyset_synctimer_interval_ms }}
copyset.check_syncing_interval_ms={{ chunkserver_copyset_check_syncing_interval_ms }}

#
# Clone settings
Expand Down
2 changes: 2 additions & 0 deletions curve-ansible/roles/generate_config/templates/mds.conf.j2
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,8 @@ mds.curvefs.defaultSegmentSize={{ segment_size }}
mds.curvefs.minFileLength={{ min_file_length }}
# curvefs的默认最大文件大小,20TB = 20*1024*1024*1024*1024 = 21990232555520
mds.curvefs.maxFileLength={{ max_file_length }}
# smallest read/write unit for a volume, supported values are |512| and |4096|
mds.curvefs.blockSize={{ chunkserver_block_size }}

#
# chunkseverclient config
Expand Down
1 change: 1 addition & 0 deletions curve-ansible/roles/generate_config/templates/s3.conf.j2
Original file line number Diff line number Diff line change
Expand Up @@ -25,3 +25,4 @@ s3.throttle.iopsWriteLimit={{ s3_throttle_iopsWriteLimit }}
s3.throttle.bpsTotalMB= {{ s3_throttle_bpsTotalLimit }}
s3.throttle.bpsReadMB= {{ s3_throttle_bpsReadLimit }}
s3.throttle.bpsWriteMB= {{ s3_throttle_bpsWriteLimit }}
s3.useVirtualAddressing=False
Original file line number Diff line number Diff line change
Expand Up @@ -167,7 +167,9 @@ do
-filePoolDir=$dataDir/chunkserver$i/chunkfilepool \
-filePoolMetaPath=$dataDir/chunkserver$i/chunkfilepool.meta \
-fileSize={{ chunk_size }} \
-fileSystemPath=$dataDir/chunkserver$i/chunkfilepool &
-fileSystemPath=$dataDir/chunkserver$i/chunkfilepool \
-metaPageSize={{ chunkserver_meta_page_size }} \
-blockSize={{ chunkserver_block_size }} &
done
wait
}
Expand All @@ -185,7 +187,9 @@ function deploy_one_walfile_pool {
-filePoolDir=$dataDir/chunkserver$1/walfilepool \
-filePoolMetaPath=$dataDir/chunkserver$1/walfilepool.meta \
-fileSize={{ chunkserver_walfilepool_segment_size }} \
-fileSystemPath=$dataDir/chunkserver$1/walfilepool &
-fileSystemPath=$dataDir/chunkserver$1/walfilepool \
-metaPageSize={{ chunkserver_meta_page_size }} \
-blockSize={{ chunkserver_block_size }} &
}


Expand Down Expand Up @@ -286,7 +290,9 @@ function deploy_one {
-filePoolDir=$dirname/chunkfilepool \
-filePoolMetaPath=$dirname/chunkfilepool.meta \
-fileSize={{ chunk_size }} \
-fileSystemPath=$dirname/chunkfilepool &
-fileSystemPath=$dirname/chunkfilepool \
-metaPageSize={{ chunkserver_meta_page_size }} \
-blockSize={{ chunkserver_block_size }} &
wait
# release disk reserved space
sudo tune2fs -m 0 $diskname
Expand Down
2 changes: 2 additions & 0 deletions curve-ansible/server.ini
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,8 @@ chunk_size=16777216
chunkserver_walfilepool_segment_size=8388608
retain_pool=False
walfilepool_use_chunk_file_pool=True
chunkserver_meta_page_size=4096
chunkserver_block_size=4096

[snapshotclone_nginx:vars]
snapshot_nginx_package_version="0.0.6.1.1+7af4d6a4"
Expand Down
4 changes: 2 additions & 2 deletions curvefs/src/client/fuse_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
#include "curvefs/src/client/rpcclient/mds_client.h"
#include "curvefs/src/client/rpcclient/metaserver_client.h"
#include "curvefs/src/client/s3/client_s3_adaptor.h"
#include "curvefs/src/common/fast_align.h"
#include "src/common/fast_align.h"
#include "curvefs/src/client/metric/client_metric.h"
#include "src/common/concurrent/concurrent.h"
#include "curvefs/src/common/define.h"
Expand Down Expand Up @@ -69,7 +69,7 @@ using rpcclient::MdsClientImpl;
using rpcclient::MetaServerClient;
using rpcclient::MetaServerClientImpl;

using curvefs::common::is_aligned;
using curve::common::is_aligned;

const uint32_t kMaxHostNameLength = 255u;

Expand Down
1 change: 1 addition & 0 deletions deploy/local/chunkserver/conf/chunkserver.conf.0
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ global.external_ip=127.0.0.1
global.external_subnet=127.0.0.0/24
global.chunk_size=16777216
global.meta_page_size=4096
global.block_size=4096
global.location_limit=3000

#
Expand Down
1 change: 1 addition & 0 deletions deploy/local/chunkserver/conf/chunkserver.conf.1
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ global.external_ip=127.0.0.1
global.external_subnet=127.0.0.0/24
global.chunk_size=16777216
global.meta_page_size=4096
global.block_size=4096
global.location_limit=3000

#
Expand Down
1 change: 1 addition & 0 deletions deploy/local/chunkserver/conf/chunkserver.conf.2
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ global.external_ip=127.0.0.1
global.external_subnet=127.0.0.0/24
global.chunk_size=16777216
global.meta_page_size=4096
global.block_size=4096
global.location_limit=3000

#
Expand Down
3 changes: 0 additions & 3 deletions include/chunkserver/chunkserver_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -123,9 +123,6 @@ inline std::string ToGroupIdString(const LogicPoolID &logicPoolId,
}
#define ToGroupIdStr ToGroupIdString

// TODO(wudmeiao): 是否需要考虑可配置
const uint32_t kOpRequestAlignSize = 4096;

} // namespace chunkserver
} // namespace curve

Expand Down
3 changes: 3 additions & 0 deletions include/client/libcurve.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,6 +56,7 @@ typedef struct FileStatInfo {
int fileStatus;
uint64_t stripeUnit;
uint64_t stripeCount;
uint32_t blocksize;
} FileStatInfo_t;

// 存储用户信息
Expand Down Expand Up @@ -436,6 +437,8 @@ class CurveClient {
*/
virtual int64_t StatFile(const std::string& filename);

virtual int64_t StatFile(const std::string& filename,
FileStatInfo* fileStat);
/**
* 异步读
* @param fd 文件fd
Expand Down
6 changes: 5 additions & 1 deletion nbd/src/ImageInstance.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -77,10 +77,14 @@ void ImageInstance::Flush(NebdClientAioContext* context) {
nebd_lib_flush(fd_, context);
}

int64_t ImageInstance::GetImageSize() {
int64_t ImageInstance::GetImageSize() const {
return nebd_lib_filesize(fd_);
}

// Get the image's block size by querying nebd_lib_blocksize() with the
// descriptor held in fd_. The result of that call is returned as-is, so
// any negative error code it produces is handed straight to the caller.
int64_t ImageInstance::GetBlockSize() const {
    return nebd_lib_blocksize(fd_);
}

ImageInstance::~ImageInstance() {
if (fd_ != -1) {
Close();
Expand Down
9 changes: 8 additions & 1 deletion nbd/src/ImageInstance.h
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,14 @@ class ImageInstance {
* @return 获取成功返回文件大小(正值)
* 获取失败返回错误码(负值)
*/
virtual int64_t GetImageSize();
virtual int64_t GetImageSize() const;

/**
* @brief Get image's block size
* @return return block size when success,
* otherwise return negative error code
*/
virtual int64_t GetBlockSize() const;

private:
// nebd返回的文件描述符
Expand Down
52 changes: 38 additions & 14 deletions nbd/src/NBDController.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -42,12 +42,16 @@
namespace curve {
namespace nbd {

int IOController::InitDevAttr(NBDConfig* config, uint64_t size,
int g_nbd_index;

int IOController::InitDevAttr(NBDConfig* config,
uint64_t size,
uint32_t blocksize,
uint64_t flags) {
int ret = -1;

do {
ret = ioctl(nbdFd_, NBD_SET_BLKSIZE, CURVE_NBD_BLKSIZE);
ret = ioctl(nbdFd_, NBD_SET_BLKSIZE, blocksize);
if (ret < 0) {
break;
}
Expand Down Expand Up @@ -133,11 +137,15 @@ int IOController::MapOnNbdDeviceByDevPath(int sockfd,

nbdFd_ = devfd;
nbdIndex_ = index;
g_nbd_index = nbdIndex_;
return 0;
}

int IOController::SetUp(NBDConfig* config, int sockfd,
uint64_t size, uint64_t flags) {
int IOController::SetUp(NBDConfig* config,
int sockfd,
uint64_t size,
uint32_t blocksize,
uint64_t flags) {
int ret = -1;

if (config->devpath.empty()) {
Expand All @@ -150,10 +158,20 @@ int IOController::SetUp(NBDConfig* config, int sockfd,
return -1;
}

ret = InitDevAttr(config, size, flags);
if (ret == 0) {
do {
ret = InitDevAttr(config, size, blocksize, flags);
if (ret < 0) {
break;
}

ret = check_device_size(nbdIndex_, size);
}
if (ret < 0) {
break;
}

ret = check_block_size(nbdIndex_, blocksize);
} while (0);

if (ret < 0) {
dout << "curve-nbd: failed to map, status: "
<< cpp_strerror(ret) << std::endl;
Expand Down Expand Up @@ -240,16 +258,19 @@ void NetLinkController::Uninit() {
nlId_ = -1;
}

int NetLinkController::SetUp(NBDConfig* config, int sockfd,
uint64_t size, uint64_t flags) {
int NetLinkController::SetUp(NBDConfig* config,
int sockfd,
uint64_t size,
uint32_t blocksize,
uint64_t flags) {
int ret = Init();
if (ret < 0) {
dout << "curve-nbd: Netlink interface not supported."
<< " Using ioctl interface." << std::endl;
return ret;
}

ret = ConnectInternal(config, sockfd, size, flags);
ret = ConnectInternal(config, sockfd, size, blocksize, flags);
Uninit();
if (ret < 0) {
return ret;
Expand All @@ -259,7 +280,7 @@ int NetLinkController::SetUp(NBDConfig* config, int sockfd,
if (index < 0) {
return index;
}
ret = check_block_size(index, CURVE_NBD_BLKSIZE);
ret = check_block_size(index, blocksize);
if (ret < 0) {
return ret;
}
Expand Down Expand Up @@ -360,8 +381,11 @@ static int netlink_connect_cb(struct nl_msg *msg, void *arg) {
return NL_OK;
}

int NetLinkController::ConnectInternal(NBDConfig* config, int sockfd,
uint64_t size, uint64_t flags) {
int NetLinkController::ConnectInternal(NBDConfig* config,
int sockfd,
uint64_t size,
uint32_t blocksize,
uint64_t flags) {
struct nlattr *sock_attr = nullptr;
struct nlattr *sock_opt = nullptr;
struct nl_msg *msg = nullptr;
Expand Down Expand Up @@ -393,7 +417,7 @@ int NetLinkController::ConnectInternal(NBDConfig* config, int sockfd,
NLA_PUT_U64(msg, NBD_ATTR_TIMEOUT, config->timeout);
}
NLA_PUT_U64(msg, NBD_ATTR_SIZE_BYTES, size);
NLA_PUT_U64(msg, NBD_ATTR_BLOCK_SIZE_BYTES, CURVE_NBD_BLKSIZE);
NLA_PUT_U64(msg, NBD_ATTR_BLOCK_SIZE_BYTES, blocksize);
NLA_PUT_U64(msg, NBD_ATTR_SERVER_FLAGS, flags);

sock_attr = nla_nest_start(msg, NBD_ATTR_SOCKETS);
Expand Down
Loading

0 comments on commit 89ca60c

Please sign in to comment.