From 3be046c047ca12231e4a2d18f4df650566fef4e7 Mon Sep 17 00:00:00 2001
From: mohit84
Date: Fri, 16 Sep 2022 11:33:19 +0530
Subject: [PATCH] dht: Implement seek fop at dht level (#3811)

Before kernel minor version (.24) fuse does not wind a seek fop
but after that fuse winds a seek fop so implement the fop
at dht level.

> Fixes: #3373
> Change-Id: Ie9ef2f941099157996ab353fc4dc208a28fa8fc6
> Signed-off-by: Mohit Agrawal moagrawa@redhat.com
> (Reviewed on upstream link https://github.com/gluster/glusterfs/pull/3792)

Fixes: #3373
Change-Id: Ie9ef2f941099157996ab353fc4dc208a28fa8fc6
Signed-off-by: Mohit Agrawal moagrawa@redhat.com

Signed-off-by: Mohit Agrawal moagrawa@redhat.com
---
 tests/basic/distribute/dht_seek_test.t   |  43 ++++++
 xlators/cluster/dht/src/dht-common.h     |   8 ++
 xlators/cluster/dht/src/dht-helper.c     |   8 ++
 xlators/cluster/dht/src/dht-inode-read.c | 160 +++++++++++++++++++++++
 xlators/cluster/dht/src/dht.c            |   1 +
 5 files changed, 220 insertions(+)
 create mode 100644 tests/basic/distribute/dht_seek_test.t

diff --git a/tests/basic/distribute/dht_seek_test.t b/tests/basic/distribute/dht_seek_test.t
new file mode 100644
index 0000000000..cbc892281e
--- /dev/null
+++ b/tests/basic/distribute/dht_seek_test.t
@@ -0,0 +1,43 @@
+#!/bin/bash
+
+. $(dirname $0)/../../include.rc
+. $(dirname $0)/../../volume.rc
+. $(dirname $0)/../../dht.rc
+
+TESTS_EXPECTED_IN_LOOP=57
+# Initialize
+#------------------------------------------------------------
+cleanup;
+
+# Start glusterd
+TEST glusterd;
+TEST pidof glusterd;
+TEST $CLI volume info;
+
+# Create a volume
+TEST $CLI volume create $V0 $H0:$B0/${V0}{1,2,3};
+
+# Verify volume creation
+EXPECT "$V0" volinfo_field $V0 'Volume Name';
+EXPECT 'Created' volinfo_field $V0 'Status';
+
+# Start volume and verify successful start
+TEST $CLI volume start $V0;
+EXPECT 'Started' volinfo_field $V0 'Status';
+TEST glusterfs --volfile-id=$V0 --volfile-server=$H0 --entry-timeout=0 $M0;
+
+for i in {1..20}; do
+    TEST dd if=/dev/urandom of=${M0}/file.${i} bs=1k count=1 seek=128
+done
+
+TEST mkdir $M0/dst
+
+for i in {1..20}; do
+    TEST cp --sparse=always ${M0}/file.${i} ${M0}/dst
+done
+
+for i in {1..20}; do
+    TEST cmp ${M0}/file.${i} ${M0}/dst/file.${i}
+done
+
+cleanup;
diff --git a/xlators/cluster/dht/src/dht-common.h b/xlators/cluster/dht/src/dht-common.h
index a402ad7e7f..553cff5b6d 100644
--- a/xlators/cluster/dht/src/dht-common.h
+++ b/xlators/cluster/dht/src/dht-common.h
@@ -974,6 +974,10 @@ int32_t
 dht_open(call_frame_t *frame, xlator_t *this, loc_t *loc, int32_t flags,
          fd_t *fd, dict_t *xdata);
 
+int32_t
+dht_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+         gf_seek_what_t what, dict_t *xdata);
+
 int32_t
 dht_readv(call_frame_t *frame, xlator_t *this, fd_t *fd, size_t size,
           off_t offset, uint32_t flags, dict_t *xdata);
@@ -1382,4 +1386,8 @@ dht_dir_layout_error_check(xlator_t *this, inode_t *inode);
 
 int
 dht_inode_ctx_mdsvol_set(inode_t *inode, xlator_t *this, xlator_t *mds_subvol);
+
+int
+dht_seek_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+             int op_errno, off_t offset, dict_t *xdata);
 #endif /* _DHT_H */
diff --git a/xlators/cluster/dht/src/dht-helper.c b/xlators/cluster/dht/src/dht-helper.c
index d42d39a338..81bfdc8520 100644
--- a/xlators/cluster/dht/src/dht-helper.c
+++ b/xlators/cluster/dht/src/dht-helper.c
@@ -382,6 +382,14 @@ dht_check_and_open_fd_on_subvol_complete(int ret, call_frame_t *frame,
                        local->key, local->fd, local->rebalance.lock_cmd,
                        &local->rebalance.flock, local->xattr_req);
             break;
+
+        case GF_FOP_SEEK:
+            STACK_WIND_COOKIE(frame, dht_seek_cbk, subvol, subvol,
+                              subvol->fops->seek, local->fd,
+                              local->rebalance.offset, local->rebalance.flags,
+                              local->xattr_req);
+            break;
+
         default:
             gf_smsg(this->name, GF_LOG_ERROR, 0, DHT_MSG_UNKNOWN_FOP, "fd=%p",
                     fd, "gfid=%s", uuid_utoa(fd->inode->gfid), "name=%s",
diff --git a/xlators/cluster/dht/src/dht-inode-read.c b/xlators/cluster/dht/src/dht-inode-read.c
index dbb8070b0d..ac421d8260 100644
--- a/xlators/cluster/dht/src/dht-inode-read.c
+++ b/xlators/cluster/dht/src/dht-inode-read.c
@@ -1656,3 +1656,163 @@ err:
 
     return 0;
 }
+
+/*
+ * Continuation for a seek that failed on the first attempt. dht_seek_cbk()
+ * installs it as local->rebalance.target_op_fn before calling
+ * dht_rebalance_complete_check(); presumably it is invoked once that check
+ * has finished, with the subvolume to retry on.
+ *
+ * If we_are_not_migrating(ret) holds, the fop is failed with the errno saved
+ * in local->op_errno. Otherwise the seek is re-wound to subvol using the
+ * offset and whence saved in local->rebalance; call_cnt is set to 2 so that
+ * dht_seek_cbk() does not start a second rebalance check for the retry.
+ *
+ * A NULL frame or subvol ends in the unwind at out. Always returns 0.
+ */
+static int
+dht_seek2(xlator_t *this, xlator_t *subvol, call_frame_t *frame, int ret)
+{
+    dht_local_t *local = NULL;
+    int op_errno = EINVAL;
+    off_t offset = 0;
+
+    if (!frame)
+        goto out;
+
+    local = frame->local;
+    op_errno = local->op_errno;
+    offset = local->rebalance.offset;
+
+    if (we_are_not_migrating(ret)) {
+        /* This DHT layer is not migrating the file */
+        DHT_STACK_UNWIND(seek, frame, -1, local->op_errno, 0, NULL);
+        return 0;
+    }
+
+    if (subvol == NULL)
+        goto out;
+
+    local->call_cnt = 2;
+    STACK_WIND_COOKIE(frame, dht_seek_cbk, subvol, subvol, subvol->fops->seek,
+                      local->fd, offset, local->rebalance.flags,
+                      local->xattr_req);
+    return 0;
+
+out:
+    DHT_STACK_UNWIND(seek, frame, -1, op_errno, 0, NULL);
+    return 0;
+}
+
+/*
+ * Callback for every seek wound with dht_seek_cbk; cookie is the subvolume
+ * the call was wound to.
+ *
+ * The subvolume's result is unwound to the caller as-is, except that
+ *  - a failure flagged by dht_check_remote_fd_failed_error() hands the frame
+ *    to dht_check_and_open_fd_on_subvol() instead, and
+ *  - a first attempt (call_cnt == 1) that fails with an errno accepted by
+ *    dht_inode_missing() goes through dht_rebalance_complete_check(), with
+ *    dht_seek2() as the continuation.
+ * If either helper reports failure the original result is unwound after all.
+ */
+int
+dht_seek_cbk(call_frame_t *frame, void *cookie, xlator_t *this, int op_ret,
+             int op_errno, off_t offset, dict_t *xdata)
+{
+    dht_local_t *local = NULL;
+    xlator_t *prev = NULL;
+    int ret = 0;
+
+    local = frame->local;
+    prev = cookie;
+
+    /* lseek fails with EBADF if dht has not yet opened the fd
+     * on the cached subvol. This could happen if the file was migrated
+     * and a lookup updated the cached subvol in the inode ctx.
+     * We only check once as this could be a valid bad fd error.
+     */
+
+    if (dht_check_remote_fd_failed_error(local, op_ret, op_errno)) {
+        ret = dht_check_and_open_fd_on_subvol(this, frame);
+        if (ret)
+            goto out;
+        return 0;
+    }
+
+    local->op_errno = op_errno;
+    if ((op_ret == -1) && !dht_inode_missing(op_errno)) {
+        gf_msg_debug(this->name, op_errno, "subvolume %s returned -1",
+                     prev->name);
+        goto out;
+    }
+
+    /* NOTE(review): presumably ENXIO/EOVERFLOW already left through the
+     * branch above unless dht_inode_missing() accepts them - confirm.
+     */
+    if ((op_ret == -1) && ((op_errno == ENXIO) || (op_errno == EOVERFLOW)))
+        goto out;
+
+    if (!op_ret || (local->call_cnt != 1))
+        goto out;
+
+    /* rebalance would have happened */
+    local->rebalance.target_op_fn = dht_seek2;
+    ret = dht_rebalance_complete_check(this, frame);
+    if (!ret)
+        return 0;
+
+out:
+    DHT_STACK_UNWIND(seek, frame, op_ret, op_errno, offset, xdata);
+
+    return 0;
+}
+
+/*
+ * seek fop entry point: winds the seek to local->cached_subvol.
+ *
+ * offset, what and xdata are stashed in the local so the retry paths reached
+ * from dht_seek_cbk() can replay the call. Unwinds with ENOMEM when
+ * dht_local_init() fails and with EINVAL when there is no cached subvolume.
+ * Always returns 0; the outcome is delivered through the unwind.
+ */
+int
+dht_seek(call_frame_t *frame, xlator_t *this, fd_t *fd, off_t offset,
+         gf_seek_what_t what, dict_t *xdata)
+{
+    xlator_t *subvol = NULL;
+    dht_local_t *local = NULL;
+    int op_errno = EINVAL;
+
+    local = dht_local_init(frame, NULL, fd, GF_FOP_SEEK);
+    if (!local) {
+        op_errno = ENOMEM;
+        goto err;
+    }
+
+    subvol = local->cached_subvol;
+    if (!subvol) {
+        gf_msg_debug(this->name, 0, "no cached subvolume for fd=%p", fd);
+        op_errno = EINVAL;
+        goto err;
+    }
+
+    if (xdata)
+        local->xattr_req = dict_ref(xdata);
+
+    local->rebalance.offset = offset;
+    local->rebalance.flags = what;
+    local->call_cnt = 1;
+
+    STACK_WIND_COOKIE(frame, dht_seek_cbk, subvol, subvol, subvol->fops->seek,
+                      fd, local->rebalance.offset, local->rebalance.flags,
+                      local->xattr_req);
+
+    return 0;
+
+err:
+    /* The fop never ran: do not echo the request's offset/xdata back. */
+    DHT_STACK_UNWIND(seek, frame, -1, op_errno, 0, NULL);
+
+    return 0;
+}
diff --git a/xlators/cluster/dht/src/dht.c b/xlators/cluster/dht/src/dht.c
index 53de829270..5f6e994475 100644
--- a/xlators/cluster/dht/src/dht.c
+++ b/xlators/cluster/dht/src/dht.c
@@ -72,6 +72,7 @@ struct xlator_fops fops = {
     .finodelk = dht_finodelk,
     .lk = dht_lk,
     .lease = dht_lease,
+    .seek = dht_seek,
 
     /* Inode write operations */
     .fremovexattr = dht_fremovexattr,