mirror of
https://github.com/gluster/glusterfs.git
synced 2026-02-05 15:48:40 +01:00
cluster/afr: Delay post-op for fsync
Problem: AFR doesn't delay the post-op for the fsync fop. For fsync-heavy workloads this leads to an unnecessary fxattrop/finodelk for every fsync, resulting in bad performance. Fix: Delay the post-op for fsync as well. Add a special flag in xdata to indicate that AFR shouldn't delay the post-op in cases where either the process will terminate or a graph switch will happen; otherwise, unnecessary heals are triggered when the graph switch/process termination happens before the delayed post-op completes. Fixes: #1253 Change-Id: I531940d13269a111c49e0510d49514dc169f4577 Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
This commit is contained in:
committed by
Pranith Kumar Karampuri
parent
a113d93621
commit
af89d9e623
@@ -728,6 +728,7 @@ glfs_migrate_fd_safe(struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd)
|
||||
0,
|
||||
};
|
||||
char uuid1[64];
|
||||
dict_t *xdata = NULL;
|
||||
|
||||
oldinode = oldfd->inode;
|
||||
oldsubvol = oldinode->table->xl;
|
||||
@@ -736,7 +737,16 @@ glfs_migrate_fd_safe(struct glfs *fs, xlator_t *newsubvol, fd_t *oldfd)
|
||||
return fd_ref(oldfd);
|
||||
|
||||
if (!oldsubvol->switched) {
|
||||
ret = syncop_fsync(oldsubvol, oldfd, 0, NULL, NULL, NULL, NULL);
|
||||
xdata = dict_new();
|
||||
if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) {
|
||||
gf_smsg(fs->volname, GF_LOG_WARNING, ENOMEM, API_MSG_FSYNC_FAILED,
|
||||
"err=%s", "last-fsync set failed", "gfid=%s",
|
||||
uuid_utoa_r(oldfd->inode->gfid, uuid1), "subvol=%s",
|
||||
graphid_str(oldsubvol), "id=%d", oldsubvol->graph->id,
|
||||
NULL);
|
||||
}
|
||||
|
||||
ret = syncop_fsync(oldsubvol, oldfd, 0, NULL, NULL, xdata, NULL);
|
||||
DECODE_SYNCOP_ERR(ret);
|
||||
if (ret) {
|
||||
gf_smsg(fs->volname, GF_LOG_WARNING, errno, API_MSG_FSYNC_FAILED,
|
||||
@@ -816,6 +826,9 @@ out:
|
||||
newfd = NULL;
|
||||
}
|
||||
|
||||
if (xdata)
|
||||
dict_unref(xdata);
|
||||
|
||||
return newfd;
|
||||
}
|
||||
|
||||
|
||||
@@ -26,6 +26,8 @@ TEST $CLI volume heal $V0
|
||||
EXPECT_WITHIN $HEAL_TIMEOUT "0" get_pending_heal_count $V0
|
||||
EXPECT "^0$" echo $($CLI volume profile $V0 info | grep -w FSYNC | wc -l)
|
||||
|
||||
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 0
|
||||
EXPECT_WITHIN $CHILD_UP_TIMEOUT "1" afr_child_up_status $V0 1
|
||||
#Test that fsyncs happen when durability is on
|
||||
TEST $CLI volume set $V0 cluster.ensure-durability on
|
||||
TEST $CLI volume set $V0 performance.strict-write-ordering on
|
||||
|
||||
44
tests/basic/gfapi/gfapi-graph-switch-open-fd.t
Normal file
44
tests/basic/gfapi/gfapi-graph-switch-open-fd.t
Normal file
@@ -0,0 +1,44 @@
|
||||
#!/bin/bash
#
# Verifies that a graph switch while a gfapi client keeps writing on an open
# fd does not leave a permanent "dirty" changelog xattr on any brick of a
# replica-3 volume.

. $(dirname $0)/../../include.rc
. $(dirname $0)/../../volume.rc

cleanup;

TEST glusterd

TEST $CLI volume create $V0 replica 3 ${H0}:$B0/brick{0..2};
EXPECT 'Created' volinfo_field $V0 'Status';

TEST $CLI volume start $V0;
EXPECT 'Started' volinfo_field $V0 'Status';

TEST $GFS --volfile-id=$V0 --volfile-server=$H0 $M0;
#The writer keeps going for as long as this file exists on the volume
TEST touch $M0/sync
logdir=`gluster --print-logdir`

TEST build_tester $(dirname $0)/gfapi-keep-writing.c -lgfapi


#Launch a program to keep doing writes on an fd
./$(dirname $0)/gfapi-keep-writing ${H0} $V0 $logdir/gfapi-async-calls-test.log sync &
p=$!
sleep 1 #Let some writes go through
#Check if graph switch will lead to any pending markers for ever
TEST $CLI volume set $V0 performance.quick-read off
TEST $CLI volume set $V0 performance.io-cache off
TEST $CLI volume set $V0 performance.stat-prefetch off
TEST $CLI volume set $V0 performance.read-ahead off


TEST rm -f $M0/sync #Make sure the glfd is closed
TEST wait #Wait for background process to die
#Goal is to check if there is permanent FOOL changelog
sleep 5
EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick0/glfs_test.txt trusted.afr.dirty
EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick1/glfs_test.txt trusted.afr.dirty
EXPECT "0x000000000000000000000000" afr_get_changelog_xattr $B0/brick2/glfs_test.txt trusted.afr.dirty

#Remove the binary produced by build_tester above (same base name as the .c)
cleanup_tester $(dirname $0)/gfapi-keep-writing

cleanup;
|
||||
129
tests/basic/gfapi/gfapi-keep-writing.c
Normal file
129
tests/basic/gfapi/gfapi-keep-writing.c
Normal file
@@ -0,0 +1,129 @@
|
||||
#include <fcntl.h>
|
||||
#include <unistd.h>
|
||||
#include <time.h>
|
||||
#include <limits.h>
|
||||
#include <string.h>
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
#include <glusterfs/api/glfs.h>
|
||||
#include <glusterfs/api/glfs-handles.h>
|
||||
|
||||
/* Print msg to stderr followed by the strerror() text for the current errno.
 * Wrapped in do { } while (0) so it expands to a single statement. */
#define LOG_ERR(msg) \
|
||||
do { \
|
||||
fprintf(stderr, "%s : Error (%s)\n", msg, strerror(errno)); \
|
||||
} while (0)
|
||||
|
||||
/*
 * Create and initialise a gfapi handle for volname.
 *
 * The volfile is fetched over "tcp" from hostname on port 24007 and the
 * client log is written to logfile at log level 7.
 *
 * Returns the initialised handle, or NULL if any step fails. When a step
 * after glfs_new() fails, the handle is released with glfs_fini() before
 * returning.
 */
glfs_t *
init_glfs(const char *hostname, const char *volname, const char *logfile)
{
    glfs_t *handle = glfs_new(volname);

    if (handle == NULL) {
        LOG_ERR("glfs_new failed");
        return NULL;
    }

    if (glfs_set_volfile_server(handle, "tcp", hostname, 24007) < 0) {
        LOG_ERR("glfs_set_volfile_server failed");
        goto fail;
    }

    if (glfs_set_logging(handle, logfile, 7) < 0) {
        LOG_ERR("glfs_set_logging failed");
        goto fail;
    }

    if (glfs_init(handle) < 0) {
        LOG_ERR("glfs_init failed");
        goto fail;
    }

    return handle;

fail:
    /* Setup did not complete: drop the half-built handle. */
    glfs_fini(handle);
    return NULL;
}
|
||||
|
||||
int
|
||||
glfs_test_function(const char *hostname, const char *volname,
|
||||
const char *logfile, const char *syncfile)
|
||||
{
|
||||
int ret = -1;
|
||||
int flags = O_CREAT | O_RDWR;
|
||||
glfs_t *fs = NULL;
|
||||
glfs_fd_t *glfd = NULL;
|
||||
const char *buff = "This is from my prog\n";
|
||||
const char *filename = "glfs_test.txt";
|
||||
struct stat buf = {0};
|
||||
|
||||
fs = init_glfs(hostname, volname, logfile);
|
||||
if (fs == NULL) {
|
||||
LOG_ERR("init_glfs failed");
|
||||
return -1;
|
||||
}
|
||||
|
||||
glfd = glfs_creat(fs, filename, flags, 0644);
|
||||
if (glfd == NULL) {
|
||||
LOG_ERR("glfs_creat failed");
|
||||
goto out;
|
||||
}
|
||||
|
||||
while (glfs_stat(fs, syncfile, &buf) == 0) {
|
||||
ret = glfs_write(glfd, buff, strlen(buff), flags);
|
||||
if (ret < 0) {
|
||||
LOG_ERR("glfs_write failed");
|
||||
goto out;
|
||||
}
|
||||
}
|
||||
|
||||
ret = glfs_close(glfd);
|
||||
if (ret < 0) {
|
||||
LOG_ERR("glfs_write failed");
|
||||
goto out;
|
||||
}
|
||||
|
||||
out:
|
||||
ret = glfs_fini(fs);
|
||||
if (ret) {
|
||||
LOG_ERR("glfs_fini failed");
|
||||
}
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
/*
 * Entry point. Expects exactly four arguments:
 *   argv[1] hostname, argv[2] volume name, argv[3] log file, argv[4] sync file
 * and forwards them to glfs_test_function(). Exits with status 1 on a wrong
 * argument count; otherwise returns whatever glfs_test_function() returned.
 */
int
main(int argc, char *argv[])
{
    int status;

    if (argc != 5) {
        fprintf(stderr, "Invalid argument\n");
        exit(1);
    }

    status = glfs_test_function(argv[1], argv[2], argv[3], argv[4]);
    if (status != 0) {
        LOG_ERR("glfs_test_function failed");
    }

    return status;
}
|
||||
@@ -2506,6 +2506,7 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
|
||||
call_frame_t *transaction_frame = NULL;
|
||||
int ret = -1;
|
||||
int32_t op_errno = ENOMEM;
|
||||
int8_t last_fsync = 0;
|
||||
|
||||
AFR_ERROR_OUT_IF_FDCTX_INVALID(fd, this, op_errno, out);
|
||||
transaction_frame = copy_frame(frame);
|
||||
@@ -2516,10 +2517,16 @@ afr_fsync(call_frame_t *frame, xlator_t *this, fd_t *fd, int32_t datasync,
|
||||
if (!local)
|
||||
goto out;
|
||||
|
||||
if (xdata)
|
||||
if (xdata) {
|
||||
local->xdata_req = dict_copy_with_ref(xdata, NULL);
|
||||
else
|
||||
if (dict_get_int8(xdata, "last-fsync", &last_fsync) == 0) {
|
||||
if (last_fsync) {
|
||||
local->transaction.disable_delayed_post_op = _gf_true;
|
||||
}
|
||||
}
|
||||
} else {
|
||||
local->xdata_req = dict_new();
|
||||
}
|
||||
|
||||
if (!local->xdata_req)
|
||||
goto out;
|
||||
|
||||
@@ -2422,8 +2422,13 @@ afr_is_delayed_changelog_post_op_needed(call_frame_t *frame, xlator_t *this,
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP)) {
|
||||
/*Only allow writes but shard does [f]xattrops on writes, so
|
||||
if (local->transaction.disable_delayed_post_op) {
|
||||
goto out;
|
||||
}
|
||||
|
||||
if ((local->op != GF_FOP_WRITE) && (local->op != GF_FOP_FXATTROP) &&
|
||||
(local->op != GF_FOP_FSYNC)) {
|
||||
/*Only allow writes/fsyncs but shard does [f]xattrops on writes, so
|
||||
* they are fine too*/
|
||||
goto out;
|
||||
}
|
||||
|
||||
@@ -901,7 +901,7 @@ typedef struct _afr_local {
|
||||
gf_boolean_t uninherit_done;
|
||||
gf_boolean_t uninherit_value;
|
||||
|
||||
/* post-op hook */
|
||||
gf_boolean_t disable_delayed_post_op;
|
||||
} transaction;
|
||||
|
||||
syncbarrier_t barrier;
|
||||
|
||||
@@ -1525,6 +1525,7 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
|
||||
xlator_t *old_target = NULL;
|
||||
xlator_t *hashed_subvol = NULL;
|
||||
fd_t *linkto_fd = NULL;
|
||||
dict_t *xdata = NULL;
|
||||
|
||||
if (from == to) {
|
||||
gf_msg_debug(this->name, 0,
|
||||
@@ -1848,7 +1849,15 @@ dht_migrate_file(xlator_t *this, loc_t *loc, xlator_t *from, xlator_t *to,
|
||||
|
||||
/* TODO: Sync the locks */
|
||||
|
||||
ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, NULL, NULL);
|
||||
xdata = dict_new();
|
||||
if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) {
|
||||
gf_log(this->name, GF_LOG_ERROR,
|
||||
"%s: failed to set last-fsync flag on "
|
||||
"%s (%s)",
|
||||
loc->path, to->name, strerror(ENOMEM));
|
||||
}
|
||||
|
||||
ret = syncop_fsync(to, dst_fd, 0, NULL, NULL, xdata, NULL);
|
||||
if (ret) {
|
||||
gf_log(this->name, GF_LOG_WARNING, "%s: failed to fsync on %s (%s)",
|
||||
loc->path, to->name, strerror(-ret));
|
||||
@@ -2311,11 +2320,15 @@ out:
|
||||
|
||||
if (dst_fd)
|
||||
syncop_close(dst_fd);
|
||||
|
||||
if (src_fd)
|
||||
syncop_close(src_fd);
|
||||
if (linkto_fd)
|
||||
syncop_close(linkto_fd);
|
||||
|
||||
if (xdata)
|
||||
dict_unref(xdata);
|
||||
|
||||
loc_wipe(&tmp_loc);
|
||||
loc_wipe(&parent_loc);
|
||||
|
||||
|
||||
@@ -5617,6 +5617,7 @@ fuse_migrate_fd(xlator_t *this, fd_t *basefd, xlator_t *old_subvol,
|
||||
char create_in_progress = 0;
|
||||
fuse_fd_ctx_t *basefd_ctx = NULL;
|
||||
fd_t *oldfd = NULL;
|
||||
dict_t *xdata = NULL;
|
||||
|
||||
basefd_ctx = fuse_fd_ctx_get(this, basefd);
|
||||
GF_VALIDATE_OR_GOTO("glusterfs-fuse", basefd_ctx, out);
|
||||
@@ -5653,10 +5654,23 @@ fuse_migrate_fd(xlator_t *this, fd_t *basefd, xlator_t *old_subvol,
|
||||
}
|
||||
|
||||
if (oldfd->inode->table->xl == old_subvol) {
|
||||
if (IA_ISDIR(oldfd->inode->ia_type))
|
||||
if (IA_ISDIR(oldfd->inode->ia_type)) {
|
||||
ret = syncop_fsyncdir(old_subvol, oldfd, 0, NULL, NULL);
|
||||
else
|
||||
ret = syncop_fsync(old_subvol, oldfd, 0, NULL, NULL, NULL, NULL);
|
||||
} else {
|
||||
xdata = dict_new();
|
||||
if (!xdata || dict_set_int8(xdata, "last-fsync", 1)) {
|
||||
gf_log("glusterfs-fuse", GF_LOG_WARNING,
|
||||
"last-fsync set failed (%s) on fd (%p)"
|
||||
"(basefd:%p basefd-inode.gfid:%s) "
|
||||
"(old-subvolume:%s-%d new-subvolume:%s-%d)",
|
||||
strerror(ENOMEM), oldfd, basefd,
|
||||
uuid_utoa(basefd->inode->gfid), old_subvol->name,
|
||||
old_subvol->graph->id, new_subvol->name,
|
||||
new_subvol->graph->id);
|
||||
}
|
||||
|
||||
ret = syncop_fsync(old_subvol, oldfd, 0, NULL, NULL, xdata, NULL);
|
||||
}
|
||||
|
||||
if (ret < 0) {
|
||||
gf_log("glusterfs-fuse", GF_LOG_WARNING,
|
||||
@@ -5711,6 +5725,9 @@ out:
|
||||
|
||||
fd_unref(oldfd);
|
||||
|
||||
if (xdata)
|
||||
dict_unref(xdata);
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
Reference in New Issue
Block a user