mirror of
https://github.com/gluster/glusterfs.git
synced 2026-02-05 15:48:40 +01:00
rpcsvc: Add latency tracking for rpc programs
Added latency tracking of rpc-handling code. With this change we should be able to monitor the amount of time rpc-handling code is consuming for each of the rpc call. fixes: #1466 Change-Id: I04fc7f3b12bfa5053c0fc36885f271cb78f581cd Signed-off-by: Pranith Kumar K <pkarampu@redhat.com>
This commit is contained in:
committed by
Xavi Hernandez
parent
c696144b36
commit
0935afb947
@@ -11,13 +11,23 @@
|
||||
#ifndef __LATENCY_H__
|
||||
#define __LATENCY_H__
|
||||
|
||||
#include "glusterfs/glusterfs.h"
|
||||
#include <inttypes.h>
|
||||
#include <time.h>
|
||||
|
||||
typedef struct fop_latency {
|
||||
double min; /* min time for the call (microseconds) */
|
||||
double max; /* max time for the call (microseconds) */
|
||||
double total; /* total time (microseconds) */
|
||||
typedef struct _gf_latency {
|
||||
uint64_t min; /* min time for the call (nanoseconds) */
|
||||
uint64_t max; /* max time for the call (nanoseconds) */
|
||||
uint64_t total; /* total time (nanoseconds) */
|
||||
uint64_t count;
|
||||
} fop_latency_t;
|
||||
} gf_latency_t;
|
||||
|
||||
gf_latency_t *
|
||||
gf_latency_new(size_t n);
|
||||
|
||||
void
|
||||
gf_latency_reset(gf_latency_t *lat);
|
||||
|
||||
void
|
||||
gf_latency_update(gf_latency_t *lat, struct timespec *begin,
|
||||
struct timespec *end);
|
||||
#endif /* __LATENCY_H__ */
|
||||
|
||||
@@ -133,6 +133,7 @@ enum gf_common_mem_types_ {
|
||||
gf_common_volfile_t,
|
||||
gf_common_mt_mgmt_v3_lock_timer_t, /* used only in one location */
|
||||
gf_common_mt_server_cmdline_t, /* used only in one location */
|
||||
gf_common_mt_latency_t,
|
||||
gf_common_mt_end
|
||||
};
|
||||
#endif
|
||||
|
||||
@@ -45,6 +45,9 @@ typedef int32_t (*ret_fn_t)(call_frame_t *frame, call_frame_t *prev_frame,
|
||||
xlator_t *this, int32_t op_ret, int32_t op_errno,
|
||||
...);
|
||||
|
||||
void
|
||||
gf_frame_latency_update(call_frame_t *frame);
|
||||
|
||||
struct call_pool {
|
||||
union {
|
||||
struct list_head all_frames;
|
||||
@@ -149,8 +152,6 @@ struct _call_stack {
|
||||
} while (0);
|
||||
|
||||
struct xlator_fops;
|
||||
void
|
||||
gf_update_latency(call_frame_t *frame);
|
||||
|
||||
static inline void
|
||||
FRAME_DESTROY(call_frame_t *frame)
|
||||
@@ -158,7 +159,7 @@ FRAME_DESTROY(call_frame_t *frame)
|
||||
void *local = NULL;
|
||||
|
||||
if (frame->root->ctx->measure_latency)
|
||||
gf_update_latency(frame);
|
||||
gf_frame_latency_update(frame);
|
||||
|
||||
list_del_init(&frame->frames);
|
||||
if (frame->local) {
|
||||
|
||||
@@ -127,4 +127,6 @@ gf_proc_dump_xlator_meminfo(xlator_t *this, strfd_t *strfd);
|
||||
void
|
||||
gf_proc_dump_xlator_profile(xlator_t *this, strfd_t *strfd);
|
||||
|
||||
void
|
||||
gf_latency_statedump_and_reset(char *key, gf_latency_t *lat);
|
||||
#endif /* STATEDUMP_H */
|
||||
|
||||
@@ -805,7 +805,7 @@ struct _xlator {
|
||||
|
||||
struct {
|
||||
/* for latency measurement */
|
||||
fop_latency_t latencies[GF_FOP_MAXVALUE];
|
||||
gf_latency_t latencies[GF_FOP_MAXVALUE];
|
||||
/* for latency measurement */
|
||||
fop_metrics_t metrics[GF_FOP_MAXVALUE];
|
||||
|
||||
|
||||
@@ -16,34 +16,32 @@
|
||||
#include "glusterfs/glusterfs.h"
|
||||
#include "glusterfs/statedump.h"
|
||||
|
||||
void
|
||||
gf_update_latency(call_frame_t *frame)
|
||||
gf_latency_t *
|
||||
gf_latency_new(size_t n)
|
||||
{
|
||||
double elapsed;
|
||||
struct timespec *begin, *end;
|
||||
int i = 0;
|
||||
gf_latency_t *lat = NULL;
|
||||
|
||||
fop_latency_t *lat;
|
||||
lat = GF_MALLOC(n * sizeof(*lat), gf_common_mt_latency_t);
|
||||
if (!lat)
|
||||
return NULL;
|
||||
|
||||
begin = &frame->begin;
|
||||
end = &frame->end;
|
||||
for (i = 0; i < n; i++) {
|
||||
gf_latency_reset(lat + i);
|
||||
}
|
||||
return lat;
|
||||
}
|
||||
|
||||
if (!(begin->tv_sec && end->tv_sec))
|
||||
goto out;
|
||||
|
||||
elapsed = gf_tsdiff(begin, end);
|
||||
|
||||
if (frame->op < 0 || frame->op >= GF_FOP_MAXVALUE) {
|
||||
gf_log("[core]", GF_LOG_WARNING, "Invalid frame op value: %d",
|
||||
frame->op);
|
||||
void
|
||||
gf_latency_update(gf_latency_t *lat, struct timespec *begin,
|
||||
struct timespec *end)
|
||||
{
|
||||
if (!(begin->tv_sec && end->tv_sec)) {
|
||||
/*Measure latency might have been enabled/disabled during the op*/
|
||||
return;
|
||||
}
|
||||
|
||||
/* Can happen mostly at initiator xlator, as STACK_WIND/UNWIND macros
|
||||
set it right anyways for those frames */
|
||||
if (!frame->op)
|
||||
frame->op = frame->root->op;
|
||||
|
||||
lat = &frame->this->stats.interval.latencies[frame->op];
|
||||
double elapsed = gf_tsdiff(begin, end);
|
||||
|
||||
if (lat->max < elapsed)
|
||||
lat->max = elapsed;
|
||||
@@ -53,42 +51,34 @@ gf_update_latency(call_frame_t *frame)
|
||||
|
||||
lat->total += elapsed;
|
||||
lat->count++;
|
||||
out:
|
||||
return;
|
||||
}
|
||||
|
||||
void
|
||||
gf_proc_dump_latency_info(xlator_t *xl)
|
||||
gf_latency_reset(gf_latency_t *lat)
|
||||
{
|
||||
char key_prefix[GF_DUMP_MAX_BUF_LEN];
|
||||
char key[GF_DUMP_MAX_BUF_LEN];
|
||||
int i;
|
||||
|
||||
snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.latency", xl->name);
|
||||
gf_proc_dump_add_section("%s", key_prefix);
|
||||
|
||||
for (i = 0; i < GF_FOP_MAXVALUE; i++) {
|
||||
gf_proc_dump_build_key(key, key_prefix, "%s", (char *)gf_fop_list[i]);
|
||||
|
||||
fop_latency_t *lat = &xl->stats.interval.latencies[i];
|
||||
|
||||
/* Doesn't make sense to continue if there are no fops
|
||||
came in the given interval */
|
||||
if (!lat->count)
|
||||
continue;
|
||||
|
||||
gf_proc_dump_write(
|
||||
key, "AVG:%.03f,CNT:%" PRId64 ",TOTAL:%.03f,MIN:%.03f,MAX:%.03f",
|
||||
(lat->total / lat->count), lat->count, lat->total, lat->min,
|
||||
lat->max);
|
||||
}
|
||||
|
||||
memset(xl->stats.interval.latencies, 0,
|
||||
sizeof(xl->stats.interval.latencies));
|
||||
|
||||
if (!lat)
|
||||
return;
|
||||
memset(lat, 0, sizeof(*lat));
|
||||
lat->min = ULLONG_MAX;
|
||||
/* make sure 'min' is set to high value, so it would be
|
||||
properly set later */
|
||||
for (i = 0; i < GF_FOP_MAXVALUE; i++) {
|
||||
xl->stats.interval.latencies[i].min = 0xffffffff;
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
gf_frame_latency_update(call_frame_t *frame)
|
||||
{
|
||||
gf_latency_t *lat;
|
||||
/* Can happen mostly at initiator xlator, as STACK_WIND/UNWIND macros
|
||||
set it right anyways for those frames */
|
||||
if (!frame->op)
|
||||
frame->op = frame->root->op;
|
||||
|
||||
if (frame->op < 0 || frame->op >= GF_FOP_MAXVALUE) {
|
||||
gf_log("[core]", GF_LOG_WARNING, "Invalid frame op value: %d",
|
||||
frame->op);
|
||||
return;
|
||||
}
|
||||
|
||||
lat = &frame->this->stats.interval.latencies[frame->op];
|
||||
gf_latency_update(lat, &frame->begin, &frame->end);
|
||||
}
|
||||
|
||||
@@ -1186,3 +1186,8 @@ gf_nanosleep
|
||||
gf_syncfs
|
||||
graph_total_client_xlator
|
||||
get_xattrs_to_heal
|
||||
gf_latency_statedump_and_reset
|
||||
gf_latency_new
|
||||
gf_latency_reset
|
||||
gf_latency_update
|
||||
gf_frame_latency_update
|
||||
|
||||
@@ -113,15 +113,15 @@ dump_latency_and_count(xlator_t *xl, int fd)
|
||||
dprintf(fd, "%s.interval.%s.fail_count %" PRIu64 "\n", xl->name,
|
||||
gf_fop_list[index], cbk);
|
||||
}
|
||||
if (xl->stats.interval.latencies[index].count != 0.0) {
|
||||
if (xl->stats.interval.latencies[index].count != 0) {
|
||||
dprintf(fd, "%s.interval.%s.latency %lf\n", xl->name,
|
||||
gf_fop_list[index],
|
||||
(xl->stats.interval.latencies[index].total /
|
||||
(((double)xl->stats.interval.latencies[index].total) /
|
||||
xl->stats.interval.latencies[index].count));
|
||||
dprintf(fd, "%s.interval.%s.max %lf\n", xl->name,
|
||||
dprintf(fd, "%s.interval.%s.max %" PRIu64 "\n", xl->name,
|
||||
gf_fop_list[index],
|
||||
xl->stats.interval.latencies[index].max);
|
||||
dprintf(fd, "%s.interval.%s.min %lf\n", xl->name,
|
||||
dprintf(fd, "%s.interval.%s.min %" PRIu64 "\n", xl->name,
|
||||
gf_fop_list[index],
|
||||
xl->stats.interval.latencies[index].min);
|
||||
}
|
||||
|
||||
@@ -199,6 +199,40 @@ gf_proc_dump_write(char *key, char *value, ...)
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
gf_latency_statedump_and_reset(char *key, gf_latency_t *lat)
|
||||
{
|
||||
/* Doesn't make sense to continue if there are no fops
|
||||
came in the given interval */
|
||||
if (!lat || !lat->count)
|
||||
return;
|
||||
gf_proc_dump_write(key,
|
||||
"AVG:%lf CNT:%" PRIu64 " TOTAL:%" PRIu64 " MIN:%" PRIu64
|
||||
" MAX:%" PRIu64,
|
||||
(((double)lat->total) / lat->count), lat->count,
|
||||
lat->total, lat->min, lat->max);
|
||||
gf_latency_reset(lat);
|
||||
}
|
||||
|
||||
void
|
||||
gf_proc_dump_xl_latency_info(xlator_t *xl)
|
||||
{
|
||||
char key_prefix[GF_DUMP_MAX_BUF_LEN];
|
||||
char key[GF_DUMP_MAX_BUF_LEN];
|
||||
int i;
|
||||
|
||||
snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s.latency", xl->name);
|
||||
gf_proc_dump_add_section("%s", key_prefix);
|
||||
|
||||
for (i = 0; i < GF_FOP_MAXVALUE; i++) {
|
||||
gf_proc_dump_build_key(key, key_prefix, "%s", (char *)gf_fop_list[i]);
|
||||
|
||||
gf_latency_t *lat = &xl->stats.interval.latencies[i];
|
||||
|
||||
gf_latency_statedump_and_reset(key, lat);
|
||||
}
|
||||
}
|
||||
|
||||
static void
|
||||
gf_proc_dump_xlator_mem_info(xlator_t *xl)
|
||||
{
|
||||
@@ -467,7 +501,7 @@ gf_proc_dump_single_xlator_info(xlator_t *trav)
|
||||
return;
|
||||
|
||||
if (ctx->measure_latency)
|
||||
gf_proc_dump_latency_info(trav);
|
||||
gf_proc_dump_xl_latency_info(trav);
|
||||
|
||||
gf_proc_dump_xlator_mem_info(trav);
|
||||
|
||||
@@ -1011,7 +1045,7 @@ gf_proc_dump_xlator_profile(xlator_t *this, strfd_t *strfd)
|
||||
{
|
||||
gf_dump_strfd = strfd;
|
||||
|
||||
gf_proc_dump_latency_info(this);
|
||||
gf_proc_dump_xl_latency_info(this);
|
||||
|
||||
gf_dump_strfd = NULL;
|
||||
}
|
||||
|
||||
@@ -260,6 +260,7 @@ xlator_dynload_apis(xlator_t *xl)
|
||||
void *handle = NULL;
|
||||
volume_opt_list_t *vol_opt = NULL;
|
||||
xlator_api_t *xlapi = NULL;
|
||||
int i = 0;
|
||||
|
||||
handle = xl->dlhandle;
|
||||
|
||||
@@ -357,6 +358,10 @@ xlator_dynload_apis(xlator_t *xl)
|
||||
memcpy(xl->op_version, xlapi->op_version,
|
||||
sizeof(uint32_t) * GF_MAX_RELEASES);
|
||||
|
||||
for (i = 0; i < GF_FOP_MAXVALUE; i++) {
|
||||
gf_latency_reset(&xl->stats.interval.latencies[i]);
|
||||
}
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
return ret;
|
||||
|
||||
@@ -65,3 +65,4 @@ rpc_transport_unix_options_build
|
||||
rpc_transport_unref
|
||||
rpc_clnt_mgmt_pmap_signout
|
||||
rpcsvc_autoscale_threads
|
||||
rpcsvc_statedump
|
||||
|
||||
@@ -13,6 +13,7 @@
|
||||
#include <glusterfs/dict.h>
|
||||
#include <glusterfs/byte-order.h>
|
||||
#include <glusterfs/compat-errno.h>
|
||||
#include <glusterfs/statedump.h>
|
||||
#include "xdr-rpc.h"
|
||||
#include <glusterfs/iobuf.h>
|
||||
#include "xdr-common.h"
|
||||
@@ -341,6 +342,10 @@ rpcsvc_program_actor(rpcsvc_request_t *req)
|
||||
goto err;
|
||||
}
|
||||
|
||||
if (svc->xl->ctx->measure_latency) {
|
||||
timespec_now(&req->begin);
|
||||
}
|
||||
|
||||
req->ownthread = program->ownthread;
|
||||
req->synctask = program->synctask;
|
||||
|
||||
@@ -1490,10 +1495,18 @@ rpcsvc_submit_generic(rpcsvc_request_t *req, struct iovec *proghdr,
|
||||
size_t hdrlen = 0;
|
||||
char new_iobref = 0;
|
||||
rpcsvc_drc_globals_t *drc = NULL;
|
||||
gf_latency_t *lat = NULL;
|
||||
|
||||
if ((!req) || (!req->trans))
|
||||
return -1;
|
||||
|
||||
if (req->prog && req->begin.tv_sec) {
|
||||
if ((req->procnum >= 0) && (req->procnum < req->prog->numactors)) {
|
||||
timespec_now(&req->end);
|
||||
lat = &req->prog->latencies[req->procnum];
|
||||
gf_latency_update(lat, &req->begin, &req->end);
|
||||
}
|
||||
}
|
||||
trans = req->trans;
|
||||
|
||||
for (i = 0; i < hdrcount; i++) {
|
||||
@@ -1824,6 +1837,15 @@ rpcsvc_submit_message(rpcsvc_request_t *req, struct iovec *proghdr,
|
||||
iobref);
|
||||
}
|
||||
|
||||
void
|
||||
rpcsvc_program_destroy(rpcsvc_program_t *program)
|
||||
{
|
||||
if (program) {
|
||||
GF_FREE(program->latencies);
|
||||
GF_FREE(program);
|
||||
}
|
||||
}
|
||||
|
||||
int
|
||||
rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program)
|
||||
{
|
||||
@@ -1881,8 +1903,7 @@ rpcsvc_program_unregister(rpcsvc_t *svc, rpcsvc_program_t *program)
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
if (prog)
|
||||
GF_FREE(prog);
|
||||
rpcsvc_program_destroy(prog);
|
||||
|
||||
if (ret == -1) {
|
||||
if (program) {
|
||||
@@ -2267,6 +2288,11 @@ rpcsvc_program_register(rpcsvc_t *svc, rpcsvc_program_t *program,
|
||||
}
|
||||
|
||||
memcpy(newprog, program, sizeof(*program));
|
||||
newprog->latencies = gf_latency_new(program->numactors);
|
||||
if (!newprog->latencies) {
|
||||
rpcsvc_program_destroy(newprog);
|
||||
goto out;
|
||||
}
|
||||
|
||||
INIT_LIST_HEAD(&newprog->program);
|
||||
pthread_mutexattr_init(&thr_attr);
|
||||
@@ -3212,6 +3238,48 @@ out:
|
||||
return ret;
|
||||
}
|
||||
|
||||
void
|
||||
rpcsvc_program_dump(rpcsvc_program_t *prog)
|
||||
{
|
||||
char key_prefix[GF_DUMP_MAX_BUF_LEN];
|
||||
char key[GF_DUMP_MAX_BUF_LEN];
|
||||
int i;
|
||||
|
||||
snprintf(key_prefix, GF_DUMP_MAX_BUF_LEN, "%s", prog->progname);
|
||||
gf_proc_dump_add_section("%s", key_prefix);
|
||||
|
||||
gf_proc_dump_build_key(key, key_prefix, "program-number");
|
||||
gf_proc_dump_write(key, "%d", prog->prognum);
|
||||
|
||||
gf_proc_dump_build_key(key, key_prefix, "program-version");
|
||||
gf_proc_dump_write(key, "%d", prog->progver);
|
||||
|
||||
strncat(key_prefix, ".latency",
|
||||
sizeof(key_prefix) - strlen(key_prefix) - 1);
|
||||
|
||||
for (i = 0; i < prog->numactors; i++) {
|
||||
gf_proc_dump_build_key(key, key_prefix, "%s", prog->actors[i].procname);
|
||||
gf_latency_statedump_and_reset(key, &prog->latencies[i]);
|
||||
}
|
||||
}
|
||||
|
||||
void
|
||||
rpcsvc_statedump(rpcsvc_t *svc)
|
||||
{
|
||||
rpcsvc_program_t *prog = NULL;
|
||||
int ret = 0;
|
||||
ret = pthread_rwlock_tryrdlock(&svc->rpclock);
|
||||
if (ret)
|
||||
return;
|
||||
{
|
||||
list_for_each_entry(prog, &svc->programs, program)
|
||||
{
|
||||
rpcsvc_program_dump(prog);
|
||||
}
|
||||
}
|
||||
pthread_rwlock_unlock(&svc->rpclock);
|
||||
}
|
||||
|
||||
static rpcsvc_actor_t gluster_dump_actors[GF_DUMP_MAXVALUE] = {
|
||||
[GF_DUMP_NULL] = {"NULL", NULL, NULL, GF_DUMP_NULL, DRC_NA, 0},
|
||||
[GF_DUMP_DUMP] = {"DUMP", rpcsvc_dump, NULL, GF_DUMP_DUMP, DRC_NA, 0},
|
||||
|
||||
@@ -264,6 +264,8 @@ struct rpcsvc_request {
|
||||
gf_boolean_t ownthread;
|
||||
|
||||
gf_boolean_t synctask;
|
||||
struct timespec begin; /*req handling start time*/
|
||||
struct timespec end; /*req handling end time*/
|
||||
};
|
||||
|
||||
#define rpcsvc_request_program(req) ((rpcsvc_program_t *)((req)->prog))
|
||||
@@ -419,6 +421,7 @@ struct rpcsvc_program {
|
||||
supported by the system. */
|
||||
/* Program specific state handed to actors */
|
||||
void *private;
|
||||
gf_latency_t *latencies; /*Tracks latency statistics for the rpc call*/
|
||||
|
||||
/* This upcall is provided by the program during registration.
|
||||
* It is used to notify the program about events like connection being
|
||||
@@ -686,4 +689,6 @@ rpcsvc_autoscale_threads(glusterfs_ctx_t *ctx, rpcsvc_t *rpc, int incr);
|
||||
|
||||
extern int
|
||||
rpcsvc_destroy(rpcsvc_t *svc);
|
||||
void
|
||||
rpcsvc_statedump(rpcsvc_t *svc);
|
||||
#endif
|
||||
|
||||
@@ -267,6 +267,8 @@ server_priv(xlator_t *this)
|
||||
gf_proc_dump_build_key(key, "server", "total-bytes-write");
|
||||
gf_proc_dump_write(key, "%" PRIu64, total_write);
|
||||
|
||||
rpcsvc_statedump(conf->rpc);
|
||||
|
||||
ret = 0;
|
||||
out:
|
||||
if (ret)
|
||||
|
||||
Reference in New Issue
Block a user