1
0
mirror of https://github.com/lxc/lxcfs.git synced 2026-02-05 09:46:18 +01:00

proc_fuse: add psi(pressure stall information) procfs

The kernel has supported PSI (pressure stall information) since 4.20,
exposed through procfs as /proc/pressure/{io,cpu,memory} and through
cgroup v2 as {io.pressure, cpu.pressure, memory.pressure}.

This patch adds read-only PSI procfs files,
so people can now get pressure information.
Full monitoring functionality is still under investigation.

Signed-off-by: Feng Sun <loyou85@gmail.com>
This commit is contained in:
Feng Sun
2024-11-14 14:22:09 -06:00
committed by Stéphane Graber
parent d7a0f9183b
commit 31da3ae731
8 changed files with 435 additions and 21 deletions

View File

@@ -16,6 +16,9 @@ such as:
/proc/swaps
/proc/uptime
/proc/slabinfo
/proc/pressure/io
/proc/pressure/cpu
/proc/pressure/memory
/sys/devices/system/cpu/online
```
@@ -109,6 +112,9 @@ docker run -it -m 256m --memory-swap 256m \
-v /var/lib/lxcfs/proc/swaps:/proc/swaps:rw \
-v /var/lib/lxcfs/proc/uptime:/proc/uptime:rw \
-v /var/lib/lxcfs/proc/slabinfo:/proc/slabinfo:rw \
-v /var/lib/lxcfs/proc/pressure/io:/proc/pressure/io:rw \
-v /var/lib/lxcfs/proc/pressure/cpu:/proc/pressure/cpu:rw \
-v /var/lib/lxcfs/proc/pressure/memory:/proc/pressure/memory:rw \
-v /var/lib/lxcfs/sys/devices/system/cpu:/sys/devices/system/cpu:rw \
ubuntu:18.04 /bin/bash
```

View File

@@ -23,6 +23,9 @@ static char *api_extensions[] = {
"proc_swaps",
"proc_uptime",
"proc_slabinfo",
"proc_pressure_io",
"proc_pressure_cpu",
"proc_pressure_memory",
"shared_pidns",
"cpuview_daemon",
"loadavg_daemon",

View File

@@ -66,12 +66,24 @@ enum lxcfs_virt_t {
LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE,
#define LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE_PATH "/sys/devices/system/cpu/online"
LXC_TYPE_PROC,
LXC_TYPE_PROC_PRESSURE,
LXC_TYPE_PROC_PRESSURE_IO,
#define LXC_TYPE_PROC_PRESSURE_IO_PATH "/proc/pressure/io"
LXC_TYPE_PROC_PRESSURE_CPU,
#define LXC_TYPE_PROC_PRESSURE_CPU_PATH "/proc/pressure/cpu"
LXC_TYPE_PROC_PRESSURE_MEMORY,
#define LXC_TYPE_PROC_PRESSURE_MEMORY_PATH "/proc/pressure/memory"
LXC_TYPE_MAX,
};
/* Macros below used to check the class from the file types above */
/*
 * True when @type lies in the range [LXC_TYPE_CGDIR, LXC_TYPE_CGFILE].
 * The argument is parenthesized so that expressions built from operators with
 * lower precedence than the comparisons (e.g. `x & mask`) are classified
 * correctly. NOTE: @type is evaluated twice; avoid arguments with side effects.
 */
#define LXCFS_TYPE_CGROUP(type) ((type) >= LXC_TYPE_CGDIR && (type) <= LXC_TYPE_CGFILE)
#define LXCFS_TYPE_PROC(type) (type >= LXC_TYPE_PROC_MEMINFO && type <= LXC_TYPE_PROC_SLABINFO)
/*
 * True when @type is one of the /proc file types
 * [LXC_TYPE_PROC_MEMINFO, LXC_TYPE_PROC_SLABINFO] or one of the /proc
 * directory / pressure types [LXC_TYPE_PROC, LXC_TYPE_PROC_PRESSURE_MEMORY].
 * The argument is parenthesized so that expressions built from operators with
 * lower precedence than the comparisons are classified correctly.
 * NOTE: @type is evaluated up to four times; avoid arguments with side effects.
 */
#define LXCFS_TYPE_PROC(type) (((type) >= LXC_TYPE_PROC_MEMINFO && (type) <= LXC_TYPE_PROC_SLABINFO) || \
			       ((type) >= LXC_TYPE_PROC && (type) <= LXC_TYPE_PROC_PRESSURE_MEMORY))
/*
 * LXCFS_TYPE_SYS: true when @type lies in
 * [LXC_TYPE_SYS, LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE].
 * LXCFS_TYPE_OK: true when @type lies in [LXC_TYPE_CGDIR, LXC_TYPE_MAX).
 * Arguments are parenthesized so that expressions built from operators with
 * lower precedence than the comparisons are classified correctly.
 * NOTE: @type is evaluated twice; avoid arguments with side effects.
 */
#define LXCFS_TYPE_SYS(type) ((type) >= LXC_TYPE_SYS && (type) <= LXC_TYPE_SYS_DEVICES_SYSTEM_CPU_ONLINE)
#define LXCFS_TYPE_OK(type) ((type) >= LXC_TYPE_CGDIR && (type) < LXC_TYPE_MAX)

View File

@@ -854,6 +854,54 @@ static bool cgfsng_can_use_cpuview(struct cgroup_ops *ops)
return true;
}
static int cgfsng_get_pressure_io_fd(struct cgroup_ops *ops, const char *cgroup)
{
__do_free char *path = NULL;
struct hierarchy *h;
h = ops->get_hierarchy(ops, "blkio");
if (!h)
return -1;
if (faccessat(h->fd, "io.pressure", F_OK, 0))
return -1;
path = must_make_path_relative(cgroup, "io.pressure", NULL);
return openat(h->fd, path, O_RDWR | O_CLOEXEC | O_NOFOLLOW);
}
static int cgfsng_get_pressure_cpu_fd(struct cgroup_ops *ops, const char *cgroup)
{
__do_free char *path = NULL;
struct hierarchy *h;
h = ops->get_hierarchy(ops, "cpu");
if (!h)
return -1;
if (faccessat(h->fd, "cpu.pressure", F_OK, 0))
return -1;
path = must_make_path_relative(cgroup, "cpu.pressure", NULL);
return openat(h->fd, path, O_RDWR | O_CLOEXEC | O_NOFOLLOW);
}
static int cgfsng_get_pressure_memory_fd(struct cgroup_ops *ops, const char *cgroup)
{
__do_free char *path = NULL;
struct hierarchy *h;
h = ops->get_hierarchy(ops, "memory");
if (!h)
return -1;
if (faccessat(h->fd, "memory.pressure", F_OK, 0))
return -1;
path = must_make_path_relative(cgroup, "memory.pressure", NULL);
return openat(h->fd, path, O_RDWR | O_CLOEXEC | O_NOFOLLOW);
}
/* At startup, parse_hierarchies finds all the info we need about cgroup
* mountpoints and current cgroups, and stores it in @d.
*/
@@ -1074,6 +1122,10 @@ struct cgroup_ops *cgfsng_ops_init(void)
cgfsng_ops->get_io_merged = cgfsng_get_io_merged;
cgfsng_ops->get_io_wait_time = cgfsng_get_io_wait_time;
/* psi */
cgfsng_ops->get_pressure_io_fd = cgfsng_get_pressure_io_fd;
cgfsng_ops->get_pressure_cpu_fd = cgfsng_get_pressure_cpu_fd;
cgfsng_ops->get_pressure_memory_fd = cgfsng_get_pressure_memory_fd;
return move_ptr(cgfsng_ops);
}

View File

@@ -155,7 +155,7 @@ struct cgroup_ops {
char **value);
bool (*can_use_cpuview)(struct cgroup_ops *ops);
/* io */
/* blkio */
int (*get_io_service_bytes)(struct cgroup_ops *ops, const char *cgroup,
char **value);
int (*get_io_service_time)(struct cgroup_ops *ops, const char *cgroup,
@@ -166,6 +166,11 @@ struct cgroup_ops {
char **value);
int (*get_io_wait_time)(struct cgroup_ops *ops, const char *cgroup,
char **value);
/* psi */
int (*get_pressure_io_fd)(struct cgroup_ops *ops, const char *cgroup);
int (*get_pressure_cpu_fd)(struct cgroup_ops *ops, const char *cgroup);
int (*get_pressure_memory_fd)(struct cgroup_ops *ops,
const char *cgroup);
};
extern struct cgroup_ops *cgroup_ops;

View File

@@ -524,6 +524,20 @@ static int do_proc_open(const char *path, struct fuse_file_info *fi)
return __proc_open(path, fi);
}
/*
 * Look up proc_opendir() in the library referenced by dlopen_handle and
 * forward the call to it.
 *
 * Returns whatever proc_opendir() returns, or the value produced by
 * log_error(-1, ...) when dlerror() reports a failed symbol lookup.
 */
static int do_proc_opendir(const char *path, struct fuse_file_info *fi)
{
	typedef int (*proc_opendir_fn)(const char *path, struct fuse_file_info *fi);
	proc_opendir_fn opendir_fn;
	char *dl_err;

	/* Clear any stale error so the dlerror() below reflects this lookup only. */
	dlerror();

	opendir_fn = (proc_opendir_fn)dlsym(dlopen_handle, "proc_opendir");
	dl_err = dlerror();
	if (dl_err)
		return log_error(-1, "%s - Failed to find proc_opendir()", dl_err);

	return opendir_fn(path, fi);
}
static int do_proc_access(const char *path, int mode)
{
char *error;
@@ -608,6 +622,20 @@ static int do_proc_release(const char *path, struct fuse_file_info *fi)
return __proc_release(path, fi);
}
/*
 * Look up proc_releasedir() in the library referenced by dlopen_handle and
 * forward the call to it.
 *
 * Returns whatever proc_releasedir() returns, or the value produced by
 * log_error(-1, ...) when dlerror() reports a failed symbol lookup.
 */
static int do_proc_releasedir(const char *path, struct fuse_file_info *fi)
{
	typedef int (*proc_releasedir_fn)(const char *path, struct fuse_file_info *fi);
	proc_releasedir_fn releasedir_fn;
	char *dl_err;

	/* Clear any stale error so the dlerror() below reflects this lookup only. */
	dlerror();

	releasedir_fn = (proc_releasedir_fn)dlsym(dlopen_handle, "proc_releasedir");
	dl_err = dlerror();
	if (dl_err)
		return log_error(-1, "%s - Failed to find proc_releasedir()", dl_err);

	return releasedir_fn(path, fi);
}
static int do_sys_release(const char *path, struct fuse_file_info *fi)
{
char *error;
@@ -724,8 +752,12 @@ static int lxcfs_opendir(const char *path, struct fuse_file_info *fi)
return ret;
}
if (strcmp(path, "/proc") == 0)
return 0;
if (strncmp(path, "/proc", 5) == 0) {
up_users();
ret = do_proc_opendir(path, fi);
down_users();
return ret;
}
if (strncmp(path, "/sys", 4) == 0) {
up_users();
@@ -768,7 +800,7 @@ static int lxcfs_readdir(const char *path, void *buf, fuse_fill_dir_t filler,
return ret;
}
if (strcmp(path, "/proc") == 0) {
if (LXCFS_TYPE_PROC(type)) {
up_users();
ret = do_proc_readdir(path, buf, filler, offset, fi);
down_users();
@@ -837,12 +869,14 @@ static int lxcfs_releasedir(const char *path, struct fuse_file_info *fi)
return ret;
}
if (path) {
if (strcmp(path, "/") == 0)
return 0;
if (strcmp(path, "/proc") == 0)
return 0;
if (LXCFS_TYPE_PROC(type)) {
up_users();
ret = do_proc_releasedir(path, fi);
down_users();
return ret;
}
if (path && strcmp(path, "/") == 0)
return 0;
lxcfs_error("unknown file type: path=%s, type=%d, fi->fh=%" PRIu64,
path, type, fi->fh);

View File

@@ -136,6 +136,11 @@ __lxcfs_fuse_ops int proc_getattr(const char *path, struct stat *sb)
sb->st_nlink = 2;
return 0;
}
if (strcmp(path, "/proc/pressure") == 0) {
sb->st_mode = S_IFDIR | 00555;
sb->st_nlink = 2;
return 0;
}
if (strcmp(path, "/proc/meminfo") == 0 ||
strcmp(path, "/proc/cpuinfo") == 0 ||
@@ -156,6 +161,21 @@ __lxcfs_fuse_ops int proc_getattr(const char *path, struct stat *sb)
sb->st_nlink = 1;
return 0;
}
if (strcmp(path, "/proc/pressure/io") == 0 ||
strcmp(path, "/proc/pressure/cpu") == 0 ||
strcmp(path, "/proc/pressure/memory") == 0) {
if (liblxcfs_functional()) {
if (!can_access_personality())
return log_error(-EACCES, RESTRICTED_PERSONALITY_ACCESS_POLICY);
sb->st_size = get_procfile_size_with_personality(path);
}
else
sb->st_size = get_procfile_size(path);
/* TODO: read-only now, will be writable after monitoring support */
sb->st_mode = S_IFREG | 00444;
sb->st_nlink = 1;
return 0;
}
return -ENOENT;
}
@@ -164,17 +184,30 @@ __lxcfs_fuse_ops int proc_readdir(const char *path, void *buf,
fuse_fill_dir_t filler, off_t offset,
struct fuse_file_info *fi)
{
if (dir_filler(filler, buf, ".", 0) != 0 ||
dir_filler(filler, buf, "..", 0) != 0 ||
dir_filler(filler, buf, "cpuinfo", 0) != 0 ||
dir_filler(filler, buf, "meminfo", 0) != 0 ||
dir_filler(filler, buf, "stat", 0) != 0 ||
dir_filler(filler, buf, "uptime", 0) != 0 ||
dir_filler(filler, buf, "diskstats", 0) != 0 ||
dir_filler(filler, buf, "swaps", 0) != 0 ||
dir_filler(filler, buf, "loadavg", 0) != 0 ||
dir_filler(filler, buf, "slabinfo", 0) != 0)
return -EINVAL;
if (strcmp(path, "/proc") == 0) {
if (dir_filler(filler, buf, ".", 0) != 0 ||
dir_filler(filler, buf, "..", 0) != 0 ||
dir_filler(filler, buf, "cpuinfo", 0) != 0 ||
dir_filler(filler, buf, "meminfo", 0) != 0 ||
dir_filler(filler, buf, "stat", 0) != 0 ||
dir_filler(filler, buf, "uptime", 0) != 0 ||
dir_filler(filler, buf, "diskstats", 0) != 0 ||
dir_filler(filler, buf, "swaps", 0) != 0 ||
dir_filler(filler, buf, "loadavg", 0) != 0 ||
dir_filler(filler, buf, "slabinfo", 0) != 0 ||
dirent_filler(filler, path, "pressure", buf, 0) != 0)
return -EINVAL;
return 0;
}
if (strcmp(path, "/proc/pressure") == 0) {
if (dir_filler(filler, buf, ".", 0) != 0 ||
dir_filler(filler, buf, "..", 0) != 0 ||
dir_filler(filler, buf, "io", 0) != 0 ||
dir_filler(filler, buf, "cpu", 0) != 0 ||
dir_filler(filler, buf, "memory", 0) != 0)
return -EINVAL;
return 0;
}
return 0;
}
@@ -200,6 +233,12 @@ __lxcfs_fuse_ops int proc_open(const char *path, struct fuse_file_info *fi)
type = LXC_TYPE_PROC_LOADAVG;
else if (strcmp(path, "/proc/slabinfo") == 0)
type = LXC_TYPE_PROC_SLABINFO;
else if (strcmp(path, "/proc/pressure/io") == 0)
type = LXC_TYPE_PROC_PRESSURE_IO;
else if (strcmp(path, "/proc/pressure/cpu") == 0)
type = LXC_TYPE_PROC_PRESSURE_CPU;
else if (strcmp(path, "/proc/pressure/memory") == 0)
type = LXC_TYPE_PROC_PRESSURE_MEMORY;
if (type == -1)
return -ENOENT;
@@ -227,10 +266,40 @@ __lxcfs_fuse_ops int proc_open(const char *path, struct fuse_file_info *fi)
return 0;
}
__lxcfs_fuse_ops int proc_opendir(const char *path, struct fuse_file_info *fi)
{
__do_free struct file_info *dir_info = NULL;
int type = -1;
if (!liblxcfs_functional())
return -EIO;
if (strcmp(path, "/proc") == 0)
type = LXC_TYPE_PROC;
else if (strcmp(path, "/proc/pressure") == 0)
type = LXC_TYPE_PROC_PRESSURE;
if (type == -1)
return -ENOENT;
dir_info = zalloc(sizeof(*dir_info));
if (!dir_info)
return -ENOMEM;
dir_info->type = type;
dir_info->buf = NULL;
dir_info->file = NULL;
dir_info->buflen = 0;
fi->fh = PTR_TO_UINT64(move_ptr(dir_info));
return 0;
}
__lxcfs_fuse_ops int proc_access(const char *path, int mask)
{
if (strcmp(path, "/proc") == 0 && access(path, R_OK) == 0)
return 0;
if (strcmp(path, "/proc/pressure") == 0 && access(path, R_OK) == 0)
return 0;
/* these are all read-only */
if ((mask & ~R_OK) != 0)
@@ -245,6 +314,12 @@ __lxcfs_fuse_ops int proc_release(const char *path, struct fuse_file_info *fi)
return 0;
}
/*
 * releasedir handler for the virtualized /proc tree.
 *
 * Hands @fi to do_release_file_info() and always returns 0. @path is not
 * consulted.
 */
__lxcfs_fuse_ops int proc_releasedir(const char *path, struct fuse_file_info *fi)
{
	(void)path;

	do_release_file_info(fi);

	return 0;
}
/**
* Gets a non-hierarchical memory controller limit, or UINT64_MAX if no limit is
* in place. If `swap` is true, reads 'swap' (v2) or 'memsw' (v1); otherwise
@@ -1599,6 +1674,213 @@ static int proc_slabinfo_read(char *buf, size_t size, off_t offset,
return total_len;
}
static int proc_pressure_io_read(char *buf, size_t size, off_t offset,
struct fuse_file_info *fi)
{
__do_free char *cgroup = NULL, *line = NULL;
__do_free void *fopen_cache = NULL;
__do_fclose FILE *f = NULL;
__do_close int fd = -EBADF;
struct fuse_context *fc = fuse_get_context();
struct file_info *d = INTTYPE_TO_PTR(fi->fh);
size_t linelen = 0, total_len = 0;
char *cache = d->buf;
size_t cache_size = d->buflen;
pid_t initpid;
if (offset) {
size_t left;
if (offset > d->size)
return -EINVAL;
if (!d->cached)
return 0;
left = d->size - offset;
total_len = left > size ? size : left;
memcpy(buf, cache + offset, total_len);
return total_len;
}
initpid = lookup_initpid_in_store(fc->pid);
if (initpid <= 1 || is_shared_pidns(initpid))
initpid = fc->pid;
cgroup = get_pid_cgroup(initpid, "blkio");
if (!cgroup)
return read_file_fuse("/proc/pressure/io", buf, size, d);
prune_init_slice(cgroup);
fd = cgroup_ops->get_pressure_io_fd(cgroup_ops, cgroup);
if (fd < 0)
return read_file_fuse("/proc/pressure/io", buf, size, d);
f = fdopen_cached(fd, "re", &fopen_cache);
if (!f)
return read_file_fuse("/proc/pressure/io", buf, size, d);
while (getline(&line, &linelen, f) != -1) {
ssize_t l = snprintf(cache, cache_size, "%s", line);
if (l < 0)
return log_error(0, "Failed to write cache");
if ((size_t)l >= cache_size)
return log_error(0, "Write to cache was truncated");
cache += l;
cache_size -= l;
total_len += l;
}
d->cached = 1;
d->size = total_len;
if (total_len > size)
total_len = size;
memcpy(buf, d->buf, total_len);
return total_len;
}
static int proc_pressure_cpu_read(char *buf, size_t size, off_t offset,
struct fuse_file_info *fi)
{
__do_free char *cgroup = NULL, *line = NULL;
__do_free void *fopen_cache = NULL;
__do_fclose FILE *f = NULL;
__do_close int fd = -EBADF;
struct fuse_context *fc = fuse_get_context();
struct file_info *d = INTTYPE_TO_PTR(fi->fh);
size_t linelen = 0, total_len = 0;
char *cache = d->buf;
size_t cache_size = d->buflen;
pid_t initpid;
if (offset) {
size_t left;
if (offset > d->size)
return -EINVAL;
if (!d->cached)
return 0;
left = d->size - offset;
total_len = left > size ? size : left;
memcpy(buf, cache + offset, total_len);
return total_len;
}
initpid = lookup_initpid_in_store(fc->pid);
if (initpid <= 1 || is_shared_pidns(initpid))
initpid = fc->pid;
cgroup = get_pid_cgroup(initpid, "cpu");
if (!cgroup)
return read_file_fuse("/proc/pressure/cpu", buf, size, d);
prune_init_slice(cgroup);
fd = cgroup_ops->get_pressure_cpu_fd(cgroup_ops, cgroup);
if (fd < 0)
return read_file_fuse("/proc/pressure/cpu", buf, size, d);
f = fdopen_cached(fd, "re", &fopen_cache);
if (!f)
return read_file_fuse("/proc/pressure/cpu", buf, size, d);
while (getline(&line, &linelen, f) != -1) {
ssize_t l = snprintf(cache, cache_size, "%s", line);
if (l < 0)
return log_error(0, "Failed to write cache");
if ((size_t)l >= cache_size)
return log_error(0, "Write to cache was truncated");
cache += l;
cache_size -= l;
total_len += l;
}
d->cached = 1;
d->size = total_len;
if (total_len > size)
total_len = size;
memcpy(buf, d->buf, total_len);
return total_len;
}
static int proc_pressure_memory_read(char *buf, size_t size, off_t offset,
struct fuse_file_info *fi)
{
__do_free char *cgroup = NULL, *line = NULL;
__do_free void *fopen_cache = NULL;
__do_fclose FILE *f = NULL;
__do_close int fd = -EBADF;
struct fuse_context *fc = fuse_get_context();
struct file_info *d = INTTYPE_TO_PTR(fi->fh);
size_t linelen = 0, total_len = 0;
char *cache = d->buf;
size_t cache_size = d->buflen;
pid_t initpid;
if (offset) {
size_t left;
if (offset > d->size)
return -EINVAL;
if (!d->cached)
return 0;
left = d->size - offset;
total_len = left > size ? size : left;
memcpy(buf, cache + offset, total_len);
return total_len;
}
initpid = lookup_initpid_in_store(fc->pid);
if (initpid <= 1 || is_shared_pidns(initpid))
initpid = fc->pid;
cgroup = get_pid_cgroup(initpid, "memory");
if (!cgroup)
return read_file_fuse("/proc/pressure/memory", buf, size, d);
prune_init_slice(cgroup);
fd = cgroup_ops->get_pressure_memory_fd(cgroup_ops, cgroup);
if (fd < 0)
return read_file_fuse("/proc/pressure/memory", buf, size, d);
f = fdopen_cached(fd, "re", &fopen_cache);
if (!f)
return read_file_fuse("/proc/pressure/memory", buf, size, d);
while (getline(&line, &linelen, f) != -1) {
ssize_t l = snprintf(cache, cache_size, "%s", line);
if (l < 0)
return log_error(0, "Failed to write cache");
if ((size_t)l >= cache_size)
return log_error(0, "Write to cache was truncated");
cache += l;
cache_size -= l;
total_len += l;
}
d->cached = 1;
d->size = total_len;
if (total_len > size)
total_len = size;
memcpy(buf, d->buf, total_len);
return total_len;
}
static int proc_read_with_personality(int (*do_proc_read)(char *, size_t, off_t,
struct fuse_file_info *), char *buf, size_t size, off_t offset,
struct fuse_file_info *fi)
@@ -1696,6 +1978,24 @@ __lxcfs_fuse_ops int proc_read(const char *path, char *buf, size_t size,
return read_file_fuse_with_offset(LXC_TYPE_PROC_SLABINFO_PATH,
buf, size, offset, f);
case LXC_TYPE_PROC_PRESSURE_IO:
if (liblxcfs_functional())
return proc_pressure_io_read(buf, size, offset, fi);
return read_file_fuse_with_offset(LXC_TYPE_PROC_PRESSURE_IO_PATH,
buf, size, offset, f);
case LXC_TYPE_PROC_PRESSURE_CPU:
if (liblxcfs_functional())
return proc_pressure_cpu_read(buf, size, offset, fi);
return read_file_fuse_with_offset(LXC_TYPE_PROC_PRESSURE_CPU_PATH,
buf, size, offset, f);
case LXC_TYPE_PROC_PRESSURE_MEMORY:
if (liblxcfs_functional())
return proc_pressure_memory_read(buf, size, offset, fi);
return read_file_fuse_with_offset(LXC_TYPE_PROC_PRESSURE_MEMORY_PATH,
buf, size, offset, f);
}
return -EINVAL;

View File

@@ -18,8 +18,10 @@
__visible extern int proc_getattr(const char *path, struct stat *sb);
__visible extern int proc_readdir(const char *path, void *buf, fuse_fill_dir_t filler, off_t offset, struct fuse_file_info *fi);
__visible extern int proc_open(const char *path, struct fuse_file_info *fi);
__visible extern int proc_opendir(const char *path, struct fuse_file_info *fi);
__visible extern int proc_access(const char *path, int mask);
__visible extern int proc_read(const char *path, char *buf, size_t size, off_t offset, struct fuse_file_info *fi);
__visible extern int proc_release(const char *path, struct fuse_file_info *fi);
__visible extern int proc_releasedir(const char *path, struct fuse_file_info *fi);
#endif /* __LXCFS_PROC_FUSE_H */