#define _GNU_SOURCE
#include <linux/limits.h>
#include <unistd.h>
#include <stdio.h>
#include <signal.h>
#include <sys/sysinfo.h>
#include <string.h>
#include <sys/wait.h>
#include <sys/mman.h>
#include "kselftest.h"
#include "cgroup_util.h"
/*
 * Read one unsigned decimal integer from the file at @path into @value.
 *
 * @value is left untouched when parsing fails.
 *
 * Returns 0 on success, -1 if the file cannot be opened or does not begin
 * with a parsable integer.
 */
static int read_int(const char *path, size_t *value)
{
	FILE *file;
	int ret = 0;

	file = fopen(path, "r");
	if (!file)
		return -1;
	/* %zu is the conversion that matches a size_t destination. */
	if (fscanf(file, "%zu", value) != 1)
		ret = -1;
	fclose(file);
	return ret;
}
/*
 * Write @value to /proc/sys/vm/min_free_kbytes.
 *
 * Returns 0 on success, -1 if the file cannot be opened or the write fails.
 * The stream is buffered, so a write error may only surface when the stream
 * is flushed by fclose(); its result is therefore checked as well.
 */
static int set_min_free_kb(size_t value)
{
	FILE *file;
	int ret = 0;

	file = fopen("/proc/sys/vm/min_free_kbytes", "w");
	if (!file)
		return -1;
	/* %zu is the conversion that matches a size_t argument. */
	if (fprintf(file, "%zu\n", value) < 0)
		ret = -1;
	if (fclose(file) != 0)
		ret = -1;
	return ret;
}
/*
 * Fetch the current /proc/sys/vm/min_free_kbytes setting into @value.
 * The result of read_int() is passed through unchanged.
 */
static int read_min_free_kb(size_t *value)
{
	const char *sysctl_path = "/proc/sys/vm/min_free_kbytes";
	int status = read_int(sysctl_path, value);

	return status;
}
/*
 * Fetch the value of the zswap debugfs "stored_pages" file into @value.
 * The result of read_int() is passed through unchanged.
 */
static int get_zswap_stored_pages(size_t *value)
{
	const char *debugfs_path = "/sys/kernel/debug/zswap/stored_pages";
	int status = read_int(debugfs_path, value);

	return status;
}
/*
 * Look up the "zswpwb" entry in memory.stat of cgroup @cg.
 * Whatever cg_read_key_long() yields is returned as-is; callers in this
 * file treat a negative value as a failure.
 */
static long get_cg_wb_count(const char *cg)
{
	long writeback_count = cg_read_key_long(cg, "memory.stat", "zswpwb");

	return writeback_count;
}
/*
 * Look up the "zswpout " entry in memory.stat of @cgroup.
 * Whatever cg_read_key_long() yields is returned as-is; callers in this
 * file treat a negative value as a failure.
 */
static long get_zswpout(const char *cgroup)
{
	long stored_out = cg_read_key_long(cgroup, "memory.stat", "zswpout ");

	return stored_out;
}
/*
 * Allocate a buffer, write 'a' at every 4095-byte step, then read each of
 * those bytes back and verify it.
 *
 * @cgroup: unused here; the signature matches the callbacks passed to
 *          cg_run() in this file.
 * @arg:    the buffer size in bytes, passed as an integer cast to a pointer.
 *
 * Returns 0 when every byte reads back as written, -1 on allocation failure
 * or on any mismatch.
 */
static int allocate_and_read_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);
	int ret = 0;

	if (!mem)
		return -1;
	/* size_t index: an int counter would overflow for very large sizes. */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';
	/* Keep reading after a mismatch so every touched location is read back. */
	for (size_t i = 0; i < size; i += 4095) {
		if (mem[i] != 'a')
			ret = -1;
	}
	free(mem);
	return ret;
}
/*
 * Allocate a buffer and write 'a' at every 4095-byte step, then free it.
 *
 * @cgroup: unused here; the signature matches the callbacks passed to
 *          cg_run() in this file.
 * @arg:    the buffer size in bytes, passed as an integer cast to a pointer.
 *
 * Returns 0 on success, -1 if the allocation fails.
 */
static int allocate_bytes(const char *cgroup, void *arg)
{
	size_t size = (size_t)arg;
	char *mem = malloc(size);

	if (!mem)
		return -1;
	/* size_t index: an int counter would overflow for very large sizes. */
	for (size_t i = 0; i < size; i += 4095)
		mem[i] = 'a';
	free(mem);
	return 0;
}
/*
 * Create the cgroup @name under @root and set its memory.max to "1M".
 *
 * Returns the cgroup path obtained from cg_name() (the caller frees it), or
 * NULL on any failure. If the cgroup was created but the limit could not be
 * written, the cgroup is destroyed again before returning.
 */
static char *setup_test_group_1M(const char *root, const char *name)
{
	char *cg_path = cg_name(root, name);

	if (cg_path == NULL)
		return NULL;

	if (cg_create(cg_path) == 0) {
		if (cg_write(cg_path, "memory.max", "1M") == 0)
			return cg_path;
		/* Limit could not be applied: undo the creation. */
		cg_destroy(cg_path);
	}

	free(cg_path);
	return NULL;
}
/*
 * Run a 4M allocation inside a cgroup limited to 1M and check that the
 * cgroup's zswpout counter grows as a result.
 *
 * Returns KSFT_PASS if the counter increased, KSFT_FAIL otherwise.
 */
static int test_zswap_usage(const char *root)
{
	char *cg = cg_name(root, "no_shrink_test");
	int result = KSFT_FAIL;
	long before, after;

	if (!cg)
		goto cleanup;
	if (cg_create(cg) || cg_write(cg, "memory.max", "1M"))
		goto cleanup;

	before = get_zswpout(cg);
	if (before < 0) {
		ksft_print_msg("Failed to get zswpout\n");
		goto cleanup;
	}

	/* Allocate four times the limit inside the cgroup. */
	if (cg_run(cg, allocate_bytes, (void *)MB(4)))
		goto cleanup;

	after = get_zswpout(cg);
	if (after > before)
		result = KSFT_PASS;
	else
		ksft_print_msg("zswpout does not increase after test program\n");

cleanup:
	cg_destroy(cg);
	free(cg);
	return result;
}
/*
 * With memory.max at 8M and memory.zswap.max at 0, run a 32M
 * allocate-and-read workload and check that memory.swap.peak reached at
 * least 24MB while the cgroup's zswpout counter stayed at zero.
 *
 * Returns KSFT_PASS when both conditions hold, KSFT_FAIL otherwise.
 */
static int test_swapin_nozswap(const char *root)
{
	char *cg = cg_name(root, "no_zswap_test");
	int result = KSFT_FAIL;
	long peak, stored_out;

	if (!cg)
		goto cleanup;
	if (cg_create(cg) ||
	    cg_write(cg, "memory.max", "8M") ||
	    cg_write(cg, "memory.zswap.max", "0"))
		goto cleanup;

	/* Allocate and read back four times the memory limit. */
	if (cg_run(cg, allocate_and_read_bytes, (void *)MB(32)))
		goto cleanup;

	peak = cg_read_long(cg, "memory.swap.peak");
	if (peak < 0) {
		ksft_print_msg("failed to get cgroup's swap_peak\n");
		goto cleanup;
	}
	if (peak < MB(24)) {
		ksft_print_msg("at least 24MB of memory should be swapped out\n");
		goto cleanup;
	}

	stored_out = get_zswpout(cg);
	if (stored_out < 0) {
		ksft_print_msg("failed to get zswpout\n");
		goto cleanup;
	}
	if (stored_out > 0) {
		ksft_print_msg("zswapout > 0 when memory.zswap.max = 0\n");
		goto cleanup;
	}

	result = KSFT_PASS;

cleanup:
	cg_destroy(cg);
	free(cg);
	return result;
}
/*
 * With memory.max at 8M and memory.zswap.max at "max", run a 32M
 * allocate-and-read workload and check that the cgroup's zswpin counter
 * reached at least MB(24) / PAGE_SIZE.
 *
 * Returns KSFT_PASS when the threshold is met, KSFT_FAIL otherwise.
 */
static int test_zswapin(const char *root)
{
	char *cg = cg_name(root, "zswapin_test");
	int result = KSFT_FAIL;
	long loaded_back;

	if (!cg)
		goto cleanup;
	if (cg_create(cg) ||
	    cg_write(cg, "memory.max", "8M") ||
	    cg_write(cg, "memory.zswap.max", "max"))
		goto cleanup;

	/* Allocate and read back four times the memory limit. */
	if (cg_run(cg, allocate_and_read_bytes, (void *)MB(32)))
		goto cleanup;

	loaded_back = cg_read_key_long(cg, "memory.stat", "zswpin ");
	if (loaded_back < 0) {
		ksft_print_msg("failed to get zswpin\n");
		goto cleanup;
	}
	if (loaded_back < MB(24) / PAGE_SIZE) {
		ksft_print_msg("at least 24MB should be brought back from zswap\n");
		goto cleanup;
	}

	result = KSFT_PASS;

cleanup:
	cg_destroy(cg);
	free(cg);
	return result;
}
/*
 * Callback run inside @cgroup: fill 4M of memory, reclaim it, then halve the
 * zswap limit and reclaim again.
 *
 * @cgroup: cgroup whose memory.reclaim / memory.zswap.* files are driven.
 * @arg:    pointer to a bool saying whether zswap writeback is enabled.
 *
 * Returns 0 on success and non-zero on failure. With writeback enabled the
 * result of the second memory.reclaim write is returned as-is; with
 * writeback disabled that write is expected to fail with -EAGAIN, and any
 * other outcome is reported as -1.
 */
static int attempt_writeback(const char *cgroup, void *arg)
{
	long pagesize = sysconf(_SC_PAGESIZE);
	size_t memsize = MB(4);
	char buf[pagesize];
	long zswap_usage;
	bool wb_enabled = *(bool *) arg;
	int ret = -1;
	char *mem;

	mem = malloc(memsize);
	if (!mem)
		return ret;

	/*
	 * Page template: the first half holds an incrementing byte pattern,
	 * the second half is zero.
	 */
	for (long i = 0; i < pagesize; i++)
		buf[i] = i < pagesize / 2 ? (char) i : 0;
	for (size_t off = 0; off < memsize; off += pagesize)
		memcpy(&mem[off], buf, pagesize);

	/* First reclaim pass over everything that was just written. */
	if (cg_write_numeric(cgroup, "memory.reclaim", memsize)) {
		ksft_print_msg("Failed to reclaim all of the requested memory\n");
		goto out;
	}

	zswap_usage = cg_read_long(cgroup, "memory.zswap.current");
	/*
	 * A failed read must not be turned into a zswap limit below: a
	 * negative value halved would silently become a bogus limit.
	 */
	if (zswap_usage < 0) {
		ksft_print_msg("Failed to read memory.zswap.current\n");
		goto out;
	}

	/* Touch every page again and verify its contents survived reclaim. */
	for (size_t off = 0; off < memsize; off += pagesize) {
		if (memcmp(&mem[off], buf, pagesize)) {
			ksft_print_msg("invalid memory\n");
			goto out;
		}
	}

	/* Halve the zswap limit, then ask for the same amount to be reclaimed. */
	if (cg_write_numeric(cgroup, "memory.zswap.max", zswap_usage / 2))
		goto out;

	ret = cg_write_numeric(cgroup, "memory.reclaim", memsize);
	if (!wb_enabled)
		ret = (ret == -EAGAIN) ? 0 : -1;

out:
	free(mem);
	return ret;
}
/*
 * Run attempt_writeback() inside @cgroup and check that the cgroup's zswpwb
 * counter ends up non-zero exactly when @wb is true.
 *
 * The counter must read 0 before the run. Returns 0 when the outcome matches
 * @wb, -1 otherwise.
 */
static int test_zswap_writeback_one(const char *cgroup, bool wb)
{
	long wb_before = get_cg_wb_count(cgroup);
	long wb_after;
	bool wrote_back;

	if (wb_before != 0) {
		ksft_print_msg("zswpwb_before = %ld instead of 0\n", wb_before);
		return -1;
	}

	if (cg_run(cgroup, attempt_writeback, (void *) &wb) != 0)
		return -1;

	wb_after = get_cg_wb_count(cgroup);
	if (wb_after < 0)
		return -1;

	/* Any non-zero count means writeback happened. */
	wrote_back = (wb_after != 0);
	if (wrote_back != wb) {
		ksft_print_msg("zswpwb_after is %ld while wb is %s\n",
			       wb_after, wb ? "enabled" : "disabled");
		return -1;
	}

	return 0;
}
/*
 * Check zswap writeback behaviour for a cgroup and for a child of it.
 *
 * The parent cgroup gets memory.zswap.writeback set according to @wb and is
 * run through test_zswap_writeback_one(). A child cgroup is then created
 * with its own memory.zswap.writeback set to "1" and is checked against the
 * same @wb expectation as the parent.
 *
 * Returns KSFT_SKIP when the root's memory.zswap.writeback does not compare
 * equal to "1", KSFT_PASS when both checks succeed, KSFT_FAIL otherwise.
 */
static int test_zswap_writeback(const char *root, bool wb)
{
	char *parent, *child = NULL;
	int result = KSFT_FAIL;

	if (cg_read_strcmp(root, "memory.zswap.writeback", "1"))
		return KSFT_SKIP;

	parent = cg_name(root, "zswap_writeback_test");
	if (!parent)
		goto cleanup;

	/*
	 * Parent-level check, followed by restoring memory.zswap.max (which
	 * attempt_writeback() modifies) and enabling the memory controller
	 * for children. Short-circuit evaluation keeps the steps in order.
	 */
	if (cg_create(parent) ||
	    cg_write(parent, "memory.zswap.writeback", wb ? "1" : "0") ||
	    test_zswap_writeback_one(parent, wb) ||
	    cg_write(parent, "memory.zswap.max", "max") ||
	    cg_write(parent, "cgroup.subtree_control", "+memory"))
		goto cleanup;

	child = cg_name(parent, "zswap_writeback_test_child");
	if (!child)
		goto cleanup;

	/* The child itself always asks for writeback; @wb is still expected. */
	if (cg_create(child) ||
	    cg_write(child, "memory.zswap.writeback", "1") ||
	    test_zswap_writeback_one(child, wb))
		goto cleanup;

	result = KSFT_PASS;

cleanup:
	if (child) {
		cg_destroy(child);
		free(child);
	}
	cg_destroy(parent);
	free(parent);
	return result;
}
/* Table entry point: run the writeback test with writeback switched on. */
static int test_zswap_writeback_enabled(const char *root)
{
	const bool writeback_on = true;

	return test_zswap_writeback(root, writeback_on);
}
/* Table entry point: run the writeback test with writeback switched off. */
static int test_zswap_writeback_disabled(const char *root)
{
	const bool writeback_on = false;

	return test_zswap_writeback(root, writeback_on);
}
/*
 * Check that zswap writeback caused by one cgroup does not spill over into
 * another cgroup.
 *
 * Two cgroups limited to 1M are created. The "wb" group additionally gets
 * memory.zswap.max set to "10K". The current process enters the control
 * group and touches 10M there, after which the control group must report a
 * "zswapped" value of at least 1. A 10M allocation is then run in the wb
 * group. The test passes only when the wb group's zswpwb counter is
 * positive while the control group's counter is still 0.
 *
 * Returns KSFT_PASS or KSFT_FAIL.
 */
static int test_no_invasive_cgroup_shrink(const char *root)
{
	int ret = KSFT_FAIL;
	size_t control_allocation_size = MB(10);
	char *control_allocation = NULL, *wb_group = NULL, *control_group = NULL;

	wb_group = setup_test_group_1M(root, "per_memcg_wb_test1");
	if (!wb_group)
		return KSFT_FAIL;
	if (cg_write(wb_group, "memory.zswap.max", "10K"))
		goto out;
	control_group = setup_test_group_1M(root, "per_memcg_wb_test2");
	if (!control_group)
		goto out;

	/* Populate the control group's zswap from this very process. */
	if (cg_enter_current(control_group))
		goto out;
	control_allocation = malloc(control_allocation_size);
	/* The buffer is written to right below, so a failed malloc must bail. */
	if (!control_allocation)
		goto out;
	for (size_t i = 0; i < control_allocation_size; i += 4095)
		control_allocation[i] = 'a';
	if (cg_read_key_long(control_group, "memory.stat", "zswapped") < 1)
		goto out;

	/* Allocate well past the wb group's limits to provoke writeback. */
	if (cg_run(wb_group, allocate_bytes, (void *)MB(10)))
		goto out;

	/* Writeback must have hit the wb group and spared the control group. */
	if (get_cg_wb_count(wb_group) > 0 && get_cg_wb_count(control_group) == 0)
		ret = KSFT_PASS;
out:
	/* Move back to the root before the cgroups are destroyed. */
	cg_enter_current(root);
	if (control_group) {
		cg_destroy(control_group);
		free(control_group);
	}
	cg_destroy(wb_group);
	free(wb_group);
	free(control_allocation);
	return ret;
}
/*
 * Arguments exchanged between test_no_kmem_bypass() and its child,
 * no_kmem_bypass_child(). The parent places this struct in a MAP_SHARED
 * anonymous mapping before starting the child.
 */
struct no_kmem_bypass_child_args {
/* Number of bytes the child should allocate and touch. */
size_t target_alloc_bytes;
/* Used as a flag: set to true by the child once its allocation attempt is over. */
size_t child_allocated;
};
/*
 * Child side of test_no_kmem_bypass(): allocate target_alloc_bytes, touch
 * the buffer at every 4095-byte step, flag completion through
 * child_allocated, then block in pause().
 *
 * @cgroup: unused here.
 * @arg:    pointer to a struct no_kmem_bypass_child_args.
 *
 * child_allocated is set even when the allocation fails. Returns -1 on
 * allocation failure, 0 if execution continues past pause().
 */
static int no_kmem_bypass_child(const char *cgroup, void *arg)
{
	struct no_kmem_bypass_child_args *shared = arg;
	char *region = malloc(shared->target_alloc_bytes);

	if (region == NULL) {
		shared->child_allocated = true;
		return -1;
	}

	for (long off = 0; off < shared->target_alloc_bytes; off += 4095)
		region[off] = 'a';

	shared->child_allocated = true;

	/* Hold on to the memory until a signal arrives. */
	pause();

	free(region);
	return 0;
}
/*
 * Check that the pages held by zswap are accounted to the cgroup whose
 * memory they came from.
 *
 * A child placed in a dedicated cgroup allocates slightly more than the
 * machine's RAM while min_free_kbytes is low. The parent then raises
 * min_free_kbytes and repeatedly allocates memory of its own until the
 * global zswap stored_pages count exceeds a threshold. At that point the
 * cgroup's "zswapped" bytes are compared with the global total: the test
 * passes when less than a quarter of the stored bytes are not charged to
 * the cgroup.
 *
 * Returns KSFT_SKIP when sysinfo reports a totalram above 5000000000,
 * KSFT_PASS / KSFT_FAIL otherwise. min_free_kbytes is restored to its
 * original value on exit.
 */
static int test_no_kmem_bypass(const char *root)
{
	size_t min_free_kb_high, min_free_kb_low, min_free_kb_original;
	struct no_kmem_bypass_child_args *values;
	size_t trigger_allocation_size;
	int wait_child_iteration = 0;
	size_t stored_pages_threshold;
	struct sysinfo sys_info;
	int ret = KSFT_FAIL;
	int child_status;
	char *test_group = NULL;
	pid_t child_pid;

	/* Read sys info and compute test values accordingly. */
	if (sysinfo(&sys_info) != 0)
		return KSFT_FAIL;
	if (sys_info.totalram > 5000000000)
		return KSFT_SKIP;

	/* Shared mapping so the child's completion flag is visible here. */
	values = mmap(0, sizeof(struct no_kmem_bypass_child_args), PROT_READ |
			PROT_WRITE, MAP_SHARED | MAP_ANONYMOUS, -1, 0);
	if (values == MAP_FAILED)
		return KSFT_FAIL;
	/* Nothing but the mapping needs undoing if this fails. */
	if (read_min_free_kb(&min_free_kb_original))
		goto out_unmap;

	min_free_kb_high = sys_info.totalram / 2000;
	min_free_kb_low = sys_info.totalram / 500000;
	values->target_alloc_bytes = (sys_info.totalram - min_free_kb_high * 1000) +
		sys_info.totalram * 5 / 100;
	stored_pages_threshold = sys_info.totalram / 5 / 4096;
	trigger_allocation_size = sys_info.totalram / 20;

	/* Set up the test group. */
	test_group = cg_name(root, "kmem_bypass_test");
	if (!test_group)
		goto out;

	/* Let the child allocate with a low min_free_kbytes setting. */
	set_min_free_kb(min_free_kb_low);
	if (cg_create(test_group))
		goto out;
	values->child_allocated = false;
	child_pid = cg_run_nowait(test_group, no_kmem_bypass_child, values);
	if (child_pid < 0)
		goto out;

	/* Wait (bounded) for the child to finish its allocation attempt. */
	while (!values->child_allocated && wait_child_iteration++ < 10000)
		usleep(1000);

	/* Raise min_free_kbytes, then add memory pressure from this process. */
	set_min_free_kb(min_free_kb_high);
	for (int i = 0; i < 20; i++) {
		size_t stored_pages, stored_bytes;
		long zswapped;
		char *trigger_allocation = malloc(trigger_allocation_size);

		if (!trigger_allocation)
			break;
		for (size_t off = 0; off < trigger_allocation_size; off += 4095)
			trigger_allocation[off] = 'b';
		usleep(100000);
		free(trigger_allocation);

		if (get_zswap_stored_pages(&stored_pages))
			break;
		if (stored_pages <= stored_pages_threshold)
			continue;

		/*
		 * Enough pages are in zswap: compare the global byte count
		 * with what the cgroup is charged for. The arithmetic stays
		 * in long/size_t so large byte counts are not truncated.
		 */
		zswapped = cg_read_key_long(test_group, "memory.stat", "zswapped ");
		stored_bytes = stored_pages * 4096;
		if (zswapped < 0 || (size_t)zswapped > stored_bytes)
			ret = KSFT_FAIL;
		else if (stored_bytes - (size_t)zswapped < stored_bytes / 4)
			ret = KSFT_PASS;
		else
			ret = KSFT_FAIL;
		break;
	}

	kill(child_pid, SIGTERM);
	waitpid(child_pid, &child_status, 0);
out:
	set_min_free_kb(min_free_kb_original);
	cg_destroy(test_group);
	free(test_group);
out_unmap:
	munmap(values, sizeof(*values));
	return ret;
}
/* Build a table entry from a test function and its stringified identifier. */
#define T(x) { x, #x }
/*
 * Table of test cases. main() calls each fn in order with the cgroup root
 * path and reports the outcome under name.
 */
struct zswap_test {
int (*fn)(const char *root);
const char *name;
} tests[] = {
T(test_zswap_usage),
T(test_swapin_nozswap),
T(test_zswapin),
T(test_zswap_writeback_enabled),
T(test_zswap_writeback_disabled),
T(test_no_kmem_bypass),
T(test_no_invasive_cgroup_shrink),
};
#undef T
/* Report whether /sys/module/zswap exists, as determined by access(F_OK). */
static bool zswap_configured(void)
{
	int rc = access("/sys/module/zswap", F_OK);

	return rc == 0;
}
int main(int argc, char **argv)
{
char root[PATH_MAX];
int i;
ksft_print_header();
ksft_set_plan(ARRAY_SIZE(tests));
if (cg_find_unified_root(root, sizeof(root), NULL))
ksft_exit_skip("cgroup v2 isn't mounted\n");
if (!zswap_configured())
ksft_exit_skip("zswap isn't configured\n");
if (cg_read_strstr(root, "cgroup.controllers", "memory"))
ksft_exit_skip("memory controller isn't available\n");
if (cg_read_strstr(root, "cgroup.subtree_control", "memory"))
if (cg_write(root, "cgroup.subtree_control", "+memory"))
ksft_exit_skip("Failed to set memory controller\n");
for (i = 0; i < ARRAY_SIZE(tests); i++) {
switch (tests[i].fn(root)) {
case KSFT_PASS:
ksft_test_result_pass("%s\n", tests[i].name);
break;
case KSFT_SKIP:
ksft_test_result_skip("%s\n", tests[i].name);
break;
default:
ksft_test_result_fail("%s\n", tests[i].name);
break;
}
}
ksft_finished();
}