root/tools/testing/selftests/pidfd/pidfd_test.c
/* SPDX-License-Identifier: GPL-2.0 */

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <linux/types.h>
#include <pthread.h>
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdbool.h>
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
#include <sys/epoll.h>
#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/wait.h>
#include <time.h>
#include <unistd.h>

#include "pidfd.h"
#include "kselftest.h"

/*
 * Two-level stringification: str(X) lets X be macro-expanded first and then
 * turns the result into a string literal, so str(CHILD_THREAD_MIN_WAIT)
 * yields "3".
 */
#define str(s) _str(s)
#define _str(s) #s
/*
 * How long the child threads stay busy, and the lower bound the poll tests
 * accept for the time it took to be notified.
 */
#define CHILD_THREAD_MIN_WAIT 3 /* seconds */

/* Capacity of the event array handed to epoll_wait() in poll_pidfd(). */
#define MAX_EVENTS 5

/*
 * Set to true by test_pidfd_send_signal_syscall_support() once the syscall has
 * succeeded; the other pidfd_send_signal tests skip themselves while it is
 * false.
 */
static bool have_pidfd_send_signal;

/*
 * Create a child with clone() that runs fn on a scratch stack.
 *
 * @flags: clone flags; SIGCHLD is always OR-ed in.
 * @pidfd: passed through as the last argument of clone()/__clone2(); the
 *         callers in this file pass CLONE_PIDFD together with it.
 * @fn:    entry point of the child; NULL is passed as its argument.
 *
 * Returns the value returned by clone()/__clone2().
 */
static pid_t pidfd_clone(int flags, int *pidfd, int (*fn)(void *))
{
        size_t stack_size = 1024;
        /*
         * NOTE(review): this is an array of 1024 pointers, not of 1024 bytes.
         * "stack + stack_size" below therefore advances by 1024 elements and
         * points one past the end of the array, whereas the ia64 branch hands
         * 1024 to __clone2() as a size - confirm the difference is intended.
         */
        char *stack[1024] = { 0 };

#ifdef __ia64__
        return __clone2(fn, stack, stack_size, flags | SIGCHLD, NULL, pidfd);
#else
        /* Pass the upper end of the buffer as the child's stack pointer. */
        return clone(fn, stack + stack_size, flags | SIGCHLD, NULL, pidfd);
#endif
}

/* Thread in which the SIGUSR1 handler last ran; reset by send_signal(). */
static pthread_t signal_received;

/*
 * Signal handler: on SIGUSR1 record the handling thread in signal_received.
 * Any other signal number is ignored.
 */
static void set_signal_received_on_sigusr1(int sig)
{
        if (sig != SIGUSR1)
                return;

        signal_received = pthread_self();
}

/*
 * Send SIGUSR1 through pidfd and check that the handler ran in the calling
 * thread.
 *
 * Returns 0 when the signal was sent and signal_received names the calling
 * thread, -EINVAL otherwise. signal_received is reset to 0 in either case.
 */
static int send_signal(int pidfd)
{
        int status = -EINVAL;

        /* The second operand is only evaluated when the syscall succeeded. */
        if (sys_pidfd_send_signal(pidfd, SIGUSR1, NULL, 0) >= 0 &&
            signal_received == pthread_self())
                status = 0;

        signal_received = 0;
        return status;
}

/*
 * pthread entry point wrapping send_signal().
 *
 * @arg: the pidfd, smuggled through the pointer argument.
 *
 * The result of send_signal() is returned packed into the pointer so that the
 * joining thread can deal with any error.
 */
static void *send_signal_worker(void *arg)
{
        intptr_t outcome = send_signal((int)(intptr_t)arg);

        return (void *)outcome;
}

/*
 * The most direct check that pidfd_send_signal() works: deliver SIGUSR1 to
 * ourselves. Three variants are exercised in turn - a /proc/self file
 * descriptor, PIDFD_SELF_THREAD_GROUP, and PIDFD_SELF_THREAD from a worker
 * thread.
 *
 * Always returns 0; failures are reported via ksft_exit_fail_msg().
 */
static int test_pidfd_send_signal_simple_success(void)
{
        const char *test_name = "pidfd_send_signal send SIGUSR1";
        pthread_t worker;
        void *worker_ret;
        int self_fd;
        int rc;

        if (!have_pidfd_send_signal) {
                ksft_test_result_skip(
                        "%s test: pidfd_send_signal() syscall not supported\n",
                        test_name);
                return 0;
        }

        signal(SIGUSR1, set_signal_received_on_sigusr1);

        /* Variant 1: a directory fd on /proc/self serves as the pidfd. */
        self_fd = open("/proc/self", O_DIRECTORY | O_CLOEXEC);
        if (self_fd < 0)
                ksft_exit_fail_msg(
                        "%s test: Failed to open process file descriptor\n",
                        test_name);

        rc = send_signal(self_fd);
        if (rc != 0)
                ksft_exit_fail_msg(
                        "%s test: Error %d on sending pidfd signal\n",
                        test_name, rc);
        close(self_fd);

        /* Variant 2: the PIDFD_SELF_THREAD_GROUP sentinel instead of an fd. */
        rc = send_signal(PIDFD_SELF_THREAD_GROUP);
        if (rc != 0)
                ksft_exit_fail_msg(
                        "%s test: Error %d on PIDFD_SELF_THREAD_GROUP signal\n",
                        test_name, rc);

        /*
         * Variant 3: run the check inside a worker thread, where send_signal()
         * verifies that the handler ran in that very thread.
         */
        if (pthread_create(&worker, NULL, send_signal_worker,
                           (void *)(intptr_t)PIDFD_SELF_THREAD) != 0)
                ksft_exit_fail_msg("%s test: Failed to create thread\n",
                                   test_name);

        if (pthread_join(worker, &worker_ret) != 0)
                ksft_exit_fail_msg("%s test: Failed to join thread\n",
                                   test_name);

        rc = (int)(intptr_t)worker_ret;
        if (rc != 0)
                ksft_exit_fail_msg(
                        "%s test: Error %d on PIDFD_SELF_THREAD signal\n",
                        test_name, rc);

        ksft_test_result_pass("%s test: Sent signal\n", test_name);
        return 0;
}

/*
 * Check that signalling a process that has already exited and been reaped
 * fails with ESRCH.
 *
 * The /proc/<pid> descriptor is opened before the child is waited for, so the
 * descriptor outlives the process it refers to.
 *
 * Always returns 0; failures are reported via ksft_exit_fail_msg().
 */
static int test_pidfd_send_signal_exited_fail(void)
{
        const char *test_name = "pidfd_send_signal signal exited process";
        char proc_path[256];
        pid_t child;
        int proc_fd;
        int wait_status;
        int rc;
        int saved_errno;

        if (!have_pidfd_send_signal) {
                ksft_test_result_skip(
                        "%s test: pidfd_send_signal() syscall not supported\n",
                        test_name);
                return 0;
        }

        child = fork();
        if (child < 0)
                ksft_exit_fail_msg("%s test: Failed to create new process\n",
                                   test_name);

        /* The child has nothing to do but terminate. */
        if (child == 0)
                _exit(EXIT_SUCCESS);

        snprintf(proc_path, sizeof(proc_path), "/proc/%d", child);
        proc_fd = open(proc_path, O_DIRECTORY | O_CLOEXEC);

        /* Reap the child first; the open() result is examined afterwards. */
        wait_status = wait_for_pid(child);
        ksft_print_msg("waitpid WEXITSTATUS=%d\n", wait_status);

        if (proc_fd < 0)
                ksft_exit_fail_msg(
                        "%s test: Failed to open process file descriptor\n",
                        test_name);

        /* Keep errno from the syscall; close() might change it. */
        rc = sys_pidfd_send_signal(proc_fd, 0, NULL, 0);
        saved_errno = errno;
        close(proc_fd);

        if (rc == 0)
                ksft_exit_fail_msg(
                        "%s test: Managed to send signal to process even though it should have failed\n",
                        test_name);

        if (saved_errno != ESRCH)
                ksft_exit_fail_msg(
                        "%s test: Expected to receive ESRCH as errno value but received %d instead\n",
                        test_name, saved_errno);

        ksft_test_result_pass("%s test: Failed to send signal as expected\n",
                              test_name);
        return 0;
}

/*
 * Maximum number of cycles we allow. This is equivalent to PID_MAX_DEFAULT.
 * If users set a higher limit or we have cycled PIDFD_MAX_DEFAULT number of
 * times then we skip the test to not go into an infinite loop or block for a
 * long time.
 */
#define PIDFD_MAX_DEFAULT 0x8000

/*
 * Check that a file descriptor opened on /proc/<pid> cannot be used to signal
 * a different process that later ends up with the same pid.
 *
 * The work happens in a forked child inside fresh pid and mount namespaces:
 * it forks until pid PID_RECYCLE shows up, opens /proc/PID_RECYCLE, reaps that
 * process, and then keeps forking until the pid comes around again. Signalling
 * through the stale descriptor must then fail with ESRCH. The child reports
 * the outcome through its exit code (PIDFD_* constants from pidfd.h).
 *
 * Returns 0 when the test passed or was skipped; all other outcomes are
 * reported via ksft_exit_fail_msg().
 */
static int test_pidfd_send_signal_recycled_pid_fail(void)
{
        int i, ret;
        pid_t pid1;
        const char *test_name = "pidfd_send_signal signal recycled pid";

        if (!have_pidfd_send_signal) {
                ksft_test_result_skip(
                        "%s test: pidfd_send_signal() syscall not supported\n",
                        test_name);
                return 0;
        }

        /* Lacking permission to create the namespaces is a skip, not a failure. */
        ret = unshare(CLONE_NEWPID);
        if (ret < 0) {
                if (errno == EPERM) {
                        ksft_test_result_skip("%s test: Unsharing pid namespace not permitted\n",
                                              test_name);
                        return 0;
                }
                ksft_exit_fail_msg("%s test: Failed to unshare pid namespace\n",
                                   test_name);
        }

        ret = unshare(CLONE_NEWNS);
        if (ret < 0) {
                if (errno == EPERM) {
                        ksft_test_result_skip("%s test: Unsharing mount namespace not permitted\n",
                                              test_name);
                        return 0;
                }
                ksft_exit_fail_msg("%s test: Failed to unshare mount namespace\n",
                                   test_name);
        }

        /* Mark the whole mount tree private before /proc is remounted below. */
        ret = mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0);
        if (ret < 0)
                ksft_exit_fail_msg("%s test: Failed to remount / private\n",
                                   test_name);

        /* pid 1 in new pid namespace */
        pid1 = fork();
        if (pid1 < 0)
                ksft_exit_fail_msg("%s test: Failed to create new process\n",
                                   test_name);

        if (pid1 == 0) {
                char buf[256];
                pid_t pid2;
                int pidfd = -1;

                /* Replace /proc with a fresh procfs mount; a failed detach is ignored. */
                (void)umount2("/proc", MNT_DETACH);
                ret = mount("proc", "/proc", "proc", 0, NULL);
                if (ret < 0)
                        _exit(PIDFD_ERROR);

                /* grab pid PID_RECYCLE */
                for (i = 0; i <= PIDFD_MAX_DEFAULT; i++) {
                        pid2 = fork();
                        if (pid2 < 0)
                                _exit(PIDFD_ERROR);

                        if (pid2 == 0)
                                _exit(PIDFD_PASS);

                        /*
                         * Open /proc/<pid> before the child is reaped so that
                         * the descriptor refers to this particular process.
                         */
                        if (pid2 == PID_RECYCLE) {
                                snprintf(buf, sizeof(buf), "/proc/%d", pid2);
                                ksft_print_msg("pid to recycle is %d\n", pid2);
                                pidfd = open(buf, O_DIRECTORY | O_CLOEXEC);
                        }

                        if (wait_for_pid(pid2))
                                _exit(PIDFD_ERROR);

                        /* Stop once PID_RECYCLE has been reached or overshot. */
                        if (pid2 >= PID_RECYCLE)
                                break;
                }

                /*
                 * We want to be as predictable as we can so if we haven't been
                 * able to grab pid PID_RECYCLE skip the test.
                 */
                if (pid2 != PID_RECYCLE) {
                        /* skip test */
                        close(pidfd);
                        _exit(PIDFD_SKIP);
                }

                /* The pid was hit but the open() above failed. */
                if (pidfd < 0)
                        _exit(PIDFD_ERROR);

                /* Keep forking until the pid comes around again. */
                for (i = 0; i <= PIDFD_MAX_DEFAULT; i++) {
                        char c;
                        int pipe_fds[2];
                        pid_t recycled_pid;
                        int child_ret = PIDFD_PASS;

                        ret = pipe2(pipe_fds, O_CLOEXEC);
                        if (ret < 0)
                                _exit(PIDFD_ERROR);

                        recycled_pid = fork();
                        if (recycled_pid < 0)
                                _exit(PIDFD_ERROR);

                        if (recycled_pid == 0) {
                                /*
                                 * Block in read() until the parent has closed
                                 * its write end, then exit.
                                 */
                                close(pipe_fds[1]);
                                (void)read(pipe_fds[0], &c, 1);
                                close(pipe_fds[0]);

                                _exit(PIDFD_PASS);
                        }

                        /*
                         * Stop the child so we can inspect whether we have
                         * recycled pid PID_RECYCLE.
                         */
                        close(pipe_fds[0]);
                        ret = kill(recycled_pid, SIGSTOP);
                        close(pipe_fds[1]);
                        if (ret) {
                                (void)wait_for_pid(recycled_pid);
                                _exit(PIDFD_ERROR);
                        }

                        /*
                         * We have recycled the pid. Try to signal it. This
                         * needs to fail since this is a different process than
                         * the one the pidfd refers to.
                         */
                        if (recycled_pid == PID_RECYCLE) {
                                ret = sys_pidfd_send_signal(pidfd, SIGCONT,
                                                            NULL, 0);
                                if (ret && errno == ESRCH)
                                        child_ret = PIDFD_XFAIL;
                                else
                                        child_ret = PIDFD_FAIL;
                        }

                        /* let the process move on */
                        ret = kill(recycled_pid, SIGCONT);
                        if (ret)
                                (void)kill(recycled_pid, SIGKILL);

                        if (wait_for_pid(recycled_pid))
                                _exit(PIDFD_ERROR);

                        /*
                         * A verdict (FAIL or XFAIL) ends the child right away;
                         * PASS means the pid has not been recycled yet, so the
                         * loop continues.
                         */
                        switch (child_ret) {
                        case PIDFD_FAIL:
                                /* fallthrough */
                        case PIDFD_XFAIL:
                                _exit(child_ret);
                        case PIDFD_PASS:
                                break;
                        default:
                                /* not reached */
                                _exit(PIDFD_ERROR);
                        }

                        /*
                         * If the user set a custom pid_max limit we could be
                         * in the millions.
                         * Skip the test in this case.
                         */
                        if (recycled_pid > PIDFD_MAX_DEFAULT)
                                _exit(PIDFD_SKIP);
                }

                /* failed to recycle pid */
                _exit(PIDFD_SKIP);
        }

        /*
         * Translate the child's verdict into a kselftest result.
         * NOTE(review): wait_for_pid() comes from pidfd.h and is presumably
         * returning the child's exit status - confirm. The cases without a
         * break rely on ksft_exit_fail_msg() not returning.
         */
        ret = wait_for_pid(pid1);
        switch (ret) {
        case PIDFD_FAIL:
                ksft_exit_fail_msg(
                        "%s test: Managed to signal recycled pid %d\n",
                        test_name, PID_RECYCLE);
        case PIDFD_PASS:
                ksft_exit_fail_msg("%s test: Failed to recycle pid %d\n",
                                   test_name, PID_RECYCLE);
        case PIDFD_SKIP:
                ksft_test_result_skip("%s test: Skipping test\n", test_name);
                ret = 0;
                break;
        case PIDFD_XFAIL:
                ksft_test_result_pass(
                        "%s test: Failed to signal recycled pid as expected\n",
                        test_name);
                ret = 0;
                break;
        default /* PIDFD_ERROR */:
                ksft_exit_fail_msg("%s test: Error while running tests\n",
                                   test_name);
        }

        return ret;
}

/*
 * Probe whether the running kernel implements pidfd_send_signal().
 *
 * Signal number 0 is sent to ourselves through a /proc/self file descriptor.
 * ENOSYS turns into a skip, any other failure is reported via
 * ksft_exit_fail_msg(). On success have_pidfd_send_signal is set to true so
 * that the tests depending on the syscall run.
 *
 * Always returns 0.
 */
static int test_pidfd_send_signal_syscall_support(void)
{
        int pidfd, ret, saved_errno;
        const char *test_name = "pidfd_send_signal check for support";

        pidfd = open("/proc/self", O_DIRECTORY | O_CLOEXEC);
        if (pidfd < 0)
                ksft_exit_fail_msg(
                        "%s test: Failed to open process file descriptor\n",
                        test_name);

        ret = sys_pidfd_send_signal(pidfd, 0, NULL, 0);
        /* close() may overwrite errno, so capture it before cleaning up. */
        saved_errno = errno;
        /*
         * The descriptor is not needed past this point on any path; closing it
         * here keeps the ENOSYS skip path from leaking it.
         */
        close(pidfd);

        if (ret < 0) {
                if (saved_errno == ENOSYS) {
                        ksft_test_result_skip(
                                "%s test: pidfd_send_signal() syscall not supported\n",
                                test_name);
                        return 0;
                }
                ksft_exit_fail_msg("%s test: Failed to send signal\n",
                                   test_name);
        }

        have_pidfd_send_signal = true;
        ksft_test_result_pass(
                "%s test: pidfd_send_signal() syscall is supported. Tests can be executed\n",
                test_name);
        return 0;
}

/*
 * Thread body for the exec test: replace the process image with
 * "sleep CHILD_THREAD_MIN_WAIT".
 *
 * @priv: unused.
 *
 * Only returns (NULL) when execl() failed.
 */
static void *test_pidfd_poll_exec_thread(void *priv)
{
        pid_t self_pid = getpid();
        long self_tid = syscall(SYS_gettid);

        ksft_print_msg("Child Thread: starting. pid %d tid %ld ; and sleeping\n",
                        self_pid, self_tid);
        ksft_print_msg("Child Thread: doing exec of sleep\n");

        execl("/bin/sleep", "sleep", str(CHILD_THREAD_MIN_WAIT), (char *)NULL);

        /* Reached only when the exec did not happen. */
        self_pid = getpid();
        self_tid = syscall(SYS_gettid);
        ksft_print_msg("Child Thread: DONE. pid %d tid %ld\n",
                        self_pid, self_tid);
        return NULL;
}

/*
 * Wait until pidfd becomes readable.
 *
 * @test_name: prefix used in failure messages.
 * @pidfd:     file descriptor registered with epoll for EPOLLIN.
 *
 * Waits in epoll_wait() for at most 5000 ms. Any result other than exactly one
 * event carrying EPOLLIN is reported via ksft_exit_fail_msg().
 */
static void poll_pidfd(const char *test_name, int pidfd)
{
        int c;
        int epoll_fd = epoll_create1(EPOLL_CLOEXEC);
        struct epoll_event event, events[MAX_EVENTS];

        if (epoll_fd == -1)
                ksft_exit_fail_msg("%s test: Failed to create epoll file descriptor "
                                   "(errno %d)\n",
                                   test_name, errno);

        /*
         * Start from zeroed memory: events[0] is printed in the failure
         * message below even when epoll_wait() returned no event, so it must
         * not hold indeterminate data.
         */
        memset(&event, 0, sizeof(event));
        memset(events, 0, sizeof(events));

        event.events = EPOLLIN;
        event.data.fd = pidfd;

        if (epoll_ctl(epoll_fd, EPOLL_CTL_ADD, pidfd, &event)) {
                ksft_exit_fail_msg("%s test: Failed to add epoll file descriptor "
                                   "(errno %d)\n",
                                   test_name, errno);
        }

        c = epoll_wait(epoll_fd, events, MAX_EVENTS, 5000);
        if (c != 1 || !(events[0].events & EPOLLIN))
                ksft_exit_fail_msg("%s test: Unexpected epoll_wait result (c=%d, events=%x) "
                                   "(errno %d)\n",
                                   test_name, c, events[0].events, errno);

        close(epoll_fd);
}

/*
 * Entry point of the cloned child in the exec test: start a thread that
 * execs, then idle forever.
 *
 * @args: unused.
 */
static int child_poll_exec_test(void *args)
{
        pthread_t exec_thread;

        ksft_print_msg("Child (pidfd): starting. pid %d tid %ld\n", getpid(),
                        syscall(SYS_gettid));
        pthread_create(&exec_thread, NULL, test_pidfd_poll_exec_thread, NULL);

        /*
         * An exec performed by a non-leader thread destroys the leader
         * immediately. The parent's wait must not return too early, or the
         * test fails.
         */
        for (;;)
                sleep(1);

        return 0;
}

/*
 * Check that the parent is not notified prematurely when a non-leader thread
 * of the child calls exec.
 *
 * @use_waitpid: non-zero to wait with waitpid(), zero to poll the pidfd.
 *
 * The test passes when the wait took between CHILD_THREAD_MIN_WAIT and
 * CHILD_THREAD_MIN_WAIT + 2 seconds, measured from function entry.
 */
static void test_pidfd_poll_exec(int use_waitpid)
{
        int pid, pidfd = 0;
        int status, ret;
        time_t prog_start = time(NULL);
        time_t prog_time;
        const char *test_name = "pidfd_poll check for premature notification on child thread exec";

        ksft_print_msg("Parent: pid: %d\n", getpid());
        pid = pidfd_clone(CLONE_PIDFD, &pidfd, child_poll_exec_test);
        if (pid < 0)
                ksft_exit_fail_msg("%s test: pidfd_clone failed (ret %d, errno %d)\n",
                                   test_name, pid, errno);

        ksft_print_msg("Parent: Waiting for Child (%d) to complete.\n", pid);

        if (use_waitpid) {
                ret = waitpid(pid, &status, 0);
                if (ret == -1)
                        ksft_print_msg("Parent: error\n");

                if (ret == pid)
                        ksft_print_msg("Parent: Child process waited for.\n");
        } else {
                poll_pidfd(test_name, pidfd);
        }

        prog_time = time(NULL) - prog_start;

        /* time_t need not be unsigned long; convert to match the %lu format. */
        ksft_print_msg("Time waited for child: %lu\n", (unsigned long)prog_time);

        close(pidfd);

        if (prog_time < CHILD_THREAD_MIN_WAIT || prog_time > CHILD_THREAD_MIN_WAIT + 2)
                ksft_exit_fail_msg("%s test: Failed\n", test_name);
        else
                ksft_test_result_pass("%s test: Passed\n", test_name);
}

/*
 * Thread body for the leader-exit test: announce itself, sleep for
 * CHILD_THREAD_MIN_WAIT seconds, announce completion.
 *
 * @priv: unused.
 *
 * Returns NULL.
 */
static void *test_pidfd_poll_leader_exit_thread(void *priv)
{
        pid_t self_pid = getpid();
        long self_tid = syscall(SYS_gettid);

        ksft_print_msg("Child Thread: starting. pid %d tid %ld ; and sleeping\n",
                        self_pid, self_tid);

        sleep(CHILD_THREAD_MIN_WAIT);

        self_pid = getpid();
        self_tid = syscall(SYS_gettid);
        ksft_print_msg("Child Thread: DONE. pid %d tid %ld\n", self_pid, self_tid);
        return NULL;
}

/*
 * Time at which the child's group leader exited. Points into a shared mapping
 * set up by test_pidfd_poll_leader_exit() so the parent can read the value.
 */
static time_t *child_exit_secs;

/*
 * Entry point of the cloned child in the leader-exit test: start two sleeping
 * threads, record the current time, and terminate only the leader thread.
 *
 * @args: unused.
 */
static int child_poll_leader_exit_test(void *args)
{
        pthread_t workers[2];
        int n;

        ksft_print_msg("Child: starting. pid %d tid %ld\n", getpid(), syscall(SYS_gettid));
        for (n = 0; n < 2; n++)
                pthread_create(&workers[n], NULL,
                               test_pidfd_poll_leader_exit_thread, NULL);

        /*
         * exit() from glibc ends up in the exit_group syscall, so the plain
         * exit syscall is invoked directly instead: only the group leader
         * goes away and the threads keep running.
         */
        *child_exit_secs = time(NULL);
        syscall(SYS_exit, 0);

        /* Not reached; present so the compiler sees the function end. */
        exit(0);
}

/*
 * Check that the parent is not notified prematurely when the child's group
 * leader exits while other threads of the group are still running.
 *
 * @use_waitpid: non-zero to wait with waitpid(), zero to poll the pidfd.
 *
 * The child stores its leader's exit time in a shared anonymous mapping. The
 * test passes when the notification arrives between CHILD_THREAD_MIN_WAIT and
 * CHILD_THREAD_MIN_WAIT + 2 seconds after that time.
 */
static void test_pidfd_poll_leader_exit(int use_waitpid)
{
        int pid, pidfd = 0;
        int status, ret = 0;
        time_t since_child_exit;
        /* The trailing space keeps "non-empty" and "group" from running together. */
        const char *test_name = "pidfd_poll check for premature notification on non-empty "
                                "group leader exit";

        child_exit_secs = mmap(NULL, sizeof *child_exit_secs, PROT_READ | PROT_WRITE,
                        MAP_SHARED | MAP_ANONYMOUS, -1, 0);

        if (child_exit_secs == MAP_FAILED)
                ksft_exit_fail_msg("%s test: mmap failed (errno %d)\n",
                                   test_name, errno);

        ksft_print_msg("Parent: pid: %d\n", getpid());
        pid = pidfd_clone(CLONE_PIDFD, &pidfd, child_poll_leader_exit_test);
        if (pid < 0)
                ksft_exit_fail_msg("%s test: pidfd_clone failed (ret %d, errno %d)\n",
                                   test_name, pid, errno);

        ksft_print_msg("Parent: Waiting for Child (%d) to complete.\n", pid);

        if (use_waitpid) {
                ret = waitpid(pid, &status, 0);
                if (ret == -1)
                        ksft_print_msg("Parent: error\n");
        } else {
                /*
                 * This sleep tests for the case where if the child exits, and is in
                 * EXIT_ZOMBIE, but the thread group leader is non-empty, then the poll
                 * doesn't prematurely return even though there are active threads
                 */
                sleep(1);
                poll_pidfd(test_name, pidfd);
        }

        if (ret == pid)
                ksft_print_msg("Parent: Child process waited for.\n");

        since_child_exit = time(NULL) - *child_exit_secs;

        /*
         * The timestamp has been read; release the shared page so that
         * repeated runs of this test do not accumulate mappings.
         */
        munmap(child_exit_secs, sizeof *child_exit_secs);
        child_exit_secs = NULL;

        /* time_t need not be unsigned long; convert to match the %lu format. */
        ksft_print_msg("Time since child exit: %lu\n",
                       (unsigned long)since_child_exit);

        close(pidfd);

        if (since_child_exit < CHILD_THREAD_MIN_WAIT ||
                        since_child_exit > CHILD_THREAD_MIN_WAIT + 2)
                ksft_exit_fail_msg("%s test: Failed\n", test_name);
        else
                ksft_test_result_pass("%s test: Passed\n", test_name);
}

/*
 * Test driver: announces a plan of eight tests and runs them in a fixed
 * order. argc and argv are unused.
 */
int main(int argc, char **argv)
{
        int use_waitpid;

        ksft_print_header();
        ksft_set_plan(8);

        /* Each poll test runs twice: first polling the pidfd, then with waitpid(). */
        for (use_waitpid = 0; use_waitpid <= 1; use_waitpid++)
                test_pidfd_poll_exec(use_waitpid);

        for (use_waitpid = 0; use_waitpid <= 1; use_waitpid++)
                test_pidfd_poll_leader_exit(use_waitpid);

        /*
         * The support probe goes first among the signal tests because it sets
         * have_pidfd_send_signal, which the remaining ones check.
         */
        test_pidfd_send_signal_syscall_support();
        test_pidfd_send_signal_simple_success();
        test_pidfd_send_signal_exited_fail();
        test_pidfd_send_signal_recycled_pid_fail();

        ksft_exit_pass();
}