root/tools/perf/tests/switch-tracking.c
// SPDX-License-Identifier: GPL-2.0
#include <sys/time.h>
#include <sys/prctl.h>
#include <errno.h>
#include <limits.h>
#include <time.h>
#include <stdlib.h>
#include <linux/zalloc.h>
#include <linux/err.h>
#include <perf/cpumap.h>
#include <perf/evlist.h>
#include <perf/mmap.h>

#include "debug.h"
#include "parse-events.h"
#include "evlist.h"
#include "evsel.h"
#include "thread_map.h"
#include "record.h"
#include "tests.h"
#include "util/mmap.h"
#include "util/sample.h"
#include "pmus.h"

static int spin_sleep(void)
{
        struct timeval start, now, diff, maxtime;
        struct timespec ts;
        int err, i;

        maxtime.tv_sec = 0;
        maxtime.tv_usec = 50000;

        err = gettimeofday(&start, NULL);
        if (err)
                return err;

        /* Spin for 50ms */
        while (1) {
                for (i = 0; i < 1000; i++)
                        barrier();

                err = gettimeofday(&now, NULL);
                if (err)
                        return err;

                timersub(&now, &start, &diff);
                if (timercmp(&diff, &maxtime, > /* For checkpatch */))
                        break;
        }

        ts.tv_nsec = 50 * 1000 * 1000;
        ts.tv_sec = 0;

        /* Sleep for 50ms */
        err = nanosleep(&ts, NULL);
        if (err == EINTR)
                err = 0;

        return err;
}

struct switch_tracking {
        struct evsel *switch_evsel;
        struct evsel *cycles_evsel;
        pid_t *tids;
        int nr_tids;
        int comm_seen[4];
        int cycles_before_comm_1;
        int cycles_between_comm_2_and_comm_3;
        int cycles_after_comm_4;
};

static int check_comm(struct switch_tracking *switch_tracking,
                      union perf_event *event, const char *comm, int nr)
{
        if (event->header.type == PERF_RECORD_COMM &&
            (pid_t)event->comm.pid == getpid() &&
            (pid_t)event->comm.tid == getpid() &&
            strcmp(event->comm.comm, comm) == 0) {
                if (switch_tracking->comm_seen[nr]) {
                        pr_debug("Duplicate comm event\n");
                        return -1;
                }
                switch_tracking->comm_seen[nr] = 1;
                pr_debug3("comm event: %s nr: %d\n", event->comm.comm, nr);
                return 1;
        }
        return 0;
}

static int check_cpu(struct switch_tracking *switch_tracking, int cpu)
{
        int i, nr = cpu + 1;

        if (cpu < 0)
                return -1;

        if (!switch_tracking->tids) {
                switch_tracking->tids = calloc(nr, sizeof(pid_t));
                if (!switch_tracking->tids)
                        return -1;
                for (i = 0; i < nr; i++)
                        switch_tracking->tids[i] = -1;
                switch_tracking->nr_tids = nr;
                return 0;
        }

        if (cpu >= switch_tracking->nr_tids) {
                void *addr;

                addr = realloc(switch_tracking->tids, nr * sizeof(pid_t));
                if (!addr)
                        return -1;
                switch_tracking->tids = addr;
                for (i = switch_tracking->nr_tids; i < nr; i++)
                        switch_tracking->tids[i] = -1;
                switch_tracking->nr_tids = nr;
                return 0;
        }

        return 0;
}

static int process_sample_event(struct evlist *evlist,
                                union perf_event *event,
                                struct switch_tracking *switch_tracking)
{
        struct perf_sample sample;
        struct evsel *evsel;
        pid_t next_tid, prev_tid;
        int cpu, err;

        perf_sample__init(&sample, /*all=*/false);
        if (evlist__parse_sample(evlist, event, &sample)) {
                pr_debug("evlist__parse_sample failed\n");
                err = -1;
                goto out;
        }

        evsel = evlist__id2evsel(evlist, sample.id);
        if (evsel == switch_tracking->switch_evsel) {
                next_tid = evsel__intval(evsel, &sample, "next_pid");
                prev_tid = evsel__intval(evsel, &sample, "prev_pid");
                cpu = sample.cpu;
                pr_debug3("sched_switch: cpu: %d prev_tid %d next_tid %d\n",
                          cpu, prev_tid, next_tid);
                err = check_cpu(switch_tracking, cpu);
                if (err)
                        goto out;
                /*
                 * Check for no missing sched_switch events i.e. that the
                 * evsel->core.system_wide flag has worked.
                 */
                if (switch_tracking->tids[cpu] != -1 &&
                    switch_tracking->tids[cpu] != prev_tid) {
                        pr_debug("Missing sched_switch events\n");
                        err = -1;
                        goto out;
                }
                switch_tracking->tids[cpu] = next_tid;
        }

        if (evsel == switch_tracking->cycles_evsel) {
                pr_debug3("cycles event\n");
                if (!switch_tracking->comm_seen[0])
                        switch_tracking->cycles_before_comm_1 = 1;
                if (switch_tracking->comm_seen[1] &&
                    !switch_tracking->comm_seen[2])
                        switch_tracking->cycles_between_comm_2_and_comm_3 = 1;
                if (switch_tracking->comm_seen[3])
                        switch_tracking->cycles_after_comm_4 = 1;
        }

        err = 0;
out:
        perf_sample__exit(&sample);
        return err;
}

static int process_event(struct evlist *evlist, union perf_event *event,
                         struct switch_tracking *switch_tracking)
{
        if (event->header.type == PERF_RECORD_SAMPLE)
                return process_sample_event(evlist, event, switch_tracking);

        if (event->header.type == PERF_RECORD_COMM) {
                int err, done = 0;

                err = check_comm(switch_tracking, event, "Test COMM 1", 0);
                if (err < 0)
                        return -1;
                done += err;
                err = check_comm(switch_tracking, event, "Test COMM 2", 1);
                if (err < 0)
                        return -1;
                done += err;
                err = check_comm(switch_tracking, event, "Test COMM 3", 2);
                if (err < 0)
                        return -1;
                done += err;
                err = check_comm(switch_tracking, event, "Test COMM 4", 3);
                if (err < 0)
                        return -1;
                done += err;
                if (done != 1) {
                        pr_debug("Unexpected comm event\n");
                        return -1;
                }
        }

        return 0;
}

struct event_node {
        struct list_head list;
        union perf_event *event;
        u64 event_time;
};

static int add_event(struct evlist *evlist, struct list_head *events,
                     union perf_event *event)
{
        struct perf_sample sample;
        struct event_node *node;

        node = malloc(sizeof(struct event_node));
        if (!node) {
                pr_debug("malloc failed\n");
                return -1;
        }
        node->event = event;
        list_add(&node->list, events);

        if (evlist__parse_sample(evlist, event, &sample)) {
                pr_debug("evlist__parse_sample failed\n");
                return -1;
        }

        if (!sample.time) {
                pr_debug("event with no time\n");
                return -1;
        }

        node->event_time = sample.time;

        return 0;
}

static void free_event_nodes(struct list_head *events)
{
        struct event_node *node;

        while (!list_empty(events)) {
                node = list_entry(events->next, struct event_node, list);
                list_del_init(&node->list);
                free(node);
        }
}

static int compar(const void *a, const void *b)
{
        const struct event_node *nodea = a;
        const struct event_node *nodeb = b;
        s64 cmp = nodea->event_time - nodeb->event_time;

        return cmp < 0 ? -1 : (cmp > 0 ? 1 : 0);
}

static int process_events(struct evlist *evlist,
                          struct switch_tracking *switch_tracking)
{
        union perf_event *event;
        unsigned pos, cnt = 0;
        LIST_HEAD(events);
        struct event_node *events_array, *node;
        struct mmap *md;
        int i, ret;

        for (i = 0; i < evlist->core.nr_mmaps; i++) {
                md = &evlist->mmap[i];
                if (perf_mmap__read_init(&md->core) < 0)
                        continue;

                while ((event = perf_mmap__read_event(&md->core)) != NULL) {
                        cnt += 1;
                        ret = add_event(evlist, &events, event);
                         perf_mmap__consume(&md->core);
                        if (ret < 0)
                                goto out_free_nodes;
                }
                perf_mmap__read_done(&md->core);
        }

        events_array = calloc(cnt, sizeof(struct event_node));
        if (!events_array) {
                pr_debug("calloc failed\n");
                ret = -1;
                goto out_free_nodes;
        }

        pos = 0;
        list_for_each_entry(node, &events, list)
                events_array[pos++] = *node;

        qsort(events_array, cnt, sizeof(struct event_node), compar);

        for (pos = 0; pos < cnt; pos++) {
                ret = process_event(evlist, events_array[pos].event,
                                    switch_tracking);
                if (ret < 0)
                        goto out_free;
        }

        ret = 0;
out_free:
        pr_debug("%u events recorded\n", cnt);
        free(events_array);
out_free_nodes:
        free_event_nodes(&events);
        return ret;
}

/**
 * test__switch_tracking - test using sched_switch and tracking events.
 *
 * This function implements a test that checks that sched_switch events and
 * tracking events can be recorded for a workload (current process) using the
 * evsel->core.system_wide and evsel->tracking flags (respectively) with other events
 * sometimes enabled or disabled.
 */
static int test__switch_tracking(struct test_suite *test __maybe_unused, int subtest __maybe_unused)
{
        const char *sched_switch = "sched:sched_switch";
        const char *cycles = "cpu-cycles:u";
        struct switch_tracking switch_tracking = { .tids = NULL, };
        struct record_opts opts = {
                .mmap_pages          = UINT_MAX,
                .user_freq           = UINT_MAX,
                .user_interval       = ULLONG_MAX,
                .freq                = 4000,
                .target              = {
                        .uses_mmap   = true,
                },
        };
        struct perf_thread_map *threads = NULL;
        struct perf_cpu_map *cpus = NULL;
        struct evlist *evlist = NULL;
        struct evsel *evsel, *cpu_clocks_evsel, *cycles_evsel;
        struct evsel *switch_evsel, *tracking_evsel;
        const char *comm;
        int err = -1;

        threads = thread_map__new_by_tid(getpid());
        if (!threads) {
                pr_debug("thread_map__new failed!\n");
                goto out_err;
        }

        cpus = perf_cpu_map__new_online_cpus();
        if (!cpus) {
                pr_debug("perf_cpu_map__new failed!\n");
                goto out_err;
        }

        evlist = evlist__new();
        if (!evlist) {
                pr_debug("evlist__new failed!\n");
                goto out_err;
        }

        perf_evlist__set_maps(&evlist->core, cpus, threads);

        /* First event */
        err = parse_event(evlist, "cpu-clock:u");
        if (err) {
                pr_debug("Failed to parse event dummy:u\n");
                goto out_err;
        }

        cpu_clocks_evsel = evlist__last(evlist);

        /* Second event */
        err = parse_event(evlist, cycles);
        if (err) {
                pr_debug("Failed to parse event %s\n", cycles);
                goto out_err;
        }

        cycles_evsel = evlist__last(evlist);

        /* Third event */
        if (!evlist__can_select_event(evlist, sched_switch)) {
                pr_debug("No sched_switch\n");
                err = 0;
                goto out;
        }

        switch_evsel = evlist__add_sched_switch(evlist, true);
        if (IS_ERR(switch_evsel)) {
                err = PTR_ERR(switch_evsel);
                pr_debug("Failed to create event %s\n", sched_switch);
                goto out_err;
        }

        switch_evsel->immediate = true;

        /* Test moving an event to the front */
        if (cycles_evsel == evlist__first(evlist)) {
                pr_debug("cycles event already at front");
                goto out_err;
        }
        evlist__to_front(evlist, cycles_evsel);
        if (cycles_evsel != evlist__first(evlist)) {
                pr_debug("Failed to move cycles event to front");
                goto out_err;
        }

        evsel__set_sample_bit(cycles_evsel, CPU);
        evsel__set_sample_bit(cycles_evsel, TIME);

        /* Fourth event */
        err = parse_event(evlist, "dummy:u");
        if (err) {
                pr_debug("Failed to parse event dummy:u\n");
                goto out_err;
        }

        tracking_evsel = evlist__last(evlist);

        evlist__set_tracking_event(evlist, tracking_evsel);

        tracking_evsel->core.attr.freq = 0;
        tracking_evsel->core.attr.sample_period = 1;

        evsel__set_sample_bit(tracking_evsel, TIME);

        /* Config events */
        evlist__config(evlist, &opts, NULL);

        /* Check moved event is still at the front */
        if (cycles_evsel != evlist__first(evlist)) {
                pr_debug("Front event no longer at front");
                goto out_err;
        }

        /* Check tracking event is tracking */
        if (!tracking_evsel->core.attr.mmap || !tracking_evsel->core.attr.comm) {
                pr_debug("Tracking event not tracking\n");
                goto out_err;
        }

        /* Check non-tracking events are not tracking */
        evlist__for_each_entry(evlist, evsel) {
                if (evsel != tracking_evsel) {
                        if (evsel->core.attr.mmap || evsel->core.attr.comm) {
                                pr_debug("Non-tracking event is tracking\n");
                                goto out_err;
                        }
                }
        }

        if (evlist__open(evlist) < 0) {
                pr_debug("Not supported\n");
                err = 0;
                goto out;
        }

        err = evlist__mmap(evlist, UINT_MAX);
        if (err) {
                pr_debug("evlist__mmap failed!\n");
                goto out_err;
        }

        evlist__enable(evlist);

        err = evsel__disable(cpu_clocks_evsel);
        if (err) {
                pr_debug("perf_evlist__disable_event failed!\n");
                goto out_err;
        }

        err = spin_sleep();
        if (err) {
                pr_debug("spin_sleep failed!\n");
                goto out_err;
        }

        comm = "Test COMM 1";
        err = prctl(PR_SET_NAME, (unsigned long)comm, 0, 0, 0);
        if (err) {
                pr_debug("PR_SET_NAME failed!\n");
                goto out_err;
        }

        err = evsel__disable(cycles_evsel);
        if (err) {
                pr_debug("perf_evlist__disable_event failed!\n");
                goto out_err;
        }

        comm = "Test COMM 2";
        err = prctl(PR_SET_NAME, (unsigned long)comm, 0, 0, 0);
        if (err) {
                pr_debug("PR_SET_NAME failed!\n");
                goto out_err;
        }

        err = spin_sleep();
        if (err) {
                pr_debug("spin_sleep failed!\n");
                goto out_err;
        }

        comm = "Test COMM 3";
        err = prctl(PR_SET_NAME, (unsigned long)comm, 0, 0, 0);
        if (err) {
                pr_debug("PR_SET_NAME failed!\n");
                goto out_err;
        }

        err = evsel__enable(cycles_evsel);
        if (err) {
                pr_debug("perf_evlist__disable_event failed!\n");
                goto out_err;
        }

        comm = "Test COMM 4";
        err = prctl(PR_SET_NAME, (unsigned long)comm, 0, 0, 0);
        if (err) {
                pr_debug("PR_SET_NAME failed!\n");
                goto out_err;
        }

        err = spin_sleep();
        if (err) {
                pr_debug("spin_sleep failed!\n");
                goto out_err;
        }

        evlist__disable(evlist);

        switch_tracking.switch_evsel = switch_evsel;
        switch_tracking.cycles_evsel = cycles_evsel;

        err = process_events(evlist, &switch_tracking);

        zfree(&switch_tracking.tids);

        if (err)
                goto out_err;

        /* Check all 4 comm events were seen i.e. that evsel->tracking works */
        if (!switch_tracking.comm_seen[0] || !switch_tracking.comm_seen[1] ||
            !switch_tracking.comm_seen[2] || !switch_tracking.comm_seen[3]) {
                pr_debug("Missing comm events\n");
                goto out_err;
        }

        /* Check cycles event got enabled */
        if (!switch_tracking.cycles_before_comm_1) {
                pr_debug("Missing cycles events\n");
                goto out_err;
        }

        /* Check cycles event got disabled */
        if (switch_tracking.cycles_between_comm_2_and_comm_3) {
                pr_debug("cycles events even though event was disabled\n");
                goto out_err;
        }

        /* Check cycles event got enabled again */
        if (!switch_tracking.cycles_after_comm_4) {
                pr_debug("Missing cycles events\n");
                goto out_err;
        }
out:
        if (evlist) {
                evlist__disable(evlist);
                evlist__delete(evlist);
        }
        perf_cpu_map__put(cpus);
        perf_thread_map__put(threads);

        return err;

out_err:
        err = -1;
        goto out;
}

DEFINE_SUITE_EXCLUSIVE("Track with sched_switch", switch_tracking);