root/tools/perf/dlfilters/dlfilter-show-cycles.c
// SPDX-License-Identifier: GPL-2.0
/*
 * dlfilter-show-cycles.c: Print the number of cycles at the start of each line
 * Copyright (c) 2021, Intel Corporation.
 */
#include <perf/perf_dlfilter.h>
#include <string.h>
#include <stdio.h>

/*
 * Number of per-CPU counter slots. Samples whose CPU number is outside
 * 0 .. MAX_CPU - 1 are accounted per thread instead (see filter_event_early()).
 */
#define MAX_CPU 4096

/*
 * Counter classes, used as the second index of the counter arrays. The class
 * for a sample is chosen from its event name by event_entry():
 *   INSTR_CYC - event name starts with "instructions"
 *   BRNCH_CYC - event name starts with "branches"
 *   OTHER_CYC - any other event name, or no event name
 *   MAX_ENTRY - number of counter classes (array dimension)
 */
enum {
        INSTR_CYC,
        BRNCH_CYC,
        OTHER_CYC,
        MAX_ENTRY
};

/* Cycles accumulated so far, per CPU and counter class */
static __u64 cycles[MAX_CPU][MAX_ENTRY];
/* Value of cycles[][] when last printed; used to compute the printed delta */
static __u64 cycles_rpt[MAX_CPU][MAX_ENTRY];

/*
 * Per-thread counters live in a fixed-size hash table keyed by tid, used when
 * a sample has no usable CPU number. Collisions are resolved by probing the
 * following slots (see find_entry()).
 */
/* log2 of the number of table slots */
#define BITS            16
/* Number of slots in the table */
#define TABLESZ         (1 << BITS)
/* Maximum number of slots that may be in use: half of the table */
#define TABLEMAX        (TABLESZ / 2)
/* Mask reducing a tid to its initial slot index */
#define MASK            (TABLESZ - 1)

/*
 * One slot of the per-thread table:
 *   used       - non-zero once the slot has been claimed for a thread
 *   tid        - thread id owning the slot
 *   cycles     - cycles accumulated so far, per counter class
 *   cycles_rpt - value of cycles[] when last printed
 */
static struct entry {
        __u32 used;
        __s32 tid;
        __u64 cycles[MAX_ENTRY];
        __u64 cycles_rpt[MAX_ENTRY];
} table[TABLESZ];

/* Number of table slots currently in use */
static int tid_cnt;

/*
 * Map an event name to the counter class it is accounted against.
 *
 * Names beginning with "instructions" select INSTR_CYC and names beginning
 * with "branches" select BRNCH_CYC. Anything else, including a NULL name,
 * selects OTHER_CYC. Only the prefix is compared, so any suffix after the
 * base name is ignored.
 */
static int event_entry(const char *event)
{
        int idx = OTHER_CYC;

        if (event) {
                if (strncmp(event, "instructions", 12) == 0)
                        idx = INSTR_CYC;
                else if (strncmp(event, "branches", 8) == 0)
                        idx = BRNCH_CYC;
        }

        return idx;
}

/*
 * Look up the per-thread table slot for @tid, claiming a free slot if the
 * thread has not been seen before.
 *
 * Probing starts at (tid & MASK) and walks forward one slot at a time,
 * wrapping at the end of the table, until the tid or an unused slot is found.
 * Because at most TABLEMAX (half of TABLESZ) slots are ever claimed, an unused
 * slot always exists and the walk terminates.
 *
 * Returns the slot, or NULL (after reporting on stderr) if @tid is new and
 * TABLEMAX slots are already in use.
 */
static struct entry *find_entry(__s32 tid)
{
        struct entry *slot;
        __u32 idx;

        /* TABLESZ is a power of two, so masking wraps the index to 0 */
        for (idx = tid & MASK; table[idx].used; idx = (idx + 1) & MASK) {
                if (table[idx].tid == tid)
                        return &table[idx];
        }

        if (tid_cnt >= TABLEMAX) {
                fprintf(stderr, "Too many threads\n");
                return NULL;
        }

        /* idx now refers to the first unused slot on the probe path */
        slot = &table[idx];
        slot->tid = tid;
        slot->used = 1;
        tid_cnt++;

        return slot;
}

/*
 * Add @cnt cycles to counter class @pos of thread @tid.
 *
 * The count is dropped if find_entry() cannot provide a table slot for the
 * thread.
 */
static void add_entry(__s32 tid, int pos, __u64 cnt)
{
        struct entry *slot = find_entry(tid);

        if (!slot)
                return;

        slot->cycles[pos] += cnt;
}

/*
 * Accumulate the cycle count carried by @sample.
 *
 * Samples with a zero cycle count are ignored. Otherwise the count is added to
 * the per-CPU counter when the CPU number is within 0 .. MAX_CPU - 1, or to
 * the per-thread counter when it is not and the tid is not -1. The counter
 * class is selected from the event name.
 *
 * @data and @ctx are unused. Always returns 0.
 */
int filter_event_early(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
{
        __s32 cpu = sample->cpu;
        __s32 tid = sample->tid;
        int idx;

        /* Nothing to account for */
        if (sample->cyc_cnt == 0)
                return 0;

        idx = event_entry(sample->event);

        if (cpu >= 0 && cpu < MAX_CPU) {
                cycles[cpu][idx] += sample->cyc_cnt;
                return 0;
        }

        /* No usable CPU number: fall back to per-thread accounting */
        if (tid != -1)
                add_entry(tid, idx, sample->cyc_cnt);

        return 0;
}

/*
 * Print the two leading columns of an output line: the running total @cycles
 * and the change @delta since the previous print.
 *
 * A zero @delta is shown as blanks so that both forms occupy the same width.
 */
static void print_vals(__u64 cycles, __u64 delta)
{
        unsigned long long total = cycles;

        if (!delta) {
                printf("%10llu %10s ", total, "");
                return;
        }

        printf("%10llu %10llu ", total, (unsigned long long)delta);
}

int filter_event(void *data, const struct perf_dlfilter_sample *sample, void *ctx)
{
        __s32 cpu = sample->cpu;
        __s32 tid = sample->tid;
        int pos;

        pos = event_entry(sample->event);

        if (cpu >= 0 && cpu < MAX_CPU) {
                print_vals(cycles[cpu][pos], cycles[cpu][pos] - cycles_rpt[cpu][pos]);
                cycles_rpt[cpu][pos] = cycles[cpu][pos];
                return 0;
        }

        if (tid != -1) {
                struct entry *e = find_entry(tid);

                if (e) {
                        print_vals(e->cycles[pos], e->cycles[pos] - e->cycles_rpt[pos]);
                        e->cycles_rpt[pos] = e->cycles[pos];
                        return 0;
                }
        }

        printf("%22s", "");
        return 0;
}

/*
 * Describe this filter.
 *
 * @long_description: if not NULL, receives a pointer to a detailed
 *                    description. The text has static storage duration and
 *                    must not be modified or freed.
 *
 * Returns a one-line description, also with static storage duration.
 */
const char *filter_description(const char **long_description)
{
        /* Kept const: the text is read-only and is handed out as const char * */
        static const char long_desc[] =
                "Cycle counts are accumulated per CPU (or "
                "per thread if CPU is not recorded) from IPC information, and "
                "printed together with the change since the last print, at the "
                "start of each line. Separate counts are kept for branches, "
                "instructions or other events.";

        /* Tolerate callers that only want the one-line description */
        if (long_description)
                *long_description = long_desc;

        return "Print the number of cycles at the start of each line";
}