#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/module.h>
#include <linux/time.h>
#include <linux/perf_event.h>
#include <linux/completion.h>
#include <linux/sched.h>
#include <linux/workqueue.h>
#include <linux/kthread.h>
#include "time_bench.h"
static int verbose = 1;
#define PERF_FORMAT \
(PERF_FORMAT_GROUP | PERF_FORMAT_ID | PERF_FORMAT_TOTAL_TIME_ENABLED | \
PERF_FORMAT_TOTAL_TIME_RUNNING)
/* Describes one raw PMU event and holds the counter created for it. */
struct raw_perf_event {
uint64_t config; /* copied into perf_event_attr.config */
uint64_t config1; /* copied into perf_event_attr.config1 */
struct perf_event *save; /* counter stored by time_bench_PMU_config() */
char *desc; /* human-readable event name used in log messages */
};
/*
 * Raw PMU events programmed by time_bench_PMU_config().
 * NOTE(review): the event codes look like x86 architectural event numbers;
 * confirm they are valid for the target CPU.
 */
struct raw_perf_event perf_events[] = {
	{
		.config  = 0x3c,
		.config1 = 0x00,
		.save    = NULL,
		.desc    = "Unhalted CPU Cycles",
	},
	{
		.config  = 0xc0,
		.config1 = 0x00,
		.save    = NULL,
		.desc    = "Instruction Retired",
	},
};

/* Number of entries in perf_events[] */
#define NUM_EVTS (ARRAY_SIZE(perf_events))
/**
 * time_bench_PMU_config - create and enable the raw PMU counters
 * @enable: value copied into the 'disabled' attribute of every counter
 *
 * Creates one pinned counter per perf_events[] entry, bound to the CPU id
 * sampled at entry, stores it in the entry's 'save' field and enables it.
 * Counters that fail to be created are logged and skipped; their 'save'
 * field is left untouched.
 *
 * NOTE(review): @enable is assigned to perf_event_attr.disabled, which
 * reads as inverted.  It is kept as-is because every counter is explicitly
 * enabled right after creation -- confirm the intended meaning.
 *
 * Return: true if every counter was created, false if at least one failed.
 */
bool time_bench_PMU_config(bool enable)
{
	struct perf_event_attr perf_conf;
	struct perf_event *perf_event;
	bool all_created = true;
	int cpu;
	int i;

	/* Preemption is only disabled while sampling the current CPU id */
	preempt_disable();
	cpu = smp_processor_id();
	pr_info("DEBUG: cpu:%d\n", cpu);
	preempt_enable();

	memset(&perf_conf, 0, sizeof(struct perf_event_attr));
	perf_conf.type = PERF_TYPE_RAW;
	perf_conf.size = sizeof(struct perf_event_attr);
	perf_conf.read_format = PERF_FORMAT;
	perf_conf.pinned = 1;
	perf_conf.exclude_user = 1;
	perf_conf.exclude_kernel = 0;

	for (i = 0; i < NUM_EVTS; i++) {
		perf_conf.disabled = enable;
		perf_conf.config = perf_events[i].config;
		perf_conf.config1 = perf_events[i].config1;
		if (verbose)
			pr_info("%s() enable PMU counter: %s\n",
				__func__, perf_events[i].desc);

		perf_event = perf_event_create_kernel_counter(&perf_conf, cpu,
							      NULL, NULL, NULL);
		/*
		 * Failure is reported through ERR_PTR(), never NULL, so a
		 * plain NULL test would store and enable an error pointer.
		 */
		if (IS_ERR(perf_event)) {
			pr_err("%s(): creating PMU counter '%s' failed: %ld\n",
			       __func__, perf_events[i].desc,
			       PTR_ERR(perf_event));
			all_created = false;
			continue;
		}

		perf_events[i].save = perf_event;
		pr_info("%s():DEBUG perf_event success\n", __func__);
		perf_event_enable(perf_event);
	}

	return all_created;
}
/*
 * Turn the remainder @rem of a division by @div into thousandths (the
 * three digits printed with "%03llu").  @div must be non-zero and the
 * caller passes rem < div.
 */
static uint64_t time_bench_frac_milli(uint64_t rem, uint64_t div)
{
	/* Scale the divisor instead when rem * 1000 would overflow 64 bit */
	if (rem > ULLONG_MAX / 1000)
		return div64_u64(rem, div / 1000);

	return div64_u64(rem * 1000, div);
}

/**
 * time_bench_calc_stats - derive the printable results from raw samples
 * @rec: record holding the start/stop samples; results are written back
 *
 * Depending on @rec->flags this computes:
 *  - TIME_BENCH_TSC:       tsc_interval and tsc_cycles (per call when
 *                          TIME_BENCH_LOOP is also set)
 *  - TIME_BENCH_WALLCLOCK: time_interval in nanoseconds, split into
 *                          time_sec/time_sec_remainder, plus ns per call
 *                          (quotient and thousandths) for TIME_BENCH_LOOP
 *  - TIME_BENCH_PMU:       pmc_inst, pmc_clk and their ratio (quotient and
 *                          thousandths)
 *
 * Return: false (after logging) when the invoke count is below 1000 or does
 * not fit in 32 bit, or when a measured interval is zero; true otherwise.
 */
bool time_bench_calc_stats(struct time_bench_record *rec)
{
#define NANOSEC_PER_SEC 1000000000
	uint32_t ns_per_call_remainder = 0;
	uint64_t pmc_ipc_remainder = 0;
	uint32_t invoked_cnt = 0;

	if (rec->flags & TIME_BENCH_LOOP) {
		if (rec->invoked_cnt < 1000) {
			pr_err("ERR: need more(>1000) loops(%llu) for timing\n",
			       rec->invoked_cnt);
			return false;
		}
		/* The 32-bit divisor helpers below need the count to fit */
		if (rec->invoked_cnt > ((1ULL << 32) - 1)) {
			pr_err("ERR: Invoke cnt(%llu) too big overflow 32bit\n",
			       rec->invoked_cnt);
			return false;
		}
		invoked_cnt = (uint32_t)rec->invoked_cnt;
	}

	if (rec->flags & TIME_BENCH_TSC) {
		rec->tsc_interval = rec->tsc_stop - rec->tsc_start;
		if (rec->tsc_interval == 0) {
			pr_err("ABORT: timing took ZERO TSC time\n");
			return false;
		}
		if (rec->flags & TIME_BENCH_LOOP)
			rec->tsc_cycles = div_u64(rec->tsc_interval,
						  invoked_cnt);
		else
			rec->tsc_cycles = rec->tsc_interval;
	}

	if (rec->flags & TIME_BENCH_WALLCLOCK) {
		/* Widen the seconds first so the multiply is done in 64 bit */
		rec->time_start = rec->ts_start.tv_nsec +
			((uint64_t)rec->ts_start.tv_sec * NANOSEC_PER_SEC);
		rec->time_stop = rec->ts_stop.tv_nsec +
			((uint64_t)rec->ts_stop.tv_sec * NANOSEC_PER_SEC);
		rec->time_interval = rec->time_stop - rec->time_start;
		if (rec->time_interval == 0) {
			pr_err("ABORT: timing took ZERO wallclock time\n");
			return false;
		}
		rec->time_sec = div_u64_rem(rec->time_interval, NANOSEC_PER_SEC,
					    &rec->time_sec_remainder);

		if (rec->flags & TIME_BENCH_LOOP) {
			rec->ns_per_call_quotient =
				div_u64_rem(rec->time_interval, invoked_cnt,
					    &ns_per_call_remainder);
			/*
			 * remainder * 1000 / count stays within 0..999, unlike
			 * remainder / (count / 1000) which overshoots whenever
			 * count is not a multiple of 1000.
			 */
			rec->ns_per_call_decimal =
				time_bench_frac_milli(ns_per_call_remainder,
						      invoked_cnt);
		}
	}

	if (rec->flags & TIME_BENCH_PMU) {
		rec->pmc_inst = rec->pmc_inst_stop - rec->pmc_inst_start;
		rec->pmc_clk = rec->pmc_clk_stop - rec->pmc_clk_start;
		if (rec->pmc_clk == 0) {
			pr_err("ABORT: PMU measured ZERO clock cycles\n");
			return false;
		}
		/*
		 * The cycle count is a 64-bit value; use the 64-bit divisor
		 * helper so counts above 2^32 are not truncated.
		 */
		rec->pmc_ipc_quotient = div64_u64_rem(rec->pmc_inst,
						      rec->pmc_clk,
						      &pmc_ipc_remainder);
		rec->pmc_ipc_decimal = time_bench_frac_milli(pmc_ipc_remainder,
							     rec->pmc_clk);
	}

	return true;
}
/**
 * time_bench_loop - run one benchmark function and print its result
 * @loops: loop count stored in the record handed to @func
 * @step:  step value stored in the record and echoed in the output
 * @txt:   label printed as "Type:" in the result line
 * @data:  opaque pointer passed through to @func
 * @func:  benchmark to run; a zero return is treated as failure
 *
 * The record is zeroed and set up with the LOOP, TSC and WALLCLOCK flags
 * before @func is called.  Afterwards the statistics are calculated and
 * printed.
 *
 * Return: false if @func failed or the statistics could not be calculated,
 * true otherwise.
 */
bool time_bench_loop(uint32_t loops, int step, char *txt, void *data,
		     int (*func)(struct time_bench_record *record, void *data))
{
	struct time_bench_record rec;

	memset(&rec, 0, sizeof(rec));
	rec.version_abi = 1;
	rec.loops = loops;
	rec.step = step;
	rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC | TIME_BENCH_WALLCLOCK);

	if (!func(&rec, data)) {
		pr_err("ABORT: function being timed failed\n");
		return false;
	}

	if (rec.invoked_cnt < loops)
		pr_warn("WARNING: Invoke count(%llu) smaller than loops(%u)\n",
			rec.invoked_cnt, loops);

	/*
	 * The reason was already logged by time_bench_calc_stats(); do not
	 * print a result line built from incomplete numbers.
	 */
	if (!time_bench_calc_stats(&rec))
		return false;

	pr_info("Type:%s Per elem: %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n",
		txt, rec.tsc_cycles, rec.ns_per_call_quotient,
		rec.ns_per_call_decimal, rec.step, rec.time_sec,
		rec.time_sec_remainder, rec.time_interval, rec.invoked_cnt,
		rec.tsc_interval);

	/* Only reached if @func added TIME_BENCH_PMU to the record flags */
	if (rec.flags & TIME_BENCH_PMU)
		pr_info("Type:%s PMU inst/clock%llu/%llu = %llu.%03llu IPC (inst per cycle)\n",
			txt, rec.pmc_inst, rec.pmc_clk, rec.pmc_ipc_quotient,
			rec.pmc_ipc_decimal);

	return true;
}
/*
 * Thread function started by time_bench_run_concurrent().
 *
 * @private is the struct time_bench_cpu of the CPU to benchmark.  The thread
 * restricts itself to that CPU, announces itself through
 * sync->nr_tests_running, waits for sync->start_event, runs the benchmark
 * and then sleeps until kthread_stop() is called.
 *
 * Always returns 0; a benchmark failure is only logged.
 */
static int invoke_test_on_cpu_func(void *private)
{
	struct time_bench_cpu *cpu = private;
	struct time_bench_sync *sync = cpu->sync;
	cpumask_t newmask = CPU_MASK_NONE;
	void *data = cpu->data;

	/* Restrict this thread to the CPU recorded for it */
	cpumask_set_cpu(cpu->rec.cpu, &newmask);
	if (set_cpus_allowed_ptr(current, &newmask))
		pr_warn("WARNING: could not bind test thread to CPU:%d\n",
			cpu->rec.cpu);

	/* Tell the coordinator we are ready, then wait for the start signal */
	atomic_inc(&sync->nr_tests_running);
	wait_for_completion(&sync->start_event);

	if (!cpu->bench_func(&cpu->rec, data)) {
		pr_err("ERROR: function being timed failed on CPU:%d(%d)\n",
		       cpu->rec.cpu, smp_processor_id());
	} else {
		if (verbose)
			pr_info("SUCCESS: ran on CPU:%d(%d)\n", cpu->rec.cpu,
				smp_processor_id());
	}
	cpu->did_bench_run = true;

	/* Tell the coordinator this CPU is done */
	atomic_dec(&sync->nr_tests_running);

	/*
	 * Sleep until kthread_stop().  The task state must be set BEFORE
	 * testing kthread_should_stop(): if the stop request and its wake-up
	 * arrive between the test and the state change, the wake-up is lost
	 * and schedule() never returns.
	 */
	set_current_state(TASK_INTERRUPTIBLE);
	while (!kthread_should_stop()) {
		schedule();
		set_current_state(TASK_INTERRUPTIBLE);
	}
	__set_current_state(TASK_RUNNING);

	return 0;
}
/**
 * time_bench_print_stats_cpumask - print per-CPU results and their average
 * @desc:      label printed as "Type:" on every line
 * @cpu_tasks: array indexed by CPU number
 * @mask:      CPUs whose records are calculated and printed
 *
 * For every CPU in @mask the statistics of its record are calculated and
 * printed on one line.  A final line reports the average tsc_cycles over the
 * printed records, the record count and the step of the last record visited.
 */
void time_bench_print_stats_cpumask(const char *desc,
				    struct time_bench_cpu *cpu_tasks,
				    const struct cpumask *mask)
{
	uint64_t tsc_total = 0;
	uint64_t avg_cycles = 0;
	int nr_records = 0;
	int last_step = 0;
	int cpu;

	for_each_cpu(cpu, mask) {
		struct time_bench_record *r = &cpu_tasks[cpu].rec;

		time_bench_calc_stats(r);

		pr_info("Type:%s CPU(%d) %llu cycles(tsc) %llu.%03llu ns (step:%d) - (measurement period time:%llu.%09u sec time_interval:%llu) - (invoke count:%llu tsc_interval:%llu)\n",
			desc, cpu, r->tsc_cycles, r->ns_per_call_quotient,
			r->ns_per_call_decimal, r->step, r->time_sec,
			r->time_sec_remainder, r->time_interval,
			r->invoked_cnt, r->tsc_interval);

		/* Accumulate for the summary line */
		tsc_total += r->tsc_cycles;
		nr_records += 1;
		last_step = r->step;
	}

	/* An empty mask leaves the average at zero */
	if (nr_records != 0)
		avg_cycles = tsc_total / nr_records;

	pr_info("Sum Type:%s Average: %llu cycles(tsc) CPUs:%d step:%d\n", desc,
		avg_cycles, nr_records, last_step);
}
/**
 * time_bench_run_concurrent - run a benchmark on several CPUs at once
 * @loops:     loop count stored in every per-CPU record
 * @step:      step value stored in every per-CPU record
 * @data:      opaque pointer handed to @func on every CPU
 * @mask:      CPUs to run on
 * @sync:      shared start barrier and running counter, initialised here
 * @cpu_tasks: array indexed by CPU number; the entries in @mask are set up
 * @func:      benchmark function each thread runs
 *
 * One kthread is started per CPU in @mask.  Once all of them have checked
 * in they are released together through @sync->start_event; this function
 * then waits for them to finish and stops them.
 *
 * If a thread cannot be created, no further threads are started.  The ones
 * already started are still released (and therefore run @func), because the
 * start barrier is the only way to get them out of their uninterruptible
 * wait, and are then stopped as usual, so nothing is left referencing @sync
 * or @cpu_tasks on return.
 */
void time_bench_run_concurrent(uint32_t loops, int step, void *data,
			       const struct cpumask *mask,
			       struct time_bench_sync *sync,
			       struct time_bench_cpu *cpu_tasks,
			       int (*func)(struct time_bench_record *record, void *data))
{
	int cpu, running = 0;
	int failed_cpu = -1; /* first CPU whose thread could not be created */

	if (verbose)
		pr_warn("%s() Started on CPU:%d\n", __func__,
			smp_processor_id());

	/* Reset the shared synchronisation state */
	atomic_set(&sync->nr_tests_running, 0);
	init_completion(&sync->start_event);

	/* Set up and start one test thread per CPU */
	for_each_cpu(cpu, mask) {
		struct time_bench_cpu *c = &cpu_tasks[cpu];

		c->sync = sync;
		c->data = data;

		memset(&c->rec, 0, sizeof(struct time_bench_record));
		c->rec.version_abi = 1;
		c->rec.loops = loops;
		c->rec.step = step;
		c->rec.flags = (TIME_BENCH_LOOP | TIME_BENCH_TSC |
				TIME_BENCH_WALLCLOCK);
		c->rec.cpu = cpu;
		c->bench_func = func;

		c->task = kthread_run(invoke_test_on_cpu_func, c,
				      "time_bench%d", cpu);
		if (IS_ERR(c->task)) {
			pr_err("%s(): Failed to start test func\n", __func__);
			failed_cpu = cpu;
			break;
		}
		/* Count only threads that really exist */
		running++;
	}

	/* Wait until every started thread has checked in */
	while (atomic_read(&sync->nr_tests_running) < running) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(10);
	}

	/* Release all threads at the same time */
	complete_all(&sync->start_event);

	/* Wait for the threads to finish their benchmark */
	while (atomic_read(&sync->nr_tests_running)) {
		set_current_state(TASK_UNINTERRUPTIBLE);
		schedule_timeout(10);
	}

	/* Stop the threads that were started; later entries hold no task */
	for_each_cpu(cpu, mask) {
		if (cpu == failed_cpu)
			break;
		kthread_stop(cpu_tasks[cpu].task);
	}

	if (failed_cpu >= 0)
		return;

	if (verbose)
		pr_warn("%s() Finished on CPU:%d\n", __func__,
			smp_processor_id());
}