root/tools/perf/bench/find-bit-bench.c
// SPDX-License-Identifier: GPL-2.0
/*
 * Benchmark find_next_bit and related bit operations.
 *
 * Copyright 2020 Google LLC.
 */
#include <stdlib.h>
#include "bench.h"
#include "../util/stat.h"
#include <linux/bitmap.h>
#include <linux/bitops.h>
#include <linux/time64.h>
#include <subcmd/parse-options.h>

/* Number of timed samples taken per bitmap configuration (-i). */
static unsigned int outer_iterations = 5;
/* Number of full bitmap scans performed inside each timed sample (-j). */
static unsigned int inner_iterations = 100000;

/* Command-line options understood by this benchmark. */
static const struct option options[] = {
        OPT_UINTEGER('i', "outer-iterations", &outer_iterations,
                "Number of outer iterations used"),
        OPT_UINTEGER('j', "inner-iterations", &inner_iterations,
                "Number of inner iterations used"),
        OPT_END()
};

/* Usage lines handed to parse_options() and usage_with_options(). */
static const char *const bench_usage[] = {
        "perf bench mem find_bit <options>",
        NULL
};

/* Counts calls to workload(); the asserts compare it to the expected count. */
static unsigned int accumulator;
/* Running sum of every value passed to workload(). */
static unsigned int use_of_val;

/*
 * Per-bit payload of the benchmark loops: tally one more visited bit and
 * fold the bit index into a running sum so the argument is actually used.
 * Marked noinline so each visited bit costs a real function call.
 */
static noinline void workload(int val)
{
        accumulator += 1;
        use_of_val = use_of_val + val;
}

#if defined(__i386__) || defined(__x86_64__)
/*
 * x86 bit test used by the linear-scan half of the benchmark.
 *
 * Issues a "bt" instruction on the bitmap at @addr with bit index @nr and
 * returns the result, which is read back from the carry flag.
 */
static bool asm_test_bit(long nr, const unsigned long *addr)
{
        bool oldbit;

        /*
         * "=@ccc" is a flag-output constraint: oldbit receives the carry
         * flag as left by "bt". The bit index may be an immediate or a
         * register ("Ir"); the bitmap is passed as a memory operand.
         */
        asm volatile("bt %2,%1"
                     : "=@ccc" (oldbit)
                     : "m" (*(unsigned long *)addr), "Ir" (nr) : "memory");

        return oldbit;
}
#else
/* On other architectures fall back to test_bit(). */
#define asm_test_bit test_bit
#endif

/*
 * Benchmark two ways of visiting the set bits of a @num_bits wide bitmap.
 *
 * For every population count set_bits = 1, 2, 4, ... up to @num_bits, bits
 * are set at a regular stride of num_bits / set_bits and two loops are timed
 * outer_iterations times, each performing inner_iterations scans:
 *   - for_each_set_bit() over the bitmap, and
 *   - a linear walk that calls asm_test_bit() on every bit position.
 * The average and standard deviation of both are printed for each
 * configuration.
 *
 * The asserts expect exactly set_bits bits to be set, which holds when
 * @num_bits is a power of two so that the stride divides it evenly.
 *
 * Returns 0 on success, -1 if the bitmap could not be allocated.
 */
static int do_for_each_set_bit(unsigned int num_bits)
{
        unsigned long *to_test = bitmap_zalloc(num_bits);
        struct timeval start, end, diff;
        u64 runtime_us;
        struct stats fb_time_stats, tb_time_stats;
        double time_average, time_stddev;
        unsigned int bit, i, j;
        unsigned int set_bits, skip;

        if (!to_test) {
                fprintf(stderr, "Failed to allocate a bitmap of %u bits\n",
                        num_bits);
                return -1;
        }

        for (set_bits = 1; set_bits <= num_bits; set_bits <<= 1) {
                /*
                 * Start from empty statistics for every configuration so the
                 * numbers printed below describe only this set_bits value and
                 * are not blended with samples from earlier configurations.
                 */
                init_stats(&fb_time_stats);
                init_stats(&tb_time_stats);

                bitmap_zero(to_test, num_bits);
                skip = num_bits / set_bits;
                for (i = 0; i < num_bits; i += skip)
                        __set_bit(i, to_test);

                for (i = 0; i < outer_iterations; i++) {
#ifndef NDEBUG
                        unsigned int old = accumulator;
#endif

                        /* Sample 1: iterate only over the set bits. */
                        gettimeofday(&start, NULL);
                        for (j = 0; j < inner_iterations; j++) {
                                for_each_set_bit(bit, to_test, num_bits)
                                        workload(bit);
                        }
                        gettimeofday(&end, NULL);
                        /* Every scan must have visited exactly set_bits bits. */
                        assert(old + (inner_iterations * set_bits) == accumulator);
                        timersub(&end, &start, &diff);
                        runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
                        update_stats(&fb_time_stats, runtime_us);

#ifndef NDEBUG
                        old = accumulator;
#endif
                        /* Sample 2: test every bit position one by one. */
                        gettimeofday(&start, NULL);
                        for (j = 0; j < inner_iterations; j++) {
                                for (bit = 0; bit < num_bits; bit++) {
                                        if (asm_test_bit(bit, to_test))
                                                workload(bit);
                                }
                        }
                        gettimeofday(&end, NULL);
                        assert(old + (inner_iterations * set_bits) == accumulator);
                        timersub(&end, &start, &diff);
                        runtime_us = diff.tv_sec * USEC_PER_SEC + diff.tv_usec;
                        update_stats(&tb_time_stats, runtime_us);
                }

                /* All three values are unsigned int, hence %u. */
                printf("%u operations %u bits set of %u bits\n",
                        inner_iterations, set_bits, num_bits);
                time_average = avg_stats(&fb_time_stats);
                time_stddev = stddev_stats(&fb_time_stats);
                printf("  Average for_each_set_bit took: %.3f usec (+- %.3f usec)\n",
                        time_average, time_stddev);
                time_average = avg_stats(&tb_time_stats);
                time_stddev = stddev_stats(&tb_time_stats);
                printf("  Average test_bit loop took:    %.3f usec (+- %.3f usec)\n",
                        time_average, time_stddev);

                if (use_of_val == accumulator)  /* Try to avoid compiler tricks. */
                        printf("\n");
        }
        bitmap_free(to_test);
        return 0;
}

/*
 * Entry point for "perf bench mem find_bit".
 *
 * Parses the benchmark options, rejects any leftover positional arguments
 * with the usage text, then runs do_for_each_set_bit() for bitmap sizes
 * 1, 2, 4, ... 2048 bits.
 *
 * Returns 0 when every size completed, otherwise the non-zero status of the
 * first size that failed (remaining sizes are skipped).
 */
int bench_mem_find_bit(int argc, const char **argv)
{
        int err = 0, i;

        argc = parse_options(argc, argv, options, bench_usage, 0);
        if (argc) {
                usage_with_options(bench_usage, options);
                exit(EXIT_FAILURE);
        }

        for (i = 1; i <= 2048; i <<= 1) {
                /* Do not discard the per-size status; stop on first failure. */
                err = do_for_each_set_bit(i);
                if (err)
                        break;
        }

        return err;
}