root/tools/tools/tscdrift/tscdrift.c
/*-
 * Copyright (c) 2014 Hudson River Trading LLC
 * Written by: John H. Baldwin <jhb@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/cpuset.h>
#include <machine/atomic.h>
#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <assert.h>
#include <err.h>
#include <errno.h>
#include <math.h>
#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>

#define barrier()       __asm __volatile("" ::: "memory")

#define TESTS           1024

static volatile int gate;
static volatile uint64_t thread_tsc;

/* Bind the current thread to the specified CPU. */
static void
bind_cpu(int cpu)
{
        cpuset_t set;

        CPU_ZERO(&set);
        CPU_SET(cpu, &set);
        if (cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1, sizeof(set),
            &set) < 0)
                err(1, "cpuset_setaffinity(%d)", cpu);
}

static void *
thread_main(void *arg)
{
        int cpu, i;

        cpu = (intptr_t)arg;
        bind_cpu(cpu);
        for (i = 0; i < TESTS; i++) {
                gate = 1;
                while (gate == 1)
                        cpu_spinwait();
                barrier();

                __asm __volatile("lfence");
                thread_tsc = rdtsc();

                barrier();
                gate = 3;
                while (gate == 3)
                        cpu_spinwait();
        }
        return (NULL);
}

int
main(int ac __unused, char **av __unused)
{
        cpuset_t all_cpus;
        int64_t **skew, *aveskew, *minskew, *maxskew;
        float *stddev;
        double sumsq;
        pthread_t child;
        uint64_t tsc;
        int *cpus;
        int error, i, j, ncpu;

        /*
         * Find all the CPUs this program is eligible to run on and use
         * this as our global set.  This means you can use cpuset to
         * restrict this program to only run on a subset of CPUs.
         */
        if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1,
            sizeof(all_cpus), &all_cpus) < 0)
                err(1, "cpuset_getaffinity");
        for (ncpu = 0, i = 0; i < CPU_SETSIZE; i++) {
                if (CPU_ISSET(i, &all_cpus))
                        ncpu++;
        }
        if (ncpu < 2)
                errx(1, "Only one available CPU");
        cpus = calloc(ncpu, sizeof(*cpus));
        skew = calloc(ncpu, sizeof(*skew));
        for (i = 0; i < ncpu; i++)
                skew[i] = calloc(TESTS, sizeof(*skew[i]));
        for (i = 0, j = 0; i < CPU_SETSIZE; i++)
                if (CPU_ISSET(i, &all_cpus)) {
                        assert(j < ncpu);
                        cpus[j] = i;
                        j++;
                }

        /*
         * We bind this thread to the first CPU and then bind all the
         * other threads to other CPUs in turn saving TESTS counts of
         * skew calculations.
         */
        bind_cpu(cpus[0]);
        for (i = 1; i < ncpu; i++) {
                error = pthread_create(&child, NULL, thread_main,
                    (void *)(intptr_t)cpus[i]);
                if (error)
                        errc(1, error, "pthread_create");

                for (j = 0; j < TESTS; j++) {
                        while (gate != 1)
                                cpu_spinwait();
                        gate = 2;
                        barrier();

                        tsc = rdtsc();

                        barrier();
                        while (gate != 3)
                                cpu_spinwait();
                        gate = 4;

                        skew[i][j] = thread_tsc - tsc;
                }

                error = pthread_join(child, NULL);
                if (error)
                        errc(1, error, "pthread_join");
        }

        /*
         * Compute average skew for each CPU and output a summary of
         * the results.
         */
        aveskew = calloc(ncpu, sizeof(*aveskew));
        minskew = calloc(ncpu, sizeof(*minskew));
        maxskew = calloc(ncpu, sizeof(*maxskew));
        stddev = calloc(ncpu, sizeof(*stddev));
        stddev[0] = 0.0;
        for (i = 1; i < ncpu; i++) {
                sumsq = 0;
                minskew[i] = maxskew[i] = skew[i][0];
                for (j = 0; j < TESTS; j++) {
                        aveskew[i] += skew[i][j];
                        if (skew[i][j] < minskew[i])
                                minskew[i] = skew[i][j];
                        if (skew[i][j] > maxskew[i])
                                maxskew[i] = skew[i][j];
                        sumsq += (skew[i][j] * skew[i][j]);
                }
                aveskew[i] /= TESTS;
                sumsq /= TESTS;
                sumsq -= aveskew[i] * aveskew[i];
                stddev[i] = sqrt(sumsq);
        }

        printf("CPU | TSC skew (min/avg/max/stddev)\n");
        printf("----+------------------------------\n");
        for (i = 0; i < ncpu; i++)
                printf("%3d | %5jd %5jd %5jd   %6.3f\n", cpus[i],
                    (intmax_t)minskew[i], (intmax_t)aveskew[i],
                    (intmax_t)maxskew[i], stddev[i]);
        return (0);
}