#include <sys/param.h>
#include <sys/cpuset.h>
#include <machine/atomic.h>
#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <assert.h>
#include <err.h>
#include <errno.h>
#include <math.h>
#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>
#include <stdio.h>
/* Compiler-level barrier: an empty asm statement with a "memory" clobber. */
#define barrier() __asm __volatile("" ::: "memory")
/* Number of TSC samples taken for each measured CPU. */
#define TESTS 1024
/*
 * Handshake state shared by main() and the child thread.  The child stores
 * 1 and 3, main() stores 2 and 4, and each side spins until the other side
 * has advanced the value.
 */
static volatile int gate;
/* TSC value most recently stored by the child thread; read by main(). */
static volatile uint64_t thread_tsc;
/*
 * Restrict the calling thread to the single CPU numbered 'cpu'.
 *
 * Does not return on failure: err(3) prints the errno text and exits with
 * status 1.
 */
static void
bind_cpu(int cpu)
{
	cpuset_t mask;
	int rc;

	/* Build a set that contains only the requested CPU. */
	CPU_ZERO(&mask);
	CPU_SET(cpu, &mask);

	/* With CPU_WHICH_TID an id of -1 refers to the current thread. */
	rc = cpuset_setaffinity(CPU_LEVEL_WHICH, CPU_WHICH_TID, -1,
	    sizeof(mask), &mask);
	if (rc < 0)
		err(1, "cpuset_setaffinity(%d)", cpu);
}
/*
 * Child thread entry point.
 *
 * 'arg' carries a CPU number encoded as an integer cast to a pointer.  The
 * thread binds itself to that CPU and then takes TESTS TSC samples, each one
 * in lock-step with main() through the shared 'gate' variable.
 *
 * Always returns NULL.
 */
static void *
thread_main(void *arg)
{
int cpu, i;
cpu = (intptr_t)arg;
bind_cpu(cpu);
for (i = 0; i < TESTS; i++) {
/* Announce readiness (1) and spin until gate changes; main() stores 2. */
gate = 1;
while (gate == 1)
cpu_spinwait();
barrier();
/*
 * NOTE(review): the lfence presumably keeps rdtsc from executing ahead of
 * the spin loop's loads; main() reads its TSC without one -- confirm that
 * the asymmetry is intended.
 */
__asm __volatile("lfence");
thread_tsc = rdtsc();
barrier();
/* Publish the sample (3) and spin until gate changes; main() stores 4. */
gate = 3;
while (gate == 3)
cpu_spinwait();
}
return (NULL);
}
/*
 * Allocate a zero-filled array of 'count' elements of 'size' bytes each.
 * Never returns NULL: on failure the program exits with status 1 via err(3).
 */
static void *
checked_calloc(size_t count, size_t size)
{
	void *p;

	p = calloc(count, size);
	if (p == NULL)
		err(1, "calloc");
	return (p);
}

/*
 * Measure the TSC offset of every CPU in the process's affinity set relative
 * to the first CPU in that set, and print min/avg/max/stddev per CPU.
 *
 * For each remote CPU a child thread is started on it; parent and child then
 * exchange TESTS timestamp pairs through the 'gate' handshake and the
 * difference (child TSC - parent TSC) is recorded as one skew sample.
 *
 * Command-line arguments are ignored.  Returns 0 on success; any failing
 * system or library call terminates the program with status 1.
 */
int
main(int ac __unused, char **av __unused)
{
	cpuset_t all_cpus;
	int64_t **skew, *aveskew, *minskew, *maxskew;
	float *stddev;
	double delta, mean, sumsq;
	pthread_t child;
	uint64_t tsc;
	int *cpus;
	int error, i, j, ncpu;

	/* Discover which CPUs this process is allowed to run on. */
	if (cpuset_getaffinity(CPU_LEVEL_WHICH, CPU_WHICH_PID, -1,
	    sizeof(all_cpus), &all_cpus) < 0)
		err(1, "cpuset_getaffinity");
	for (ncpu = 0, i = 0; i < CPU_SETSIZE; i++) {
		if (CPU_ISSET(i, &all_cpus))
			ncpu++;
	}
	if (ncpu < 2)
		errx(1, "Only one available CPU");

	cpus = checked_calloc((size_t)ncpu, sizeof(*cpus));
	skew = checked_calloc((size_t)ncpu, sizeof(*skew));
	for (i = 0; i < ncpu; i++)
		skew[i] = checked_calloc(TESTS, sizeof(*skew[i]));

	/* Map dense index j -> CPU id for every CPU in the set. */
	for (i = 0, j = 0; i < CPU_SETSIZE; i++)
		if (CPU_ISSET(i, &all_cpus)) {
			assert(j < ncpu);
			cpus[j] = i;
			j++;
		}

	/* The parent stays on the first CPU, which serves as the reference. */
	bind_cpu(cpus[0]);
	for (i = 1; i < ncpu; i++) {
		error = pthread_create(&child, NULL, thread_main,
		    (void *)(intptr_t)cpus[i]);
		if (error)
			errc(1, error, "pthread_create");
		for (j = 0; j < TESTS; j++) {
			/* Wait for the child to announce readiness. */
			while (gate != 1)
				cpu_spinwait();
			/* Release the child and take our own timestamp. */
			gate = 2;
			barrier();
			tsc = rdtsc();
			barrier();
			/* Wait until the child has published its sample. */
			while (gate != 3)
				cpu_spinwait();
			gate = 4;
			/*
			 * The child only stores thread_tsc again after it has
			 * seen gate change from 1, which happens when this
			 * loop stores 2 in the next iteration, so this read
			 * still sees the sample for the current iteration.
			 */
			skew[i][j] = thread_tsc - tsc;
		}
		error = pthread_join(child, NULL);
		if (error)
			errc(1, error, "pthread_join");
	}

	aveskew = checked_calloc((size_t)ncpu, sizeof(*aveskew));
	minskew = checked_calloc((size_t)ncpu, sizeof(*minskew));
	maxskew = checked_calloc((size_t)ncpu, sizeof(*maxskew));
	stddev = checked_calloc((size_t)ncpu, sizeof(*stddev));
	/* Index 0 is the reference CPU; its statistics stay at zero. */
	stddev[0] = 0.0;
	for (i = 1; i < ncpu; i++) {
		/* First pass: sum, minimum and maximum. */
		minskew[i] = maxskew[i] = skew[i][0];
		for (j = 0; j < TESTS; j++) {
			aveskew[i] += skew[i][j];
			if (skew[i][j] < minskew[i])
				minskew[i] = skew[i][j];
			if (skew[i][j] > maxskew[i])
				maxskew[i] = skew[i][j];
		}
		/* Keep the exact mean for the variance; print the integer one. */
		mean = (double)aveskew[i] / TESTS;
		aveskew[i] /= TESTS;
		/*
		 * Second pass: population variance around the exact mean.
		 * Working in double avoids signed overflow when squaring large
		 * skews, and summing squared deviations cannot go negative.
		 */
		sumsq = 0;
		for (j = 0; j < TESTS; j++) {
			delta = (double)skew[i][j] - mean;
			sumsq += delta * delta;
		}
		stddev[i] = sqrt(sumsq / TESTS);
	}

	printf("CPU | TSC skew (min/avg/max/stddev)\n");
	printf("----+------------------------------\n");
	for (i = 0; i < ncpu; i++)
		printf("%3d | %5jd %5jd %5jd %6.3f\n", cpus[i],
		    (intmax_t)minskew[i], (intmax_t)aveskew[i],
		    (intmax_t)maxskew[i], stddev[i]);
	return (0);
}