#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/clock.h>
#include <sys/conf.h>
#include <sys/fcntl.h>
#include <sys/limits.h>
#include <sys/mman.h>
#include <sys/proc.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/vdso.h>
#include <vm/vm.h>
#include <vm/pmap.h>
#include <machine/atomic.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/pvclock.h>
/*
 * Most recent system time published by the paths that run when the
 * TSC-stable flag is clear (pvclock_getsystime() and
 * pvclock_get_timecount()).  A reading older than this value is replaced by
 * it.  pvclock_resume() resets it to 0, and the vDSO fill routines copy it
 * out to userspace.
 */
static volatile uint64_t pvclock_last_systime;
/* Forward declarations of the file-local helpers and callbacks. */
static uint64_t pvclock_getsystime(struct pvclock *pvc);
static void pvclock_read_time_info(
struct pvclock_vcpu_time_info *ti, uint64_t *ns, uint8_t *flags);
static void pvclock_read_wall_clock(struct pvclock_wall_clock *wc,
struct timespec *ts);
static u_int pvclock_tc_get_timecount(struct timecounter *tc);
static uint32_t pvclock_tc_vdso_timehands(
struct vdso_timehands *vdso_th, struct timecounter *tc);
#ifdef COMPAT_FREEBSD32
static uint32_t pvclock_tc_vdso_timehands32(
struct vdso_timehands32 *vdso_th, struct timecounter *tc);
#endif
/* Character-device entry points; defined further down. */
static d_open_t pvclock_cdev_open;
static d_mmap_t pvclock_cdev_mmap;
/*
 * Device switch for the character device created in pvclock_init().  Only
 * open and mmap handlers are supplied; every other entry is left at its
 * zero-initialized default.
 */
static struct cdevsw pvclock_cdev_cdevsw = {
.d_version = D_VERSION,
.d_name = PVCLOCK_CDEVNAME,
.d_open = pvclock_cdev_open,
.d_mmap = pvclock_cdev_mmap,
};
/*
 * Reset the recorded last system time to 0 (release store), so that the
 * next reading is not clamped against a value recorded earlier.
 * NOTE(review): presumably called on resume from suspend, when the previous
 * value may no longer be meaningful -- confirm with callers.
 */
void
pvclock_resume(void)
{
atomic_store_rel_64(&pvclock_last_systime, 0);
}
/*
 * Derive a frequency value from the scaling parameters of a vCPU time info
 * record.
 *
 * The quotient (10^9 << 32) / tsc_to_system_mul is computed first and the
 * record's tsc_shift is then undone: a non-negative shift scales the
 * quotient down by 2^shift, a negative one scales it up by 2^(-shift).
 *
 * ti: record supplying tsc_to_system_mul and tsc_shift.
 *
 * Returns the resulting 64-bit value.
 */
uint64_t
pvclock_tsc_freq(struct pvclock_vcpu_time_info *ti)
{
	uint64_t quot;

	quot = (1000000000ULL << 32) / ti->tsc_to_system_mul;
	if (ti->tsc_shift >= 0)
		return (quot >> ti->tsc_shift);
	return (quot << -ti->tsc_shift);
}
/*
 * Read a consistent (nanoseconds, flags) pair from a vCPU time info record.
 *
 * The record's version field is sampled before the other fields are read
 * and is checked again afterwards; the whole read is repeated for as long
 * as the version is odd or differs from the initial sample.
 * NOTE(review): an odd version presumably marks an update in progress by
 * the hypervisor -- confirm against the pvclock ABI.
 *
 * ti:    record to read.
 * ns:    out; system_time plus the scaled TSC delta since tsc_timestamp.
 * flags: out; copy of the record's flags field.
 */
static void
pvclock_read_time_info(struct pvclock_vcpu_time_info *ti,
uint64_t *ns, uint8_t *flags)
{
uint64_t delta;
uint32_t version;
do {
/* Acquire load: the field reads below must not move above it. */
version = atomic_load_acq_32(&ti->version);
delta = rdtsc_ordered() - ti->tsc_timestamp;
*ns = ti->system_time + pvclock_scale_delta(delta,
ti->tsc_to_system_mul, ti->tsc_shift);
*flags = ti->flags;
/* Keep the field reads above ahead of the version re-check. */
atomic_thread_fence_acq();
} while ((ti->version & 1) != 0 || ti->version != version);
}
/*
 * Read a consistent seconds/nanoseconds pair from a wall clock record.
 *
 * Uses the same version protocol as pvclock_read_time_info(): sample the
 * version, copy the fields, then retry while the version is odd or no
 * longer matches the sample.
 *
 * wc: record to read.
 * ts: out; tv_sec and tv_nsec receive the record's sec and nsec fields.
 */
static void
pvclock_read_wall_clock(struct pvclock_wall_clock *wc, struct timespec *ts)
{
uint32_t version;
do {
/* Acquire load: the field reads below must not move above it. */
version = atomic_load_acq_32(&wc->version);
ts->tv_sec = wc->sec;
ts->tv_nsec = wc->nsec;
/* Keep the field reads above ahead of the version re-check. */
atomic_thread_fence_acq();
} while ((wc->version & 1) != 0 || wc->version != version);
}
static uint64_t
pvclock_getsystime(struct pvclock *pvc)
{
uint64_t now, last, ret;
uint8_t flags;
critical_enter();
pvclock_read_time_info(&pvc->timeinfos[curcpu], &now, &flags);
ret = now;
if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0) {
last = atomic_load_acq_64(&pvclock_last_systime);
do {
if (last > now) {
ret = last;
break;
}
} while (!atomic_fcmpset_rel_64(&pvclock_last_systime, &last,
now));
}
critical_exit();
return (ret);
}
uint64_t
pvclock_get_timecount(struct pvclock_vcpu_time_info *ti)
{
uint64_t now, last, ret;
uint8_t flags;
pvclock_read_time_info(ti, &now, &flags);
ret = now;
if ((flags & PVCLOCK_FLAG_TSC_STABLE) == 0) {
last = atomic_load_acq_64(&pvclock_last_systime);
do {
if (last > now) {
ret = last;
break;
}
} while (!atomic_fcmpset_rel_64(&pvclock_last_systime, &last,
now));
}
return (ret);
}
/*
 * Public entry point for reading a wall clock record; forwards directly to
 * pvclock_read_wall_clock().
 *
 * wc: record to read.
 * ts: out; receives the record's seconds and nanoseconds.
 */
void
pvclock_get_wallclock(struct pvclock_wall_clock *wc, struct timespec *ts)
{
pvclock_read_wall_clock(wc, ts);
}
/*
 * Open handler for the pvclock character device.
 *
 * Any open that requests write access (FWRITE set in 'oflags') is refused
 * with EPERM; every other open succeeds.  The remaining arguments are not
 * examined.
 */
static int
pvclock_cdev_open(struct cdev *dev, int oflags, int devtype, struct thread *td)
{
	return ((oflags & FWRITE) != 0 ? EPERM : 0);
}
/*
 * mmap handler for the pvclock character device.
 *
 * dev:     device whose si_drv1 holds the base address of the time info
 *          records.
 * offset:  byte offset into the records being mapped.
 * paddr:   out; physical address backing 'offset'.
 * nprot:   requested protection.
 * memattr: out; always VM_MEMATTR_DEFAULT on success.
 *
 * Returns EINVAL when 'offset' lies at or beyond the end of the per-CPU
 * records, EACCES when anything other than read-only access is requested,
 * and 0 on success.  The offset check is made first.
 */
static int
pvclock_cdev_mmap(struct cdev *dev, vm_ooffset_t offset, vm_paddr_t *paddr,
    int nprot, vm_memattr_t *memattr)
{
	/* Total size of the per-CPU records: one per CPU. */
	const size_t limit = mp_ncpus * sizeof(struct pvclock_vcpu_time_info);

	if (offset >= limit)
		return (EINVAL);
	if (PROT_EXTRACT(nprot) != PROT_READ)
		return (EACCES);

	*paddr = vtophys((uintptr_t)dev->si_drv1 + offset);
	*memattr = VM_MEMATTR_DEFAULT;
	return (0);
}
/*
 * Timecounter read callback: return the low bits (masked with UINT_MAX) of
 * the current system time for the pvclock instance stored in tc_priv.
 */
static u_int
pvclock_tc_get_timecount(struct timecounter *tc)
{
	struct pvclock *pvc;
	uint64_t systime;

	pvc = tc->tc_priv;
	systime = pvclock_getsystime(pvc);
	return (systime & UINT_MAX);
}
/*
 * Fill the machine-dependent part of a vDSO timehands for this timecounter.
 *
 * Returns 0 without touching 'vdso_th' when the character device was never
 * created.  Otherwise the pvclock algorithm id, the last recorded system
 * time and the stable-flag mask are filled in.  The result is then non-zero
 * when either RDTSCP is available, or the stable mask is set and use
 * without RDTSCP has been explicitly enabled.
 */
static uint32_t
pvclock_tc_vdso_timehands(struct vdso_timehands *vdso_th,
    struct timecounter *tc)
{
	struct pvclock *pvc;

	pvc = tc->tc_priv;
	if (pvc->cdev == NULL)
		return (0);

	vdso_th->th_algo = VDSO_TH_ALGO_X86_PVCLK;
	vdso_th->th_x86_shift = 0;
	vdso_th->th_x86_hpet_idx = 0;
	vdso_th->th_x86_pvc_last_systime =
	    atomic_load_acq_64(&pvclock_last_systime);
	/* Advertise the stable flag only if supported and not suppressed. */
	if (!pvc->vdso_force_unstable && pvc->stable_flag_supported)
		vdso_th->th_x86_pvc_stable_mask = PVCLOCK_FLAG_TSC_STABLE;
	else
		vdso_th->th_x86_pvc_stable_mask = 0;
	bzero(vdso_th->th_res, sizeof(vdso_th->th_res));

	if ((amd_feature & AMDID_RDTSCP) != 0)
		return (1);
	if ((vdso_th->th_x86_pvc_stable_mask & PVCLOCK_FLAG_TSC_STABLE) == 0)
		return (0);
	return (pvc->vdso_enable_without_rdtscp ? 1 : 0);
}
#ifdef COMPAT_FREEBSD32
/*
 * 32-bit compat counterpart of pvclock_tc_vdso_timehands().
 *
 * Fills the same fields with the same values.  The only difference is that
 * the last recorded system time is stored as one 64-bit value through the
 * address of element 0 of th_x86_pvc_last_systime.  Return value rules are
 * identical to the native variant.
 */
static uint32_t
pvclock_tc_vdso_timehands32(struct vdso_timehands32 *vdso_th,
    struct timecounter *tc)
{
	struct pvclock *pvc;

	pvc = tc->tc_priv;
	if (pvc->cdev == NULL)
		return (0);

	vdso_th->th_algo = VDSO_TH_ALGO_X86_PVCLK;
	vdso_th->th_x86_shift = 0;
	vdso_th->th_x86_hpet_idx = 0;
	*(uint64_t *)&vdso_th->th_x86_pvc_last_systime[0] =
	    atomic_load_acq_64(&pvclock_last_systime);
	/* Advertise the stable flag only if supported and not suppressed. */
	if (!pvc->vdso_force_unstable && pvc->stable_flag_supported)
		vdso_th->th_x86_pvc_stable_mask = PVCLOCK_FLAG_TSC_STABLE;
	else
		vdso_th->th_x86_pvc_stable_mask = 0;
	bzero(vdso_th->th_res, sizeof(vdso_th->th_res));

	if ((amd_feature & AMDID_RDTSCP) != 0)
		return (1);
	if ((vdso_th->th_x86_pvc_stable_mask & PVCLOCK_FLAG_TSC_STABLE) == 0)
		return (0);
	return (pvc->vdso_enable_without_rdtscp ? 1 : 0);
}
#endif
/*
 * Compute the current time of day for a pvclock instance.
 *
 * The wall clock record returned by the instance's get_wallclock callback
 * is read into 'ts'.  The current system time (nanoseconds) is then split
 * into seconds and nanoseconds and added on top.
 *
 * pvc: pvclock instance.
 * ts:  out; wall clock value plus system time.
 */
void
pvclock_gettime(struct pvclock *pvc, struct timespec *ts)
{
	struct pvclock_wall_clock *wc;
	struct timespec sys_ts;
	uint64_t sys_ns;

	wc = pvc->get_wallclock(pvc->get_wallclock_arg);
	pvclock_read_wall_clock(wc, ts);

	sys_ns = pvclock_getsystime(pvc);
	sys_ts.tv_sec = sys_ns / 1000000000ULL;
	sys_ts.tv_nsec = sys_ns % 1000000000ULL;

	timespecadd(ts, &sys_ts, ts);
}
/*
 * Initialize a pvclock instance: register its sysctl knobs, fill in and
 * register its timecounter, create the character device and register the
 * device as a clock.
 *
 * pvc:        instance to initialize; pvc->timeinfos must already point at
 *             page-aligned time info storage (asserted below).
 * dev:        owning device; provides the sysctl context/tree and is used
 *             for diagnostics and clock registration.
 * tc_name:    name stored in the timecounter.
 * tc_quality: quality stored in the timecounter.
 * tc_flags:   flags stored in the timecounter.
 *
 * Failure to create the character device is not fatal: a message is
 * printed, pvc->cdev stays NULL, and the vDSO fill callbacks then return 0.
 */
void
pvclock_init(struct pvclock *pvc, device_t dev, const char *tc_name,
int tc_quality, u_int tc_flags)
{
struct make_dev_args mda;
int err;
KASSERT(((uintptr_t)pvc->timeinfos & PAGE_MASK) == 0,
("Specified time info page(s) address is not page-aligned."));
/* Knob that forces the vDSO stable mask to 0 (see the vDSO callbacks). */
pvc->vdso_force_unstable = false;
SYSCTL_ADD_BOOL(device_get_sysctl_ctx(dev),
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
"vdso_force_unstable", CTLFLAG_RW, &pvc->vdso_force_unstable, 0,
"Forcibly deassert stable flag in vDSO codepath");
/*
 * Knob that allows the vDSO path when RDTSCP is absent.
 * NOTE(review): CTLFLAG_RWTUN presumably lets a loader tunable override the
 * default at registration time, so the assignment must stay before
 * SYSCTL_ADD_BOOL -- confirm.
 */
pvc->vdso_enable_without_rdtscp = false;
SYSCTL_ADD_BOOL(device_get_sysctl_ctx(dev),
SYSCTL_CHILDREN(device_get_sysctl_tree(dev)), OID_AUTO,
"vdso_enable_without_rdtscp", CTLFLAG_RWTUN,
&pvc->vdso_enable_without_rdtscp, 0,
"Allow the use of a vDSO when rdtscp is not available");
/* Timecounter description: 32-bit counter ticking at 10^9 per second. */
pvc->tc.tc_get_timecount = pvclock_tc_get_timecount;
pvc->tc.tc_poll_pps = NULL;
pvc->tc.tc_counter_mask = ~0U;
pvc->tc.tc_frequency = 1000000000ULL;
pvc->tc.tc_name = tc_name;
pvc->tc.tc_quality = tc_quality;
pvc->tc.tc_flags = tc_flags;
pvc->tc.tc_priv = pvc;
pvc->tc.tc_fill_vdso_timehands = pvclock_tc_vdso_timehands;
#ifdef COMPAT_FREEBSD32
pvc->tc.tc_fill_vdso_timehands32 = pvclock_tc_vdso_timehands32;
#endif
/*
 * Read-only (0444) root:wheel character device.  si_drv1 carries the time
 * info base address that pvclock_cdev_mmap() translates.
 */
make_dev_args_init(&mda);
mda.mda_devsw = &pvclock_cdev_cdevsw;
mda.mda_uid = UID_ROOT;
mda.mda_gid = GID_WHEEL;
mda.mda_mode = 0444;
mda.mda_si_drv1 = pvc->timeinfos;
err = make_dev_s(&mda, &pvc->cdev, PVCLOCK_CDEVNAME);
if (err != 0) {
device_printf(dev, "Could not create /dev/%s, error %d. Fast "
"time of day will be unavailable for this timecounter.\n",
PVCLOCK_CDEVNAME, err);
/* The vDSO callbacks rely on cdev being NULL to detect this failure. */
KASSERT(pvc->cdev == NULL,
("Failed make_dev_s() unexpectedly inited cdev."));
}
/* Register the timecounter only after the cdev attempt has settled. */
tc_init(&pvc->tc);
/* NOTE(review): second argument is presumably the resolution; confirm units. */
clock_register(dev, 1);
}
/*
 * Teardown entry point for a pvclock instance.  No teardown is performed:
 * the function unconditionally returns EBUSY and does not examine 'pvc'.
 * NOTE(review): presumably because the timecounter registered in
 * pvclock_init() cannot be unregistered -- confirm.
 */
int
pvclock_destroy(struct pvclock *pvc)
{
return (EBUSY);
}