root/tools/virtio/ringtest/ring.c
// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright (C) 2016 Red Hat, Inc.
 * Author: Michael S. Tsirkin <mst@redhat.com>
 *
 * Simple descriptor-based ring. virtio 0.9 compatible event index is used for
 * signalling, unconditionally.
 */
#define _GNU_SOURCE
#include "main.h"
#include <stdlib.h>
#include <stdio.h>
#include <string.h>

/* Next - Where next entry will be written.
 * Prev - "Next" value when event triggered previously.
 * Event - Peer requested event after writing this entry.
 */
static inline bool need_event(unsigned short event,
                              unsigned short next,
                              unsigned short prev)
{
        return (unsigned short)(next - event - 1) < (unsigned short)(next - prev);
}

/* Design:
 * Guest adds descriptors with unique index values and DESC_HW in flags.
 * Host overwrites used descriptors with correct len, index, and DESC_HW clear.
 * Flags are always set last.
 */
#define DESC_HW 0x1

struct desc {
        unsigned short flags;
        unsigned short index;
        unsigned len;
        unsigned long long addr;
};

/* how much padding is needed to avoid false cache sharing */
#define HOST_GUEST_PADDING 0x80

/* Mostly read */
struct event {
        unsigned short kick_index;
        unsigned char reserved0[HOST_GUEST_PADDING - 2];
        unsigned short call_index;
        unsigned char reserved1[HOST_GUEST_PADDING - 2];
};

struct data {
        void *buf; /* descriptor is writeable, we can't get buf from there */
        void *data;
} *data;

struct desc *ring;
struct event *event;

struct guest {
        unsigned avail_idx;
        unsigned last_used_idx;
        unsigned num_free;
        unsigned kicked_avail_idx;
        unsigned char reserved[HOST_GUEST_PADDING - 12];
} guest;

struct host {
        /* we do not need to track last avail index
         * unless we have more than one in flight.
         */
        unsigned used_idx;
        unsigned called_used_idx;
        unsigned char reserved[HOST_GUEST_PADDING - 4];
} host;

/* implemented by ring */
void alloc_ring(void)
{
        int ret;
        int i;

        ret = posix_memalign((void **)&ring, 0x1000, ring_size * sizeof *ring);
        if (ret) {
                perror("Unable to allocate ring buffer.\n");
                exit(3);
        }
        event = calloc(1, sizeof(*event));
        if (!event) {
                perror("Unable to allocate event buffer.\n");
                exit(3);
        }
        guest.avail_idx = 0;
        guest.kicked_avail_idx = -1;
        guest.last_used_idx = 0;
        host.used_idx = 0;
        host.called_used_idx = -1;
        for (i = 0; i < ring_size; ++i) {
                struct desc desc = {
                        .index = i,
                };
                ring[i] = desc;
        }
        guest.num_free = ring_size;
        data = calloc(ring_size, sizeof(*data));
        if (!data) {
                perror("Unable to allocate data buffer.\n");
                exit(3);
        }
}

/* guest side */
int add_inbuf(unsigned len, void *buf, void *datap)
{
        unsigned head, index;

        if (!guest.num_free)
                return -1;

        guest.num_free--;
        head = (ring_size - 1) & (guest.avail_idx++);

        /* Start with a write. On MESI architectures this helps
         * avoid a shared state with consumer that is polling this descriptor.
         */
        ring[head].addr = (unsigned long)(void*)buf;
        ring[head].len = len;
        /* read below might bypass write above. That is OK because it's just an
         * optimization. If this happens, we will get the cache line in a
         * shared state which is unfortunate, but probably not worth it to
         * add an explicit full barrier to avoid this.
         */
        barrier();
        index = ring[head].index;
        data[index].buf = buf;
        data[index].data = datap;
        /* Barrier A (for pairing) */
        smp_release();
        ring[head].flags = DESC_HW;

        return 0;
}

void *get_buf(unsigned *lenp, void **bufp)
{
        unsigned head = (ring_size - 1) & guest.last_used_idx;
        unsigned index;
        void *datap;

        if (ring[head].flags & DESC_HW)
                return NULL;
        /* Barrier B (for pairing) */
        smp_acquire();
        *lenp = ring[head].len;
        index = ring[head].index & (ring_size - 1);
        datap = data[index].data;
        *bufp = data[index].buf;
        data[index].buf = NULL;
        data[index].data = NULL;
        guest.num_free++;
        guest.last_used_idx++;
        return datap;
}

bool used_empty()
{
        unsigned head = (ring_size - 1) & guest.last_used_idx;

        return (ring[head].flags & DESC_HW);
}

void disable_call()
{
        /* Doing nothing to disable calls might cause
         * extra interrupts, but reduces the number of cache misses.
         */
}

bool enable_call()
{
        event->call_index = guest.last_used_idx;
        /* Flush call index write */
        /* Barrier D (for pairing) */
        smp_mb();
        return used_empty();
}

void kick_available(void)
{
        bool need;

        /* Flush in previous flags write */
        /* Barrier C (for pairing) */
        smp_mb();
        need = need_event(event->kick_index,
                           guest.avail_idx,
                           guest.kicked_avail_idx);

        guest.kicked_avail_idx = guest.avail_idx;
        if (need)
                kick();
}

/* host side */
void disable_kick()
{
        /* Doing nothing to disable kicks might cause
         * extra interrupts, but reduces the number of cache misses.
         */
}

bool enable_kick()
{
        event->kick_index = host.used_idx;
        /* Barrier C (for pairing) */
        smp_mb();
        return avail_empty();
}

bool avail_empty()
{
        unsigned head = (ring_size - 1) & host.used_idx;

        return !(ring[head].flags & DESC_HW);
}

bool use_buf(unsigned *lenp, void **bufp)
{
        unsigned head = (ring_size - 1) & host.used_idx;

        if (!(ring[head].flags & DESC_HW))
                return false;

        /* make sure length read below is not speculated */
        /* Barrier A (for pairing) */
        smp_acquire();

        /* simple in-order completion: we don't need
         * to touch index at all. This also means we
         * can just modify the descriptor in-place.
         */
        ring[head].len--;
        /* Make sure len is valid before flags.
         * Note: alternative is to write len and flags in one access -
         * possible on 64 bit architectures but wmb is free on Intel anyway
         * so I have no way to test whether it's a gain.
         */
        /* Barrier B (for pairing) */
        smp_release();
        ring[head].flags = 0;
        host.used_idx++;
        return true;
}

void call_used(void)
{
        bool need;

        /* Flush in previous flags write */
        /* Barrier D (for pairing) */
        smp_mb();

        need = need_event(event->call_index,
                        host.used_idx,
                        host.called_used_idx);

        host.called_used_idx = host.used_idx;

        if (need)
                call();
}