tools/perf/util/ordered-events.c
// SPDX-License-Identifier: GPL-2.0
#include <errno.h>
#include <inttypes.h>
#include <linux/list.h>
#include <linux/compiler.h>
#include <linux/string.h>
#include "ordered-events.h"
#include "session.h"
#include "asm/bug.h"
#include "debug.h"
#include "ui/progress.h"

#define pr_N(n, fmt, ...) \
        eprintf(n, debug_ordered_events, fmt, ##__VA_ARGS__)

#define pr(fmt, ...) pr_N(1, pr_fmt(fmt), ##__VA_ARGS__)

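/*
 * Insert @new into the time ordered 'events' list. The search starts
 * at the previously queued event (oe->last) and walks forward or
 * backward depending on how the new timestamp compares to it.
 */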
static void queue_event(struct ordered_events *oe, struct ordered_event *new)
{
        struct ordered_event *last = oe->last;
        u64 timestamp = new->timestamp;
        struct list_head *p;

        ++oe->nr_events;
        oe->last = new;

        pr_oe_time2(timestamp, "queue_event nr_events %u\n", oe->nr_events);

        if (!last) {
                list_add(&new->list, &oe->events);
                oe->max_timestamp = timestamp;
                return;
        }

        /*
         * last event might point to some random place in the list as it's
         * the last queued event. We expect that the new event is close to
         * this.
         */
        if (last->timestamp <= timestamp) {
                while (last->timestamp <= timestamp) {
                        p = last->list.next;
                        if (p == &oe->events) {
                                list_add_tail(&new->list, &oe->events);
                                oe->max_timestamp = timestamp;
                                return;
                        }
                        last = list_entry(p, struct ordered_event, list);
                }
                list_add_tail(&new->list, &last->list);
        } else {
                while (last->timestamp > timestamp) {
                        p = last->list.prev;
                        if (p == &oe->events) {
                                list_add(&new->list, &oe->events);
                                return;
                        }
                        last = list_entry(p, struct ordered_event, list);
                }
                list_add(&new->list, &last->list);
        }
}

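/*
 * Duplicate @event so the queue keeps its own copy. The duplicate is
 * accounted against max_alloc_size and the allocation fails (NULL)
 * once that limit has been reached.
 */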
static union perf_event *__dup_event(struct ordered_events *oe,
                                     union perf_event *event)
{
        union perf_event *new_event = NULL;

        if (oe->cur_alloc_size < oe->max_alloc_size) {
                new_event = memdup(event, event->header.size);
                if (new_event)
                        oe->cur_alloc_size += event->header.size;
        }

        return new_event;
}

static union perf_event *dup_event(struct ordered_events *oe,
                                   union perf_event *event)
{
        return oe->copy_on_queue ? __dup_event(oe, event) : event;
}

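/*
 * Free an event duplicated by __dup_event() and return its size to the
 * allocation accounting. free_dup_event() is a no-op when events are
 * not copied on queue.
 */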
static void __free_dup_event(struct ordered_events *oe, union perf_event *event)
{
        if (event) {
                oe->cur_alloc_size -= event->header.size;
                free(event);
        }
}

static void free_dup_event(struct ordered_events *oe, union perf_event *event)
{
        if (oe->copy_on_queue)
                __free_dup_event(oe, event);
}

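/* Number of struct ordered_event slots carved out of each ~64K buffer. */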
#define MAX_SAMPLE_BUFFER       (64 * 1024 / sizeof(struct ordered_event))
static struct ordered_event *alloc_event(struct ordered_events *oe,
                                         union perf_event *event)
{
        struct list_head *cache = &oe->cache;
        struct ordered_event *new = NULL;
        union perf_event *new_event;
        size_t size;

        new_event = dup_event(oe, event);
        if (!new_event)
                return NULL;

        /*
         * We maintain the following scheme of buffers for ordered
         * event allocation:
         *
         *   to_free list -> buffer1 (64K)
         *                   buffer2 (64K)
         *                   ...
         *
         * Each buffer keeps an array of ordered events objects:
         *    buffer -> event[0]
         *              event[1]
         *              ...
         *
         * Each allocated ordered event is linked to one of the
         * following lists:
         *   - time ordered list 'events'
         *   - list of currently removed events 'cache'
         *
         * Allocation of the ordered event uses the following order
         * to get the memory:
         *   - use recently removed object from 'cache' list
         *   - use available object in current allocation buffer
         *   - allocate new buffer if the current buffer is full
         *
         * Removal of an ordered event object moves it from the events
         * list to the cache list.
         */
        size = sizeof(*oe->buffer) + MAX_SAMPLE_BUFFER * sizeof(*new);

        if (!list_empty(cache)) {
                new = list_entry(cache->next, struct ordered_event, list);
                list_del_init(&new->list);
        } else if (oe->buffer) {
                new = &oe->buffer->event[oe->buffer_idx];
                if (++oe->buffer_idx == MAX_SAMPLE_BUFFER)
                        oe->buffer = NULL;
        } else if ((oe->cur_alloc_size + size) < oe->max_alloc_size) {
                oe->buffer = malloc(size);
                if (!oe->buffer) {
                        free_dup_event(oe, new_event);
                        return NULL;
                }

                pr("alloc size %" PRIu64 "B (+%zu), max %" PRIu64 "B\n",
                   oe->cur_alloc_size, size, oe->max_alloc_size);

                oe->cur_alloc_size += size;
                list_add(&oe->buffer->list, &oe->to_free);

                oe->buffer_idx = 1;
                new = &oe->buffer->event[0];
        } else {
                pr("allocation limit reached %" PRIu64 "B\n", oe->max_alloc_size);
                return NULL;
        }

        new->event = new_event;
        return new;
}

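/*
 * Allocate an ordered_event for @event, stamp it with @timestamp and
 * insert it into the time ordered queue.
 */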
static struct ordered_event *
ordered_events__new_event(struct ordered_events *oe, u64 timestamp,
                    union perf_event *event)
{
        struct ordered_event *new;

        new = alloc_event(oe, event);
        if (new) {
                new->timestamp = timestamp;
                queue_event(oe, new);
        }

        return new;
}

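/*
 * Remove @event from the 'events' list and park it on the 'cache' list
 * for reuse by alloc_event(); the duplicated perf event, if any, is
 * freed here.
 */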
void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event)
{
        list_move(&event->list, &oe->cache);
        oe->nr_events--;
        free_dup_event(oe, event->event);
        event->event = NULL;
}

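/*
 * Queue a perf event for time ordered delivery. Events without a usable
 * timestamp are rejected with -ETIME. When the allocation limit is hit,
 * half of the queue is flushed to make room before failing with -ENOMEM.
 */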
int ordered_events__queue(struct ordered_events *oe, union perf_event *event,
                          u64 timestamp, u64 file_offset, const char *file_path)
{
        struct ordered_event *oevent;

        if (!timestamp || timestamp == ~0ULL)
                return -ETIME;

        if (timestamp < oe->last_flush) {
                pr_oe_time(timestamp,      "out of order event\n");
                pr_oe_time(oe->last_flush, "last flush, last_flush_type %d\n",
                           oe->last_flush_type);

                oe->nr_unordered_events++;
        }

        oevent = ordered_events__new_event(oe, timestamp, event);
        if (!oevent) {
                ordered_events__flush(oe, OE_FLUSH__HALF);
                oevent = ordered_events__new_event(oe, timestamp, event);
        }

        if (!oevent)
                return -ENOMEM;

        oevent->file_offset = file_offset;
        oevent->file_path = file_path;
        return 0;
}

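/*
 * Deliver all queued events with timestamps up to oe->next_flush,
 * moving each delivered event back to the cache list and advancing
 * last_flush as we go.
 */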
static int do_flush(struct ordered_events *oe, bool show_progress)
{
        struct list_head *head = &oe->events;
        struct ordered_event *tmp, *iter;
        u64 limit = oe->next_flush;
        u64 last_ts = oe->last ? oe->last->timestamp : 0ULL;
        struct ui_progress prog;
        int ret;

        if (!limit)
                return 0;

        if (show_progress)
                ui_progress__init(&prog, oe->nr_events, "Processing time ordered events...");

        list_for_each_entry_safe(iter, tmp, head, list) {
                if (session_done())
                        return 0;

                if (iter->timestamp > limit)
                        break;
                ret = oe->deliver(oe, iter);
                if (ret)
                        return ret;

                ordered_events__delete(oe, iter);
                oe->last_flush = iter->timestamp;

                if (show_progress)
                        ui_progress__update(&prog, 1);
        }

        if (list_empty(head))
                oe->last = NULL;
        else if (last_ts <= limit)
                oe->last = list_entry(head->prev, struct ordered_event, list);

        if (show_progress)
                ui_progress__finish();

        return 0;
}

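/*
 * Set up oe->next_flush for the requested flush type and deliver every
 * queued event up to that limit:
 *   FINAL/TOP - flush everything (FINAL also shows a progress bar)
 *   HALF      - flush up to the midpoint between the oldest and the most
 *               recently queued timestamp
 *   TIME      - flush up to the given timestamp
 *   ROUND     - flush up to the limit left by the previous round, then
 *               advance next_flush to the current max_timestamp
 */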
static int __ordered_events__flush(struct ordered_events *oe, enum oe_flush how,
                                   u64 timestamp)
{
        static const char * const str[] = {
                "NONE",
                "FINAL",
                "ROUND",
                "HALF ",
                "TOP  ",
                "TIME ",
        };
        int err;
        bool show_progress = false;

        if (oe->nr_events == 0)
                return 0;

        switch (how) {
        case OE_FLUSH__FINAL:
                show_progress = true;
                fallthrough;
        case OE_FLUSH__TOP:
                oe->next_flush = ULLONG_MAX;
                break;

        case OE_FLUSH__HALF:
        {
                struct ordered_event *first, *last;
                struct list_head *head = &oe->events;

                first = list_entry(head->next, struct ordered_event, list);
                last = oe->last;

                /* Warn if we are called before any event got allocated. */
                if (WARN_ONCE(!last || list_empty(head), "empty queue"))
                        return 0;

                oe->next_flush  = first->timestamp;
                oe->next_flush += (last->timestamp - first->timestamp) / 2;
                break;
        }

        case OE_FLUSH__TIME:
                oe->next_flush = timestamp;
                show_progress = false;
                break;

        case OE_FLUSH__ROUND:
        case OE_FLUSH__NONE:
        default:
                break;
        }

        pr_oe_time(oe->next_flush, "next_flush - ordered_events__flush PRE  %s, nr_events %u\n",
                   str[how], oe->nr_events);
        pr_oe_time(oe->max_timestamp, "max_timestamp\n");

        err = do_flush(oe, show_progress);

        if (!err) {
                if (how == OE_FLUSH__ROUND)
                        oe->next_flush = oe->max_timestamp;

                oe->last_flush_type = how;
        }

        pr_oe_time(oe->next_flush, "next_flush - ordered_events__flush POST %s, nr_events %u\n",
                   str[how], oe->nr_events);
        pr_oe_time(oe->last_flush, "last_flush\n");

        return err;
}

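/* Flush with a limit derived from the flush type alone. */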
int ordered_events__flush(struct ordered_events *oe, enum oe_flush how)
{
        return __ordered_events__flush(oe, how, 0);
}

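/* Flush all queued events with timestamps up to @timestamp. */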
int ordered_events__flush_time(struct ordered_events *oe, u64 timestamp)
{
        return __ordered_events__flush(oe, OE_FLUSH__TIME, timestamp);
}

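/* Return the timestamp of the oldest queued event, or 0 if the queue is empty. */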
u64 ordered_events__first_time(struct ordered_events *oe)
{
        struct ordered_event *event;

        if (list_empty(&oe->events))
                return 0;

        event = list_first_entry(&oe->events, struct ordered_event, list);
        return event->timestamp;
}

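/*
 * Initialize the queue with empty lists, no allocation limit and the
 * given deliver callback.
 */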
void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver,
                          void *data)
{
        INIT_LIST_HEAD(&oe->events);
        INIT_LIST_HEAD(&oe->cache);
        INIT_LIST_HEAD(&oe->to_free);
        oe->max_alloc_size = (u64) -1;
        oe->cur_alloc_size = 0;
        oe->deliver        = deliver;
        oe->data           = data;
}

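/*
 * Free one allocation buffer together with the duplicated events still
 * referenced by its first @max entries.
 */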
static void
ordered_events_buffer__free(struct ordered_events_buffer *buffer,
                            unsigned int max, struct ordered_events *oe)
{
        if (oe->copy_on_queue) {
                unsigned int i;

                for (i = 0; i < max; i++)
                        __free_dup_event(oe, buffer->event[i].event);
        }

        free(buffer);
}

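/*
 * Release every allocation buffer on the 'to_free' list, including the
 * partially filled current buffer.
 */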
void ordered_events__free(struct ordered_events *oe)
{
        struct ordered_events_buffer *buffer, *tmp;

        if (list_empty(&oe->to_free))
                return;

        /*
         * The current buffer might not have all of its events allocated
         * yet; we need to free only the allocated ones ...
         */
        if (oe->buffer) {
                list_del_init(&oe->buffer->list);
                ordered_events_buffer__free(oe->buffer, oe->buffer_idx, oe);
        }

        /* ... and continue with the rest */
        list_for_each_entry_safe(buffer, tmp, &oe->to_free, list) {
                list_del_init(&buffer->list);
                ordered_events_buffer__free(buffer, MAX_SAMPLE_BUFFER, oe);
        }
}

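/*
 * Drop everything queued so far and reinitialize the queue, preserving
 * the deliver callback and the caller's data pointer.
 */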
void ordered_events__reinit(struct ordered_events *oe)
{
        ordered_events__deliver_t old_deliver = oe->deliver;
        void *old_data = oe->data;

        ordered_events__free(oe);
        memset(oe, '\0', sizeof(*oe));
        /*
         * Use the saved copies here: oe->data was just zeroed by the
         * memset above, so reading it at this point would drop the
         * caller's data pointer.
         */
        ordered_events__init(oe, old_deliver, old_data);
}