// root/src/system/kernel/events/event_queue.cpp
/*
 * Copyright 2015, Hamish Morrison, hamishm53@gmail.com.
 * Copyright 2023, Haiku, Inc. All rights reserved.
 * Distributed under the terms of the MIT License.
 */

#include <event_queue.h>

#include <OS.h>

#include <AutoDeleter.h>

#include <fs/fd.h>
#include <port.h>
#include <sem.h>
#include <syscalls.h>
#include <syscall_restart.h>
#include <thread.h>
#include <util/AutoLock.h>
#include <util/AVLTree.h>
#include <util/DoublyLinkedList.h>
#include <AutoDeleterDrivers.h>
#include <StackOrHeapArray.h>
#include <wait_for_objects.h>

#include "select_ops.h"
#include "select_sync.h"


enum {
        // Kernel-private state bits stored in select_event::events alongside
        // the public B_EVENT_* flags; USER_EVENTS() strips them before events
        // are reported to callers.
        B_EVENT_QUEUED                  = (1 << 28),    // event is linked into fEventList
        B_EVENT_SELECTING               = (1 << 29),    // select_object() in progress (lock dropped)
        B_EVENT_DELETING                = (1 << 30),    // deselection/deletion in progress
        /* (signed) */
        B_EVENT_PRIVATE_MASK    = (0xf0000000)
};


// Extract the behavior bits (trigger mode / one-shot) from an events mask.
#define EVENT_BEHAVIOR(events) ((events) & (B_EVENT_LEVEL_TRIGGERED | B_EVENT_ONE_SHOT))
// Strip the kernel-private high bits before reporting events to callers.
#define USER_EVENTS(events) ((events) & ~B_EVENT_PRIVATE_MASK)

// Events that are always selected, regardless of what the caller asked for.
#define B_EVENT_NON_MASKABLE (B_EVENT_INVALID | B_EVENT_ERROR | B_EVENT_DISCONNECTED)



// One selected {object, type} pair registered with an EventQueue. Inherits
// select_info (sync / selected_events / events fields), AVLTreeNode for
// membership in EventQueue::fEventTree, and a list link for fEventList.
struct select_event : select_info, AVLTreeNode,
                DoublyLinkedListLinkImpl<select_event> {
        int32                           object;         // ID of the monitored object
        uint16                          type;           // object type, interpreted by select_object()
        uint32                          behavior;       // B_EVENT_LEVEL_TRIGGERED and/or B_EVENT_ONE_SHOT
        void*                           user_data;      // opaque pointer returned with dequeued events
};


// AVLTree policy for indexing select_events by their {object, type} pair.
struct EventQueueTreeDefinition {
        typedef struct {
                int32 object;
                uint16 type;
        }                                               Key;
        typedef select_event    Value;

        AVLTreeNode* GetAVLTreeNode(Value* value) const
        {
                return value;
        }

        Value* GetValue(AVLTreeNode* node) const
        {
                return static_cast<Value*>(node);
        }

        // Order first by object ID, then by type. Use explicit -1/0/1 style
        // comparisons for the int32 component: subtracting two int32 values
        // can overflow (undefined behavior) and produce a result with the
        // wrong sign when the IDs are far apart. The uint16 types promote to
        // int, so their subtraction cannot overflow.
        int Compare(Key a, const Value* b) const
        {
                if (a.object != b->object)
                        return a.object < b->object ? -1 : 1;
                return (int)a.type - (int)b->type;
        }

        int Compare(const Value* a, const Value* b) const
        {
                if (a->object != b->object)
                        return a->object < b->object ? -1 : 1;
                return (int)a->type - (int)b->type;
        }
};


//      #pragma mark -- EventQueue implementation


// An event queue: a set of selected {object, type} pairs (fEventTree) plus a
// FIFO of pending notifications (fEventList). Inherits select_sync; each
// select_event's "sync" field points back at its owning queue.
class EventQueue : public select_sync {
public:
                                                EventQueue(bool kernel);
                                                ~EventQueue();

        // Called when the queue's descriptor is closed; wakes all waiters.
        void                            Closed();

        // Add, inspect, or remove the selection for {object, type}.
        status_t                        Select(int32 object, uint16 type, uint32 events, void* userData);
        status_t                        Query(int32 object, uint16 type, uint32* selectedEvents, void** userData);
        status_t                        Deselect(int32 object, uint16 type);

        // select_sync notification hook; "info" is one of our select_events.
        status_t                        Notify(select_info* info, uint16 events);

        // Dequeue up to numInfos pending events, blocking until "timeout".
        ssize_t                         Wait(event_wait_info* infos, int numInfos,
                                                        int32 flags, bigtime_t timeout);

private:
        void                            _Notify(select_event* event, uint16 events);
        status_t                        _DeselectEvent(select_event* event);

        ssize_t                         _DequeueEvents(event_wait_info* infos, int numInfos);

        select_event*           _GetEvent(int32 object, uint16 type);

private:
        typedef AVLTree<EventQueueTreeDefinition> EventTree;
        typedef DoublyLinkedList<select_event> EventList;

        bool                            fKernel;        // passed through to select/deselect_object()
        bool                            fClosing;       // set by Closed(); makes Wait() return B_FILE_ERROR

        /*
         * This flag is set in _DequeueEvents when we have to drop the lock to
         * deselect an object to prevent another _DequeueEvents call concurrently
         * modifying the list.
         */
        bool                            fDequeueing;

        EventList                       fEventList;     // queued (pending) events, FIFO order
        EventTree                       fEventTree;     // all selected events, keyed by {object, type}

        /*
         * Protects the queue. We cannot call select or deselect while holding
         * this, because it will invert the locking order with EventQueue::Notify.
         */
        mutex                           fQueueLock;

        /*
         * Notified when events are available on the queue.
         */
        ConditionVariable       fQueueCondition;

        /*
         * Used to wait on a changing select_event while the queue lock is dropped
         * during a call to select/deselect.
         */
        ConditionVariable       fEventCondition;
};


// Construct an empty queue. "kernel" selects whether select/deselect_object
// calls are made on behalf of the kernel or a userland team.
EventQueue::EventQueue(bool kernel)
        :
        fKernel(kernel),
        fClosing(false),
        fDequeueing(false)
{
        mutex_init(&fQueueLock, "event_queue lock");
        fQueueCondition.Init(this, "evtq wait");
        fEventCondition.Init(this, "event_queue event change wait");
}


// Tear down all remaining selections. Only runs after Closed() and with no
// active waiter (asserted below), but notifications may still arrive until
// each event is deselected.
EventQueue::~EventQueue()
{
        mutex_lock(&fQueueLock);
        ASSERT(fClosing && !fDequeueing);

        EventTree::Iterator iter = fEventTree.GetIterator();
        while (iter.HasNext()) {
                select_event* event = iter.Next();
                // Mark the event deleting so _Notify() ignores it once we drop
                // the lock for the deselect call below.
                event->events |= B_EVENT_DELETING;

                // Deselect with the lock dropped to keep the locking order
                // consistent with Notify() (see fQueueLock's comment).
                mutex_unlock(&fQueueLock);
                _DeselectEvent(event);
                mutex_lock(&fQueueLock);

                iter.Remove();
                if ((event->events & B_EVENT_QUEUED) != 0)
                        fEventList.Remove(event);
                delete event;
        }

        EventList::Iterator listIter = fEventList.GetIterator();
        while (listIter.HasNext()) {
                select_event* event = listIter.Next();

                // We already removed all events in the tree from this list.
                // The only remaining events will be INVALID ones already deselected.
                delete event;
        }

        mutex_destroy(&fQueueLock);
}


void
EventQueue::Closed()
{
        MutexLocker locker(&fQueueLock);

        fClosing = true;
        locker.Unlock();

        // Wake up all waiters
        fQueueCondition.NotifyAll(B_FILE_ERROR);
}


// Select "events" on {object, type}, replacing any existing selection.
// Returns B_OK on success, EEXIST if another thread re-selected the pair
// while we were replacing it, or the error from select_object().
status_t
EventQueue::Select(int32 object, uint16 type, uint32 events, void* userData)
{
        MutexLocker locker(&fQueueLock);

        select_event* event = _GetEvent(object, type);
        if (event != NULL) {
                // Identical selection already in place: nothing to do.
                if ((event->selected_events | event->behavior)
                                == (USER_EVENTS(events) | B_EVENT_NON_MASKABLE))
                        return B_OK;

                // Rather than try to reuse the event object, which would be complicated
                // and error-prone, perform a full de-selection and then re-selection.
                locker.Unlock();
                status_t status = Deselect(object, type);
                if (status != B_OK)
                        return status;
                locker.Lock();

                // Make sure nothing else re-selected before we reacquired the lock.
                event = _GetEvent(object, type);
                if (event != NULL)
                        return EEXIST;
        }

        event = new(std::nothrow) select_event;
        if (event == NULL)
                return B_NO_MEMORY;
        ObjectDeleter<select_event> eventDeleter(event);

        event->sync = this;
        event->object = object;
        event->type = type;
        event->behavior = EVENT_BEHAVIOR(events);
        event->user_data = userData;
        event->events = 0;

        status_t result = fEventTree.Insert(event);
        if (result != B_OK)
                return result;

        // We drop the lock before calling select() to avoid inverting the
        // locking order with Notify(). Setting the B_EVENT_SELECTING flag prevents
        // the event from being used or even deleted before it is ready.
        event->events |= B_EVENT_SELECTING;
        event->selected_events = USER_EVENTS(events) | B_EVENT_NON_MASKABLE;

        locker.Unlock();

        status_t status = select_object(event->type, event->object, event, fKernel);
        if (status < 0) {
                // Selection failed: remove the half-initialized event from the
                // tree and wake anyone blocked in _GetEvent() on it. The
                // eventDeleter still owns and frees the event.
                locker.Lock();
                fEventTree.Remove(event);
                fEventCondition.NotifyAll();
                return status;
        }

        eventDeleter.Detach();

        // Selection complete: clear the transient flag and wake _GetEvent()
        // callers that were waiting for it.
        atomic_and(&event->events, ~B_EVENT_SELECTING);
        fEventCondition.NotifyAll();

        return B_OK;
}


// Report the currently selected events (including behavior flags) and the
// user data registered for {object, type}.
status_t
EventQueue::Query(int32 object, uint16 type, uint32* selectedEvents, void** userData)
{
        MutexLocker locker(&fQueueLock);

        select_event* found = _GetEvent(object, type);
        if (found == NULL)
                return B_ENTRY_NOT_FOUND;

        *userData = found->user_data;
        *selectedEvents = found->selected_events | found->behavior;
        return B_OK;
}


// Remove the selection for {object, type}, deselecting the object and
// deleting the event. Safe against concurrent Notify() and Deselect() calls.
status_t
EventQueue::Deselect(int32 object, uint16 type)
{
        MutexLocker locker(&fQueueLock);

        select_event* event = _GetEvent(object, type);
        if (event == NULL)
                return B_ENTRY_NOT_FOUND;

        // If B_EVENT_DELETING was already set, another thread is deleting this
        // event; treat the deselection as done.
        if ((atomic_or(&event->events, B_EVENT_DELETING) & B_EVENT_DELETING) != 0)
                return B_OK;

        // Drop the lock for the deselect call to keep the locking order
        // consistent with Notify(); B_EVENT_DELETING guards the event meanwhile.
        locker.Unlock();
        _DeselectEvent(event);
        locker.Lock();

        // An invalidated event was already removed from the tree by _Notify().
        if ((event->events & B_EVENT_INVALID) == 0)
                fEventTree.Remove(event);
        if ((event->events & B_EVENT_QUEUED) != 0)
                fEventList.Remove(event);

        delete event;

        locker.Unlock();
        // Wake _GetEvent() callers that were waiting on the (now gone) event.
        fEventCondition.NotifyAll();
        return B_OK;
}


// Forward to the generic per-type deselection hook for this event's object.
status_t
EventQueue::_DeselectEvent(select_event* event)
{
        const status_t result
                = deselect_object(event->type, event->object, event, fKernel);
        return result;
}


// select_sync hook: every select_info we register is a select_event, so
// downcast and hand off to the internal notification path.
status_t
EventQueue::Notify(select_info* info, uint16 events)
{
        _Notify(static_cast<select_event*>(info), events);
        return B_OK;
}


// Record "events" on the select_event and queue it for delivery if needed.
// Called without fQueueLock held; uses atomics on event->events so it can
// race with Deselect()/_DequeueEvents() safely.
void
EventQueue::_Notify(select_event* event, uint16 events)
{
        // Ignore events the caller did not select (the non-maskable bits are
        // always part of selected_events, see Select()).
        if ((events & event->selected_events) == 0)
                return;

        // Record the events now; B_EVENT_INVALID is deferred until we hold the
        // lock, because handling it also requires removing the event from the tree.
        const int32 previousEvents = atomic_or(&event->events, (events & ~B_EVENT_INVALID));

        // If the event is already being deleted, we should ignore this notification.
        if ((previousEvents & B_EVENT_DELETING) != 0)
                return;

        // If the event is already queued, and it is not becoming invalid,
        // we don't need to do anything more.
        if ((previousEvents & B_EVENT_QUEUED) != 0 && (events & B_EVENT_INVALID) == 0)
                return;

        {
                MutexLocker _(&fQueueLock);

                // We need to recheck B_EVENT_DELETING now we have the lock.
                if ((event->events & B_EVENT_DELETING) != 0)
                        return;

                // If we get B_EVENT_INVALID it means the object we were monitoring was
                // deleted. The object's ID may now be reused, so we must remove it
                // from the event tree.
                if ((events & B_EVENT_INVALID) != 0) {
                        atomic_or(&event->events, B_EVENT_INVALID);
                        fEventTree.Remove(event);
                }

                // If it's not already queued, it's our responsibility to queue it.
                if ((atomic_or(&event->events, B_EVENT_QUEUED) & B_EVENT_QUEUED) == 0) {
                        fEventList.Add(event);
                        fQueueCondition.NotifyAll();
                }
        }
}


// Block until events are pending (or "timeout" expires), then dequeue up to
// numInfos of them into "infos". Returns the number dequeued, B_FILE_ERROR
// if the queue was closed, or the wait error (e.g. timeout/interrupt).
// With numInfos == 0, only waits for something to become pending.
ssize_t
EventQueue::Wait(event_wait_info* infos, int numInfos,
        int32 flags, bigtime_t timeout)
{
        // Timeouts reaching this method are absolute deadlines, or the special
        // values 0 and B_INFINITE_TIMEOUT.
        ASSERT((flags & B_ABSOLUTE_TIMEOUT) != 0
                || (timeout == B_INFINITE_TIMEOUT || timeout == 0));

        MutexLocker queueLocker(&fQueueLock);

        ssize_t count = 0;
        while (timeout == 0 || (system_time() < timeout)) {
                // Sleep until events are queued, the queue closes, or the wait
                // fails (timeout/interruption). fDequeueing being set means
                // another thread is currently draining the list.
                while ((fDequeueing || fEventList.IsEmpty()) && !fClosing) {
                        status_t status = fQueueCondition.Wait(queueLocker.Get(),
                                flags | B_CAN_INTERRUPT, timeout);
                        if (status != B_OK)
                                return status;
                }

                if (fClosing)
                        return B_FILE_ERROR;

                if (numInfos == 0)
                        return B_OK;

                // fDequeueing prevents concurrent dequeuers from mutating the
                // list while _DequeueEvents() temporarily drops fQueueLock.
                fDequeueing = true;
                count = _DequeueEvents(infos, numInfos);
                fDequeueing = false;

                if (count != 0)
                        break;

                // Due to level-triggered events, it is possible for the event list to have
                // been not empty and _DequeueEvents() still returns nothing. Hence, we loop.
        }

        return count;
}


// Drain up to numInfos events from fEventList into "infos". Called with
// fQueueLock held and fDequeueing set; may temporarily drop the lock for
// level-triggered re-selection and for batched deselection of one-shot or
// invalidated events.
ssize_t
EventQueue::_DequeueEvents(event_wait_info* infos, int numInfos)
{
        ssize_t count = 0;

        // One-shot events needing deselection are batched and processed with
        // the lock dropped once, after the main loop.
        const int32 kMaxToDeselect = 8;
        select_event* deselect[kMaxToDeselect];
        int32 deselectCount = 0;

        // Add a marker element, so we don't loop forever after unlocking the list.
        // (There is only one invocation of _DequeueEvents() at a time.)
        select_event marker = {};
        fEventList.Add(&marker);

        for (select_event* event = NULL; count < numInfos; ) {
                if (fEventList.Head() == NULL || fEventList.Head() == &marker)
                        break;

                event = fEventList.RemoveHead();
                // Atomically consume the selected events and the queued flag,
                // preserving any other bits set concurrently.
                int32 events = atomic_and(&event->events,
                        ~(event->selected_events | B_EVENT_QUEUED));

                // Skip events a concurrent Deselect() is tearing down.
                if ((events & B_EVENT_DELETING) != 0)
                        continue;

                if ((events & B_EVENT_INVALID) == 0
                                && (event->behavior & B_EVENT_LEVEL_TRIGGERED) != 0) {
                        // This event is level-triggered. We need to deselect and reselect it,
                        // as its state may have changed since we were notified.
                        const select_event tmp = *event;

                        mutex_unlock(&fQueueLock);
                        status_t status = Deselect(tmp.object, tmp.type);
                        if (status == B_OK) {
                                event = NULL;
                                status = Select(tmp.object, tmp.type,
                                        tmp.selected_events | tmp.behavior, tmp.user_data);
                        }
                        mutex_lock(&fQueueLock);

                        if (status == B_OK) {
                                // Is the event still queued?
                                event = _GetEvent(tmp.object, tmp.type);
                                if (event == NULL)
                                        continue;
                                events = atomic_get(&event->events);
                                if ((events & B_EVENT_QUEUED) == 0)
                                        continue;
                        } else if (event == NULL) {
                                // Deselect succeeded but re-selection failed: the
                                // event is gone, nothing to report for it.
                                continue;
                        }
                }

                infos[count].object = event->object;
                infos[count].type = event->type;
                infos[count].user_data = event->user_data;
                infos[count].events = USER_EVENTS(events);
                count++;

                // All logic past this point has to do with deleting events.
                if ((events & B_EVENT_INVALID) == 0 && (event->behavior & B_EVENT_ONE_SHOT) == 0)
                        continue;

                // Check if the event was requeued.
                if ((atomic_and(&event->events, ~B_EVENT_QUEUED) & B_EVENT_QUEUED) != 0)
                        fEventList.Remove(event);

                if ((events & B_EVENT_INVALID) != 0) {
                        // The event will already have been removed from the tree.
                        delete event;
                } else if ((event->behavior & B_EVENT_ONE_SHOT) != 0) {
                        // We already checked B_EVENT_INVALID above, so we don't need to again.
                        fEventTree.Remove(event);
                        event->events = B_EVENT_DELETING;

                        deselect[deselectCount++] = event;
                        if (deselectCount == kMaxToDeselect)
                                break;
                }
        }

        fEventList.Remove(&marker);

        if (deselectCount != 0) {
                // Deselect with the lock dropped to keep the locking order
                // consistent with Notify().
                mutex_unlock(&fQueueLock);
                for (int32 i = 0; i < deselectCount; i++) {
                        select_event* event = deselect[i];

                        _DeselectEvent(event);
                        delete event;
                }
                mutex_lock(&fQueueLock);

                // We don't need to notify waiters, as we removed the events
                // from anywhere they could be found before dropping the lock.
        }

        return count;
}


/*
 * Get the select_event for the given object and type. Must be called with the
 * queue lock held. This method will sleep if the event is undergoing selection
 * or deletion.
 */
select_event*
EventQueue::_GetEvent(int32 object, uint16 type)
{
        const EventQueueTreeDefinition::Key key = { object, type };

        for (;;) {
                select_event* found = fEventTree.Find(key);
                if (found == NULL)
                        return NULL;

                // A stable event can be returned immediately.
                const int32 transientFlags = B_EVENT_SELECTING | B_EVENT_DELETING;
                if ((found->events & transientFlags) == 0)
                        return found;

                // The event is mid-selection or mid-deletion: sleep until the
                // operation completes, then look the event up again, since it
                // may have been deleted while we slept.
                fEventCondition.Wait(&fQueueLock);
        }
}


//      #pragma mark -- File descriptor ops



// fd_ops close hook: tell the queue its descriptor was closed.
static status_t
event_queue_close(file_descriptor* descriptor)
{
        ((EventQueue*)descriptor->cookie)->Closed();
        return B_OK;
}


// fd_ops free hook: drop the descriptor's reference to the queue.
static void
event_queue_free(file_descriptor* descriptor)
{
        put_select_sync((EventQueue*)descriptor->cookie);
}


// Fetch and reference the file_descriptor for "fd", returning from the
// *calling* function with the error code if it is not an event queue.
#define GET_QUEUE_FD_OR_RETURN(fd, kernel, descriptor)  \
        do {                                                                                            \
                status_t getError = get_queue_descriptor(fd, kernel, descriptor); \
                if (getError != B_OK)                                                   \
                        return getError;                                                        \
        } while (false)


// Only close and free hooks are provided; the remaining fd_ops entries stay
// zero-initialized. Also used by get_queue_descriptor() to recognize event
// queue descriptors.
static struct fd_ops sEventQueueFDOps = {
        &event_queue_close,
        &event_queue_free
};


// Look up "fd" in the current I/O context and verify it refers to an event
// queue. On success the caller owns a reference (release with put_fd()).
static status_t
get_queue_descriptor(int fd, bool kernel, file_descriptor*& descriptor)
{
        if (fd < 0)
                return B_FILE_ERROR;

        io_context* context = get_current_io_context(kernel);
        descriptor = get_fd(context, fd);
        if (descriptor == NULL)
                return B_FILE_ERROR;

        if (descriptor->ops == &sEventQueueFDOps)
                return B_OK;

        // Not an event queue: drop the reference we just acquired.
        put_fd(descriptor);
        return B_BAD_VALUE;
}


//      #pragma mark - User syscalls


// Syscall: create a new event queue and return a file descriptor for it.
// openFlags may include O_CLOEXEC / O_CLOFORK.
int
_user_event_queue_create(int openFlags)
{
        EventQueue* queue = new(std::nothrow) EventQueue(false);
        if (queue == NULL)
                return B_NO_MEMORY;

        // Frees the queue on any failure path below.
        ObjectDeleter<EventQueue> deleter(queue);

        file_descriptor* descriptor = alloc_fd();
        if (descriptor == NULL)
                return B_NO_MEMORY;

        descriptor->ops = &sEventQueueFDOps;
        descriptor->cookie = (struct event_queue*)queue;
        descriptor->open_mode = O_RDWR | openFlags;

        io_context* context = get_current_io_context(false);
        int fd = new_fd(context, descriptor);
        if (fd < 0) {
                // new_fd() failed without consuming the descriptor; free it.
                free(descriptor);
                return fd;
        }

        // Apply close-on-exec/fork per the caller's open flags.
        rw_lock_write_lock(&context->lock);
        fd_set_close_on_exec(context, fd, (openFlags & O_CLOEXEC) != 0);
        fd_set_close_on_fork(context, fd, (openFlags & O_CLOFORK) != 0);
        rw_lock_write_unlock(&context->lock);

        // The descriptor now owns the queue (released via event_queue_free()).
        deleter.Detach();
        return fd;
}


// Syscall: perform select/query/deselect operations on an event queue.
// The sign of each info's "events" field selects the operation:
//   > 0: (re)select those events, < 0: query the current selection,
//   == 0: deselect the object.
// Per-item errors are written back into the user's "events" field and the
// call returns B_ERROR if any item failed.
status_t
_user_event_queue_select(int queue, event_wait_info* userInfos, int numInfos)
{
        if (numInfos <= 0)
                return B_BAD_VALUE;
        if (userInfos == NULL || !IS_USER_ADDRESS(userInfos))
                return B_BAD_ADDRESS;

        BStackOrHeapArray<event_wait_info, 16> infos(numInfos);
        if (!infos.IsValid())
                return B_NO_MEMORY;

        file_descriptor* descriptor;
        GET_QUEUE_FD_OR_RETURN(queue, false, descriptor);
        FileDescriptorPutter _(descriptor);

        EventQueue* eventQueue = (EventQueue*)descriptor->cookie;

        if (user_memcpy(infos, userInfos, sizeof(event_wait_info) * numInfos) != B_OK)
                return B_BAD_ADDRESS;

        status_t result = B_OK;

        for (int i = 0; i < numInfos; i++) {
                status_t error;
                if (infos[i].events > 0) {
                        error = eventQueue->Select(infos[i].object, infos[i].type,
                                infos[i].events, infos[i].user_data);
                } else if (infos[i].events < 0) {
                        // Query: report the selection back to userland immediately.
                        uint32 selectedEvents = 0;
                        error = eventQueue->Query(infos[i].object, infos[i].type,
                                &selectedEvents, &infos[i].user_data);
                        if (error == B_OK) {
                                infos[i].events = selectedEvents;
                                error = user_memcpy(&userInfos[i], &infos[i], sizeof(event_wait_info));
                        }
                } else /* == 0 */ {
                        error = eventQueue->Deselect(infos[i].object, infos[i].type);
                }

                if (error != B_OK) {
                        // Store the per-item error code in the user's "events" field.
                        user_memcpy(&userInfos[i].events, &error, sizeof(userInfos[i].events));
                        result = B_ERROR;
                }
        }

        return result;
}


// Syscall: wait for events on an event queue. Returns the number of events
// written to userInfos, or an error. Supports syscall restart on interrupt.
ssize_t
_user_event_queue_wait(int queue, event_wait_info* userInfos, int numInfos,
        uint32 flags, bigtime_t timeout)
{
        // Set up restart bookkeeping / deadline handling for the timeout.
        syscall_restart_handle_timeout_pre(flags, timeout);

        if (numInfos < 0)
                return B_BAD_VALUE;
        if (numInfos > 0 && (userInfos == NULL || !IS_USER_ADDRESS(userInfos)))
                return B_BAD_ADDRESS;

        // No timeout flags means wait indefinitely.
        if ((flags & (B_RELATIVE_TIMEOUT | B_ABSOLUTE_TIMEOUT)) == 0)
                timeout = B_INFINITE_TIMEOUT;

        BStackOrHeapArray<event_wait_info, 16> infos(numInfos);
        if (!infos.IsValid())
                return B_NO_MEMORY;

        file_descriptor* descriptor;
        GET_QUEUE_FD_OR_RETURN(queue, false, descriptor);
        FileDescriptorPutter _(descriptor);

        EventQueue* eventQueue = (EventQueue*)descriptor->cookie;

        ssize_t result = eventQueue->Wait(infos, numInfos, flags, timeout);
        if (result < 0)
                return syscall_restart_handle_timeout_post(result, timeout);

        // Copy the dequeued events back to userland.
        status_t status = B_OK;
        if (numInfos != 0)
                status = user_memcpy(userInfos, infos, sizeof(event_wait_info) * numInfos);

        return status == B_OK ? result : status;
}