root/src/system/kernel/posix/xsi_semaphore.cpp
/*
 * Copyright 2008-2023, Haiku, Inc. All rights reserved.
 * Distributed under the terms of the MIT License.
 *
 * Authors:
 *              Salvatore Benedetto <salvatore.benedetto@gmail.com>
 */

#include <posix/xsi_semaphore.h>

#include <new>

#include <sys/ipc.h>
#include <sys/types.h>

#include <OS.h>

#include <kernel.h>
#include <syscall_restart.h>

#include <util/atomic.h>
#include <util/AutoLock.h>
#include <util/DoublyLinkedList.h>
#include <util/OpenHashTable.h>
#include <AutoDeleter.h>
#include <StackOrHeapArray.h>


//#define TRACE_XSI_SEM
#ifdef TRACE_XSI_SEM
#       define TRACE(x)                 dprintf x
#       define TRACE_ERROR(x)   dprintf x
#else
#       define TRACE(x)                 /* nothing */
#       define TRACE_ERROR(x)   dprintf x
#endif


namespace {

class XsiSemaphoreSet;

struct sem_undo : DoublyLinkedListLinkImpl<sem_undo> {
        sem_undo(XsiSemaphoreSet *semaphoreSet, Team *team, int16 *undoValues)
                :
                semaphore_set(semaphoreSet),
                team(team),
                undo_values(undoValues)
        {
        }

        DoublyLinkedListLink<sem_undo>          team_link;
        XsiSemaphoreSet                                         *semaphore_set;
        Team                                                            *team;
        int16                                                           *undo_values;
};

typedef DoublyLinkedList<sem_undo> UndoList;
typedef DoublyLinkedList<sem_undo,
        DoublyLinkedListMemberGetLink<sem_undo, &sem_undo::team_link> > TeamList;

} // namespace


// Forward declared in global namespace.
struct xsi_sem_context {
        xsi_sem_context()
        {
                mutex_init(&lock, "Private team undo_list lock");
        }

        ~xsi_sem_context()
        {
                mutex_destroy(&lock);
        }

        TeamList        undo_list;
        mutex           lock;
};


namespace {

// Xsi semaphore definition
class XsiSemaphore {
public:
        XsiSemaphore()
                :
                fLastPidOperation(0),
                fValue(0)
        {
                fWaitingToIncrease.Init(this, "XsiSemaphore");
                fWaitingToBeZero.Init(this, "XsiSemaphore");
        }

        ~XsiSemaphore()
        {
                // For some reason the semaphore is getting destroyed.
                // Wake up any remaing awaiting threads
                fWaitingToIncrease.NotifyAll(EIDRM);
                fWaitingToBeZero.NotifyAll(EIDRM);

                // No need to remove any sem_undo request still
                // hanging. When the process exit and doesn't found
                // the semaphore set, it'll just ignore the sem_undo
                // request. That's better than iterating trough the
                // whole sUndoList. Beside we don't know our semaphore
                // number nor our semaphore set id.
        }

        // We return true in case the operation causes the
        // caller to wait, so it can undo all the operations
        // previously done
        bool Add(short value)
        {
                if ((int)(fValue + value) < 0) {
                        TRACE(("XsiSemaphore::Add: potentially going to sleep\n"));
                        return true;
                } else {
                        fValue += value;
                        if (fValue == 0)
                                WakeUpThreads(true);
                        else if (fValue > 0)
                                WakeUpThreads(false);
                        return false;
                }
        }

        static void Dequeue(ConditionVariableEntry *queueEntry)
        {
                queueEntry->Wait(B_RELATIVE_TIMEOUT, 0);
        }

        void Enqueue(ConditionVariableEntry *queueEntry, bool waitForZero)
        {
                if (waitForZero) {
                        fWaitingToBeZero.Add(queueEntry);
                } else {
                        fWaitingToIncrease.Add(queueEntry);
                }
        }

        pid_t LastPid() const
        {
                return fLastPidOperation;
        }

        void Revert(short value)
        {
                fValue -= value;
                if (fValue == 0)
                        WakeUpThreads(true);
                else if (fValue > 0)
                        WakeUpThreads(false);
        }

        void SetPid(pid_t pid)
        {
                fLastPidOperation = pid;
        }

        void SetValue(ushort value)
        {
                fValue = value;
        }

        ushort ThreadsWaitingToIncrease()
        {
                return fWaitingToIncrease.EntriesCount();
        }

        ushort ThreadsWaitingToBeZero()
        {
                return fWaitingToBeZero.EntriesCount();
        }

        ushort Value() const
        {
                return fValue;
        }

        void WakeUpThreads(bool waitingForZero)
        {
                if (waitingForZero) {
                        fWaitingToBeZero.NotifyAll();
                } else {
                        fWaitingToIncrease.NotifyAll();
                }
        }

private:
        pid_t                           fLastPidOperation;                              // sempid
        ushort                          fValue;                                                 // semval

        ConditionVariable       fWaitingToIncrease;
        ConditionVariable       fWaitingToBeZero;
};

#define MAX_XSI_SEMS_PER_TEAM   128

// Xsi semaphore set definition (semid_ds)
class XsiSemaphoreSet {
public:
        XsiSemaphoreSet(int numberOfSemaphores, int flags)
                : fInitOK(false),
                fLastSemctlTime((time_t)real_time_clock()),
                fLastSemopTime(0),
                fNumberOfSemaphores(numberOfSemaphores),
                fSemaphores(0)
        {
                mutex_init(&fLock, "XsiSemaphoreSet private mutex");
                SetIpcKey((key_t)-1);
                SetPermissions(flags);
                fSemaphores = new(std::nothrow) XsiSemaphore[numberOfSemaphores];
                if (fSemaphores == NULL) {
                        TRACE_ERROR(("XsiSemaphoreSet::XsiSemaphore(): failed to allocate "
                                "XsiSemaphore object\n"));
                } else
                        fInitOK = true;
        }

        ~XsiSemaphoreSet()
        {
                TRACE(("XsiSemaphoreSet::~XsiSemaphoreSet(): removing semaphore "
                        "set %d\n", fID));
                mutex_destroy(&fLock);
                delete[] fSemaphores;
        }

        void ClearUndo(ushort semaphoreNumber)
        {
                Team *team = thread_get_current_thread()->team;
                UndoList::Iterator iterator = fUndoList.GetIterator();
                while (iterator.HasNext()) {
                        struct sem_undo *current = iterator.Next();
                        if (current->team == team) {
                                TRACE(("XsiSemaphoreSet::ClearUndo: teamID = %d, "
                                        "semaphoreSetID = %d, semaphoreNumber = %d\n",
                                        fID, semaphoreNumber, (int)team->id));
                                MutexLocker _(team->xsi_sem_context->lock);
                                current->undo_values[semaphoreNumber] = 0;
                                return;
                        }
                }
        }

        void ClearUndos()
        {
                // Clear all undo_values (POSIX semadj equivalent)
                // of the calling team. This happens only on semctl SETALL.
                Team *team = thread_get_current_thread()->team;
                DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
                while (iterator.HasNext()) {
                        struct sem_undo *current = iterator.Next();
                        if (current->team == team) {
                                TRACE(("XsiSemaphoreSet::ClearUndos: teamID = %d, "
                                        "semaphoreSetID = %d\n", (int)team->id, fID));
                                MutexLocker _(team->xsi_sem_context->lock);
                                memset(current->undo_values, 0,
                                        sizeof(int16) * fNumberOfSemaphores);
                                return;
                        }
                }
        }

        void DoIpcSet(struct semid_ds *result)
        {
                fPermissions.uid = result->sem_perm.uid;
                fPermissions.gid = result->sem_perm.gid;
                fPermissions.mode = (fPermissions.mode & ~0x01ff)
                        | (result->sem_perm.mode & 0x01ff);
        }

        bool HasPermission() const
        {
                if ((fPermissions.mode & S_IWOTH) != 0)
                        return true;

                uid_t uid = geteuid();
                if (uid == 0 || (uid == fPermissions.uid
                        && (fPermissions.mode & S_IWUSR) != 0))
                        return true;

                gid_t gid = getegid();
                if (gid == fPermissions.gid && (fPermissions.mode & S_IWGRP) != 0)
                        return true;

                return false;
        }

        bool HasReadPermission() const
        {
                // TODO: fix this
                return HasPermission();
        }

        int ID() const
        {
                return fID;
        }

        bool InitOK()
        {
                return fInitOK;
        }

        key_t IpcKey() const
        {
                return fPermissions.key;
        }

        struct ipc_perm IpcPermission() const
        {
                return fPermissions;
        }

        time_t LastSemctlTime() const
        {
                return fLastSemctlTime;
        }

        time_t LastSemopTime() const
        {
                return fLastSemopTime;
        }

        mutex &Lock()
        {
                return fLock;
        }

        ushort NumberOfSemaphores() const
        {
                return fNumberOfSemaphores;
        }

        // Record the sem_undo operation into our private fUndoList and
        // the team undo_list. The only limit here is the memory needed
        // for creating a new sem_undo structure.
        int RecordUndo(ushort semaphoreNumber, short value)
        {
                // Look if there is already a record from the team caller
                // for the same semaphore set
                bool notFound = true;
                Team *team = thread_get_current_thread()->team;
                DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
                while (iterator.HasNext()) {
                        struct sem_undo *current = iterator.Next();
                        if (current->team == team) {
                                // Update its undo value
                                MutexLocker _(team->xsi_sem_context->lock);
                                int newValue = current->undo_values[semaphoreNumber] + value;
                                if (newValue > USHRT_MAX || newValue < -USHRT_MAX) {
                                        TRACE_ERROR(("XsiSemaphoreSet::RecordUndo: newValue %d "
                                                "out of range\n", newValue));
                                        return ERANGE;
                                }
                                current->undo_values[semaphoreNumber] = newValue;
                                notFound = false;
                                TRACE(("XsiSemaphoreSet::RecordUndo: found record. Team = %d, "
                                        "semaphoreSetID = %d, semaphoreNumber = %d, value = %d\n",
                                        (int)team->id, fID, semaphoreNumber,
                                        current->undo_values[semaphoreNumber]));
                                break;
                        }
                }

                if (notFound) {
                        // First sem_undo request from this team for this
                        // semaphore set
                        int16 *undoValues
                                = (int16 *)malloc(sizeof(int16) * fNumberOfSemaphores);
                        if (undoValues == NULL)
                                return B_NO_MEMORY;
                        struct sem_undo *request
                                = new(std::nothrow) sem_undo(this, team, undoValues);
                        if (request == NULL) {
                                free(undoValues);
                                return B_NO_MEMORY;
                        }
                        memset(request->undo_values, 0, sizeof(int16) * fNumberOfSemaphores);
                        request->undo_values[semaphoreNumber] = value;

                        // Check if it's the very first sem_undo request for this team
                        xsi_sem_context *context = atomic_pointer_get(&team->xsi_sem_context);
                        if (context == NULL) {
                                // Create the context
                                context = new(std::nothrow) xsi_sem_context;
                                if (context == NULL) {
                                        free(request->undo_values);
                                        delete request;
                                        return B_NO_MEMORY;
                                }
                                // Since we don't hold any global lock, someone
                                // else could have been quicker than us, so we have
                                // to delete the one we just created and use the one
                                // in place.
                                if (atomic_pointer_test_and_set(&team->xsi_sem_context, context,
                                        (xsi_sem_context *)NULL) != NULL)
                                        delete context;
                        }

                        // Add the request to both XsiSemaphoreSet and team list
                        fUndoList.Add(request);
                        MutexLocker _(team->xsi_sem_context->lock);
                        team->xsi_sem_context->undo_list.Add(request);
                        TRACE(("XsiSemaphoreSet::RecordUndo: new record added. Team = %d, "
                                "semaphoreSetID = %d, semaphoreNumber = %d, value = %d\n",
                                (int)team->id, fID, semaphoreNumber, value));
                }
                return B_OK;
        }

        void RevertUndo(ushort semaphoreNumber, short value)
        {
                // This can be called only when RecordUndo fails.
                Team *team = thread_get_current_thread()->team;
                DoublyLinkedList<sem_undo>::Iterator iterator = fUndoList.GetIterator();
                while (iterator.HasNext()) {
                        struct sem_undo *current = iterator.Next();
                        if (current->team == team) {
                                MutexLocker _(team->xsi_sem_context->lock);
                                fSemaphores[semaphoreNumber].Revert(value);
                                break;
                        }
                }
        }

        XsiSemaphore* Semaphore(int nth) const
        {
                return &fSemaphores[nth];
        }

        uint32 SequenceNumber() const
        {
                return fSequenceNumber;
        }

        // Implemented after sGlobalSequenceNumber is declared
        void SetID();

        void SetIpcKey(key_t key)
        {
                fPermissions.key = key;
        }

        void SetLastSemctlTime()
        {
                fLastSemctlTime = real_time_clock();
        }

        void SetLastSemopTime()
        {
                fLastSemopTime = real_time_clock();
        }

        void SetPermissions(int flags)
        {
                fPermissions.uid = fPermissions.cuid = geteuid();
                fPermissions.gid = fPermissions.cgid = getegid();
                fPermissions.mode = (flags & 0x01ff);
        }

        UndoList &GetUndoList()
        {
                return fUndoList;
        }

        XsiSemaphoreSet*& Link()
        {
                return fLink;
        }

private:
        int                                                     fID;                                    // semaphore set id
        bool                                            fInitOK;
        time_t                                          fLastSemctlTime;                // sem_ctime
        time_t                                          fLastSemopTime;                 // sem_otime
        mutex                                           fLock;                                  // private lock
        ushort                                          fNumberOfSemaphores;    // sem_nsems
        struct ipc_perm                         fPermissions;                   // sem_perm
        XsiSemaphore                            *fSemaphores;                   // array of semaphores
        uint32                                          fSequenceNumber;                // used as a second id
        UndoList                                        fUndoList;                              // undo list requests

        XsiSemaphoreSet*                        fLink;
};

// Xsi semaphore set hash table
struct SemaphoreHashTableDefinition {
        typedef int                                     KeyType;
        typedef XsiSemaphoreSet         ValueType;

        size_t HashKey (const int key) const
        {
                return (size_t)key;
        }

        size_t Hash(XsiSemaphoreSet *variable) const
        {
                return (size_t)variable->ID();
        }

        bool Compare(const int key, XsiSemaphoreSet *variable) const
        {
                return (int)key == (int)variable->ID();
        }

        XsiSemaphoreSet*& GetLink(XsiSemaphoreSet *variable) const
        {
                return variable->Link();
        }
};


// IPC class
class Ipc {
public:
        Ipc(key_t key)
                : fKey(key),
                fSemaphoreSetId(-1)
        {
        }

        key_t Key() const
        {
                return fKey;
        }

        int SemaphoreSetID() const
        {
                return fSemaphoreSetId;
        }

        void SetSemaphoreSetID(XsiSemaphoreSet *semaphoreSet)
        {
                fSemaphoreSetId = semaphoreSet->ID();
        }

        Ipc*& Link()
        {
                return fLink;
        }

private:
        key_t                           fKey;
        int                                     fSemaphoreSetId;
        Ipc*                            fLink;
};


struct IpcHashTableDefinition {
        typedef key_t   KeyType;
        typedef Ipc             ValueType;

        size_t HashKey (const key_t key) const
        {
                return (size_t)(key);
        }

        size_t Hash(Ipc *variable) const
        {
                return (size_t)HashKey(variable->Key());
        }

        bool Compare(const key_t key, Ipc *variable) const
        {
                return (key_t)key == (key_t)variable->Key();
        }

        Ipc*& GetLink(Ipc *variable) const
        {
                return variable->Link();
        }
};

} // namespace


// Arbitrary limit
#define MAX_XSI_SEMAPHORE               4096
#define MAX_XSI_SEMAPHORE_SET   2048
static BOpenHashTable<IpcHashTableDefinition> sIpcHashTable;
static BOpenHashTable<SemaphoreHashTableDefinition> sSemaphoreHashTable;

static mutex sIpcLock;
static mutex sXsiSemaphoreSetLock;

static uint32 sGlobalSequenceNumber = 1;
static int32 sXsiSemaphoreCount = 0;
static int32 sXsiSemaphoreSetCount = 0;


//      #pragma mark -


void
XsiSemaphoreSet::SetID()
{
        fID = real_time_clock();
        // The lock is held before calling us
        while (true) {
                if (sSemaphoreHashTable.Lookup(fID) == NULL)
                        break;
                fID = (fID + 1) % INT_MAX;
        }
        sGlobalSequenceNumber = (sGlobalSequenceNumber + 1) % UINT_MAX;
        fSequenceNumber = sGlobalSequenceNumber;
}


//      #pragma mark - Kernel exported API


void
xsi_sem_init()
{
        // Initialize hash tables
        status_t status = sIpcHashTable.Init();
        if (status != B_OK)
                panic("xsi_sem_init() failed to initialize ipc hash table\n");
        status =  sSemaphoreHashTable.Init();
        if (status != B_OK)
                panic("xsi_sem_init() failed to initialize semaphore hash table\n");

        mutex_init(&sIpcLock, "global POSIX semaphore IPC table");
        mutex_init(&sXsiSemaphoreSetLock, "global POSIX xsi sem table");
}


/*!     Function called on team exit to process any sem_undo requests */
void
xsi_sem_undo(Team *team)
{
        if (team->xsi_sem_context == NULL)
                return;

        // By acquiring first the semaphore hash table lock
        // we make sure the semaphore set in our sem_undo
        // list won't get removed by IPC_RMID call
        MutexLocker _(sXsiSemaphoreSetLock);

        // Process all sem_undo request in the team sem undo list
        // if any
        TeamList::Iterator iterator
                = team->xsi_sem_context->undo_list.GetIterator();
        while (iterator.HasNext()) {
                struct sem_undo *current = iterator.Next();
                XsiSemaphoreSet *semaphoreSet = current->semaphore_set;
                // Acquire the set lock in order to prevent race
                // condition with RecordUndo
                MutexLocker setLocker(semaphoreSet->Lock());
                MutexLocker _(team->xsi_sem_context->lock);
                // Revert the changes done by this process
                for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++)
                        if (current->undo_values[i] != 0) {
                                TRACE(("xsi_sem_undo: TeamID = %d, SemaphoreSetID = %d, "
                                        "SemaphoreNumber = %d, undo value = %d\n", (int)team->id,
                                        semaphoreSet->ID(), i, (int)current->undo_values[i]));
                                semaphoreSet->Semaphore(i)->Revert(current->undo_values[i]);
                        }

                // Remove and free the sem_undo structure from both lists
                iterator.Remove();
                semaphoreSet->GetUndoList().Remove(current);
                delete current;
        }
        delete team->xsi_sem_context;
        team->xsi_sem_context = NULL;
}


//      #pragma mark - Syscalls


int
_user_xsi_semget(key_t key, int numberOfSemaphores, int flags)
{
        TRACE(("xsi_semget: key = %d, numberOfSemaphores = %d, flags = %d\n",
                (int)key, numberOfSemaphores, flags));
        XsiSemaphoreSet *semaphoreSet = NULL;
        Ipc *ipcKey = NULL;

        // Default assumption
        bool isPrivate = true;

        MutexLocker ipcLocker(sIpcLock);
        if (key != IPC_PRIVATE) {
                isPrivate = false;
                // Check if key already exist, if it does it already has a semaphore
                // set associated with it
                ipcKey = sIpcHashTable.Lookup(key);
                if (ipcKey != NULL) {
                        // The IPC key exist and it already has a semaphore
                        if ((flags & IPC_CREAT) && (flags & IPC_EXCL)) {
                                TRACE(("xsi_semget: key %d already exist\n", (int)key));
                                return EEXIST;
                        }
                        int semaphoreSetID = ipcKey->SemaphoreSetID();

                        MutexLocker semaphoreSetLocker(sXsiSemaphoreSetLock);
                        semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreSetID);
                        if (semaphoreSet == NULL) {
                                TRACE(("xsi_semget: calling process has no semaphore, "
                                        "key %d\n", (int)key));
                                return EINVAL;
                        }
                        if (!semaphoreSet->HasPermission()) {
                                TRACE(("xsi_semget: calling process has no permission "
                                        "on semaphore %d, key %d\n", semaphoreSet->ID(),
                                        (int)key));
                                return EACCES;
                        }
                        if (numberOfSemaphores > semaphoreSet->NumberOfSemaphores()
                                        && numberOfSemaphores != 0) {
                                TRACE(("xsi_semget: numberOfSemaphores greater than the "
                                        "one associated with semaphore %d, key %d\n",
                                        semaphoreSet->ID(), (int)key));
                                return EINVAL;
                        }

                        return semaphoreSet->ID();
                }

                // The ipc key does not exist. Create it and add it to the system
                if (!(flags & IPC_CREAT)) {
                        TRACE(("xsi_semget: key %d does not exist, but the "
                                "caller did not ask for creation\n",(int)key));
                        return ENOENT;
                }
                ipcKey = new(std::nothrow) Ipc(key);
                if (ipcKey == NULL) {
                        TRACE_ERROR(("xsi_semget: failed to create new Ipc object "
                                "for key %d\n", (int)key));
                        return ENOMEM;
                }
        }

        // Create a new semaphore set for this key
        if (numberOfSemaphores <= 0
                        || numberOfSemaphores >= MAX_XSI_SEMS_PER_TEAM) {
                TRACE_ERROR(("xsi_semget: numberOfSemaphores out of range\n"));
                delete ipcKey;
                return EINVAL;
        }
        if (sXsiSemaphoreCount >= MAX_XSI_SEMAPHORE
                        || sXsiSemaphoreSetCount >= MAX_XSI_SEMAPHORE_SET) {
                TRACE_ERROR(("xsi_semget: reached limit of maximum number of "
                        "semaphores allowed\n"));
                delete ipcKey;
                return ENOSPC;
        }

        semaphoreSet = new(std::nothrow) XsiSemaphoreSet(numberOfSemaphores, flags);
        if (semaphoreSet == NULL || !semaphoreSet->InitOK()) {
                TRACE_ERROR(("xsi_semget: failed to allocate a new xsi "
                        "semaphore set\n"));
                delete semaphoreSet;
                delete ipcKey;
                return ENOMEM;
        }

        atomic_add(&sXsiSemaphoreCount, numberOfSemaphores);
        atomic_add(&sXsiSemaphoreSetCount, 1);

        MutexLocker semaphoreSetLocker(sXsiSemaphoreSetLock);
        semaphoreSet->SetID();
        if (isPrivate) {
                semaphoreSet->SetIpcKey((key_t)-1);
        } else {
                sIpcHashTable.Insert(ipcKey);
                semaphoreSet->SetIpcKey(key);
                ipcKey->SetSemaphoreSetID(semaphoreSet);
        }
        sSemaphoreHashTable.Insert(semaphoreSet);
        TRACE(("semget: new set = %d created, sequence = %ld\n",
                semaphoreSet->ID(), semaphoreSet->SequenceNumber()));

        return semaphoreSet->ID();
}


int
_user_xsi_semctl(int semaphoreID, int semaphoreNumber, int command,
        union semun *_args)
{
        TRACE(("xsi_semctl: semaphoreID = %d, semaphoreNumber = %d, command = %d\n",
                semaphoreID, semaphoreNumber, command));

        union semun args = {0};
        if (_args != NULL) {
                if (!IS_USER_ADDRESS(_args)
                                || user_memcpy(&args, _args, sizeof(union semun)) != B_OK)
                        return B_BAD_ADDRESS;
        }

        MutexLocker ipcHashLocker(sIpcLock);
        MutexLocker setHashLocker(sXsiSemaphoreSetLock);
        XsiSemaphoreSet *semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
        if (semaphoreSet == NULL) {
                TRACE(("xsi_semctl: semaphore set id %d not valid\n",
                        semaphoreID));
                return EINVAL;
        }
        if (semaphoreNumber < 0
                || semaphoreNumber > semaphoreSet->NumberOfSemaphores()) {
                TRACE(("xsi_semctl: semaphore number %d not valid for "
                        "semaphore %d\n", semaphoreNumber, semaphoreID));
                return EINVAL;
        }

        // Lock the semaphore set itself and release both the semaphore
        // set hash table lock and the ipc hash table lock _only_ if
        // the command it's not IPC_RMID, this prevents undesidered
        // situation from happening while (hopefully) improving the
        // concurrency.
        MutexLocker setLocker(semaphoreSet->Lock());
        if (command != IPC_RMID) {
                setHashLocker.Unlock();
                ipcHashLocker.Unlock();
        }

        int result = 0;
        XsiSemaphore *semaphore = semaphoreSet->Semaphore(semaphoreNumber);
        switch (command) {
                case GETVAL: {
                        if (!semaphoreSet->HasReadPermission()) {
                                TRACE(("xsi_semctl: calling process has not permission "
                                        "on semaphore %d, key %d\n", semaphoreSet->ID(),
                                        (int)semaphoreSet->IpcKey()));
                                result = EACCES;
                        } else
                                result = semaphore->Value();
                        break;
                }

                case SETVAL: {
                        if (!semaphoreSet->HasPermission()) {
                                TRACE(("xsi_semctl: calling process has not permission "
                                        "on semaphore %d, key %d\n", semaphoreSet->ID(),
                                        (int)semaphoreSet->IpcKey()));
                                result = EACCES;
                        } else {
                                if (args.val > USHRT_MAX) {
                                        TRACE(("xsi_semctl: value %d out of range\n", args.val));
                                        result = ERANGE;
                                } else {
                                        semaphore->SetValue(args.val);
                                        semaphoreSet->ClearUndo(semaphoreNumber);
                                }
                        }
                        break;
                }

                case GETPID: {
                        if (!semaphoreSet->HasReadPermission()) {
                                TRACE(("xsi_semctl: calling process has not permission "
                                        "on semaphore %d, key %d\n", semaphoreSet->ID(),
                                        (int)semaphoreSet->IpcKey()));
                                result = EACCES;
                        } else
                                result = semaphore->LastPid();
                        break;
                }

                case GETNCNT: {
                        if (!semaphoreSet->HasReadPermission()) {
                                TRACE(("xsi_semctl: calling process has not permission "
                                        "on semaphore %d, key %d\n", semaphoreSet->ID(),
                                        (int)semaphoreSet->IpcKey()));
                                result = EACCES;
                        } else
                                result = semaphore->ThreadsWaitingToIncrease();
                        break;
                }

                case GETZCNT: {
                        if (!semaphoreSet->HasReadPermission()) {
                                TRACE(("xsi_semctl: calling process has not permission "
                                        "on semaphore %d, key %d\n", semaphoreSet->ID(),
                                        (int)semaphoreSet->IpcKey()));
                                result = EACCES;
                        } else
                                result = semaphore->ThreadsWaitingToBeZero();
                        break;
                }

                case GETALL: {
                        if (!semaphoreSet->HasReadPermission()) {
                                TRACE(("xsi_semctl: calling process has not read "
                                        "permission on semaphore %d, key %d\n", semaphoreSet->ID(),
                                        (int)semaphoreSet->IpcKey()));
                                result = EACCES;
                        } else
                                for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++) {
                                        semaphore = semaphoreSet->Semaphore(i);
                                        unsigned short value = semaphore->Value();
                                        if (user_memcpy(args.array + i, &value,
                                                        sizeof(unsigned short)) != B_OK) {
                                                TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
                                                result = B_BAD_ADDRESS;
                                                break;
                                        }
                                }
                        break;
                }

                case SETALL: {
                        if (!semaphoreSet->HasPermission()) {
                                TRACE(("xsi_semctl: calling process has not permission "
                                        "on semaphore %d, key %d\n", semaphoreSet->ID(),
                                        (int)semaphoreSet->IpcKey()));
                                result = EACCES;
                        } else {
                                bool doClear = true;
                                for (int i = 0; i < semaphoreSet->NumberOfSemaphores(); i++) {
                                        semaphore = semaphoreSet->Semaphore(i);
                                        unsigned short value;
                                        if (user_memcpy(&value, args.array + i,
                                                        sizeof(unsigned short)) != B_OK) {
                                                TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
                                                result = B_BAD_ADDRESS;
                                                doClear = false;
                                                break;
                                        } else
                                                semaphore->SetValue(value);
                                }
                                if (doClear)
                                        semaphoreSet->ClearUndos();
                        }
                        break;
                }

                case IPC_STAT: {
                        if (!semaphoreSet->HasReadPermission()) {
                                TRACE(("xsi_semctl: calling process has not read "
                                        "permission on semaphore %d, key %d\n", semaphoreSet->ID(),
                                        (int)semaphoreSet->IpcKey()));
                                result = EACCES;
                        } else {
                                struct semid_ds sem;
                                sem.sem_perm = semaphoreSet->IpcPermission();
                                sem.sem_nsems = semaphoreSet->NumberOfSemaphores();
                                sem.sem_otime = semaphoreSet->LastSemopTime();
                                sem.sem_ctime = semaphoreSet->LastSemctlTime();
                                if (user_memcpy(args.buf, &sem, sizeof(struct semid_ds))
                                                < B_OK) {
                                        TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
                                        result = B_BAD_ADDRESS;
                                }
                        }
                        break;
                }

                case IPC_SET: {
                        if (!semaphoreSet->HasPermission()) {
                                TRACE(("xsi_semctl: calling process has not "
                                        "permission on semaphore %d, key %d\n",
                                        semaphoreSet->ID(), (int)semaphoreSet->IpcKey()));
                                result = EACCES;
                        } else {
                                struct semid_ds sem;
                                if (user_memcpy(&sem, args.buf, sizeof(struct semid_ds))
                                                != B_OK) {
                                        TRACE_ERROR(("xsi_semctl: user_memcpy failed\n"));
                                        result = B_BAD_ADDRESS;
                                } else
                                        semaphoreSet->DoIpcSet(&sem);
                        }
                        break;
                }

                case IPC_RMID: {
                        // If this was the command, we are still holding
                        // the semaphore set hash table lock along with the
                        // ipc hash table lock and the semaphore set lock
                        // itself, this way we are sure there is not
                        // one waiting in the queue of the mutex.
                        if (!semaphoreSet->HasPermission()) {
                                TRACE(("xsi_semctl: calling process has not "
                                        "permission on semaphore %d, key %d\n",
                                        semaphoreSet->ID(), (int)semaphoreSet->IpcKey()));
                                return EACCES;
                        }
                        key_t key = semaphoreSet->IpcKey();
                        Ipc *ipcKey = NULL;
                        if (key != -1) {
                                ipcKey = sIpcHashTable.Lookup(key);
                                sIpcHashTable.Remove(ipcKey);
                        }
                        sSemaphoreHashTable.Remove(semaphoreSet);
                        // Wake up of threads waiting on this set
                        // happens in the destructor
                        if (key != -1)
                                delete ipcKey;
                        atomic_add(&sXsiSemaphoreCount, -semaphoreSet->NumberOfSemaphores());
                        atomic_add(&sXsiSemaphoreSetCount, -1);
                        // Remove any sem_undo request
                        while (struct sem_undo *entry
                                        = semaphoreSet->GetUndoList().RemoveHead()) {
                                MutexLocker _(entry->team->xsi_sem_context->lock);
                                entry->team->xsi_sem_context->undo_list.Remove(entry);
                                delete entry;
                        }

                        setLocker.Detach();
                        delete semaphoreSet;
                        return 0;
                }

                default:
                        TRACE_ERROR(("xsi_semctl: command %d not valid\n", command));
                        result = EINVAL;
        }

        return result;
}


status_t
_user_xsi_semop(int semaphoreID, struct sembuf *ops, size_t numOps)
{
        TRACE(("xsi_semop: semaphoreID = %d, ops = %p, numOps = %ld\n",
                semaphoreID, ops, numOps));

        if (!IS_USER_ADDRESS(ops)) {
                TRACE(("xsi_semop: sembuf address is not valid\n"));
                return B_BAD_ADDRESS;
        }
        if (numOps < 0 || numOps >= MAX_XSI_SEMS_PER_TEAM) {
                TRACE(("xsi_semop: numOps out of range\n"));
                return EINVAL;
        }

        MutexLocker setHashLocker(sXsiSemaphoreSetLock);
        XsiSemaphoreSet *semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
        if (semaphoreSet == NULL) {
                TRACE(("xsi_semop: semaphore set id %d not valid\n",
                        semaphoreID));
                return EINVAL;
        }
        MutexLocker setLocker(semaphoreSet->Lock());
        setHashLocker.Unlock();

        BStackOrHeapArray<struct sembuf, 16> operations(numOps);
        if (!operations.IsValid()) {
                TRACE_ERROR(("xsi_semop: failed to allocate sembuf struct\n"));
                return B_NO_MEMORY;
        }

        if (user_memcpy(operations, ops,
                        (sizeof(struct sembuf) * numOps)) != B_OK) {
                TRACE_ERROR(("xsi_semop: user_memcpy failed\n"));
                return B_BAD_ADDRESS;
        }

        // We won't do partial request, that is operations
        // only on some sempahores belonging to the set and then
        // going to sleep. If we must wait on a semaphore, we undo
        // all the operations already done and go to sleep, otherwise
        // we may caused some unwanted deadlock among threads
        // fighting for the same set.
        bool notDone = true;
        status_t result = 0;
        while (notDone) {
                XsiSemaphore *semaphore = NULL;
                const ushort numberOfSemaphores = semaphoreSet->NumberOfSemaphores();
                bool goToSleep = false;

                uint32 i = 0;
                for (; i < numOps; i++) {
                        ushort semaphoreNumber = operations[i].sem_num;
                        if (semaphoreNumber >= numberOfSemaphores) {
                                TRACE(("xsi_semop: %" B_PRIu32 " invalid semaphore number"
                                        "\n", i));
                                result = EINVAL;
                                break;
                        }
                        semaphore = semaphoreSet->Semaphore(semaphoreNumber);
                        unsigned short value = semaphore->Value();
                        short operation = operations[i].sem_op;
                        TRACE(("xsi_semop: semaphoreNumber = %d, value = %d\n",
                                semaphoreNumber, value));
                        if (operation < 0) {
                                if (semaphore->Add(operation)) {
                                        if (operations[i].sem_flg & IPC_NOWAIT)
                                                result = EAGAIN;
                                        else
                                                goToSleep = true;
                                        break;
                                }
                        } else if (operation == 0) {
                                if (value == 0)
                                        continue;
                                else if (operations[i].sem_flg & IPC_NOWAIT) {
                                        result = EAGAIN;
                                        break;
                                } else {
                                        goToSleep = true;
                                        break;
                                }
                        } else {
                                // Operation must be greater than zero,
                                // just add the value and continue
                                semaphore->Add(operation);
                        }
                }

                // Either we have to wait or an error occured
                if (goToSleep || result != 0) {
                        // Undo all previously done operations
                        for (uint32 j = 0; j < i; j++) {
                                ushort semaphoreNumber = operations[j].sem_num;
                                semaphore = semaphoreSet->Semaphore(semaphoreNumber);
                                short operation = operations[j].sem_op;
                                if (operation != 0)
                                        semaphore->Revert(operation);
                        }
                        if (result != 0)
                                return result;

                        // We have to wait: first enqueue the thread
                        // in the appropriate set waiting list, then
                        // unlock the set itself and block the thread.
                        bool waitOnZero = true;
                        if (operations[i].sem_op != 0)
                                waitOnZero = false;

                        ConditionVariableEntry queueEntry;
                        semaphore->Enqueue(&queueEntry, waitOnZero);

                        const uint32 sequenceNumber = semaphoreSet->SequenceNumber();

                        TRACE(("xsi_semop: thread %d going to sleep\n", (int)thread->id));
                        setLocker.Unlock();
                        semaphoreSet = NULL;
                        semaphore = NULL;
                        result = queueEntry.Wait(B_CAN_INTERRUPT);
                        TRACE(("xsi_semop: thread %d back to life\n", (int)thread->id));

                        // We are back to life. Find out why!
                        // Make sure the set hasn't been deleted or worst yet replaced.
                        setHashLocker.Lock();
                        semaphoreSet = sSemaphoreHashTable.Lookup(semaphoreID);
                        if (result == EIDRM || semaphoreSet == NULL || (semaphoreSet != NULL
                                        && sequenceNumber != semaphoreSet->SequenceNumber())) {
                                TRACE(("xsi_semop: semaphore set id %d (sequence = "
                                        "%" B_PRIu32 ") got destroyed\n", semaphoreID,
                                        sequenceNumber));
                                notDone = false;
                                result = EIDRM;
                        } else if (result == B_INTERRUPTED) {
                                TRACE(("xsi_semop: thread %d got interrupted while "
                                        "waiting on semaphore set id %d\n", (int)thread_get_current_thread_id(),
                                        semaphoreID));
                                XsiSemaphore::Dequeue(&queueEntry);
                                result = EINTR;
                                notDone = false;
                        } else {
                                setLocker.Lock();
                                setHashLocker.Unlock();
                        }
                } else {
                        // everything worked like a charm (so far)
                        notDone = false;
                        TRACE(("xsi_semop: semaphore acquired succesfully\n"));
                        // We acquired the semaphore, now records the sem_undo
                        // requests
                        for (uint32 i = 0; i < numOps; i++) {
                                if ((operations[i].sem_flg & SEM_UNDO) == 0)
                                        continue;

                                ushort semaphoreNumber = operations[i].sem_num;
                                XsiSemaphore *semaphore = semaphoreSet->Semaphore(semaphoreNumber);
                                short operation = operations[i].sem_op;

                                if (semaphoreSet->RecordUndo(semaphoreNumber, operation) != B_OK) {
                                        // Unlikely scenario, but we might get here.
                                        // Undo everything!
                                        // Start with semaphore operations
                                        for (uint32 j = 0; j < numOps; j++) {
                                                ushort semaphoreNumber = operations[j].sem_num;
                                                semaphore = semaphoreSet->Semaphore(semaphoreNumber);
                                                short operation = operations[j].sem_op;
                                                if (operation != 0)
                                                        semaphore->Revert(operation);
                                        }
                                        // Remove all previously registered sem_undo request
                                        for (uint32 j = 0; j < i; j++) {
                                                if (operations[j].sem_flg & SEM_UNDO) {
                                                        semaphoreSet->RevertUndo(operations[j].sem_num,
                                                                operations[j].sem_op);
                                                }
                                        }
                                        result = ENOSPC;
                                }
                        }
                }
        }

        // We did it. Set the pid of all semaphores used
        if (result == 0) {
                for (uint32 i = 0; i < numOps; i++) {
                        ushort semaphoreNumber = operations[i].sem_num;
                        XsiSemaphore *semaphore = semaphoreSet->Semaphore(semaphoreNumber);
                        semaphore->SetPid(getpid());
                }
                semaphoreSet->SetLastSemopTime();
        }
        return result;
}