root/usr/src/lib/libc/port/aio/aio_alloc.c
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

#include "lint.h"
#include "thr_uberdata.h"
#include "asyncio.h"

/*
 * The aio subsystem memory allocation strategy:
 *
 * For each of the structure types we wish to allocate/free
 * (aio_worker_t, aio_req_t, aio_lio_t), we use mmap() to allocate
 * chunks of memory which are then subdivided into individual
 * elements which are put into a free list from which allocations
 * are made and to which frees are returned.
 *
 * Chunks start small (8 Kbytes) and get larger (size doubling)
 * as more chunks are needed.  This keeps memory usage small for
 * light use and fragmentation small for heavy use.
 *
 * Chunks are never unmapped except as an aftermath of fork()
 * in the child process, when they are all unmapped (because
 * all of the worker threads disappear in the child).
 */

/*
 * Size in bytes of the first chunk mapped for each structure type.
 * Each allocator falls back to this when its previous chunk size is 0
 * and doubles the previous size otherwise.
 */
#define INITIAL_CHUNKSIZE       (8 * 1024)

/*
 * The header structure for each chunk.
 * It occupies the start of the mapped region; the allocators carve
 * their elements out of the space immediately following it (chp + 1).
 * A pointer and a size_t ensures proper alignment for whatever follows.
 */
typedef struct chunk {
        struct chunk    *chunk_next;    /* next chunk on chunk_list */
        size_t          chunk_size;     /* mapped size, for munmap() */
} chunk_t;

chunk_t *chunk_list = NULL;             /* list of all chunks */
mutex_t chunk_lock = DEFAULTMUTEX;      /* held while linking onto chunk_list */

/*
 * Map a new anonymous, private, read/write region of 'size' bytes and
 * push it onto the front of chunk_list.  The chunk header is written at
 * the start of the region; the caller owns everything after it.
 *
 * Returns a pointer to the chunk header, or NULL if mmap() fails.
 */
chunk_t *
chunk_alloc(size_t size)
{
        void *mapping;
        chunk_t *newchunk;

        mapping = mmap(NULL, size, PROT_READ | PROT_WRITE,
            MAP_PRIVATE | MAP_ANON, -1, (off_t)0);
        if (mapping == MAP_FAILED)
                return (NULL);

        newchunk = mapping;

        /* Fill in the header and link it in under the list lock. */
        lmutex_lock(&chunk_lock);
        newchunk->chunk_size = size;
        newchunk->chunk_next = chunk_list;
        chunk_list = newchunk;
        lmutex_unlock(&chunk_lock);

        return (newchunk);
}

aio_worker_t *worker_freelist = NULL;   /* free list of worker structures */
aio_worker_t *worker_freelast = NULL;   /* tail of worker_freelist */
size_t worker_chunksize = 0;            /* size of the last worker chunk */
mutex_t worker_lock = DEFAULTMUTEX;     /* protects the three above */

/*
 * Allocate a worker control block.
 */
aio_worker_t *
_aio_worker_alloc(void)
{
        aio_worker_t *aiowp;
        chunk_t *chp;
        size_t chunksize;
        int nelem;
        int i;

        lmutex_lock(&worker_lock);
        if ((aiowp = worker_freelist) == NULL) {
                if ((chunksize = 2 * worker_chunksize) == 0)
                        chunksize = INITIAL_CHUNKSIZE;
                if ((chp = chunk_alloc(chunksize)) == NULL) {
                        lmutex_unlock(&worker_lock);
                        return (NULL);
                }
                worker_chunksize = chunksize;
                worker_freelist = (aio_worker_t *)(uintptr_t)(chp + 1);
                nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_worker_t);
                for (i = 0, aiowp = worker_freelist; i < nelem; i++, aiowp++)
                        aiowp->work_forw = aiowp + 1;
                worker_freelast = aiowp - 1;
                worker_freelast->work_forw = NULL;
                aiowp = worker_freelist;
        }
        if ((worker_freelist = aiowp->work_forw) == NULL)
                worker_freelast = NULL;
        lmutex_unlock(&worker_lock);

        aiowp->work_forw = NULL;
        (void) mutex_init(&aiowp->work_qlock1, USYNC_THREAD, NULL);
        (void) cond_init(&aiowp->work_idle_cv, USYNC_THREAD, NULL);

        return (aiowp);
}

/*
 * Free a worker control block.
 * Declared with void *arg so it can be a pthread_key_create() destructor.
 */
void
_aio_worker_free(void *arg)
{
        aio_worker_t *aiowp = arg;

        (void) mutex_destroy(&aiowp->work_qlock1);
        (void) cond_destroy(&aiowp->work_idle_cv);
        (void) memset(aiowp, 0, sizeof (*aiowp));

        lmutex_lock(&worker_lock);
        if (worker_freelast == NULL) {
                worker_freelist = worker_freelast = aiowp;
        } else {
                worker_freelast->work_forw = aiowp;
                worker_freelast = aiowp;
        }
        lmutex_unlock(&worker_lock);
}

aio_req_t *_aio_freelist = NULL;        /* free list of request structures */
aio_req_t *_aio_freelast = NULL;        /* tail of _aio_freelist */
size_t request_chunksize = 0;           /* size of the last request chunk */
int _aio_freelist_cnt = 0;              /* requests now on _aio_freelist */
int _aio_allocated_cnt = 0;             /* requests currently allocated */
mutex_t __aio_cache_lock = DEFAULTMUTEX; /* protects all of the above */

/*
 * Allocate an aio request structure.
 */
aio_req_t *
_aio_req_alloc(void)
{
        aio_req_t *reqp;
        chunk_t *chp;
        size_t chunksize;
        int nelem;
        int i;

        lmutex_lock(&__aio_cache_lock);
        if ((reqp = _aio_freelist) == NULL) {
                if ((chunksize = 2 * request_chunksize) == 0)
                        chunksize = INITIAL_CHUNKSIZE;
                if ((chp = chunk_alloc(chunksize)) == NULL) {
                        lmutex_unlock(&__aio_cache_lock);
                        return (NULL);
                }
                request_chunksize = chunksize;
                _aio_freelist = (aio_req_t *)(uintptr_t)(chp + 1);
                nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_req_t);
                for (i = 0, reqp = _aio_freelist; i < nelem; i++, reqp++) {
                        reqp->req_state = AIO_REQ_FREE;
                        reqp->req_link = reqp + 1;
                }
                _aio_freelast = reqp - 1;
                _aio_freelast->req_link = NULL;
                _aio_freelist_cnt = nelem;
                reqp = _aio_freelist;
        }
        if ((_aio_freelist = reqp->req_link) == NULL)
                _aio_freelast = NULL;
        _aio_freelist_cnt--;
        _aio_allocated_cnt++;
        lmutex_unlock(&__aio_cache_lock);

        ASSERT(reqp->req_state == AIO_REQ_FREE);
        reqp->req_state = 0;
        reqp->req_link = NULL;
        reqp->req_sigevent.sigev_notify = SIGEV_NONE;

        return (reqp);
}

/*
 * Free an aio request structure.
 *
 * The request must not already be free or on the done queue.  It is
 * zeroed, marked AIO_REQ_FREE and appended to the tail of the request
 * free list; the free and allocated counters are adjusted to match.
 */
void
_aio_req_free(aio_req_t *reqp)
{
        ASSERT(reqp->req_state != AIO_REQ_FREE &&
            reqp->req_state != AIO_REQ_DONEQ);

        (void) memset(reqp, 0, sizeof (aio_req_t));
        reqp->req_state = AIO_REQ_FREE;

        lmutex_lock(&__aio_cache_lock);
        if (_aio_freelast != NULL)
                _aio_freelast->req_link = reqp;
        else
                _aio_freelist = reqp;   /* list was empty */
        _aio_freelast = reqp;
        _aio_allocated_cnt--;
        _aio_freelist_cnt++;
        lmutex_unlock(&__aio_cache_lock);
}

aio_lio_t *_lio_head_freelist = NULL;   /* free list of lio head structures */
aio_lio_t *_lio_head_freelast = NULL;   /* tail of _lio_head_freelist */
size_t lio_head_chunksize = 0;          /* size of the last lio head chunk */
int _lio_alloc = 0;                     /* lio heads carved from chunks */
int _lio_free = 0;                      /* lio heads now on the free list */
mutex_t __lio_mutex = DEFAULTMUTEX;     /* protects all of the above */

/*
 * Allocate a listio head structure.
 */
aio_lio_t *
_aio_lio_alloc(void)
{
        aio_lio_t *head;
        chunk_t *chp;
        size_t chunksize;
        int nelem;
        int i;

        lmutex_lock(&__lio_mutex);
        if ((head = _lio_head_freelist) == NULL) {
                if ((chunksize = 2 * lio_head_chunksize) == 0)
                        chunksize = INITIAL_CHUNKSIZE;
                if ((chp = chunk_alloc(chunksize)) == NULL) {
                        lmutex_unlock(&__lio_mutex);
                        return (NULL);
                }
                lio_head_chunksize = chunksize;
                _lio_head_freelist = (aio_lio_t *)(uintptr_t)(chp + 1);
                nelem = (chunksize - sizeof (chunk_t)) / sizeof (aio_lio_t);
                for (i = 0, head = _lio_head_freelist; i < nelem; i++, head++)
                        head->lio_next = head + 1;
                _lio_head_freelast = head - 1;
                _lio_head_freelast->lio_next = NULL;
                _lio_alloc += nelem;
                _lio_free = nelem;
                head = _lio_head_freelist;
        }
        if ((_lio_head_freelist = head->lio_next) == NULL)
                _lio_head_freelast = NULL;
        _lio_free--;
        lmutex_unlock(&__lio_mutex);

        ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);
        head->lio_next = NULL;
        head->lio_port = -1;
        (void) mutex_init(&head->lio_mutex, USYNC_THREAD, NULL);
        (void) cond_init(&head->lio_cond_cv, USYNC_THREAD, NULL);

        return (head);
}

/*
 * Free a listio head structure.
 *
 * The head must have no entries and no references.  Its mutex and
 * condition variable are destroyed and the structure is zeroed before
 * it is appended to the tail of the lio head free list.
 */
void
_aio_lio_free(aio_lio_t *head)
{
        ASSERT(head->lio_nent == 0 && head->lio_refcnt == 0);

        (void) mutex_destroy(&head->lio_mutex);
        (void) cond_destroy(&head->lio_cond_cv);
        (void) memset(head, 0, sizeof (aio_lio_t));

        lmutex_lock(&__lio_mutex);
        if (_lio_head_freelast != NULL)
                _lio_head_freelast->lio_next = head;
        else
                _lio_head_freelist = head;      /* list was empty */
        _lio_head_freelast = head;
        _lio_free++;
        lmutex_unlock(&__lio_mutex);
}

/*
 * Reset the aio subsystem in the child after fork().
 *
 * Unmaps the kaio fd array, the request hash table and every chunk on
 * chunk_list, then returns the allocator state, the locks and condition
 * variables, and the remaining aio globals to their initial values.
 */
void
postfork1_child_aio(void)
{
        chunk_t *chunk;

        /*
         * All of the workers are gone; free their structures.
         */
        if (_kaio_supported != NULL) {
                (void) munmap((void *)_kaio_supported,
                    MAX_KAIO_FDARRAY_SIZE * sizeof (uint32_t));
                _kaio_supported = NULL;
        }
        if (_aio_hash != NULL) {
                (void) munmap((void *)_aio_hash, HASHSZ * sizeof (aio_hash_t));
                _aio_hash = NULL;
        }

        /* Pop each chunk off the list before unmapping it. */
        while ((chunk = chunk_list) != NULL) {
                chunk_list = chunk->chunk_next;
                (void) munmap((void *)chunk, chunk->chunk_size);
        }

        /*
         * Reinitialize global variables
         *
         * NOTE(review): chunk_lock is the only lock defined in this file
         * that is not reinitialized below -- confirm that is intentional.
         */

        /* worker control block allocator */
        worker_freelist = worker_freelast = NULL;
        worker_chunksize = 0;
        (void) mutex_init(&worker_lock, USYNC_THREAD, NULL);

        /* request structure allocator */
        _aio_freelist = _aio_freelast = NULL;
        request_chunksize = 0;
        _aio_freelist_cnt = _aio_allocated_cnt = 0;
        (void) mutex_init(&__aio_cache_lock, USYNC_THREAD, NULL);

        /* listio head allocator */
        _lio_head_freelist = _lio_head_freelast = NULL;
        lio_head_chunksize = 0;
        _lio_alloc = _lio_free = 0;
        (void) mutex_init(&__lio_mutex, USYNC_THREAD, NULL);

        /* initialization and global synchronization objects */
        (void) mutex_init(&__aio_initlock, USYNC_THREAD, NULL);
        (void) cond_init(&__aio_initcv, USYNC_THREAD, NULL);
        __aio_initbusy = 0;

        (void) mutex_init(&__aio_mutex, USYNC_THREAD, NULL);
        (void) cond_init(&_aio_iowait_cv, USYNC_THREAD, NULL);
        (void) cond_init(&_aio_waitn_cv, USYNC_THREAD, NULL);

        _kaio_ok = 0;
        __uaio_ok = 0;

        /* worker bookkeeping */
        _kaiowp = NULL;

        __workers_rw = NULL;
        __nextworker_rw = NULL;
        __rw_workerscnt = 0;

        __workers_no = NULL;
        __nextworker_no = NULL;
        __no_workerscnt = 0;

        _aio_worker_cnt = 0;

        /* done lists */
        _aio_done_head = NULL;
        _aio_done_tail = NULL;
        _aio_donecnt = 0;

        _aio_doneq = NULL;
        _aio_doneq_cnt = 0;

        /* counters and flags */
        _aio_waitncnt = 0;
        _aio_outstand_cnt = 0;
        _kaio_outstand_cnt = 0;
        _aio_req_done_cnt = 0;
        _aio_kernel_suspend = 0;
        _aio_suscv_cnt = 0;

        _aiowait_flag = 0;
        _aio_flags = 0;
}

/*
 * Print one variable to stderr as "name<TAB>= value".  The argument's
 * own spelling is stringified for the label and its value is formatted
 * with %d.
 */
#define DISPLAY(var)    \
        (void) fprintf(stderr, #var "\t= %d\n", var)

/*
 * Dump the aio counters and flags to stderr.
 * Registered with atexit() by init_aio() when _AIO_EXIT_INFO is set to a
 * nonzero value.  Prints nothing when both _kaio_ok and __uaio_ok are 0.
 */
static void
_aio_exit_info(void)
{
        if (_kaio_ok != 0 || __uaio_ok != 0) {
                (void) fprintf(stderr, "\n");
                DISPLAY(_aio_freelist_cnt);
                DISPLAY(_aio_allocated_cnt);
                DISPLAY(_lio_alloc);
                DISPLAY(_lio_free);
                DISPLAY(__rw_workerscnt);
                DISPLAY(__no_workerscnt);
                DISPLAY(_aio_worker_cnt);
                DISPLAY(_aio_donecnt);
                DISPLAY(_aio_doneq_cnt);
                DISPLAY(_aio_waitncnt);
                DISPLAY(_aio_outstand_cnt);
                DISPLAY(_kaio_outstand_cnt);
                DISPLAY(_aio_req_done_cnt);
                DISPLAY(_aio_kernel_suspend);
                DISPLAY(_aio_suscv_cnt);
                DISPLAY(_aiowait_flag);
                DISPLAY(_aio_flags);
        }
}

/*
 * Create the aio thread-specific-data key and apply the environment
 * tunables:
 *
 *   _AIO_MIN_WORKERS   sets _min_workers (a value <= 0 becomes 4)
 *   _AIO_MAX_WORKERS   sets _max_workers (a value <= 0 becomes 256)
 *   _AIO_EXIT_INFO     nonzero registers _aio_exit_info() with atexit()
 *
 * _aio_worker_free() is installed as the destructor for _aio_key.
 */
void
init_aio(void)
{
        char *str;
        int tuned = 0;

        (void) pthread_key_create(&_aio_key, _aio_worker_free);
        if ((str = getenv("_AIO_MIN_WORKERS")) != NULL) {
                if ((_min_workers = atoi(str)) <= 0)
                        _min_workers = 4;
                tuned = 1;
        }
        if ((str = getenv("_AIO_MAX_WORKERS")) != NULL) {
                if ((_max_workers = atoi(str)) <= 0)
                        _max_workers = 256;
                tuned = 1;
        }
        /*
         * Keep the maximum strictly above the minimum whenever either
         * limit was overridden.  Checking only when _AIO_MAX_WORKERS is
         * set would let a large _AIO_MIN_WORKERS on its own leave the
         * minimum above the maximum.
         */
        if (tuned && _max_workers < _min_workers + 1)
                _max_workers = _min_workers + 1;
        if ((str = getenv("_AIO_EXIT_INFO")) != NULL && atoi(str) != 0)
                (void) atexit(_aio_exit_info);
}