root/src/tests/system/kernel/cache/pages_io_test.cpp
/*
 * Copyright 2004-2007, Axel Dörfler, axeld@pinc-software.de. All rights reserved.
 * Distributed under the terms of the MIT License.
 */


#include <OS.h>
#include <fs_interface.h>

#include <stdarg.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/uio.h>

// Enables the TRACE_FILE_CACHE-guarded dumps further down and routes both
// TRACE() and dprintf() output through printf().
#define TRACE_FILE_CACHE
#define TRACE(x) printf x
#define dprintf printf

// Provide a no-op ASSERT() if nothing else has defined one yet.
#ifndef ASSERT
#       define ASSERT(x) ;
#endif

// maximum number of iovecs per request
#define MAX_IO_VECS                     64      // 256 kB
// number of file_io_vecs pages_io() requests from get_file_map() per call
#define MAX_FILE_IO_VECS        4
// number of iovecs pages_io() hands to a single read/write pages call
#define MAX_TEMP_IO_VECS        8

// number of extents a file_map stores inline (file_map::direct) before it
// switches to the heap allocated file_map::array
#define CACHED_FILE_EXTENTS     2
        // must be smaller than MAX_FILE_IO_VECS
        // TODO: find out how many of these are typically used

struct vm_cache_ref;

// A single entry of a file_map: one contiguous piece of the file together
// with its location on disk.
struct file_extent {
        // offset within the file at which this extent starts (file_map::Add()
        // sets it to the sum of the lengths of all preceding extents)
        off_t                   offset;
        // on-disk offset and length of the extent
        file_io_vec             disk;
};

// Ordered list of the extents of a file. Up to CACHED_FILE_EXTENTS extents
// are stored inline in "direct"; larger maps use the heap allocated "array".
// Which union member is in use is decided solely by "count" (see ExtentAt()).
struct file_map {
        file_map();
        ~file_map();

        // both return the extent at the given index, or NULL if the index is
        // out of range
        file_extent *operator[](uint32 index);
        file_extent *ExtentAt(uint32 index);
        // appends vecCount extents; lastOffset is set to the file offset
        // following the last extent of the map
        status_t Add(file_io_vec *vecs, size_t vecCount, off_t &lastOffset);
        // frees the heap array (if one is in use) and empties the map
        void Free();

        union {
                file_extent     direct[CACHED_FILE_EXTENTS];
                file_extent     *array;
        };
        // number of valid extents in the map
        size_t                  count;
};

// Per-file state handed to get_file_map() and pages_io().
struct file_cache_ref {
        // not accessed by the code in this file (only referenced from
        // commented-out locking calls)
        vm_cache_ref    *cache;
        // passed on to vfs_get_file_map()
        void                    *vnode;
        // passed on to vfs_read_pages()/vfs_write_pages()
        void                    *device;
        void                    *cookie;
        // cached extent map of the file, filled lazily by get_file_map()
        file_map                map;
};


// capacity of the simulated on-disk layout below
const uint32 kMaxFileVecs = 1024;

// Simulated on-disk layout of the test file, filled in by set_file_map() and
// queried by find_map_base()/vfs_get_file_map().
file_io_vec gFileVecs[kMaxFileVecs];
// number of valid entries in gFileVecs
size_t gFileVecCount;
// sum of the lengths of all entries in gFileVecs
off_t gFileSize;


/*!     Creates an empty map: no extents and no heap array. */
file_map::file_map()
        :
        array(NULL),
        count(0)
{
}


/*!     Releases whatever the map still holds by delegating to Free(). */
file_map::~file_map()
{
        this->Free();
}


/*!     Index operator; yields the same result as ExtentAt(): the extent at
        \a index, or \c NULL if \a index is not below the extent count.
*/
file_extent *
file_map::operator[](uint32 index)
{
        if (index >= count)
                return NULL;

        // small maps are stored inline, larger ones in the heap array
        if (count > CACHED_FILE_EXTENTS)
                return &array[index];

        return &direct[index];
}


/*!     Returns the extent at \a index, or \c NULL if \a index is out of range.
        The extent count decides whether the inline storage or the heap array
        is consulted.
*/
file_extent *
file_map::ExtentAt(uint32 index)
{
        if (index < count) {
                file_extent *extents = direct;
                if (count > CACHED_FILE_EXTENTS)
                        extents = array;

                return extents + index;
        }

        return NULL;
}


/*!     Appends \a vecCount extents to the end of the map.

        The file offset of each new extent continues where the previously
        stored extent ended (or at 0 for an empty map). As long as the total
        number of extents does not exceed CACHED_FILE_EXTENTS they are kept in
        the inline "direct" storage; beyond that they are kept in the heap
        allocated "array". When the map grows out of the inline storage, the
        extents stored there are copied over to the new array first.

        \param vecs The on-disk extents to append.
        \param vecCount Number of entries in \a vecs.
        \param lastOffset Set to the file offset following the last extent of
                the map on success; untouched on failure.
        \return \c B_OK on success, \c B_NO_MEMORY if the array could not be
                (re)allocated, in which case the map is left unchanged.
*/
status_t
file_map::Add(file_io_vec *vecs, size_t vecCount, off_t &lastOffset)
{
        TRACE(("file_map::Add(vecCount = %ld)\n", vecCount));

        // The new extents continue where the last stored one ends. This has
        // to be read before the storage is touched: "array" and "direct"
        // share the same memory.
        off_t offset = 0;
        if (count != 0) {
                file_extent *last = ExtentAt(count - 1);
                offset = last->offset + last->disk.length;
        }

        const size_t oldCount = count;
        const size_t newCount = oldCount + vecCount;

        if (newCount > CACHED_FILE_EXTENTS) {
                file_extent *newMap;

                if (oldCount > CACHED_FILE_EXTENTS) {
                        // we already own a heap array, just let it grow
                        newMap = (file_extent *)realloc(array,
                                newCount * sizeof(file_extent));
                        if (newMap == NULL)
                                return B_NO_MEMORY;
                } else {
                        // The map has been stored inline (or is empty) so far:
                        // "array" is not a valid heap pointer and must not be
                        // passed to realloc(). Allocate a fresh array, and
                        // move the inline extents over before "array"
                        // overwrites them.
                        newMap = (file_extent *)malloc(
                                newCount * sizeof(file_extent));
                        if (newMap == NULL)
                                return B_NO_MEMORY;

                        for (size_t i = 0; i < oldCount; i++)
                                newMap[i] = direct[i];
                }

                array = newMap;
        }
                // otherwise everything still fits into the reserved inline area

        count = newCount;

        for (uint32 i = 0; i < vecCount; i++) {
                file_extent *extent = ExtentAt(oldCount + i);

                extent->offset = offset;
                extent->disk = vecs[i];

                offset += extent->disk.length;
        }

#ifdef TRACE_FILE_CACHE
        for (uint32 i = 0; i < count; i++) {
                file_extent *extent = ExtentAt(i);
                dprintf("  [%ld] extend offset %lld, disk offset %lld, length %lld\n",
                        i, extent->offset, extent->disk.offset, extent->disk.length);
        }
#endif

        lastOffset = offset;
        return B_OK;
}


/*!     Empties the map. The heap array is only freed if the map was large
        enough to actually use it; inline extents need no cleanup.
*/
void
file_map::Free()
{
        const bool usesHeapArray = count > CACHED_FILE_EXTENTS;
        if (usesHeapArray)
                free(array);

        count = 0;
        array = NULL;
}


//      #pragma mark -


void
set_vecs(iovec *vecs, size_t *_count, ...)
{
        uint32 base = 0;
        size_t count = 0;

        va_list args;
        va_start(args, _count);

        while (count < MAX_IO_VECS) {
                int32 length = va_arg(args, int32);
                if (length < 0)
                        break;

                vecs[count].iov_base = (void *)base;
                vecs[count].iov_len = length;

                base += length;
                count++;
        }

        va_end(args);
        *_count = count;
}


/*!     Sets up the simulated on-disk layout (gFileVecs, gFileVecCount,
        gFileSize). The first extent is given by \a base and \a length; further
        extents follow as (offset, length) int32 pairs in the variable argument
        list, which is terminated by a negative offset. No more than
        kMaxFileVecs extents are stored.
*/
void
set_file_map(int32 base, int32 length, ...)
{
        gFileVecs[0].offset = base;
        gFileVecs[0].length = length;

        gFileVecCount = 1;
        gFileSize = length;

        va_list pairs;
        va_start(pairs, length);

        for (;;) {
                if (gFileVecCount >= kMaxFileVecs)
                        break;

                off_t diskOffset = va_arg(pairs, int32);
                if (diskOffset < 0) {
                        // terminator reached
                        break;
                }

                int32 extentLength = va_arg(pairs, int32);

                file_io_vec &vec = gFileVecs[gFileVecCount++];
                vec.offset = diskOffset;
                vec.length = extentLength;

                gFileSize += extentLength;
        }

        va_end(pairs);
}


/*!     Looks up the entry of gFileVecs that contains the file offset
        \a offset.

        On success \a diskOffset and \a diskLength are set to the entry's disk
        offset and length, and \a fileOffset to the file offset at which the
        entry starts.

        \return \c B_OK if an entry was found, \c B_ENTRY_NOT_FOUND if
                \a offset lies behind the last entry (\a fileOffset then holds
                the summed length of all entries).
*/
status_t
find_map_base(off_t offset, off_t &diskOffset, off_t &diskLength,
        off_t &fileOffset)
{
        fileOffset = 0;

        uint32 i = 0;
        while (i < gFileVecCount) {
                const file_io_vec &vec = gFileVecs[i];

                if (offset >= vec.length) {
                        // not in this entry, skip it
                        fileOffset += vec.length;
                        offset -= vec.length;
                        i++;
                        continue;
                }

                diskOffset = vec.offset;
                diskLength = vec.length;
                return B_OK;
        }

        return B_ENTRY_NOT_FOUND;
}


//      #pragma mark - VFS functions


/*!     Test replacement for the VFS file map hook, answering from the
        simulated layout in gFileVecs. \a vnode is ignored.

        Starting at file offset \a offset, \a vecs is filled with the disk
        extents needed to cover \a size bytes or to reach the end of the file.
        \a _count holds the capacity of \a vecs on input and the number of
        entries written on output.

        \return \c B_OK when done, \c B_BUFFER_OVERFLOW if \a vecs was too
                small, or the error returned by find_map_base().
*/
static status_t
vfs_get_file_map(void *vnode, off_t offset, size_t size, file_io_vec *vecs,
        size_t *_count)
{
        const size_t capacity = *_count;

        printf("vfs_get_file_map(offset = %lld, size = %lu, count = %lu)\n",
                offset, size, *_count);

        off_t diskOffset, diskLength, fileOffset;

        for (uint32 used = 0;;) {
                status_t status = find_map_base(offset, diskOffset, diskLength,
                        fileOffset);
                if (status != B_OK)
                        return status;

                // the part of the found entry that starts at "offset"
                file_io_vec &vec = vecs[used];
                vec.offset = diskOffset + offset - fileOffset;
                vec.length = diskLength - offset + fileOffset;
                offset += vec.length;

                // are we already done?
                const bool done = size <= vec.length || offset >= gFileSize;
                if (done) {
                        if (offset > gFileSize) {
                                // make sure the extent ends with the last official
                                // file block (without taking any preallocations
                                // into account)
                                vec.length = gFileSize - fileOffset;
                        }

                        *_count = used + 1;
                        return B_OK;
                }

                size -= vec.length;

                if (++used >= capacity) {
                        // we're out of file_io_vecs; let's bail out
                        *_count = used;
                        return B_BUFFER_OVERFLOW;
                }
        }
}


/*!     Test replacement for the VFS read hook: performs no I/O, it merely
        prints the request (disk offset, byte count, and every iovec) to
        stdout. \a device, \a cookie, and \a kernel are ignored, and \a bytes
        is left untouched.

        \return always \c B_OK.
*/
static status_t
vfs_read_pages(void *device, void *cookie, off_t offset,
        const iovec *vecs, size_t count, size_t *bytes, bool kernel)
{
        // The casts make each argument match its conversion specifier
        // regardless of how off_t, size_t, and uint32 are defined, and the
        // base address is converted via addr_t so that the pointer is not
        // truncated where pointers are wider than 32 bits.
        printf("read offset %lld, length %lu\n", (long long)offset,
                (unsigned long)*bytes);
        for (uint32 i = 0; i < count; i++) {
                printf("  [%lu] base %lu, length %lu\n", (unsigned long)i,
                        (unsigned long)(addr_t)vecs[i].iov_base,
                        (unsigned long)vecs[i].iov_len);
        }
        return B_OK;
}


/*!     Test replacement for the VFS write hook: performs no I/O, it merely
        prints the request (disk offset, byte count, and every iovec) to
        stdout. \a device, \a cookie, and \a kernel are ignored, and \a bytes
        is left untouched.

        \return always \c B_OK.
*/
static status_t
vfs_write_pages(void *device, void *cookie, off_t offset,
        const iovec *vecs, size_t count, size_t *bytes, bool kernel)
{
        // The casts make each argument match its conversion specifier
        // regardless of how off_t, size_t, and uint32 are defined, and the
        // base address is converted via addr_t so that the pointer is not
        // truncated where pointers are wider than 32 bits.
        printf("write offset %lld, length %lu\n", (long long)offset,
                (unsigned long)*bytes);
        for (uint32 i = 0; i < count; i++) {
                printf("  [%lu] base %lu, length %lu\n", (unsigned long)i,
                        (unsigned long)(addr_t)vecs[i].iov_base,
                        (unsigned long)vecs[i].iov_len);
        }
        return B_OK;
}


//      #pragma mark - file_cache.cpp copies


/*!     Searches the cached map of \a ref for the extent that contains the file
        offset \a offset.

        \param _index If not \c NULL, set to the index of the found extent.
        \return The extent, or \c NULL if no extent covers \a offset.
*/
static file_extent *
find_file_extent(file_cache_ref *ref, off_t offset, uint32 *_index)
{
        // TODO: do binary search

        for (uint32 i = 0; i < ref->map.count; i++) {
                file_extent *candidate = ref->map.ExtentAt(i);

                // skip extents that start behind or end at/before the offset
                if (offset < candidate->offset)
                        continue;
                if (offset >= candidate->offset + candidate->disk.length)
                        continue;

                if (_index != NULL)
                        *_index = i;
                return candidate;
        }

        return NULL;
}


/*!     Translates the file range starting at \a offset with length \a size
        into on-disk extents.

        If \a ref has no cached map yet, the complete map is retrieved via
        vfs_get_file_map() first (using \a vecs as temporary buffer) and stored
        in \a ref->map. If that fails at any point, the partially built map is
        discarded again, so that a later call starts from scratch.

        \param vecs Receives the extents; the first one is trimmed so that it
                starts at \a offset, all others are copied unmodified (i.e.
                the last one may extend beyond the requested range).
        \param _count Capacity of \a vecs on input, number of extents written
                on output. Set to 0 if \a offset is not covered by the map.
        \return \c B_OK on success, \c B_BUFFER_OVERFLOW if \a vecs is full
                but the range is not yet covered and the map has more extents,
                or the error that prevented the map from being built.
*/
static status_t
get_file_map(file_cache_ref *ref, off_t offset, size_t size,
        file_io_vec *vecs, size_t *_count)
{
        size_t maxVecs = *_count;
        status_t status = B_OK;

        if (ref->map.count == 0) {
                // we don't yet have the map of this file, so let's grab it
                // (ordered by offset, so that we can do a binary search on them)

                //mutex_lock(&ref->cache->lock);

                // the file map could have been requested in the mean time
                if (ref->map.count == 0) {
                        size_t vecCount = maxVecs;
                        off_t mapOffset = 0;

                        while (true) {
                                status = vfs_get_file_map(ref->vnode, mapOffset, ~0UL, vecs, &vecCount);
                                if (status < B_OK && status != B_BUFFER_OVERFLOW) {
                                        // Don't return right away: extents from
                                        // earlier iterations may already be
                                        // stored, and they are invalidated below.
                                        break;
                                }

                                status_t addStatus = ref->map.Add(vecs, vecCount, mapOffset);
                                if (addStatus != B_OK) {
                                        // only clobber the status in case of failure
                                        status = addStatus;
                                }

                                if (status != B_BUFFER_OVERFLOW)
                                        break;

                                // when we are here, the map has been stored in the array, and
                                // the array size was still too small to cover the whole file
                                vecCount = maxVecs;
                        }
                }

                //mutex_unlock(&ref->cache->lock);
        }

        if (status != B_OK) {
                // We must invalidate the (part of the) map we already
                // have, as we cannot know if it's complete or not
                ref->map.Free();
                return status;
        }

        // We now have cached the map of this file, we now need to
        // translate it for the requested access.

        uint32 index;
        file_extent *fileExtent = find_file_extent(ref, offset, &index);
        if (fileExtent == NULL) {
                // access outside file bounds? But that's not our problem
                *_count = 0;
                return B_OK;
        }

        offset -= fileExtent->offset;
        vecs[0].offset = fileExtent->disk.offset + offset;
        vecs[0].length = fileExtent->disk.length - offset;

        if (vecs[0].length >= size || index >= ref->map.count - 1) {
                *_count = 1;
                return B_OK;
        }

        // copy the rest of the vecs

        size -= vecs[0].length;

        // "index" tracks the position within the map, "vecIndex" the position
        // within the output array. The walk must stop at the last extent of
        // the map, no matter how many output slots are still free.
        uint32 vecIndex = 1;

        while (index + 1 < ref->map.count) {
                if (vecIndex >= maxVecs) {
                        // there are more extents, but no room to return them
                        *_count = vecIndex;
                        return B_BUFFER_OVERFLOW;
                }

                // extents are stored contiguously, both inline and in the array
                fileExtent++;
                index++;

                vecs[vecIndex++] = fileExtent->disk;

                if (size <= fileExtent->disk.length)
                        break;

                size -= fileExtent->disk.length;
        }

        *_count = vecIndex;
        return B_OK;
}


/*!
        Does the dirty work of translating the request into actual disk offsets
        and reads to or writes from the supplied iovecs as specified by \a doWrite.

        The file range is translated into disk extents via get_file_map(), at
        most MAX_FILE_IO_VECS at a time. For reads, the first extent is passed
        to vfs_read_pages() together with the complete iovec array; everything
        else is split into chunks of at most MAX_TEMP_IO_VECS temporary iovecs
        that each lie within a single extent.

        \param ref The file to access; its extent map is filled on demand.
        \param offset File offset at which the access starts.
        \param vecs The memory to read into or write from.
        \param count Number of entries in \a vecs.
        \param _numBytes On input the number of bytes to process. On success
                it holds the number of bytes that have been processed.
        \param doWrite \c true to write, \c false to read.
        \return \c B_OK on success, \c B_BAD_VALUE if \a offset is not covered
                by the file's extent map, or the error returned by
                get_file_map(), vfs_read_pages(), or vfs_write_pages().
*/
static status_t
pages_io(file_cache_ref *ref, off_t offset, const iovec *vecs, size_t count,
        size_t *_numBytes, bool doWrite)
{
        TRACE(("pages_io: ref = %p, offset = %lld, size = %lu, vecCount = %lu, %s\n", ref, offset,
                *_numBytes, count, doWrite ? "write" : "read"));

        // translate the iovecs into direct device accesses
        file_io_vec fileVecs[MAX_FILE_IO_VECS];
        size_t fileVecCount = MAX_FILE_IO_VECS;
        size_t numBytes = *_numBytes;

        status_t status = get_file_map(ref, offset, numBytes, fileVecs,
                &fileVecCount);
        if (status < B_OK && status != B_BUFFER_OVERFLOW) {
                TRACE(("get_file_map(offset = %lld, numBytes = %lu) failed: %s\n", offset,
                        numBytes, strerror(status)));
                return status;
        }

        // if set, fileVecs does not cover the whole request yet, and the map
        // has to be queried again once these vecs have been processed
        bool bufferOverflow = status == B_BUFFER_OVERFLOW;

#ifdef TRACE_FILE_CACHE
        dprintf("got %lu file vecs for %lld:%lu%s:\n", fileVecCount, offset, numBytes,
                bufferOverflow ? " (array too small)" : "");
        for (size_t i = 0; i < fileVecCount; i++) {
                dprintf("  [%lu] offset = %lld, size = %lld\n",
                        i, fileVecs[i].offset, fileVecs[i].length);
        }
#endif

        if (fileVecCount == 0) {
                // There are no file vecs at this offset, so we're obviously trying
                // to access the file outside of its bounds
                TRACE(("pages_io: access outside of vnode %p at offset %lld\n",
                        ref->vnode, offset));
                return B_BAD_VALUE;
        }

        uint32 fileVecIndex;
        size_t size;

        if (!doWrite) {
                // now directly read the data from the device
                // the first file_io_vec can be read directly

                size = fileVecs[0].length;
                if (size > numBytes)
                        size = numBytes;

                status = vfs_read_pages(ref->device, ref->cookie, fileVecs[0].offset, vecs,
                        count, &size, false);
                if (status < B_OK)
                        return status;

                // TODO: this is a work-around for buggy device drivers!
                //      When our own drivers honour the length, we can:
                //      a) also use this direct I/O for writes (otherwise, it would
                //         overwrite precious data)
                //      b) panic if the term below is true (at least for writes)
                if (size > fileVecs[0].length) {
                        //dprintf("warning: device driver %p doesn't respect total length in read_pages() call!\n", ref->device);
                        size = fileVecs[0].length;
                }

                ASSERT(size <= fileVecs[0].length);

                // If the file portion was contiguous, we're already done now
                if (size == numBytes)
                        return B_OK;

                // if we reached the end of the file, we can return as well
                if (size != fileVecs[0].length) {
                        *_numBytes = size;
                        return B_OK;
                }

                fileVecIndex = 1;
        } else {
                fileVecIndex = 0;
                size = 0;
        }

        // Too bad, let's process the rest of the file_io_vecs

        size_t totalSize = size;

        // Everything that has not been transferred by the direct read above is
        // still outstanding. This has to be derived from the number of bytes
        // processed so far, not from "size" after the loop below, which turns
        // "size" into an offset into the current iovec.
        size_t bytesLeft = numBytes - totalSize;

        // first, find out where we have to continue in our iovecs
        uint32 i = 0;
        for (; i < count; i++) {
                if (size < vecs[i].iov_len)
                        break;

                size -= vecs[i].iov_len;
        }

        size_t vecOffset = size;

        while (true) {
                for (; fileVecIndex < fileVecCount; fileVecIndex++) {
                        file_io_vec &fileVec = fileVecs[fileVecIndex];
                        off_t fileOffset = fileVec.offset;
                        off_t fileLeft = min_c(fileVec.length, bytesLeft);

                        TRACE(("FILE VEC [%lu] length %lld\n", fileVecIndex, fileLeft));

                        // process the complete fileVec
                        while (fileLeft > 0) {
                                iovec tempVecs[MAX_TEMP_IO_VECS];
                                uint32 tempCount = 0;

                                // size tracks how much of what is left of the current fileVec
                                // (fileLeft) has been assigned to tempVecs

                                // assign what is left of the current fileVec to the tempVecs
                                for (size = 0; size < fileLeft && i < count
                                                && tempCount < MAX_TEMP_IO_VECS;) {
                                        // try to satisfy one iovec per iteration (or as much as
                                        // possible)

                                        // bytes left of the current iovec
                                        size_t vecLeft = vecs[i].iov_len - vecOffset;
                                        if (vecLeft == 0) {
                                                vecOffset = 0;
                                                i++;
                                                continue;
                                        }

                                        TRACE(("fill vec %ld, offset = %lu, size = %lu\n",
                                                i, vecOffset, size));

                                        // actually available bytes
                                        size_t tempVecSize = min_c(vecLeft, fileLeft - size);

                                        tempVecs[tempCount].iov_base
                                                = (void *)((addr_t)vecs[i].iov_base + vecOffset);
                                        tempVecs[tempCount].iov_len = tempVecSize;
                                        tempCount++;

                                        size += tempVecSize;
                                        vecOffset += tempVecSize;
                                }

                                size_t bytes = size;
                                if (doWrite) {
                                        status = vfs_write_pages(ref->device, ref->cookie,
                                                fileOffset, tempVecs, tempCount, &bytes, false);
                                } else {
                                        status = vfs_read_pages(ref->device, ref->cookie,
                                                fileOffset, tempVecs, tempCount, &bytes, false);
                                }
                                if (status < B_OK)
                                        return status;

                                totalSize += bytes;
                                bytesLeft -= size;
                                fileOffset += size;
                                fileLeft -= size;
                                //dprintf("-> file left = %Lu\n", fileLeft);

                                if (size != bytes || i >= count) {
                                        // there are no more bytes or iovecs, let's bail out
                                        *_numBytes = totalSize;
                                        return B_OK;
                                }
                        }
                }

                if (bufferOverflow) {
                        // the previous set of file vecs did not cover the complete
                        // request, so get the next set
                        status = get_file_map(ref, offset + totalSize, bytesLeft, fileVecs,
                                &fileVecCount);
                        if (status < B_OK && status != B_BUFFER_OVERFLOW) {
                                TRACE(("get_file_map(offset = %lld, numBytes = %lu) failed: %s\n",
                                        offset, numBytes, strerror(status)));
                                return status;
                        }

                        bufferOverflow = status == B_BUFFER_OVERFLOW;
                        fileVecIndex = 0;

#ifdef TRACE_FILE_CACHE
                        dprintf("got %lu file vecs for %lld:%lu%s:\n", fileVecCount,
                                offset + totalSize, numBytes,
                                bufferOverflow ? " (array too small)" : "");
                        for (size_t i = 0; i < fileVecCount; i++) {
                                dprintf("  [%lu] offset = %lld, size = %lld\n",
                                        i, fileVecs[i].offset, fileVecs[i].length);
                        }
#endif
                } else
                        break;
        }

        *_numBytes = totalSize;
        return B_OK;
}


//      #pragma mark -


int
main(int argc, char **argv)
{
        file_cache_ref ref;
        iovec vecs[MAX_IO_VECS];
        size_t count = 1;
        size_t numBytes = 10000;
        off_t offset = 4999;

        set_vecs(vecs, &count, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15,
                16, 4096, 8192, 16384, 4096, 4096, -1);
        set_file_map(0, 2000, 5000, 3000, 10000, 800, 11000, 20, 12000, 30,
                13000, 70, 14000, 100, 15000, 900, 20000, 30000, -1);

        status_t status = pages_io(&ref, offset, vecs, count, &numBytes, false);
        if (status < B_OK)
                fprintf(stderr, "pages_io() returned: %s\n", strerror(status));

        return 0;
}