drivers/firewire/core-iso.c
// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Isochronous I/O functionality:
 *   - Isochronous DMA context management
 *   - Isochronous bus resource management (channels, bandwidth), client side
 *
 * Copyright (C) 2006 Kristian Hoegsberg <krh@bitplanet.net>
 */

#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/firewire.h>
#include <linux/firewire-constants.h>
#include <linux/kernel.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/export.h>

#include <asm/byteorder.h>

#include "core.h"

#include <trace/events/firewire.h>

/*
 * Isochronous DMA context management
 */

int fw_iso_buffer_alloc(struct fw_iso_buffer *buffer, int page_count)
{
        struct page **page_array __free(kfree) = kcalloc(page_count, sizeof(page_array[0]),
                                                          GFP_KERNEL);

        if (!page_array)
                return -ENOMEM;

        // Allocate noncontiguous pages. Each descriptor of a 1394 OHCI isochronous DMA context
        // carries its own address and length, so the buffer need not be physically contiguous;
        // pages are used for the convenience of mapping them into the virtual address space of a
        // user process.
        unsigned long nr_populated = alloc_pages_bulk(GFP_KERNEL | GFP_DMA32 | __GFP_ZERO,
                                                      page_count, page_array);
        if (nr_populated != page_count) {
                // This assumes that alloc_pages_bulk() fills page_array sequentially from the
                // beginning, so the first nr_populated entries are valid.
                release_pages(page_array, nr_populated);
                return -ENOMEM;
        }

        buffer->page_count = page_count;
        buffer->pages = no_free_ptr(page_array);

        return 0;
}

int fw_iso_buffer_map_dma(struct fw_iso_buffer *buffer, struct fw_card *card,
                          enum dma_data_direction direction)
{
        dma_addr_t *dma_addrs __free(kfree) = kcalloc(buffer->page_count, sizeof(dma_addrs[0]),
                                                      GFP_KERNEL);
        int i;

        if (!dma_addrs)
                return -ENOMEM;

        // Create a DMA mapping per page. The mapped addresses are not contiguous, and cache
        // coherency for the pages is maintained by hand.
        for (i = 0; i < buffer->page_count; i++) {
                // dma_map_phys() with a physical address per page could be used here instead.
                dma_addr_t dma_addr = dma_map_page(card->device, buffer->pages[i], 0, PAGE_SIZE,
                                                   direction);
                if (dma_mapping_error(card->device, dma_addr))
                        break;

                dma_addrs[i] = dma_addr;
        }
        if (i < buffer->page_count) {
                while (i-- > 0)
                        dma_unmap_page(card->device, dma_addrs[i], PAGE_SIZE, direction);
                return -ENOMEM;
        }

        buffer->direction = direction;
        buffer->dma_addrs = no_free_ptr(dma_addrs);

        return 0;
}

int fw_iso_buffer_init(struct fw_iso_buffer *buffer, struct fw_card *card,
                       int page_count, enum dma_data_direction direction)
{
        int ret;

        ret = fw_iso_buffer_alloc(buffer, page_count);
        if (ret < 0)
                return ret;

        ret = fw_iso_buffer_map_dma(buffer, card, direction);
        if (ret < 0)
                fw_iso_buffer_destroy(buffer, card);

        return ret;
}
EXPORT_SYMBOL(fw_iso_buffer_init);
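
/*
 * A minimal usage sketch for the exported buffer API (the page count and DMA direction are
 * arbitrary illustrations, not values required by the API):
 *
 *        struct fw_iso_buffer buffer;
 *        int err = fw_iso_buffer_init(&buffer, card, 16, DMA_FROM_DEVICE);
 *
 *        if (err < 0)
 *                return err;
 *        // ... map the pages into user space and/or queue packets against the buffer ...
 *        fw_iso_buffer_destroy(&buffer, card);
 */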

void fw_iso_buffer_destroy(struct fw_iso_buffer *buffer,
                           struct fw_card *card)
{
        if (buffer->dma_addrs) {
                for (int i = 0; i < buffer->page_count; ++i) {
                        dma_addr_t dma_addr = buffer->dma_addrs[i];
                        dma_unmap_page(card->device, dma_addr, PAGE_SIZE, buffer->direction);
                }
                kfree(buffer->dma_addrs);
                buffer->dma_addrs = NULL;
        }

        if (buffer->pages) {
                release_pages(buffer->pages, buffer->page_count);
                kfree(buffer->pages);
                buffer->pages = NULL;
        }

        buffer->page_count = 0;
}
EXPORT_SYMBOL(fw_iso_buffer_destroy);

/* Convert DMA address to offset into virtually contiguous buffer. */
size_t fw_iso_buffer_lookup(struct fw_iso_buffer *buffer, dma_addr_t completed)
{
        for (int i = 0; i < buffer->page_count; i++) {
                dma_addr_t dma_addr = buffer->dma_addrs[i];
                ssize_t offset = (ssize_t)completed - (ssize_t)dma_addr;
                if (offset > 0 && offset <= PAGE_SIZE)
                        return (i << PAGE_SHIFT) + offset;
        }

        return 0;
}
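
/*
 * A worked example, assuming a 4 KiB PAGE_SIZE and illustrative addresses: if page 2 of the
 * buffer was mapped to DMA address 0x80002000 and the controller reports completed = 0x80002100,
 * the offset within the page is 0x100 and the result is (2 << PAGE_SHIFT) + 0x100 = 0x2100.
 * Since 'completed' points just past the last transferred byte, an offset of exactly PAGE_SIZE
 * still belongs to this page, hence the (0, PAGE_SIZE] bounds above.
 */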

struct fw_iso_context *__fw_iso_context_create(struct fw_card *card, int type, int channel,
                int speed, size_t header_size, size_t header_storage_size,
                union fw_iso_callback callback, void *callback_data)
{
        struct fw_iso_context *ctx;

        ctx = card->driver->allocate_iso_context(card, type, channel, header_size,
                                                 header_storage_size);
        if (IS_ERR(ctx))
                return ctx;

        ctx->card = card;
        ctx->type = type;
        ctx->channel = channel;
        ctx->speed = speed;
        ctx->flags = 0;
        ctx->header_size = header_size;
        ctx->header_storage_size = header_storage_size;
        ctx->callback = callback;
        ctx->callback_data = callback_data;

        trace_isoc_outbound_allocate(ctx, channel, speed);
        trace_isoc_inbound_single_allocate(ctx, channel, header_size);
        trace_isoc_inbound_multiple_allocate(ctx);

        return ctx;
}
EXPORT_SYMBOL(__fw_iso_context_create);

void fw_iso_context_destroy(struct fw_iso_context *ctx)
{
        trace_isoc_outbound_destroy(ctx);
        trace_isoc_inbound_single_destroy(ctx);
        trace_isoc_inbound_multiple_destroy(ctx);

        ctx->card->driver->free_iso_context(ctx);
}
EXPORT_SYMBOL(fw_iso_context_destroy);

int fw_iso_context_start(struct fw_iso_context *ctx,
                         int cycle, int sync, int tags)
{
        trace_isoc_outbound_start(ctx, cycle);
        trace_isoc_inbound_single_start(ctx, cycle, sync, tags);
        trace_isoc_inbound_multiple_start(ctx, cycle, sync, tags);

        return ctx->card->driver->start_iso(ctx, cycle, sync, tags);
}
EXPORT_SYMBOL(fw_iso_context_start);

int fw_iso_context_set_channels(struct fw_iso_context *ctx, u64 *channels)
{
        trace_isoc_inbound_multiple_channels(ctx, *channels);

        return ctx->card->driver->set_iso_channels(ctx, channels);
}

int fw_iso_context_queue(struct fw_iso_context *ctx,
                         struct fw_iso_packet *packet,
                         struct fw_iso_buffer *buffer,
                         unsigned long payload)
{
        trace_isoc_outbound_queue(ctx, payload, packet);
        trace_isoc_inbound_single_queue(ctx, payload, packet);
        trace_isoc_inbound_multiple_queue(ctx, payload, packet);

        return ctx->card->driver->queue_iso(ctx, packet, buffer, payload);
}
EXPORT_SYMBOL(fw_iso_context_queue);

void fw_iso_context_queue_flush(struct fw_iso_context *ctx)
{
        trace_isoc_outbound_flush(ctx);
        trace_isoc_inbound_single_flush(ctx);
        trace_isoc_inbound_multiple_flush(ctx);

        ctx->card->driver->flush_queue_iso(ctx);
}
EXPORT_SYMBOL(fw_iso_context_queue_flush);
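
/*
 * A minimal sketch of the queue/flush/start flow, assuming an isochronous receive context
 * created elsewhere and a packet descriptor prepared by the caller (ctx, packet, buffer, and
 * payload_offset are the caller's responsibility; a negative cycle requests an immediate start):
 *
 *        err = fw_iso_context_queue(ctx, &packet, &buffer, payload_offset);
 *        if (err < 0)
 *                return err;
 *        fw_iso_context_queue_flush(ctx);
 *        err = fw_iso_context_start(ctx, -1, 0, FW_ISO_CONTEXT_MATCH_ALL_TAGS);
 */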

/**
 * fw_iso_context_flush_completions() - process isochronous context in current process context.
 * @ctx: the isochronous context
 *
 * Process the isochronous context in the current process context. The registered callback function
 * is called when a queued packet buffer with the interrupt flag is completed, either after
 * transmission in the IT context or after being filled in the IR context. The callback function is
 * also called for the most recently completed packet buffer, as well as when the header buffer in
 * the context becomes full. If the context needs to be processed asynchronously,
 * fw_iso_context_schedule_flush_completions() is available instead.
 *
 * Context: Process context. May sleep due to disable_work_sync().
 */
int fw_iso_context_flush_completions(struct fw_iso_context *ctx)
{
        int err;

        trace_isoc_outbound_flush_completions(ctx);
        trace_isoc_inbound_single_flush_completions(ctx);
        trace_isoc_inbound_multiple_flush_completions(ctx);

        might_sleep();

        // Avoid deadlock due to a programming mistake.
        if (WARN_ON_ONCE(current_work() == &ctx->work))
                return 0;

        disable_work_sync(&ctx->work);

        err = ctx->card->driver->flush_iso_completions(ctx);

        enable_work(&ctx->work);

        return err;
}
EXPORT_SYMBOL(fw_iso_context_flush_completions);
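
/*
 * For example, a unit driver can process completions synchronously before reading state that its
 * registered callback accumulates (the callback and its state are hypothetical):
 *
 *        err = fw_iso_context_flush_completions(ctx);
 *        if (err < 0)
 *                return err;
 *        // state updated by the registered callback is now current
 */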

int fw_iso_context_stop(struct fw_iso_context *ctx)
{
        int err;

        trace_isoc_outbound_stop(ctx);
        trace_isoc_inbound_single_stop(ctx);
        trace_isoc_inbound_multiple_stop(ctx);

        might_sleep();

        // Avoid deadlock due to a programming mistake.
        if (WARN_ON_ONCE(current_work() == &ctx->work))
                return 0;

        err = ctx->card->driver->stop_iso(ctx);

        cancel_work_sync(&ctx->work);

        return err;
}
EXPORT_SYMBOL(fw_iso_context_stop);

/*
 * Isochronous bus resource management (channels, bandwidth), client side
 */

static int manage_bandwidth(struct fw_card *card, int irm_id, int generation,
                            int bandwidth, bool allocate)
{
        int try, new, old = allocate ? BANDWIDTH_AVAILABLE_INITIAL : 0;
        __be32 data[2];

        /*
         * On a 1394a IRM with low contention, try < 1 is enough.
         * On a 1394-1995 IRM, we need at least try < 2.
         * Let's just do try < 5.
         */
        for (try = 0; try < 5; try++) {
                new = allocate ? old - bandwidth : old + bandwidth;
                if (new < 0 || new > BANDWIDTH_AVAILABLE_INITIAL)
                        return -EBUSY;

                data[0] = cpu_to_be32(old);
                data[1] = cpu_to_be32(new);
                switch (fw_run_transaction(card, TCODE_LOCK_COMPARE_SWAP,
                                irm_id, generation, SCODE_100,
                                CSR_REGISTER_BASE + CSR_BANDWIDTH_AVAILABLE,
                                data, 8)) {
                case RCODE_GENERATION:
                        /* A generation change frees all bandwidth. */
                        return allocate ? -EAGAIN : bandwidth;

                case RCODE_COMPLETE:
                        if (be32_to_cpup(data) == old)
                                return bandwidth;

                        old = be32_to_cpup(data);
                        /* Fall through. */
                }
        }

        return -EIO;
}
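
/*
 * For example, allocating 800 bandwidth units on a bus where BANDWIDTH_AVAILABLE still holds
 * BANDWIDTH_AVAILABLE_INITIAL (4915) proposes the compare-swap old = 4915, new = 4115. If another
 * node updated the register first, the value returned in data[0] becomes the new 'old' and the
 * lock transaction is retried (the amount of 800 units is an arbitrary illustration).
 */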

static int manage_channel(struct fw_card *card, int irm_id, int generation,
                u32 channels_mask, u64 offset, bool allocate)
{
        __be32 bit, all, old;
        __be32 data[2];
        int channel, ret = -EIO, retry = 5;

        old = all = allocate ? cpu_to_be32(~0) : 0;

        for (channel = 0; channel < 32; channel++) {
                if (!(channels_mask & 1 << channel))
                        continue;

                ret = -EBUSY;

                bit = cpu_to_be32(1 << (31 - channel));
                if ((old & bit) != (all & bit))
                        continue;

                data[0] = old;
                data[1] = old ^ bit;
                switch (fw_run_transaction(card, TCODE_LOCK_COMPARE_SWAP,
                                           irm_id, generation, SCODE_100,
                                           offset, data, 8)) {
                case RCODE_GENERATION:
                        /* A generation change frees all channels. */
                        return allocate ? -EAGAIN : channel;

                case RCODE_COMPLETE:
                        if (data[0] == old)
                                return channel;

                        old = data[0];

                        /* Is the IRM 1394a-2000 compliant? */
                        if ((data[0] & bit) == (data[1] & bit))
                                continue;

                        fallthrough;    /* It's a 1394-1995 IRM, retry */
                default:
                        if (retry) {
                                retry--;
                                channel--;
                        } else {
                                ret = -EIO;
                        }
                }
        }

        return ret;
}

static void deallocate_channel(struct fw_card *card, int irm_id,
                               int generation, int channel)
{
        u32 mask;
        u64 offset;

        mask = channel < 32 ? 1 << channel : 1 << (channel - 32);
        offset = channel < 32 ? CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_HI :
                                CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_LO;

        manage_channel(card, irm_id, generation, mask, offset, false);
}

/**
 * fw_iso_resource_manage() - Allocate or deallocate a channel and/or bandwidth
 * @card: card interface for this action
 * @generation: bus generation
 * @channels_mask: bitmask for channel allocation
 * @channel: pointer for returning channel allocation result
 * @bandwidth: pointer for returning bandwidth allocation result
 * @allocate: whether to allocate (true) or deallocate (false)
 *
 * In parameters: card, generation, channels_mask, bandwidth, allocate
 * Out parameters: channel, bandwidth
 *
 * This function blocks (sleeps) during communication with the IRM.
 *
 * Allocates or deallocates at most one channel out of channels_mask.
 * channels_mask is a bitfield with MSB for channel 63 and LSB for channel 0.
 * (Note, the IRM's CHANNELS_AVAILABLE is a big-endian bitfield with MSB for
 * channel 0 and LSB for channel 63.)
 * Allocates or deallocates as many bandwidth allocation units as specified.
 *
 * Returns channel < 0 if no channel was allocated or deallocated.
 * Returns bandwidth = 0 if no bandwidth was allocated or deallocated.
 *
 * If generation is stale, deallocations succeed but allocations fail with
 * channel = -EAGAIN.
 *
 * If channel allocation fails, no bandwidth will be allocated either.
 * If bandwidth allocation fails, no channel will be allocated either.
 * But deallocations of channel and bandwidth are tried independently
 * of each other's success.
 */
void fw_iso_resource_manage(struct fw_card *card, int generation,
                            u64 channels_mask, int *channel, int *bandwidth,
                            bool allocate)
{
        u32 channels_hi = channels_mask;        /* channels 31...0 */
        u32 channels_lo = channels_mask >> 32;  /* channels 63...32 */
        int irm_id, ret, c = -EINVAL;

        scoped_guard(spinlock_irq, &card->lock)
                irm_id = card->irm_node->node_id;

        if (channels_hi)
                c = manage_channel(card, irm_id, generation, channels_hi,
                                CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_HI,
                                allocate);
        if (channels_lo && c < 0) {
                c = manage_channel(card, irm_id, generation, channels_lo,
                                CSR_REGISTER_BASE + CSR_CHANNELS_AVAILABLE_LO,
                                allocate);
                if (c >= 0)
                        c += 32;
        }
        *channel = c;

        if (allocate && channels_mask != 0 && c < 0)
                *bandwidth = 0;

        if (*bandwidth == 0)
                return;

        ret = manage_bandwidth(card, irm_id, generation, *bandwidth, allocate);
        if (ret < 0)
                *bandwidth = 0;

        if (allocate && ret < 0) {
                if (c >= 0)
                        deallocate_channel(card, irm_id, generation, c);
                *channel = ret;
        }
}
EXPORT_SYMBOL(fw_iso_resource_manage);
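
/*
 * A minimal sketch, allocating one channel out of channels 5 and 9 together with some bandwidth
 * (the amount of 2400 units is an arbitrary illustration; generation must be the current bus
 * generation):
 *
 *        u64 channels_mask = (1ULL << 5) | (1ULL << 9);
 *        int channel, bandwidth = 2400;
 *
 *        fw_iso_resource_manage(card, generation, channels_mask, &channel, &bandwidth, true);
 *        if (channel < 0)
 *                // nothing was allocated; channel is -EAGAIN if the generation was stale
 */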