root/drivers/media/platform/sunxi/sun4i-csi/sun4i_dma.c
// SPDX-License-Identifier: GPL-2.0+
/*
 * Copyright (C) 2016 NextThing Co
 * Copyright (C) 2016-2019 Bootlin
 *
 * Author: Maxime Ripard <maxime.ripard@bootlin.com>
 */

#include <linux/device.h>
#include <linux/interrupt.h>
#include <linux/list.h>
#include <linux/mutex.h>
#include <linux/spinlock.h>
#include <media/videobuf2-dma-contig.h>
#include <media/videobuf2-v4l2.h>

#include "sun4i_csi.h"

/*
 * Driver-side wrapper around a videobuf2 buffer. Its size is handed to the
 * vb2 core through buf_struct_size in sun4i_csi_dma_register().
 */
struct sun4i_csi_buffer {
        /* Embedded vb2 buffer; container_of() on this member recovers us */
        struct vb2_v4l2_buffer  vb;
        /* Link in csi->buf_list while the buffer waits for a hardware slot */
        struct list_head        list;
};

/* Map an embedded vb2_v4l2_buffer back to the sun4i_csi_buffer holding it. */
static inline struct sun4i_csi_buffer *
vb2_v4l2_to_csi_buffer(const struct vb2_v4l2_buffer *p)
{
        struct sun4i_csi_buffer *csi_buf;

        csi_buf = container_of(p, struct sun4i_csi_buffer, vb);

        return csi_buf;
}

/*
 * Map a raw vb2_buffer to its sun4i_csi_buffer, going through the
 * intermediate vb2_v4l2_buffer that embeds it.
 */
static inline struct sun4i_csi_buffer *
vb2_to_csi_buffer(const struct vb2_buffer *p)
{
        const struct vb2_v4l2_buffer *v4l2_buf = to_vb2_v4l2_buffer(p);

        return vb2_v4l2_to_csi_buffer(v4l2_buf);
}

static void sun4i_csi_capture_start(struct sun4i_csi *csi)
{
        writel(CSI_CPT_CTRL_VIDEO_START, csi->regs + CSI_CPT_CTRL_REG);
}

static void sun4i_csi_capture_stop(struct sun4i_csi *csi)
{
        writel(0, csi->regs + CSI_CPT_CTRL_REG);
}

/*
 * vb2 .queue_setup callback.
 *
 * Two modes, selected by the value of *nplanes on entry:
 *  - non-zero: the caller already chose a plane layout; check that the plane
 *    count matches the current format and that every requested size is at
 *    least the format's sizeimage for that plane.
 *  - zero: fill in *nplanes and sizes[] from the current format.
 *
 * @nbuffers and @alloc_devs are left untouched.
 *
 * Returns 0 on success, -EINVAL if the caller-provided layout does not fit
 * the current format.
 */
static int sun4i_csi_queue_setup(struct vb2_queue *vq,
                                 unsigned int *nbuffers,
                                 unsigned int *nplanes,
                                 unsigned int sizes[],
                                 struct device *alloc_devs[])
{
        struct sun4i_csi *csi = vb2_get_drv_priv(vq);
        unsigned int num_planes = csi->fmt.num_planes;
        unsigned int i;

        if (*nplanes) {
                if (*nplanes != num_planes)
                        return -EINVAL;

                for (i = 0; i < num_planes; i++)
                        if (sizes[i] < csi->fmt.plane_fmt[i].sizeimage)
                                return -EINVAL;
                return 0;
        }

        *nplanes = num_planes;
        for (i = 0; i < num_planes; i++)
                sizes[i] = csi->fmt.plane_fmt[i].sizeimage;

        return 0;
}

/*
 * vb2 .buf_prepare callback: make sure every plane of @vb can hold the
 * current format's sizeimage and set the plane payload to that size.
 *
 * Returns 0 on success, -EINVAL as soon as one plane is too small.
 */
static int sun4i_csi_buffer_prepare(struct vb2_buffer *vb)
{
        struct sun4i_csi *csi = vb2_get_drv_priv(vb->vb2_queue);
        unsigned int plane;

        for (plane = 0; plane < csi->fmt.num_planes; plane++) {
                unsigned long needed = csi->fmt.plane_fmt[plane].sizeimage;
                unsigned long avail = vb2_plane_size(vb, plane);

                if (avail >= needed) {
                        vb2_set_plane_payload(vb, plane, needed);
                        continue;
                }

                dev_err(csi->dev, "buffer too small (%lu < %lu)\n",
                        avail, needed);
                return -EINVAL;
        }

        return 0;
}

/*
 * Point every plane address register of @slot at the scratch buffer and
 * record that the slot holds no vb2 buffer.
 *
 * The scratch area is carved up plane after plane, each plane starting
 * where the previous one's sizeimage ends. Always returns 0.
 */
static int sun4i_csi_setup_scratch_buffer(struct sun4i_csi *csi,
                                          unsigned int slot)
{
        dma_addr_t plane_addr;
        unsigned int i = 0;

        dev_dbg(csi->dev,
                "No more available buffer, using the scratch buffer\n");

        plane_addr = csi->scratch.paddr;
        while (i < csi->fmt.num_planes) {
                writel(plane_addr, csi->regs + CSI_BUF_ADDR_REG(i, slot));
                plane_addr += csi->fmt.plane_fmt[i].sizeimage;
                i++;
        }

        /* A NULL entry marks the slot as backed by the scratch buffer */
        csi->current_buf[slot] = NULL;

        return 0;
}

/*
 * Load hardware slot @slot with the next pending buffer from csi->buf_list,
 * or with the scratch buffer when the list is empty.
 *
 * The callers in this file invoke it with csi->qlock held.
 *
 * Returns 0 on success, -EINVAL if the slot still owns a buffer.
 */
static int sun4i_csi_buffer_fill_slot(struct sun4i_csi *csi, unsigned int slot)
{
        struct sun4i_csi_buffer *entry;
        struct vb2_buffer *vb;
        unsigned int i;

        /* A slot that still owns a buffer must never be overwritten */
        if (WARN_ON(csi->current_buf[slot]))
                return -EINVAL;

        if (list_empty(&csi->buf_list))
                return sun4i_csi_setup_scratch_buffer(csi, slot);

        /* Take the oldest pending buffer off the list and give it the slot */
        entry = list_first_entry(&csi->buf_list, struct sun4i_csi_buffer, list);
        list_del_init(&entry->list);
        csi->current_buf[slot] = &entry->vb;

        vb = &entry->vb.vb2_buf;
        for (i = 0; i < csi->fmt.num_planes; i++)
                writel(vb2_dma_contig_plane_dma_addr(vb, i),
                       csi->regs + CSI_BUF_ADDR_REG(i, slot));

        return 0;
}

/*
 * Fill every hardware slot in ascending order, stopping at the first
 * failure. Returns 0 or the error reported by the failing slot.
 */
static int sun4i_csi_buffer_fill_all(struct sun4i_csi *csi)
{
        unsigned int slot;
        int ret = 0;

        for (slot = 0; !ret && slot < CSI_MAX_BUFFER; slot++)
                ret = sun4i_csi_buffer_fill_slot(csi, slot);

        return ret;
}

/*
 * Complete the buffer held by @slot, if any: stamp it with the current
 * field, @sequence and a ktime_get_ns() timestamp, hand it back to vb2 as
 * DONE and release the slot. A slot with no buffer (scratch buffer in use)
 * is left alone.
 */
static void sun4i_csi_buffer_mark_done(struct sun4i_csi *csi,
                                       unsigned int slot,
                                       unsigned int sequence)
{
        struct vb2_v4l2_buffer *done = csi->current_buf[slot];

        if (!done) {
                dev_dbg(csi->dev, "Scratch buffer was used, ignoring..\n");
                return;
        }

        done->sequence = sequence;
        done->field = csi->fmt.field;
        done->vb2_buf.timestamp = ktime_get_ns();
        vb2_buffer_done(&done->vb2_buf, VB2_BUF_STATE_DONE);

        csi->current_buf[slot] = NULL;
}

/*
 * Handle a frame completion: complete the buffer in the slot that is not
 * selected by CSI_BUF_CTRL_DBS, then refill that same slot.
 *
 * Returns whatever sun4i_csi_buffer_fill_slot() returns.
 */
static int sun4i_csi_buffer_flip(struct sun4i_csi *csi, unsigned int sequence)
{
        unsigned int idle_slot;
        u32 buf_ctrl;

        buf_ctrl = readl(csi->regs + CSI_BUF_CTRL_REG);

        /* The slot to recycle is the opposite of the one flagged by DBS */
        idle_slot = (buf_ctrl & CSI_BUF_CTRL_DBS) ? 0 : 1;

        /* Hand the finished frame back before reusing its slot */
        sun4i_csi_buffer_mark_done(csi, idle_slot, sequence);

        return sun4i_csi_buffer_fill_slot(csi, idle_slot);
}

/*
 * vb2 .buf_queue callback: append @vb to the tail of the pending list.
 * The list is protected by csi->qlock, taken with interrupts disabled.
 */
static void sun4i_csi_buffer_queue(struct vb2_buffer *vb)
{
        struct sun4i_csi_buffer *csi_buf = vb2_to_csi_buffer(vb);
        struct sun4i_csi *csi = vb2_get_drv_priv(vb->vb2_queue);
        unsigned long irq_flags;

        spin_lock_irqsave(&csi->qlock, irq_flags);
        list_add_tail(&csi_buf->list, &csi->buf_list);
        spin_unlock_irqrestore(&csi->qlock, irq_flags);
}

/*
 * Give every buffer the driver currently holds back to vb2 in @state:
 * first the buffers still waiting on csi->buf_list, then the ones loaded
 * in the hardware slots.
 *
 * The callers in this file invoke it with csi->qlock held.
 */
static void return_all_buffers(struct sun4i_csi *csi,
                               enum vb2_buffer_state state)
{
        struct sun4i_csi_buffer *buf, *node;
        unsigned int slot;

        list_for_each_entry_safe(buf, node, &csi->buf_list, list) {
                /*
                 * Unlink before completing: once vb2_buffer_done() has run
                 * the buffer belongs to vb2 again and must not be touched.
                 */
                list_del(&buf->list);
                vb2_buffer_done(&buf->vb.vb2_buf, state);
        }

        for (slot = 0; slot < CSI_MAX_BUFFER; slot++) {
                struct vb2_v4l2_buffer *v_buf = csi->current_buf[slot];

                /* NULL means the slot was using the scratch buffer */
                if (!v_buf)
                        continue;

                /* Release the slot before handing the buffer back */
                csi->current_buf[slot] = NULL;
                vb2_buffer_done(&v_buf->vb2_buf, state);
        }
}

static int sun4i_csi_start_streaming(struct vb2_queue *vq, unsigned int count)
{
        struct sun4i_csi *csi = vb2_get_drv_priv(vq);
        struct v4l2_mbus_config_parallel *bus = &csi->bus;
        const struct sun4i_csi_format *csi_fmt;
        unsigned long href_pol, pclk_pol, vref_pol;
        unsigned long flags;
        unsigned int i;
        int ret;

        csi_fmt = sun4i_csi_find_format(&csi->fmt.pixelformat, NULL);
        if (!csi_fmt)
                return -EINVAL;

        dev_dbg(csi->dev, "Starting capture\n");

        csi->sequence = 0;

        /*
         * We need a scratch buffer in case where we'll not have any
         * more buffer queued so that we don't error out. One of those
         * cases is when you end up at the last frame to capture, you
         * don't have any buffer queued any more, and yet it doesn't
         * really matter since you'll never reach the next buffer.
         *
         * Since we support the multi-planar API, we need to have a
         * buffer for each plane. Allocating a single one large enough
         * to hold all the buffers is simpler, so let's go for that.
         */
        csi->scratch.size = 0;
        for (i = 0; i < csi->fmt.num_planes; i++)
                csi->scratch.size += csi->fmt.plane_fmt[i].sizeimage;

        csi->scratch.vaddr = dma_alloc_coherent(csi->dev,
                                                csi->scratch.size,
                                                &csi->scratch.paddr,
                                                GFP_KERNEL);
        if (!csi->scratch.vaddr) {
                dev_err(csi->dev, "Failed to allocate scratch buffer\n");
                ret = -ENOMEM;
                goto err_clear_dma_queue;
        }

        ret = video_device_pipeline_alloc_start(&csi->vdev);
        if (ret < 0)
                goto err_free_scratch_buffer;

        spin_lock_irqsave(&csi->qlock, flags);

        /* Setup timings */
        writel(CSI_WIN_CTRL_W_ACTIVE(csi->fmt.width * 2),
               csi->regs + CSI_WIN_CTRL_W_REG);
        writel(CSI_WIN_CTRL_H_ACTIVE(csi->fmt.height),
               csi->regs + CSI_WIN_CTRL_H_REG);

        /*
         * This hardware uses [HV]REF instead of [HV]SYNC. Based on the
         * provided timing diagrams in the manual, positive polarity
         * equals active high [HV]REF.
         *
         * When the back porch is 0, [HV]REF is more or less equivalent
         * to [HV]SYNC inverted.
         */
        href_pol = !!(bus->flags & V4L2_MBUS_HSYNC_ACTIVE_LOW);
        vref_pol = !!(bus->flags & V4L2_MBUS_VSYNC_ACTIVE_LOW);
        pclk_pol = !!(bus->flags & V4L2_MBUS_PCLK_SAMPLE_RISING);
        writel(CSI_CFG_INPUT_FMT(csi_fmt->input) |
               CSI_CFG_OUTPUT_FMT(csi_fmt->output) |
               CSI_CFG_VREF_POL(vref_pol) |
               CSI_CFG_HREF_POL(href_pol) |
               CSI_CFG_PCLK_POL(pclk_pol),
               csi->regs + CSI_CFG_REG);

        /* Setup buffer length */
        writel(csi->fmt.plane_fmt[0].bytesperline,
               csi->regs + CSI_BUF_LEN_REG);

        /* Prepare our buffers in hardware */
        ret = sun4i_csi_buffer_fill_all(csi);
        if (ret) {
                spin_unlock_irqrestore(&csi->qlock, flags);
                goto err_disable_pipeline;
        }

        /* Enable double buffering */
        writel(CSI_BUF_CTRL_DBE, csi->regs + CSI_BUF_CTRL_REG);

        /* Clear the pending interrupts */
        writel(CSI_INT_FRM_DONE, csi->regs + CSI_INT_STA_REG);

        /* Enable frame done interrupt */
        writel(CSI_INT_FRM_DONE, csi->regs + CSI_INT_EN_REG);

        sun4i_csi_capture_start(csi);

        spin_unlock_irqrestore(&csi->qlock, flags);

        ret = v4l2_subdev_call(csi->src_subdev, video, s_stream, 1);
        if (ret < 0 && ret != -ENOIOCTLCMD)
                goto err_disable_device;

        return 0;

err_disable_device:
        sun4i_csi_capture_stop(csi);

err_disable_pipeline:
        video_device_pipeline_stop(&csi->vdev);

err_free_scratch_buffer:
        dma_free_coherent(csi->dev, csi->scratch.size, csi->scratch.vaddr,
                          csi->scratch.paddr);

err_clear_dma_queue:
        spin_lock_irqsave(&csi->qlock, flags);
        return_all_buffers(csi, VB2_BUF_STATE_QUEUED);
        spin_unlock_irqrestore(&csi->qlock, flags);

        return ret;
}

/*
 * vb2 .stop_streaming callback: stop the source subdev and the CSI capture,
 * return every buffer still held by the driver in the ERROR state, stop the
 * media pipeline and free the scratch buffer.
 */
static void sun4i_csi_stop_streaming(struct vb2_queue *vq)
{
        struct sun4i_csi *csi = vb2_get_drv_priv(vq);
        unsigned long irq_flags;

        dev_dbg(csi->dev, "Stopping capture\n");

        /* Stop the source subdev stream, then the CSI capture itself */
        v4l2_subdev_call(csi->src_subdev, video, s_stream, 0);
        sun4i_csi_capture_stop(csi);

        /* Hand back both the pending and the in-slot buffers */
        spin_lock_irqsave(&csi->qlock, irq_flags);
        return_all_buffers(csi, VB2_BUF_STATE_ERROR);
        spin_unlock_irqrestore(&csi->qlock, irq_flags);

        video_device_pipeline_stop(&csi->vdev);

        /* Free the scratch buffer using the size/address stored in csi */
        dma_free_coherent(csi->dev,
                          csi->scratch.size,
                          csi->scratch.vaddr,
                          csi->scratch.paddr);
}

/*
 * videobuf2 queue operations for the capture queue; installed as q->ops in
 * sun4i_csi_dma_register().
 */
static const struct vb2_ops sun4i_csi_qops = {
        .queue_setup            = sun4i_csi_queue_setup,
        .buf_prepare            = sun4i_csi_buffer_prepare,
        .buf_queue              = sun4i_csi_buffer_queue,
        .start_streaming        = sun4i_csi_start_streaming,
        .stop_streaming         = sun4i_csi_stop_streaming,
};

/*
 * Interrupt handler: acknowledge everything that is pending and, on a
 * frame-done event, flip the buffers under csi->qlock. If the flip fails
 * the capture is stopped. Always returns IRQ_HANDLED.
 */
static irqreturn_t sun4i_csi_irq(int irq, void *data)
{
        struct sun4i_csi *csi = data;
        u32 status = readl(csi->regs + CSI_INT_STA_REG);

        /* Acknowledge by writing the pending bits back to the status register */
        writel(status, csi->regs + CSI_INT_STA_REG);

        if (status & CSI_INT_FRM_DONE) {
                spin_lock(&csi->qlock);
                if (sun4i_csi_buffer_flip(csi, csi->sequence++)) {
                        dev_warn(csi->dev, "%s: Flip failed\n", __func__);
                        sun4i_csi_capture_stop(csi);
                }
                spin_unlock(&csi->qlock);
        }

        return IRQ_HANDLED;
}

int sun4i_csi_dma_register(struct sun4i_csi *csi, int irq)
{
        struct vb2_queue *q = &csi->queue;
        int ret;
        int i;

        spin_lock_init(&csi->qlock);
        mutex_init(&csi->lock);

        INIT_LIST_HEAD(&csi->buf_list);
        for (i = 0; i < CSI_MAX_BUFFER; i++)
                csi->current_buf[i] = NULL;

        q->min_queued_buffers = 3;
        q->type = V4L2_BUF_TYPE_VIDEO_CAPTURE_MPLANE;
        q->io_modes = VB2_MMAP | VB2_DMABUF;
        q->lock = &csi->lock;
        q->drv_priv = csi;
        q->buf_struct_size = sizeof(struct sun4i_csi_buffer);
        q->ops = &sun4i_csi_qops;
        q->mem_ops = &vb2_dma_contig_memops;
        q->timestamp_flags = V4L2_BUF_FLAG_TIMESTAMP_MONOTONIC;
        q->dev = csi->dev;

        ret = vb2_queue_init(q);
        if (ret < 0) {
                dev_err(csi->dev, "failed to initialize VB2 queue\n");
                goto err_free_mutex;
        }

        ret = v4l2_device_register(csi->dev, &csi->v4l);
        if (ret) {
                dev_err(csi->dev, "Couldn't register the v4l2 device\n");
                goto err_free_mutex;
        }

        ret = devm_request_irq(csi->dev, irq, sun4i_csi_irq, 0,
                               dev_name(csi->dev), csi);
        if (ret) {
                dev_err(csi->dev, "Couldn't register our interrupt\n");
                goto err_unregister_device;
        }

        return 0;

err_unregister_device:
        v4l2_device_unregister(&csi->v4l);

err_free_mutex:
        mutex_destroy(&csi->lock);
        return ret;
}

/*
 * Undo sun4i_csi_dma_register(): unregister the v4l2 device and destroy
 * the queue mutex.
 */
void sun4i_csi_dma_unregister(struct sun4i_csi *csi)
{
        struct v4l2_device *v4l2_dev = &csi->v4l;

        v4l2_device_unregister(v4l2_dev);
        mutex_destroy(&csi->lock);
}