root/drivers/s390/block/dasd_eer.c
// SPDX-License-Identifier: GPL-2.0
/*
 *  Character device driver for extended error reporting.
 *
 *  Copyright IBM Corp. 2005
 *  extended error reporting for DASD ECKD devices
 *  Author(s): Stefan Weinhuber <wein@de.ibm.com>
 */

#include <linux/export.h>
#include <linux/init.h>
#include <linux/fs.h>
#include <linux/kernel.h>
#include <linux/miscdevice.h>
#include <linux/module.h>
#include <linux/moduleparam.h>
#include <linux/device.h>
#include <linux/poll.h>
#include <linux/mutex.h>
#include <linux/err.h>
#include <linux/slab.h>

#include <linux/uaccess.h>
#include <linux/atomic.h>
#include <asm/ebcdic.h>

#include "dasd_int.h"
#include "dasd_eckd.h"

/*
 * SECTION: the internal buffer
 */

/*
 * The internal buffer is meant to store obaque blobs of data, so it does
 * not know of higher level concepts like triggers.
 * It consists of a number of pages that are used as a ringbuffer. Each data
 * blob is stored in a simple record that consists of an integer, which
 * contains the size of the following data, and the data bytes themselfes.
 *
 * To allow for multiple independent readers we create one internal buffer
 * each time the device is opened and destroy the buffer when the file is
 * closed again. The number of pages used for this buffer is determined by
 * the module parmeter eer_pages.
 *
 * One record can be written to a buffer by using the functions
 * - dasd_eer_start_record (one time per record to write the size to the
 *                          buffer and reserve the space for the data)
 * - dasd_eer_write_buffer (one or more times per record to write the data)
 * The data can be written in several steps but you will have to compute
 * the total size up front for the invocation of dasd_eer_start_record.
 * If the ringbuffer is full, dasd_eer_start_record will remove the required
 * number of old records.
 *
 * A record is typically read in two steps, first read the integer that
 * specifies the size of the following data, then read the data.
 * Both can be done by
 * - dasd_eer_read_buffer
 *
 * For all mentioned functions you need to get the bufferlock first and keep
 * it until a complete record is written or read.
 *
 * All information necessary to keep track of an internal buffer is kept in
 * a struct eerbuffer. The buffer specific to a file pointer is strored in
 * the private_data field of that file. To be able to write data to all
 * existing buffers, each buffer is also added to the bufferlist.
 * If the user does not want to read a complete record in one go, we have to
 * keep track of the rest of the record. residual stores the number of bytes
 * that are still to deliver. If the rest of the record is invalidated between
 * two reads then residual will be set to -1 so that the next read will fail.
 * All entries in the eerbuffer structure are protected with the bufferlock.
 * To avoid races between writing to a buffer on the one side and creating
 * and destroying buffers on the other side, the bufferlock must also be used
 * to protect the bufferlist.
 */

static int eer_pages = 5;
module_param(eer_pages, int, S_IRUGO|S_IWUSR);

struct eerbuffer {
        struct list_head list;
        char **buffer;
        int buffersize;
        int buffer_page_count;
        int head;
        int tail;
        int residual;
};

static LIST_HEAD(bufferlist);
static DEFINE_SPINLOCK(bufferlock);
static DECLARE_WAIT_QUEUE_HEAD(dasd_eer_read_wait_queue);

/*
 * How many free bytes are available on the buffer.
 * Needs to be called with bufferlock held.
 */
static int dasd_eer_get_free_bytes(struct eerbuffer *eerb)
{
        if (eerb->head < eerb->tail)
                return eerb->tail - eerb->head - 1;
        return eerb->buffersize - eerb->head + eerb->tail -1;
}

/*
 * How many bytes of buffer space are used.
 * Needs to be called with bufferlock held.
 */
static int dasd_eer_get_filled_bytes(struct eerbuffer *eerb)
{

        if (eerb->head >= eerb->tail)
                return eerb->head - eerb->tail;
        return eerb->buffersize - eerb->tail + eerb->head;
}

/*
 * The dasd_eer_write_buffer function just copies count bytes of data
 * to the buffer. Make sure to call dasd_eer_start_record first, to
 * make sure that enough free space is available.
 * Needs to be called with bufferlock held.
 */
static void dasd_eer_write_buffer(struct eerbuffer *eerb,
                                  char *data, int count)
{

        unsigned long headindex,localhead;
        unsigned long rest, len;
        char *nextdata;

        nextdata = data;
        rest = count;
        while (rest > 0) {
                headindex = eerb->head / PAGE_SIZE;
                localhead = eerb->head % PAGE_SIZE;
                len = min(rest, PAGE_SIZE - localhead);
                memcpy(eerb->buffer[headindex]+localhead, nextdata, len);
                nextdata += len;
                rest -= len;
                eerb->head += len;
                if (eerb->head == eerb->buffersize)
                        eerb->head = 0; /* wrap around */
                BUG_ON(eerb->head > eerb->buffersize);
        }
}

/*
 * Needs to be called with bufferlock held.
 */
static int dasd_eer_read_buffer(struct eerbuffer *eerb, char *data, int count)
{

        unsigned long tailindex,localtail;
        unsigned long rest, len, finalcount;
        char *nextdata;

        finalcount = min(count, dasd_eer_get_filled_bytes(eerb));
        nextdata = data;
        rest = finalcount;
        while (rest > 0) {
                tailindex = eerb->tail / PAGE_SIZE;
                localtail = eerb->tail % PAGE_SIZE;
                len = min(rest, PAGE_SIZE - localtail);
                memcpy(nextdata, eerb->buffer[tailindex] + localtail, len);
                nextdata += len;
                rest -= len;
                eerb->tail += len;
                if (eerb->tail == eerb->buffersize)
                        eerb->tail = 0; /* wrap around */
                BUG_ON(eerb->tail > eerb->buffersize);
        }
        return finalcount;
}

/*
 * Whenever you want to write a blob of data to the internal buffer you
 * have to start by using this function first. It will write the number
 * of bytes that will be written to the buffer. If necessary it will remove
 * old records to make room for the new one.
 * Needs to be called with bufferlock held.
 */
static int dasd_eer_start_record(struct eerbuffer *eerb, int count)
{
        int tailcount;

        if (count + sizeof(count) > eerb->buffersize)
                return -ENOMEM;
        while (dasd_eer_get_free_bytes(eerb) < count + sizeof(count)) {
                if (eerb->residual > 0) {
                        eerb->tail += eerb->residual;
                        if (eerb->tail >= eerb->buffersize)
                                eerb->tail -= eerb->buffersize;
                        eerb->residual = -1;
                }
                dasd_eer_read_buffer(eerb, (char *) &tailcount,
                                     sizeof(tailcount));
                eerb->tail += tailcount;
                if (eerb->tail >= eerb->buffersize)
                        eerb->tail -= eerb->buffersize;
        }
        dasd_eer_write_buffer(eerb, (char*) &count, sizeof(count));

        return 0;
};

/*
 * Release pages that are not used anymore.
 */
static void dasd_eer_free_buffer_pages(char **buf, int no_pages)
{
        int i;

        for (i = 0; i < no_pages; i++)
                free_page((unsigned long) buf[i]);
}

/*
 * Allocate a new set of memory pages.
 */
static int dasd_eer_allocate_buffer_pages(char **buf, int no_pages)
{
        int i;

        for (i = 0; i < no_pages; i++) {
                buf[i] = (char *) get_zeroed_page(GFP_KERNEL);
                if (!buf[i]) {
                        dasd_eer_free_buffer_pages(buf, i);
                        return -ENOMEM;
                }
        }
        return 0;
}

/*
 * SECTION: The extended error reporting functionality
 */

/*
 * When a DASD device driver wants to report an error, it calls the
 * function dasd_eer_write and gives the respective trigger ID as
 * parameter. Currently there are four kinds of triggers:
 *
 * DASD_EER_FATALERROR:  all kinds of unrecoverable I/O problems
 * DASD_EER_PPRCSUSPEND: PPRC was suspended
 * DASD_EER_NOPATH:      There is no path to the device left.
 * DASD_EER_STATECHANGE: The state of the device has changed.
 *
 * For the first three triggers all required information can be supplied by
 * the caller. For these triggers a record is written by the function
 * dasd_eer_write_standard_trigger.
 *
 * The DASD_EER_STATECHANGE trigger is special since a sense subsystem
 * status ccw need to be executed to gather the necessary sense data first.
 * The dasd_eer_snss function will queue the SNSS request and the request
 * callback will then call dasd_eer_write with the DASD_EER_STATCHANGE
 * trigger.
 *
 * To avoid memory allocations at runtime, the necessary memory is allocated
 * when the extended error reporting is enabled for a device (by
 * dasd_eer_probe). There is one sense subsystem status request for each
 * eer enabled DASD device. The presence of the cqr in device->eer_cqr
 * indicates that eer is enable for the device. The use of the snss request
 * is protected by the DASD_FLAG_EER_IN_USE bit. When this flag indicates
 * that the cqr is currently in use, dasd_eer_snss cannot start a second
 * request but sets the DASD_FLAG_EER_SNSS flag instead. The callback of
 * the SNSS request will check the bit and call dasd_eer_snss again.
 */

#define SNSS_DATA_SIZE 44

#define DASD_EER_BUSID_SIZE 10
struct dasd_eer_header {
        __u32 total_size;
        __u32 trigger;
        __u64 tv_sec;
        __u64 tv_usec;
        char busid[DASD_EER_BUSID_SIZE];
} __attribute__ ((packed));

/*
 * The following function can be used for those triggers that have
 * all necessary data available when the function is called.
 * If the parameter cqr is not NULL, the chain of requests will be searched
 * for valid sense data, and all valid sense data sets will be added to
 * the triggers data.
 */
static void dasd_eer_write_standard_trigger(struct dasd_device *device,
                                            struct dasd_ccw_req *cqr,
                                            int trigger)
{
        struct dasd_ccw_req *temp_cqr;
        int data_size;
        struct timespec64 ts;
        struct dasd_eer_header header;
        unsigned long flags;
        struct eerbuffer *eerb;
        char *sense;

        /* go through cqr chain and count the valid sense data sets */
        data_size = 0;
        for (temp_cqr = cqr; temp_cqr; temp_cqr = temp_cqr->refers)
                if (dasd_get_sense(&temp_cqr->irb))
                        data_size += 32;

        header.total_size = sizeof(header) + data_size + 4; /* "EOR" */
        header.trigger = trigger;
        ktime_get_real_ts64(&ts);
        header.tv_sec = ts.tv_sec;
        header.tv_usec = ts.tv_nsec / NSEC_PER_USEC;
        strscpy(header.busid, dev_name(&device->cdev->dev),
                DASD_EER_BUSID_SIZE);

        spin_lock_irqsave(&bufferlock, flags);
        list_for_each_entry(eerb, &bufferlist, list) {
                dasd_eer_start_record(eerb, header.total_size);
                dasd_eer_write_buffer(eerb, (char *) &header, sizeof(header));
                for (temp_cqr = cqr; temp_cqr; temp_cqr = temp_cqr->refers) {
                        sense = dasd_get_sense(&temp_cqr->irb);
                        if (sense)
                                dasd_eer_write_buffer(eerb, sense, 32);
                }
                dasd_eer_write_buffer(eerb, "EOR", 4);
        }
        spin_unlock_irqrestore(&bufferlock, flags);
        wake_up_interruptible(&dasd_eer_read_wait_queue);
}

/*
 * This function writes a DASD_EER_STATECHANGE trigger.
 */
static void dasd_eer_write_snss_trigger(struct dasd_device *device,
                                        struct dasd_ccw_req *cqr,
                                        int trigger)
{
        int data_size;
        int snss_rc;
        struct timespec64 ts;
        struct dasd_eer_header header;
        unsigned long flags;
        struct eerbuffer *eerb;

        snss_rc = (cqr->status == DASD_CQR_DONE) ? 0 : -EIO;
        if (snss_rc)
                data_size = 0;
        else
                data_size = SNSS_DATA_SIZE;

        header.total_size = sizeof(header) + data_size + 4; /* "EOR" */
        header.trigger = DASD_EER_STATECHANGE;
        ktime_get_real_ts64(&ts);
        header.tv_sec = ts.tv_sec;
        header.tv_usec = ts.tv_nsec / NSEC_PER_USEC;
        strscpy(header.busid, dev_name(&device->cdev->dev),
                DASD_EER_BUSID_SIZE);

        spin_lock_irqsave(&bufferlock, flags);
        list_for_each_entry(eerb, &bufferlist, list) {
                dasd_eer_start_record(eerb, header.total_size);
                dasd_eer_write_buffer(eerb, (char *) &header , sizeof(header));
                if (!snss_rc)
                        dasd_eer_write_buffer(eerb, cqr->data, SNSS_DATA_SIZE);
                dasd_eer_write_buffer(eerb, "EOR", 4);
        }
        spin_unlock_irqrestore(&bufferlock, flags);
        wake_up_interruptible(&dasd_eer_read_wait_queue);
}

/*
 * This function is called for all triggers. It calls the appropriate
 * function that writes the actual trigger records.
 */
void dasd_eer_write(struct dasd_device *device, struct dasd_ccw_req *cqr,
                    unsigned int id)
{
        if (!device->eer_cqr)
                return;
        switch (id) {
        case DASD_EER_FATALERROR:
        case DASD_EER_PPRCSUSPEND:
                dasd_eer_write_standard_trigger(device, cqr, id);
                break;
        case DASD_EER_NOPATH:
        case DASD_EER_NOSPC:
        case DASD_EER_AUTOQUIESCE:
                dasd_eer_write_standard_trigger(device, NULL, id);
                break;
        case DASD_EER_STATECHANGE:
                dasd_eer_write_snss_trigger(device, cqr, id);
                break;
        default: /* unknown trigger, so we write it without any sense data */
                dasd_eer_write_standard_trigger(device, NULL, id);
                break;
        }
}
EXPORT_SYMBOL(dasd_eer_write);

/*
 * Start a sense subsystem status request.
 * Needs to be called with the device held.
 */
void dasd_eer_snss(struct dasd_device *device)
{
        struct dasd_ccw_req *cqr;

        cqr = device->eer_cqr;
        if (!cqr)       /* Device not eer enabled. */
                return;
        if (test_and_set_bit(DASD_FLAG_EER_IN_USE, &device->flags)) {
                /* Sense subsystem status request in use. */
                set_bit(DASD_FLAG_EER_SNSS, &device->flags);
                return;
        }
        /* cdev is already locked, can't use dasd_add_request_head */
        clear_bit(DASD_FLAG_EER_SNSS, &device->flags);
        cqr->status = DASD_CQR_QUEUED;
        list_add(&cqr->devlist, &device->ccw_queue);
        dasd_schedule_device_bh(device);
}

/*
 * Callback function for use with sense subsystem status request.
 */
static void dasd_eer_snss_cb(struct dasd_ccw_req *cqr, void *data)
{
        struct dasd_device *device = cqr->startdev;
        unsigned long flags;

        dasd_eer_write(device, cqr, DASD_EER_STATECHANGE);
        spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
        if (device->eer_cqr == cqr) {
                clear_bit(DASD_FLAG_EER_IN_USE, &device->flags);
                if (test_bit(DASD_FLAG_EER_SNSS, &device->flags))
                        /* Another SNSS has been requested in the meantime. */
                        dasd_eer_snss(device);
                cqr = NULL;
        }
        spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
        if (cqr)
                /*
                 * Extended error recovery has been switched off while
                 * the SNSS request was running. It could even have
                 * been switched off and on again in which case there
                 * is a new ccw in device->eer_cqr. Free the "old"
                 * snss request now.
                 */
                dasd_sfree_request(cqr, device);
}

/*
 * Enable error reporting on a given device.
 */
int dasd_eer_enable(struct dasd_device *device)
{
        struct dasd_ccw_req *cqr = NULL;
        unsigned long flags;
        struct ccw1 *ccw;
        int rc = 0;

        spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
        if (device->eer_cqr)
                goto out;
        else if (!device->discipline ||
                 strcmp(device->discipline->name, "ECKD"))
                rc = -EMEDIUMTYPE;
        else if (test_bit(DASD_FLAG_OFFLINE, &device->flags))
                rc = -EBUSY;

        if (rc)
                goto out;

        cqr = dasd_smalloc_request(DASD_ECKD_MAGIC, 1 /* SNSS */,
                                   SNSS_DATA_SIZE, device, NULL);
        if (IS_ERR(cqr)) {
                rc = -ENOMEM;
                cqr = NULL;
                goto out;
        }

        cqr->startdev = device;
        cqr->retries = 255;
        cqr->expires = 10 * HZ;
        clear_bit(DASD_CQR_FLAGS_USE_ERP, &cqr->flags);
        set_bit(DASD_CQR_ALLOW_SLOCK, &cqr->flags);

        ccw = cqr->cpaddr;
        ccw->cmd_code = DASD_ECKD_CCW_SNSS;
        ccw->count = SNSS_DATA_SIZE;
        ccw->flags = 0;
        ccw->cda = virt_to_dma32(cqr->data);

        cqr->buildclk = get_tod_clock();
        cqr->status = DASD_CQR_FILLED;
        cqr->callback = dasd_eer_snss_cb;

        if (!device->eer_cqr) {
                device->eer_cqr = cqr;
                cqr = NULL;
        }

out:
        spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);

        if (cqr)
                dasd_sfree_request(cqr, device);

        return rc;
}

/*
 * Disable error reporting on a given device.
 */
void dasd_eer_disable(struct dasd_device *device)
{
        struct dasd_ccw_req *cqr;
        unsigned long flags;
        int in_use;

        if (!device->eer_cqr)
                return;
        spin_lock_irqsave(get_ccwdev_lock(device->cdev), flags);
        cqr = device->eer_cqr;
        device->eer_cqr = NULL;
        clear_bit(DASD_FLAG_EER_SNSS, &device->flags);
        in_use = test_and_clear_bit(DASD_FLAG_EER_IN_USE, &device->flags);
        spin_unlock_irqrestore(get_ccwdev_lock(device->cdev), flags);
        if (cqr && !in_use)
                dasd_sfree_request(cqr, device);
}

/*
 * SECTION: the device operations
 */

/*
 * On the one side we need a lock to access our internal buffer, on the
 * other side a copy_to_user can sleep. So we need to copy the data we have
 * to transfer in a readbuffer, which is protected by the readbuffer_mutex.
 */
static char readbuffer[PAGE_SIZE];
static DEFINE_MUTEX(readbuffer_mutex);

static int dasd_eer_open(struct inode *inp, struct file *filp)
{
        struct eerbuffer *eerb;
        unsigned long flags;

        eerb = kzalloc_obj(struct eerbuffer);
        if (!eerb)
                return -ENOMEM;
        eerb->buffer_page_count = eer_pages;
        if (eerb->buffer_page_count < 1 ||
            eerb->buffer_page_count > INT_MAX / PAGE_SIZE) {
                kfree(eerb);
                DBF_EVENT(DBF_WARNING, "can't open device since module "
                        "parameter eer_pages is smaller than 1 or"
                        " bigger than %d", (int)(INT_MAX / PAGE_SIZE));
                return -EINVAL;
        }
        eerb->buffersize = eerb->buffer_page_count * PAGE_SIZE;
        eerb->buffer = kmalloc_array(eerb->buffer_page_count, sizeof(char *),
                                     GFP_KERNEL);
        if (!eerb->buffer) {
                kfree(eerb);
                return -ENOMEM;
        }
        if (dasd_eer_allocate_buffer_pages(eerb->buffer,
                                           eerb->buffer_page_count)) {
                kfree(eerb->buffer);
                kfree(eerb);
                return -ENOMEM;
        }
        filp->private_data = eerb;
        spin_lock_irqsave(&bufferlock, flags);
        list_add(&eerb->list, &bufferlist);
        spin_unlock_irqrestore(&bufferlock, flags);

        return nonseekable_open(inp,filp);
}

static int dasd_eer_close(struct inode *inp, struct file *filp)
{
        struct eerbuffer *eerb;
        unsigned long flags;

        eerb = (struct eerbuffer *) filp->private_data;
        spin_lock_irqsave(&bufferlock, flags);
        list_del(&eerb->list);
        spin_unlock_irqrestore(&bufferlock, flags);
        dasd_eer_free_buffer_pages(eerb->buffer, eerb->buffer_page_count);
        kfree(eerb->buffer);
        kfree(eerb);

        return 0;
}

static ssize_t dasd_eer_read(struct file *filp, char __user *buf,
                             size_t count, loff_t *ppos)
{
        int tc,rc;
        int tailcount,effective_count;
        unsigned long flags;
        struct eerbuffer *eerb;

        eerb = (struct eerbuffer *) filp->private_data;
        if (mutex_lock_interruptible(&readbuffer_mutex))
                return -ERESTARTSYS;

        spin_lock_irqsave(&bufferlock, flags);

        if (eerb->residual < 0) { /* the remainder of this record */
                                  /* has been deleted             */
                eerb->residual = 0;
                spin_unlock_irqrestore(&bufferlock, flags);
                mutex_unlock(&readbuffer_mutex);
                return -EIO;
        } else if (eerb->residual > 0) {
                /* OK we still have a second half of a record to deliver */
                effective_count = min(eerb->residual, (int) count);
                eerb->residual -= effective_count;
        } else {
                tc = 0;
                while (!tc) {
                        tc = dasd_eer_read_buffer(eerb, (char *) &tailcount,
                                                  sizeof(tailcount));
                        if (!tc) {
                                /* no data available */
                                spin_unlock_irqrestore(&bufferlock, flags);
                                mutex_unlock(&readbuffer_mutex);
                                if (filp->f_flags & O_NONBLOCK)
                                        return -EAGAIN;
                                rc = wait_event_interruptible(
                                        dasd_eer_read_wait_queue,
                                        eerb->head != eerb->tail);
                                if (rc)
                                        return rc;
                                if (mutex_lock_interruptible(&readbuffer_mutex))
                                        return -ERESTARTSYS;
                                spin_lock_irqsave(&bufferlock, flags);
                        }
                }
                WARN_ON(tc != sizeof(tailcount));
                effective_count = min(tailcount,(int)count);
                eerb->residual = tailcount - effective_count;
        }

        tc = dasd_eer_read_buffer(eerb, readbuffer, effective_count);
        WARN_ON(tc != effective_count);

        spin_unlock_irqrestore(&bufferlock, flags);

        if (copy_to_user(buf, readbuffer, effective_count)) {
                mutex_unlock(&readbuffer_mutex);
                return -EFAULT;
        }

        mutex_unlock(&readbuffer_mutex);
        return effective_count;
}

static __poll_t dasd_eer_poll(struct file *filp, poll_table *ptable)
{
        __poll_t mask;
        unsigned long flags;
        struct eerbuffer *eerb;

        eerb = (struct eerbuffer *) filp->private_data;
        poll_wait(filp, &dasd_eer_read_wait_queue, ptable);
        spin_lock_irqsave(&bufferlock, flags);
        if (eerb->head != eerb->tail)
                mask = EPOLLIN | EPOLLRDNORM ;
        else
                mask = 0;
        spin_unlock_irqrestore(&bufferlock, flags);
        return mask;
}

static const struct file_operations dasd_eer_fops = {
        .open           = &dasd_eer_open,
        .release        = &dasd_eer_close,
        .read           = &dasd_eer_read,
        .poll           = &dasd_eer_poll,
        .owner          = THIS_MODULE,
        .llseek         = noop_llseek,
};

static struct miscdevice *dasd_eer_dev = NULL;

int __init dasd_eer_init(void)
{
        int rc;

        dasd_eer_dev = kzalloc_obj(*dasd_eer_dev);
        if (!dasd_eer_dev)
                return -ENOMEM;

        dasd_eer_dev->minor = MISC_DYNAMIC_MINOR;
        dasd_eer_dev->name  = "dasd_eer";
        dasd_eer_dev->fops  = &dasd_eer_fops;

        rc = misc_register(dasd_eer_dev);
        if (rc) {
                kfree(dasd_eer_dev);
                dasd_eer_dev = NULL;
                DBF_EVENT(DBF_ERR, "%s", "dasd_eer_init could not "
                       "register misc device");
                return rc;
        }

        return 0;
}

void dasd_eer_exit(void)
{
        if (dasd_eer_dev) {
                misc_deregister(dasd_eer_dev);
                kfree(dasd_eer_dev);
                dasd_eer_dev = NULL;
        }
}