// SPDX-License-Identifier: GPL-2.0
/*
 * Data Object Exchange (DOE) mailbox support
 * PCIe r6.0, sec 6.30 DOE
 */
#define dev_fmt(fmt) "DOE: " fmt
#include <linux/bitfield.h>
#include <linux/delay.h>
#include <linux/device.h>
#include <linux/jiffies.h>
#include <linux/mutex.h>
#include <linux/pci.h>
#include <linux/pci-doe.h>
#include <linux/sysfs.h>
#include <linux/workqueue.h>
#include "pci.h"
/* 1 second timeout for DOE operations */
#define PCI_DOE_TIMEOUT HZ
#define PCI_DOE_POLL_INTERVAL (PCI_DOE_TIMEOUT / 128)
/* Internal mailbox state flags */
#define PCI_DOE_FLAG_CANCEL 0
#define PCI_DOE_FLAG_DEAD 1
/* Maximum data object length is 2^18 dwords */
#define PCI_DOE_MAX_LENGTH (1 << 18)
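/**
 * struct pci_doe_mb - State for a single DOE mailbox
 * @pdev: PCI device this mailbox belongs to
 * @cap_offset: Capability offset
 * @feats: XArray of cached features supported by the mailbox
 * @wq: Wait queue used to cancel sleeping tasks
 * @work_queue: Ordered queue of pci_doe_task work items
 * @flags: Bit array of PCI_DOE_FLAG_* flags
 * @sysfs_attrs: Array of sysfs device attributes, one per feature
 *
 * Manages a single DOE mailbox capability.  All fields should be considered
 * opaque to consumers.
 */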
struct pci_doe_mb {
struct pci_dev *pdev;
u16 cap_offset;
struct xarray feats;
wait_queue_head_t wq;
struct workqueue_struct *work_queue;
unsigned long flags;
#ifdef CONFIG_SYSFS
struct device_attribute *sysfs_attrs;
#endif
};
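/**
 * struct pci_doe_feature - Identifies a single DOE feature
 * @vid: Vendor ID
 * @type: Data Object Type
 */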
struct pci_doe_feature {
u16 vid;
u8 type;
};
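/**
 * struct pci_doe_task - Represents a single request/response exchange
 * @feat: DOE feature the request is addressed to
 * @request_pl: Request payload
 * @request_pl_sz: Size of the request payload (bytes)
 * @response_pl: Buffer for the response payload
 * @response_pl_sz: Size of the response buffer (bytes)
 * @rv: Return value; length of the received response (bytes) or -errno
 * @complete: Called when the task is complete
 * @private: Private data for the consumer
 * @work: Used internally by the mailbox
 * @doe_mb: Used internally by the mailbox
 */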
struct pci_doe_task {
struct pci_doe_feature feat;
const __le32 *request_pl;
size_t request_pl_sz;
__le32 *response_pl;
size_t response_pl_sz;
int rv;
void (*complete)(struct pci_doe_task *task);
void *private;
struct work_struct work;
struct pci_doe_mb *doe_mb;
};
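/*
 * sysfs support: each mailbox's cached features are exposed as read-only
 * "vid:type" attributes in the per-device "doe_features" group.
 */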
#ifdef CONFIG_SYSFS
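/* DOE Discovery (PCI-SIG vendor ID 0001h, type 00h) is always present */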
static ssize_t doe_discovery_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
return sysfs_emit(buf, "0001:00\n");
}
static DEVICE_ATTR_RO(doe_discovery);
static struct attribute *pci_doe_sysfs_feature_attrs[] = {
&dev_attr_doe_discovery.attr,
NULL
};
static bool pci_doe_features_sysfs_group_visible(struct kobject *kobj)
{
struct pci_dev *pdev = to_pci_dev(kobj_to_dev(kobj));
return !xa_empty(&pdev->doe_mbs);
}
DEFINE_SIMPLE_SYSFS_GROUP_VISIBLE(pci_doe_features_sysfs)
const struct attribute_group pci_doe_sysfs_group = {
.name = "doe_features",
.attrs = pci_doe_sysfs_feature_attrs,
.is_visible = SYSFS_GROUP_VISIBLE(pci_doe_features_sysfs),
};
static ssize_t pci_doe_sysfs_feature_show(struct device *dev,
struct device_attribute *attr,
char *buf)
{
return sysfs_emit(buf, "%s\n", attr->attr.name);
}
static void pci_doe_sysfs_feature_remove(struct pci_dev *pdev,
struct pci_doe_mb *doe_mb)
{
struct device_attribute *attrs = doe_mb->sysfs_attrs;
struct device *dev = &pdev->dev;
unsigned long i;
void *entry;
if (!attrs)
return;
doe_mb->sysfs_attrs = NULL;
xa_for_each(&doe_mb->feats, i, entry) {
if (attrs[i].show)
sysfs_remove_file_from_group(&dev->kobj, &attrs[i].attr,
pci_doe_sysfs_group.name);
kfree(attrs[i].attr.name);
}
kfree(attrs);
}
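/* Create one "vid:type" attribute per cached feature in the doe_features group */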
static int pci_doe_sysfs_feature_populate(struct pci_dev *pdev,
struct pci_doe_mb *doe_mb)
{
struct device *dev = &pdev->dev;
struct device_attribute *attrs;
unsigned long num_features = 0;
unsigned long vid, type;
unsigned long i;
void *entry;
int ret;
xa_for_each(&doe_mb->feats, i, entry)
num_features++;
attrs = kcalloc(num_features, sizeof(*attrs), GFP_KERNEL);
if (!attrs) {
pci_warn(pdev, "Failed allocating the device_attribute array\n");
return -ENOMEM;
}
doe_mb->sysfs_attrs = attrs;
xa_for_each(&doe_mb->feats, i, entry) {
sysfs_attr_init(&attrs[i].attr);
vid = xa_to_value(entry) >> 8;
type = xa_to_value(entry) & 0xFF;
if (vid == PCI_VENDOR_ID_PCI_SIG &&
type == PCI_DOE_FEATURE_DISCOVERY) {
/* DOE Discovery is shown separately via dev_attr_doe_discovery */
continue;
}
attrs[i].attr.name = kasprintf(GFP_KERNEL,
"%04lx:%02lx", vid, type);
if (!attrs[i].attr.name) {
ret = -ENOMEM;
pci_warn(pdev, "Failed allocating the attribute name\n");
goto fail;
}
attrs[i].attr.mode = 0444;
attrs[i].show = pci_doe_sysfs_feature_show;
ret = sysfs_add_file_to_group(&dev->kobj, &attrs[i].attr,
pci_doe_sysfs_group.name);
if (ret) {
attrs[i].show = NULL;
if (ret != -EEXIST) {
pci_warn(pdev, "Failed adding %s to sysfs group\n",
attrs[i].attr.name);
goto fail;
} else
kfree(attrs[i].attr.name);
}
}
return 0;
fail:
pci_doe_sysfs_feature_remove(pdev, doe_mb);
return ret;
}
void pci_doe_sysfs_teardown(struct pci_dev *pdev)
{
struct pci_doe_mb *doe_mb;
unsigned long index;
xa_for_each(&pdev->doe_mbs, index, doe_mb)
pci_doe_sysfs_feature_remove(pdev, doe_mb);
}
void pci_doe_sysfs_init(struct pci_dev *pdev)
{
struct pci_doe_mb *doe_mb;
unsigned long index;
int ret;
xa_for_each(&pdev->doe_mbs, index, doe_mb) {
ret = pci_doe_sysfs_feature_populate(pdev, doe_mb);
if (ret)
return;
}
}
#endif
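/*
 * Sleep for up to @timeout jiffies; returns -EIO early if the mailbox has
 * been cancelled, 0 once the interval has elapsed.
 */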
static int pci_doe_wait(struct pci_doe_mb *doe_mb, unsigned long timeout)
{
if (wait_event_timeout(doe_mb->wq,
test_bit(PCI_DOE_FLAG_CANCEL, &doe_mb->flags),
timeout))
return -EIO;
return 0;
}
static void pci_doe_write_ctrl(struct pci_doe_mb *doe_mb, u32 val)
{
struct pci_dev *pdev = doe_mb->pdev;
int offset = doe_mb->cap_offset;
pci_write_config_dword(pdev, offset + PCI_DOE_CTRL, val);
}
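/*
 * Issue an abort and poll until the mailbox reports neither ERROR nor BUSY,
 * or until PCI_DOE_TIMEOUT expires.
 */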
static int pci_doe_abort(struct pci_doe_mb *doe_mb)
{
struct pci_dev *pdev = doe_mb->pdev;
int offset = doe_mb->cap_offset;
unsigned long timeout_jiffies;
pci_dbg(pdev, "[%x] Issuing Abort\n", offset);
timeout_jiffies = jiffies + PCI_DOE_TIMEOUT;
pci_doe_write_ctrl(doe_mb, PCI_DOE_CTRL_ABORT);
do {
int rc;
u32 val;
rc = pci_doe_wait(doe_mb, PCI_DOE_POLL_INTERVAL);
if (rc)
return rc;
pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
if (!FIELD_GET(PCI_DOE_STATUS_ERROR, val) &&
!FIELD_GET(PCI_DOE_STATUS_BUSY, val))
return 0;
} while (!time_after(jiffies, timeout_jiffies));
pci_err(pdev, "[%x] ABORT timed out\n", offset);
return -EIO;
}
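/*
 * Write the request object (two header dwords plus payload) into the mailbox
 * and set the GO bit.  Returns -EBUSY if the mailbox stays busy, e.g. because
 * another agent is using it.
 */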
static int pci_doe_send_req(struct pci_doe_mb *doe_mb,
struct pci_doe_task *task)
{
struct pci_dev *pdev = doe_mb->pdev;
int offset = doe_mb->cap_offset;
unsigned long timeout_jiffies;
size_t length, remainder;
u32 val;
int i;
/* Wait for the mailbox to become non-busy (another agent may be using it) */
timeout_jiffies = jiffies + PCI_DOE_TIMEOUT;
do {
pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
} while (FIELD_GET(PCI_DOE_STATUS_BUSY, val) &&
!time_after(jiffies, timeout_jiffies));
if (FIELD_GET(PCI_DOE_STATUS_BUSY, val))
return -EBUSY;
if (FIELD_GET(PCI_DOE_STATUS_ERROR, val))
return -EIO;
/* Length is 2 dwords of header plus the payload length in dwords */
length = 2 + DIV_ROUND_UP(task->request_pl_sz, sizeof(__le32));
if (length > PCI_DOE_MAX_LENGTH)
return -EIO;
/* An encoded length of 0 means the maximum of 2^18 dwords */
if (length == PCI_DOE_MAX_LENGTH)
length = 0;
/* Write the two-dword DOE header */
val = FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_VID, task->feat.vid) |
FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, task->feat.type);
pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, val);
pci_write_config_dword(pdev, offset + PCI_DOE_WRITE,
FIELD_PREP(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH,
length));
for (i = 0; i < task->request_pl_sz / sizeof(__le32); i++)
pci_write_config_dword(pdev, offset + PCI_DOE_WRITE,
le32_to_cpu(task->request_pl[i]));
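/* Write the last, possibly partial, payload dword zero-padded */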
remainder = task->request_pl_sz % sizeof(__le32);
if (remainder) {
val = 0;
memcpy(&val, &task->request_pl[i], remainder);
le32_to_cpus(&val);
pci_write_config_dword(pdev, offset + PCI_DOE_WRITE, val);
}
pci_doe_write_ctrl(doe_mb, PCI_DOE_CTRL_GO);
return 0;
}
static bool pci_doe_data_obj_ready(struct pci_doe_mb *doe_mb)
{
struct pci_dev *pdev = doe_mb->pdev;
int offset = doe_mb->cap_offset;
u32 val;
pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
if (FIELD_GET(PCI_DOE_STATUS_DATA_OBJECT_READY, val))
return true;
return false;
}
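/*
 * Read the response object from the mailbox: validate the header, copy up to
 * task->response_pl_sz bytes of payload and discard any excess.  Returns the
 * number of payload bytes received or a negative errno.
 */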
static int pci_doe_recv_resp(struct pci_doe_mb *doe_mb, struct pci_doe_task *task)
{
size_t length, payload_length, remainder, received;
struct pci_dev *pdev = doe_mb->pdev;
int offset = doe_mb->cap_offset;
int i = 0;
u32 val;
pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val);
if ((FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_VID, val) != task->feat.vid) ||
(FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, val) != task->feat.type)) {
dev_err_ratelimited(&pdev->dev, "[%x] expected [VID, Feature] = [%04x, %02x], got [%04x, %02x]\n",
doe_mb->cap_offset, task->feat.vid, task->feat.type,
FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_VID, val),
FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_1_TYPE, val));
return -EIO;
}
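/* Ack the first header dword, then read and ack the second (length) dword */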
pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0);
pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val);
pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0);
length = FIELD_GET(PCI_DOE_DATA_OBJECT_HEADER_2_LENGTH, val);
/* A length of 0 encodes the maximum of 2^18 dwords */
if (!length)
length = PCI_DOE_MAX_LENGTH;
if (length < 2)
return -EIO;
/* The two header dwords have already been read */
length -= 2;
received = task->response_pl_sz;
payload_length = DIV_ROUND_UP(task->response_pl_sz, sizeof(__le32));
/* remainder is the number of data bytes in the last payload dword */
remainder = task->response_pl_sz % sizeof(__le32);
if (!remainder)
remainder = sizeof(__le32);
/* The response is shorter than the caller's buffer */
if (length < payload_length) {
received = length * sizeof(__le32);
payload_length = length;
remainder = sizeof(__le32);
}
if (payload_length) {
for (; i < payload_length - 1; i++) {
pci_read_config_dword(pdev, offset + PCI_DOE_READ,
&val);
task->response_pl[i] = cpu_to_le32(val);
pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0);
}
pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val);
cpu_to_le32s(&val);
memcpy(&task->response_pl[i], &val, remainder);
if (!pci_doe_data_obj_ready(doe_mb))
return -EIO;
pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0);
i++;
}
/* Flush any excess dwords beyond the caller's buffer */
for (; i < length; i++) {
pci_read_config_dword(pdev, offset + PCI_DOE_READ, &val);
pci_write_config_dword(pdev, offset + PCI_DOE_READ, 0);
}
/* Final error check to pick up any error reported while reading */
pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
if (FIELD_GET(PCI_DOE_STATUS_ERROR, val))
return -EIO;
return received;
}
static void signal_task_complete(struct pci_doe_task *task, int rv)
{
task->rv = rv;
destroy_work_on_stack(&task->work);
task->complete(task);
}
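/* Abort the mailbox before completing the task; mark it dead if the abort fails */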
static void signal_task_abort(struct pci_doe_task *task, int rv)
{
struct pci_doe_mb *doe_mb = task->doe_mb;
struct pci_dev *pdev = doe_mb->pdev;
if (pci_doe_abort(doe_mb)) {
pci_err(pdev, "[%x] Abort failed marking mailbox dead\n",
doe_mb->cap_offset);
set_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags);
}
signal_task_complete(task, rv);
}
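/* Worker: send the request, poll for Data Object Ready, then read the response */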
static void doe_statemachine_work(struct work_struct *work)
{
struct pci_doe_task *task = container_of(work, struct pci_doe_task,
work);
struct pci_doe_mb *doe_mb = task->doe_mb;
struct pci_dev *pdev = doe_mb->pdev;
int offset = doe_mb->cap_offset;
unsigned long timeout_jiffies;
u32 val;
int rc;
if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags)) {
signal_task_complete(task, -EIO);
return;
}
rc = pci_doe_send_req(doe_mb, task);
if (rc) {
if (rc == -EBUSY)
dev_err_ratelimited(&pdev->dev, "[%x] busy detected; another entity is sending conflicting requests\n",
offset);
signal_task_abort(task, rc);
return;
}
timeout_jiffies = jiffies + PCI_DOE_TIMEOUT;
retry_resp:
pci_read_config_dword(pdev, offset + PCI_DOE_STATUS, &val);
if (FIELD_GET(PCI_DOE_STATUS_ERROR, val)) {
signal_task_abort(task, -EIO);
return;
}
if (!FIELD_GET(PCI_DOE_STATUS_DATA_OBJECT_READY, val)) {
if (time_after(jiffies, timeout_jiffies)) {
signal_task_abort(task, -EIO);
return;
}
rc = pci_doe_wait(doe_mb, PCI_DOE_POLL_INTERVAL);
if (rc) {
signal_task_abort(task, rc);
return;
}
goto retry_resp;
}
rc = pci_doe_recv_resp(doe_mb, task);
if (rc < 0) {
signal_task_abort(task, rc);
return;
}
signal_task_complete(task, rc);
}
static void pci_doe_task_complete(struct pci_doe_task *task)
{
complete(task->private);
}
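/*
 * Issue a single DOE Discovery request for *index and return the vendor ID
 * and type found there; *index is updated to the next instance to query
 * (0 terminates the enumeration).
 */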
static int pci_doe_discovery(struct pci_doe_mb *doe_mb, u8 capver, u8 *index, u16 *vid,
u8 *feature)
{
u32 request_pl = FIELD_PREP(PCI_DOE_DATA_OBJECT_DISC_REQ_3_INDEX,
*index) |
FIELD_PREP(PCI_DOE_DATA_OBJECT_DISC_REQ_3_VER,
(capver >= 2) ? 2 : 0);
__le32 request_pl_le = cpu_to_le32(request_pl);
__le32 response_pl_le;
u32 response_pl;
int rc;
rc = pci_doe(doe_mb, PCI_VENDOR_ID_PCI_SIG, PCI_DOE_FEATURE_DISCOVERY,
&request_pl_le, sizeof(request_pl_le),
&response_pl_le, sizeof(response_pl_le));
if (rc < 0)
return rc;
if (rc != sizeof(response_pl_le))
return -EIO;
response_pl = le32_to_cpu(response_pl_le);
*vid = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_VID, response_pl);
*feature = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_TYPE,
response_pl);
*index = FIELD_GET(PCI_DOE_DATA_OBJECT_DISC_RSP_3_NEXT_INDEX,
response_pl);
return 0;
}
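/* Features are stored in the xarray encoded as (vid << 8) | type */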
static void *pci_doe_xa_feat_entry(u16 vid, u8 type)
{
return xa_mk_value((vid << 8) | type);
}
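/* Enumerate all features via DOE Discovery and cache them in doe_mb->feats */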
static int pci_doe_cache_features(struct pci_doe_mb *doe_mb)
{
u8 index = 0;
u8 xa_idx = 0;
u32 hdr = 0;
pci_read_config_dword(doe_mb->pdev, doe_mb->cap_offset, &hdr);
do {
int rc;
u16 vid;
u8 type;
rc = pci_doe_discovery(doe_mb, PCI_EXT_CAP_VER(hdr), &index,
&vid, &type);
if (rc)
return rc;
pci_dbg(doe_mb->pdev,
"[%x] Found feature %d vid: %x type: %x\n",
doe_mb->cap_offset, xa_idx, vid, type);
rc = xa_insert(&doe_mb->feats, xa_idx++,
pci_doe_xa_feat_entry(vid, type), GFP_KERNEL);
if (rc)
return rc;
} while (index);
return 0;
}
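/* Mark the mailbox dead and cancelled and wake any waiters so tasks abort promptly */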
static void pci_doe_cancel_tasks(struct pci_doe_mb *doe_mb)
{
set_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags);
set_bit(PCI_DOE_FLAG_CANCEL, &doe_mb->flags);
wake_up(&doe_mb->wq);
}
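/*
 * pci_doe_create_mb() - Create a DOE mailbox object
 * @pdev: PCI device to create the DOE mailbox for
 * @cap_offset: Offset of the DOE capability
 *
 * Create a single mailbox object to manage the mailbox protocol at the
 * specified capability offset.
 *
 * RETURNS: created mailbox object on success, ERR_PTR(-errno) on failure
 */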
static struct pci_doe_mb *pci_doe_create_mb(struct pci_dev *pdev,
u16 cap_offset)
{
struct pci_doe_mb *doe_mb;
int rc;
doe_mb = kzalloc(sizeof(*doe_mb), GFP_KERNEL);
if (!doe_mb)
return ERR_PTR(-ENOMEM);
doe_mb->pdev = pdev;
doe_mb->cap_offset = cap_offset;
init_waitqueue_head(&doe_mb->wq);
xa_init(&doe_mb->feats);
doe_mb->work_queue = alloc_ordered_workqueue("%s %s DOE [%x]", 0,
dev_bus_name(&pdev->dev),
pci_name(pdev),
doe_mb->cap_offset);
if (!doe_mb->work_queue) {
pci_err(pdev, "[%x] failed to allocate work queue\n",
doe_mb->cap_offset);
rc = -ENOMEM;
goto err_free;
}
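/* Reset the mailbox by issuing an abort */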
rc = pci_doe_abort(doe_mb);
if (rc) {
pci_err(pdev, "[%x] failed to reset mailbox with abort command : %d\n",
doe_mb->cap_offset, rc);
goto err_destroy_wq;
}
rc = pci_doe_cache_features(doe_mb);
if (rc) {
pci_err(pdev, "[%x] failed to cache features : %d\n",
doe_mb->cap_offset, rc);
goto err_cancel;
}
return doe_mb;
err_cancel:
pci_doe_cancel_tasks(doe_mb);
xa_destroy(&doe_mb->feats);
err_destroy_wq:
destroy_workqueue(doe_mb->work_queue);
err_free:
kfree(doe_mb);
return ERR_PTR(rc);
}
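/*
 * pci_doe_destroy_mb() - Destroy a DOE mailbox object
 * @doe_mb: DOE mailbox
 *
 * Cancel outstanding tasks and free all internal data structures.
 */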
static void pci_doe_destroy_mb(struct pci_doe_mb *doe_mb)
{
pci_doe_cancel_tasks(doe_mb);
xa_destroy(&doe_mb->feats);
destroy_workqueue(doe_mb->work_queue);
kfree(doe_mb);
}
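/*
 * Check whether the mailbox supports the given feature.  DOE Discovery is
 * mandatory, so it is treated as always supported.
 */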
static bool pci_doe_supports_feat(struct pci_doe_mb *doe_mb, u16 vid, u8 type)
{
unsigned long index;
void *entry;
if (vid == PCI_VENDOR_ID_PCI_SIG && type == PCI_DOE_FEATURE_DISCOVERY)
return true;
xa_for_each(&doe_mb->feats, index, entry)
if (entry == pci_doe_xa_feat_entry(vid, type))
return true;
return false;
}
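/*
 * pci_doe_submit_task() - Submit a task to be processed by the state machine
 * @doe_mb: DOE mailbox capability to submit to
 * @task: Task to be queued
 *
 * Returns as soon as the task has been queued; task->complete is called when
 * processing is done.  @task must remain valid (e.g. on the caller's stack)
 * until then.
 *
 * RETURNS: 0 when the task has been successfully queued, -errno on error
 */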
static int pci_doe_submit_task(struct pci_doe_mb *doe_mb,
struct pci_doe_task *task)
{
if (!pci_doe_supports_feat(doe_mb, task->feat.vid, task->feat.type))
return -EINVAL;
if (test_bit(PCI_DOE_FLAG_DEAD, &doe_mb->flags))
return -EIO;
task->doe_mb = doe_mb;
INIT_WORK_ONSTACK(&task->work, doe_statemachine_work);
queue_work(doe_mb->work_queue, &task->work);
return 0;
}
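/**
 * pci_doe() - Perform a synchronous Data Object Exchange
 * @doe_mb: DOE mailbox
 * @vendor: Vendor ID
 * @type: Data Object Type
 * @request: Request payload
 * @request_sz: Size of the request payload (bytes)
 * @response: Buffer for the response payload
 * @response_sz: Size of the response buffer (bytes)
 *
 * Submit @request to @doe_mb and store the response in @response.  The
 * exchange is performed synchronously and may therefore sleep.
 *
 * RETURNS: Length of the received response (bytes) or negative errno.  The
 * received response may be smaller than @response_sz; the caller is
 * responsible for checking that it is large enough.
 */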
int pci_doe(struct pci_doe_mb *doe_mb, u16 vendor, u8 type,
const void *request, size_t request_sz,
void *response, size_t response_sz)
{
DECLARE_COMPLETION_ONSTACK(c);
struct pci_doe_task task = {
.feat.vid = vendor,
.feat.type = type,
.request_pl = request,
.request_pl_sz = request_sz,
.response_pl = response,
.response_pl_sz = response_sz,
.complete = pci_doe_task_complete,
.private = &c,
};
int rc;
rc = pci_doe_submit_task(doe_mb, &task);
if (rc)
return rc;
wait_for_completion(&c);
return task.rv;
}
EXPORT_SYMBOL_GPL(pci_doe);
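/**
 * pci_find_doe_mailbox() - Find a Data Object Exchange mailbox
 * @pdev: PCI device
 * @vendor: Vendor ID
 * @type: Data Object Type
 *
 * Find the first DOE mailbox of @pdev which supports the given feature.
 *
 * RETURNS: Pointer to the DOE mailbox or NULL if none was found.
 */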
struct pci_doe_mb *pci_find_doe_mailbox(struct pci_dev *pdev, u16 vendor,
u8 type)
{
struct pci_doe_mb *doe_mb;
unsigned long index;
xa_for_each(&pdev->doe_mbs, index, doe_mb)
if (pci_doe_supports_feat(doe_mb, vendor, type))
return doe_mb;
return NULL;
}
EXPORT_SYMBOL_GPL(pci_find_doe_mailbox);
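/*
 * Walk all DOE extended capabilities of the device, create a mailbox for each
 * and cache the features it supports.
 */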
void pci_doe_init(struct pci_dev *pdev)
{
struct pci_doe_mb *doe_mb;
u16 offset = 0;
int rc;
xa_init(&pdev->doe_mbs);
while ((offset = pci_find_next_ext_capability(pdev, offset,
PCI_EXT_CAP_ID_DOE))) {
doe_mb = pci_doe_create_mb(pdev, offset);
if (IS_ERR(doe_mb)) {
pci_err(pdev, "[%x] failed to create mailbox: %ld\n",
offset, PTR_ERR(doe_mb));
continue;
}
rc = xa_insert(&pdev->doe_mbs, offset, doe_mb, GFP_KERNEL);
if (rc) {
pci_err(pdev, "[%x] failed to insert mailbox: %d\n",
offset, rc);
pci_doe_destroy_mb(doe_mb);
}
}
}
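/* Tear down all mailboxes created by pci_doe_init() */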
void pci_doe_destroy(struct pci_dev *pdev)
{
struct pci_doe_mb *doe_mb;
unsigned long index;
xa_for_each(&pdev->doe_mbs, index, doe_mb)
pci_doe_destroy_mb(doe_mb);
xa_destroy(&pdev->doe_mbs);
}
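/* Device is gone: cancel outstanding tasks so waiters do not hang */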
void pci_doe_disconnected(struct pci_dev *pdev)
{
struct pci_doe_mb *doe_mb;
unsigned long index;
xa_for_each(&pdev->doe_mbs, index, doe_mb)
pci_doe_cancel_tasks(doe_mb);
}