net/core/page_pool_user.c
// SPDX-License-Identifier: GPL-2.0

#include <linux/mutex.h>
#include <linux/netdevice.h>
#include <linux/xarray.h>
#include <net/busy_poll.h>
#include <net/net_debug.h>
#include <net/netdev_rx_queue.h>
#include <net/page_pool/helpers.h>
#include <net/page_pool/types.h>
#include <net/page_pool/memory_provider.h>
#include <net/sock.h>

#include "page_pool_priv.h"
#include "netdev-genl-gen.h"

static DEFINE_XARRAY_FLAGS(page_pools, XA_FLAGS_ALLOC1);
/* Protects: page_pools, netdevice->page_pools, pool->p.napi, pool->slow.netdev,
 *      pool->user.
 * Ordering: inside rtnl_lock
 */
DEFINE_MUTEX(page_pools_lock);

/* Page pools are only reachable from user space (via netlink) if they are
 * linked to a netdev at creation time. The following page pool "visibility"
 * states are possible:
 *  - normal
 *    - user.list: linked to real netdev, netdev: real netdev
 *  - orphaned - real netdev has disappeared
 *    - user.list: linked to lo, netdev: lo
 *  - invisible - either (a) created without netdev linking, (b) unlisted due
 *      to error, or (c) the entire namespace which owned this pool disappeared
 *    - user.list: unhashed, netdev: unknown
 */

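/* Fill callback shared by the "do", "dump" and notification paths;
 * appends a single pool's description to @rsp.
 */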
typedef int (*pp_nl_fill_cb)(struct sk_buff *rsp, const struct page_pool *pool,
                             const struct genl_info *info);

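/* Look up a single pool by ID under page_pools_lock and reply with a
 * message built by @fill. Pools which are invisible (unhashed) or owned
 * by a different netns are reported as -ENOENT.
 */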
static int
netdev_nl_page_pool_get_do(struct genl_info *info, u32 id, pp_nl_fill_cb fill)
{
        struct page_pool *pool;
        struct sk_buff *rsp;
        int err;

        mutex_lock(&page_pools_lock);
        pool = xa_load(&page_pools, id);
        if (!pool || hlist_unhashed(&pool->user.list) ||
            !net_eq(dev_net(pool->slow.netdev), genl_info_net(info))) {
                err = -ENOENT;
                goto err_unlock;
        }

        rsp = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!rsp) {
                err = -ENOMEM;
                goto err_unlock;
        }

        err = fill(rsp, pool, info);
        if (err)
                goto err_free_msg;

        mutex_unlock(&page_pools_lock);

        return genlmsg_reply(rsp, info);

err_free_msg:
        nlmsg_free(rsp);
err_unlock:
        mutex_unlock(&page_pools_lock);
        return err;
}

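/* Dump resume state, stored in netlink_callback->ctx: the ifindex the
 * walk stopped at and the ID of the pool to restart from on that netdev.
 */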
struct page_pool_dump_cb {
        unsigned long ifindex;
        u32 pp_id;
};

static int
netdev_nl_page_pool_get_dump(struct sk_buff *skb, struct netlink_callback *cb,
                             pp_nl_fill_cb fill)
{
        struct page_pool_dump_cb *state = (void *)cb->ctx;
        const struct genl_info *info = genl_info_dump(cb);
        struct net *net = sock_net(skb->sk);
        struct net_device *netdev;
        struct page_pool *pool;
        int err = 0;

        rtnl_lock();
        mutex_lock(&page_pools_lock);
        for_each_netdev_dump(net, netdev, state->ifindex) {
                hlist_for_each_entry(pool, &netdev->page_pools, user.list) {
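                        /* Pools are added at the head of the list, so IDs
                         * decrease as the walk progresses. On resume, skip
                         * entries newer than the recorded ID and retry the
                         * pool whose fill overflowed the previous message.
                         */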
                        if (state->pp_id && state->pp_id < pool->user.id)
                                continue;

                        state->pp_id = pool->user.id;
                        err = fill(skb, pool, info);
                        if (err)
                                goto out;
                }

                state->pp_id = 0;
        }
out:
        mutex_unlock(&page_pools_lock);
        rtnl_unlock();

        return err;
}

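/* Fill the per-pool allocation and recycling counters. Compiled out
 * (and reported as -EOPNOTSUPP with an extack message) when the kernel
 * is built without CONFIG_PAGE_POOL_STATS.
 */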
static int
page_pool_nl_stats_fill(struct sk_buff *rsp, const struct page_pool *pool,
                        const struct genl_info *info)
{
#ifdef CONFIG_PAGE_POOL_STATS
        struct page_pool_stats stats = {};
        struct nlattr *nest;
        void *hdr;

        if (!page_pool_get_stats(pool, &stats))
                return 0;

        hdr = genlmsg_iput(rsp, info);
        if (!hdr)
                return -EMSGSIZE;

        nest = nla_nest_start(rsp, NETDEV_A_PAGE_POOL_STATS_INFO);

        if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_ID, pool->user.id) ||
            (pool->slow.netdev->ifindex != LOOPBACK_IFINDEX &&
             nla_put_u32(rsp, NETDEV_A_PAGE_POOL_IFINDEX,
                         pool->slow.netdev->ifindex)))
                goto err_cancel_nest;

        nla_nest_end(rsp, nest);

        if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_FAST,
                         stats.alloc_stats.fast) ||
            nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW,
                         stats.alloc_stats.slow) ||
            nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_SLOW_HIGH_ORDER,
                         stats.alloc_stats.slow_high_order) ||
            nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_EMPTY,
                         stats.alloc_stats.empty) ||
            nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_REFILL,
                         stats.alloc_stats.refill) ||
            nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_ALLOC_WAIVE,
                         stats.alloc_stats.waive) ||
            nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHED,
                         stats.recycle_stats.cached) ||
            nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_CACHE_FULL,
                         stats.recycle_stats.cache_full) ||
            nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING,
                         stats.recycle_stats.ring) ||
            nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RING_FULL,
                         stats.recycle_stats.ring_full) ||
            nla_put_uint(rsp, NETDEV_A_PAGE_POOL_STATS_RECYCLE_RELEASED_REFCNT,
                         stats.recycle_stats.released_refcnt))
                goto err_cancel_msg;

        genlmsg_end(rsp, hdr);

        return 0;
err_cancel_nest:
        nla_nest_cancel(rsp, nest);
err_cancel_msg:
        genlmsg_cancel(rsp, hdr);
        return -EMSGSIZE;
#else
        GENL_SET_ERR_MSG(info, "kernel built without CONFIG_PAGE_POOL_STATS");
        return -EOPNOTSUPP;
#endif
}

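/* Stats are selected via the nested PAGE_POOL_STATS_INFO attribute,
 * which must carry the pool ID; selecting by ifindex is rejected.
 */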
int netdev_nl_page_pool_stats_get_doit(struct sk_buff *skb,
                                       struct genl_info *info)
{
        struct nlattr *tb[ARRAY_SIZE(netdev_page_pool_info_nl_policy)];
        struct nlattr *nest;
        int err;
        u32 id;

        if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_PAGE_POOL_STATS_INFO))
                return -EINVAL;

        nest = info->attrs[NETDEV_A_PAGE_POOL_STATS_INFO];
        err = nla_parse_nested(tb, ARRAY_SIZE(tb) - 1, nest,
                               netdev_page_pool_info_nl_policy,
                               info->extack);
        if (err)
                return err;

        if (NL_REQ_ATTR_CHECK(info->extack, nest, tb, NETDEV_A_PAGE_POOL_ID))
                return -EINVAL;
        if (tb[NETDEV_A_PAGE_POOL_IFINDEX]) {
                NL_SET_ERR_MSG_ATTR(info->extack,
                                    tb[NETDEV_A_PAGE_POOL_IFINDEX],
                                    "selecting by ifindex not supported");
                return -EINVAL;
        }

        id = nla_get_uint(tb[NETDEV_A_PAGE_POOL_ID]);

        return netdev_nl_page_pool_get_do(info, id, page_pool_nl_stats_fill);
}

int netdev_nl_page_pool_stats_get_dumpit(struct sk_buff *skb,
                                         struct netlink_callback *cb)
{
        return netdev_nl_page_pool_get_dump(skb, cb, page_pool_nl_stats_fill);
}

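/* Fill the generic description of a pool: ID, owning ifindex (omitted
 * once a pool has been orphaned onto loopback), NAPI ID, pages currently
 * in flight and their memory footprint, detach time, and any attributes
 * supplied by a memory provider.
 */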
static int
page_pool_nl_fill(struct sk_buff *rsp, const struct page_pool *pool,
                  const struct genl_info *info)
{
        size_t inflight, refsz;
        unsigned int napi_id;
        void *hdr;

        hdr = genlmsg_iput(rsp, info);
        if (!hdr)
                return -EMSGSIZE;

        if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_ID, pool->user.id))
                goto err_cancel;

        if (pool->slow.netdev->ifindex != LOOPBACK_IFINDEX &&
            nla_put_u32(rsp, NETDEV_A_PAGE_POOL_IFINDEX,
                        pool->slow.netdev->ifindex))
                goto err_cancel;

        napi_id = pool->p.napi ? READ_ONCE(pool->p.napi->napi_id) : 0;
        if (napi_id_valid(napi_id) &&
            nla_put_uint(rsp, NETDEV_A_PAGE_POOL_NAPI_ID, napi_id))
                goto err_cancel;

        inflight = page_pool_inflight(pool, false);
        refsz = PAGE_SIZE << pool->p.order;
        if (nla_put_uint(rsp, NETDEV_A_PAGE_POOL_INFLIGHT, inflight) ||
            nla_put_uint(rsp, NETDEV_A_PAGE_POOL_INFLIGHT_MEM,
                         inflight * refsz))
                goto err_cancel;
        if (pool->user.detach_time &&
            nla_put_uint(rsp, NETDEV_A_PAGE_POOL_DETACH_TIME,
                         ktime_divns(pool->user.detach_time, NSEC_PER_SEC)))
                goto err_cancel;

        if (pool->mp_ops && pool->mp_ops->nl_fill(pool->mp_priv, rsp, NULL))
                goto err_cancel;

        genlmsg_end(rsp, hdr);

        return 0;
err_cancel:
        genlmsg_cancel(rsp, hdr);
        return -EMSGSIZE;
}

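/* Multicast a notification for @cmd to the PAGE_POOL group of the
 * pool's netns. Invisible (unhashed) pools generate no events, and the
 * message is not allocated at all if nobody is listening.
 */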
static void netdev_nl_page_pool_event(const struct page_pool *pool, u32 cmd)
{
        struct genl_info info;
        struct sk_buff *ntf;
        struct net *net;

        lockdep_assert_held(&page_pools_lock);

        /* 'invisible' page pools don't matter */
        if (hlist_unhashed(&pool->user.list))
                return;
        net = dev_net(pool->slow.netdev);

        if (!genl_has_listeners(&netdev_nl_family, net, NETDEV_NLGRP_PAGE_POOL))
                return;

        genl_info_init_ntf(&info, &netdev_nl_family, cmd);

        ntf = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_KERNEL);
        if (!ntf)
                return;

        if (page_pool_nl_fill(ntf, pool, &info)) {
                nlmsg_free(ntf);
                return;
        }

        genlmsg_multicast_netns(&netdev_nl_family, net, ntf,
                                0, NETDEV_NLGRP_PAGE_POOL, GFP_KERNEL);
}

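/* Query a single pool from user space, e.g. with the in-tree ynl CLI
 * (illustrative invocation, paths relative to the kernel tree):
 *   ./tools/net/ynl/cli.py --spec Documentation/netlink/specs/netdev.yaml \
 *       --do page-pool-get --json '{"id": 1}'
 */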
int netdev_nl_page_pool_get_doit(struct sk_buff *skb, struct genl_info *info)
{
        u32 id;

        if (GENL_REQ_ATTR_CHECK(info, NETDEV_A_PAGE_POOL_ID))
                return -EINVAL;

        id = nla_get_uint(info->attrs[NETDEV_A_PAGE_POOL_ID]);

        return netdev_nl_page_pool_get_do(info, id, page_pool_nl_fill);
}

int netdev_nl_page_pool_get_dumpit(struct sk_buff *skb,
                                   struct netlink_callback *cb)
{
        return netdev_nl_page_pool_get_dump(skb, cb, page_pool_nl_fill);
}

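/* Register a freshly created pool with the user API: allocate a cyclic
 * 32-bit ID and, if the pool is linked to a netdev, hash it onto that
 * netdev's list and announce its creation.
 */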
int page_pool_list(struct page_pool *pool)
{
        static u32 id_alloc_next;
        int err;

        mutex_lock(&page_pools_lock);
        err = xa_alloc_cyclic(&page_pools, &pool->user.id, pool, xa_limit_32b,
                              &id_alloc_next, GFP_KERNEL);
        if (err < 0)
                goto err_unlock;

        INIT_HLIST_NODE(&pool->user.list);
        if (pool->slow.netdev) {
                hlist_add_head(&pool->user.list,
                               &pool->slow.netdev->page_pools);
                netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_ADD_NTF);
        }

        mutex_unlock(&page_pools_lock);
        return 0;

err_unlock:
        mutex_unlock(&page_pools_lock);
        return err;
}

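/* The driver is done with the pool but pages remain outstanding; record
 * when it was detached so user space can spot long-lived detached pools.
 */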
void page_pool_detached(struct page_pool *pool)
{
        mutex_lock(&page_pools_lock);
        pool->user.detach_time = ktime_get_boottime();
        netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_CHANGE_NTF);
        mutex_unlock(&page_pools_lock);
}

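/* Final teardown of the user API state when the pool is destroyed:
 * announce the deletion and drop the pool from the ID space and from
 * the netdev list.
 */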
void page_pool_unlist(struct page_pool *pool)
{
        mutex_lock(&page_pools_lock);
        netdev_nl_page_pool_event(pool, NETDEV_CMD_PAGE_POOL_DEL_NTF);
        xa_erase(&page_pools, pool->user.id);
        if (!hlist_unhashed(&pool->user.list))
                hlist_del(&pool->user.list);
        mutex_unlock(&page_pools_lock);
}

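/* Scan @dev's pools for one that uses @rxq's memory-provider binding on
 * that exact queue; lets callers verify that the driver actually created
 * such a pool.
 */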
int page_pool_check_memory_provider(struct net_device *dev,
                                    struct netdev_rx_queue *rxq)
{
        void *binding = rxq->mp_params.mp_priv;
        struct page_pool *pool;
        struct hlist_node *n;

        if (!binding)
                return 0;

        mutex_lock(&page_pools_lock);
        hlist_for_each_entry_safe(pool, n, &dev->page_pools, user.list) {
                if (pool->mp_priv != binding)
                        continue;

                if (pool->slow.queue_idx == get_netdev_rx_queue_index(rxq)) {
                        mutex_unlock(&page_pools_lock);
                        return 0;
                }
        }
        mutex_unlock(&page_pools_lock);
        return -ENODATA;
}

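/* Loopback itself is unregistering (the netns is being dismantled), so
 * orphaned pools cannot be reparented any further: unhash them and
 * poison the netdev pointer to catch late dereferences.
 */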
static void page_pool_unreg_netdev_wipe(struct net_device *netdev)
{
        struct page_pool *pool;
        struct hlist_node *n;

        mutex_lock(&page_pools_lock);
        hlist_for_each_entry_safe(pool, n, &netdev->page_pools, user.list) {
                hlist_del_init(&pool->user.list);
                pool->slow.netdev = NET_PTR_POISON;
        }
        mutex_unlock(&page_pools_lock);
}

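/* A real netdev is unregistering while its pools still hold pages:
 * reparent them to the netns loopback device so they stay visible to
 * user space, and notify listeners of the change.
 */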
static void page_pool_unreg_netdev(struct net_device *netdev)
{
        struct page_pool *pool, *last;
        struct net_device *lo;

        lo = dev_net(netdev)->loopback_dev;

        mutex_lock(&page_pools_lock);
        last = NULL;
        hlist_for_each_entry(pool, &netdev->page_pools, user.list) {
                pool->slow.netdev = lo;
                netdev_nl_page_pool_event(pool,
                                          NETDEV_CMD_PAGE_POOL_CHANGE_NTF);
                last = pool;
        }
        if (last)
                hlist_splice_init(&netdev->page_pools, &last->user.list,
                                  &lo->page_pools);
        mutex_unlock(&page_pools_lock);
}

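/* NETDEV_UNREGISTER handler which decides between reparenting pools to
 * loopback and wiping them when loopback itself goes away.
 */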
static int
page_pool_netdevice_event(struct notifier_block *nb,
                          unsigned long event, void *ptr)
{
        struct net_device *netdev = netdev_notifier_info_to_dev(ptr);

        if (event != NETDEV_UNREGISTER)
                return NOTIFY_DONE;

        if (hlist_empty(&netdev->page_pools))
                return NOTIFY_OK;

        if (netdev->ifindex != LOOPBACK_IFINDEX)
                page_pool_unreg_netdev(netdev);
        else
                page_pool_unreg_netdev_wipe(netdev);
        return NOTIFY_OK;
}

static struct notifier_block page_pool_netdevice_nb = {
        .notifier_call = page_pool_netdevice_event,
};

static int __init page_pool_user_init(void)
{
        return register_netdevice_notifier(&page_pool_netdevice_nb);
}

subsys_initcall(page_pool_user_init);