/* root/include/net/libeth/rx.h */
/* SPDX-License-Identifier: GPL-2.0-only */
/* Copyright (C) 2024-2025 Intel Corporation */

#ifndef __LIBETH_RX_H
#define __LIBETH_RX_H

#include <linux/if_vlan.h>

#include <net/page_pool/helpers.h>
#include <net/xdp.h>

/* Rx buffer management */

/* Space reserved in front of each frame */
#define LIBETH_SKB_HEADROOM     (NET_SKB_PAD + NET_IP_ALIGN)
/* Space reserved in front of each XDP frame: XDP_PACKET_HEADROOM rounded up
 * to NET_SKB_PAD, plus NET_IP_ALIGN to keep the IP header aligned
 */
#define LIBETH_XDP_HEADROOM     (ALIGN(XDP_PACKET_HEADROOM, NET_SKB_PAD) + \
                                 NET_IP_ALIGN)
/* Maximum headroom for worst-case calculations */
#define LIBETH_MAX_HEADROOM     LIBETH_XDP_HEADROOM
/* Link layer / L2 overhead: Ethernet, 2 VLAN tags (C + S), FCS */
#define LIBETH_RX_LL_LEN        (ETH_HLEN + 2 * VLAN_HLEN + ETH_FCS_LEN)
/* Maximum supported L2-L4 header length: MAX_HEADER rounded up to a power
 * of two, but at least 256 bytes
 */
#define LIBETH_MAX_HEAD         roundup_pow_of_two(max(MAX_HEADER, 256))

/* Always use order-0 pages */
#define LIBETH_RX_PAGE_ORDER    0
/* Pick a sane buffer stride and align to a cacheline boundary */
#define LIBETH_RX_BUF_STRIDE    SKB_DATA_ALIGN(128)
/* HW-writeable space in one buffer: truesize - headroom/tailroom,
 * aligned down to the buffer stride
 */
#define LIBETH_RX_PAGE_LEN(hr)                                            \
        ALIGN_DOWN(SKB_MAX_ORDER(hr, LIBETH_RX_PAGE_ORDER),               \
                   LIBETH_RX_BUF_STRIDE)

/**
 * struct libeth_fqe - structure representing an Rx buffer (fill queue element)
 * @netmem: network memory reference holding the buffer
 * @offset: offset from the page start (to the headroom)
 * @truesize: total space occupied by the buffer (w/ headroom and tailroom)
 *
 * Depending on the MTU, API switches between one-page-per-frame and shared
 * page model (to conserve memory on bigger-page platforms). In case of the
 * former, @offset is always 0 and @truesize is always %PAGE_SIZE.
 */
struct libeth_fqe {
        netmem_ref              netmem;
        u32                     offset;
        u32                     truesize;
} __aligned_largest;

/**
 * enum libeth_fqe_type - enum representing types of Rx buffers
 * @LIBETH_FQE_MTU: buffer size is determined by MTU
 * @LIBETH_FQE_SHORT: buffer size is smaller than MTU, for short frames
 * @LIBETH_FQE_HDR: buffer size is %LIBETH_MAX_HEAD-sized, for headers
 */
enum libeth_fqe_type {
        LIBETH_FQE_MTU          = 0U,
        LIBETH_FQE_SHORT,
        LIBETH_FQE_HDR,
};

/**
 * struct libeth_fq - structure representing a buffer (fill) queue
 * @fp: hotpath part of the structure (tagged group &struct libeth_fq_fp,
 *      so the hotpath fields can be passed around on their own)
 * @pp: &page_pool for buffer management
 * @fqes: array of Rx buffers
 * @truesize: size to allocate per buffer, w/overhead
 * @count: number of descriptors/buffers the queue has
 * @type: type of the buffers this queue has, &enum libeth_fqe_type
 * @hsplit: flag whether header split is enabled
 * @xdp: flag indicating whether XDP is enabled
 * @buf_len: HW-writeable length per each buffer
 * @nid: ID of the closest NUMA node with memory
 */
struct libeth_fq {
        struct_group_tagged(libeth_fq_fp, fp,
                struct page_pool        *pp;
                struct libeth_fqe       *fqes;

                u32                     truesize;
                u32                     count;
        );

        /* Cold fields, not accessed on the Rx hotpath */
        enum libeth_fqe_type    type:2;
        bool                    hsplit:1;
        bool                    xdp:1;

        u32                     buf_len;
        int                     nid;
};

int libeth_rx_fq_create(struct libeth_fq *fq, struct napi_struct *napi);
void libeth_rx_fq_destroy(struct libeth_fq *fq);

/**
 * libeth_rx_alloc - allocate a new Rx buffer
 * @fq: fill queue to allocate for
 * @i: index of the buffer within the queue
 *
 * Pull a buffer from the queue's page_pool and fill in the corresponding
 * fill queue element.
 *
 * Return: DMA address to be passed to HW for Rx on successful allocation,
 * %DMA_MAPPING_ERROR otherwise.
 */
static inline dma_addr_t libeth_rx_alloc(const struct libeth_fq_fp *fq, u32 i)
{
        struct libeth_fqe *fqe = &fq->fqes[i];
        dma_addr_t dma;

        fqe->truesize = fq->truesize;
        fqe->netmem = page_pool_dev_alloc_netmem(fq->pp, &fqe->offset,
                                                 &fqe->truesize);
        if (unlikely(!fqe->netmem))
                return DMA_MAPPING_ERROR;

        /* Base DMA address of the netmem, adjusted by the in-page offset
         * chosen by the pool and the pool-wide headroom offset.
         */
        dma = page_pool_get_dma_addr_netmem(fqe->netmem);

        return dma + fqe->offset + fq->pp->p.offset;
}

void libeth_rx_recycle_slow(netmem_ref netmem);

/**
 * libeth_rx_sync_for_cpu - synchronize or recycle buffer post DMA
 * @fqe: buffer to process
 * @len: frame length from the descriptor
 *
 * Process the buffer after it's written by HW. The regular path is to
 * synchronize DMA for CPU, but in case of no data it will be immediately
 * recycled back to its PP.
 *
 * Return: true when there's data to process, false otherwise.
 */
static inline bool libeth_rx_sync_for_cpu(const struct libeth_fqe *fqe,
                                          u32 len)
{
        netmem_ref netmem = fqe->netmem;

        if (likely(len)) {
                page_pool_dma_sync_netmem_for_cpu(netmem_get_pp(netmem),
                                                  netmem, fqe->offset, len);
                return true;
        }

        /* Very rare, but possible case. The most common reason:
         * the last fragment contained FCS only, which was then
         * stripped by the HW.
         */
        libeth_rx_recycle_slow(netmem);

        return false;
}

/* Converting abstract packet type numbers into a software structure with
 * the packet parameters to do O(1) lookup on Rx.
 */

/* Outermost IP header version (or plain L2 when no IP) */
enum {
        LIBETH_RX_PT_OUTER_L2                   = 0U,
        LIBETH_RX_PT_OUTER_IPV4,
        LIBETH_RX_PT_OUTER_IPV6,
};

/* Whether the outer IP header is fragmented */
enum {
        LIBETH_RX_PT_NOT_FRAG                   = 0U,
        LIBETH_RX_PT_FRAG,
};

/* Tunnel type carried inside the outer IP header */
enum {
        LIBETH_RX_PT_TUNNEL_IP_NONE             = 0U,
        LIBETH_RX_PT_TUNNEL_IP_IP,
        LIBETH_RX_PT_TUNNEL_IP_GRENAT,
        LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC,
        LIBETH_RX_PT_TUNNEL_IP_GRENAT_MAC_VLAN,
};

/* IP version of the tunnel end (inner) header */
enum {
        LIBETH_RX_PT_TUNNEL_END_NONE            = 0U,
        LIBETH_RX_PT_TUNNEL_END_IPV4,
        LIBETH_RX_PT_TUNNEL_END_IPV6,
};

/* Innermost L4 protocol */
enum {
        LIBETH_RX_PT_INNER_NONE                 = 0U,
        LIBETH_RX_PT_INNER_UDP,
        LIBETH_RX_PT_INNER_TCP,
        LIBETH_RX_PT_INNER_SCTP,
        LIBETH_RX_PT_INNER_ICMP,
        LIBETH_RX_PT_INNER_TIMESYNC,
};

/* Payload hash level, aliased directly to the skb hash types */
#define LIBETH_RX_PT_PAYLOAD_NONE               PKT_HASH_TYPE_NONE
#define LIBETH_RX_PT_PAYLOAD_L2                 PKT_HASH_TYPE_L2
#define LIBETH_RX_PT_PAYLOAD_L3                 PKT_HASH_TYPE_L3
#define LIBETH_RX_PT_PAYLOAD_L4                 PKT_HASH_TYPE_L4

/**
 * struct libeth_rx_pt - decoded packet type parameters for O(1) Rx lookup
 * @outer_ip: outermost IP version, one of LIBETH_RX_PT_OUTER_*
 * @outer_frag: outer header fragmentation, LIBETH_RX_PT_{NOT_,}FRAG
 * @tunnel_type: tunnel type, one of LIBETH_RX_PT_TUNNEL_IP_*
 * @tunnel_end_prot: tunnel end IP version, one of LIBETH_RX_PT_TUNNEL_END_*
 * @tunnel_end_frag: tunnel end fragmentation, LIBETH_RX_PT_{NOT_,}FRAG
 * @inner_prot: innermost L4 protocol, one of LIBETH_RX_PT_INNER_*
 * @payload_layer: hash level passed to skb_set_hash(), PKT_HASH_TYPE_*
 * @pad: padding, do not use
 * @hash_type: RSS hash type, &enum xdp_rss_hash_type
 */
struct libeth_rx_pt {
        u32                                     outer_ip:2;
        u32                                     outer_frag:1;
        u32                                     tunnel_type:3;
        u32                                     tunnel_end_prot:2;
        u32                                     tunnel_end_frag:1;
        u32                                     inner_prot:3;
        enum pkt_hash_types                     payload_layer:2;

        u32                                     pad:2;
        enum xdp_rss_hash_type                  hash_type:16;
};

/**
 * struct libeth_rx_csum - checksum offload bits decoded from the Rx descriptor
 * @l3l4p: detectable L3 and L4 integrity check is processed by the hardware
 * @ipe: IP checksum error
 * @eipe: external (outermost) IP header checksum error (only for tunnels)
 * @eudpe: external (outermost) UDP checksum error (only for tunnels)
 * @ipv6exadd: IPv6 header with extension headers
 * @l4e: L4 integrity error
 * @pprs: set for packets that skip checksum calculation in the HW pre parser
 * @nat: the packet is a UDP tunneled packet
 * @raw_csum_valid: set if raw checksum is valid
 * @pad: padding to naturally align raw_csum field
 * @raw_csum: raw checksum
 */
struct libeth_rx_csum {
        u32                                     l3l4p:1;
        u32                                     ipe:1;
        u32                                     eipe:1;
        u32                                     eudpe:1;
        u32                                     ipv6exadd:1;
        u32                                     l4e:1;
        u32                                     pprs:1;
        u32                                     nat:1;

        u32                                     raw_csum_valid:1;
        u32                                     pad:7;
        u32                                     raw_csum:16;
};

/**
 * struct libeth_rqe_info - receive queue element info
 * @len: packet length
 * @ptype: packet type based on types programmed into the device
 * @eop: whether it's the last fragment of the packet
 * @rxe: MAC errors: CRC, Alignment, Oversize, Undersizes, Length error
 * @vlan: C-VLAN or S-VLAN tag depending on the VLAN offload configuration
 */
struct libeth_rqe_info {
        u32                                     len;

        /* The fields below are packed into a single u32 */
        u32                                     ptype:14;
        u32                                     eop:1;
        u32                                     rxe:1;

        u32                                     vlan:16;
};

void libeth_rx_pt_gen_hash_type(struct libeth_rx_pt *pt);

/**
 * libeth_rx_pt_get_ip_ver - get IP version from a packet type structure
 * @pt: packet type params
 *
 * Wrapper to compile out the IPv6 code from the drivers when not supported
 * by the kernel.
 *
 * Return: @pt.outer_ip or stub for IPv6 when not compiled-in.
 */
static inline u32 libeth_rx_pt_get_ip_ver(struct libeth_rx_pt pt)
{
#if !IS_ENABLED(CONFIG_IPV6)
	/* Collapse IPv6 (and anything else) to plain L2 when the kernel
	 * has no IPv6 support, so drivers can DCE their IPv6 paths.
	 */
	if (pt.outer_ip == LIBETH_RX_PT_OUTER_IPV4)
		return LIBETH_RX_PT_OUTER_IPV4;

	return LIBETH_RX_PT_OUTER_L2;
#else
	return pt.outer_ip;
#endif
}

/* libeth_has_*() can be used to quickly check whether the HW metadata is
 * available to avoid further expensive processing such as descriptor reads.
 * They already check for the corresponding netdev feature to be enabled,
 * thus can be used as drop-in replacements.
 */

static inline bool libeth_rx_pt_has_checksum(const struct net_device *dev,
                                             struct libeth_rx_pt pt)
{
        /* Non-zero _INNER* is only possible when _OUTER_IPV* is set,
         * so checking the L4 type alone is sufficient.
         */
        bool has_l4 = pt.inner_prot > LIBETH_RX_PT_INNER_NONE;

        return likely(has_l4 && (dev->features & NETIF_F_RXCSUM));
}

static inline bool libeth_rx_pt_has_hash(const struct net_device *dev,
                                         struct libeth_rx_pt pt)
{
        /* Only report a hash when the PT carries one and RXHASH is on */
        bool has_hash = pt.payload_layer > LIBETH_RX_PT_PAYLOAD_NONE;

        return likely(has_hash && (dev->features & NETIF_F_RXHASH));
}

/**
 * libeth_rx_pt_set_hash - fill in skb hash value basing on the PT
 * @skb: skb to fill the hash in
 * @hash: 32-bit hash value from the descriptor
 * @pt: packet type
 */
static inline void libeth_rx_pt_set_hash(struct sk_buff *skb, u32 hash,
                                         struct libeth_rx_pt pt)
{
        enum pkt_hash_types type = pt.payload_layer;

        skb_set_hash(skb, hash, type);
}

#endif /* __LIBETH_RX_H */