/* root/include/net/netfilter/nf_flow_table.h */
#ifndef _NF_FLOW_TABLE_H
#define _NF_FLOW_TABLE_H

#include <linux/in.h>
#include <linux/in6.h>
#include <linux/netdevice.h>
#include <linux/rhashtable-types.h>
#include <linux/rcupdate.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_conntrack_tuple_common.h>
#include <net/flow_offload.h>
#include <net/dst.h>
#include <linux/if_pppox.h>
#include <linux/ppp_defs.h>

struct nf_flowtable;
struct nf_flow_rule;
struct flow_offload;
enum flow_offload_tuple_dir;

/*
 * Flow dissector key layout used to build hardware offload match rules.
 * Each member mirrors a flow_dissector key; nf_flow_match carries one
 * instance for the key values and one for the mask.
 */
struct nf_flow_key {
        struct flow_dissector_key_meta                  meta;
        struct flow_dissector_key_control               control;
        struct flow_dissector_key_control               enc_control;
        struct flow_dissector_key_basic                 basic;
        struct flow_dissector_key_vlan                  vlan;
        struct flow_dissector_key_vlan                  cvlan;
        union {
                struct flow_dissector_key_ipv4_addrs    ipv4;
                struct flow_dissector_key_ipv6_addrs    ipv6;
        };
        struct flow_dissector_key_keyid                 enc_key_id;
        union {                                         /* tunnel (outer) addresses */
                struct flow_dissector_key_ipv4_addrs    enc_ipv4;
                struct flow_dissector_key_ipv6_addrs    enc_ipv6;
        };
        struct flow_dissector_key_tcp                   tcp;
        struct flow_dissector_key_ports                 tp;
} __aligned(BITS_PER_LONG / 8); /* Ensure that we can do comparisons as longs. */

/* Key/mask pair plus the dissector describing which keys are used. */
struct nf_flow_match {
        struct flow_dissector   dissector;
        struct nf_flow_key      key;
        struct nf_flow_key      mask;
};

/* A complete offload rule: the match above plus its flow_rule actions. */
struct nf_flow_rule {
        struct nf_flow_match    match;
        struct flow_rule        *rule;
};

/*
 * Per-family flowtable backend.  Optional callbacks (->gc, ->get, ->put)
 * are NULL-checked by their callers before use (see
 * nf_flow_table_offload_{add,del}_cb() below).
 */
struct nf_flowtable_type {
        struct list_head                list;   /* registration list linkage */
        int                             family; /* L3 family this type handles */
        int                             (*init)(struct nf_flowtable *ft);
        bool                            (*gc)(const struct flow_offload *flow);
        int                             (*setup)(struct nf_flowtable *ft,
                                                 struct net_device *dev,
                                                 enum flow_block_command cmd);
        /* Translate one direction of @flow into @flow_rule actions. */
        int                             (*action)(struct net *net,
                                                  struct flow_offload *flow,
                                                  enum flow_offload_tuple_dir dir,
                                                  struct nf_flow_rule *flow_rule);
        void                            (*free)(struct nf_flowtable *ft);
        void                            (*get)(struct nf_flowtable *ft);
        void                            (*put)(struct nf_flowtable *ft);
        nf_hookfn                       *hook;  /* datapath entry point */
        struct module                   *owner;
};

/* Flowtable-wide feature flags, stored in nf_flowtable.flags. */
enum nf_flowtable_flags {
        NF_FLOWTABLE_HW_OFFLOAD         = 0x1,  /* NFT_FLOWTABLE_HW_OFFLOAD */
        NF_FLOWTABLE_COUNTER            = 0x2,  /* NFT_FLOWTABLE_COUNTER */
};

/*
 * A flowtable instance.  Field order is deliberate: read-mostly datapath
 * members come first, slowpath/control-path state follows.
 */
struct nf_flowtable {
        unsigned int                    flags;          /* readonly in datapath */
        int                             priority;       /* control path (padding hole) */
        struct rhashtable               rhashtable;     /* datapath, read-mostly members come first */

        struct list_head                list;           /* slowpath parts */
        const struct nf_flowtable_type  *type;
        struct delayed_work             gc_work;
        struct flow_block               flow_block;
        struct rw_semaphore             flow_block_lock; /* Guards flow_block */
        possible_net_t                  net;
};

/* True if this flowtable was created with hardware offload enabled. */
static inline bool nf_flowtable_hw_offload(struct nf_flowtable *flowtable)
{
        unsigned int flags = flowtable->flags;

        return (flags & NF_FLOWTABLE_HW_OFFLOAD) != 0;
}

/* Direction of a tuple within a flow; values alias the conntrack ones. */
enum flow_offload_tuple_dir {
        FLOW_OFFLOAD_DIR_ORIGINAL = IP_CT_DIR_ORIGINAL,
        FLOW_OFFLOAD_DIR_REPLY = IP_CT_DIR_REPLY,
};
#define FLOW_OFFLOAD_DIR_MAX    IP_CT_DIR_MAX

/* How packets matching a tuple are transmitted (flow_offload_tuple.xmit_type). */
enum flow_offload_xmit_type {
        FLOW_OFFLOAD_XMIT_UNSPEC        = 0,
        FLOW_OFFLOAD_XMIT_NEIGH,
        FLOW_OFFLOAD_XMIT_XFRM,
        FLOW_OFFLOAD_XMIT_DIRECT,
        FLOW_OFFLOAD_XMIT_TC,
};

/* Maximum number of stacked encapsulation tags tracked per tuple. */
#define NF_FLOW_TABLE_ENCAP_MAX         2

/*
 * Tunnel endpoints recorded for a tuple.
 * NOTE(review): presumably l3_proto selects which union arm (v4 vs v6)
 * is valid — confirm against the code that fills this in.
 */
struct flow_offload_tunnel {
        union {
                struct in_addr  src_v4;
                struct in6_addr src_v6;
        };
        union {
                struct in_addr  dst_v4;
                struct in6_addr dst_v6;
        };

        u8      l3_proto;
};

/*
 * One direction of an offloaded flow.  Everything up to the zero-sized
 * __hash marker is the rhashtable lookup key; the members after it carry
 * per-direction forwarding state.  Do not reorder the key members.
 */
struct flow_offload_tuple {
        union {
                struct in_addr          src_v4;
                struct in6_addr         src_v6;
        };
        union {
                struct in_addr          dst_v4;
                struct in6_addr         dst_v6;
        };
        struct {
                __be16                  src_port;
                __be16                  dst_port;
        };

        int                             iifidx;         /* input interface index */

        u8                              l3proto;
        u8                              l4proto;
        struct {                                        /* stacked encap tags */
                u16                     id;
                __be16                  proto;
        } encap[NF_FLOW_TABLE_ENCAP_MAX];

        struct flow_offload_tunnel      tun;

        /* All members above are keys for lookups, see flow_offload_hash(). */
        struct { }                      __hash;

        u8                              dir:2,          /* enum flow_offload_tuple_dir */
                                        xmit_type:3,    /* enum flow_offload_xmit_type */
                                        encap_num:2,    /* valid entries in encap[] */
                                        tun_num:2,
                                        in_vlan_ingress:2;
        u16                             mtu;
        /*
         * Transmit state; which arm is valid depends on xmit_type.
         * NOTE(review): presumed mapping — anonymous dst_cache arm for
         * NEIGH/XFRM, out for DIRECT, tc for TC — confirm against the
         * datapath users.
         */
        union {
                struct {
                        struct dst_entry *dst_cache;
                        u32             ifidx;
                        u32             dst_cookie;
                };
                struct {
                        u32             ifidx;
                        u8              h_source[ETH_ALEN];
                        u8              h_dest[ETH_ALEN];
                } out;
                struct {
                        u32             iifidx;
                } tc;
        };
};

/* Tuple wrapped with its rhashtable linkage; what the table stores. */
struct flow_offload_tuple_rhash {
        struct rhash_head               node;
        struct flow_offload_tuple       tuple;
};

/* Bit numbers for flow_offload.flags (used with test/set_bit and friends). */
enum nf_flow_flags {
        NF_FLOW_SNAT,
        NF_FLOW_DNAT,
        NF_FLOW_CLOSING,
        NF_FLOW_TEARDOWN,
        NF_FLOW_HW,
        NF_FLOW_HW_DYING,
        NF_FLOW_HW_DEAD,
        NF_FLOW_HW_PENDING,
        NF_FLOW_HW_BIDIRECTIONAL,
        NF_FLOW_HW_ESTABLISHED,
};

/* Value for flow_offload.type. */
enum flow_offload_type {
        NF_FLOW_OFFLOAD_UNSPEC  = 0,
        NF_FLOW_OFFLOAD_ROUTE,
};

/* An offloaded flow: one tuple per direction plus its conntrack entry. */
struct flow_offload {
        struct flow_offload_tuple_rhash         tuplehash[FLOW_OFFLOAD_DIR_MAX];
        struct nf_conn                          *ct;    /* backing conntrack entry */
        unsigned long                           flags;  /* enum nf_flow_flags bits */
        u16                                     type;   /* enum flow_offload_type */
        u32                                     timeout; /* expiry, jiffies timestamp */
        struct rcu_head                         rcu_head;
};

/* Default flow timeout and the truncated-jiffies timestamp used with it. */
#define NF_FLOW_TIMEOUT (30 * HZ)
#define nf_flowtable_time_stamp (u32)jiffies

unsigned long flow_offload_get_timeout(struct flow_offload *flow);

/*
 * Signed distance from now to @timeout; negative once the timeout has
 * passed.  The unsigned subtraction followed by the signed cast keeps
 * the comparison correct across jiffies wraparound.
 */
static inline __s32 nf_flow_timeout_delta(unsigned int timeout)
{
        return (__s32)(timeout - nf_flowtable_time_stamp);
}

/*
 * Route description used to initialize a flow_offload (one entry per
 * direction); consumed by flow_offload_route_init().
 */
struct nf_flow_route {
        struct {
                struct dst_entry                *dst;
                struct {
                        u32                     ifindex;
                        struct {                        /* stacked encap tags */
                                u16             id;
                                __be16          proto;
                        } encap[NF_FLOW_TABLE_ENCAP_MAX];
                        struct flow_offload_tunnel tun;
                        u8                      num_encaps:2,
                                                num_tuns:2,
                                                ingress_vlans:2;
                } in;
                struct {
                        u32                     ifindex;
                        u32                     hw_ifindex;
                        u8                      h_source[ETH_ALEN];
                        u8                      h_dest[ETH_ALEN];
                } out;
                enum flow_offload_xmit_type     xmit_type;
        } tuple[FLOW_OFFLOAD_DIR_MAX];
};

/* Allocate/free a flow object bound to conntrack entry @ct. */
struct flow_offload *flow_offload_alloc(struct nf_conn *ct);
void flow_offload_free(struct flow_offload *flow);

struct nft_flowtable;
struct nft_pktinfo;
/* Resolve routing information for @dir into @route (nf_tables path). */
int nft_flow_route(const struct nft_pktinfo *pkt, const struct nf_conn *ct,
                   struct nf_flow_route *route, enum ip_conntrack_dir dir,
                   struct nft_flowtable *ft);

/*
 * Register @cb/@cb_priv on the flowtable's flow_block.
 *
 * Returns 0 on success, -EEXIST if the same cb/cb_priv pair is already
 * registered, or the error from flow_block_cb_alloc().  On success a
 * reference is taken on the backend via ->get(), if the type provides it.
 */
static inline int
nf_flow_table_offload_add_cb(struct nf_flowtable *flow_table,
                             flow_setup_cb_t *cb, void *cb_priv)
{
        struct flow_block *block = &flow_table->flow_block;
        struct flow_block_cb *block_cb;
        int err = 0;

        down_write(&flow_table->flow_block_lock);
        /* Reject duplicate registrations of the same cb/priv pair. */
        block_cb = flow_block_cb_lookup(block, cb, cb_priv);
        if (block_cb) {
                err = -EEXIST;
                goto unlock;
        }

        block_cb = flow_block_cb_alloc(cb, cb_priv, cb_priv, NULL);
        if (IS_ERR(block_cb)) {
                err = PTR_ERR(block_cb);
                goto unlock;
        }

        list_add_tail(&block_cb->list, &block->cb_list);
        up_write(&flow_table->flow_block_lock);

        /* Reference the backend only after the callback is visible. */
        if (flow_table->type->get)
                flow_table->type->get(flow_table);
        return 0;

unlock:
        up_write(&flow_table->flow_block_lock);
        return err;
}

/*
 * Unregister @cb/@cb_priv from the flowtable's flow_block and drop the
 * backend reference taken by nf_flow_table_offload_add_cb().
 *
 * A missing registration is a caller bug and triggers a WARN_ON.
 * NOTE(review): ->put() runs even on that WARN path, mirroring the
 * unconditional ->get() pairing expected by callers.
 */
static inline void
nf_flow_table_offload_del_cb(struct nf_flowtable *flow_table,
                             flow_setup_cb_t *cb, void *cb_priv)
{
        struct flow_block *block = &flow_table->flow_block;
        struct flow_block_cb *block_cb;

        down_write(&flow_table->flow_block_lock);
        block_cb = flow_block_cb_lookup(block, cb, cb_priv);
        if (block_cb) {
                list_del(&block_cb->list);
                flow_block_cb_free(block_cb);
        } else {
                WARN_ON(true);
        }
        up_write(&flow_table->flow_block_lock);

        if (flow_table->type->put)
                flow_table->type->put(flow_table);
}

/* Fill both tuples of @flow from the pre-resolved @route. */
void flow_offload_route_init(struct flow_offload *flow,
                             struct nf_flow_route *route);

/* Insert a flow into the table / extend its timeout on activity. */
int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow);
void flow_offload_refresh(struct nf_flowtable *flow_table,
                          struct flow_offload *flow, bool force);

/* Look up the table entry whose key matches @tuple, or NULL. */
struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
                                                     struct flow_offload_tuple *tuple);
/* Garbage collection: expire flows, optionally scoped to @dev. */
void nf_flow_table_gc_run(struct nf_flowtable *flow_table);
void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
                              struct net_device *dev);
void nf_flow_table_cleanup(struct net_device *dev);

/* Flowtable lifetime. */
int nf_flow_table_init(struct nf_flowtable *flow_table);
void nf_flow_table_free(struct nf_flowtable *flow_table);

/* Mark a flow for removal and hand it back to classic conntrack. */
void flow_offload_teardown(struct flow_offload *flow);

/* Rewrite the L4 ports in @skb according to the flow's NAT mapping. */
void nf_flow_snat_port(const struct flow_offload *flow,
                       struct sk_buff *skb, unsigned int thoff,
                       u8 protocol, enum flow_offload_tuple_dir dir);
void nf_flow_dnat_port(const struct flow_offload *flow,
                       struct sk_buff *skb, unsigned int thoff,
                       u8 protocol, enum flow_offload_tuple_dir dir);

/*
 * L4 source/destination port pair.
 * NOTE(review): layout presumably matches the first 4 bytes of a TCP/UDP
 * header so it can be read straight off the packet — confirm in callers.
 */
struct flow_ports {
        __be16 source, dest;
};

/* XDP offload: device-to-flowtable lookup and block setup. */
struct nf_flowtable *nf_flowtable_by_dev(const struct net_device *dev);
int nf_flow_offload_xdp_setup(struct nf_flowtable *flowtable,
                              struct net_device *dev,
                              enum flow_block_command cmd);

/* Netfilter hook entry points for the IPv4/IPv6 software fast path. */
unsigned int nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
                                     const struct nf_hook_state *state);
unsigned int nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
                                       const struct nf_hook_state *state);

/*
 * BPF registration is only available when BTF info is generated for the
 * flowtable code (built-in or module); otherwise it is a no-op stub.
 */
#if (IS_BUILTIN(CONFIG_NF_FLOW_TABLE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) || \
    (IS_MODULE(CONFIG_NF_FLOW_TABLE) && IS_ENABLED(CONFIG_DEBUG_INFO_BTF_MODULES))
extern int nf_flow_register_bpf(void);
#else
static inline int nf_flow_register_bpf(void)
{
        return 0;
}
#endif

/* Module alias so flowtable backends can be autoloaded by family. */
#define MODULE_ALIAS_NF_FLOWTABLE(family)       \
        MODULE_ALIAS("nf-flowtable-" __stringify(family))

/* Hardware offload work items: add/remove a flow, pull its counters. */
void nf_flow_offload_add(struct nf_flowtable *flowtable,
                         struct flow_offload *flow);
void nf_flow_offload_del(struct nf_flowtable *flowtable,
                         struct flow_offload *flow);
void nf_flow_offload_stats(struct nf_flowtable *flowtable,
                           struct flow_offload *flow);

/* Drain pending offload work for a flowtable (teardown paths). */
void nf_flow_table_offload_flush(struct nf_flowtable *flowtable);
void nf_flow_table_offload_flush_cleanup(struct nf_flowtable *flowtable);

/* Bind/unbind a net_device's flow block to this flowtable. */
int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
                                struct net_device *dev,
                                enum flow_block_command cmd);
/* Build the offload rule for one direction of a routed flow. */
int nf_flow_rule_route_ipv4(struct net *net, struct flow_offload *flow,
                            enum flow_offload_tuple_dir dir,
                            struct nf_flow_rule *flow_rule);
int nf_flow_rule_route_ipv6(struct net *net, struct flow_offload *flow,
                            enum flow_offload_tuple_dir dir,
                            struct nf_flow_rule *flow_rule);

/* Offload subsystem init/exit (module load/unload). */
int nf_flow_table_offload_init(void);
void nf_flow_table_offload_exit(void);

static inline __be16 __nf_flow_pppoe_proto(const struct sk_buff *skb)
{
        __be16 proto;

        proto = *((__be16 *)(skb_mac_header(skb) + ETH_HLEN +
                             sizeof(struct pppoe_hdr)));
        switch (proto) {
        case htons(PPP_IP):
                return htons(ETH_P_IP);
        case htons(PPP_IPV6):
                return htons(ETH_P_IPV6);
        }

        return 0;
}

/*
 * Ensure the PPPoE session header is in the linear area, then report the
 * inner Ethernet protocol through @inner_proto.  Returns false (leaving
 * @inner_proto untouched) when the header cannot be pulled.
 */
static inline bool nf_flow_pppoe_proto(struct sk_buff *skb, __be16 *inner_proto)
{
        if (pskb_may_pull(skb, ETH_HLEN + PPPOE_SES_HLEN)) {
                *inner_proto = __nf_flow_pppoe_proto(skb);
                return true;
        }

        return false;
}

/*
 * Per-cpu flowtable statistics helpers.  The __this_cpu_* variants
 * require preemption to be disabled; the *_ATOMIC ones are safe anywhere.
 */
#define NF_FLOW_TABLE_STAT_INC(net, count) __this_cpu_inc((net)->ft.stat->count)
#define NF_FLOW_TABLE_STAT_DEC(net, count) __this_cpu_dec((net)->ft.stat->count)
#define NF_FLOW_TABLE_STAT_INC_ATOMIC(net, count)       \
        this_cpu_inc((net)->ft.stat->count)
#define NF_FLOW_TABLE_STAT_DEC_ATOMIC(net, count)       \
        this_cpu_dec((net)->ft.stat->count)

/* Per-netns procfs setup/teardown; no-op stubs when procfs support is off. */
#ifdef CONFIG_NF_FLOW_TABLE_PROCFS
int nf_flow_table_init_proc(struct net *net);
void nf_flow_table_fini_proc(struct net *net);
#else
static inline int nf_flow_table_init_proc(struct net *net)
{
        return 0;
}

static inline void nf_flow_table_fini_proc(struct net *net)
{
}
#endif /* CONFIG_NF_FLOW_TABLE_PROCFS */

#endif /* _NF_FLOW_TABLE_H */