#include <linux/kernel.h>
#include <linux/init.h>
#include <linux/module.h>
#include <linux/list.h>
#include <linux/rbtree.h>
#include <linux/bsearch.h>
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
#include <net/netfilter/nf_tables_core.h>
/*
 * One interval of the flattened lookup array, covering keys in [from, to).
 * @from: set extension of the element that opens the interval.
 * @to:   set extension of the element that closes it; NULL means the
 *        interval is open-ended (extends to the end of the key space).
 */
struct nft_array_interval {
	struct nft_set_ext *from;
	struct nft_set_ext *to;
};
/*
 * RCU-published, sorted array of intervals used by the lookup fast path.
 * @max_intervals: allocated capacity of @intervals.
 * @num_intervals: number of populated slots.
 * @intervals:     kvmalloc'ed slot array, ascending key order.
 * @rcu_head:      deferred release once readers are done.
 */
struct nft_array {
	u32 max_intervals;
	u32 num_intervals;
	struct nft_array_interval *intervals;
	struct rcu_head rcu_head;
};
/*
 * Per-set private data.
 * @root:             rbtree holding all elements (control-plane view).
 * @lock:             protects @root against concurrent walk/update.
 * @array:            currently published lookup array (RCU readers).
 * @array_next:       shadow array built during a transaction, swapped
 *                    into @array at commit time.
 * @start_rbe_cookie: cookie of the last interval start element seen,
 *                    used to pair start/end operations in one transaction.
 * @last_gc:          jiffies of the last garbage-collector scan.
 * @expired:          elements unlinked from @root, pending GC queueing.
 * @last_tstamp:      transaction timestamp the cookie belongs to.
 */
struct nft_rbtree {
	struct rb_root root;
	rwlock_t lock;
	struct nft_array __rcu *array;
	struct nft_array *array_next;
	unsigned long start_rbe_cookie;
	unsigned long last_gc;
	struct list_head expired;
	u64 last_tstamp;
};
/*
 * One set element. @priv must stay the first member (see BUILD_BUG_ON in
 * nft_rbtree_init). @node and @list share storage: an element is either
 * linked in the tree or parked on the expired list, never both.
 */
struct nft_rbtree_elem {
	struct nft_elem_priv priv;
	union {
		struct rb_node node;
		struct list_head list;
	};
	struct nft_set_ext ext;
};
/* True if the element carries the INTERVAL_END flag, i.e. it closes an
 * interval rather than opening one.
 */
static bool nft_rbtree_interval_end(const struct nft_rbtree_elem *rbe)
{
	return nft_set_ext_exists(&rbe->ext, NFT_SET_EXT_FLAGS) &&
	       (*nft_set_ext_flags(&rbe->ext) & NFT_SET_ELEM_INTERVAL_END);
}
/* Any element not flagged as an interval end is an interval start. */
static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe)
{
	return !nft_rbtree_interval_end(rbe);
}
/* True for the all-zero-key end element, which closes the implicit
 * interval starting at key zero ("null interval").
 */
static bool nft_rbtree_interval_null(const struct nft_set *set,
				     const struct nft_rbtree_elem *rbe)
{
	return (!memchr_inv(nft_set_ext_key(&rbe->ext), 0, set->klen) &&
		nft_rbtree_interval_end(rbe));
}
/* memcmp() the keys of two elements; <0, 0 or >0 like memcmp. */
static int nft_rbtree_cmp(const struct nft_set *set,
			  const struct nft_rbtree_elem *e1,
			  const struct nft_rbtree_elem *e2)
{
	return memcmp(nft_set_ext_key(&e1->ext), nft_set_ext_key(&e2->ext),
		      set->klen);
}
/* Search key passed to bsearch() from the packet-path lookup. */
struct nft_array_lookup_ctx {
	const u32 *key;
	u32 klen;
};
/*
 * bsearch() comparator for the packet path: returns 0 when the key falls
 * in [from, to), -1 when the key sorts before the interval, 1 otherwise.
 * A NULL @to means the interval is open-ended; a NULL @from marks an
 * unused slot and pushes the search towards lower indices' counterpart.
 */
static int nft_array_lookup_cmp(const void *pkey, const void *entry)
{
	const struct nft_array_lookup_ctx *ctx = pkey;
	const struct nft_array_interval *interval = entry;
	int cmp_from, cmp_to;

	if (!interval->from)
		return 1;

	cmp_from = memcmp(ctx->key, nft_set_ext_key(interval->from), ctx->klen);
	if (cmp_from < 0)
		return -1;

	cmp_to = interval->to ?
		 memcmp(ctx->key, nft_set_ext_key(interval->to), ctx->klen) :
		 -1;

	return cmp_to < 0 ? 0 : 1;
}
/*
 * Packet-path lookup: binary search over the RCU-published interval
 * array. Runs under rcu_read_lock(); never touches the rbtree, so it is
 * lock-free with respect to control-plane updates.
 *
 * Returns the extension of the matching interval's start element, or
 * NULL if there is no array yet, no match, or the element has expired.
 */
INDIRECT_CALLABLE_SCOPE
const struct nft_set_ext *
nft_rbtree_lookup(const struct net *net, const struct nft_set *set,
		  const u32 *key)
{
	struct nft_rbtree *priv = nft_set_priv(set);
	struct nft_array *array = rcu_dereference(priv->array);
	const struct nft_array_interval *interval;
	struct nft_array_lookup_ctx ctx = {
		.key	= key,
		.klen	= set->klen,
	};

	/* No commit has published an array yet. */
	if (!array)
		return NULL;

	interval = bsearch(&ctx, array->intervals, array->num_intervals,
			   sizeof(struct nft_array_interval),
			   nft_array_lookup_cmp);
	if (!interval || nft_set_elem_expired(interval->from))
		return NULL;

	return interval->from;
}
/* Search key for the control-plane get path; @flags may carry
 * NFT_SET_ELEM_INTERVAL_END to request the end element instead.
 */
struct nft_array_get_ctx {
	const u32 *key;
	unsigned int flags;
	u32 klen;
};
/*
 * bsearch() comparator for nft_rbtree_get(). Like nft_array_lookup_cmp()
 * but when NFT_SET_ELEM_INTERVAL_END is requested a key equal to the
 * interval's end (b == 0) also matches, so the end element itself can be
 * retrieved.
 */
static int nft_array_get_cmp(const void *pkey, const void *entry)
{
	const struct nft_array_interval *interval = entry;
	const struct nft_array_get_ctx *ctx = pkey;
	int a, b;

	/* Unused slot: steer the search towards lower indices. */
	if (!interval->from)
		return 1;

	a = memcmp(ctx->key, nft_set_ext_key(interval->from), ctx->klen);
	/* Open-ended interval: behave as if the key were below @to. */
	if (!interval->to)
		b = -1;
	else
		b = memcmp(ctx->key, nft_set_ext_key(interval->to), ctx->klen);

	if (a >= 0) {
		if (ctx->flags & NFT_SET_ELEM_INTERVAL_END && b <= 0)
			return 0;
		else if (b < 0)
			return 0;
	}
	if (a < 0)
		return -1;

	return 1;
}
/*
 * Control-plane element retrieval via the published interval array.
 *
 * With NFT_SET_ELEM_INTERVAL_END set, return the element closing the
 * matching interval; otherwise the element opening it.
 *
 * Returns ERR_PTR(-ENOENT) if no array has been published, no interval
 * matches, the matched element expired, or an end element was requested
 * for an open-ended interval (interval->to == NULL) -- the previous code
 * would have fed NULL to container_of() and returned a bogus pointer.
 */
static struct nft_elem_priv *
nft_rbtree_get(const struct net *net, const struct nft_set *set,
	       const struct nft_set_elem *elem, unsigned int flags)
{
	struct nft_rbtree *priv = nft_set_priv(set);
	struct nft_array *array = rcu_dereference(priv->array);
	const struct nft_array_interval *interval;
	struct nft_array_get_ctx ctx = {
		.key	= (const u32 *)&elem->key.val,
		.flags	= flags,
		.klen	= set->klen,
	};
	struct nft_rbtree_elem *rbe;

	if (!array)
		return ERR_PTR(-ENOENT);

	interval = bsearch(&ctx, array->intervals, array->num_intervals,
			   sizeof(struct nft_array_interval), nft_array_get_cmp);
	if (!interval || nft_set_elem_expired(interval->from))
		return ERR_PTR(-ENOENT);

	if (flags & NFT_SET_ELEM_INTERVAL_END) {
		/* Open-ended interval: there is no end element to return. */
		if (!interval->to)
			return ERR_PTR(-ENOENT);
		rbe = container_of(interval->to, struct nft_rbtree_elem, ext);
	} else {
		rbe = container_of(interval->from, struct nft_rbtree_elem, ext);
	}

	return &rbe->priv;
}
/* Unlink an expired element from the tree and park it on the expired
 * list; actual destruction happens later via nft_rbtree_gc_queue().
 * Caller must hold priv->lock for writing.
 */
static void nft_rbtree_gc_elem_move(struct net *net, struct nft_set *set,
				    struct nft_rbtree *priv,
				    struct nft_rbtree_elem *rbe)
{
	lockdep_assert_held_write(&priv->lock);

	nft_setelem_data_deactivate(net, set, &rbe->priv);
	rb_erase(&rbe->node, &priv->root);
	list_add(&rbe->list, &priv->expired);
}
/*
 * Collect an expired element together with the end element of its
 * interval (found by walking rb_prev() until an active end is seen).
 *
 * Returns the end element that was also collected, or NULL if none was
 * found. Note: never returns an ERR_PTR; the IS_ERR() check at the call
 * site in __nft_rbtree_insert() is effectively dead defensive code.
 */
static const struct nft_rbtree_elem *
nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
		   struct nft_rbtree_elem *rbe)
{
	struct nft_set *set = (struct nft_set *)__set;
	struct rb_node *prev = rb_prev(&rbe->node);
	struct net *net = read_pnet(&set->net);
	struct nft_rbtree_elem *rbe_prev;

	/* Skip inactive/non-end neighbours until the interval end shows. */
	while (prev) {
		rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
		if (nft_rbtree_interval_end(rbe_prev) &&
		    nft_set_elem_active(&rbe_prev->ext, NFT_GENMASK_ANY))
			break;

		prev = rb_prev(prev);
	}

	rbe_prev = NULL;
	if (prev) {
		rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
		nft_rbtree_gc_elem_move(net, set, priv, rbe_prev);
	}

	nft_rbtree_gc_elem_move(net, set, priv, rbe);

	return rbe_prev;
}
/* True if @rbe's key compares lower than the current "first" candidate,
 * i.e. @rbe is a better starting point for the linear overlap scan.
 */
static bool nft_rbtree_update_first(const struct nft_set *set,
				    struct nft_rbtree_elem *rbe,
				    struct rb_node *first)
{
	const struct nft_rbtree_elem *cur;

	cur = rb_entry(first, struct nft_rbtree_elem, node);

	return nft_rbtree_cmp(set, rbe, cur) < 0;
}
/* Return the in-order predecessor of @rbe, or NULL at the tree edge.
 * NOTE(review): despite the name, no activity (genmask) check is done.
 */
static struct nft_rbtree_elem *nft_rbtree_prev_active(struct nft_rbtree_elem *rbe)
{
	struct rb_node *prev = rb_prev(&rbe->node);

	return prev ? rb_entry(prev, struct nft_rbtree_elem, node) : NULL;
}
/* Walk forward from @node and return the first element active in
 * @genmask, or NULL when the traversal runs off the tree.
 */
static struct nft_rbtree_elem *
__nft_rbtree_next_active(struct rb_node *node, u8 genmask)
{
	struct nft_rbtree_elem *rbe;

	for (; node; node = rb_next(node)) {
		rbe = rb_entry(node, struct nft_rbtree_elem, node);
		if (nft_set_elem_active(&rbe->ext, genmask))
			return rbe;
	}

	return NULL;
}
/* First active element strictly after @rbe in tree order, or NULL. */
static struct nft_rbtree_elem *
nft_rbtree_next_active(struct nft_rbtree_elem *rbe, u8 genmask)
{
	return __nft_rbtree_next_active(rb_next(&rbe->node), genmask);
}
/* The start cookie only pairs operations within one transaction: when
 * the transaction timestamp changes, invalidate it.
 */
static void nft_rbtree_maybe_reset_start_cookie(struct nft_rbtree *priv,
						u64 tstamp)
{
	if (priv->last_tstamp == tstamp)
		return;

	priv->start_rbe_cookie = 0;
	priv->last_tstamp = tstamp;
}
/* Remember @rbe (by address) as the interval start seen last. */
static void nft_rbtree_set_start_cookie(struct nft_rbtree *priv,
					const struct nft_rbtree_elem *rbe)
{
	priv->start_rbe_cookie = (unsigned long)rbe;
}
/* Does @rbe match the remembered interval start? */
static bool nft_rbtree_cmp_start_cookie(struct nft_rbtree *priv,
					const struct nft_rbtree_elem *rbe)
{
	return priv->start_rbe_cookie == (unsigned long)rbe;
}
/*
 * Called when inserting an end element that already exists: check that
 * it pairs with the start element recorded in the cookie, i.e. the
 * duplicate end belongs to the same interval inserted earlier in this
 * transaction. Returns true if so (or no cookie is pending); false means
 * the end element clashes with a different interval. Always consumes
 * the cookie.
 */
static bool nft_rbtree_insert_same_interval(const struct net *net,
					    struct nft_rbtree *priv,
					    struct nft_rbtree_elem *rbe)
{
	u8 genmask = nft_genmask_next(net);
	struct nft_rbtree_elem *next_rbe;

	if (!priv->start_rbe_cookie)
		return true;

	next_rbe = nft_rbtree_next_active(rbe, genmask);
	if (next_rbe) {
		if (nft_rbtree_interval_start(next_rbe) &&
		    nft_rbtree_cmp_start_cookie(priv, next_rbe)) {
			priv->start_rbe_cookie = 0;
			return true;
		}
	}

	priv->start_rbe_cookie = 0;
	return false;
}
/*
 * Insert @new into the rbtree, detecting duplicates and overlaps.
 *
 * Note the tree's ordering: the descent below walks LEFT when the
 * existing node compares lower than @new (d < 0), so larger keys sit to
 * the left -- rb_first() is the largest element and rb_next() moves
 * towards smaller keys.
 *
 * Returns:
 *   0          success, @new linked into the tree
 *   -EEXIST    duplicate element; *elem_priv set to the clashing one
 *   -ENOTEMPTY @new overlaps an existing interval
 *   -ECANCELED duplicate all-zero ("null") interval end
 *   -EAGAIN    GC removed a node the scan was tracking; caller retries
 */
static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
			       struct nft_rbtree_elem *new,
			       struct nft_elem_priv **elem_priv, u64 tstamp)
{
	struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL, *rbe_prev;
	struct rb_node *node, *next, *parent, **p, *first = NULL;
	struct nft_rbtree *priv = nft_set_priv(set);
	u8 cur_genmask = nft_genmask_cur(net);
	u8 genmask = nft_genmask_next(net);
	int d;

	/* Descend to find where @new would attach; track in @first the
	 * best (lowest-key) node greater than @new as the starting point
	 * of the linear scan below.
	 */
	parent = NULL;
	p = &priv->root.rb_node;
	while (*p != NULL) {
		parent = *p;
		rbe = rb_entry(parent, struct nft_rbtree_elem, node);
		d = nft_rbtree_cmp(set, rbe, new);

		if (d < 0) {
			p = &parent->rb_left;
		} else if (d > 0) {
			if (!first ||
			    nft_rbtree_update_first(set, rbe, first))
				first = &rbe->node;

			p = &parent->rb_right;
		} else {
			/* Same key: end elements sort before starts. */
			if (nft_rbtree_interval_end(rbe))
				p = &parent->rb_left;
			else
				p = &parent->rb_right;
		}
	}

	if (!first)
		first = rb_first(&priv->root);

	/* Linear scan from @first to find the closest active elements
	 * >= (@rbe_ge) and <= (@rbe_le) @new, expiring stale entries on
	 * the way.
	 */
	for (node = first; node != NULL; node = next) {
		next = rb_next(node);

		rbe = rb_entry(node, struct nft_rbtree_elem, node);

		if (!nft_set_elem_active(&rbe->ext, genmask))
			continue;

		/* Expired in the current generation: collect it now. */
		if (__nft_set_elem_expired(&rbe->ext, tstamp) &&
		    nft_set_elem_active(&rbe->ext, cur_genmask)) {
			const struct nft_rbtree_elem *removed_end;

			removed_end = nft_rbtree_gc_elem(set, priv, rbe);
			if (IS_ERR(removed_end))
				return PTR_ERR(removed_end);

			/* GC freed a node we already latched: restart. */
			if (removed_end == rbe_le || removed_end == rbe_ge)
				return -EAGAIN;

			continue;
		}

		d = nft_rbtree_cmp(set, rbe, new);
		if (d == 0) {
			/* Exact key: an end element ends the scan. */
			if (nft_rbtree_interval_end(rbe)) {
				rbe_le = rbe;
				break;
			}
			if (!rbe_ge) {
				rbe_ge = rbe;
				continue;
			}

			if (nft_rbtree_cmp(set, rbe_ge, new) != 0) {
				rbe_ge = rbe;
				continue;
			}

			/* Prefer an element of the same kind as @new. */
			if ((nft_rbtree_interval_start(new) &&
			     nft_rbtree_interval_start(rbe_ge)) ||
			    (nft_rbtree_interval_end(new) &&
			     nft_rbtree_interval_end(rbe_ge))) {
				rbe_ge = rbe;
				continue;
			}
		} else if (d > 0) {
			rbe_ge = rbe;
			continue;
		} else if (d < 0) {
			rbe_le = rbe;
			break;
		}
	}

	/* A null end or a fresh start invalidates any pending cookie. */
	if (nft_rbtree_interval_null(set, new))
		priv->start_rbe_cookie = 0;
	else if (nft_rbtree_interval_start(new) && priv->start_rbe_cookie)
		priv->start_rbe_cookie = 0;

	/* Duplicate of the same kind with equal key: report -EEXIST and
	 * remember the start for pairing with a follow-up end element.
	 */
	if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) &&
	    nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) {
		*elem_priv = &rbe_ge->priv;
		nft_rbtree_set_start_cookie(priv, rbe_ge);
		return -EEXIST;
	}

	if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) &&
	    nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) {
		/* Duplicate null interval end: silently discard. */
		if (nft_rbtree_interval_null(set, new))
			return -ECANCELED;

		*elem_priv = &rbe_le->priv;
		/* End must pair with the start inserted just before. */
		if (!nft_rbtree_insert_same_interval(net, priv, rbe_le))
			return -ENOTEMPTY;

		return -EEXIST;
	}

	/* New start right after an existing start: overlap, unless the
	 * anonymous set's preceding element closes that interval.
	 */
	if (rbe_le &&
	    nft_rbtree_interval_start(rbe_le) && nft_rbtree_interval_start(new)) {
		if (!nft_set_is_anonymous(set))
			return -ENOTEMPTY;

		rbe_prev = nft_rbtree_prev_active(rbe_le);
		if (rbe_prev && nft_rbtree_interval_end(rbe_prev))
			return -ENOTEMPTY;
	}

	/* Adjacent end elements on either side mean overlap. */
	if (rbe_le &&
	    nft_rbtree_interval_end(rbe_le) && nft_rbtree_interval_end(new))
		return -ENOTEMPTY;

	if (rbe_ge &&
	    nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new))
		return -ENOTEMPTY;

	/* No clash: second descent to link @new (GC above may have
	 * changed the tree, so the first descent's slot is stale).
	 */
	parent = NULL;
	p = &priv->root.rb_node;
	while (*p != NULL) {
		parent = *p;
		rbe = rb_entry(parent, struct nft_rbtree_elem, node);
		d = nft_rbtree_cmp(set, rbe, new);

		if (d < 0)
			p = &parent->rb_left;
		else if (d > 0)
			p = &parent->rb_right;
		else if (nft_rbtree_interval_end(rbe))
			p = &parent->rb_left;
		else
			p = &parent->rb_right;
	}

	rb_link_node_rcu(&new->node, parent, p);
	rb_insert_color(&new->node, &priv->root);
	return 0;
}
/*
 * (Re)allocate the slot array of @array to hold @max_intervals entries,
 * zero-initialized. Releases any previous slot array only after the new
 * one is in hand, so @array stays valid on failure.
 *
 * Returns 0 on success or -ENOMEM.
 */
static int nft_array_intervals_alloc(struct nft_array *array, u32 max_intervals)
{
	struct nft_array_interval *intervals;

	intervals = kvzalloc_objs(struct nft_array_interval, max_intervals,
				  GFP_KERNEL_ACCOUNT);
	if (!intervals)
		return -ENOMEM;

	/* kvfree(NULL) is a no-op, no need to guard the first call. */
	kvfree(array->intervals);

	array->intervals = intervals;
	array->max_intervals = max_intervals;

	return 0;
}
/* Allocate an nft_array with capacity for @max_intervals slots.
 * Returns NULL on allocation failure.
 */
static struct nft_array *nft_array_alloc(u32 max_intervals)
{
	struct nft_array *array;

	array = kzalloc_obj(*array, GFP_KERNEL_ACCOUNT);
	if (!array)
		return NULL;

	if (nft_array_intervals_alloc(array, max_intervals) == 0)
		return array;

	kfree(array);
	return NULL;
}
/*
 * Number of interval slots the array must accommodate. Non-anonymous
 * sets store two tree elements (start + end) per interval, hence the
 * halving; the +2 provides headroom, presumably for the null and
 * open-ended intervals -- TODO confirm.
 */
static u32 nft_array_elems(const struct nft_set *set)
{
	u32 nelems = atomic_read(&set->nelems) - set->ndeact;

	if (nft_set_is_anonymous(set))
		return nelems;

	return (nelems / 2) + 2;
}
/* Initial array capacity for regular sets. */
#define NFT_ARRAY_INITIAL_SIZE		1024
/* Anonymous sets start small ... */
#define NFT_ARRAY_INITIAL_ANON_SIZE	16
/* ... and double until the slot array reaches two pages worth. */
#define NFT_ARRAY_INITIAL_ANON_THRESH	(8192U / sizeof(struct nft_array_interval))

/*
 * Ensure priv->array_next exists and is large enough for the pending
 * transaction, growing (x1.5, or doubling for small anonymous sets) or
 * shrinking (when occupancy drops below ~2/3) as needed. @flush resets a
 * non-anonymous set back to the initial capacity.
 *
 * Returns 0 on success, -ENOMEM or -EOVERFLOW on failure.
 */
static int nft_array_may_resize(const struct nft_set *set, bool flush)
{
	u32 initial_intervals, max_intervals, new_max_intervals, delta;
	u32 shrinked_max_intervals, nelems = nft_array_elems(set);
	struct nft_rbtree *priv = nft_set_priv(set);
	struct nft_array *array;

	if (nft_set_is_anonymous(set))
		initial_intervals = NFT_ARRAY_INITIAL_ANON_SIZE;
	else
		initial_intervals = NFT_ARRAY_INITIAL_SIZE;

	/* Base the decision on the shadow array if one already exists,
	 * else on the published array, else start from scratch.
	 */
	if (priv->array_next) {
		max_intervals = priv->array_next->max_intervals;
		new_max_intervals = priv->array_next->max_intervals;
	} else {
		if (priv->array) {
			max_intervals = priv->array->max_intervals;
			new_max_intervals = priv->array->max_intervals;
		} else {
			max_intervals = 0;
			new_max_intervals = initial_intervals;
		}
	}

	/* Anonymous sets never shrink. */
	if (nft_set_is_anonymous(set))
		goto maybe_grow;

	if (flush) {
		nelems = 0;
		new_max_intervals = NFT_ARRAY_INITIAL_SIZE;
		goto realloc_array;
	}

	/* Shrink threshold: 2/3 of the current capacity. */
	if (check_add_overflow(new_max_intervals, new_max_intervals,
			       &shrinked_max_intervals))
		return -EOVERFLOW;

	shrinked_max_intervals = DIV_ROUND_UP(shrinked_max_intervals, 3);
	if (shrinked_max_intervals > NFT_ARRAY_INITIAL_SIZE &&
	    nelems < shrinked_max_intervals) {
		new_max_intervals = shrinked_max_intervals;
		goto realloc_array;
	}

maybe_grow:
	if (nelems > new_max_intervals) {
		if (nft_set_is_anonymous(set) &&
		    new_max_intervals < NFT_ARRAY_INITIAL_ANON_THRESH) {
			/* Small anonymous set: double. */
			new_max_intervals <<= 1;
		} else {
			/* Grow by 50%. */
			delta = new_max_intervals >> 1;
			if (check_add_overflow(new_max_intervals, delta,
					       &new_max_intervals))
				return -EOVERFLOW;
		}
	}

realloc_array:
	if (WARN_ON_ONCE(nelems > new_max_intervals))
		return -ENOMEM;

	if (priv->array_next) {
		/* Shadow array already sized correctly: nothing to do. */
		if (max_intervals == new_max_intervals)
			return 0;

		if (nft_array_intervals_alloc(priv->array_next, new_max_intervals) < 0)
			return -ENOMEM;
	} else {
		array = nft_array_alloc(new_max_intervals);
		if (!array)
			return -ENOMEM;

		priv->array_next = array;
	}

	return 0;
}
/*
 * Insert one element. Sizes the shadow array up front (outside the
 * lock), then retries __nft_rbtree_insert() for as long as it asks for
 * a restart after garbage collection (-EAGAIN), bailing out early on a
 * fatal signal.
 *
 * Returns 0, -EEXIST/-ENOTEMPTY/-ECANCELED from the insert proper,
 * -EINTR on fatal signal, or the resize error (-ENOMEM/-EOVERFLOW) --
 * previously the resize error was always reported as -ENOMEM, hiding
 * -EOVERFLOW.
 */
static int nft_rbtree_insert(const struct net *net, const struct nft_set *set,
			     const struct nft_set_elem *elem,
			     struct nft_elem_priv **elem_priv)
{
	struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem->priv);
	struct nft_rbtree *priv = nft_set_priv(set);
	u64 tstamp = nft_net_tstamp(net);
	int err;

	nft_rbtree_maybe_reset_start_cookie(priv, tstamp);

	err = nft_array_may_resize(set, false);
	if (err < 0)
		return err;

	do {
		if (fatal_signal_pending(current))
			return -EINTR;

		cond_resched();

		write_lock_bh(&priv->lock);
		err = __nft_rbtree_insert(net, set, rbe, elem_priv, tstamp);
		write_unlock_bh(&priv->lock);
	} while (err == -EAGAIN);

	return err;
}
/* Unlink @rbe from the tree under the write lock. */
static void nft_rbtree_erase(struct nft_rbtree *priv, struct nft_rbtree_elem *rbe)
{
	write_lock_bh(&priv->lock);
	rb_erase(&rbe->node, &priv->root);
	write_unlock_bh(&priv->lock);
}
/* .remove callback: drop the element from the tree; the caller owns
 * destruction of the element itself.
 */
static void nft_rbtree_remove(const struct net *net,
			      const struct nft_set *set,
			      struct nft_elem_priv *elem_priv)
{
	struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv);
	struct nft_rbtree *priv = nft_set_priv(set);

	nft_rbtree_erase(priv, rbe);
}
/* .activate callback: mark the element active in the next generation. */
static void nft_rbtree_activate(const struct net *net,
				const struct nft_set *set,
				struct nft_elem_priv *elem_priv)
{
	struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv);

	nft_clear(net, &rbe->ext);
}
/* Return the immediate successor of @rbe iff it is an interval start
 * that is inactive in @genmask; NULL otherwise. Used to pair an end
 * deactivation with the start deactivated just before it.
 */
static struct nft_rbtree_elem *
nft_rbtree_next_inactive(struct nft_rbtree_elem *rbe, u8 genmask)
{
	struct rb_node *node = rb_next(&rbe->node);
	struct nft_rbtree_elem *next;

	if (!node)
		return NULL;

	next = rb_entry(node, struct nft_rbtree_elem, node);
	if (!nft_rbtree_interval_start(next) ||
	    nft_set_elem_active(&next->ext, genmask))
		return NULL;

	return next;
}
/*
 * When deactivating an end element, check it pairs with the start
 * element recorded in the cookie (the start deactivated earlier in this
 * transaction). Returns true if it matches or no cookie is pending;
 * false on a mismatched interval. Always consumes the cookie.
 */
static bool nft_rbtree_deactivate_same_interval(const struct net *net,
						struct nft_rbtree *priv,
						struct nft_rbtree_elem *rbe)
{
	u8 genmask = nft_genmask_next(net);
	struct nft_rbtree_elem *next_rbe;

	if (!priv->start_rbe_cookie)
		return true;

	next_rbe = nft_rbtree_next_inactive(rbe, genmask);
	if (next_rbe) {
		if (nft_rbtree_interval_start(next_rbe) &&
		    nft_rbtree_cmp_start_cookie(priv, next_rbe)) {
			priv->start_rbe_cookie = 0;
			return true;
		}
	}

	priv->start_rbe_cookie = 0;
	return false;
}
/* .flush callback: toggle the element out of the next generation. */
static void nft_rbtree_flush(const struct net *net,
			     const struct nft_set *set,
			     struct nft_elem_priv *elem_priv)
{
	struct nft_rbtree_elem *rbe = nft_elem_priv_cast(elem_priv);

	nft_set_elem_change_active(net, set, &rbe->ext);
}
/*
 * .deactivate callback: find the tree element matching @elem (same key
 * AND same start/end kind) and mark it inactive in the next generation.
 *
 * Returns the element's priv, or NULL if no live match exists, the
 * match has expired, the array resize failed, or an end element does
 * not pair with the start deactivated earlier in this transaction.
 */
static struct nft_elem_priv *
nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
		      const struct nft_set_elem *elem)
{
	struct nft_rbtree_elem *rbe, *this = nft_elem_priv_cast(elem->priv);
	struct nft_rbtree *priv = nft_set_priv(set);
	const struct rb_node *parent = priv->root.rb_node;
	u8 genmask = nft_genmask_next(net);
	u64 tstamp = nft_net_tstamp(net);
	int d;

	nft_rbtree_maybe_reset_start_cookie(priv, tstamp);

	/* A start or null-end deactivation invalidates the cookie. */
	if (nft_rbtree_interval_start(this) ||
	    nft_rbtree_interval_null(set, this))
		priv->start_rbe_cookie = 0;

	/* Make sure the shadow array can absorb the change at commit. */
	if (nft_array_may_resize(set, false) < 0)
		return NULL;

	while (parent != NULL) {
		rbe = rb_entry(parent, struct nft_rbtree_elem, node);
		d = memcmp(nft_set_ext_key(&rbe->ext), &elem->key.val,
			   set->klen);
		if (d < 0)
			parent = parent->rb_left;
		else if (d > 0)
			parent = parent->rb_right;
		else {
			/* Equal key but wrong kind: keep descending
			 * towards the element of the requested kind.
			 */
			if (nft_rbtree_interval_end(rbe) &&
			    nft_rbtree_interval_start(this)) {
				parent = parent->rb_left;
				continue;
			} else if (nft_rbtree_interval_start(rbe) &&
				   nft_rbtree_interval_end(this)) {
				parent = parent->rb_right;
				continue;
			} else if (__nft_set_elem_expired(&rbe->ext, tstamp)) {
				/* Expired: treat as not found. */
				break;
			} else if (!nft_set_elem_active(&rbe->ext, genmask)) {
				parent = parent->rb_left;
				continue;
			}

			if (nft_rbtree_interval_start(rbe))
				nft_rbtree_set_start_cookie(priv, rbe);
			else if (!nft_rbtree_deactivate_same_interval(net, priv, rbe))
				return NULL;

			nft_rbtree_flush(net, set, &rbe->priv);
			return &rbe->priv;
		}
	}

	return NULL;
}
/* In-order traversal of all elements, honouring iter->skip and stopping
 * as soon as the callback reports an error in iter->err.
 */
static void nft_rbtree_do_walk(const struct nft_ctx *ctx,
			       struct nft_set *set,
			       struct nft_set_iter *iter)
{
	struct nft_rbtree *priv = nft_set_priv(set);
	struct nft_rbtree_elem *rbe;
	struct rb_node *node;

	for (node = rb_first(&priv->root); node; node = rb_next(node)) {
		if (iter->count >= iter->skip) {
			rbe = rb_entry(node, struct nft_rbtree_elem, node);

			iter->err = iter->fn(ctx, set, iter, &rbe->priv);
			if (iter->err < 0)
				return;
		}
		iter->count++;
	}
}
/*
 * .walk callback. Update walks run under the commit mutex and need no
 * read lock; the clone variant additionally resets the shadow array to
 * its flush size before iterating. Read walks take the read lock to
 * keep the tree stable against concurrent inserts.
 */
static void nft_rbtree_walk(const struct nft_ctx *ctx,
			    struct nft_set *set,
			    struct nft_set_iter *iter)
{
	struct nft_rbtree *priv = nft_set_priv(set);

	switch (iter->type) {
	case NFT_ITER_UPDATE_CLONE:
		if (nft_array_may_resize(set, true) < 0) {
			iter->err = -ENOMEM;
			break;
		}
		fallthrough;
	case NFT_ITER_UPDATE:
		lockdep_assert_held(&nft_pernet(ctx->net)->commit_mutex);
		nft_rbtree_do_walk(ctx, set, iter);
		break;
	case NFT_ITER_READ:
		read_lock_bh(&priv->lock);
		nft_rbtree_do_walk(ctx, set, iter);
		read_unlock_bh(&priv->lock);
		break;
	default:
		iter->err = -EINVAL;
		WARN_ON_ONCE(1);
		break;
	}
}
/*
 * Full-tree scan for expired elements: each expired start element is
 * moved to the expired list together with the most recently seen end
 * element (its interval's end, given the traversal order). The write
 * lock is only held per removal, not across the whole scan.
 */
static void nft_rbtree_gc_scan(struct nft_set *set)
{
	struct nft_rbtree *priv = nft_set_priv(set);
	struct nft_rbtree_elem *rbe, *rbe_end = NULL;
	struct net *net = read_pnet(&set->net);
	u64 tstamp = nft_net_tstamp(net);
	struct rb_node *node, *next;

	for (node = rb_first(&priv->root); node ; node = next) {
		next = rb_next(node);

		rbe = rb_entry(node, struct nft_rbtree_elem, node);

		/* End elements are collected via their expired start. */
		if (nft_rbtree_interval_end(rbe)) {
			rbe_end = rbe;
			continue;
		}
		if (!__nft_set_elem_expired(&rbe->ext, tstamp))
			continue;

		write_lock_bh(&priv->lock);
		if (rbe_end) {
			nft_rbtree_gc_elem_move(net, set, priv, rbe_end);
			rbe_end = NULL;
		}

		nft_rbtree_gc_elem_move(net, set, priv, rbe);
		write_unlock_bh(&priv->lock);
	}

	priv->last_gc = jiffies;
}
/*
 * Hand the elements parked on priv->expired over to the transaction GC
 * machinery for destruction.
 *
 * NOTE(review): if the initial allocation fails, elements simply stay on
 * the expired list for a later attempt; if nft_trans_gc_queue_sync()
 * fails mid-loop we return with the remaining entries still listed --
 * presumably retried on the next commit, verify against the trans-gc API.
 */
static void nft_rbtree_gc_queue(struct nft_set *set)
{
	struct nft_rbtree *priv = nft_set_priv(set);
	struct nft_rbtree_elem *rbe, *rbe_end;
	struct nft_trans_gc *gc;

	if (list_empty(&priv->expired))
		return;

	gc = nft_trans_gc_alloc(set, 0, GFP_KERNEL);
	if (!gc)
		return;

	list_for_each_entry_safe(rbe, rbe_end, &priv->expired, list) {
		list_del(&rbe->list);
		nft_trans_gc_elem_add(gc, rbe);

		gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
		if (!gc)
			return;
	}

	gc = nft_trans_gc_catchall_sync(gc);
	nft_trans_gc_queue_sync_done(gc);
}
/* .privsize callback: per-set private area, independent of set size. */
static u64 nft_rbtree_privsize(const struct nlattr * const nla[],
			       const struct nft_set_desc *desc)
{
	return sizeof(struct nft_rbtree);
}
/* .init callback: set up the empty tree, lock, expired list and the
 * (not yet published) lookup arrays. Cannot fail.
 */
static int nft_rbtree_init(const struct nft_set *set,
			   const struct nft_set_desc *desc,
			   const struct nlattr * const nla[])
{
	struct nft_rbtree *priv = nft_set_priv(set);

	/* nft_elem_priv_cast() relies on priv being the first member. */
	BUILD_BUG_ON(offsetof(struct nft_rbtree_elem, priv) != 0);

	rwlock_init(&priv->lock);
	priv->root = RB_ROOT;
	INIT_LIST_HEAD(&priv->expired);
	/* __rcu pointer: use RCU_INIT_POINTER() to keep sparse happy;
	 * no memory ordering is required when publishing NULL.
	 */
	RCU_INIT_POINTER(priv->array, NULL);
	priv->array_next = NULL;

	return 0;
}
/* Release an array's slot storage and the array itself. */
static void __nft_array_free(struct nft_array *array)
{
	kvfree(array->intervals);
	kfree(array);
}
/*
 * .destroy callback: tear down every element (both those pending on the
 * expired list and those still in the tree) plus both lookup arrays.
 * Runs when no readers remain, so priv->array can be dereferenced
 * without rcu_read_lock().
 */
static void nft_rbtree_destroy(const struct nft_ctx *ctx,
			       const struct nft_set *set)
{
	struct nft_rbtree *priv = nft_set_priv(set);
	struct nft_rbtree_elem *rbe, *next;
	struct nft_array *array;
	struct rb_node *node;

	list_for_each_entry_safe(rbe, next, &priv->expired, list) {
		list_del(&rbe->list);
		nf_tables_set_elem_destroy(ctx, set, &rbe->priv);
	}

	while ((node = priv->root.rb_node) != NULL) {
		rb_erase(node, &priv->root);
		rbe = rb_entry(node, struct nft_rbtree_elem, node);
		nf_tables_set_elem_destroy(ctx, set, &rbe->priv);
	}

	array = rcu_dereference_protected(priv->array, true);
	if (array)
		__nft_array_free(array);
	if (priv->array_next)
		__nft_array_free(priv->array_next);
}
/* .estimate callback: rbtree handles single-field keys only; report
 * O(log n) lookup and O(n) space, with an unbounded size estimate when
 * the set size is not declared.
 */
static bool nft_rbtree_estimate(const struct nft_set_desc *desc, u32 features,
				struct nft_set_estimate *est)
{
	if (desc->field_count > 1)
		return false;

	est->size = desc->size ?
		    sizeof(struct nft_rbtree) +
		    desc->size * sizeof(struct nft_rbtree_elem) : ~0;
	est->lookup = NFT_SET_CLASS_O_LOG_N;
	est->space = NFT_SET_CLASS_O_N;

	return true;
}
/* RCU callback: free a replaced array after the grace period. */
static void nft_array_free_rcu(struct rcu_head *rcu_head)
{
	struct nft_array *array = container_of(rcu_head, struct nft_array, rcu_head);

	__nft_array_free(array);
}
/*
 * .commit callback: rebuild the flat interval array from the rbtree and
 * publish it via RCU, then queue expired elements for destruction.
 *
 * The tree keeps larger keys to the left (see __nft_rbtree_insert), so
 * rb_last() holds the smallest element and walking rb_prev() visits
 * elements in ascending key order -- the order the bsearch()-based
 * lookups require.
 */
static void nft_rbtree_commit(struct nft_set *set)
{
	struct nft_rbtree *priv = nft_set_priv(set);
	struct nft_rbtree_elem *rbe, *prev_rbe;
	struct nft_array *old;
	u32 num_intervals = 0;
	struct rb_node *node;

	/* No shadow array means nothing changed in this transaction. */
	if (!priv->array_next)
		return;

	/* Opportunistic GC before snapshotting the tree. */
	if (time_after_eq(jiffies, priv->last_gc + nft_set_gc_interval(set)))
		nft_rbtree_gc_scan(set);

	node = rb_last(&priv->root);
	if (node)
		prev_rbe = rb_entry(node, struct nft_rbtree_elem, node);
	else
		prev_rbe = NULL;

	while (prev_rbe) {
		rbe = prev_rbe;
		/* A start opens slot num_intervals; a matching end closes
		 * it and advances to the next slot.
		 */
		if (nft_rbtree_interval_start(rbe))
			priv->array_next->intervals[num_intervals].from = &rbe->ext;
		else if (nft_rbtree_interval_end(rbe))
			priv->array_next->intervals[num_intervals++].to = &rbe->ext;

		if (num_intervals >= priv->array_next->max_intervals) {
			pr_warn_once("malformed interval set from userspace?");
			goto err_out;
		}

		node = rb_prev(node);
		if (!node)
			break;

		prev_rbe = rb_entry(node, struct nft_rbtree_elem, node);
		/* Two consecutive starts: close the open slot with the
		 * following start element as its upper bound.
		 */
		if (nft_rbtree_interval_start(rbe) &&
		    nft_rbtree_interval_start(prev_rbe) &&
		    priv->array_next->intervals[num_intervals].from)
			priv->array_next->intervals[num_intervals++].to = &prev_rbe->ext;

		if (num_intervals >= priv->array_next->max_intervals) {
			pr_warn_once("malformed interval set from userspace?");
			goto err_out;
		}
	}

	/* Trailing open-ended interval: start with no end (to == NULL). */
	if (priv->array_next->intervals[num_intervals].from)
		num_intervals++;
err_out:
	priv->array_next->num_intervals = num_intervals;
	/* Publish; RCU readers keep using the old array until the grace
	 * period elapses.
	 */
	old = rcu_replace_pointer(priv->array, priv->array_next,
				  lockdep_is_held(&nft_pernet(read_pnet(&set->net))->commit_mutex));
	priv->array_next = NULL;
	if (old)
		call_rcu(&old->rcu_head, nft_array_free_rcu);

	nft_rbtree_gc_queue(set);
}
/* .abort callback: throw away the shadow array built for the aborted
 * transaction; the published array stays untouched.
 */
static void nft_rbtree_abort(const struct nft_set *set)
{
	struct nft_rbtree *priv = nft_set_priv(set);
	struct nft_array *unused = priv->array_next;

	if (!unused)
		return;

	priv->array_next = NULL;
	__nft_array_free(unused);
}
/* .gc_init callback: start the GC interval clock from "now". */
static void nft_rbtree_gc_init(const struct nft_set *set)
{
	struct nft_rbtree *priv = nft_set_priv(set);

	priv->last_gc = jiffies;
}
/* Kernel-side size: each user-visible interval needs two tree elements
 * (start + end).
 */
static u32 nft_rbtree_ksize(u32 size)
{
	return size * 2;
}
/* User-visible size: inverse of nft_rbtree_ksize(), two tree elements
 * per interval.
 */
static u32 nft_rbtree_usize(u32 size)
{
	return size ? size / 2 : 0;
}
/*
 * Return 1 if the set contains the all-zero "null interval" end element
 * (which does not count towards the user-visible size), 0 otherwise.
 * rb_last() is the smallest-key element given the reversed tree order.
 *
 * NOTE(review): memchr(key, 1, klen) only detects a byte equal to 0x01;
 * memchr_inv(key, 0, klen) would match nft_rbtree_interval_null()'s
 * notion of "all-zero key" -- confirm whether this is intentional.
 */
static u32 nft_rbtree_adjust_maxsize(const struct nft_set *set)
{
	struct nft_rbtree *priv = nft_set_priv(set);
	struct nft_rbtree_elem *rbe;
	struct rb_node *node;
	const void *key;

	node = rb_last(&priv->root);
	if (!node)
		return 0;

	rbe = rb_entry(node, struct nft_rbtree_elem, node);
	if (!nft_rbtree_interval_end(rbe))
		return 0;

	key = nft_set_ext_key(&rbe->ext);
	if (memchr(key, 1, set->klen))
		return 0;

	/* this is the all-zero element, not user-visible. */
	return 1;
}
/* Set type registration: rbtree-backed interval sets with an
 * RCU-published flat array for packet-path lookups.
 */
const struct nft_set_type nft_set_rbtree_type = {
	.features	= NFT_SET_INTERVAL | NFT_SET_MAP | NFT_SET_OBJECT | NFT_SET_TIMEOUT,
	.ops		= {
		.privsize	= nft_rbtree_privsize,
		.elemsize	= offsetof(struct nft_rbtree_elem, ext),
		.estimate	= nft_rbtree_estimate,
		.init		= nft_rbtree_init,
		.destroy	= nft_rbtree_destroy,
		.insert		= nft_rbtree_insert,
		.remove		= nft_rbtree_remove,
		.deactivate	= nft_rbtree_deactivate,
		.flush		= nft_rbtree_flush,
		.activate	= nft_rbtree_activate,
		.commit		= nft_rbtree_commit,
		.abort		= nft_rbtree_abort,
		.gc_init	= nft_rbtree_gc_init,
		.lookup		= nft_rbtree_lookup,
		.walk		= nft_rbtree_walk,
		.get		= nft_rbtree_get,
		.ksize		= nft_rbtree_ksize,
		.usize		= nft_rbtree_usize,
		.adjust_maxsize	= nft_rbtree_adjust_maxsize,
	},
};