#include "opt_inet6.h"
#include "opt_inet.h"
#include "opt_rss.h"
#include <sys/param.h>
#include <sys/mbuf.h>
#include <sys/socket.h>
#include <sys/priv.h>
#include <sys/kernel.h>
#include <sys/smp.h>
#include <sys/sysctl.h>
#include <sys/sbuf.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/netisr.h>
#include <net/rss_config.h>
#include <net/toeplitz.h>
/*
 * When neither INET nor INET6 is compiled in, redirect the sysctl parent
 * names used throughout this file from _net_inet / _net_inet_rss to
 * _net / _net_rss.
 */
#if !defined(INET) && !defined(INET6)
#define _net_inet _net
#define _net_inet_rss _net_rss
#endif
SYSCTL_DECL(_net_inet);
/* Parent node under which every RSS sysctl in this file is registered. */
SYSCTL_NODE(_net_inet, OID_AUTO, rss, CTLFLAG_RW | CTLFLAG_MPSAFE, 0,
"Receive-side steering");
/*
 * Hash algorithm used by rss_hash().  Exported read-only but settable as a
 * tunable; rss_init() coerces any value other than RSS_HASH_TOEPLITZ or
 * RSS_HASH_NAIVE back to RSS_HASH_TOEPLITZ.
 */
static u_int rss_hashalgo = RSS_HASH_TOEPLITZ;
SYSCTL_INT(_net_inet_rss, OID_AUTO, hashalgo, CTLFLAG_RDTUN, &rss_hashalgo, 0,
"RSS hash algorithm");
#ifdef RSS
/* Advertise RSS support to the rest of the system. */
FEATURE(rss, "Receiver-side scaling");
/*
 * Number of hash bits used to pick a bucket.  Settable as a tunable; when
 * left at zero on a multi-CPU system rss_init() derives it from the CPU
 * count, and on a single-CPU system rss_init() forces it to zero.
 */
static u_int rss_bits;
SYSCTL_INT(_net_inet_rss, OID_AUTO, bits, CTLFLAG_RDTUN, &rss_bits, 0,
"RSS bits");
/* Mask applied to a hash to obtain a bucket; rss_buckets - 1. */
static u_int rss_mask;
SYSCTL_INT(_net_inet_rss, OID_AUTO, mask, CTLFLAG_RD, &rss_mask, 0,
"RSS mask");
/* Compile-time upper bound on rss_bits, exported for inspection. */
static const u_int rss_maxbits = RSS_MAXBITS;
SYSCTL_INT(_net_inet_rss, OID_AUTO, maxbits, CTLFLAG_RD,
__DECONST(int *, &rss_maxbits), 0, "RSS maximum bits");
/*
 * Number of present CPUs counted by rss_init(), clamped to RSS_MAXCPUS.
 */
static u_int rss_ncpus;
SYSCTL_INT(_net_inet_rss, OID_AUTO, ncpus, CTLFLAG_RD, &rss_ncpus, 0,
"Number of CPUs available to RSS");
/*
 * CPU limit: half of the largest possible bucket count (1 << RSS_MAXBITS).
 */
#define RSS_MAXCPUS (1 << (RSS_MAXBITS - 1))
static const u_int rss_maxcpus = RSS_MAXCPUS;
SYSCTL_INT(_net_inet_rss, OID_AUTO, maxcpus, CTLFLAG_RD,
__DECONST(int *, &rss_maxcpus), 0, "RSS maximum CPUs that can be used");
/* Number of buckets in use: 1 << rss_bits, or 1 on a single-CPU system. */
static u_int rss_buckets;
SYSCTL_INT(_net_inet_rss, OID_AUTO, buckets, CTLFLAG_RD, &rss_buckets, 0,
"RSS buckets");
/*
 * Const with no initializer, so it is always zero; nothing in this file
 * reads it other than the sysctl below.
 */
static const u_int rss_basecpu;
SYSCTL_INT(_net_inet_rss, OID_AUTO, basecpu, CTLFLAG_RD,
__DECONST(int *, &rss_basecpu), 0, "RSS base CPU");
#endif
/*
 * Debug verbosity, writable at runtime and settable as a tunable.
 * NOTE(review): presumably consulted by the RSS_DEBUG() macro, which is
 * defined outside this file -- confirm.
 */
int rss_debug = 0;
SYSCTL_INT(_net_inet_rss, OID_AUTO, debug, CTLFLAG_RWTUN, &rss_debug, 0,
"RSS debug level");
/*
 * When non-zero, rss_gethashconfig() also reports the UDP hash types.
 * Read-only at runtime; settable as a tunable.
 */
static u_int rss_udp_4tuple = 0;
SYSCTL_INT(_net_inet_rss, OID_AUTO, udp_4tuple, CTLFLAG_RDTUN,
&rss_udp_4tuple, 0,
"Enable UDP 4-tuple RSS hashing (src/dst IP + src/dst port)");
/*
 * Key fed to the hash functions by rss_hash() and handed out by
 * rss_getkey() and the "key" sysctl.  Nothing in this file modifies it
 * after initialization, so the value below is the key in effect.
 * NOTE(review): the bytes look like the well-known default key from the
 * Microsoft RSS specification -- confirm before relying on that.
 */
static uint8_t rss_key[RSS_KEYSIZE] = {
0x6d, 0x5a, 0x56, 0xda, 0x25, 0x5b, 0x0e, 0xc2,
0x41, 0x67, 0x25, 0x3d, 0x43, 0xa3, 0x8f, 0xb0,
0xd0, 0xca, 0x2b, 0xcb, 0xae, 0x7b, 0x30, 0xb4,
0x77, 0xcb, 0x2d, 0xa3, 0x80, 0x30, 0xf2, 0x0c,
0x6a, 0x42, 0xb7, 0x3b, 0xbe, 0xac, 0x01, 0xfa,
};
#ifdef RSS
/*
 * One slot of the bucket table.
 * NOTE(review): rte_cpu is a uint8_t, so only CPU ids 0-255 can be stored
 * without truncation -- confirm this is sufficient for supported systems.
 */
struct rss_table_entry {
uint8_t rte_cpu; /* CPU id assigned to this bucket by rss_init() */
};
/*
 * Bucket -> CPU table, indexed by bucket number.  rss_init() fills the
 * first rss_buckets entries; rss_getcpu() reads it.
 */
static struct rss_table_entry rss_table[RSS_TABLE_MAXLEN];
#endif
/*
 * Boot-time RSS setup, run via SYSINIT.
 *
 * Validates the hash algorithm tunable and, when RSS is compiled in,
 * counts the CPUs, sizes the bucket space and assigns a CPU to each bucket.
 */
static void
rss_init(__unused void *arg)
{
#ifdef RSS
	u_int bucket;
	u_int cpu;
#endif

	/* Anything other than the two known algorithms falls back to Toeplitz. */
	if (rss_hashalgo != RSS_HASH_TOEPLITZ &&
	    rss_hashalgo != RSS_HASH_NAIVE) {
		RSS_DEBUG("invalid RSS hashalgo %u, coercing to %u\n",
		    rss_hashalgo, RSS_HASH_TOEPLITZ);
		rss_hashalgo = RSS_HASH_TOEPLITZ;
	}

#ifdef RSS
	/* Count the CPUs that are present, then clamp to the RSS limit. */
	rss_ncpus = 0;
	for (cpu = 0; cpu <= mp_maxid; cpu++) {
		if (!CPU_ABSENT(cpu))
			rss_ncpus++;
	}
	if (rss_ncpus > RSS_MAXCPUS)
		rss_ncpus = RSS_MAXCPUS;

	if (rss_ncpus <= 1) {
		/* Single CPU: one bucket, and every hash maps to it. */
		rss_bits = 0;
		rss_buckets = 1;
		rss_mask = 0;
	} else {
		/*
		 * With no tunable supplied, use one bit more than is needed
		 * to index the CPUs, which yields at least twice as many
		 * buckets as CPUs.
		 */
		if (rss_bits == 0)
			rss_bits = fls(rss_ncpus - 1) + 1;

		/* Reject out-of-range values, whatever their origin. */
		if (rss_bits == 0 || rss_bits > RSS_MAXBITS) {
			RSS_DEBUG("RSS bits %u not valid, coercing to %u\n",
			    rss_bits, RSS_MAXBITS);
			rss_bits = RSS_MAXBITS;
		}

		/* Fewer buckets than CPUs is tolerated, but worth a warning. */
		rss_buckets = (1 << rss_bits);
		if (rss_buckets < rss_ncpus) {
			RSS_DEBUG("WARNING: rss_buckets (%u) less than "
			    "rss_ncpus (%u)\n", rss_buckets, rss_ncpus);
		}
		rss_mask = rss_buckets - 1;
	}

	/* Hand out CPUs to buckets in CPU_FIRST()/CPU_NEXT() order. */
	cpu = CPU_FIRST();
	for (bucket = 0; bucket < rss_buckets; bucket++) {
		rss_table[bucket].rte_cpu = cpu;
		cpu = CPU_NEXT(cpu);
	}
#endif
}
SYSINIT(rss_init, SI_SUB_SOFTINTR, SI_ORDER_SECOND, rss_init, NULL);
/*
 * Trivial "hash": the sum of every key byte and every data byte, wrapping
 * modulo 2^32.
 *
 * keylen/key:   length of and pointer to the key bytes.
 * datalen/data: length of and pointer to the bytes being hashed.
 */
static uint32_t
rss_naive_hash(u_int keylen, const uint8_t *key, u_int datalen,
    const uint8_t *data)
{
	uint32_t sum = 0;

	/* The length parameters are by-value copies, so count them down. */
	while (keylen-- > 0)
		sum += *key++;
	while (datalen-- > 0)
		sum += *data++;
	return (sum);
}
/*
 * Hash 'datalen' bytes starting at 'data' with the configured algorithm
 * and the RSS key, returning the 32-bit result.
 *
 * Panics if rss_hashalgo holds a value other than RSS_HASH_TOEPLITZ or
 * RSS_HASH_NAIVE.
 */
uint32_t
rss_hash(u_int datalen, const uint8_t *data)
{

	switch (rss_hashalgo) {
	case RSS_HASH_TOEPLITZ:
		return (toeplitz_hash(sizeof(rss_key), rss_key, datalen,
		    data));
	case RSS_HASH_NAIVE:
		return (rss_naive_hash(sizeof(rss_key), rss_key, datalen,
		    data));
	default:
		/* rss_hashalgo is a u_int; print it as one, as rss_init() does. */
		panic("%s: unsupported/unknown hashalgo %u", __func__,
		    rss_hashalgo);
	}
}
/*
 * Copy the RSS key into 'key'.  The destination must have room for
 * RSS_KEYSIZE bytes, since the whole key is always copied.
 */
void
rss_getkey(uint8_t *key)
{
	const size_t keylen = sizeof(rss_key);

	bcopy(rss_key, key, keylen);
}
u_int
rss_gethashalgo(void)
{
return (rss_hashalgo);
}
#ifdef RSS
u_int
rss_getbits(void)
{
return (rss_bits);
}
/*
 * Reduce a hash value to a bucket number by keeping only the bits covered
 * by rss_mask.
 */
u_int
rss_getbucket(u_int hash)
{
	u_int bucket;

	bucket = hash & rss_mask;
	return (bucket);
}
/*
 * Map an indirection-table index to a bucket number.  The index is simply
 * masked with rss_mask, so indices beyond the bucket count wrap around.
 */
u_int
rss_get_indirection_to_bucket(u_int index)
{
	u_int bucket;

	bucket = index & rss_mask;
	return (bucket);
}
/*
 * Look up the CPU assigned to 'bucket'.
 *
 * No range check is done here: the caller must pass an index that lies
 * inside rss_table[].
 */
u_int
rss_getcpu(u_int bucket)
{
	const struct rss_table_entry *rte = &rss_table[bucket];

	return (rte->rte_cpu);
}
/*
 * Translate a (hash value, hash type) pair into a CPU id.
 *
 * Returns the CPU owning the hash's bucket when 'hash_type' is one of the
 * RSS types listed below, and NETISR_CPUID_NONE otherwise.
 *
 * NOTE(review): the *_IPV6_EX types advertised by rss_gethashconfig() are
 * not listed here -- confirm that this is intentional.
 */
u_int
rss_hash2cpuid(uint32_t hash_val, uint32_t hash_type)
{
	u_int cpuid = NETISR_CPUID_NONE;

	switch (hash_type) {
	case M_HASHTYPE_RSS_IPV4:
	case M_HASHTYPE_RSS_TCP_IPV4:
	case M_HASHTYPE_RSS_UDP_IPV4:
	case M_HASHTYPE_RSS_IPV6:
	case M_HASHTYPE_RSS_TCP_IPV6:
	case M_HASHTYPE_RSS_UDP_IPV6:
		cpuid = rss_getcpu(rss_getbucket(hash_val));
		break;
	default:
		break;
	}
	return (cpuid);
}
/*
 * Translate a (hash value, hash type) pair into a bucket number.
 *
 * On success stores the bucket in '*bucket_id' and returns 0.  Returns -1,
 * leaving '*bucket_id' untouched, when 'hash_type' is not one of the RSS
 * types listed below.
 */
int
rss_hash2bucket(uint32_t hash_val, uint32_t hash_type, uint32_t *bucket_id)
{
	int rv = -1;

	switch (hash_type) {
	case M_HASHTYPE_RSS_IPV4:
	case M_HASHTYPE_RSS_TCP_IPV4:
	case M_HASHTYPE_RSS_UDP_IPV4:
	case M_HASHTYPE_RSS_IPV6:
	case M_HASHTYPE_RSS_TCP_IPV6:
	case M_HASHTYPE_RSS_UDP_IPV6:
		*bucket_id = rss_getbucket(hash_val);
		rv = 0;
		break;
	default:
		break;
	}
	return (rv);
}
/*
 * Pick a CPU for a packet from the flow id and hash type recorded in its
 * packet header.
 *
 * Stores the result of rss_hash2cpuid() in '*cpuid' and returns 'm'
 * unchanged.  'source' is not used.
 */
struct mbuf *
rss_m2cpuid(struct mbuf *m, uintptr_t source, u_int *cpuid)
{
	uint32_t flowid, hashtype;

	M_ASSERTPKTHDR(m);
	flowid = m->m_pkthdr.flowid;
	hashtype = M_HASHTYPE_GET(m);
	*cpuid = rss_hash2cpuid(flowid, hashtype);
	return (m);
}
/*
 * Pick a bucket for a packet from the flow id and hash type recorded in
 * its packet header.
 *
 * Returns whatever rss_hash2bucket() returns: 0 with '*bucket_id' filled
 * in, or -1 when the packet's hash type is not handled.
 */
int
rss_m2bucket(struct mbuf *m, uint32_t *bucket_id)
{
	uint32_t flowid, hashtype;

	M_ASSERTPKTHDR(m);
	flowid = m->m_pkthdr.flowid;
	hashtype = M_HASHTYPE_GET(m);
	return (rss_hash2bucket(flowid, hashtype, bucket_id));
}
u_int
rss_getnumbuckets(void)
{
return (rss_buckets);
}
u_int
rss_getnumcpus(void)
{
return (rss_ncpus);
}
#endif
/*
 * Return the set of enabled hash types as a bitmask of RSS_HASHTYPE_*
 * flags.
 *
 * The IPv4/IPv6 and TCP types are always reported; the UDP types are added
 * only when the rss_udp_4tuple tunable is non-zero.
 *
 * This function has external linkage, so it is deliberately not declared
 * 'inline': under C99/C11 rules a bare 'inline' definition provides an
 * external definition only if a non-inline declaration is also visible in
 * this translation unit.
 */
u_int
rss_gethashconfig(void)
{
	u_int config;

	config =
	    RSS_HASHTYPE_RSS_IPV4
	    | RSS_HASHTYPE_RSS_TCP_IPV4
	    | RSS_HASHTYPE_RSS_IPV6
	    | RSS_HASHTYPE_RSS_TCP_IPV6
	    | RSS_HASHTYPE_RSS_IPV6_EX
	    | RSS_HASHTYPE_RSS_TCP_IPV6_EX;

	if (rss_udp_4tuple) {
		config |=
		    RSS_HASHTYPE_RSS_UDP_IPV4
		    | RSS_HASHTYPE_RSS_UDP_IPV6
		    | RSS_HASHTYPE_RSS_UDP_IPV6_EX;
	}
	return (config);
}
/*
 * Sysctl handler exposing the RSS key as opaque bytes.
 *
 * The caller must pass the PRIV_NETINET_HASHKEY privilege check.  The key
 * is served from a stack copy, so rss_key itself is never written through
 * this handler; a request carrying new data is answered with EINVAL.
 */
static int
sysctl_rss_key(SYSCTL_HANDLER_ARGS)
{
	uint8_t keycopy[RSS_KEYSIZE];
	int rv;

	rv = priv_check(req->td, PRIV_NETINET_HASHKEY);
	if (rv != 0)
		return (rv);

	bcopy(rss_key, keycopy, sizeof(keycopy));
	rv = sysctl_handle_opaque(oidp, keycopy, sizeof(keycopy), req);
	if (rv != 0)
		return (rv);

	/* Setting a new key is not supported. */
	return (req->newptr != NULL ? EINVAL : 0);
}
SYSCTL_PROC(_net_inet_rss, OID_AUTO, key,
    CTLTYPE_OPAQUE | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0, sysctl_rss_key,
    "", "RSS keying material");
#ifdef RSS
static int
sysctl_rss_bucket_mapping(SYSCTL_HANDLER_ARGS)
{
struct sbuf *sb;
int error;
int i;
error = 0;
error = sysctl_wire_old_buffer(req, 0);
if (error != 0)
return (error);
sb = sbuf_new_for_sysctl(NULL, NULL, 512, req);
if (sb == NULL)
return (ENOMEM);
for (i = 0; i < rss_buckets; i++) {
sbuf_printf(sb, "%s%d:%d", i == 0 ? "" : " ",
i,
rss_getcpu(i));
}
error = sbuf_finish(sb);
sbuf_delete(sb);
return (error);
}
SYSCTL_PROC(_net_inet_rss, OID_AUTO, bucket_mapping,
CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
sysctl_rss_bucket_mapping, "", "RSS bucket -> CPU mapping");
#endif