#include <stdio.h>
#include <unistd.h>
#include <fcntl.h>
#include <sys/socket.h>
#include <bpf/libbpf.h>
#include "test_progs.h"
#include "network_helpers.h"
#include "test_tc_tunnel.skel.h"
#define SERVER_NS "tc-tunnel-server-ns"
#define CLIENT_NS "tc-tunnel-client-ns"
#define MAC_ADDR_VETH1 "00:11:22:33:44:55"
#define IP4_ADDR_VETH1 "192.168.1.1"
#define IP6_ADDR_VETH1 "fd::1"
#define MAC_ADDR_VETH2 "66:77:88:99:AA:BB"
#define IP4_ADDR_VETH2 "192.168.1.2"
#define IP6_ADDR_VETH2 "fd::2"
#define TEST_NAME_MAX_LEN 64
#define PROG_NAME_MAX_LEN 64
#define TUNNEL_ARGS_MAX_LEN 128
#define BUFFER_LEN 2000
#define DEFAULT_TEST_DATA_SIZE 100
#define GSO_TEST_DATA_SIZE BUFFER_LEN
#define TIMEOUT_MS 1000
#define TEST_PORT 8000
#define UDP_PORT 5555
#define MPLS_UDP_PORT 6635
#define FOU_MPLS_PROTO 137
#define VXLAN_ID 1
#define VXLAN_PORT 8472
#define MPLS_TABLE_ENTRIES_COUNT 65536
static char tx_buffer[BUFFER_LEN], rx_buffer[BUFFER_LEN];
/*
 * Description and runtime state of one tunnel subtest.
 *
 * The first group of fields is set statically in subtests_cfg[]; the fields
 * from "name" onwards are filled in while the subtest is being set up and run.
 */
struct subtest_cfg {
/* First half of the subtest name and of the "__encap_<a>_<b>" program name */
char *ebpf_tun_type;
/* Device type passed to "ip link add name testtun0 type ..." */
char *iproute_tun_type;
/* Second half of the subtest/program name; "mpls" selects the MPLS FOU port/proto */
char *mac_tun_type;
/* 4 or 6: selects the address family of the server and of the FOU settings */
int ipproto;
/* Optional callback writing extra "ip link add" arguments into dst */
void (*extra_decap_mod_args_cb)(struct subtest_cfg *cfg, char *dst);
/* When set, testtun0 is given MAC_ADDR_VETH2 as its address */
bool tunnel_need_veth_mac;
/* When set, an "ip fou" RX port is added before creating testtun0 */
bool configure_fou_rx_port;
/* Optional string inserted right after the device type in "ip link add" */
char *tmode;
/* When set, the kernel decapsulation step is skipped entirely */
bool expect_kern_decap_failure;
/* When set, MPLS sysctls and an MPLS route are configured in the server ns */
bool configure_mpls;
/* When set, a second chunk of GSO_TEST_DATA_SIZE bytes is also sent */
bool test_gso;
/* "remote"/"local" addresses of testtun0; defaulted from ipproto if not both set */
char *tunnel_client_addr;
char *tunnel_server_addr;
/* Subtest name produced by build_subtest_name() */
char name[TEST_NAME_MAX_LEN];
/* Address the server listens on and the client connects to */
char *server_addr;
/* FD of the encap program attached on veth1 */
int client_egress_prog_fd;
/* FD of the decap program attached on veth2 */
int server_ingress_prog_fd;
/* Extra "ip link add" arguments, written by extra_decap_mod_args_cb */
char extra_decap_mod_args[TUNNEL_ARGS_MAX_LEN];
/* Listening socket created by run_server() */
int server_fd;
};
/* Both ends of one established client/server connection. */
struct connection {
/* Socket returned by the client-side connect helper */
int client_fd;
/* Socket returned by accept() on the listening server socket */
int server_fd;
};
/*
 * Build the "<ebpf_tun_type>_<mac_tun_type>" identifier of a subtest.
 *
 * @cfg:  subtest configuration providing both name components
 * @dst:  destination buffer
 * @size: size of @dst in bytes
 *
 * Return: 0 on success, a negative value if formatting failed or if the
 * identifier does not fit in @size bytes.
 */
static int build_subtest_name(struct subtest_cfg *cfg, char *dst, size_t size)
{
	int ret;

	ret = snprintf(dst, size, "%s_%s", cfg->ebpf_tun_type,
		       cfg->mac_tun_type);
	if (ret < 0)
		return ret;

	/*
	 * snprintf() returns the length the full string would have had: a
	 * value >= size means the name was truncated, which would silently
	 * yield a wrong subtest or program name.
	 */
	if ((size_t)ret >= size)
		return -1;

	return 0;
}
/*
 * Resolve the BPF programs used by a subtest and store their FDs in @cfg.
 *
 * The encap program is looked up by name, "__encap_" followed by the subtest
 * name; the decap program is always skel->progs.decap_f.
 *
 * Return: 0 on success, a negative value if the name cannot be built or the
 * encap program is not found in the object.
 */
static int set_subtest_progs(struct subtest_cfg *cfg, struct test_tc_tunnel *skel)
{
	char prog_name[PROG_NAME_MAX_LEN];
	struct bpf_program *encap_prog;
	int prefix_len, err;

	prefix_len = snprintf(prog_name, PROG_NAME_MAX_LEN, "__encap_");
	if (prefix_len < 0)
		return prefix_len;

	/* Append the subtest name right after the prefix */
	err = build_subtest_name(cfg, prog_name + prefix_len,
				 PROG_NAME_MAX_LEN - prefix_len);
	if (err < 0)
		return err;

	encap_prog = bpf_object__find_program_by_name(skel->obj, prog_name);
	if (!encap_prog)
		return -1;

	cfg->client_egress_prog_fd = bpf_program__fd(encap_prog);
	cfg->server_ingress_prog_fd = bpf_program__fd(skel->progs.decap_f);

	return 0;
}
/*
 * Fill in the addresses used by a subtest.
 *
 * server_addr always follows cfg->ipproto. The tunnel endpoint addresses
 * are left untouched when both are already provided by the subtest
 * description; otherwise both are set from cfg->ipproto as well.
 */
static void set_subtest_addresses(struct subtest_cfg *cfg)
{
	bool is_ipv6 = cfg->ipproto == 6;

	cfg->server_addr = is_ipv6 ? IP6_ADDR_VETH2 : IP4_ADDR_VETH2;

	/* Explicit tunnel endpoints take precedence over the defaults */
	if (cfg->tunnel_client_addr && cfg->tunnel_server_addr)
		return;

	cfg->tunnel_client_addr = is_ipv6 ? IP6_ADDR_VETH1 : IP4_ADDR_VETH1;
	cfg->tunnel_server_addr = is_ipv6 ? IP6_ADDR_VETH2 : IP4_ADDR_VETH2;
}
/*
 * Start the SOCK_STREAM test server inside SERVER_NS, listening on
 * cfg->server_addr:TEST_PORT, and store the listening socket in
 * cfg->server_fd.
 *
 * Return: 0 on success, -1 if the namespace cannot be entered or the server
 * cannot be started.
 */
static int run_server(struct subtest_cfg *cfg)
{
	struct network_helper_opts opts = {
		.timeout_ms = TIMEOUT_MS
	};
	struct nstoken *server_ns;
	int family;

	if (cfg->ipproto == 6)
		family = AF_INET6;
	else
		family = AF_INET;

	server_ns = open_netns(SERVER_NS);
	if (!ASSERT_OK_PTR(server_ns, "open server ns"))
		return -1;

	cfg->server_fd = start_server_str(family, SOCK_STREAM, cfg->server_addr,
					  TEST_PORT, &opts);
	/* The namespace is only needed while the socket is being created */
	close_netns(server_ns);

	return ASSERT_OK_FD(cfg->server_fd, "start server") ? 0 : -1;
}
/*
 * Read @len bytes from the server side of @conn and compare them with the
 * first @len bytes of tx_buffer.
 *
 * Return: 0 when exactly @len bytes were read and they match, 1 otherwise.
 */
static int check_server_rx_data(struct subtest_cfg *cfg,
				struct connection *conn, int len)
{
	int received;

	/* Start from a clean buffer so stale data cannot match by accident */
	memset(rx_buffer, 0, BUFFER_LEN);
	received = recv(conn->server_fd, rx_buffer, len, 0);

	if (ASSERT_EQ(received, len, "check rx data len") &&
	    ASSERT_MEMEQ(tx_buffer, rx_buffer, len, "check received data"))
		return 0;

	return 1;
}
/*
 * Connect a client to cfg->server_addr:TEST_PORT and accept the connection
 * on the listening socket stored in cfg->server_fd.
 *
 * Return: a heap-allocated connection holding both sockets (released with
 * disconnect_client_from_server()), or NULL if allocation, connect or accept
 * failed.
 */
static struct connection *connect_client_to_server(struct subtest_cfg *cfg)
{
	struct network_helper_opts opts = {.timeout_ms = 500};
	struct connection *conn;
	int family;

	family = cfg->ipproto == 6 ? AF_INET6 : AF_INET;

	conn = malloc(sizeof(*conn));
	if (!conn)
		return NULL;

	conn->client_fd = connect_to_addr_str(family, SOCK_STREAM,
					      cfg->server_addr, TEST_PORT,
					      &opts);
	if (conn->client_fd < 0)
		goto free_conn;

	conn->server_fd = accept(cfg->server_fd, NULL, NULL);
	if (conn->server_fd < 0)
		goto close_client;

	return conn;

close_client:
	close(conn->client_fd);
free_conn:
	free(conn);
	return NULL;
}
/*
 * Close both sockets of @conn (server side first, then client side) and
 * free the connection object. @conn must not be NULL.
 */
static void disconnect_client_from_server(struct subtest_cfg *cfg,
					  struct connection *conn)
{
	int accepted_fd = conn->server_fd;
	int client_fd = conn->client_fd;

	close(accepted_fd);
	close(client_fd);
	free(conn);
}
/*
 * Open a connection, send test data from the client and check what the
 * server receives.
 *
 * @cfg:          subtest configuration
 * @must_succeed: when false, the connection itself is expected to fail and
 *                no data is exchanged
 *
 * Return: 0 when the outcome matches the expectation, -1 otherwise.
 */
static int send_and_test_data(struct subtest_cfg *cfg, bool must_succeed)
{
	struct connection *conn = connect_client_to_server(cfg);
	int sent, res = -1;

	if (!must_succeed) {
		if (ASSERT_ERR_PTR(conn, "connection that must fail"))
			return 0;
		/* The connection unexpectedly exists: release it and fail */
		goto end;
	}

	if (!ASSERT_OK_PTR(conn, "connection that must succeed"))
		return -1;

	sent = send(conn->client_fd, tx_buffer, DEFAULT_TEST_DATA_SIZE, 0);
	if (!ASSERT_EQ(sent, DEFAULT_TEST_DATA_SIZE, "send data from client"))
		goto end;
	if (check_server_rx_data(cfg, conn, DEFAULT_TEST_DATA_SIZE))
		goto end;

	if (cfg->test_gso) {
		/* Second, larger chunk for subtests that exercise GSO */
		sent = send(conn->client_fd, tx_buffer, GSO_TEST_DATA_SIZE, 0);
		if (!ASSERT_EQ(sent, GSO_TEST_DATA_SIZE,
			       "send (large) data from client"))
			goto end;
		/*
		 * NOTE(review): only the first DEFAULT_TEST_DATA_SIZE bytes
		 * of the large chunk are read back and compared - confirm
		 * this is intended.
		 */
		if (check_server_rx_data(cfg, conn, DEFAULT_TEST_DATA_SIZE))
			goto end;
	}

	res = 0;
end:
	disconnect_client_from_server(cfg, conn);
	return res;
}
/*
 * extra_decap_mod_args_cb for VXLAN subtests: write the VXLAN id, the
 * destination port and the "udp6zerocsumrx" keyword into @dst, which must
 * hold at least TUNNEL_ARGS_MAX_LEN bytes.
 */
static void vxlan_decap_mod_args_cb(struct subtest_cfg *cfg, char *dst)
{
	const int vni = VXLAN_ID;
	const int dstport = VXLAN_PORT;

	snprintf(dst, TUNNEL_ARGS_MAX_LEN, "id %d dstport %d udp6zerocsumrx",
		 vni, dstport);
}
/*
 * extra_decap_mod_args_cb for FOU subtests: write the "encap fou" arguments
 * into @dst (at least TUNNEL_ARGS_MAX_LEN bytes). The destination port is
 * MPLS_UDP_PORT when the MAC tunnel type is "mpls", UDP_PORT otherwise.
 */
static void udp_decap_mod_args_cb(struct subtest_cfg *cfg, char *dst)
{
	int dport = UDP_PORT;

	if (strcmp(cfg->mac_tun_type, "mpls") == 0)
		dport = MPLS_UDP_PORT;

	snprintf(dst, TUNNEL_ARGS_MAX_LEN,
		 "encap fou encap-sport auto encap-dport %d", dport);
}
static int configure_fou_rx_port(struct subtest_cfg *cfg, bool add)
{
bool is_mpls = strcmp(cfg->mac_tun_type, "mpls") == 0;
int fou_proto;
if (is_mpls)
fou_proto = FOU_MPLS_PROTO;
else
fou_proto = cfg->ipproto == 6 ? 41 : 4;
SYS(fail, "ip fou %s port %d ipproto %d%s", add ? "add" : "del",
is_mpls ? MPLS_UDP_PORT : UDP_PORT, fou_proto,
cfg->ipproto == 6 ? " -6" : "");
return 0;
fail:
return 1;
}
/* Add the FOU receive port of @cfg; returns what configure_fou_rx_port() returns. */
static int add_fou_rx_port(struct subtest_cfg *cfg)
{
	const bool add = true;

	return configure_fou_rx_port(cfg, add);
}
/* Delete the FOU receive port of @cfg; returns what configure_fou_rx_port() returns. */
static int del_fou_rx_port(struct subtest_cfg *cfg)
{
	const bool add = false;

	return configure_fou_rx_port(cfg, add);
}
static int update_tunnel_intf_addr(struct subtest_cfg *cfg)
{
SYS(fail, "ip link set dev testtun0 address " MAC_ADDR_VETH2);
return 0;
fail:
return -1;
}
static int configure_kernel_for_mpls(struct subtest_cfg *cfg)
{
SYS(fail, "sysctl -qw net.mpls.platform_labels=%d",
MPLS_TABLE_ENTRIES_COUNT);
SYS(fail, "ip -f mpls route add 1000 dev lo");
SYS(fail, "ip link set lo up");
SYS(fail, "sysctl -qw net.mpls.conf.testtun0.input=1");
SYS(fail, "sysctl -qw net.ipv4.conf.lo.rp_filter=0");
return 0;
fail:
return -1;
}
/*
 * Attach the subtest's encap program to veth1 in the current network
 * namespace.
 *
 * NOTE(review): the -1 argument presumably means "no ingress program";
 * confirm against the tc_prog_attach() prototype.
 *
 * Return: the value returned by tc_prog_attach().
 */
static int configure_encapsulation(struct subtest_cfg *cfg)
{
	return tc_prog_attach("veth1", -1, cfg->client_egress_prog_fd);
}
static int configure_kernel_decapsulation(struct subtest_cfg *cfg)
{
struct nstoken *nstoken = open_netns(SERVER_NS);
int ret = -1;
if (!ASSERT_OK_PTR(nstoken, "open server ns"))
return ret;
if (cfg->configure_fou_rx_port &&
!ASSERT_OK(add_fou_rx_port(cfg), "configure FOU RX port"))
goto fail;
SYS(fail, "ip link add name testtun0 type %s %s remote %s local %s %s",
cfg->iproute_tun_type, cfg->tmode ? cfg->tmode : "",
cfg->tunnel_client_addr, cfg->tunnel_server_addr,
cfg->extra_decap_mod_args);
if (cfg->tunnel_need_veth_mac &&
!ASSERT_OK(update_tunnel_intf_addr(cfg), "update testtun0 mac"))
goto fail;
if (cfg->configure_mpls &&
(!ASSERT_OK(configure_kernel_for_mpls(cfg),
"configure MPLS decap")))
goto fail;
SYS(fail, "sysctl -qw net.ipv4.conf.all.rp_filter=0");
SYS(fail, "sysctl -qw net.ipv4.conf.testtun0.rp_filter=0");
SYS(fail, "ip link set dev testtun0 up");
ret = 0;
fail:
close_netns(nstoken);
return ret;
}
/*
 * Undo configure_kernel_decapsulation() in the current network namespace:
 * delete testtun0, then the MPLS route and the FOU RX port if the subtest
 * uses them. The device and route deletions do not report failures.
 */
static void remove_kernel_decapsulation(struct subtest_cfg *cfg)
{
	SYS_NOFAIL("ip link del testtun0");

	if (cfg->configure_mpls) {
		SYS_NOFAIL("ip -f mpls route del 1000 dev lo");
	}

	if (cfg->configure_fou_rx_port) {
		(void)del_fou_rx_port(cfg);
	}
}
static int configure_ebpf_decapsulation(struct subtest_cfg *cfg)
{
struct nstoken *nstoken = open_netns(SERVER_NS);
int ret = -1;
if (!ASSERT_OK_PTR(nstoken, "open server ns"))
return ret;
if (!cfg->expect_kern_decap_failure)
SYS(fail, "ip link del testtun0");
if (!ASSERT_OK(tc_prog_attach("veth2", cfg->server_ingress_prog_fd, -1),
"attach_program"))
goto fail;
ret = 0;
fail:
close_netns(nstoken);
return ret;
}
/*
 * Run one subtest: start the server, then from CLIENT_NS check that data
 * flows (1) without any encapsulation, (2) with BPF encapsulation and kernel
 * decapsulation, unless the subtest is flagged as not supporting it, and
 * (3) with BPF encapsulation and BPF decapsulation.
 *
 * The listening socket is closed and the client namespace is left on every
 * path taken after the server has started.
 */
static void run_test(struct subtest_cfg *cfg)
{
	bool use_kern_decap = !cfg->expect_kern_decap_failure;
	struct nstoken *client_ns;

	if (!ASSERT_OK(run_server(cfg), "run server"))
		return;

	client_ns = open_netns(CLIENT_NS);
	if (!ASSERT_OK_PTR(client_ns, "open client ns"))
		goto out;

	/* Plain connectivity first */
	if (!ASSERT_OK(send_and_test_data(cfg, true),
		       "connect without any encap"))
		goto out;

	/* Encapsulate on the client side */
	if (!ASSERT_OK(configure_encapsulation(cfg), "configure encapsulation"))
		goto out;

	/* Let the kernel decapsulate, when the tunnel type allows it */
	if (use_kern_decap) {
		if (!ASSERT_OK(configure_kernel_decapsulation(cfg),
			       "configure kernel decapsulation"))
			goto out;
		if (!ASSERT_OK(send_and_test_data(cfg, true),
			       "connect with encap prog and kern decap"))
			goto out;
	}

	/* Finally decapsulate with the BPF program */
	if (!ASSERT_OK(configure_ebpf_decapsulation(cfg),
		       "configure ebpf decapsulation"))
		goto out;

	ASSERT_OK(send_and_test_data(cfg, true),
		  "connect with encap and decap progs");
out:
	close_netns(client_ns);
	close(cfg->server_fd);
}
/*
 * Global test setup: fill tx_buffer with random bytes, create the client and
 * server network namespaces and connect them with a veth pair (veth1 in
 * CLIENT_NS, veth2 in SERVER_NS, both with MTU 1500 and fixed MAC addresses).
 *
 * Return: 0 on success, -1 on failure. On failure, every namespace created
 * by this function is deleted again.
 */
static int setup(void)
{
	struct nstoken *nstoken_client, *nstoken_server;
	int fd, err;

	fd = open("/dev/urandom", O_RDONLY);
	if (!ASSERT_OK_FD(fd, "open urandom"))
		goto fail;
	err = read(fd, tx_buffer, BUFFER_LEN);
	close(fd);
	if (!ASSERT_EQ(err, BUFFER_LEN, "read random bytes"))
		goto fail;

	if (!ASSERT_OK(make_netns(CLIENT_NS), "create client ns"))
		goto fail;
	/* The client namespace exists from here on: do not leak it */
	if (!ASSERT_OK(make_netns(SERVER_NS), "create server ns"))
		goto fail_delete_client_ns;

	nstoken_client = open_netns(CLIENT_NS);
	if (!ASSERT_OK_PTR(nstoken_client, "open client ns"))
		goto fail_delete_ns;
	SYS(fail_close_ns_client, "ip link add %s type veth peer name %s",
	    "veth1 mtu 1500 netns " CLIENT_NS " address " MAC_ADDR_VETH1,
	    "veth2 mtu 1500 netns " SERVER_NS " address " MAC_ADDR_VETH2);
	SYS(fail_close_ns_client, "ethtool -K veth1 tso off");
	SYS(fail_close_ns_client, "ip link set veth1 up");

	nstoken_server = open_netns(SERVER_NS);
	if (!ASSERT_OK_PTR(nstoken_server, "open server ns"))
		goto fail_close_ns_client;
	SYS(fail_close_ns_server, "ip link set veth2 up");

	close_netns(nstoken_server);
	close_netns(nstoken_client);
	return 0;

fail_close_ns_server:
	close_netns(nstoken_server);
fail_close_ns_client:
	close_netns(nstoken_client);
fail_delete_ns:
	SYS_NOFAIL("ip netns del " SERVER_NS);
fail_delete_client_ns:
	SYS_NOFAIL("ip netns del " CLIENT_NS);
fail:
	return -1;
}
static int subtest_setup(struct test_tc_tunnel *skel, struct subtest_cfg *cfg)
{
struct nstoken *nstoken_client, *nstoken_server;
int ret = -1;
set_subtest_addresses(cfg);
if (!ASSERT_OK(set_subtest_progs(cfg, skel),
"find subtest progs"))
goto fail;
if (cfg->extra_decap_mod_args_cb)
cfg->extra_decap_mod_args_cb(cfg, cfg->extra_decap_mod_args);
nstoken_client = open_netns(CLIENT_NS);
if (!ASSERT_OK_PTR(nstoken_client, "open client ns"))
goto fail;
SYS(fail_close_client_ns,
"ip -4 addr add " IP4_ADDR_VETH1 "/24 dev veth1");
SYS(fail_close_client_ns, "ip -4 route flush table main");
SYS(fail_close_client_ns,
"ip -4 route add " IP4_ADDR_VETH2 " mtu 1450 dev veth1");
SYS(fail_close_client_ns,
"ip -6 addr add " IP6_ADDR_VETH1 "/64 dev veth1 nodad");
SYS(fail_close_client_ns, "ip -6 route flush table main");
SYS(fail_close_client_ns,
"ip -6 route add " IP6_ADDR_VETH2 " mtu 1430 dev veth1");
nstoken_server = open_netns(SERVER_NS);
if (!ASSERT_OK_PTR(nstoken_server, "open server ns"))
goto fail_close_client_ns;
SYS(fail_close_server_ns,
"ip -4 addr add " IP4_ADDR_VETH2 "/24 dev veth2");
SYS(fail_close_server_ns,
"ip -6 addr add " IP6_ADDR_VETH2 "/64 dev veth2 nodad");
ret = 0;
fail_close_server_ns:
close_netns(nstoken_server);
fail_close_client_ns:
close_netns(nstoken_client);
fail:
return ret;
}
/*
 * Per-subtest cleanup: in each namespace, remove the tc qdisc set up on the
 * veth device and flush its addresses. On the server side, also remove the
 * kernel decapsulation setup when the subtest used it.
 *
 * All commands are best effort; only a failure to enter a namespace is
 * reported.
 */
static void subtest_cleanup(struct subtest_cfg *cfg)
{
	struct nstoken *nstoken;

	nstoken = open_netns(CLIENT_NS);
	if (ASSERT_OK_PTR(nstoken, "open client ns")) {
		SYS_NOFAIL("tc qdisc delete dev veth1 parent ffff:fff1");
		SYS_NOFAIL("ip a flush veth1");
		close_netns(nstoken);
	}

	nstoken = open_netns(SERVER_NS);
	/* Label this assertion with the namespace that is actually opened */
	if (ASSERT_OK_PTR(nstoken, "open server ns")) {
		SYS_NOFAIL("tc qdisc delete dev veth2 parent ffff:fff1");
		SYS_NOFAIL("ip a flush veth2");
		if (!cfg->expect_kern_decap_failure)
			remove_kernel_decapsulation(cfg);
		close_netns(nstoken);
	}
}
/* Global teardown: remove the client and server network namespaces. */
static void cleanup(void)
{
	(void)remove_netns(CLIENT_NS);
	(void)remove_netns(SERVER_NS);
}
/*
 * Table of all tunnel subtests. The comment above each entry is the subtest
 * name, "<ebpf_tun_type>_<mac_tun_type>", as built by build_subtest_name().
 */
static struct subtest_cfg subtests_cfg[] = {
	/* ipip_none */
	{
		.ebpf_tun_type = "ipip",
		.mac_tun_type = "none",
		.iproute_tun_type = "ipip",
		.ipproto = 4,
	},
	/* ipip6_none: IPv4 traffic over IPv6 tunnel endpoints */
	{
		.ebpf_tun_type = "ipip6",
		.mac_tun_type = "none",
		.iproute_tun_type = "ip6tnl",
		.ipproto = 4,
		.tunnel_client_addr = IP6_ADDR_VETH1,
		.tunnel_server_addr = IP6_ADDR_VETH2,
	},
	/* ip6tnl_none */
	{
		.ebpf_tun_type = "ip6tnl",
		.mac_tun_type = "none",
		.iproute_tun_type = "ip6tnl",
		.ipproto = 6,
	},
	/* sit_none: IPv6 traffic over IPv4 tunnel endpoints */
	{
		.ebpf_tun_type = "sit",
		.mac_tun_type = "none",
		.iproute_tun_type = "sit",
		.ipproto = 6,
		.tunnel_client_addr = IP4_ADDR_VETH1,
		.tunnel_server_addr = IP4_ADDR_VETH2,
	},
	/* vxlan_eth */
	{
		.ebpf_tun_type = "vxlan",
		.mac_tun_type = "eth",
		.iproute_tun_type = "vxlan",
		.ipproto = 4,
		.extra_decap_mod_args_cb = vxlan_decap_mod_args_cb,
		.tunnel_need_veth_mac = true,
	},
	/* ip6vxlan_eth */
	{
		.ebpf_tun_type = "ip6vxlan",
		.mac_tun_type = "eth",
		.iproute_tun_type = "vxlan",
		.ipproto = 6,
		.extra_decap_mod_args_cb = vxlan_decap_mod_args_cb,
		.tunnel_need_veth_mac = true,
	},
	/* gre_none */
	{
		.ebpf_tun_type = "gre",
		.mac_tun_type = "none",
		.iproute_tun_type = "gre",
		.ipproto = 4,
		.test_gso = true,
	},
	/* gre_eth */
	{
		.ebpf_tun_type = "gre",
		.mac_tun_type = "eth",
		.iproute_tun_type = "gretap",
		.ipproto = 4,
		.tunnel_need_veth_mac = true,
		.test_gso = true,
	},
	/* gre_mpls */
	{
		.ebpf_tun_type = "gre",
		.mac_tun_type = "mpls",
		.iproute_tun_type = "gre",
		.ipproto = 4,
		.configure_mpls = true,
		.test_gso = true,
	},
	/* ip6gre_none */
	{
		.ebpf_tun_type = "ip6gre",
		.mac_tun_type = "none",
		.iproute_tun_type = "ip6gre",
		.ipproto = 6,
		.test_gso = true,
	},
	/* ip6gre_eth */
	{
		.ebpf_tun_type = "ip6gre",
		.mac_tun_type = "eth",
		.iproute_tun_type = "ip6gretap",
		.ipproto = 6,
		.tunnel_need_veth_mac = true,
		.test_gso = true,
	},
	/* ip6gre_mpls */
	{
		.ebpf_tun_type = "ip6gre",
		.mac_tun_type = "mpls",
		.iproute_tun_type = "ip6gre",
		.ipproto = 6,
		.configure_mpls = true,
		.test_gso = true,
	},
	/* udp_none */
	{
		.ebpf_tun_type = "udp",
		.mac_tun_type = "none",
		.iproute_tun_type = "ipip",
		.ipproto = 4,
		.extra_decap_mod_args_cb = udp_decap_mod_args_cb,
		.configure_fou_rx_port = true,
		.test_gso = true,
	},
	/* udp_eth: kernel decapsulation step is skipped */
	{
		.ebpf_tun_type = "udp",
		.mac_tun_type = "eth",
		.iproute_tun_type = "ipip",
		.ipproto = 4,
		.extra_decap_mod_args_cb = udp_decap_mod_args_cb,
		.configure_fou_rx_port = true,
		.expect_kern_decap_failure = true,
		.test_gso = true,
	},
	/* udp_mpls */
	{
		.ebpf_tun_type = "udp",
		.mac_tun_type = "mpls",
		.iproute_tun_type = "ipip",
		.ipproto = 4,
		.extra_decap_mod_args_cb = udp_decap_mod_args_cb,
		.configure_fou_rx_port = true,
		.tmode = "mode any ttl 255",
		.configure_mpls = true,
		.test_gso = true,
	},
	/* ip6udp_none */
	{
		.ebpf_tun_type = "ip6udp",
		.mac_tun_type = "none",
		.iproute_tun_type = "ip6tnl",
		.ipproto = 6,
		.extra_decap_mod_args_cb = udp_decap_mod_args_cb,
		.configure_fou_rx_port = true,
		.test_gso = true,
	},
	/* ip6udp_eth: kernel decapsulation step is skipped */
	{
		.ebpf_tun_type = "ip6udp",
		.mac_tun_type = "eth",
		.iproute_tun_type = "ip6tnl",
		.ipproto = 6,
		.extra_decap_mod_args_cb = udp_decap_mod_args_cb,
		.configure_fou_rx_port = true,
		.expect_kern_decap_failure = true,
		.test_gso = true,
	},
	/* ip6udp_mpls: kernel decapsulation step is skipped */
	{
		.ebpf_tun_type = "ip6udp",
		.mac_tun_type = "mpls",
		.iproute_tun_type = "ip6tnl",
		.ipproto = 6,
		.extra_decap_mod_args_cb = udp_decap_mod_args_cb,
		.configure_fou_rx_port = true,
		.tmode = "mode any ttl 255",
		.expect_kern_decap_failure = true,
		.test_gso = true,
	},
};
void test_tc_tunnel(void)
{
struct test_tc_tunnel *skel;
struct subtest_cfg *cfg;
int i, ret;
skel = test_tc_tunnel__open_and_load();
if (!ASSERT_OK_PTR(skel, "skel open and load"))
return;
if (!ASSERT_OK(setup(), "global setup"))
goto out;
for (i = 0; i < ARRAY_SIZE(subtests_cfg); i++) {
cfg = &subtests_cfg[i];
ret = build_subtest_name(cfg, cfg->name, TEST_NAME_MAX_LEN);
if (ret < 0 || !test__start_subtest(cfg->name))
continue;
if (subtest_setup(skel, cfg) == 0)
run_test(cfg);
subtest_cleanup(cfg);
}
cleanup();
out:
test_tc_tunnel__destroy(skel);
}