diff options
author | Toni Uhlig <matzeton@googlemail.com> | 2025-06-16 17:09:43 +0200 |
---|---|---|
committer | Toni Uhlig <matzeton@googlemail.com> | 2025-06-18 15:16:30 +0200 |
commit | 0730e77eb4aa9841e90a17f190b9ae7d80565054 (patch) | |
tree | b214414fb23070fcfcbfc880f78eb7b4cef987c8 |
Initial commit (branch: main)
Signed-off-by: Toni Uhlig <matzeton@googlemail.com>
-rw-r--r-- | Makefile | 53 | ||||
-rw-r--r-- | README.md | 13 | ||||
-rw-r--r-- | benchmark-user-with-burst.png | bin | 0 -> 5616 bytes | |||
-rw-r--r-- | benchmark-user.png | bin | 0 -> 5509 bytes | |||
-rw-r--r-- | benchmark-xdp-with-burst.png | bin | 0 -> 5147 bytes | |||
-rw-r--r-- | benchmark-xdp.png | bin | 0 -> 4950 bytes | |||
-rw-r--r-- | common.h | 5 | ||||
-rwxr-xr-x | gnuplot.script | 12 | ||||
-rw-r--r-- | user_client.c | 85 | ||||
-rw-r--r-- | user_server.c | 52 | ||||
-rw-r--r-- | xdp_checksum.h | 94 | ||||
-rw-r--r-- | xdp_loader.c | 70 | ||||
-rw-r--r-- | xdp_manip.h | 28 | ||||
-rw-r--r-- | xdp_parser.h | 132 | ||||
-rw-r--r-- | xdp_udp_handler.c | 127 |
15 files changed, 671 insertions, 0 deletions
diff --git a/Makefile b/Makefile
new file mode 100644
index 0000000..3edcf37
--- /dev/null
+++ b/Makefile
@@ -0,0 +1,53 @@
+# Compiler and the interface the XDP program gets attached to.
+# Both can be overridden on the command line, e.g. `make run IF=eth0`.
+CC=clang-20
+IF=lo
+# `make DEBUG=1` builds the BPF object with XDP_DEBUG defined.
+ifneq ($(DEBUG),)
+BPF_CFLAGS+=-DXDP_DEBUG=1
+endif
+
+# These targets are commands, not files.
+.PHONY: all run clean detach benchmark
+
+all: xdp_udp_handler.o xdp_loader user_server user_client
+
+# Load and attach the XDP program to $(IF).
+run: all
+	sudo ./xdp_loader $(IF)
+
+clean:
+	rm -f ./xdp_udp_handler.o ./xdp_loader ./user_server ./user_client
+	rm -f ./fit.log ./rtt.dat ./rtt_tmp.dat
+
+# Remove the XDP program from $(IF).
+detach:
+	sudo bpftool net detach xdp dev $(IF)
+
+# Measure round-trip times against the user space server first, then against
+# the XDP handler. Each run is plotted twice: all samples and the last 90.
+benchmark: all
+	@echo 'Running benchmark in user space..'
+	./user_server &
+	./user_client >./rtt_tmp.dat
+	cp ./rtt_tmp.dat ./rtt.dat
+	./gnuplot.script
+	mv -v rtt.png benchmark-user-with-burst.png
+	# use only the last 90 data points
+	tail -n90 ./rtt_tmp.dat >./rtt.dat
+	./gnuplot.script
+	mv -v rtt.png benchmark-user.png
+	@echo 'Running benchmark with XDP..'
+	$(MAKE) detach
+	$(MAKE) run
+	./user_client >./rtt_tmp.dat
+	cp ./rtt_tmp.dat ./rtt.dat
+	./gnuplot.script
+	mv -v rtt.png benchmark-xdp-with-burst.png
+	# use only the last 90 data points
+	tail -n90 ./rtt_tmp.dat >./rtt.dat
+	./gnuplot.script
+	mv -v rtt.png benchmark-xdp.png
+	$(MAKE) detach
+
+# Every header included by xdp_udp_handler.c is a prerequisite, so that a
+# header change triggers a rebuild of the BPF object.
+xdp_udp_handler.o: xdp_udp_handler.c xdp_parser.h xdp_checksum.h xdp_manip.h common.h
+	$(CC) -Wall -Wextra -O3 -g -target bpf -D__TARGET_ARCH_x86 $(BPF_CFLAGS) -c xdp_udp_handler.c -o xdp_udp_handler.o
+
+xdp_loader: xdp_loader.c
+	$(CC) -Wall -Wextra -O3 -g $(CFLAGS) xdp_loader.c -o xdp_loader $(shell pkg-config --cflags --libs libbpf) $(LDFLAGS)
+
+user_server: user_server.c common.h
+	$(CC) -Wall -Wextra -O3 -g $(CFLAGS) user_server.c -o user_server
+
+user_client: user_client.c common.h
+	$(CC) -Wall -Wextra -O3 -g $(CFLAGS) user_client.c -o user_client
diff --git a/README.md b/README.md
new file mode 100644
index 0000000..1f013d6
--- /dev/null
+++ b/README.md
@@ -0,0 +1,13 @@
+Testing the performance of ring0 for packet decoding and manipulation.
+
+Just run: `make benchmark`, that's it!
+
+XDP (ring0):
+
+
+
+
+Classic (ring3):
+
+
+
diff --git a/benchmark-user-with-burst.png b/benchmark-user-with-burst.png
Binary files differ
new file mode 100644
index 0000000..f0d669c
--- /dev/null
+++ b/benchmark-user-with-burst.png
diff --git a/benchmark-user.png b/benchmark-user.png
Binary files differ
new file mode 100644
index 0000000..bf544b9
--- /dev/null
+++ b/benchmark-user.png
diff --git a/benchmark-xdp-with-burst.png b/benchmark-xdp-with-burst.png
Binary files differ
new file mode 100644
index 0000000..c5f0bcb
--- /dev/null
+++ b/benchmark-xdp-with-burst.png
diff --git a/benchmark-xdp.png b/benchmark-xdp.png
Binary files differ
new file mode 100644
index 0000000..1226523
--- /dev/null
+++ b/benchmark-xdp.png
diff --git a/common.h b/common.h
new file mode 100644
index 0000000..da4cb92
--- /dev/null
+++ b/common.h
@@ -0,0 +1,5 @@
+#ifndef COMMON_H
+#define COMMON_H 1
+
+/* UDP port used by the client, the user space server and the XDP handler. */
+#define UDP_PORT 50000
+/* Number of request/response round trips performed by client and server. */
+#define MAX_UDP_PACKETS 100
+
+/*
+ * Request and response payload bytes. Both expand to a comma separated list
+ * and are meant to be used inside an array initializer.
+ */
+#define PAYLOAD_REQUEST 0x01, 0x08, 0x00, 0x00, 0x05, 0x01, 0x01, 0x01, 0x05, 0x22, 0x01
+#define PAYLOAD_RESPONSE 0x01, 0x04, 0x00, 0x00, 0x05, 0x01, 0x01, 0x02
+
+#endif
diff --git a/gnuplot.script b/gnuplot.script
new file mode 100755
index 0000000..a75243a
--- /dev/null
+++ b/gnuplot.script
@@ -0,0 +1,12 @@
+#!/usr/bin/env gnuplot
+
+set terminal png
+set output "rtt.png"
+f(x) = mean_rtt
+fit f(x) 'rtt.dat' u 1:2 via mean_rtt
+stddev_rtt = sqrt(FIT_WSSR / (FIT_NDF + 1 ))
+set title "Packets vs. 
RTT"
+set xlabel "Packet Number"
+set ylabel "RTT in usec"
+
+plot "rtt.dat" with lines, mean_rtt w l lt 3, stddev_rtt w l lt 3 lc rgb '#bbbbdd'
diff --git a/user_client.c b/user_client.c
new file mode 100644
index 0000000..68c79af
--- /dev/null
+++ b/user_client.c
@@ -0,0 +1,85 @@
+// udp client driver program
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <strings.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "common.h"
+
+// PAYLOAD_REQUEST followed by one extra trailing byte (0xFF).
+static const unsigned char payload_request[] = { PAYLOAD_REQUEST, 0xFF};
+
+// Per-packet timestamps in microseconds, filled in by set_ts().
+struct {
+  unsigned long long int send_start;
+  unsigned long long int received_end;
+} timings[MAX_UDP_PACKETS];
+
+/*
+ * Store the current gettimeofday() time, in microseconds, in *out.
+ * Returns 0 on success and 1 if gettimeofday() failed.
+ */
+static int set_ts(unsigned long long int * const out) {
+  struct timeval tv = {};
+
+  if (gettimeofday(&tv, NULL) != 0)
+    return 1;
+
+  // Widen before multiplying so the product cannot overflow a narrow time_t.
+  *out = ((unsigned long long int)tv.tv_sec * 1000 * 1000) + tv.tv_usec;
+  return 0;
+}
+
+/*
+ * Send MAX_UDP_PACKETS requests to 127.0.0.1:UDP_PORT, wait for a reply to
+ * each one and print a "<packet_number> <rtt>" line per completed round trip.
+ * Returns 0 only if every round trip completed.
+ */
+int main(void) {
+  char buffer[BUFSIZ];
+  int sockfd;
+  struct sockaddr_in servaddr = {};
+
+  servaddr.sin_addr.s_addr = inet_addr("127.0.0.1");
+  servaddr.sin_port = htons(UDP_PORT);
+  servaddr.sin_family = AF_INET;
+  sockfd = socket(AF_INET, SOCK_DGRAM, 0);
+  if (sockfd < 0) {
+    perror("udp socket");
+    return 1;
+  }
+
+  if (connect(sockfd, (struct sockaddr *)&servaddr, sizeof(servaddr)) < 0) {
+    perror("udp connect");
+    close(sockfd);
+    return 1;
+  }
+
+  size_t done; // number of completed round trips
+  for (done = 0; done < MAX_UDP_PACKETS; ++done) {
+    if (set_ts(&timings[done].send_start) != 0) {
+      perror("gettimeofday");
+      break;
+    }
+
+    if (sendto(sockfd, payload_request, sizeof(payload_request), 0, NULL, 0) <
+        0) {
+      perror("udp sendto");
+      break;
+    }
+
+    if (recvfrom(sockfd, buffer, sizeof(buffer), 0, (struct sockaddr *)NULL,
+                 NULL) < 0) {
+      perror("udp recvfrom");
+      break;
+    }
+
+    if (set_ts(&timings[done].received_end) != 0) {
+      perror("gettimeofday");
+      break;
+    }
+  }
+
+  close(sockfd);
+
+  printf("# packet_number rtt\n");
+  // Only completed round trips have both timestamps set, so an aborted run
+  // does not emit bogus samples.
+  for (size_t i = 0; i < done; ++i) {
+    // The cast makes the argument match %lld.
+    printf("%zu %lld\n", i,
+           (long long)(timings[i].received_end - timings[i].send_start));
+  }
+
+  return !(done == MAX_UDP_PACKETS);
+}
diff --git a/user_server.c b/user_server.c
new file mode 100644
index 0000000..de2054f
--- /dev/null
+++ b/user_server.c
@@ -0,0 +1,52 @@
+#include <arpa/inet.h>
+#include <netinet/in.h>
+#include <stdio.h>
+#include <strings.h>
+#include <sys/socket.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "common.h"
+
+static const unsigned char payload_response[] = {PAYLOAD_RESPONSE};
+
+/*
+ * Answer MAX_UDP_PACKETS datagrams received on 127.0.0.1:UDP_PORT with
+ * PAYLOAD_RESPONSE, then exit. The request content is not inspected.
+ * Returns 0 only if all datagrams were answered.
+ */
+int main(void) {
+  unsigned char buffer[BUFSIZ];
+  int listenfd;
+  socklen_t len;
+  struct sockaddr_in servaddr = {};
+  struct sockaddr_in cliaddr;
+
+  listenfd = socket(AF_INET, SOCK_DGRAM, 0);
+  if (listenfd < 0) {
+    perror("udp socket");
+    return 1;
+  }
+
+  servaddr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+  servaddr.sin_port = htons(UDP_PORT);
+  servaddr.sin_family = AF_INET;
+
+  if (bind(listenfd, (struct sockaddr *)&servaddr, sizeof(servaddr)) != 0) {
+    perror("udp bind");
+    close(listenfd);
+    return 1;
+  }
+
+  size_t i;
+  ssize_t n;
+  for (i = 0; i < MAX_UDP_PACKETS; ++i) {
+    len = sizeof(cliaddr);
+    n = recvfrom(listenfd, buffer, sizeof(buffer), 0,
+                 (struct sockaddr *)&cliaddr, &len);
+    if (n < 0) {
+      perror("udp recvfrom");
+      break;
+    }
+
+    // Reply to the sender, using the address length recvfrom() reported.
+    n = sendto(listenfd, payload_response, sizeof(payload_response), 0,
+               (struct sockaddr *)&cliaddr, len);
+    if (n < 0) {
+      perror("udp sendto");
+      break;
+    }
+  }
+
+  close(listenfd);
+
+  return !(i == MAX_UDP_PACKETS);
+}
diff --git a/xdp_checksum.h b/xdp_checksum.h
new file mode 100644
index 0000000..1d35565
--- /dev/null
+++ b/xdp_checksum.h
@@ -0,0 +1,94 @@
+#ifndef XDP_CHECKSUM_H
+#define XDP_CHECKSUM_H 1
+
+#include <linux/ip.h>
+#include <linux/udp.h>
+#include <stdlib.h>
+
+/*
+ * Compute the IPv4 header checksum, skipping the checksum field itself.
+ * NOTE(review): the loop covers at most 10 words (20 bytes), so a header
+ * with options (ihl > 5) is only partially summed.
+ */
+static __always_inline __u16 ip_checksum(struct iphdr *iph) {
+  __u32 sum = 0;
+  __u16 *ptr = (__u16 *)iph;
+
+// IP header is iph->ihl * 4 bytes in size --> iph->ihl * 2 16 bit words
+#pragma unroll
+  for (int i = 0; i < 10; i++) { // max 20 bytes (5 * 4)
+    if (i >= iph->ihl * 2)
+      break;
+    if (i == 5)
+      continue; // checksum field at 
offset 10–11, skip for calc
+    sum += (__u32)ptr[i];
+  }
+
+  while (sum >> 16)
+    sum = (sum & 0xFFFF) + (sum >> 16);
+
+  return ~sum;
+}
+
+/* Fold a 32 bit accumulator into 16 bits and return its complement. */
+static __always_inline __u16 csum_fold_helper(__u32 sum) {
+  while (sum >> 16)
+    sum = (sum & 0xffff) + (sum >> 16);
+  return ~sum;
+}
+
+/*
+ * Add value to sum.
+ * NOTE(review): the 32 bit result is truncated by the __u16 return type;
+ * confirm that this is intended before using this helper.
+ */
+static __always_inline __u16 csum_add(__u32 sum, __u32 value) {
+  sum += value;
+  return sum;
+}
+
+/* Add a 16 bit value to sum with end-around carry. */
+static __always_inline __u32 csum16_add(__u32 sum, __u16 val) {
+  sum += val;
+  if (sum > 0xffff)
+    sum -= 0xffff;
+  return sum;
+}
+
+/*
+ * Compute the UDP checksum of an IPv4 packet over the pseudo header
+ * (addresses, protocol, UDP length), the UDP header and the payload.
+ *
+ * Words are summed exactly as they are stored in the packet, so the result
+ * can be written to udph->check without byte swapping. The checksum field
+ * itself counts as zero. Nothing at or beyond data_end is read and at most
+ * 512 payload bytes are covered.
+ */
+static __always_inline __u16 udp_checksum(struct iphdr *iph,
+                                          struct udphdr *udph, void *data_end) {
+  __u32 sum = 0;
+
+  // IP header source/dest address + layer4 protocol
+  sum = csum16_add(sum, (__u16)(iph->saddr >> 16));
+  sum = csum16_add(sum, (__u16)(iph->saddr & 0xffff));
+  sum = csum16_add(sum, (__u16)(iph->daddr >> 16));
+  sum = csum16_add(sum, (__u16)(iph->daddr & 0xffff));
+  sum = csum16_add(sum, __constant_htons(IPPROTO_UDP));
+  // UDP header length
+  sum = csum16_add(sum, udph->len);
+  // UDP header, except for its checksum field (word 3), which counts as zero
+  __u16 *ptr = (__u16 *)udph;
+#pragma unroll
+  for (size_t i = 0; i < sizeof(struct udphdr) / 2; i++) {
+    if ((void *)(ptr + 1) > data_end)
+      break;
+    if (i != 3)
+      sum = csum16_add(sum, *ptr);
+    ptr++;
+  }
+
+  // UDP payload checksum
+  void *payload = (void *)udph + sizeof(struct udphdr);
+  int payload_len = __constant_ntohs(udph->len) - sizeof(struct udphdr);
+  ptr = (__u16 *)payload;
+
+  for (int i = 0; i < 256; i++) { // max 512 bytes
+    if (payload_len < 2) // leave a trailing odd byte to the padding below
+      break;
+    if ((void *)(ptr + 1) > data_end)
+      break;
+    sum = csum16_add(sum, *ptr);
+    ptr++;
+    payload_len -= 2;
+  }
+
+  // if payload length is odd: pad with null
+  if (payload_len == 1) {
+    __u8 *last = (__u8 *)ptr;
+    if ((void *)(last + 1) <= data_end) {
+      // htons() puts the byte first in memory order on either endianness,
+      // which matches the unswapped word reads above
+      sum = csum16_add(sum, __constant_htons((__u16)(*last << 8)));
+    }
+  }
+
+  // RFC 768: a computed checksum of zero is transmitted as all ones
+  __u16 csum = csum_fold_helper(sum);
+  return csum ? csum : 0xffff;
+}
+
+#endif
diff --git a/xdp_loader.c b/xdp_loader.c
new file mode 100644
index 0000000..6f620b8
--- /dev/null
+++ b/xdp_loader.c
@@ -0,0 +1,70 @@
+#include 
<arpa/inet.h>
+#include <errno.h>
+#include <net/if.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <linux/if_link.h>
+
+/*
+ * Usage: xdp_loader <iface>
+ *
+ * Load xdp_udp_handler.o from the current directory and attach its
+ * "xdp_udp_handler" program to <iface>. Driver mode is tried first; if that
+ * fails the attach is retried without a mode flag.
+ * Returns 0 on success and 1 on any error.
+ */
+int main(int argc, char **argv) {
+  if (argc != 2) {
+    fprintf(stderr, "Usage: %s <iface>\n", argv[0]);
+    return 1;
+  }
+
+  const char *const iface = argv[1];
+
+  int ifindex = if_nametoindex(iface);
+  if (!ifindex) {
+    perror("if_nametoindex");
+    return 1;
+  }
+
+  struct bpf_object *obj;
+  struct bpf_program *prog;
+  int prog_fd;
+  int rc = 1;
+  const char *const xdp_file = "xdp_udp_handler.o";
+
+  obj = bpf_object__open_file(xdp_file, NULL);
+  if (!obj) {
+    fprintf(stderr, "Error opening BPF object file: `%s'\n", xdp_file);
+    return 1;
+  }
+
+  /* From here on every exit path goes through `out` to release obj. */
+  if (bpf_object__load(obj)) {
+    fprintf(stderr, "Error loading BPF object\n");
+    goto out;
+  }
+
+  prog = bpf_object__find_program_by_name(obj, "xdp_udp_handler");
+  if (!prog) {
+    fprintf(stderr, "Couldn't find program\n");
+    goto out;
+  }
+
+  prog_fd = bpf_program__fd(prog);
+  if (prog_fd < 0) {
+    fprintf(stderr, "Invalid program FD\n");
+    goto out;
+  }
+
+  printf("Attaching BPF program in driver (native) mode..\n");
+  if (bpf_xdp_attach(ifindex, prog_fd,
+                     XDP_FLAGS_UPDATE_IF_NOEXIST | XDP_FLAGS_DRV_MODE,
+                     NULL) < 0) {
+    perror("bpf_xdp_attach");
+    printf("Attaching BPF program in kernel mode..\n");
+    if (bpf_xdp_attach(ifindex, prog_fd, XDP_FLAGS_UPDATE_IF_NOEXIST, NULL) <
+        0) {
+      perror("bpf_xdp_attach");
+      goto out;
+    }
+  }
+
+  printf("XDP program loaded\n");
+  rc = 0;
+
+out:
+  /*
+   * The attachment is expected to outlive this process (it is removed with
+   * `make detach`), so releasing the object here is the same cleanup the
+   * process exit performed before.
+   */
+  bpf_object__close(obj);
+  return rc;
+}
diff --git a/xdp_manip.h b/xdp_manip.h
new file mode 100644
index 0000000..70eba28
--- /dev/null
+++ b/xdp_manip.h
@@ -0,0 +1,28 @@
+#ifndef XDP_MANIP_H
+#define XDP_MANIP_H 1
+
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/udp.h>
+
+/* Exchange the Ethernet source and destination addresses in place. */
+static __always_inline void swap_eth_addr(struct ethhdr *const ethhdr) {
+  unsigned char src_eth[ETH_ALEN];
+
+  __builtin_memcpy(src_eth, ethhdr->h_source, sizeof(ethhdr->h_source));
+  
__builtin_memcpy(ethhdr->h_source, ethhdr->h_dest, sizeof(ethhdr->h_source));
+  __builtin_memcpy(ethhdr->h_dest, src_eth, sizeof(src_eth));
+}
+
+/* Exchange the IPv4 source and destination addresses in place. */
+static __always_inline void swap_ip4_addr(struct iphdr *const iphdr) {
+  __be32 src_ip = iphdr->saddr;
+  iphdr->saddr = iphdr->daddr;
+  iphdr->daddr = src_ip;
+}
+
+/* Exchange the UDP source and destination ports in place. */
+static __always_inline void swap_udp_port(struct udphdr *const udphdr) {
+  __be16 src_port = udphdr->source;
+  udphdr->source = udphdr->dest;
+  udphdr->dest = src_port;
+}
+
+#endif
diff --git a/xdp_parser.h b/xdp_parser.h
new file mode 100644
index 0000000..a227204
--- /dev/null
+++ b/xdp_parser.h
@@ -0,0 +1,132 @@
+#ifndef XDP_PARSER_H
+#define XDP_PARSER_H 1
+
+#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+#include <stdlib.h>
+
+/* Maximum number of stacked VLAN tags parse_ethhdr_vlan() steps over. */
+#ifndef VLAN_MAX_DEPTH
+#define VLAN_MAX_DEPTH 2
+#endif
+
+#define VLAN_VID_MASK 0x0fff /* VLAN Identifier */
+
+/*
+ * Parse position inside the packet. The parse_*() helpers read the header at
+ * pos and advance it; pos_u8 is the same pointer typed for byte access.
+ */
+struct hdr_cursor {
+  union {
+    void *pos;
+    __u8 *pos_u8;
+  };
+};
+
+/* Layout of one VLAN tag as read by parse_ethhdr_vlan(). */
+struct vlan_hdr {
+  __be16 h_vlan_TCI;
+  __be16 h_vlan_encapsulated_proto;
+};
+
+/* VLAN ids collected by parse_ethhdr_vlan(), one slot per tag level. */
+struct collect_vlans {
+  __u16 id[VLAN_MAX_DEPTH];
+};
+
+/* Return 1 if h_proto (network byte order) is 802.1Q or 802.1AD, else 0. */
+static __always_inline int proto_is_vlan(__u16 h_proto) {
+  return !!(h_proto == __constant_htons(ETH_P_8021Q) ||
+            h_proto == __constant_htons(ETH_P_8021AD));
+}
+
+/*
+ * Parse the Ethernet header at nh->pos and step over up to VLAN_MAX_DEPTH
+ * VLAN tags.
+ *
+ * On success *ethhdr points at the Ethernet header, nh->pos is advanced past
+ * it and past every VLAN tag consumed, and the EtherType of the next header
+ * is returned in network byte order. If vlans is not NULL, the id of each
+ * consumed tag is stored in vlans->id[].
+ *
+ * Returns -1 if the Ethernet header does not fit before data_end.
+ */
+static __always_inline int parse_ethhdr_vlan(struct hdr_cursor *nh,
+                                             void *data_end,
+                                             struct ethhdr **ethhdr,
+                                             struct collect_vlans *vlans) {
+  struct ethhdr *eth = nh->pos;
+  int hdrsize = sizeof(*eth);
+  struct vlan_hdr *vlh;
+  __u16 h_proto;
+  int i;
+
+  if (nh->pos + hdrsize > data_end)
+    return -1;
+
+  nh->pos += hdrsize;
+  *ethhdr = eth;
+  vlh = nh->pos;
+  h_proto = eth->h_proto;
+
+#pragma unroll
+  for (i = 0; i < VLAN_MAX_DEPTH; i++) {
+    /* stop at the first EtherType that is not a VLAN tag */
+    if (!proto_is_vlan(h_proto))
+      break;
+
+    /* the tag must lie completely inside the packet before it is read */
+    if (vlh + 1 > (struct vlan_hdr *)data_end)
+      break;
+
+    h_proto = vlh->h_vlan_encapsulated_proto;
+    if (vlans) /* collect VLAN ids */
+      vlans->id[i] = (__constant_ntohs(vlh->h_vlan_TCI) & VLAN_VID_MASK);
+
+    
vlh++;
+  }
+
+  nh->pos = vlh;
+  return h_proto; /* network-byte-order */
+}
+
+/* Same as parse_ethhdr_vlan(), but without collecting the VLAN ids. */
+static __always_inline int parse_ethhdr(struct hdr_cursor *nh, void *data_end,
+                                        struct ethhdr **ethhdr) {
+  return parse_ethhdr_vlan(nh, data_end, ethhdr, NULL);
+}
+
+/*
+ * Parse the fixed IPv6 header at nh->pos.
+ * On success *ip6hdr points at the header, nh->pos is advanced past it and
+ * its nexthdr value is returned; extension headers are not followed.
+ * Returns -1 if the header does not fit before data_end.
+ */
+static __always_inline int parse_ip6hdr(struct hdr_cursor *nh, void *data_end,
+                                        struct ipv6hdr **ip6hdr) {
+  struct ipv6hdr *ip6h = nh->pos;
+
+  if (ip6h + 1 > (struct ipv6hdr *)data_end)
+    return -1;
+
+  nh->pos = ip6h + 1;
+  *ip6hdr = ip6h;
+
+  return ip6h->nexthdr;
+}
+
+/*
+ * Parse the IPv4 header at nh->pos.
+ * On success *iphdr points at the header, nh->pos is advanced by ihl * 4
+ * bytes and the protocol field is returned.
+ * Returns -1 if the header does not fit before data_end or if ihl describes
+ * a header shorter than struct iphdr.
+ */
+static __always_inline int parse_iphdr(struct hdr_cursor *nh, void *data_end,
+                                       struct iphdr **iphdr) {
+  struct iphdr *iph = (struct iphdr *)nh->pos;
+  size_t hdrsize;
+
+  /* the fixed part must be readable before ihl is looked at */
+  if (iph + 1 > (struct iphdr *)data_end)
+    return -1;
+
+  hdrsize = iph->ihl * 4;
+  if (hdrsize < sizeof(*iph))
+    return -1;
+
+  /* Variable-length IPv4 header, need to use byte-based arithmetic */
+  if (nh->pos + hdrsize > data_end)
+    return -1;
+
+  nh->pos += hdrsize;
+  *iphdr = iph;
+
+  return iph->protocol;
+}
+
+/*
+ * Parse the UDP header at nh->pos.
+ * On success *udphdr points at the header, nh->pos is advanced past it and
+ * the payload length (UDP length field minus the header size) is returned.
+ * Returns -1 if the header does not fit before data_end or if the length
+ * field is smaller than the header itself.
+ */
+static __always_inline int parse_udphdr(struct hdr_cursor *nh, void *data_end,
+                                        struct udphdr **udphdr) {
+  int len;
+  struct udphdr *h = nh->pos;
+
+  if (h + 1 > (struct udphdr *)data_end)
+    return -1;
+
+  nh->pos = h + 1;
+  *udphdr = h;
+
+  len = __constant_ntohs(h->len) - sizeof(struct udphdr);
+  if (len < 0)
+    return -1;
+
+  return len;
+}
+
+#endif
diff --git a/xdp_udp_handler.c b/xdp_udp_handler.c
new file mode 100644
index 0000000..930667f
--- /dev/null
+++ b/xdp_udp_handler.c
@@ -0,0 +1,127 @@
+#include <linux/bpf.h>
+#include <linux/in.h>
+
+#include <bpf/bpf_helpers.h>
+
+#include "xdp_checksum.h"
+#include "xdp_manip.h"
+#include "xdp_parser.h"
+
+/* dbg_bpf_printk() forwards to bpf_printk() only when XDP_DEBUG is defined;
+ * otherwise it expands to nothing. */
+#ifdef XDP_DEBUG
+#define dbg_bpf_printk(fmt, args...) bpf_printk(fmt, ##args)
+#else
+#define dbg_bpf_printk(fmt, args...) 
+#endif
+
+#include "common.h"
+
+/*
+ * XDP entry point: answer UDP ping requests in place.
+ *
+ * An IPv4/UDP packet to UDP_PORT whose payload starts with PAYLOAD_REQUEST
+ * and carries at least one more byte is rewritten into a PAYLOAD_RESPONSE
+ * reply: Ethernet addresses, IP addresses and UDP ports are swapped, lengths
+ * and checksums are recomputed, and the packet is sent back with XDP_TX.
+ *
+ * Every other packet is passed on with XDP_PASS; this includes IPv6, which is
+ * parsed but not implemented yet. XDP_ABORTED is returned if the UDP header
+ * cannot be parsed, if shrinking the packet fails, or if the headers cannot
+ * be parsed again afterwards.
+ */
+SEC("xdp")
+int xdp_udp_handler(struct xdp_md *ctx) {
+  void *data_end = (void *)(long)ctx->data_end;
+  void *data = (void *)(long)ctx->data;
+  __u32 action = XDP_PASS; /* Default action */
+  struct hdr_cursor nh = {.pos = data};
+  int nh_type;
+  struct ethhdr *ethhdr;
+  struct iphdr *iphdr = NULL;
+  struct ipv6hdr *ip6hdr = NULL;
+  struct udphdr *udphdr;
+  static const __u8 payload_request[] = {PAYLOAD_REQUEST};
+  static const __u32 payload_request_size = sizeof(payload_request);
+  static const __u8 payload_response[] = {PAYLOAD_RESPONSE};
+  __u32 payload_size;
+
+  nh_type = parse_ethhdr(&nh, data_end, &ethhdr);
+
+  switch (__constant_ntohs(nh_type)) {
+  case ETH_P_IP:
+    nh_type = parse_iphdr(&nh, data_end, &iphdr);
+    break;
+  case ETH_P_IPV6:
+    nh_type = parse_ip6hdr(&nh, data_end, &ip6hdr);
+    break;
+  default:
+    goto out;
+  }
+
+  if (nh_type != IPPROTO_UDP) {
+    goto out;
+  }
+  if (parse_udphdr(&nh, data_end, &udphdr) < 0) {
+    action = XDP_ABORTED;
+    goto out;
+  }
+  if (udphdr->dest != __constant_htons(UDP_PORT)) {
+    goto out;
+  }
+
+  payload_size = (__u8 *)data_end - (__u8 *)nh.pos;
+  if (payload_size < payload_request_size) {
+    goto out;
+  }
+
+  /* One byte beyond the request pattern must be readable as well; it is the
+   * byte printed by the debug message below. */
+  if (nh.pos_u8 + payload_request_size + 1 > (__u8 *)data_end) {
+    goto out;
+  }
+  if (__builtin_memcmp(nh.pos_u8, payload_request, sizeof(payload_request)) !=
+      0) {
+    goto out;
+  }
+
+  dbg_bpf_printk("Received UDP Ping Request #%u with size %u", nh.pos_u8[11],
+                 payload_size);
+
+  /* Shrink UDP packet for Ping Reply */
+  if (bpf_xdp_adjust_tail(ctx, payload_request_size - payload_size) != 0) {
+    return XDP_ABORTED;
+  }
+
+  /* Set data pointers (may not be valid anymore) */
+  data = (void *)(long)ctx->data;
+  data_end = (void *)(long)ctx->data_end;
+  nh.pos = data;
+
+  if (parse_ethhdr(&nh, data_end, &ethhdr) < 0) {
+    action = XDP_ABORTED;
+    goto out;
+  }
+
+  /* The reply carries exactly the response bytes, whatever the size of the
+   * request was. */
+  payload_size = sizeof(payload_response);
+
+  if (iphdr != NULL) {
+    if (parse_iphdr(&nh, data_end, &iphdr) < 0) {
+      action = XDP_ABORTED;
+      goto out;
+    }
+    if (parse_udphdr(&nh, data_end, &udphdr) < 0) {
+      action = XDP_ABORTED;
+      goto out;
+    }
+    swap_eth_addr(ethhdr);
+    udphdr->len = __constant_htons(payload_size + sizeof(*udphdr));
+    iphdr->tot_len =
+        __constant_htons(iphdr->ihl * 4 + payload_size + sizeof(*udphdr));
+    swap_ip4_addr(iphdr);
+    iphdr->check = ip_checksum(iphdr);
+    if (nh.pos_u8 + sizeof(payload_response) > (__u8 *)data_end) {
+      action = XDP_ABORTED;
+      goto out;
+    }
+    __builtin_memcpy(nh.pos_u8, payload_response, sizeof(payload_response));
+    swap_udp_port(udphdr);
+    /* The checksum of the request must not leak into the new checksum. */
+    udphdr->check = 0;
+    udphdr->check = udp_checksum(iphdr, udphdr, data_end);
+  } else if (ip6hdr != NULL) {
+    parse_ip6hdr(&nh, data_end, &ip6hdr);
+    goto out; // TODO: Implement!
+  } else {
+    action = XDP_ABORTED;
+    goto out;
+  }
+
+  action = XDP_TX;
+out:
+  return action;
+}
+
+char _license[] SEC("license") =
+    "GPL"; // Please keep this to prevent tainting kernel |