1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
|
From: Richard Gobert <richardbgobert@gmail.com>
Date: Tue, 30 Apr 2024 16:35:54 +0200
Subject: [PATCH] net: gro: fix udp bad offset in socket lookup by adding
{inner_}network_offset to napi_gro_cb
Commits a602456 ("udp: Add GRO functions to UDP socket") and 57c67ff ("udp:
additional GRO support") introduce incorrect usage of {ip,ipv6}_hdr in the
complete phase of gro. The functions always return skb->network_header,
which in the case of encapsulated packets at the gro complete phase, is
always set to the innermost L3 of the packet. That means that calling
{ip,ipv6}_hdr for skbs which completed the GRO receive phase (both in
gro_list and *_gro_complete) when parsing an encapsulated packet's _outer_
L3/L4 may return an unexpected value.
This incorrect usage leads to a bug in GRO's UDP socket lookup.
udp{4,6}_lib_lookup_skb functions use ip_hdr/ipv6_hdr respectively. These
*_hdr functions return network_header which will point to the innermost L3,
resulting in the wrong offset being used in __udp{4,6}_lib_lookup with
encapsulated packets.
This patch adds network_offset and inner_network_offset to napi_gro_cb, and
makes sure both are set correctly.
To fix the issue, network_offsets union is used inside napi_gro_cb, in
which both the outer and the inner network offsets are saved.
Reproduction example:
Endpoint configuration example (fou + local address bind)
# ip fou add port 6666 ipproto 4
# ip link add name tun1 type ipip remote 2.2.2.1 local 2.2.2.2 encap fou encap-dport 5555 encap-sport 6666 mode ipip
# ip link set tun1 up
# ip a add 1.1.1.2/24 dev tun1
Netperf TCP_STREAM result on net-next before patch is applied:
net-next main, GRO enabled:
$ netperf -H 1.1.1.2 -t TCP_STREAM -l 5
Recv Send Send
Socket Socket Message Elapsed
Size Size Size Time Throughput
bytes bytes bytes secs. 10^6bits/sec
131072 16384 16384 5.28 2.37
net-next main, GRO disabled:
$ netperf -H 1.1.1.2 -t TCP_STREAM -l 5
Recv Send Send
Socket Socket Message Elapsed
Size Size Size Time Throughput
bytes bytes bytes secs. 10^6bits/sec
131072 16384 16384 5.01 2745.06
patch applied, GRO enabled:
$ netperf -H 1.1.1.2 -t TCP_STREAM -l 5
Recv Send Send
Socket Socket Message Elapsed
Size Size Size Time Throughput
bytes bytes bytes secs. 10^6bits/sec
131072 16384 16384 5.01 2877.38
Fixes: a6024562ffd7 ("udp: Add GRO functions to UDP socket")
Signed-off-by: Richard Gobert <richardbgobert@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Reviewed-by: Willem de Bruijn <willemb@google.com>
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
---
--- a/include/net/gro.h
+++ b/include/net/gro.h
@@ -86,6 +86,15 @@ struct napi_gro_cb {
/* used to support CHECKSUM_COMPLETE for tunneling protocols */
__wsum csum;
+
+ /* L3 offsets */
+ union {
+ struct {
+ u16 network_offset;
+ u16 inner_network_offset;
+ };
+ u16 network_offsets[2];
+ };
};
#define NAPI_GRO_CB(skb) ((struct napi_gro_cb *)(skb)->cb)
--- a/net/8021q/vlan_core.c
+++ b/net/8021q/vlan_core.c
@@ -478,6 +478,8 @@ static struct sk_buff *vlan_gro_receive(
if (unlikely(!vhdr))
goto out;
+ NAPI_GRO_CB(skb)->network_offsets[NAPI_GRO_CB(skb)->encap_mark] = hlen;
+
type = vhdr->h_vlan_encapsulated_proto;
ptype = gro_find_receive_by_type(type);
--- a/net/core/gro.c
+++ b/net/core/gro.c
@@ -373,6 +373,7 @@ static inline void skb_gro_reset_offset(
const struct skb_shared_info *pinfo = skb_shinfo(skb);
const skb_frag_t *frag0 = &pinfo->frags[0];
+ NAPI_GRO_CB(skb)->network_offset = 0;
NAPI_GRO_CB(skb)->data_offset = 0;
NAPI_GRO_CB(skb)->frag0 = NULL;
NAPI_GRO_CB(skb)->frag0_len = 0;
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1571,6 +1571,7 @@ struct sk_buff *inet_gro_receive(struct
/* The above will be needed by the transport layer if there is one
* immediately following this IP hdr.
*/
+ NAPI_GRO_CB(skb)->inner_network_offset = off;
/* Note : No need to call skb_gro_postpull_rcsum() here,
* as we already checked checksum over ipv4 header was 0
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -534,7 +534,8 @@ static inline struct sock *__udp4_lib_lo
struct sock *udp4_lib_lookup_skb(const struct sk_buff *skb,
__be16 sport, __be16 dport)
{
- const struct iphdr *iph = ip_hdr(skb);
+ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
+ const struct iphdr *iph = (struct iphdr *)(skb->data + offset);
struct net *net = dev_net(skb->dev);
int iif, sdif;
--- a/net/ipv4/udp_offload.c
+++ b/net/ipv4/udp_offload.c
@@ -718,7 +718,8 @@ EXPORT_SYMBOL(udp_gro_complete);
INDIRECT_CALLABLE_SCOPE int udp4_gro_complete(struct sk_buff *skb, int nhoff)
{
- const struct iphdr *iph = ip_hdr(skb);
+ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
+ const struct iphdr *iph = (struct iphdr *)(skb->data + offset);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
/* do fraglist only if there is no outer UDP encap (or we already processed it) */
--- a/net/ipv6/ip6_offload.c
+++ b/net/ipv6/ip6_offload.c
@@ -240,6 +240,7 @@ INDIRECT_CALLABLE_SCOPE struct sk_buff *
goto out;
skb_set_network_header(skb, off);
+ NAPI_GRO_CB(skb)->inner_network_offset = off;
flush += ntohs(iph->payload_len) != skb->len - hlen;
--- a/net/ipv6/udp.c
+++ b/net/ipv6/udp.c
@@ -275,7 +275,8 @@ static struct sock *__udp6_lib_lookup_sk
struct sock *udp6_lib_lookup_skb(const struct sk_buff *skb,
__be16 sport, __be16 dport)
{
- const struct ipv6hdr *iph = ipv6_hdr(skb);
+ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
+ const struct ipv6hdr *iph = (struct ipv6hdr *)(skb->data + offset);
struct net *net = dev_net(skb->dev);
int iif, sdif;
--- a/net/ipv6/udp_offload.c
+++ b/net/ipv6/udp_offload.c
@@ -164,7 +164,8 @@ flush:
INDIRECT_CALLABLE_SCOPE int udp6_gro_complete(struct sk_buff *skb, int nhoff)
{
- const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+ const u16 offset = NAPI_GRO_CB(skb)->network_offsets[skb->encapsulation];
+ const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + offset);
struct udphdr *uh = (struct udphdr *)(skb->data + nhoff);
/* do fraglist only if there is no outer UDP encap (or we already processed it) */
|