forked from cilium/cilium
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathfib.h
346 lines (315 loc) · 10.2 KB
/
fib.h
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
/* SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) */
/* Copyright Authors of Cilium */
#ifndef __LIB_FIB_H_
#define __LIB_FIB_H_
#include <bpf/ctx/ctx.h>
#include <bpf/api.h>
#include "common.h"
#include "neigh.h"
#include "l3.h"
/* maybe_add_l2_hdr prepends an Ethernet header when redirecting from an
 * L3 device towards an L2 device.
 *
 * Sets *l2_hdr_required to false when 'ifindex' refers to an L3 device,
 * in which case no L2 rewrite must be performed by the caller.
 *
 * Returns 0 on success, DROP_INVALID if headroom could not be added, or
 * DROP_WRITE_ERROR if the protocol field could not be written.
 */
static __always_inline int
maybe_add_l2_hdr(struct __ctx_buff *ctx __maybe_unused,
		 __u32 ifindex __maybe_unused,
		 bool *l2_hdr_required __maybe_unused)
{
	if (IS_L3_DEV(ifindex)) {
		/* Target is an L3 device, so no L2 addresses are needed
		 * on the packet at all.
		 */
		*l2_hdr_required = false;
		return 0;
	}

	if (ETH_HLEN == 0) {
		/* We are on an L3 device but redirect towards an L2 one,
		 * so an Ethernet header has to be created first, carrying
		 * the packet's original protocol.
		 */
		__u16 proto = ctx_get_protocol(ctx);

		if (ctx_change_head(ctx, __ETH_HLEN, 0))
			return DROP_INVALID;
		if (eth_store_proto(ctx, proto, 0) < 0)
			return DROP_WRITE_ERROR;
	}

	return 0;
}
/* fib_ok reports whether a redirect helper returned a successful verdict,
 * i.e. either CTX_ACT_REDIRECT or CTX_ACT_TX.
 */
static __always_inline bool fib_ok(int ret)
{
	return likely(ret == CTX_ACT_REDIRECT || ret == CTX_ACT_TX);
}
/* fib_do_redirect will redirect the ctx to a particular output interface.
*
* the redirect can occur with or without a previous call to fib_lookup.
*
* if a previous fib_lookup was performed, this function will attempt to redirect
* to the output interface in the provided 'fib_params', as long as 'fib_ret'
* is set to 'BPF_FIB_LKUP_RET_SUCCESS'
*
 * if a previous fib_lookup was performed and the return was 'BPF_FIB_LKUP_RET_NO_NEIGH'
* this function will then attempt to copy the af_family and destination address
* out of 'fib_params' and into 'redir_neigh' struct then perform a
* 'redirect_neigh'.
*
* if no previous fib_lookup was performed, and the desire is to simply use
* 'redirect_neigh' then set 'fib_params' to nil and 'fib_ret' to
* 'BPF_FIB_LKUP_RET_NO_NEIGH'.
* in this case, the 'oif' value will be used for the 'redirect_neigh' call.
*
* in a special case, if a previous fib_lookup was performed, and the return
* was 'BPF_FIB_LKUP_RET_NO_NEIGH', and we are on a kernel version where
* the target interface for the fib lookup is not returned
* (due to ARP failing, see Kernel commit d1c362e1dd68) the provided 'oif'
* will be used as output interface for redirect.
*/
static __always_inline int
fib_do_redirect(struct __ctx_buff *ctx, const bool needs_l2_check,
		const struct bpf_fib_lookup_padded *fib_params, __s8 *fib_ret,
		int *oif)
{
	struct bpf_redir_neigh nh_params;
	struct bpf_redir_neigh *nh = NULL;
	union macaddr *dmac = NULL;
	int ret;

	/* sanity check, we only enter this function with these two fib lookup
	 * return codes.
	 */
	if (*fib_ret && (*fib_ret != BPF_FIB_LKUP_RET_NO_NEIGH))
		return DROP_NO_FIB;

	/* determine which oif to use before needs_l2_check determines if layer 2
	 * header needs to be pushed.
	 */
	if (fib_params) {
		if (*fib_ret == BPF_FIB_LKUP_RET_NO_NEIGH &&
		    !is_defined(HAVE_FIB_IFINDEX) && *oif) {
			/* For kernels without d1c362e1dd68 ("bpf: Always
			 * return target ifindex in bpf_fib_lookup") we
			 * fall back to use the caller-provided oif when
			 * necessary.
			 * no-op
			 */
		} else {
			*oif = fib_params->l.ifindex;
		}
	}

	/* determine if we need to append layer 2 header */
	if (needs_l2_check) {
		bool l2_hdr_required = true;

		ret = maybe_add_l2_hdr(ctx, *oif, &l2_hdr_required);
		if (ret != 0)
			return ret;
		if (!l2_hdr_required)
			goto out_send;
	}

	/* determine if we are performing redirect or redirect_neigh */
	switch (*fib_ret) {
	case BPF_FIB_LKUP_RET_SUCCESS:
		/* Lookup fully succeeded: the FIB result carries both MAC
		 * addresses, write them straight into the packet.
		 */
		if (eth_store_daddr(ctx, fib_params->l.dmac, 0) < 0)
			return DROP_WRITE_ERROR;
		if (eth_store_saddr(ctx, fib_params->l.smac, 0) < 0)
			return DROP_WRITE_ERROR;
		break;
	case BPF_FIB_LKUP_RET_NO_NEIGH:
		/* previous fib lookup was performed, we can fillout both
		 * a bpf_redir_neigh and a dmac.
		 *
		 * the former is used if we have access to redirect_neigh
		 * the latter is used if we don't and have to use the eBPF
		 * neighbor map.
		 */
		if (fib_params) {
			nh_params.nh_family = fib_params->l.family;
			__bpf_memcpy_builtin(&nh_params.ipv6_nh,
					     &fib_params->l.ipv6_dst,
					     sizeof(nh_params.ipv6_nh));
			nh = &nh_params;

			if (!neigh_resolver_available()) {
				/* The neigh_record_ip{4,6} locations are mainly from
				 * inbound client traffic on the load-balancer where we
				 * know that replies need to go back to them.
				 */
				dmac = fib_params->l.family == AF_INET ?
				       neigh_lookup_ip4(&fib_params->l.ipv4_dst) :
				       neigh_lookup_ip6((void *)&fib_params->l.ipv6_dst);
			}
		}
		/* If we are able to resolve neighbors on demand, always
		 * prefer that over the BPF neighbor map since the latter
		 * might be less accurate in some asymmetric corner cases.
		 */
		if (neigh_resolver_available()) {
			if (nh)
				return redirect_neigh(*oif, &nh_params,
						      sizeof(nh_params), 0);
			else
				return redirect_neigh(*oif, NULL, 0, 0);
		} else {
			union macaddr smac = NATIVE_DEV_MAC_BY_IFINDEX(*oif);

			if (!dmac) {
				/* Record the map miss for the caller's drop
				 * notification before bailing out.
				 */
				*fib_ret = BPF_FIB_MAP_NO_NEIGH;
				return DROP_NO_FIB;
			}
			if (eth_store_daddr_aligned(ctx, dmac->addr, 0) < 0)
				return DROP_WRITE_ERROR;
			if (eth_store_saddr_aligned(ctx, smac.addr, 0) < 0)
				return DROP_WRITE_ERROR;
		}
	}
out_send:
	return ctx_redirect(ctx, *oif, 0);
}
/* fib_redirect performs a fib lookup for the flow described by 'fib_params'
 * (pre-filled by the caller) and redirects the ctx to the resolved output
 * interface.
 *
 * On a non-successful lookup, 'fib_err' is set to the bpf_fib_lookup return
 * code (or BPF_FIB_MAP_NO_NEIGH on a BPF neighbor map miss). 'oif' is both
 * an input (fallback ifindex for old kernels, see below) and an output (the
 * interface actually redirected to).
 *
 * With ENABLE_SKIP_FIB defined, the lookup is skipped entirely and the
 * packet is always steered to DIRECT_ROUTING_DEV_IFINDEX through neighbor
 * resolution.
 */
static __always_inline int
fib_redirect(struct __ctx_buff *ctx, const bool needs_l2_check,
	     struct bpf_fib_lookup_padded *fib_params, __s8 *fib_err, int *oif)
{
	struct bpf_redir_neigh nh_params;
	struct bpf_redir_neigh *nh = NULL;
	/* With ENABLE_SKIP_FIB no fib lookup happens, so there is never a
	 * fib-resolved neighbor and we go down the no_neigh path directly.
	 */
	bool no_neigh = is_defined(ENABLE_SKIP_FIB);
	int ret;

#ifndef ENABLE_SKIP_FIB
	ret = fib_lookup(ctx, &fib_params->l, sizeof(fib_params->l), 0);
	if (ret != BPF_FIB_LKUP_RET_SUCCESS) {
		*fib_err = (__s8)ret;
		if (likely(ret == BPF_FIB_LKUP_RET_NO_NEIGH)) {
			/* Route exists but the L2 neighbor is unresolved:
			 * prepare redirect_neigh() parameters from the
			 * lookup result.
			 */
			nh_params.nh_family = fib_params->l.family;
			__bpf_memcpy_builtin(&nh_params.ipv6_nh,
					     &fib_params->l.ipv6_dst,
					     sizeof(nh_params.ipv6_nh));
			nh = &nh_params;
			no_neigh = true;
			/* For kernels without d1c362e1dd68 ("bpf: Always
			 * return target ifindex in bpf_fib_lookup") we
			 * fall back to use the caller-provided oif when
			 * necessary.
			 */
			if (!is_defined(HAVE_FIB_IFINDEX) && *oif > 0)
				goto skip_oif;
		} else {
			return DROP_NO_FIB;
		}
	}
	*oif = fib_params->l.ifindex;
skip_oif:
#else
	*oif = DIRECT_ROUTING_DEV_IFINDEX;
#endif /* ENABLE_SKIP_FIB */
	/* Push an L2 header first if the target device requires one. */
	if (needs_l2_check) {
		bool l2_hdr_required = true;

		ret = maybe_add_l2_hdr(ctx, *oif, &l2_hdr_required);
		if (ret != 0)
			return ret;
		if (!l2_hdr_required)
			goto out_send;
	}
	if (no_neigh) {
		/* If we are able to resolve neighbors on demand, always
		 * prefer that over the BPF neighbor map since the latter
		 * might be less accurate in some asymmetric corner cases.
		 */
		if (neigh_resolver_available()) {
			if (nh)
				return redirect_neigh(*oif, &nh_params,
						      sizeof(nh_params), 0);
			else
				return redirect_neigh(*oif, NULL, 0, 0);
		} else {
			union macaddr *dmac, smac = NATIVE_DEV_MAC_BY_IFINDEX(*oif);

			/* The neigh_record_ip{4,6} locations are mainly from
			 * inbound client traffic on the load-balancer where we
			 * know that replies need to go back to them.
			 */
			dmac = fib_params->l.family == AF_INET ?
			       neigh_lookup_ip4(&fib_params->l.ipv4_dst) :
			       neigh_lookup_ip6((void *)&fib_params->l.ipv6_dst);
			if (!dmac) {
				*fib_err = BPF_FIB_MAP_NO_NEIGH;
				return DROP_NO_FIB;
			}
			if (eth_store_daddr_aligned(ctx, dmac->addr, 0) < 0)
				return DROP_WRITE_ERROR;
			if (eth_store_saddr_aligned(ctx, smac.addr, 0) < 0)
				return DROP_WRITE_ERROR;
		}
	} else {
		/* Successful lookup: the fib result carries both MACs. */
		if (eth_store_daddr(ctx, fib_params->l.dmac, 0) < 0)
			return DROP_WRITE_ERROR;
		if (eth_store_saddr(ctx, fib_params->l.smac, 0) < 0)
			return DROP_WRITE_ERROR;
	}
out_send:
	return ctx_redirect(ctx, *oif, 0);
}
#ifdef ENABLE_IPV6
/* fib_lookup_v6 will perform a fib lookup with the src and dest addresses
* provided.
*
* after the function returns 'fib_params' will have the results of the fib lookup
* if successful.
*/
static __always_inline int
fib_lookup_v6(struct __ctx_buff *ctx, struct bpf_fib_lookup_padded *fib_params,
	      const struct in6_addr *ipv6_src, const struct in6_addr *ipv6_dst,
	      int flags)
{
	/* Fill the lookup key: address family, the ctx's ifindex and the
	 * caller-provided source/destination addresses.
	 */
	fib_params->l.family = AF_INET6;
	fib_params->l.ifindex = ctx_get_ifindex(ctx);

	ipv6_addr_copy((union v6addr *)&fib_params->l.ipv6_src,
		       (union v6addr *)ipv6_src);
	ipv6_addr_copy((union v6addr *)&fib_params->l.ipv6_dst,
		       (union v6addr *)ipv6_dst);

	return fib_lookup(ctx, &fib_params->l, sizeof(fib_params->l), flags);
}
/* fib_redirect_v6 builds IPv6 fib lookup parameters from 'ip6', performs
 * the generic IPv6 L3 processing via ipv6_l3() and then hands the packet
 * to fib_redirect() for lookup and redirect.
 */
static __always_inline int
fib_redirect_v6(struct __ctx_buff *ctx, int l3_off,
		struct ipv6hdr *ip6, const bool needs_l2_check,
		__s8 *fib_err, int iif, int *oif)
{
	struct bpf_fib_lookup_padded fib_params = {
		.l = {
			.family		= AF_INET6,
			.ifindex	= iif,
		},
	};
	int rc;

	ipv6_addr_copy((union v6addr *)&fib_params.l.ipv6_dst,
		       (union v6addr *)&ip6->daddr);
	ipv6_addr_copy((union v6addr *)&fib_params.l.ipv6_src,
		       (union v6addr *)&ip6->saddr);

	rc = ipv6_l3(ctx, l3_off, NULL, NULL, METRIC_EGRESS);
	if (unlikely(rc != CTX_ACT_OK))
		return rc;

	return fib_redirect(ctx, needs_l2_check, &fib_params, fib_err, oif);
}
#endif /* ENABLE_IPV6 */
#ifdef ENABLE_IPV4
/* fib_lookup_v4 will perform a fib lookup with the src and dest addresses
* provided.
*
* after the function returns 'fib_params' will have the results of the fib lookup
* if successful.
*/
static __always_inline int
fib_lookup_v4(struct __ctx_buff *ctx, struct bpf_fib_lookup_padded *fib_params,
	      __be32 ipv4_src, __be32 ipv4_dst, int flags)
{
	/* Fill the lookup key: address family, the ctx's ifindex and the
	 * caller-provided source/destination addresses.
	 */
	fib_params->l.family = AF_INET;
	fib_params->l.ifindex = ctx_get_ifindex(ctx);
	fib_params->l.ipv4_src = ipv4_src;
	fib_params->l.ipv4_dst = ipv4_dst;
	return fib_lookup(ctx, &fib_params->l, sizeof(fib_params->l), flags);
}
/* fib_redirect_v4 builds IPv4 fib lookup parameters from 'ip4', performs
 * the generic IPv4 L3 processing via ipv4_l3() and then hands the packet
 * to fib_redirect() for lookup and redirect.
 */
static __always_inline int
fib_redirect_v4(struct __ctx_buff *ctx, int l3_off,
		struct iphdr *ip4, const bool needs_l2_check,
		__s8 *fib_err, int iif, int *oif)
{
	struct bpf_fib_lookup_padded fib_params = {
		.l = {
			.family		= AF_INET,
			.ifindex	= iif,
			.ipv4_src	= ip4->saddr,
			.ipv4_dst	= ip4->daddr,
		},
	};
	int rc = ipv4_l3(ctx, l3_off, NULL, NULL, ip4);

	if (unlikely(rc != CTX_ACT_OK))
		return rc;

	return fib_redirect(ctx, needs_l2_check, &fib_params, fib_err, oif);
}
#endif /* ENABLE_IPV4 */
#endif /* __LIB_FIB_H_ */