forked from cilium/cilium
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathinit.sh
executable file
·277 lines (240 loc) · 8.53 KB
/
init.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
#!/bin/bash
# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
# Copyright Authors of Cilium
#
# Configuration is passed purely by position. Slots whose assignment is
# commented out below are not read by this script; they are kept as comments
# so that the numbering of the remaining slots stays visible.
# Throughout the script the literal string "<nil>" is compared against to
# detect a value that has not been configured (e.g. IP4_HOST, IP6_HOST,
# TUNNEL_PROTOCOL, TUNNEL_PORT).
# LIB=${1}
RUNDIR=${2}
PROCSYSNETDIR=${3}
SYSCLASSNETDIR=${4}
IP4_HOST=${5}
IP6_HOST=${6}
MODE=${7}
TUNNEL_PROTOCOL=${8}
# Only set if TUNNEL_PROTOCOL = "vxlan", "geneve"
TUNNEL_PORT=${9}
# Only set if MODE = "direct"
# NATIVE_DEVS=${10}
HOST_DEV1=${11}
HOST_DEV2=${12}
MTU=${13}
# SOCKETLB=${14}
# SOCKETLB_PEER=${15}
# CGROUP_ROOT=${16}
# BPFFS_ROOT=${17}
# NODE_PORT=${18}
# NODE_PORT_BIND=${19}
# MCPU=${20}
# NR_CPUS=${21}
ENDPOINT_ROUTES=${22}
PROXY_RULE=${23}
FILTER_PRIO=${24}
DEFAULT_RTPROTO=${25}
LOCAL_RULE_PRIO=${26}
# Routing table IDs used by the proxy rules set up further below.
# If the value below is changed, be sure to update bugtool/cmd/configuration.go
# as well when dumping the routing table in bugtool. See GH-5828.
PROXY_RT_TABLE=2005
TO_PROXY_RT_TABLE=2004
# Abort on the first unhandled failing command.
set -e
# Trace every command as it is executed.
set -x
# A pipeline fails if any of its stages fails, not only the last one.
set -o pipefail
# Remove old legacy files
# Best effort: a missing file (or any other rm error) is deliberately ignored.
rm $RUNDIR/encap.state 2> /dev/null || true
# Bring a network device up and apply the per-device forwarding sysctls.
#
# Globals:   PROCSYSNETDIR (read) - root of the net sysctl tree
#            IP4_HOST, IP6_HOST (read) - "<nil>" disables that address family
# Arguments: $1 - device name
# Returns:   0 on success, 1 as soon as any step fails.
#
# Callers invoke this as `setup_dev ... || encap_fail`. In that position
# errexit is suspended inside the function, so every step has to propagate
# its own failure; otherwise only the status of the very last command would
# ever reach the caller.
function setup_dev()
{
  local -r NAME=$1
  local -r ipv4_conf="${PROCSYSNETDIR}/ipv4/conf/${NAME}"
  local -r ipv6_conf="${PROCSYSNETDIR}/ipv6/conf/${NAME}"

  ip link set "$NAME" up || return 1

  if [ "$IP6_HOST" != "<nil>" ]; then
    echo 1 > "${ipv6_conf}/forwarding" || return 1
  fi

  if [ "$IP4_HOST" != "<nil>" ]; then
    echo 1 > "${ipv4_conf}/forwarding" || return 1
    echo 0 > "${ipv4_conf}/rp_filter" || return 1
    echo 1 > "${ipv4_conf}/accept_local" || return 1
    echo 0 > "${ipv4_conf}/send_redirects" || return 1
  fi

  return 0
}
# Move the "lookup local" routing rule of one address family from pref 0 to
# pref LOCAL_RULE_PRIO.
#
# Globals:   LOCAL_RULE_PRIO (read), DEFAULT_RTPROTO (read)
# Arguments: $1 - the ip command including the family flag, as one string
#                 (e.g. "ip -4"); it is split on whitespace before use
# Outputs:   matching rules and, on failure, an error report on stdout
# Returns:   0 on success or when the family has no rules at all;
#            exits the script with status 1 if the rule could not be moved.
function move_local_rules_af()
{
  local -a ip_cmd
  local local_rule_count

  read -r -a ip_cmd <<< "$1"

  # Do not move the rule if we don't support the address family
  if [ -z "$("${ip_cmd[@]}" rule list)" ]; then
    return
  fi

  # move the local table lookup rule from pref 0 to pref LOCAL_RULE_PRIO so we
  # can insert the cilium ip rules before the local table. It is strictly
  # required to add the new local rule before deleting the old one as
  # otherwise local addresses will not be reachable for a short period of
  # time.
  #
  # The match is anchored on the priority column ("<pref>:" at the start of
  # the line). A bare substring match would also hit unrelated rules that
  # merely contain the same digits (another pref, an fwmark, an address), in
  # which case the new rule would be skipped while pref 0 is still deleted.
  "${ip_cmd[@]}" rule list | grep "^${LOCAL_RULE_PRIO}:.*lookup local" || {
    "${ip_cmd[@]}" rule add from all lookup local pref "${LOCAL_RULE_PRIO}" proto "$DEFAULT_RTPROTO"
  }
  "${ip_cmd[@]}" rule del from all lookup local pref 0 2> /dev/null || true

  # check if the move of the local table move was successful and restore
  # it otherwise
  # grep -c exits non-zero when it counts 0 matches, hence the "|| true".
  local_rule_count=$("${ip_cmd[@]}" rule list | grep -c "lookup local") || true
  if [ "$local_rule_count" -eq 0 ]; then
    "${ip_cmd[@]}" rule add from all lookup local pref 0 proto "$DEFAULT_RTPROTO"
    "${ip_cmd[@]}" rule del from all lookup local pref "${LOCAL_RULE_PRIO}"
    echo "Error: The kernel does not support moving the local table routing rule"
    echo "Local routing rules:"
    "${ip_cmd[@]}" rule list lookup local
    exit 1
  fi
}
# Relocate the local-table lookup rule for every enabled address family.
# A family is skipped when its host address variable (IP4_HOST / IP6_HOST)
# holds the literal "<nil>".
function move_local_rules()
{
  local family host_var

  for family in 4 6; do
    host_var="IP${family}_HOST"
    if [ "${!host_var}" != "<nil>" ]; then
      move_local_rules_af "ip -${family}"
    fi
  done
}
# Install the routing rule and route that steer proxy-bound traffic to the
# loopback device, and remove the rule/routes of the retired PROXY_RT_TABLE.
# For an address family whose host address is "<nil>" the rules are removed
# instead.
#
# Globals:   IP4_HOST, IP6_HOST, HOST_DEV1, HOST_DEV2, DEFAULT_RTPROTO,
#            PROXY_RT_TABLE, TO_PROXY_RT_TABLE (all read)
# Arguments: none
# Returns:   0 on success; under errexit a failing mandatory ip command
#            aborts the script.
function setup_proxy_rules()
{
  # Rule specs are kept as arrays so that each word reaches ip as its own
  # argument without relying on unquoted word splitting.
  # TODO(brb): remove $PROXY_RT_TABLE -related code in v1.15
  local -a from_ingress_rulespec=(fwmark 0xA00/0xF00 pref 10 lookup "$PROXY_RT_TABLE" proto "$DEFAULT_RTPROTO")

  # Any packet to an ingress or egress proxy uses a separate routing table
  # that routes the packet to the loopback device regardless of the destination
  # address in the packet. For this to work the ctx must have a socket set
  # (e.g., via TPROXY).
  local -a to_proxy_rulespec=(fwmark 0x200/0xF00 pref 9 lookup "$TO_PROXY_RT_TABLE" proto "$DEFAULT_RTPROTO")

  local ip6_lladdr

  if [ "$IP4_HOST" != "<nil>" ]; then
    if [ -n "$(ip -4 rule list)" ]; then
      if [ -z "$(ip -4 rule list "${to_proxy_rulespec[@]}")" ]; then
        ip -4 rule add "${to_proxy_rulespec[@]}"
      fi
      ip -4 rule delete "${from_ingress_rulespec[@]}" || true
    fi

    # Traffic to the host proxy is local
    ip route replace table "$TO_PROXY_RT_TABLE" local 0.0.0.0/0 dev lo proto "$DEFAULT_RTPROTO"

    # The $PROXY_RT_TABLE is no longer in use, so delete it
    ip route delete table "$PROXY_RT_TABLE" "$IP4_HOST/32" dev "$HOST_DEV1" 2>/dev/null || true
    ip route delete table "$PROXY_RT_TABLE" default via "$IP4_HOST" 2>/dev/null || true
  else
    ip -4 rule del "${to_proxy_rulespec[@]}" 2> /dev/null || true
    ip -4 rule del "${from_ingress_rulespec[@]}" 2> /dev/null || true
  fi

  if [ "$IP6_HOST" != "<nil>" ]; then
    if [ -n "$(ip -6 rule list)" ]; then
      if [ -z "$(ip -6 rule list "${to_proxy_rulespec[@]}")" ]; then
        ip -6 rule add "${to_proxy_rulespec[@]}"
      fi
      ip -6 rule delete "${from_ingress_rulespec[@]}" || true
    fi

    # First inet6 address on HOST_DEV2, without the prefix length.
    # A single awk stage is used on purpose: with pipefail and errexit a
    # grep stage that matches nothing would fail the pipeline and abort the
    # script, although "no address" is a case the check below handles.
    # awk reads its whole input, so the producer never sees a closed pipe.
    ip6_lladdr=$(ip -6 addr show dev "$HOST_DEV2" \
      | awk '/inet6/ && !seen { split($2, parts, "/"); print parts[1]; seen = 1 }')

    if [ -n "$ip6_lladdr" ]; then
      # Traffic to the host proxy is local
      ip -6 route replace table "$TO_PROXY_RT_TABLE" local ::/0 dev lo proto "$DEFAULT_RTPROTO"

      # The $PROXY_RT_TABLE is no longer in use, so delete it
      ip -6 route delete table "$PROXY_RT_TABLE" "${ip6_lladdr}/128" dev "$HOST_DEV1" 2>/dev/null || true
      ip -6 route delete table "$PROXY_RT_TABLE" default via "$ip6_lladdr" dev "$HOST_DEV1" 2>/dev/null || true
    fi
  else
    ip -6 rule del "${to_proxy_rulespec[@]}" 2> /dev/null || true
    ip -6 rule del "${from_ingress_rulespec[@]}" 2> /dev/null || true
  fi
}
# Print a random MAC address (six colon-separated lowercase hex octets,
# no trailing newline) built from /dev/urandom.
#
# The first octet is masked with 0xfe and OR-ed with 0x02, i.e. its lowest
# bit is cleared and its second-lowest bit is set.
#
# Arguments: none
# Outputs:   the MAC address on stdout
# Returns:   0 on success, 1 if random data could not be obtained.
function rnd_mac_addr()
{
  # Declaration and assignment are split: "local v=$(cmd)" always yields the
  # status of "local" and would hide a failing od.
  local lower upper

  # od prints " xx xx xx xx xx"; turning each space into ':' yields the
  # leading-colon tail ":xx:xx:xx:xx:xx".
  lower=$(od /dev/urandom -N5 -t x1 -An | sed 's/ /:/g') || return 1
  # od prints " xx"; field 2 is the hex byte.
  upper=$(od /dev/urandom -N1 -t x1 -An | cut -d' ' -f2) || return 1

  # Without pipefail the pipelines above report the status of their last
  # stage only, so also reject empty results.
  if [ -z "$lower" ] || [ -z "$upper" ]; then
    return 1
  fi

  printf '%02x%s' "$(( 0x$upper & 0xfe | 0x02 ))" "$lower"
}
# Create the encapsulation device $ENCAP_DEV of type $TUNNEL_PROTOCOL with a
# random MAC address. The link options are stored in the global TUNNEL_OPTS:
# always "external", preceded by "dstport <port>" when TUNNEL_PORT is not
# "<nil>". Calls encap_fail when the device cannot be created.
function create_encap_dev()
{
  if [ "${TUNNEL_PORT}" = "<nil>" ]; then
    TUNNEL_OPTS="external"
  else
    TUNNEL_OPTS="dstport $TUNNEL_PORT external"
  fi

  # TUNNEL_OPTS is expanded unquoted on purpose so that it splits into
  # separate arguments.
  if ! ip link add name $ENCAP_DEV address $(rnd_mac_addr) type $TUNNEL_PROTOCOL $TUNNEL_OPTS; then
    encap_fail
  fi
}
# Report on stderr that setting up $ENCAP_DEV failed, list the existing
# links of type $MODE, then terminate the script with status 1.
#
# NOTE(review): the link type shown here is $MODE, whereas create_encap_dev
# creates the device with type $TUNNEL_PROTOCOL - confirm that $MODE always
# names a valid `ip link` type on every path that ends up here.
function encap_fail()
{
  {
    echo "ERROR: Setup of encapsulation device $ENCAP_DEV has failed. Is another program using a $MODE device?"
    echo "Configured $MODE devices on the system:"
    ip link show type $MODE
  } >&2

  exit 1
}
# Proxy routing is only configured when PROXY_RULE is exactly "true".
case "$PROXY_RULE" in
  true)
    # Decrease priority of the rule to identify local addresses
    move_local_rules

    # Install new rules before local rule to ensure that packets from the
    # proxy are using a separate routing table
    setup_proxy_rules
    ;;
esac
# IPIP devices: in "ipip" mode make sure cilium_ipip4 / cilium_ipip6 exist for
# each enabled address family and record their interface index in
# $RUNDIR/globals/node_config.h; in every other case remove whatever a
# previous run may have left behind.
if [ "$MODE" = "ipip" ]; then
if [ "$IP4_HOST" != "<nil>" ]; then
ENCAP_DEV="cilium_ipip4"
# Only create the device when it does not exist yet.
ip link show $ENCAP_DEV || {
# Upon module load it will create a non-removable tunl0
# device. Instead of creating an additional useless one,
# rename tunl0 with cilium prefix in a second step. If
# we were to do 'ip link add name $ENCAP_DEV [...]' it would
# create two devices. :/
ip link add name tunl0 type ipip external || true
ip link set tunl0 name $ENCAP_DEV
}
setup_dev $ENCAP_DEV || encap_fail
# Replace any earlier ENCAP4_IFINDEX define with the current ifindex.
ENCAP_IDX=$(cat "${SYSCLASSNETDIR}/${ENCAP_DEV}/ifindex")
sed -i '/^#.*ENCAP4_IFINDEX.*$/d' $RUNDIR/globals/node_config.h
echo "#define ENCAP4_IFINDEX $ENCAP_IDX" >> $RUNDIR/globals/node_config.h
else
# IPv4 disabled: best-effort removal, errors are ignored.
ip link del cilium_ipip4 2> /dev/null || true
fi
if [ "$IP6_HOST" != "<nil>" ]; then
ENCAP_DEV="cilium_ipip6"
# Only create the device when it does not exist yet.
ip link show $ENCAP_DEV || {
# For cilium_ipip6 device, we unfortunately cannot use the
# same workaround as cilium_ipip4. While the latter allows
# to set an existing tunl0 into collect_md mode, the default
# ip6tnl0 if present cannot. It's quite annoying, but if v6
# was built into the kernel, we might just need to live with
# it. Default device creation can still be worked around
# via boot param if the sysctl from agent won't do it.
ip link add name $ENCAP_DEV type ip6tnl external || true
# Best effort: rename sit0 to cilium_sit if it is present.
ip link set sit0 name cilium_sit || true
}
setup_dev $ENCAP_DEV || encap_fail
# Replace any earlier ENCAP6_IFINDEX define with the current ifindex.
ENCAP_IDX=$(cat "${SYSCLASSNETDIR}/${ENCAP_DEV}/ifindex")
sed -i '/^#.*ENCAP6_IFINDEX.*$/d' $RUNDIR/globals/node_config.h
echo "#define ENCAP6_IFINDEX $ENCAP_IDX" >> $RUNDIR/globals/node_config.h
else
# IPv6 disabled: best-effort removal, errors are ignored.
ip link del cilium_ipip6 2> /dev/null || true
ip link del cilium_sit 2> /dev/null || true
fi
else
# Not in ipip mode: best-effort removal of all ipip-related devices.
ip link del cilium_ipip4 2> /dev/null || true
ip link del cilium_ipip6 2> /dev/null || true
ip link del cilium_sit 2> /dev/null || true
fi
# Remove eventual existing encapsulation device from previous run.
# Only devices that do not belong to the configured tunnel protocol are
# deleted; deletion is best effort. Any protocol other than "<nil>", "vxlan"
# or "geneve" is rejected.
if [ "${TUNNEL_PROTOCOL}" = "<nil>" ]; then
  ip link del cilium_vxlan 2> /dev/null || true
  ip link del cilium_geneve 2> /dev/null || true
elif [ "${TUNNEL_PROTOCOL}" = "vxlan" ]; then
  ip link del cilium_geneve 2> /dev/null || true
elif [ "${TUNNEL_PROTOCOL}" = "geneve" ]; then
  ip link del cilium_vxlan 2> /dev/null || true
else
  (>&2 echo "ERROR: Unknown tunnel mode")
  exit 1
fi
# Tunnel device: make sure cilium_<TUNNEL_PROTOCOL> exists with the
# configured destination port and MTU, apply the device sysctls and record
# its interface index as ENCAP_IFINDEX in $RUNDIR/globals/node_config.h.
if [ "${TUNNEL_PROTOCOL}" != "<nil>" ]; then
  ENCAP_DEV="cilium_${TUNNEL_PROTOCOL}"

  ip link show "$ENCAP_DEV" || create_encap_dev

  if [ "${TUNNEL_PORT}" != "<nil>" ]; then
    # Recreate the device when it uses a different destination port.
    # -w enforces a whole-word match: without it a configured port such as
    # 847 would also match a device listening on 8472 and the stale device
    # would be kept.
    ip -details link show "$ENCAP_DEV" | grep -w "dstport $TUNNEL_PORT" || {
      ip link delete name "$ENCAP_DEV" type "$TUNNEL_PROTOCOL"
      create_encap_dev
    }
  fi

  ip link set "$ENCAP_DEV" mtu "$MTU" || encap_fail
  setup_dev "$ENCAP_DEV" || encap_fail

  # Replace any earlier ENCAP_IFINDEX define with the current ifindex.
  ENCAP_IDX=$(cat "${SYSCLASSNETDIR}/${ENCAP_DEV}/ifindex")
  sed -i '/^#.*ENCAP_IFINDEX.*$/d' "$RUNDIR/globals/node_config.h"
  echo "#define ENCAP_IFINDEX $ENCAP_IDX" >> "$RUNDIR/globals/node_config.h"
fi