1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1980, 1986, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 3. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 */
/************************************************************************
 * Note: In this file a 'fib' is a "forwarding information base",	*
 * which is the new name for an in-kernel routing (next hop) table.	*
 ***********************************************************************/
35
36 #include <sys/cdefs.h>
37 __FBSDID("$FreeBSD$");
38 #include "opt_route.h"
39
40 #include <sys/param.h>
41 #include <sys/socket.h>
42 #include <sys/systm.h>
43 #include <sys/malloc.h>
44 #include <sys/jail.h>
45 #include <sys/proc.h>
46 #include <sys/sysctl.h>
47 #include <sys/syslog.h>
48 #include <sys/kernel.h>
49 #include <sys/lock.h>
50 #include <sys/sx.h>
51 #include <sys/domain.h>
52 #include <sys/sysproto.h>
53
54 #include <net/vnet.h>
55 #include <net/route.h>
56 #include <net/route/route_var.h>
57
/*
 * Compile-time default for the number of FIBs.
 *
 * When the ROUTETABLES kernel config option is defined it must lie in
 * the range [1, RT_MAXFIBS]; it then becomes RT_NUMFIBS.
 */
#if defined(ROUTETABLES)
#if ROUTETABLES <= 0
#error "ROUTETABLES defined too low"
#endif
#if ROUTETABLES > RT_MAXFIBS
#error "ROUTETABLES defined too big"
#endif
#define	RT_NUMFIBS	ROUTETABLES
#endif /* ROUTETABLES */

/* Fall back to a single FIB if RT_NUMFIBS is still undefined. */
#if !defined(RT_NUMFIBS)
#define	RT_NUMFIBS	1
#endif
72
73 static void grow_rtables(uint32_t num_fibs);
74
75 VNET_DEFINE_STATIC(struct sx, rtables_lock);
76 #define V_rtables_lock VNET(rtables_lock)
77 #define RTABLES_LOCK() sx_xlock(&V_rtables_lock)
78 #define RTABLES_UNLOCK() sx_xunlock(&V_rtables_lock)
79 #define RTABLES_LOCK_INIT() sx_init(&V_rtables_lock, "rtables lock")
80 #define RTABLES_LOCK_ASSERT() sx_assert(&V_rtables_lock, SA_LOCKED)
81
82 VNET_DEFINE_STATIC(struct rib_head **, rt_tables);
83 #define V_rt_tables VNET(rt_tables)
84
85 VNET_DEFINE(uint32_t, _rt_numfibs) = RT_NUMFIBS;
86
87 /*
88 * Handler for net.my_fibnum.
89 * Returns current fib of the process.
90 */
91 static int
92 sysctl_my_fibnum(SYSCTL_HANDLER_ARGS)
93 {
94 int fibnum;
95 int error;
96
97 fibnum = curthread->td_proc->p_fibnum;
98 error = sysctl_handle_int(oidp, &fibnum, 0, req);
99 return (error);
100 }
101 SYSCTL_PROC(_net, OID_AUTO, my_fibnum,
102 CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, 0,
103 &sysctl_my_fibnum, "I",
104 "default FIB of caller");
105
106 static uint32_t
107 normalize_num_rtables(uint32_t num_rtables)
108 {
109
110 if (num_rtables > RT_MAXFIBS)
111 num_rtables = RT_MAXFIBS;
112 else if (num_rtables == 0)
113 num_rtables = 1;
114 return (num_rtables);
115 }
116
117 /*
118 * Sets the number of fibs in the current vnet.
119 * Function does not allow shrinking number of rtables.
120 */
121 static int
122 sysctl_fibs(SYSCTL_HANDLER_ARGS)
123 {
124 uint32_t new_fibs;
125 int error;
126
127 RTABLES_LOCK();
128 new_fibs = V_rt_numfibs;
129 error = sysctl_handle_32(oidp, &new_fibs, 0, req);
130 if (error == 0) {
131 new_fibs = normalize_num_rtables(new_fibs);
132
133 if (new_fibs < V_rt_numfibs)
134 error = ENOTCAPABLE;
135 if (new_fibs > V_rt_numfibs)
136 grow_rtables(new_fibs);
137 }
138 RTABLES_UNLOCK();
139
140 return (error);
141 }
142 SYSCTL_PROC(_net, OID_AUTO, fibs,
143 CTLFLAG_VNET | CTLTYPE_U32 | CTLFLAG_RWTUN | CTLFLAG_NOFETCH | CTLFLAG_MPSAFE,
144 NULL, 0, &sysctl_fibs, "IU",
145 "set number of fibs");
146
147 /*
148 * Sets fib of a current process.
149 */
150 int
151 sys_setfib(struct thread *td, struct setfib_args *uap)
152 {
153 int error = 0;
154
155 CURVNET_SET(TD_TO_VNET(td));
156 if (uap->fibnum >= 0 && uap->fibnum < V_rt_numfibs)
157 td->td_proc->p_fibnum = uap->fibnum;
158 else
159 error = EINVAL;
160 CURVNET_RESTORE();
161
162 return (error);
163 }
164
165 /*
166 * If required, copy interface routes from existing tables to the
167 * newly-created routing table.
168 */
169 static void
170 populate_kernel_routes(struct rib_head **new_rt_tables, struct rib_head *rh)
171 {
172 for (int i = 0; i < V_rt_numfibs; i++) {
173 struct rib_head *rh_src = new_rt_tables[i * (AF_MAX + 1) + rh->rib_family];
174 if ((rh_src != NULL) && (rh_src != rh))
175 rib_copy_kernel_routes(rh_src, rh);
176 }
177 }
178
179 /*
180 * Grows up the number of routing tables in the current fib.
181 * Function creates new index array for all rtables and allocates
182 * remaining routing tables.
183 */
184 static void
185 grow_rtables(uint32_t num_tables)
186 {
187 struct domain *dom;
188 struct rib_head **prnh, *rh;
189 struct rib_head **new_rt_tables, **old_rt_tables;
190 int family;
191
192 RTABLES_LOCK_ASSERT();
193
194 KASSERT(num_tables >= V_rt_numfibs, ("num_tables(%u) < rt_numfibs(%u)\n",
195 num_tables, V_rt_numfibs));
196
197 new_rt_tables = mallocarray(num_tables * (AF_MAX + 1), sizeof(void *),
198 M_RTABLE, M_WAITOK | M_ZERO);
199
200 if ((num_tables > 1) && (V_rt_add_addr_allfibs == 0))
201 printf("WARNING: Adding ifaddrs to all fibs has been turned off "
202 "by default. Consider tuning %s if needed\n",
203 "net.add_addr_allfibs");
204
205 #ifdef FIB_ALGO
206 fib_grow_rtables(num_tables);
207 #endif
208
209 /*
210 * Current rt_tables layout:
211 * fib0[af0, af1, af2, .., AF_MAX]fib1[af0, af1, af2, .., Af_MAX]..
212 * this allows to copy existing tables data by using memcpy()
213 */
214 if (V_rt_tables != NULL)
215 memcpy(new_rt_tables, V_rt_tables,
216 V_rt_numfibs * (AF_MAX + 1) * sizeof(void *));
217
218 /* Populate the remainders */
219 SLIST_FOREACH(dom, &domains, dom_next) {
220 if (dom->dom_rtattach == NULL)
221 continue;
222 family = dom->dom_family;
223 for (int i = 0; i < num_tables; i++) {
224 prnh = &new_rt_tables[i * (AF_MAX + 1) + family];
225 if (*prnh != NULL)
226 continue;
227 rh = dom->dom_rtattach(i);
228 if (rh == NULL)
229 log(LOG_ERR, "unable to create routing table for %d.%d\n",
230 dom->dom_family, i);
231 else
232 populate_kernel_routes(new_rt_tables, rh);
233 *prnh = rh;
234 }
235 }
236
237 /*
238 * Update rtables pointer.
239 * Ensure all writes to new_rt_tables has been completed before
240 * switching pointer.
241 */
242 atomic_thread_fence_rel();
243 old_rt_tables = V_rt_tables;
244 V_rt_tables = new_rt_tables;
245
246 /* Wait till all cpus see new pointers */
247 atomic_thread_fence_rel();
248 NET_EPOCH_WAIT();
249
250 /* Set number of fibs to a new value */
251 V_rt_numfibs = num_tables;
252
253 #ifdef FIB_ALGO
254 /* Attach fib algo to the new rtables */
255 SLIST_FOREACH(dom, &domains, dom_next) {
256 if (dom->dom_rtattach != NULL)
257 fib_setup_family(dom->dom_family, num_tables);
258 }
259 #endif
260
261 if (old_rt_tables != NULL)
262 free(old_rt_tables, M_RTABLE);
263 }
264
265 static void
266 vnet_rtables_init(const void *unused __unused)
267 {
268 int num_rtables_base;
269
270 if (IS_DEFAULT_VNET(curvnet)) {
271 num_rtables_base = RT_NUMFIBS;
272 TUNABLE_INT_FETCH("net.fibs", &num_rtables_base);
273 V_rt_numfibs = normalize_num_rtables(num_rtables_base);
274 } else
275 V_rt_numfibs = 1;
276
277 vnet_rtzone_init();
278 #ifdef FIB_ALGO
279 vnet_fib_init();
280 #endif
281 RTABLES_LOCK_INIT();
282
283 RTABLES_LOCK();
284 grow_rtables(V_rt_numfibs);
285 RTABLES_UNLOCK();
286 }
287 VNET_SYSINIT(vnet_rtables_init, SI_SUB_PROTO_DOMAIN, SI_ORDER_FOURTH,
288 vnet_rtables_init, 0);
289
290 #ifdef VIMAGE
291 static void
292 rtables_destroy(const void *unused __unused)
293 {
294 struct rib_head *rnh;
295 struct domain *dom;
296 int family;
297
298 RTABLES_LOCK();
299 SLIST_FOREACH(dom, &domains, dom_next) {
300 if (dom->dom_rtdetach == NULL)
301 continue;
302 family = dom->dom_family;
303 for (int i = 0; i < V_rt_numfibs; i++) {
304 rnh = rt_tables_get_rnh(i, family);
305 dom->dom_rtdetach(rnh);
306 }
307 }
308 RTABLES_UNLOCK();
309
310 /*
311 * dom_rtdetach calls rt_table_destroy(), which
312 * schedules deletion for all rtentries, nexthops and control
313 * structures. Wait for the destruction callbacks to fire.
314 * Note that this should result in freeing all rtentries, but
315 * nexthops deletions will be scheduled for the next epoch run
316 * and will be completed after vnet teardown.
317 */
318 NET_EPOCH_DRAIN_CALLBACKS();
319
320 free(V_rt_tables, M_RTABLE);
321 vnet_rtzone_destroy();
322 #ifdef FIB_ALGO
323 vnet_fib_destroy();
324 #endif
325 }
326 VNET_SYSUNINIT(rtables_destroy, SI_SUB_PROTO_DOMAIN, SI_ORDER_FIRST,
327 rtables_destroy, 0);
328 #endif
329
330 static inline struct rib_head *
331 rt_tables_get_rnh_ptr(uint32_t table, sa_family_t family)
332 {
333 struct rib_head **prnh;
334
335 KASSERT(table < V_rt_numfibs,
336 ("%s: table out of bounds (%d < %d)", __func__, table,
337 V_rt_numfibs));
338 KASSERT(family < (AF_MAX + 1),
339 ("%s: fam out of bounds (%d < %d)", __func__, family, AF_MAX + 1));
340
341 /* rnh is [fib=0][af=0]. */
342 prnh = V_rt_tables;
343 /* Get the offset to the requested table and fam. */
344 prnh += table * (AF_MAX + 1) + family;
345
346 return (*prnh);
347 }
348
349 struct rib_head *
350 rt_tables_get_rnh(uint32_t table, sa_family_t family)
351 {
352
353 return (rt_tables_get_rnh_ptr(table, family));
354 }
355
356 u_int
357 rt_tables_get_gen(uint32_t table, sa_family_t family)
358 {
359 struct rib_head *rnh;
360
361 rnh = rt_tables_get_rnh_ptr(table, family);
362 KASSERT(rnh != NULL, ("%s: NULL rib_head pointer table %d family %d",
363 __func__, table, family));
364 return (rnh->rnh_gen);
365 }
Cache object: dae58352aa2d9ecc613bbc15ae03922c
|