1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2022 Alexander V. Chernikov
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 #include "opt_inet.h"
31 #include "opt_inet6.h"
32 #include <sys/types.h>
33 #include <sys/malloc.h>
34 #include <sys/rmlock.h>
35 #include <sys/socket.h>
36 #include <sys/ck.h>
37
38 #include <net/if.h>
39 #include <net/if_dl.h>
40 #include <net/route.h>
41 #include <net/route/nhop.h>
42 #include <net/route/route_ctl.h>
43 #include <netlink/netlink.h>
44 #include <netlink/netlink_ctl.h>
45 #include <netlink/netlink_linux.h>
46 #include <netlink/netlink_route.h>
47
48 #include <compat/linux/linux.h>
49 #include <compat/linux/linux_common.h>
50 #include <compat/linux/linux_util.h>
51
52 #define DEBUG_MOD_NAME nl_linux
53 #define DEBUG_MAX_LEVEL LOG_DEBUG3
54 #include <netlink/netlink_debug.h>
55 _DECLARE_DEBUG(LOG_DEBUG);
56
/*
 * Tells whether the payload carried by @rta is exactly @sz bytes long.
 */
static bool
valid_rta_size(const struct rtattr *rta, int sz)
{
	if (NL_RTA_DATA_LEN(rta) != sz)
		return (false);

	return (true);
}
62
/*
 * Tells whether the payload carried by @rta has the size of a uint32_t.
 */
static bool
valid_rta_u32(const struct rtattr *rta)
{
	const int expected_len = sizeof(uint32_t);

	return (valid_rta_size(rta, expected_len));
}
68
/*
 * Reads the payload of @rta as a uint32_t.
 * The payload size is not checked here.
 */
static uint32_t
_rta_get_uint32(const struct rtattr *rta)
{
	const uint32_t *payload = (const uint32_t *)NL_RTA_DATA_CONST(rta);

	return (*payload);
}
74
75 static struct nlmsghdr *
76 rtnl_neigh_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
77 {
78 struct ndmsg *ndm = (struct ndmsg *)(hdr + 1);
79
80 if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ndmsg))
81 ndm->ndm_family = linux_to_bsd_domain(ndm->ndm_family);
82
83 return (hdr);
84 }
85
86 static struct nlmsghdr *
87 rtnl_ifaddr_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
88 {
89 struct ifaddrmsg *ifam = (struct ifaddrmsg *)(hdr + 1);
90
91 if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ifaddrmsg))
92 ifam->ifa_family = linux_to_bsd_domain(ifam->ifa_family);
93
94 return (hdr);
95 }
96
97 static struct nlmsghdr *
98 rtnl_route_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
99 {
100 /* Tweak address families and default fib only */
101 struct rtmsg *rtm = (struct rtmsg *)(hdr + 1);
102 struct nlattr *nla, *nla_head;
103 int attrs_len;
104
105 rtm->rtm_family = linux_to_bsd_domain(rtm->rtm_family);
106
107 if (rtm->rtm_table == 254)
108 rtm->rtm_table = 0;
109
110 attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr);
111 attrs_len -= NETLINK_ALIGN(sizeof(struct rtmsg));
112 nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg)));
113
114 NLA_FOREACH(nla, nla_head, attrs_len) {
115 RT_LOG(LOG_DEBUG3, "GOT type %d len %d total %d",
116 nla->nla_type, nla->nla_len, attrs_len);
117 struct rtattr *rta = (struct rtattr *)nla;
118 if (rta->rta_len < sizeof(struct rtattr)) {
119 break;
120 }
121 switch (rta->rta_type) {
122 case NL_RTA_TABLE:
123 if (!valid_rta_u32(rta))
124 goto done;
125 rtm->rtm_table = 0;
126 uint32_t fibnum = _rta_get_uint32(rta);
127 RT_LOG(LOG_DEBUG3, "GET RTABLE: %u", fibnum);
128 if (fibnum == 254) {
129 *((uint32_t *)NL_RTA_DATA(rta)) = 0;
130 }
131 break;
132 }
133 }
134
135 done:
136 return (hdr);
137 }
138
139 static struct nlmsghdr *
140 rtnl_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
141 {
142 switch (hdr->nlmsg_type) {
143 case NL_RTM_GETROUTE:
144 case NL_RTM_NEWROUTE:
145 case NL_RTM_DELROUTE:
146 return (rtnl_route_from_linux(hdr, npt));
147 case NL_RTM_GETNEIGH:
148 return (rtnl_neigh_from_linux(hdr, npt));
149 case NL_RTM_GETADDR:
150 return (rtnl_ifaddr_from_linux(hdr, npt));
151 /* Silence warning for the messages where no translation is required */
152 case NL_RTM_NEWLINK:
153 case NL_RTM_DELLINK:
154 case NL_RTM_GETLINK:
155 break;
156 default:
157 RT_LOG(LOG_DEBUG, "Passing message type %d untranslated",
158 hdr->nlmsg_type);
159 }
160
161 return (hdr);
162 }
163
164 static struct nlmsghdr *
165 nlmsg_from_linux(int netlink_family, struct nlmsghdr *hdr,
166 struct nl_pstate *npt)
167 {
168 switch (netlink_family) {
169 case NETLINK_ROUTE:
170 return (rtnl_from_linux(hdr, npt));
171 }
172
173 return (hdr);
174 }
175
176
177 /************************************************************
178 * Kernel -> Linux
179 ************************************************************/
180
181 static bool
182 handle_default_out(struct nlmsghdr *hdr, struct nl_writer *nw)
183 {
184 char *out_hdr;
185 out_hdr = nlmsg_reserve_data(nw, NLMSG_ALIGN(hdr->nlmsg_len), char);
186
187 if (out_hdr != NULL) {
188 memcpy(out_hdr, hdr, hdr->nlmsg_len);
189 return (true);
190 }
191 return (false);
192 }
193
194 static bool
195 nlmsg_copy_header(struct nlmsghdr *hdr, struct nl_writer *nw)
196 {
197 return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, hdr->nlmsg_type,
198 hdr->nlmsg_flags, 0));
199 }
200
201 static void *
202 _nlmsg_copy_next_header(struct nlmsghdr *hdr, struct nl_writer *nw, int sz)
203 {
204 void *next_hdr = nlmsg_reserve_data(nw, sz, void);
205 memcpy(next_hdr, hdr + 1, NLMSG_ALIGN(sz));
206
207 return (next_hdr);
208 }
209 #define nlmsg_copy_next_header(_hdr, _ns, _t) \
210 ((_t *)(_nlmsg_copy_next_header(_hdr, _ns, sizeof(_t))))
211
212 static bool
213 nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw)
214 {
215 struct nlattr *nla = nlmsg_reserve_data(nw, nla_orig->nla_len, struct nlattr);
216 if (nla != NULL) {
217 memcpy(nla, nla_orig, nla_orig->nla_len);
218 return (true);
219 }
220 return (false);
221 }
222
223 static bool
224 nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw)
225 {
226 struct nlattr *nla;
227
228 int hdrlen = NETLINK_ALIGN(raw_hdrlen);
229 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen;
230 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen);
231
232 NLA_FOREACH(nla, nla_head, attrs_len) {
233 RT_LOG(LOG_DEBUG3, "reading attr %d len %d", nla->nla_type, nla->nla_len);
234 if (nla->nla_len < sizeof(struct nlattr)) {
235 return (false);
236 }
237 if (!nlmsg_copy_nla(nla, nw))
238 return (false);
239 }
240 return (true);
241 }
242
243 static unsigned int
244 rtnl_if_flags_to_linux(unsigned int if_flags)
245 {
246 unsigned int result = 0;
247
248 for (int i = 0; i < 31; i++) {
249 unsigned int flag = 1 << i;
250 if (!(flag & if_flags))
251 continue;
252 switch (flag) {
253 case IFF_UP:
254 case IFF_BROADCAST:
255 case IFF_DEBUG:
256 case IFF_LOOPBACK:
257 case IFF_POINTOPOINT:
258 case IFF_DRV_RUNNING:
259 case IFF_NOARP:
260 case IFF_PROMISC:
261 case IFF_ALLMULTI:
262 result |= flag;
263 break;
264 case IFF_KNOWSEPOCH:
265 case IFF_DRV_OACTIVE:
266 case IFF_SIMPLEX:
267 case IFF_LINK0:
268 case IFF_LINK1:
269 case IFF_LINK2:
270 case IFF_CANTCONFIG:
271 case IFF_PPROMISC:
272 case IFF_MONITOR:
273 case IFF_STATICARP:
274 case IFF_STICKYARP:
275 case IFF_DYING:
276 case IFF_RENAMING:
277 case IFF_NOGROUP:
278 /* No Linux analogue */
279 break;
280 case IFF_MULTICAST:
281 result |= 1 << 12;
282 }
283 }
284 return (result);
285 }
286
287 static bool
288 rtnl_newlink_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
289 struct nl_writer *nw)
290 {
291 if (!nlmsg_copy_header(hdr, nw))
292 return (false);
293
294 struct ifinfomsg *ifinfo;
295 ifinfo = nlmsg_copy_next_header(hdr, nw, struct ifinfomsg);
296
297 ifinfo->ifi_family = bsd_to_linux_domain(ifinfo->ifi_family);
298 /* Convert interface type */
299 switch (ifinfo->ifi_type) {
300 case IFT_ETHER:
301 ifinfo->ifi_type = 1; // ARPHRD_ETHER
302 break;
303 }
304 ifinfo->ifi_flags = rtnl_if_flags_to_linux(ifinfo->ifi_flags);
305
306 /* Copy attributes unchanged */
307 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifinfomsg), nw))
308 return (false);
309
310 /* make ip(8) happy */
311 if (!nlattr_add_string(nw, IFLA_QDISC, "noqueue"))
312 return (false);
313
314 if (!nlattr_add_u32(nw, IFLA_TXQLEN, 1000))
315 return (false);
316
317 nlmsg_end(nw);
318 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
319 return (true);
320 }
321
322 static bool
323 rtnl_newaddr_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
324 struct nl_writer *nw)
325 {
326 if (!nlmsg_copy_header(hdr, nw))
327 return (false);
328
329 struct ifaddrmsg *ifamsg;
330 ifamsg = nlmsg_copy_next_header(hdr, nw, struct ifaddrmsg);
331
332 ifamsg->ifa_family = bsd_to_linux_domain(ifamsg->ifa_family);
333 /* XXX: fake ifa_flags? */
334
335 /* Copy attributes unchanged */
336 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifaddrmsg), nw))
337 return (false);
338
339 nlmsg_end(nw);
340 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
341 return (true);
342 }
343
344 static bool
345 rtnl_newneigh_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
346 struct nl_writer *nw)
347 {
348 if (!nlmsg_copy_header(hdr, nw))
349 return (false);
350
351 struct ndmsg *ndm;
352 ndm = nlmsg_copy_next_header(hdr, nw, struct ndmsg);
353
354 ndm->ndm_family = bsd_to_linux_domain(ndm->ndm_family);
355
356 /* Copy attributes unchanged */
357 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ndmsg), nw))
358 return (false);
359
360 nlmsg_end(nw);
361 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
362 return (true);
363 }
364
365 static bool
366 rtnl_newroute_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
367 struct nl_writer *nw)
368 {
369 if (!nlmsg_copy_header(hdr, nw))
370 return (false);
371
372 struct rtmsg *rtm;
373 rtm = nlmsg_copy_next_header(hdr, nw, struct rtmsg);
374 rtm->rtm_family = bsd_to_linux_domain(rtm->rtm_family);
375
376 struct nlattr *nla;
377
378 int hdrlen = NETLINK_ALIGN(sizeof(struct rtmsg));
379 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen;
380 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen);
381
382 NLA_FOREACH(nla, nla_head, attrs_len) {
383 struct rtattr *rta = (struct rtattr *)nla;
384 //RT_LOG(LOG_DEBUG, "READING attr %d len %d", nla->nla_type, nla->nla_len);
385 if (rta->rta_len < sizeof(struct rtattr)) {
386 break;
387 }
388
389 switch (rta->rta_type) {
390 case NL_RTA_TABLE:
391 {
392 uint32_t fibnum;
393 fibnum = _rta_get_uint32(rta);
394 if (fibnum == 0)
395 fibnum = 254;
396 RT_LOG(LOG_DEBUG3, "XFIBNUM %u", fibnum);
397 if (!nlattr_add_u32(nw, NL_RTA_TABLE, fibnum))
398 return (false);
399 }
400 break;
401 default:
402 if (!nlmsg_copy_nla(nla, nw))
403 return (false);
404 break;
405 }
406 }
407
408 nlmsg_end(nw);
409 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
410 return (true);
411 }
412
413 static bool
414 rtnl_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
415 {
416 RT_LOG(LOG_DEBUG2, "Got message type %d", hdr->nlmsg_type);
417
418 switch (hdr->nlmsg_type) {
419 case NL_RTM_NEWLINK:
420 case NL_RTM_DELLINK:
421 case NL_RTM_GETLINK:
422 return (rtnl_newlink_to_linux(hdr, nlp, nw));
423 case NL_RTM_NEWADDR:
424 case NL_RTM_DELADDR:
425 return (rtnl_newaddr_to_linux(hdr, nlp, nw));
426 case NL_RTM_NEWROUTE:
427 case NL_RTM_DELROUTE:
428 return (rtnl_newroute_to_linux(hdr, nlp, nw));
429 case NL_RTM_NEWNEIGH:
430 case NL_RTM_DELNEIGH:
431 case NL_RTM_GETNEIGH:
432 return (rtnl_newneigh_to_linux(hdr, nlp, nw));
433 default:
434 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated",
435 hdr->nlmsg_type);
436 return (handle_default_out(hdr, nw));
437 }
438 }
439
440 static bool
441 nlmsg_error_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
442 {
443 if (!nlmsg_copy_header(hdr, nw))
444 return (false);
445
446 struct nlmsgerr *nlerr;
447 nlerr = nlmsg_copy_next_header(hdr, nw, struct nlmsgerr);
448 nlerr->error = bsd_to_linux_errno(nlerr->error);
449
450 int copied_len = sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr);
451 if (hdr->nlmsg_len == copied_len) {
452 nlmsg_end(nw);
453 return (true);
454 }
455
456 /*
457 * CAP_ACK was not set. Original request needs to be translated.
458 * XXX: implement translation of the original message
459 */
460 RT_LOG(LOG_DEBUG, "[WARN] Passing ack message type %d untranslated",
461 nlerr->msg.nlmsg_type);
462 char *dst_payload, *src_payload;
463 int copy_len = hdr->nlmsg_len - copied_len;
464 dst_payload = nlmsg_reserve_data(nw, NLMSG_ALIGN(copy_len), char);
465
466 src_payload = (char *)hdr + copied_len;
467
468 memcpy(dst_payload, src_payload, copy_len);
469 nlmsg_end(nw);
470
471 return (true);
472 }
473
474 static bool
475 nlmsg_to_linux(int netlink_family, struct nlmsghdr *hdr, struct nlpcb *nlp,
476 struct nl_writer *nw)
477 {
478 if (hdr->nlmsg_type < NLMSG_MIN_TYPE) {
479 switch (hdr->nlmsg_type) {
480 case NLMSG_ERROR:
481 return (nlmsg_error_to_linux(hdr, nlp, nw));
482 case NLMSG_NOOP:
483 case NLMSG_DONE:
484 case NLMSG_OVERRUN:
485 return (handle_default_out(hdr, nw));
486 default:
487 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated",
488 hdr->nlmsg_type);
489 return (handle_default_out(hdr, nw));
490 }
491 }
492
493 switch (netlink_family) {
494 case NETLINK_ROUTE:
495 return (rtnl_to_linux(hdr, nlp, nw));
496 default:
497 return (handle_default_out(hdr, nw));
498 }
499 }
500
501 static struct mbuf *
502 nlmsgs_to_linux(int netlink_family, char *buf, int data_length, struct nlpcb *nlp)
503 {
504 RT_LOG(LOG_DEBUG3, "LINUX: get %p size %d", buf, data_length);
505 struct nl_writer nw = {};
506
507 struct mbuf *m = NULL;
508 if (!nlmsg_get_chain_writer(&nw, data_length, &m)) {
509 RT_LOG(LOG_DEBUG, "unable to setup chain writer for size %d",
510 data_length);
511 return (NULL);
512 }
513
514 /* Assume correct headers. Buffer IS mutable */
515 int count = 0;
516 for (int offset = 0; offset + sizeof(struct nlmsghdr) <= data_length;) {
517 struct nlmsghdr *hdr = (struct nlmsghdr *)&buf[offset];
518 int msglen = NLMSG_ALIGN(hdr->nlmsg_len);
519 count++;
520
521 if (!nlmsg_to_linux(netlink_family, hdr, nlp, &nw)) {
522 RT_LOG(LOG_DEBUG, "failed to process msg type %d",
523 hdr->nlmsg_type);
524 m_freem(m);
525 return (NULL);
526 }
527 offset += msglen;
528 }
529 nlmsg_flush(&nw);
530 RT_LOG(LOG_DEBUG3, "Processed %d messages, chain size %d", count,
531 m ? m_length(m, NULL) : 0);
532
533 return (m);
534 }
535
536 static struct mbuf *
537 mbufs_to_linux(int netlink_family, struct mbuf *m, struct nlpcb *nlp)
538 {
539 /* XXX: easiest solution, not optimized for performance */
540 int data_length = m_length(m, NULL);
541 char *buf = malloc(data_length, M_LINUX, M_NOWAIT);
542 if (buf == NULL) {
543 RT_LOG(LOG_DEBUG, "unable to allocate %d bytes, dropping message",
544 data_length);
545 m_freem(m);
546 return (NULL);
547 }
548 m_copydata(m, 0, data_length, buf);
549 m_freem(m);
550
551 m = nlmsgs_to_linux(netlink_family, buf, data_length, nlp);
552 free(buf, M_LINUX);
553
554 return (m);
555 }
556
557 static struct linux_netlink_provider linux_netlink_v1 = {
558 .mbufs_to_linux = mbufs_to_linux,
559 .msgs_to_linux = nlmsgs_to_linux,
560 .msg_from_linux = nlmsg_from_linux,
561 };
562
563 void
564 linux_netlink_register(void)
565 {
566 linux_netlink_p = &linux_netlink_v1;
567 }
568
569 void
570 linux_netlink_deregister(void)
571 {
572 linux_netlink_p = NULL;
573 }
Cache object: 28e5ee7b873e187bc38e100a957d708e
|