1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 *
4 * Copyright (c) 2022 Alexander V. Chernikov
5 *
6 * Redistribution and use in source and binary forms, with or without
7 * modification, are permitted provided that the following conditions
8 * are met:
9 * 1. Redistributions of source code must retain the above copyright
10 * notice, this list of conditions and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 #include "opt_inet.h"
31 #include "opt_inet6.h"
32 #include <sys/types.h>
33 #include <sys/malloc.h>
34 #include <sys/rmlock.h>
35 #include <sys/socket.h>
36 #include <sys/ck.h>
37
38 #include <net/if.h>
39 #include <net/if_dl.h>
40 #include <net/route.h>
41 #include <net/route/nhop.h>
42 #include <net/route/route_ctl.h>
43 #include <netlink/netlink.h>
44 #include <netlink/netlink_ctl.h>
45 #include <netlink/netlink_linux.h>
46 #include <netlink/netlink_route.h>
47
48 #include <compat/linux/linux.h>
49 #include <compat/linux/linux_common.h>
50 #include <compat/linux/linux_util.h>
51
52 #define DEBUG_MOD_NAME nl_linux
53 #define DEBUG_MAX_LEVEL LOG_DEBUG3
54 #include <netlink/netlink_debug.h>
55 _DECLARE_DEBUG(LOG_DEBUG);
56
/*
 * Checks that the payload carried by @rta is exactly @sz bytes long.
 * Returns true on an exact match, false otherwise.
 */
static bool
valid_rta_size(const struct rtattr *rta, int sz)
{
	if (NL_RTA_DATA_LEN(rta) != sz)
		return (false);

	return (true);
}
62
/*
 * Checks that @rta carries exactly one 32-bit value as its payload.
 */
static bool
valid_rta_u32(const struct rtattr *rta)
{
	const int expected_len = sizeof(uint32_t);

	return (valid_rta_size(rta, expected_len));
}
68
/*
 * Reads the payload of @rta as a host-order 32-bit unsigned value.
 * The payload size is not checked here; callers are expected to have
 * validated it (e.g. with valid_rta_u32()) beforehand.
 */
static uint32_t
_rta_get_uint32(const struct rtattr *rta)
{
	const uint32_t *valp = (const uint32_t *)NL_RTA_DATA_CONST(rta);

	return (*valp);
}
74
75 static struct nlmsghdr *
76 rtnl_neigh_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
77 {
78 struct ndmsg *ndm = (struct ndmsg *)(hdr + 1);
79
80 if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ndmsg))
81 ndm->ndm_family = linux_to_bsd_domain(ndm->ndm_family);
82
83 return (hdr);
84 }
85
86 static struct nlmsghdr *
87 rtnl_ifaddr_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
88 {
89 struct ifaddrmsg *ifam = (struct ifaddrmsg *)(hdr + 1);
90
91 if (hdr->nlmsg_len >= sizeof(struct nlmsghdr) + sizeof(struct ifaddrmsg))
92 ifam->ifa_family = linux_to_bsd_domain(ifam->ifa_family);
93
94 return (hdr);
95 }
96
97 static struct nlmsghdr *
98 rtnl_route_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
99 {
100 /* Tweak address families and default fib only */
101 struct rtmsg *rtm = (struct rtmsg *)(hdr + 1);
102 struct nlattr *nla, *nla_head;
103 int attrs_len;
104
105 rtm->rtm_family = linux_to_bsd_domain(rtm->rtm_family);
106
107 if (rtm->rtm_table == 254)
108 rtm->rtm_table = 0;
109
110 attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr);
111 attrs_len -= NETLINK_ALIGN(sizeof(struct rtmsg));
112 nla_head = (struct nlattr *)((char *)rtm + NETLINK_ALIGN(sizeof(struct rtmsg)));
113
114 NLA_FOREACH(nla, nla_head, attrs_len) {
115 RT_LOG(LOG_DEBUG3, "GOT type %d len %d total %d",
116 nla->nla_type, nla->nla_len, attrs_len);
117 struct rtattr *rta = (struct rtattr *)nla;
118 if (rta->rta_len < sizeof(struct rtattr)) {
119 break;
120 }
121 switch (rta->rta_type) {
122 case NL_RTA_TABLE:
123 if (!valid_rta_u32(rta))
124 goto done;
125 rtm->rtm_table = 0;
126 uint32_t fibnum = _rta_get_uint32(rta);
127 RT_LOG(LOG_DEBUG3, "GET RTABLE: %u", fibnum);
128 if (fibnum == 254) {
129 *((uint32_t *)NL_RTA_DATA(rta)) = 0;
130 }
131 break;
132 }
133 }
134
135 done:
136 return (hdr);
137 }
138
139 static struct nlmsghdr *
140 rtnl_from_linux(struct nlmsghdr *hdr, struct nl_pstate *npt)
141 {
142 switch (hdr->nlmsg_type) {
143 case NL_RTM_GETROUTE:
144 case NL_RTM_NEWROUTE:
145 case NL_RTM_DELROUTE:
146 return (rtnl_route_from_linux(hdr, npt));
147 case NL_RTM_GETNEIGH:
148 return (rtnl_neigh_from_linux(hdr, npt));
149 case NL_RTM_GETADDR:
150 return (rtnl_ifaddr_from_linux(hdr, npt));
151 /* Silence warning for the messages where no translation is required */
152 case NL_RTM_NEWLINK:
153 case NL_RTM_DELLINK:
154 case NL_RTM_GETLINK:
155 break;
156 default:
157 RT_LOG(LOG_DEBUG, "Passing message type %d untranslated",
158 hdr->nlmsg_type);
159 }
160
161 return (hdr);
162 }
163
164 static struct nlmsghdr *
165 nlmsg_from_linux(int netlink_family, struct nlmsghdr *hdr,
166 struct nl_pstate *npt)
167 {
168 switch (netlink_family) {
169 case NETLINK_ROUTE:
170 return (rtnl_from_linux(hdr, npt));
171 }
172
173 return (hdr);
174 }
175
176
177 /************************************************************
178 * Kernel -> Linux
179 ************************************************************/
180
181 static bool
182 handle_default_out(struct nlmsghdr *hdr, struct nl_writer *nw)
183 {
184 char *out_hdr;
185 out_hdr = nlmsg_reserve_data(nw, NLMSG_ALIGN(hdr->nlmsg_len), char);
186
187 if (out_hdr != NULL) {
188 memcpy(out_hdr, hdr, hdr->nlmsg_len);
189 return (true);
190 }
191 return (false);
192 }
193
194 static bool
195 nlmsg_copy_header(struct nlmsghdr *hdr, struct nl_writer *nw)
196 {
197 return (nlmsg_add(nw, hdr->nlmsg_pid, hdr->nlmsg_seq, hdr->nlmsg_type,
198 hdr->nlmsg_flags, 0));
199 }
200
201 static void *
202 _nlmsg_copy_next_header(struct nlmsghdr *hdr, struct nl_writer *nw, int sz)
203 {
204 void *next_hdr = nlmsg_reserve_data(nw, sz, void);
205 memcpy(next_hdr, hdr + 1, NLMSG_ALIGN(sz));
206
207 return (next_hdr);
208 }
209 #define nlmsg_copy_next_header(_hdr, _ns, _t) \
210 ((_t *)(_nlmsg_copy_next_header(_hdr, _ns, sizeof(_t))))
211
212 static bool
213 nlmsg_copy_nla(const struct nlattr *nla_orig, struct nl_writer *nw)
214 {
215 struct nlattr *nla = nlmsg_reserve_data(nw, nla_orig->nla_len, struct nlattr);
216 if (nla != NULL) {
217 memcpy(nla, nla_orig, nla_orig->nla_len);
218 return (true);
219 }
220 return (false);
221 }
222
223 static bool
224 nlmsg_copy_all_nla(struct nlmsghdr *hdr, int raw_hdrlen, struct nl_writer *nw)
225 {
226 struct nlattr *nla;
227
228 int hdrlen = NETLINK_ALIGN(raw_hdrlen);
229 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen;
230 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen);
231
232 NLA_FOREACH(nla, nla_head, attrs_len) {
233 RT_LOG(LOG_DEBUG3, "reading attr %d len %d", nla->nla_type, nla->nla_len);
234 if (nla->nla_len < sizeof(struct nlattr)) {
235 return (false);
236 }
237 if (!nlmsg_copy_nla(nla, nw))
238 return (false);
239 }
240 return (true);
241 }
242
243 static unsigned int
244 rtnl_if_flags_to_linux(unsigned int if_flags)
245 {
246 unsigned int result = 0;
247
248 for (int i = 0; i < 31; i++) {
249 unsigned int flag = 1 << i;
250 if (!(flag & if_flags))
251 continue;
252 switch (flag) {
253 case IFF_UP:
254 case IFF_BROADCAST:
255 case IFF_DEBUG:
256 case IFF_LOOPBACK:
257 case IFF_POINTOPOINT:
258 case IFF_DRV_RUNNING:
259 case IFF_NOARP:
260 case IFF_PROMISC:
261 case IFF_ALLMULTI:
262 result |= flag;
263 break;
264 case IFF_KNOWSEPOCH:
265 case IFF_DRV_OACTIVE:
266 case IFF_SIMPLEX:
267 case IFF_LINK0:
268 case IFF_LINK1:
269 case IFF_LINK2:
270 case IFF_CANTCONFIG:
271 case IFF_PPROMISC:
272 case IFF_MONITOR:
273 case IFF_STATICARP:
274 case IFF_DYING:
275 case IFF_RENAMING:
276 case IFF_NOGROUP:
277 /* No Linux analogue */
278 break;
279 case IFF_MULTICAST:
280 result |= 1 << 12;
281 }
282 }
283 return (result);
284 }
285
286 static bool
287 rtnl_newlink_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
288 struct nl_writer *nw)
289 {
290 if (!nlmsg_copy_header(hdr, nw))
291 return (false);
292
293 struct ifinfomsg *ifinfo;
294 ifinfo = nlmsg_copy_next_header(hdr, nw, struct ifinfomsg);
295
296 ifinfo->ifi_family = bsd_to_linux_domain(ifinfo->ifi_family);
297 /* Convert interface type */
298 switch (ifinfo->ifi_type) {
299 case IFT_ETHER:
300 ifinfo->ifi_type = 1; // ARPHRD_ETHER
301 break;
302 }
303 ifinfo->ifi_flags = rtnl_if_flags_to_linux(ifinfo->ifi_flags);
304
305 /* Copy attributes unchanged */
306 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifinfomsg), nw))
307 return (false);
308
309 /* make ip(8) happy */
310 if (!nlattr_add_string(nw, IFLA_QDISC, "noqueue"))
311 return (false);
312
313 if (!nlattr_add_u32(nw, IFLA_TXQLEN, 1000))
314 return (false);
315
316 nlmsg_end(nw);
317 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
318 return (true);
319 }
320
321 static bool
322 rtnl_newaddr_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
323 struct nl_writer *nw)
324 {
325 if (!nlmsg_copy_header(hdr, nw))
326 return (false);
327
328 struct ifaddrmsg *ifamsg;
329 ifamsg = nlmsg_copy_next_header(hdr, nw, struct ifaddrmsg);
330
331 ifamsg->ifa_family = bsd_to_linux_domain(ifamsg->ifa_family);
332 /* XXX: fake ifa_flags? */
333
334 /* Copy attributes unchanged */
335 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ifaddrmsg), nw))
336 return (false);
337
338 nlmsg_end(nw);
339 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
340 return (true);
341 }
342
343 static bool
344 rtnl_newneigh_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
345 struct nl_writer *nw)
346 {
347 if (!nlmsg_copy_header(hdr, nw))
348 return (false);
349
350 struct ndmsg *ndm;
351 ndm = nlmsg_copy_next_header(hdr, nw, struct ndmsg);
352
353 ndm->ndm_family = bsd_to_linux_domain(ndm->ndm_family);
354
355 /* Copy attributes unchanged */
356 if (!nlmsg_copy_all_nla(hdr, sizeof(struct ndmsg), nw))
357 return (false);
358
359 nlmsg_end(nw);
360 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
361 return (true);
362 }
363
364 static bool
365 rtnl_newroute_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp,
366 struct nl_writer *nw)
367 {
368 if (!nlmsg_copy_header(hdr, nw))
369 return (false);
370
371 struct rtmsg *rtm;
372 rtm = nlmsg_copy_next_header(hdr, nw, struct rtmsg);
373 rtm->rtm_family = bsd_to_linux_domain(rtm->rtm_family);
374
375 struct nlattr *nla;
376
377 int hdrlen = NETLINK_ALIGN(sizeof(struct rtmsg));
378 int attrs_len = hdr->nlmsg_len - sizeof(struct nlmsghdr) - hdrlen;
379 struct nlattr *nla_head = (struct nlattr *)((char *)(hdr + 1) + hdrlen);
380
381 NLA_FOREACH(nla, nla_head, attrs_len) {
382 struct rtattr *rta = (struct rtattr *)nla;
383 //RT_LOG(LOG_DEBUG, "READING attr %d len %d", nla->nla_type, nla->nla_len);
384 if (rta->rta_len < sizeof(struct rtattr)) {
385 break;
386 }
387
388 switch (rta->rta_type) {
389 case NL_RTA_TABLE:
390 {
391 uint32_t fibnum;
392 fibnum = _rta_get_uint32(rta);
393 if (fibnum == 0)
394 fibnum = 254;
395 RT_LOG(LOG_DEBUG3, "XFIBNUM %u", fibnum);
396 if (!nlattr_add_u32(nw, NL_RTA_TABLE, fibnum))
397 return (false);
398 }
399 break;
400 default:
401 if (!nlmsg_copy_nla(nla, nw))
402 return (false);
403 break;
404 }
405 }
406
407 nlmsg_end(nw);
408 RT_LOG(LOG_DEBUG2, "done processing nw %p", nw);
409 return (true);
410 }
411
412 static bool
413 rtnl_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
414 {
415 RT_LOG(LOG_DEBUG2, "Got message type %d", hdr->nlmsg_type);
416
417 switch (hdr->nlmsg_type) {
418 case NL_RTM_NEWLINK:
419 case NL_RTM_DELLINK:
420 case NL_RTM_GETLINK:
421 return (rtnl_newlink_to_linux(hdr, nlp, nw));
422 case NL_RTM_NEWADDR:
423 case NL_RTM_DELADDR:
424 return (rtnl_newaddr_to_linux(hdr, nlp, nw));
425 case NL_RTM_NEWROUTE:
426 case NL_RTM_DELROUTE:
427 return (rtnl_newroute_to_linux(hdr, nlp, nw));
428 case NL_RTM_NEWNEIGH:
429 case NL_RTM_DELNEIGH:
430 case NL_RTM_GETNEIGH:
431 return (rtnl_newneigh_to_linux(hdr, nlp, nw));
432 default:
433 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated",
434 hdr->nlmsg_type);
435 return (handle_default_out(hdr, nw));
436 }
437 }
438
439 static bool
440 nlmsg_error_to_linux(struct nlmsghdr *hdr, struct nlpcb *nlp, struct nl_writer *nw)
441 {
442 if (!nlmsg_copy_header(hdr, nw))
443 return (false);
444
445 struct nlmsgerr *nlerr;
446 nlerr = nlmsg_copy_next_header(hdr, nw, struct nlmsgerr);
447 nlerr->error = bsd_to_linux_errno(nlerr->error);
448
449 int copied_len = sizeof(struct nlmsghdr) + sizeof(struct nlmsgerr);
450 if (hdr->nlmsg_len == copied_len) {
451 nlmsg_end(nw);
452 return (true);
453 }
454
455 /*
456 * CAP_ACK was not set. Original request needs to be translated.
457 * XXX: implement translation of the original message
458 */
459 RT_LOG(LOG_DEBUG, "[WARN] Passing ack message type %d untranslated",
460 nlerr->msg.nlmsg_type);
461 char *dst_payload, *src_payload;
462 int copy_len = hdr->nlmsg_len - copied_len;
463 dst_payload = nlmsg_reserve_data(nw, NLMSG_ALIGN(copy_len), char);
464
465 src_payload = (char *)hdr + copied_len;
466
467 memcpy(dst_payload, src_payload, copy_len);
468 nlmsg_end(nw);
469
470 return (true);
471 }
472
473 static bool
474 nlmsg_to_linux(int netlink_family, struct nlmsghdr *hdr, struct nlpcb *nlp,
475 struct nl_writer *nw)
476 {
477 if (hdr->nlmsg_type < NLMSG_MIN_TYPE) {
478 switch (hdr->nlmsg_type) {
479 case NLMSG_ERROR:
480 return (nlmsg_error_to_linux(hdr, nlp, nw));
481 case NLMSG_NOOP:
482 case NLMSG_DONE:
483 case NLMSG_OVERRUN:
484 return (handle_default_out(hdr, nw));
485 default:
486 RT_LOG(LOG_DEBUG, "[WARN] Passing message type %d untranslated",
487 hdr->nlmsg_type);
488 return (handle_default_out(hdr, nw));
489 }
490 }
491
492 switch (netlink_family) {
493 case NETLINK_ROUTE:
494 return (rtnl_to_linux(hdr, nlp, nw));
495 default:
496 return (handle_default_out(hdr, nw));
497 }
498 }
499
500 static struct mbuf *
501 nlmsgs_to_linux(int netlink_family, char *buf, int data_length, struct nlpcb *nlp)
502 {
503 RT_LOG(LOG_DEBUG3, "LINUX: get %p size %d", buf, data_length);
504 struct nl_writer nw = {};
505
506 struct mbuf *m = NULL;
507 if (!nlmsg_get_chain_writer(&nw, data_length, &m)) {
508 RT_LOG(LOG_DEBUG, "unable to setup chain writer for size %d",
509 data_length);
510 return (NULL);
511 }
512
513 /* Assume correct headers. Buffer IS mutable */
514 int count = 0;
515 for (int offset = 0; offset + sizeof(struct nlmsghdr) <= data_length;) {
516 struct nlmsghdr *hdr = (struct nlmsghdr *)&buf[offset];
517 int msglen = NLMSG_ALIGN(hdr->nlmsg_len);
518 count++;
519
520 if (!nlmsg_to_linux(netlink_family, hdr, nlp, &nw)) {
521 RT_LOG(LOG_DEBUG, "failed to process msg type %d",
522 hdr->nlmsg_type);
523 m_freem(m);
524 return (NULL);
525 }
526 offset += msglen;
527 }
528 nlmsg_flush(&nw);
529 RT_LOG(LOG_DEBUG3, "Processed %d messages, chain size %d", count,
530 m ? m_length(m, NULL) : 0);
531
532 return (m);
533 }
534
535 static struct mbuf *
536 mbufs_to_linux(int netlink_family, struct mbuf *m, struct nlpcb *nlp)
537 {
538 /* XXX: easiest solution, not optimized for performance */
539 int data_length = m_length(m, NULL);
540 char *buf = malloc(data_length, M_LINUX, M_NOWAIT);
541 if (buf == NULL) {
542 RT_LOG(LOG_DEBUG, "unable to allocate %d bytes, dropping message",
543 data_length);
544 m_freem(m);
545 return (NULL);
546 }
547 m_copydata(m, 0, data_length, buf);
548 m_freem(m);
549
550 m = nlmsgs_to_linux(netlink_family, buf, data_length, nlp);
551 free(buf, M_LINUX);
552
553 return (m);
554 }
555
556 static struct linux_netlink_provider linux_netlink_v1 = {
557 .mbufs_to_linux = mbufs_to_linux,
558 .msgs_to_linux = nlmsgs_to_linux,
559 .msg_from_linux = nlmsg_from_linux,
560 };
561
562 void
563 linux_netlink_register()
564 {
565 linux_netlink_p = &linux_netlink_v1;
566 }
567
568 void
569 linux_netlink_deregister()
570 {
571 linux_netlink_p = NULL;
572 }
Cache object: 14d7e85e02ce49f209fa66fcb7bc4b44
|