1 /*-
2 * Copyright (c) 2015 Dmitry Chagin
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29
30 #include <opt_inet6.h>
31
32 #include <sys/param.h>
33 #include <sys/systm.h>
34 #include <sys/conf.h>
35 #include <sys/ctype.h>
36 #include <sys/jail.h>
37 #include <sys/lock.h>
38 #include <sys/malloc.h>
39 #include <sys/signalvar.h>
40 #include <sys/socket.h>
41 #include <sys/socketvar.h>
42
43 #include <net/if.h>
44 #include <net/if_var.h>
45 #include <net/if_dl.h>
46 #include <net/if_types.h>
47
48 #include <sys/un.h>
49 #include <netinet/in.h>
50
51 #include <compat/linux/linux.h>
52 #include <compat/linux/linux_common.h>
53 #include <compat/linux/linux_util.h>
54
/*
 * Global futex list and the mutex guarding it. Neither object is
 * referenced again in this file; presumably the futex implementation
 * uses them -- verify against that code.
 */
struct futex_list futex_list;
struct mtx futex_mtx;			/* protects the futex list */

/*
 * ifname_linux_to_bsd() scans names using LINUX_IFNAMSIZ but copies them
 * out using IFNAMSIZ, so the two limits must be equal. Fail the build
 * if they ever diverge.
 */
CTASSERT(LINUX_IFNAMSIZ == IFNAMSIZ);
59
60 static int bsd_to_linux_sigtbl[LINUX_SIGTBLSZ] = {
61 LINUX_SIGHUP, /* SIGHUP */
62 LINUX_SIGINT, /* SIGINT */
63 LINUX_SIGQUIT, /* SIGQUIT */
64 LINUX_SIGILL, /* SIGILL */
65 LINUX_SIGTRAP, /* SIGTRAP */
66 LINUX_SIGABRT, /* SIGABRT */
67 0, /* SIGEMT */
68 LINUX_SIGFPE, /* SIGFPE */
69 LINUX_SIGKILL, /* SIGKILL */
70 LINUX_SIGBUS, /* SIGBUS */
71 LINUX_SIGSEGV, /* SIGSEGV */
72 LINUX_SIGSYS, /* SIGSYS */
73 LINUX_SIGPIPE, /* SIGPIPE */
74 LINUX_SIGALRM, /* SIGALRM */
75 LINUX_SIGTERM, /* SIGTERM */
76 LINUX_SIGURG, /* SIGURG */
77 LINUX_SIGSTOP, /* SIGSTOP */
78 LINUX_SIGTSTP, /* SIGTSTP */
79 LINUX_SIGCONT, /* SIGCONT */
80 LINUX_SIGCHLD, /* SIGCHLD */
81 LINUX_SIGTTIN, /* SIGTTIN */
82 LINUX_SIGTTOU, /* SIGTTOU */
83 LINUX_SIGIO, /* SIGIO */
84 LINUX_SIGXCPU, /* SIGXCPU */
85 LINUX_SIGXFSZ, /* SIGXFSZ */
86 LINUX_SIGVTALRM,/* SIGVTALRM */
87 LINUX_SIGPROF, /* SIGPROF */
88 LINUX_SIGWINCH, /* SIGWINCH */
89 0, /* SIGINFO */
90 LINUX_SIGUSR1, /* SIGUSR1 */
91 LINUX_SIGUSR2 /* SIGUSR2 */
92 };
93
94 static int linux_to_bsd_sigtbl[LINUX_SIGTBLSZ] = {
95 SIGHUP, /* LINUX_SIGHUP */
96 SIGINT, /* LINUX_SIGINT */
97 SIGQUIT, /* LINUX_SIGQUIT */
98 SIGILL, /* LINUX_SIGILL */
99 SIGTRAP, /* LINUX_SIGTRAP */
100 SIGABRT, /* LINUX_SIGABRT */
101 SIGBUS, /* LINUX_SIGBUS */
102 SIGFPE, /* LINUX_SIGFPE */
103 SIGKILL, /* LINUX_SIGKILL */
104 SIGUSR1, /* LINUX_SIGUSR1 */
105 SIGSEGV, /* LINUX_SIGSEGV */
106 SIGUSR2, /* LINUX_SIGUSR2 */
107 SIGPIPE, /* LINUX_SIGPIPE */
108 SIGALRM, /* LINUX_SIGALRM */
109 SIGTERM, /* LINUX_SIGTERM */
110 SIGBUS, /* LINUX_SIGSTKFLT */
111 SIGCHLD, /* LINUX_SIGCHLD */
112 SIGCONT, /* LINUX_SIGCONT */
113 SIGSTOP, /* LINUX_SIGSTOP */
114 SIGTSTP, /* LINUX_SIGTSTP */
115 SIGTTIN, /* LINUX_SIGTTIN */
116 SIGTTOU, /* LINUX_SIGTTOU */
117 SIGURG, /* LINUX_SIGURG */
118 SIGXCPU, /* LINUX_SIGXCPU */
119 SIGXFSZ, /* LINUX_SIGXFSZ */
120 SIGVTALRM, /* LINUX_SIGVTALARM */
121 SIGPROF, /* LINUX_SIGPROF */
122 SIGWINCH, /* LINUX_SIGWINCH */
123 SIGIO, /* LINUX_SIGIO */
124 /*
125 * FreeBSD does not have SIGPWR signal, map Linux SIGPWR signal
126 * to the first unused FreeBSD signal number. Since Linux supports
127 * signals from 1 to 64 we are ok here as our SIGRTMIN = 65.
128 */
129 SIGRTMIN, /* LINUX_SIGPWR */
130 SIGSYS /* LINUX_SIGSYS */
131 };
132
/*
 * Device node registered as "shm/.mountpoint" by linux_dev_shm_create()
 * and removed again by linux_dev_shm_destroy().
 */
static struct cdev *dev_shm_cdev;
/* Switch table for that node: only the version and the name are set. */
static struct cdevsw dev_shm_cdevsw = {
	.d_version = D_VERSION,
	.d_name = "dev_shm",
};
138
139 /*
140 * Map Linux RT signals to the FreeBSD RT signals.
141 */
142 static inline int
143 linux_to_bsd_rt_signal(int sig)
144 {
145
146 return (SIGRTMIN + 1 + sig - LINUX_SIGRTMIN);
147 }
148
149 static inline int
150 bsd_to_linux_rt_signal(int sig)
151 {
152
153 return (sig - SIGRTMIN - 1 + LINUX_SIGRTMIN);
154 }
155
156 int
157 linux_to_bsd_signal(int sig)
158 {
159
160 KASSERT(sig > 0 && sig <= LINUX_SIGRTMAX, ("invalid Linux signal %d\n", sig));
161
162 if (sig < LINUX_SIGRTMIN)
163 return (linux_to_bsd_sigtbl[_SIG_IDX(sig)]);
164
165 return (linux_to_bsd_rt_signal(sig));
166 }
167
168 int
169 bsd_to_linux_signal(int sig)
170 {
171
172 if (sig <= LINUX_SIGTBLSZ)
173 return (bsd_to_linux_sigtbl[_SIG_IDX(sig)]);
174 if (sig == SIGRTMIN)
175 return (LINUX_SIGPWR);
176
177 return (bsd_to_linux_rt_signal(sig));
178 }
179
180 int
181 linux_to_bsd_sigaltstack(int lsa)
182 {
183 int bsa = 0;
184
185 if (lsa & LINUX_SS_DISABLE)
186 bsa |= SS_DISABLE;
187 /*
188 * Linux ignores SS_ONSTACK flag for ss
189 * parameter while FreeBSD prohibits it.
190 */
191 return (bsa);
192 }
193
194 int
195 bsd_to_linux_sigaltstack(int bsa)
196 {
197 int lsa = 0;
198
199 if (bsa & SS_DISABLE)
200 lsa |= LINUX_SS_DISABLE;
201 if (bsa & SS_ONSTACK)
202 lsa |= LINUX_SS_ONSTACK;
203 return (lsa);
204 }
205
206 void
207 linux_to_bsd_sigset(l_sigset_t *lss, sigset_t *bss)
208 {
209 int b, l;
210
211 SIGEMPTYSET(*bss);
212 for (l = 1; l <= LINUX_SIGRTMAX; l++) {
213 if (LINUX_SIGISMEMBER(*lss, l)) {
214 b = linux_to_bsd_signal(l);
215 if (b)
216 SIGADDSET(*bss, b);
217 }
218 }
219 }
220
221 void
222 bsd_to_linux_sigset(sigset_t *bss, l_sigset_t *lss)
223 {
224 int b, l;
225
226 LINUX_SIGEMPTYSET(*lss);
227 for (b = 1; b <= SIGRTMAX; b++) {
228 if (SIGISMEMBER(*bss, b)) {
229 l = bsd_to_linux_signal(b);
230 if (l)
231 LINUX_SIGADDSET(*lss, l);
232 }
233 }
234 }
235
236 /*
237 * Translate a Linux interface name to a FreeBSD interface name,
238 * and return the associated ifnet structure
239 * bsdname and lxname need to be least IFNAMSIZ bytes long, but
240 * can point to the same buffer.
241 */
242 struct ifnet *
243 ifname_linux_to_bsd(struct thread *td, const char *lxname, char *bsdname)
244 {
245 struct ifnet *ifp;
246 int len, unit;
247 char *ep;
248 int index;
249 bool is_eth, is_lo;
250
251 for (len = 0; len < LINUX_IFNAMSIZ; ++len)
252 if (!isalpha(lxname[len]) || lxname[len] == '\0')
253 break;
254 if (len == 0 || len == LINUX_IFNAMSIZ)
255 return (NULL);
256 /* Linux loopback interface name is lo (not lo0) */
257 is_lo = (len == 2 && strncmp(lxname, "lo", len) == 0);
258 unit = (int)strtoul(lxname + len, &ep, 10);
259 if ((ep == NULL || ep == lxname + len || ep >= lxname + LINUX_IFNAMSIZ) &&
260 is_lo == 0)
261 return (NULL);
262 index = 0;
263 is_eth = (len == 3 && strncmp(lxname, "eth", len) == 0);
264
265 CURVNET_SET(TD_TO_VNET(td));
266 IFNET_RLOCK();
267 CK_STAILQ_FOREACH(ifp, &V_ifnet, if_link) {
268 /*
269 * Allow Linux programs to use FreeBSD names. Don't presume
270 * we never have an interface named "eth", so don't make
271 * the test optional based on is_eth.
272 */
273 if (strncmp(ifp->if_xname, lxname, LINUX_IFNAMSIZ) == 0)
274 break;
275 if (is_eth && IFP_IS_ETH(ifp) && unit == index++)
276 break;
277 if (is_lo && IFP_IS_LOOP(ifp))
278 break;
279 }
280 IFNET_RUNLOCK();
281 CURVNET_RESTORE();
282 if (ifp != NULL && bsdname != NULL)
283 strlcpy(bsdname, ifp->if_xname, IFNAMSIZ);
284 return (ifp);
285 }
286
287 void
288 linux_ifflags(struct ifnet *ifp, short *flags)
289 {
290 unsigned short fl;
291
292 fl = (ifp->if_flags | ifp->if_drv_flags) & 0xffff;
293 *flags = 0;
294 if (fl & IFF_UP)
295 *flags |= LINUX_IFF_UP;
296 if (fl & IFF_BROADCAST)
297 *flags |= LINUX_IFF_BROADCAST;
298 if (fl & IFF_DEBUG)
299 *flags |= LINUX_IFF_DEBUG;
300 if (fl & IFF_LOOPBACK)
301 *flags |= LINUX_IFF_LOOPBACK;
302 if (fl & IFF_POINTOPOINT)
303 *flags |= LINUX_IFF_POINTOPOINT;
304 if (fl & IFF_DRV_RUNNING)
305 *flags |= LINUX_IFF_RUNNING;
306 if (fl & IFF_NOARP)
307 *flags |= LINUX_IFF_NOARP;
308 if (fl & IFF_PROMISC)
309 *flags |= LINUX_IFF_PROMISC;
310 if (fl & IFF_ALLMULTI)
311 *flags |= LINUX_IFF_ALLMULTI;
312 if (fl & IFF_MULTICAST)
313 *flags |= LINUX_IFF_MULTICAST;
314 }
315
316 int
317 linux_ifhwaddr(struct ifnet *ifp, struct l_sockaddr *lsa)
318 {
319 struct ifaddr *ifa;
320 struct sockaddr_dl *sdl;
321
322 if (IFP_IS_LOOP(ifp)) {
323 bzero(lsa, sizeof(*lsa));
324 lsa->sa_family = LINUX_ARPHRD_LOOPBACK;
325 return (0);
326 }
327
328 if (!IFP_IS_ETH(ifp))
329 return (ENOENT);
330
331 CK_STAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) {
332 sdl = (struct sockaddr_dl*)ifa->ifa_addr;
333 if (sdl != NULL && (sdl->sdl_family == AF_LINK) &&
334 (sdl->sdl_type == IFT_ETHER)) {
335 bzero(lsa, sizeof(*lsa));
336 lsa->sa_family = LINUX_ARPHRD_ETHER;
337 bcopy(LLADDR(sdl), lsa->sa_data, LINUX_IFHWADDRLEN);
338 return (0);
339 }
340 }
341
342 return (ENOENT);
343 }
344
345 int
346 linux_to_bsd_domain(int domain)
347 {
348
349 switch (domain) {
350 case LINUX_AF_UNSPEC:
351 return (AF_UNSPEC);
352 case LINUX_AF_UNIX:
353 return (AF_LOCAL);
354 case LINUX_AF_INET:
355 return (AF_INET);
356 case LINUX_AF_INET6:
357 return (AF_INET6);
358 case LINUX_AF_AX25:
359 return (AF_CCITT);
360 case LINUX_AF_IPX:
361 return (AF_IPX);
362 case LINUX_AF_APPLETALK:
363 return (AF_APPLETALK);
364 }
365 return (-1);
366 }
367
368 int
369 bsd_to_linux_domain(int domain)
370 {
371
372 switch (domain) {
373 case AF_UNSPEC:
374 return (LINUX_AF_UNSPEC);
375 case AF_LOCAL:
376 return (LINUX_AF_UNIX);
377 case AF_INET:
378 return (LINUX_AF_INET);
379 case AF_INET6:
380 return (LINUX_AF_INET6);
381 case AF_CCITT:
382 return (LINUX_AF_AX25);
383 case AF_IPX:
384 return (LINUX_AF_IPX);
385 case AF_APPLETALK:
386 return (LINUX_AF_APPLETALK);
387 }
388 return (-1);
389 }
390
391 /*
392 * Based on the fact that:
393 * 1. Native and Linux storage of struct sockaddr
394 * and struct sockaddr_in6 are equal.
395 * 2. On Linux sa_family is the first member of all struct sockaddr.
396 */
397 int
398 bsd_to_linux_sockaddr(const struct sockaddr *sa, struct l_sockaddr **lsa,
399 socklen_t len)
400 {
401 struct l_sockaddr *kosa;
402 int error, bdom;
403
404 *lsa = NULL;
405 if (len < 2 || len > UCHAR_MAX)
406 return (EINVAL);
407
408 kosa = malloc(len, M_SONAME, M_WAITOK);
409 bcopy(sa, kosa, len);
410
411 bdom = bsd_to_linux_domain(sa->sa_family);
412 if (bdom == -1) {
413 error = EAFNOSUPPORT;
414 goto out;
415 }
416
417 kosa->sa_family = bdom;
418 *lsa = kosa;
419 return (0);
420
421 out:
422 free(kosa, M_SONAME);
423 return (error);
424 }
425
426 int
427 linux_to_bsd_sockaddr(const struct l_sockaddr *osa, struct sockaddr **sap,
428 socklen_t *len)
429 {
430 struct sockaddr *sa;
431 struct l_sockaddr *kosa;
432 #ifdef INET6
433 struct sockaddr_in6 *sin6;
434 bool oldv6size;
435 #endif
436 char *name;
437 int salen, bdom, error, hdrlen, namelen;
438
439 if (*len < 2 || *len > UCHAR_MAX)
440 return (EINVAL);
441
442 salen = *len;
443
444 #ifdef INET6
445 oldv6size = false;
446 /*
447 * Check for old (pre-RFC2553) sockaddr_in6. We may accept it
448 * if it's a v4-mapped address, so reserve the proper space
449 * for it.
450 */
451 if (salen == sizeof(struct sockaddr_in6) - sizeof(uint32_t)) {
452 salen += sizeof(uint32_t);
453 oldv6size = true;
454 }
455 #endif
456
457 kosa = malloc(salen, M_SONAME, M_WAITOK);
458
459 if ((error = copyin(osa, kosa, *len)))
460 goto out;
461
462 bdom = linux_to_bsd_domain(kosa->sa_family);
463 if (bdom == -1) {
464 error = EAFNOSUPPORT;
465 goto out;
466 }
467
468 #ifdef INET6
469 /*
470 * Older Linux IPv6 code uses obsolete RFC2133 struct sockaddr_in6,
471 * which lacks the scope id compared with RFC2553 one. If we detect
472 * the situation, reject the address and write a message to system log.
473 *
474 * Still accept addresses for which the scope id is not used.
475 */
476 if (oldv6size) {
477 if (bdom == AF_INET6) {
478 sin6 = (struct sockaddr_in6 *)kosa;
479 if (IN6_IS_ADDR_V4MAPPED(&sin6->sin6_addr) ||
480 (!IN6_IS_ADDR_LINKLOCAL(&sin6->sin6_addr) &&
481 !IN6_IS_ADDR_SITELOCAL(&sin6->sin6_addr) &&
482 !IN6_IS_ADDR_V4COMPAT(&sin6->sin6_addr) &&
483 !IN6_IS_ADDR_UNSPECIFIED(&sin6->sin6_addr) &&
484 !IN6_IS_ADDR_MULTICAST(&sin6->sin6_addr))) {
485 sin6->sin6_scope_id = 0;
486 } else {
487 linux_msg(curthread,
488 "obsolete pre-RFC2553 sockaddr_in6 rejected");
489 error = EINVAL;
490 goto out;
491 }
492 } else
493 salen -= sizeof(uint32_t);
494 }
495 #endif
496 if (bdom == AF_INET) {
497 if (salen < sizeof(struct sockaddr_in)) {
498 error = EINVAL;
499 goto out;
500 }
501 salen = sizeof(struct sockaddr_in);
502 }
503
504 if (bdom == AF_LOCAL && salen > sizeof(struct sockaddr_un)) {
505 hdrlen = offsetof(struct sockaddr_un, sun_path);
506 name = ((struct sockaddr_un *)kosa)->sun_path;
507 if (*name == '\0') {
508 /*
509 * Linux abstract namespace starts with a NULL byte.
510 * XXX We do not support abstract namespace yet.
511 */
512 namelen = strnlen(name + 1, salen - hdrlen - 1) + 1;
513 } else
514 namelen = strnlen(name, salen - hdrlen);
515 salen = hdrlen + namelen;
516 if (salen > sizeof(struct sockaddr_un)) {
517 error = ENAMETOOLONG;
518 goto out;
519 }
520 }
521
522 sa = (struct sockaddr *)kosa;
523 sa->sa_family = bdom;
524 sa->sa_len = salen;
525
526 *sap = sa;
527 *len = salen;
528 return (0);
529
530 out:
531 free(kosa, M_SONAME);
532 return (error);
533 }
534
535 void
536 linux_dev_shm_create(void)
537 {
538 int error;
539
540 error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, &dev_shm_cdev,
541 &dev_shm_cdevsw, NULL, UID_ROOT, GID_WHEEL, 0, "shm/.mountpoint");
542 if (error != 0) {
543 printf("%s: failed to create device node, error %d\n",
544 __func__, error);
545 }
546 }
547
548 void
549 linux_dev_shm_destroy(void)
550 {
551
552 destroy_dev(dev_shm_cdev);
553 }
554
555 int
556 bsd_to_linux_bits_(int value, struct bsd_to_linux_bitmap *bitmap,
557 size_t mapcnt, int no_value)
558 {
559 int bsd_mask, bsd_value, linux_mask, linux_value;
560 int linux_ret;
561 size_t i;
562 bool applied;
563
564 applied = false;
565 linux_ret = 0;
566 for (i = 0; i < mapcnt; ++i) {
567 bsd_mask = bitmap[i].bsd_mask;
568 bsd_value = bitmap[i].bsd_value;
569 if (bsd_mask == 0)
570 bsd_mask = bsd_value;
571
572 linux_mask = bitmap[i].linux_mask;
573 linux_value = bitmap[i].linux_value;
574 if (linux_mask == 0)
575 linux_mask = linux_value;
576
577 /*
578 * If a mask larger than just the value is set, we explicitly
579 * want to make sure that only this bit we mapped within that
580 * mask is set.
581 */
582 if ((value & bsd_mask) == bsd_value) {
583 linux_ret = (linux_ret & ~linux_mask) | linux_value;
584 applied = true;
585 }
586 }
587
588 if (!applied)
589 return (no_value);
590 return (linux_ret);
591 }
592
593 int
594 linux_to_bsd_bits_(int value, struct bsd_to_linux_bitmap *bitmap,
595 size_t mapcnt, int no_value)
596 {
597 int bsd_mask, bsd_value, linux_mask, linux_value;
598 int bsd_ret;
599 size_t i;
600 bool applied;
601
602 applied = false;
603 bsd_ret = 0;
604 for (i = 0; i < mapcnt; ++i) {
605 bsd_mask = bitmap[i].bsd_mask;
606 bsd_value = bitmap[i].bsd_value;
607 if (bsd_mask == 0)
608 bsd_mask = bsd_value;
609
610 linux_mask = bitmap[i].linux_mask;
611 linux_value = bitmap[i].linux_value;
612 if (linux_mask == 0)
613 linux_mask = linux_value;
614
615 /*
616 * If a mask larger than just the value is set, we explicitly
617 * want to make sure that only this bit we mapped within that
618 * mask is set.
619 */
620 if ((value & linux_mask) == linux_value) {
621 bsd_ret = (bsd_ret & ~bsd_mask) | bsd_value;
622 applied = true;
623 }
624 }
625
626 if (!applied)
627 return (no_value);
628 return (bsd_ret);
629 }
Cache object: f9fa0b7c9fd636c17a9e596e3d2dd8bc
|