FreeBSD/Linux Kernel Cross Reference
sys/netinet/in_pcb.c
1 /*-
2 * Copyright (c) 1982, 1986, 1991, 1993, 1995
3 * The Regents of the University of California.
4 * Copyright (c) 2007 Robert N. M. Watson
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice, this list of conditions and the following disclaimer.
12 * 2. Redistributions in binary form must reproduce the above copyright
13 * notice, this list of conditions and the following disclaimer in the
14 * documentation and/or other materials provided with the distribution.
15 * 4. Neither the name of the University nor the names of its contributors
16 * may be used to endorse or promote products derived from this software
17 * without specific prior written permission.
18 *
19 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
20 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
21 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
22 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
23 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
24 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
25 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
26 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
27 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
28 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
29 * SUCH DAMAGE.
30 *
31 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
32 */
33
34 #include <sys/cdefs.h>
35 __FBSDID("$FreeBSD$");
36
37 #include "opt_ddb.h"
38 #include "opt_ipsec.h"
39 #include "opt_inet6.h"
40 #include "opt_mac.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/malloc.h>
45 #include <sys/mbuf.h>
46 #include <sys/domain.h>
47 #include <sys/protosw.h>
48 #include <sys/socket.h>
49 #include <sys/socketvar.h>
50 #include <sys/priv.h>
51 #include <sys/proc.h>
52 #include <sys/jail.h>
53 #include <sys/kernel.h>
54 #include <sys/sysctl.h>
55
56 #ifdef DDB
57 #include <ddb/ddb.h>
58 #endif
59
60 #include <vm/uma.h>
61
62 #include <net/if.h>
63 #include <net/if_types.h>
64 #include <net/route.h>
65
66 #include <netinet/in.h>
67 #include <netinet/in_pcb.h>
68 #include <netinet/in_var.h>
69 #include <netinet/ip_var.h>
70 #include <netinet/tcp_var.h>
71 #include <netinet/udp.h>
72 #include <netinet/udp_var.h>
73 #ifdef INET6
74 #include <netinet/ip6.h>
75 #include <netinet6/ip6_var.h>
76 #endif /* INET6 */
77
78
79 #ifdef IPSEC
80 #include <netipsec/ipsec.h>
81 #include <netipsec/key.h>
82 #endif /* IPSEC */
83
84 #include <security/mac/mac_framework.h>
85
86 /*
87 * These configure the range of local port addresses assigned to
88 * "unspecified" outgoing connections/packets/whatever.
89 */
90 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */
91 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */
92 int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
93 int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */
94 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
95 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */
96
97 /*
98 * Reserved ports accessible only to root. There are significant
99 * security considerations that must be accounted for when changing these,
100 * but the security benefits can be great. Please be careful.
101 */
102 int ipport_reservedhigh = IPPORT_RESERVED - 1; /* 1023 */
103 int ipport_reservedlow = 0;
104
105 /* Variables dealing with random ephemeral port allocation. */
106 int ipport_randomized = 1; /* user controlled via sysctl */
107 int ipport_randomcps = 10; /* user controlled via sysctl */
108 int ipport_randomtime = 45; /* user controlled via sysctl */
109 int ipport_stoprandom = 0; /* toggled by ipport_tick */
110 int ipport_tcpallocs;
111 int ipport_tcplastcount;
112
113 #define RANGECHK(var, min, max) \
114 if ((var) < (min)) { (var) = (min); } \
115 else if ((var) > (max)) { (var) = (max); }
116
117 static int
118 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
119 {
120 int error;
121
122 error = sysctl_handle_int(oidp, oidp->oid_arg1, oidp->oid_arg2, req);
123 if (error == 0) {
124 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
125 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
126 RANGECHK(ipport_firstauto, IPPORT_RESERVED, IPPORT_MAX);
127 RANGECHK(ipport_lastauto, IPPORT_RESERVED, IPPORT_MAX);
128 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, IPPORT_MAX);
129 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, IPPORT_MAX);
130 }
131 return (error);
132 }
133
134 #undef RANGECHK
135
136 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports");
137
138 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW,
139 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
140 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW,
141 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
142 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW,
143 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
144 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW,
145 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
146 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW,
147 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
148 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW,
149 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
150 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh,
151 CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedhigh, 0, "");
152 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow,
153 CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedlow, 0, "");
154 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomized, CTLFLAG_RW,
155 &ipport_randomized, 0, "Enable random port allocation");
156 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomcps, CTLFLAG_RW,
157 &ipport_randomcps, 0, "Maximum number of random port "
158 "allocations before switching to a sequental one");
159 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, randomtime, CTLFLAG_RW,
160 &ipport_randomtime, 0, "Minimum time to keep sequental port "
161 "allocation before switching to a random one");
162
163 /*
164 * in_pcb.c: manage the Protocol Control Blocks.
165 *
166 * NOTE: It is assumed that most of these functions will be called with
167 * the pcbinfo lock held, and often, the inpcb lock held, as these utility
168 * functions often modify hash chains or addresses in pcbs.
169 */
170
171 /*
172 * Allocate a PCB and associate it with the socket.
173 * On success return with the PCB locked.
174 */
175 int
176 in_pcballoc(struct socket *so, struct inpcbinfo *pcbinfo)
177 {
178 struct inpcb *inp;
179 int error;
180
181 INP_INFO_WLOCK_ASSERT(pcbinfo);
182 error = 0;
183 inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
184 if (inp == NULL)
185 return (ENOBUFS);
186 bzero(inp, inp_zero_size);
187 inp->inp_pcbinfo = pcbinfo;
188 inp->inp_socket = so;
189 inp->inp_cred = crhold(so->so_cred);
190 inp->inp_inc.inc_fibnum = so->so_fibnum;
191 #ifdef MAC
192 error = mac_init_inpcb(inp, M_NOWAIT);
193 if (error != 0)
194 goto out;
195 SOCK_LOCK(so);
196 mac_create_inpcb_from_socket(so, inp);
197 SOCK_UNLOCK(so);
198 #endif
199
200 #ifdef IPSEC
201 error = ipsec_init_policy(so, &inp->inp_sp);
202 if (error != 0) {
203 #ifdef MAC
204 mac_destroy_inpcb(inp);
205 #endif
206 goto out;
207 }
208 #endif /*IPSEC*/
209 #ifdef INET6
210 if (INP_SOCKAF(so) == AF_INET6) {
211 inp->inp_vflag |= INP_IPV6PROTO;
212 if (ip6_v6only)
213 inp->inp_flags |= IN6P_IPV6_V6ONLY;
214 }
215 #endif
216 LIST_INSERT_HEAD(pcbinfo->ipi_listhead, inp, inp_list);
217 pcbinfo->ipi_count++;
218 so->so_pcb = (caddr_t)inp;
219 #ifdef INET6
220 if (ip6_auto_flowlabel)
221 inp->inp_flags |= IN6P_AUTOFLOWLABEL;
222 #endif
223 INP_WLOCK(inp);
224 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
225
226 #if defined(IPSEC) || defined(MAC)
227 out:
228 if (error != 0) {
229 crfree(inp->inp_cred);
230 uma_zfree(pcbinfo->ipi_zone, inp);
231 }
232 #endif
233 return (error);
234 }
235
236 int
237 in_pcbbind(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
238 {
239 int anonport, error;
240
241 INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
242 INP_WLOCK_ASSERT(inp);
243
244 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
245 return (EINVAL);
246 anonport = inp->inp_lport == 0 && (nam == NULL ||
247 ((struct sockaddr_in *)nam)->sin_port == 0);
248 error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr,
249 &inp->inp_lport, cred);
250 if (error)
251 return (error);
252 if (in_pcbinshash(inp) != 0) {
253 inp->inp_laddr.s_addr = INADDR_ANY;
254 inp->inp_lport = 0;
255 return (EAGAIN);
256 }
257 if (anonport)
258 inp->inp_flags |= INP_ANONPORT;
259 return (0);
260 }
261
262 /*
263 * Set up a bind operation on a PCB, performing port allocation
264 * as required, but do not actually modify the PCB. Callers can
265 * either complete the bind by setting inp_laddr/inp_lport and
266 * calling in_pcbinshash(), or they can just use the resulting
267 * port and address to authorise the sending of a once-off packet.
268 *
269 * On error, the values of *laddrp and *lportp are not changed.
270 */
271 int
272 in_pcbbind_setup(struct inpcb *inp, struct sockaddr *nam, in_addr_t *laddrp,
273 u_short *lportp, struct ucred *cred)
274 {
275 struct socket *so = inp->inp_socket;
276 unsigned short *lastport;
277 struct sockaddr_in *sin;
278 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
279 struct in_addr laddr;
280 u_short lport = 0;
281 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
282 int error, prison = 0;
283 int dorandom;
284
285 /*
286 * Because no actual state changes occur here, a global write lock on
287 * the pcbinfo isn't required.
288 */
289 INP_INFO_LOCK_ASSERT(pcbinfo);
290 INP_LOCK_ASSERT(inp);
291
292 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
293 return (EADDRNOTAVAIL);
294 laddr.s_addr = *laddrp;
295 if (nam != NULL && laddr.s_addr != INADDR_ANY)
296 return (EINVAL);
297 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
298 wild = INPLOOKUP_WILDCARD;
299 if (nam) {
300 sin = (struct sockaddr_in *)nam;
301 if (nam->sa_len != sizeof (*sin))
302 return (EINVAL);
303 #ifdef notdef
304 /*
305 * We should check the family, but old programs
306 * incorrectly fail to initialize it.
307 */
308 if (sin->sin_family != AF_INET)
309 return (EAFNOSUPPORT);
310 #endif
311 if (sin->sin_addr.s_addr != INADDR_ANY)
312 if (prison_ip(cred, 0, &sin->sin_addr.s_addr))
313 return(EINVAL);
314 if (sin->sin_port != *lportp) {
315 /* Don't allow the port to change. */
316 if (*lportp != 0)
317 return (EINVAL);
318 lport = sin->sin_port;
319 }
320 /* NB: lport is left as 0 if the port isn't being changed. */
321 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
322 /*
323 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
324 * allow complete duplication of binding if
325 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
326 * and a multicast address is bound on both
327 * new and duplicated sockets.
328 */
329 if (so->so_options & SO_REUSEADDR)
330 reuseport = SO_REUSEADDR|SO_REUSEPORT;
331 } else if (sin->sin_addr.s_addr != INADDR_ANY) {
332 sin->sin_port = 0; /* yech... */
333 bzero(&sin->sin_zero, sizeof(sin->sin_zero));
334 if (ifa_ifwithaddr((struct sockaddr *)sin) == 0)
335 return (EADDRNOTAVAIL);
336 }
337 laddr = sin->sin_addr;
338 if (lport) {
339 struct inpcb *t;
340 struct tcptw *tw;
341
342 /* GROSS */
343 if (ntohs(lport) <= ipport_reservedhigh &&
344 ntohs(lport) >= ipport_reservedlow &&
345 priv_check_cred(cred, PRIV_NETINET_RESERVEDPORT,
346 0))
347 return (EACCES);
348 if (jailed(cred))
349 prison = 1;
350 if (!IN_MULTICAST(ntohl(sin->sin_addr.s_addr)) &&
351 priv_check_cred(inp->inp_cred,
352 PRIV_NETINET_REUSEPORT, 0) != 0) {
353 t = in_pcblookup_local(pcbinfo, sin->sin_addr,
354 lport, prison ? 0 : INPLOOKUP_WILDCARD,
355 cred);
356 /*
357 * XXX
358 * This entire block sorely needs a rewrite.
359 */
360 if (t &&
361 ((t->inp_vflag & INP_TIMEWAIT) == 0) &&
362 (so->so_type != SOCK_STREAM ||
363 ntohl(t->inp_faddr.s_addr) == INADDR_ANY) &&
364 (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
365 ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
366 (t->inp_socket->so_options &
367 SO_REUSEPORT) == 0) &&
368 (inp->inp_cred->cr_uid !=
369 t->inp_cred->cr_uid))
370 return (EADDRINUSE);
371 }
372 if (prison && prison_ip(cred, 0, &sin->sin_addr.s_addr))
373 return (EADDRNOTAVAIL);
374 t = in_pcblookup_local(pcbinfo, sin->sin_addr,
375 lport, prison ? 0 : wild, cred);
376 if (t && (t->inp_vflag & INP_TIMEWAIT)) {
377 /*
378 * XXXRW: If an incpb has had its timewait
379 * state recycled, we treat the address as
380 * being in use (for now). This is better
381 * than a panic, but not desirable.
382 */
383 tw = intotw(inp);
384 if (tw == NULL ||
385 (reuseport & tw->tw_so_options) == 0)
386 return (EADDRINUSE);
387 } else if (t &&
388 (reuseport & t->inp_socket->so_options) == 0) {
389 #ifdef INET6
390 if (ntohl(sin->sin_addr.s_addr) !=
391 INADDR_ANY ||
392 ntohl(t->inp_laddr.s_addr) !=
393 INADDR_ANY ||
394 INP_SOCKAF(so) ==
395 INP_SOCKAF(t->inp_socket))
396 #endif
397 return (EADDRINUSE);
398 }
399 }
400 }
401 if (*lportp != 0)
402 lport = *lportp;
403 if (lport == 0) {
404 u_short first, last;
405 int count;
406
407 if (laddr.s_addr != INADDR_ANY)
408 if (prison_ip(cred, 0, &laddr.s_addr))
409 return (EINVAL);
410
411 if (inp->inp_flags & INP_HIGHPORT) {
412 first = ipport_hifirstauto; /* sysctl */
413 last = ipport_hilastauto;
414 lastport = &pcbinfo->ipi_lasthi;
415 } else if (inp->inp_flags & INP_LOWPORT) {
416 error = priv_check_cred(cred,
417 PRIV_NETINET_RESERVEDPORT, 0);
418 if (error)
419 return error;
420 first = ipport_lowfirstauto; /* 1023 */
421 last = ipport_lowlastauto; /* 600 */
422 lastport = &pcbinfo->ipi_lastlow;
423 } else {
424 first = ipport_firstauto; /* sysctl */
425 last = ipport_lastauto;
426 lastport = &pcbinfo->ipi_lastport;
427 }
428 /*
429 * For UDP, use random port allocation as long as the user
430 * allows it. For TCP (and as of yet unknown) connections,
431 * use random port allocation only if the user allows it AND
432 * ipport_tick() allows it.
433 */
434 if (ipport_randomized &&
435 (!ipport_stoprandom || pcbinfo == &udbinfo))
436 dorandom = 1;
437 else
438 dorandom = 0;
439 /*
440 * It makes no sense to do random port allocation if
441 * we have the only port available.
442 */
443 if (first == last)
444 dorandom = 0;
445 /* Make sure to not include UDP packets in the count. */
446 if (pcbinfo != &udbinfo)
447 ipport_tcpallocs++;
448 /*
449 * Simple check to ensure all ports are not used up causing
450 * a deadlock here.
451 *
452 * We split the two cases (up and down) so that the direction
453 * is not being tested on each round of the loop.
454 */
455 if (first > last) {
456 /*
457 * counting down
458 */
459 if (dorandom)
460 *lastport = first -
461 (arc4random() % (first - last));
462 count = first - last;
463
464 do {
465 if (count-- < 0) /* completely used? */
466 return (EADDRNOTAVAIL);
467 --*lastport;
468 if (*lastport > first || *lastport < last)
469 *lastport = first;
470 lport = htons(*lastport);
471 } while (in_pcblookup_local(pcbinfo, laddr, lport,
472 wild, cred));
473 } else {
474 /*
475 * counting up
476 */
477 if (dorandom)
478 *lastport = first +
479 (arc4random() % (last - first));
480 count = last - first;
481
482 do {
483 if (count-- < 0) /* completely used? */
484 return (EADDRNOTAVAIL);
485 ++*lastport;
486 if (*lastport < first || *lastport > last)
487 *lastport = first;
488 lport = htons(*lastport);
489 } while (in_pcblookup_local(pcbinfo, laddr, lport,
490 wild, cred));
491 }
492 }
493 if (prison_ip(cred, 0, &laddr.s_addr))
494 return (EINVAL);
495 *laddrp = laddr.s_addr;
496 *lportp = lport;
497 return (0);
498 }
499
500 /*
501 * Connect from a socket to a specified address.
502 * Both address and port must be specified in argument sin.
503 * If don't have a local address for this socket yet,
504 * then pick one.
505 */
506 int
507 in_pcbconnect(struct inpcb *inp, struct sockaddr *nam, struct ucred *cred)
508 {
509 u_short lport, fport;
510 in_addr_t laddr, faddr;
511 int anonport, error;
512
513 INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
514 INP_WLOCK_ASSERT(inp);
515
516 lport = inp->inp_lport;
517 laddr = inp->inp_laddr.s_addr;
518 anonport = (lport == 0);
519 error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport,
520 NULL, cred);
521 if (error)
522 return (error);
523
524 /* Do the initial binding of the local address if required. */
525 if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) {
526 inp->inp_lport = lport;
527 inp->inp_laddr.s_addr = laddr;
528 if (in_pcbinshash(inp) != 0) {
529 inp->inp_laddr.s_addr = INADDR_ANY;
530 inp->inp_lport = 0;
531 return (EAGAIN);
532 }
533 }
534
535 /* Commit the remaining changes. */
536 inp->inp_lport = lport;
537 inp->inp_laddr.s_addr = laddr;
538 inp->inp_faddr.s_addr = faddr;
539 inp->inp_fport = fport;
540 in_pcbrehash(inp);
541
542 if (anonport)
543 inp->inp_flags |= INP_ANONPORT;
544 return (0);
545 }
546
547 /*
548 * Set up for a connect from a socket to the specified address.
549 * On entry, *laddrp and *lportp should contain the current local
550 * address and port for the PCB; these are updated to the values
551 * that should be placed in inp_laddr and inp_lport to complete
552 * the connect.
553 *
554 * On success, *faddrp and *fportp will be set to the remote address
555 * and port. These are not updated in the error case.
556 *
557 * If the operation fails because the connection already exists,
558 * *oinpp will be set to the PCB of that connection so that the
559 * caller can decide to override it. In all other cases, *oinpp
560 * is set to NULL.
561 */
562 int
563 in_pcbconnect_setup(struct inpcb *inp, struct sockaddr *nam,
564 in_addr_t *laddrp, u_short *lportp, in_addr_t *faddrp, u_short *fportp,
565 struct inpcb **oinpp, struct ucred *cred)
566 {
567 struct sockaddr_in *sin = (struct sockaddr_in *)nam;
568 struct in_ifaddr *ia;
569 struct sockaddr_in sa;
570 struct ucred *socred;
571 struct inpcb *oinp;
572 struct in_addr laddr, faddr;
573 u_short lport, fport;
574 int error;
575
576 /*
577 * Because a global state change doesn't actually occur here, a read
578 * lock is sufficient.
579 */
580 INP_INFO_LOCK_ASSERT(inp->inp_pcbinfo);
581 INP_LOCK_ASSERT(inp);
582
583 if (oinpp != NULL)
584 *oinpp = NULL;
585 if (nam->sa_len != sizeof (*sin))
586 return (EINVAL);
587 if (sin->sin_family != AF_INET)
588 return (EAFNOSUPPORT);
589 if (sin->sin_port == 0)
590 return (EADDRNOTAVAIL);
591 laddr.s_addr = *laddrp;
592 lport = *lportp;
593 faddr = sin->sin_addr;
594 fport = sin->sin_port;
595 socred = inp->inp_socket->so_cred;
596 if (laddr.s_addr == INADDR_ANY && jailed(socred)) {
597 bzero(&sa, sizeof(sa));
598 sa.sin_addr.s_addr = htonl(prison_getip(socred));
599 sa.sin_len = sizeof(sa);
600 sa.sin_family = AF_INET;
601 error = in_pcbbind_setup(inp, (struct sockaddr *)&sa,
602 &laddr.s_addr, &lport, cred);
603 if (error)
604 return (error);
605 }
606 if (!TAILQ_EMPTY(&in_ifaddrhead)) {
607 /*
608 * If the destination address is INADDR_ANY,
609 * use the primary local address.
610 * If the supplied address is INADDR_BROADCAST,
611 * and the primary interface supports broadcast,
612 * choose the broadcast address for that interface.
613 */
614 if (faddr.s_addr == INADDR_ANY)
615 faddr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr;
616 else if (faddr.s_addr == (u_long)INADDR_BROADCAST &&
617 (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags &
618 IFF_BROADCAST))
619 faddr = satosin(&TAILQ_FIRST(
620 &in_ifaddrhead)->ia_broadaddr)->sin_addr;
621 }
622 if (laddr.s_addr == INADDR_ANY) {
623 ia = NULL;
624 /*
625 * If route is known our src addr is taken from the i/f,
626 * else punt.
627 *
628 * Find out route to destination
629 */
630 if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0)
631 ia = ip_rtaddr(faddr, inp->inp_inc.inc_fibnum);
632 /*
633 * If we found a route, use the address corresponding to
634 * the outgoing interface.
635 *
636 * Otherwise assume faddr is reachable on a directly connected
637 * network and try to find a corresponding interface to take
638 * the source address from.
639 */
640 if (ia == NULL) {
641 bzero(&sa, sizeof(sa));
642 sa.sin_addr = faddr;
643 sa.sin_len = sizeof(sa);
644 sa.sin_family = AF_INET;
645
646 ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sa)));
647 if (ia == NULL)
648 ia = ifatoia(ifa_ifwithnet(sintosa(&sa)));
649 if (ia == NULL)
650 return (ENETUNREACH);
651 }
652 /*
653 * If the destination address is multicast and an outgoing
654 * interface has been set as a multicast option, use the
655 * address of that interface as our source address.
656 */
657 if (IN_MULTICAST(ntohl(faddr.s_addr)) &&
658 inp->inp_moptions != NULL) {
659 struct ip_moptions *imo;
660 struct ifnet *ifp;
661
662 imo = inp->inp_moptions;
663 if (imo->imo_multicast_ifp != NULL) {
664 ifp = imo->imo_multicast_ifp;
665 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link)
666 if (ia->ia_ifp == ifp)
667 break;
668 if (ia == NULL)
669 return (EADDRNOTAVAIL);
670 }
671 }
672 laddr = ia->ia_addr.sin_addr;
673 }
674
675 oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport,
676 0, NULL);
677 if (oinp != NULL) {
678 if (oinpp != NULL)
679 *oinpp = oinp;
680 return (EADDRINUSE);
681 }
682 if (lport == 0) {
683 error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport,
684 cred);
685 if (error)
686 return (error);
687 }
688 *laddrp = laddr.s_addr;
689 *lportp = lport;
690 *faddrp = faddr.s_addr;
691 *fportp = fport;
692 return (0);
693 }
694
695 void
696 in_pcbdisconnect(struct inpcb *inp)
697 {
698
699 INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
700 INP_WLOCK_ASSERT(inp);
701
702 inp->inp_faddr.s_addr = INADDR_ANY;
703 inp->inp_fport = 0;
704 in_pcbrehash(inp);
705 }
706
707 /*
708 * Historically, in_pcbdetach() included the functionality now found in
709 * in_pcbfree() and in_pcbdrop(). They are now broken out to reflect the
710 * more complex life cycle of TCP.
711 *
712 * in_pcbdetach() is responsibe for disconnecting the socket from an inpcb.
713 * For most protocols, this will be invoked immediately prior to calling
714 * in_pcbfree(). However, for TCP the inpcb may significantly outlive the
715 * socket, in which case in_pcbfree() may be deferred.
716 */
717 void
718 in_pcbdetach(struct inpcb *inp)
719 {
720
721 KASSERT(inp->inp_socket != NULL, ("in_pcbdetach: inp_socket == NULL"));
722
723 inp->inp_socket->so_pcb = NULL;
724 inp->inp_socket = NULL;
725 }
726
727 /*
728 * in_pcbfree() is responsible for freeing an already-detached inpcb, as well
729 * as removing it from any global inpcb lists it might be on.
730 */
731 void
732 in_pcbfree(struct inpcb *inp)
733 {
734 struct inpcbinfo *ipi = inp->inp_pcbinfo;
735
736 KASSERT(inp->inp_socket == NULL, ("in_pcbfree: inp_socket != NULL"));
737
738 INP_INFO_WLOCK_ASSERT(ipi);
739 INP_WLOCK_ASSERT(inp);
740
741 #ifdef IPSEC
742 ipsec4_delete_pcbpolicy(inp);
743 #endif /*IPSEC*/
744 inp->inp_gencnt = ++ipi->ipi_gencnt;
745 in_pcbremlists(inp);
746 if (inp->inp_options)
747 (void)m_free(inp->inp_options);
748 if (inp->inp_moptions != NULL)
749 inp_freemoptions(inp->inp_moptions);
750 inp->inp_vflag = 0;
751 crfree(inp->inp_cred);
752
753 #ifdef MAC
754 mac_destroy_inpcb(inp);
755 #endif
756 INP_WUNLOCK(inp);
757 uma_zfree(ipi->ipi_zone, inp);
758 }
759
760 /*
761 * in_pcbdrop() removes an inpcb from hashed lists, releasing its address and
762 * port reservation, and preventing it from being returned by inpcb lookups.
763 *
764 * It is used by TCP to mark an inpcb as unused and avoid future packet
765 * delivery or event notification when a socket remains open but TCP has
766 * closed. This might occur as a result of a shutdown()-initiated TCP close
767 * or a RST on the wire, and allows the port binding to be reused while still
768 * maintaining the invariant that so_pcb always points to a valid inpcb until
769 * in_pcbdetach().
770 *
771 * XXXRW: An inp_lport of 0 is used to indicate that the inpcb is not on hash
772 * lists, but can lead to confusing netstat output, as open sockets with
773 * closed TCP connections will no longer appear to have their bound port
774 * number. An explicit flag would be better, as it would allow us to leave
775 * the port number intact after the connection is dropped.
776 *
777 * XXXRW: Possibly in_pcbdrop() should also prevent future notifications by
778 * in_pcbnotifyall() and in_pcbpurgeif0()?
779 */
780 void
781 in_pcbdrop(struct inpcb *inp)
782 {
783
784 INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
785 INP_WLOCK_ASSERT(inp);
786
787 inp->inp_vflag |= INP_DROPPED;
788 if (inp->inp_lport) {
789 struct inpcbport *phd = inp->inp_phd;
790
791 LIST_REMOVE(inp, inp_hash);
792 LIST_REMOVE(inp, inp_portlist);
793 if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
794 LIST_REMOVE(phd, phd_hash);
795 free(phd, M_PCB);
796 }
797 inp->inp_lport = 0;
798 }
799 }
800
801 /*
802 * Common routines to return the socket addresses associated with inpcbs.
803 */
804 struct sockaddr *
805 in_sockaddr(in_port_t port, struct in_addr *addr_p)
806 {
807 struct sockaddr_in *sin;
808
809 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME,
810 M_WAITOK | M_ZERO);
811 sin->sin_family = AF_INET;
812 sin->sin_len = sizeof(*sin);
813 sin->sin_addr = *addr_p;
814 sin->sin_port = port;
815
816 return (struct sockaddr *)sin;
817 }
818
819 int
820 in_getsockaddr(struct socket *so, struct sockaddr **nam)
821 {
822 struct inpcb *inp;
823 struct in_addr addr;
824 in_port_t port;
825
826 inp = sotoinpcb(so);
827 KASSERT(inp != NULL, ("in_getsockaddr: inp == NULL"));
828
829 INP_RLOCK(inp);
830 port = inp->inp_lport;
831 addr = inp->inp_laddr;
832 INP_RUNLOCK(inp);
833
834 *nam = in_sockaddr(port, &addr);
835 return 0;
836 }
837
838 int
839 in_getpeeraddr(struct socket *so, struct sockaddr **nam)
840 {
841 struct inpcb *inp;
842 struct in_addr addr;
843 in_port_t port;
844
845 inp = sotoinpcb(so);
846 KASSERT(inp != NULL, ("in_getpeeraddr: inp == NULL"));
847
848 INP_RLOCK(inp);
849 port = inp->inp_fport;
850 addr = inp->inp_faddr;
851 INP_RUNLOCK(inp);
852
853 *nam = in_sockaddr(port, &addr);
854 return 0;
855 }
856
857 void
858 in_pcbnotifyall(struct inpcbinfo *pcbinfo, struct in_addr faddr, int errno,
859 struct inpcb *(*notify)(struct inpcb *, int))
860 {
861 struct inpcb *inp, *inp_temp;
862
863 INP_INFO_WLOCK(pcbinfo);
864 LIST_FOREACH_SAFE(inp, pcbinfo->ipi_listhead, inp_list, inp_temp) {
865 INP_WLOCK(inp);
866 #ifdef INET6
867 if ((inp->inp_vflag & INP_IPV4) == 0) {
868 INP_WUNLOCK(inp);
869 continue;
870 }
871 #endif
872 if (inp->inp_faddr.s_addr != faddr.s_addr ||
873 inp->inp_socket == NULL) {
874 INP_WUNLOCK(inp);
875 continue;
876 }
877 if ((*notify)(inp, errno))
878 INP_WUNLOCK(inp);
879 }
880 INP_INFO_WUNLOCK(pcbinfo);
881 }
882
883 void
884 in_pcbpurgeif0(struct inpcbinfo *pcbinfo, struct ifnet *ifp)
885 {
886 struct inpcb *inp;
887 struct ip_moptions *imo;
888 int i, gap;
889
890 INP_INFO_RLOCK(pcbinfo);
891 LIST_FOREACH(inp, pcbinfo->ipi_listhead, inp_list) {
892 INP_WLOCK(inp);
893 imo = inp->inp_moptions;
894 if ((inp->inp_vflag & INP_IPV4) &&
895 imo != NULL) {
896 /*
897 * Unselect the outgoing interface if it is being
898 * detached.
899 */
900 if (imo->imo_multicast_ifp == ifp)
901 imo->imo_multicast_ifp = NULL;
902
903 /*
904 * Drop multicast group membership if we joined
905 * through the interface being detached.
906 */
907 for (i = 0, gap = 0; i < imo->imo_num_memberships;
908 i++) {
909 if (imo->imo_membership[i]->inm_ifp == ifp) {
910 in_delmulti(imo->imo_membership[i]);
911 gap++;
912 } else if (gap != 0)
913 imo->imo_membership[i - gap] =
914 imo->imo_membership[i];
915 }
916 imo->imo_num_memberships -= gap;
917 }
918 INP_WUNLOCK(inp);
919 }
920 INP_INFO_RUNLOCK(pcbinfo);
921 }
922
923 /*
924 * Lookup a PCB based on the local address and port.
925 */
926 #define INP_LOOKUP_MAPPED_PCB_COST 3
927 struct inpcb *
928 in_pcblookup_local(struct inpcbinfo *pcbinfo, struct in_addr laddr,
929 u_short lport, int wild_okay, struct ucred *cred)
930 {
931 struct inpcb *inp;
932 #ifdef INET6
933 int matchwild = 3 + INP_LOOKUP_MAPPED_PCB_COST;
934 #else
935 int matchwild = 3;
936 #endif
937 int wildcard;
938
939 INP_INFO_LOCK_ASSERT(pcbinfo);
940
941 if (!wild_okay) {
942 struct inpcbhead *head;
943 /*
944 * Look for an unconnected (wildcard foreign addr) PCB that
945 * matches the local address and port we're looking for.
946 */
947 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
948 0, pcbinfo->ipi_hashmask)];
949 LIST_FOREACH(inp, head, inp_hash) {
950 #ifdef INET6
951 if ((inp->inp_vflag & INP_IPV4) == 0)
952 continue;
953 #endif
954 if (inp->inp_faddr.s_addr == INADDR_ANY &&
955 inp->inp_laddr.s_addr == laddr.s_addr &&
956 inp->inp_lport == lport) {
957 /*
958 * Found.
959 */
960 return (inp);
961 }
962 }
963 /*
964 * Not found.
965 */
966 return (NULL);
967 } else {
968 struct inpcbporthead *porthash;
969 struct inpcbport *phd;
970 struct inpcb *match = NULL;
971 /*
972 * Best fit PCB lookup.
973 *
974 * First see if this local port is in use by looking on the
975 * port hash list.
976 */
977 porthash = &pcbinfo->ipi_porthashbase[INP_PCBPORTHASH(lport,
978 pcbinfo->ipi_porthashmask)];
979 LIST_FOREACH(phd, porthash, phd_hash) {
980 if (phd->phd_port == lport)
981 break;
982 }
983 if (phd != NULL) {
984 /*
985 * Port is in use by one or more PCBs. Look for best
986 * fit.
987 */
988 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
989 wildcard = 0;
990 #ifdef INET6
991 if ((inp->inp_vflag & INP_IPV4) == 0)
992 continue;
993 /*
994 * We never select the PCB that has
995 * INP_IPV6 flag and is bound to :: if
996 * we have another PCB which is bound
997 * to 0.0.0.0. If a PCB has the
998 * INP_IPV6 flag, then we set its cost
999 * higher than IPv4 only PCBs.
1000 *
1001 * Note that the case only happens
1002 * when a socket is bound to ::, under
1003 * the condition that the use of the
1004 * mapped address is allowed.
1005 */
1006 if ((inp->inp_vflag & INP_IPV6) != 0)
1007 wildcard += INP_LOOKUP_MAPPED_PCB_COST;
1008 #endif
1009 if (inp->inp_faddr.s_addr != INADDR_ANY)
1010 wildcard++;
1011 if (inp->inp_laddr.s_addr != INADDR_ANY) {
1012 if (laddr.s_addr == INADDR_ANY)
1013 wildcard++;
1014 else if (inp->inp_laddr.s_addr != laddr.s_addr)
1015 continue;
1016 } else {
1017 if (laddr.s_addr != INADDR_ANY)
1018 wildcard++;
1019 }
1020 if (wildcard < matchwild) {
1021 match = inp;
1022 matchwild = wildcard;
1023 if (matchwild == 0) {
1024 break;
1025 }
1026 }
1027 }
1028 }
1029 return (match);
1030 }
1031 }
1032 #undef INP_LOOKUP_MAPPED_PCB_COST
1033
1034 /*
1035 * Lookup PCB in hash list.
1036 */
1037 struct inpcb *
1038 in_pcblookup_hash(struct inpcbinfo *pcbinfo, struct in_addr faddr,
1039 u_int fport_arg, struct in_addr laddr, u_int lport_arg, int wildcard,
1040 struct ifnet *ifp)
1041 {
1042 struct inpcbhead *head;
1043 struct inpcb *inp;
1044 u_short fport = fport_arg, lport = lport_arg;
1045
1046 INP_INFO_LOCK_ASSERT(pcbinfo);
1047
1048 /*
1049 * First look for an exact match.
1050 */
1051 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(faddr.s_addr, lport, fport,
1052 pcbinfo->ipi_hashmask)];
1053 LIST_FOREACH(inp, head, inp_hash) {
1054 #ifdef INET6
1055 if ((inp->inp_vflag & INP_IPV4) == 0)
1056 continue;
1057 #endif
1058 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1059 inp->inp_laddr.s_addr == laddr.s_addr &&
1060 inp->inp_fport == fport &&
1061 inp->inp_lport == lport)
1062 return (inp);
1063 }
1064
1065 /*
1066 * Then look for a wildcard match, if requested.
1067 */
1068 if (wildcard) {
1069 struct inpcb *local_wild = NULL;
1070 #ifdef INET6
1071 struct inpcb *local_wild_mapped = NULL;
1072 #endif
1073
1074 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(INADDR_ANY, lport,
1075 0, pcbinfo->ipi_hashmask)];
1076 LIST_FOREACH(inp, head, inp_hash) {
1077 #ifdef INET6
1078 if ((inp->inp_vflag & INP_IPV4) == 0)
1079 continue;
1080 #endif
1081 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1082 inp->inp_lport == lport) {
1083 if (ifp && ifp->if_type == IFT_FAITH &&
1084 (inp->inp_flags & INP_FAITH) == 0)
1085 continue;
1086 if (inp->inp_laddr.s_addr == laddr.s_addr)
1087 return (inp);
1088 else if (inp->inp_laddr.s_addr == INADDR_ANY) {
1089 #ifdef INET6
1090 if (INP_CHECK_SOCKAF(inp->inp_socket,
1091 AF_INET6))
1092 local_wild_mapped = inp;
1093 else
1094 #endif
1095 local_wild = inp;
1096 }
1097 }
1098 }
1099 #ifdef INET6
1100 if (local_wild == NULL)
1101 return (local_wild_mapped);
1102 #endif
1103 return (local_wild);
1104 }
1105 return (NULL);
1106 }
1107
1108 /*
1109 * Insert PCB onto various hash lists.
1110 */
1111 int
1112 in_pcbinshash(struct inpcb *inp)
1113 {
1114 struct inpcbhead *pcbhash;
1115 struct inpcbporthead *pcbporthash;
1116 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1117 struct inpcbport *phd;
1118 u_int32_t hashkey_faddr;
1119
1120 INP_INFO_WLOCK_ASSERT(pcbinfo);
1121 INP_WLOCK_ASSERT(inp);
1122
1123 #ifdef INET6
1124 if (inp->inp_vflag & INP_IPV6)
1125 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1126 else
1127 #endif /* INET6 */
1128 hashkey_faddr = inp->inp_faddr.s_addr;
1129
1130 pcbhash = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
1131 inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];
1132
1133 pcbporthash = &pcbinfo->ipi_porthashbase[
1134 INP_PCBPORTHASH(inp->inp_lport, pcbinfo->ipi_porthashmask)];
1135
1136 /*
1137 * Go through port list and look for a head for this lport.
1138 */
1139 LIST_FOREACH(phd, pcbporthash, phd_hash) {
1140 if (phd->phd_port == inp->inp_lport)
1141 break;
1142 }
1143 /*
1144 * If none exists, malloc one and tack it on.
1145 */
1146 if (phd == NULL) {
1147 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT);
1148 if (phd == NULL) {
1149 return (ENOBUFS); /* XXX */
1150 }
1151 phd->phd_port = inp->inp_lport;
1152 LIST_INIT(&phd->phd_pcblist);
1153 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
1154 }
1155 inp->inp_phd = phd;
1156 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
1157 LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
1158 return (0);
1159 }
1160
1161 /*
1162 * Move PCB to the proper hash bucket when { faddr, fport } have been
1163 * changed. NOTE: This does not handle the case of the lport changing (the
1164 * hashed port list would have to be updated as well), so the lport must
1165 * not change after in_pcbinshash() has been called.
1166 */
1167 void
1168 in_pcbrehash(struct inpcb *inp)
1169 {
1170 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1171 struct inpcbhead *head;
1172 u_int32_t hashkey_faddr;
1173
1174 INP_INFO_WLOCK_ASSERT(pcbinfo);
1175 INP_WLOCK_ASSERT(inp);
1176
1177 #ifdef INET6
1178 if (inp->inp_vflag & INP_IPV6)
1179 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1180 else
1181 #endif /* INET6 */
1182 hashkey_faddr = inp->inp_faddr.s_addr;
1183
1184 head = &pcbinfo->ipi_hashbase[INP_PCBHASH(hashkey_faddr,
1185 inp->inp_lport, inp->inp_fport, pcbinfo->ipi_hashmask)];
1186
1187 LIST_REMOVE(inp, inp_hash);
1188 LIST_INSERT_HEAD(head, inp, inp_hash);
1189 }
1190
1191 /*
1192 * Remove PCB from various lists.
1193 */
1194 void
1195 in_pcbremlists(struct inpcb *inp)
1196 {
1197 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1198
1199 INP_INFO_WLOCK_ASSERT(pcbinfo);
1200 INP_WLOCK_ASSERT(inp);
1201
1202 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
1203 if (inp->inp_lport) {
1204 struct inpcbport *phd = inp->inp_phd;
1205
1206 LIST_REMOVE(inp, inp_hash);
1207 LIST_REMOVE(inp, inp_portlist);
1208 if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
1209 LIST_REMOVE(phd, phd_hash);
1210 free(phd, M_PCB);
1211 }
1212 }
1213 LIST_REMOVE(inp, inp_list);
1214 pcbinfo->ipi_count--;
1215 }
1216
/*
 * Propagate a socket-layer label change into the socket's in_pcb.
 *
 * Without MAC this function compiles to an empty body.  With MAC, the PCB
 * is write-locked first and the socket lock is taken inside it, around the
 * call to mac_inpcb_sosetlabel().
 */
void
in_pcbsosetlabel(struct socket *so)
{
#ifdef MAC
	struct inpcb *inp = sotoinpcb(so);

	KASSERT(inp != NULL, ("in_pcbsosetlabel: so->so_pcb == NULL"));

	INP_WLOCK(inp);
	SOCK_LOCK(so);
	mac_inpcb_sosetlabel(so, inp);
	SOCK_UNLOCK(so);
	INP_WUNLOCK(inp);
#endif
}
1237
1238 /*
1239 * ipport_tick runs once per second, determining if random port allocation
1240 * should be continued. If more than ipport_randomcps ports have been
1241 * allocated in the last second, then we return to sequential port
1242 * allocation. We return to random allocation only once we drop below
1243 * ipport_randomcps for at least ipport_randomtime seconds.
1244 */
1245 void
1246 ipport_tick(void *xtp)
1247 {
1248
1249 if (ipport_tcpallocs <= ipport_tcplastcount + ipport_randomcps) {
1250 if (ipport_stoprandom > 0)
1251 ipport_stoprandom--;
1252 } else
1253 ipport_stoprandom = ipport_randomtime;
1254 ipport_tcplastcount = ipport_tcpallocs;
1255 callout_reset(&ipport_tick_callout, hz, ipport_tick, NULL);
1256 }
1257
1258 void
1259 inp_apply_all(void (*func)(struct inpcb *, void *), void *arg)
1260 {
1261 struct inpcb *inp;
1262
1263 INP_INFO_RLOCK(&tcbinfo);
1264 LIST_FOREACH(inp, tcbinfo.ipi_listhead, inp_list) {
1265 INP_WLOCK(inp);
1266 func(inp, arg);
1267 INP_WUNLOCK(inp);
1268 }
1269 INP_INFO_RUNLOCK(&tcbinfo);
1270 }
1271
1272 struct socket *
1273 inp_inpcbtosocket(struct inpcb *inp)
1274 {
1275
1276 INP_WLOCK_ASSERT(inp);
1277 return (inp->inp_socket);
1278 }
1279
1280 struct tcpcb *
1281 inp_inpcbtotcpcb(struct inpcb *inp)
1282 {
1283
1284 INP_WLOCK_ASSERT(inp);
1285 return ((struct tcpcb *)inp->inp_ppcb);
1286 }
1287
1288 int
1289 inp_ip_tos_get(const struct inpcb *inp)
1290 {
1291
1292 return (inp->inp_ip_tos);
1293 }
1294
1295 void
1296 inp_ip_tos_set(struct inpcb *inp, int val)
1297 {
1298
1299 inp->inp_ip_tos = val;
1300 }
1301
1302 void
1303 inp_4tuple_get(struct inpcb *inp, uint32_t *laddr, uint16_t *lp,
1304 uint32_t *faddr, uint16_t *fp)
1305 {
1306
1307 INP_LOCK_ASSERT(inp);
1308 *laddr = inp->inp_laddr.s_addr;
1309 *faddr = inp->inp_faddr.s_addr;
1310 *lp = inp->inp_lport;
1311 *fp = inp->inp_fport;
1312 }
1313
/*
 * Function form of sotoinpcb(): return the in_pcb for a socket.
 */
struct inpcb *
so_sotoinpcb(struct socket *so)
{

	return (sotoinpcb(so));
}
1320
/*
 * Function form of sototcpcb(): return the TCP control block for a socket.
 */
struct tcpcb *
so_sototcpcb(struct socket *so)
{

	return (sototcpcb(so));
}
1327
/*
 * Function form of INP_WLOCK(): take the PCB's write lock.
 */
void
inp_wlock(struct inpcb *inp)
{

	INP_WLOCK(inp);
}
1334
/*
 * Function form of INP_WUNLOCK(): release the PCB's write lock.
 */
void
inp_wunlock(struct inpcb *inp)
{

	INP_WUNLOCK(inp);
}
1341
/*
 * Function form of INP_RLOCK(): take the PCB's read lock.
 */
void
inp_rlock(struct inpcb *inp)
{

	INP_RLOCK(inp);
}
1348
/*
 * Function form of INP_RUNLOCK(): release the PCB's read lock.
 */
void
inp_runlock(struct inpcb *inp)
{

	INP_RUNLOCK(inp);
}
1355
1356 #ifdef INVARIANTS
/*
 * Function form of INP_WLOCK_ASSERT().  Built only under INVARIANTS.
 */
void
inp_wlock_assert(struct inpcb *inp)
{

	INP_WLOCK_ASSERT(inp);
}
1363
/*
 * Function form of INP_RLOCK_ASSERT().  Built only under INVARIANTS.
 */
void
inp_rlock_assert(struct inpcb *inp)
{

	INP_RLOCK_ASSERT(inp);
}
1370
/*
 * Function form of INP_LOCK_ASSERT().  Built only under INVARIANTS.
 */
void
inp_lock_assert(struct inpcb *inp)
{

	INP_LOCK_ASSERT(inp);
}
1377
/*
 * Function form of INP_UNLOCK_ASSERT().  Built only under INVARIANTS.
 */
void
inp_unlock_assert(struct inpcb *inp)
{

	INP_UNLOCK_ASSERT(inp);
}
1384
1385 #endif
1386
1387 #ifdef DDB
/*
 * Emit 'indent' copies of the padding string through db_printf().
 * A zero or negative indent prints nothing.
 */
static void
db_print_indent(int indent)
{

	for (; indent > 0; indent--)
		db_printf(" ");
}
1396
1397 static void
1398 db_print_inconninfo(struct in_conninfo *inc, const char *name, int indent)
1399 {
1400 char faddr_str[48], laddr_str[48];
1401
1402 db_print_indent(indent);
1403 db_printf("%s at %p\n", name, inc);
1404
1405 indent += 2;
1406
1407 #ifdef INET6
1408 if (inc->inc_flags == 1) {
1409 /* IPv6. */
1410 ip6_sprintf(laddr_str, &inc->inc6_laddr);
1411 ip6_sprintf(faddr_str, &inc->inc6_faddr);
1412 } else {
1413 #endif
1414 /* IPv4. */
1415 inet_ntoa_r(inc->inc_laddr, laddr_str);
1416 inet_ntoa_r(inc->inc_faddr, faddr_str);
1417 #ifdef INET6
1418 }
1419 #endif
1420 db_print_indent(indent);
1421 db_printf("inc_laddr %s inc_lport %u\n", laddr_str,
1422 ntohs(inc->inc_lport));
1423 db_print_indent(indent);
1424 db_printf("inc_faddr %s inc_fport %u\n", faddr_str,
1425 ntohs(inc->inc_fport));
1426 }
1427
1428 static void
1429 db_print_inpflags(int inp_flags)
1430 {
1431 int comma;
1432
1433 comma = 0;
1434 if (inp_flags & INP_RECVOPTS) {
1435 db_printf("%sINP_RECVOPTS", comma ? ", " : "");
1436 comma = 1;
1437 }
1438 if (inp_flags & INP_RECVRETOPTS) {
1439 db_printf("%sINP_RECVRETOPTS", comma ? ", " : "");
1440 comma = 1;
1441 }
1442 if (inp_flags & INP_RECVDSTADDR) {
1443 db_printf("%sINP_RECVDSTADDR", comma ? ", " : "");
1444 comma = 1;
1445 }
1446 if (inp_flags & INP_HDRINCL) {
1447 db_printf("%sINP_HDRINCL", comma ? ", " : "");
1448 comma = 1;
1449 }
1450 if (inp_flags & INP_HIGHPORT) {
1451 db_printf("%sINP_HIGHPORT", comma ? ", " : "");
1452 comma = 1;
1453 }
1454 if (inp_flags & INP_LOWPORT) {
1455 db_printf("%sINP_LOWPORT", comma ? ", " : "");
1456 comma = 1;
1457 }
1458 if (inp_flags & INP_ANONPORT) {
1459 db_printf("%sINP_ANONPORT", comma ? ", " : "");
1460 comma = 1;
1461 }
1462 if (inp_flags & INP_RECVIF) {
1463 db_printf("%sINP_RECVIF", comma ? ", " : "");
1464 comma = 1;
1465 }
1466 if (inp_flags & INP_MTUDISC) {
1467 db_printf("%sINP_MTUDISC", comma ? ", " : "");
1468 comma = 1;
1469 }
1470 if (inp_flags & INP_FAITH) {
1471 db_printf("%sINP_FAITH", comma ? ", " : "");
1472 comma = 1;
1473 }
1474 if (inp_flags & INP_RECVTTL) {
1475 db_printf("%sINP_RECVTTL", comma ? ", " : "");
1476 comma = 1;
1477 }
1478 if (inp_flags & INP_DONTFRAG) {
1479 db_printf("%sINP_DONTFRAG", comma ? ", " : "");
1480 comma = 1;
1481 }
1482 if (inp_flags & IN6P_IPV6_V6ONLY) {
1483 db_printf("%sIN6P_IPV6_V6ONLY", comma ? ", " : "");
1484 comma = 1;
1485 }
1486 if (inp_flags & IN6P_PKTINFO) {
1487 db_printf("%sIN6P_PKTINFO", comma ? ", " : "");
1488 comma = 1;
1489 }
1490 if (inp_flags & IN6P_HOPLIMIT) {
1491 db_printf("%sIN6P_HOPLIMIT", comma ? ", " : "");
1492 comma = 1;
1493 }
1494 if (inp_flags & IN6P_HOPOPTS) {
1495 db_printf("%sIN6P_HOPOPTS", comma ? ", " : "");
1496 comma = 1;
1497 }
1498 if (inp_flags & IN6P_DSTOPTS) {
1499 db_printf("%sIN6P_DSTOPTS", comma ? ", " : "");
1500 comma = 1;
1501 }
1502 if (inp_flags & IN6P_RTHDR) {
1503 db_printf("%sIN6P_RTHDR", comma ? ", " : "");
1504 comma = 1;
1505 }
1506 if (inp_flags & IN6P_RTHDRDSTOPTS) {
1507 db_printf("%sIN6P_RTHDRDSTOPTS", comma ? ", " : "");
1508 comma = 1;
1509 }
1510 if (inp_flags & IN6P_TCLASS) {
1511 db_printf("%sIN6P_TCLASS", comma ? ", " : "");
1512 comma = 1;
1513 }
1514 if (inp_flags & IN6P_AUTOFLOWLABEL) {
1515 db_printf("%sIN6P_AUTOFLOWLABEL", comma ? ", " : "");
1516 comma = 1;
1517 }
1518 if (inp_flags & IN6P_RFC2292) {
1519 db_printf("%sIN6P_RFC2292", comma ? ", " : "");
1520 comma = 1;
1521 }
1522 if (inp_flags & IN6P_MTU) {
1523 db_printf("IN6P_MTU%s", comma ? ", " : "");
1524 comma = 1;
1525 }
1526 }
1527
1528 static void
1529 db_print_inpvflag(u_char inp_vflag)
1530 {
1531 int comma;
1532
1533 comma = 0;
1534 if (inp_vflag & INP_IPV4) {
1535 db_printf("%sINP_IPV4", comma ? ", " : "");
1536 comma = 1;
1537 }
1538 if (inp_vflag & INP_IPV6) {
1539 db_printf("%sINP_IPV6", comma ? ", " : "");
1540 comma = 1;
1541 }
1542 if (inp_vflag & INP_IPV6PROTO) {
1543 db_printf("%sINP_IPV6PROTO", comma ? ", " : "");
1544 comma = 1;
1545 }
1546 if (inp_vflag & INP_TIMEWAIT) {
1547 db_printf("%sINP_TIMEWAIT", comma ? ", " : "");
1548 comma = 1;
1549 }
1550 if (inp_vflag & INP_ONESBCAST) {
1551 db_printf("%sINP_ONESBCAST", comma ? ", " : "");
1552 comma = 1;
1553 }
1554 if (inp_vflag & INP_DROPPED) {
1555 db_printf("%sINP_DROPPED", comma ? ", " : "");
1556 comma = 1;
1557 }
1558 if (inp_vflag & INP_SOCKREF) {
1559 db_printf("%sINP_SOCKREF", comma ? ", " : "");
1560 comma = 1;
1561 }
1562 }
1563
1564 void
1565 db_print_inpcb(struct inpcb *inp, const char *name, int indent)
1566 {
1567
1568 db_print_indent(indent);
1569 db_printf("%s at %p\n", name, inp);
1570
1571 indent += 2;
1572
1573 db_print_indent(indent);
1574 db_printf("inp_flow: 0x%x\n", inp->inp_flow);
1575
1576 db_print_inconninfo(&inp->inp_inc, "inp_conninfo", indent);
1577
1578 db_print_indent(indent);
1579 db_printf("inp_ppcb: %p inp_pcbinfo: %p inp_socket: %p\n",
1580 inp->inp_ppcb, inp->inp_pcbinfo, inp->inp_socket);
1581
1582 db_print_indent(indent);
1583 db_printf("inp_label: %p inp_flags: 0x%x (",
1584 inp->inp_label, inp->inp_flags);
1585 db_print_inpflags(inp->inp_flags);
1586 db_printf(")\n");
1587
1588 db_print_indent(indent);
1589 db_printf("inp_sp: %p inp_vflag: 0x%x (", inp->inp_sp,
1590 inp->inp_vflag);
1591 db_print_inpvflag(inp->inp_vflag);
1592 db_printf(")\n");
1593
1594 db_print_indent(indent);
1595 db_printf("inp_ip_ttl: %d inp_ip_p: %d inp_ip_minttl: %d\n",
1596 inp->inp_ip_ttl, inp->inp_ip_p, inp->inp_ip_minttl);
1597
1598 db_print_indent(indent);
1599 #ifdef INET6
1600 if (inp->inp_vflag & INP_IPV6) {
1601 db_printf("in6p_options: %p in6p_outputopts: %p "
1602 "in6p_moptions: %p\n", inp->in6p_options,
1603 inp->in6p_outputopts, inp->in6p_moptions);
1604 db_printf("in6p_icmp6filt: %p in6p_cksum %d "
1605 "in6p_hops %u\n", inp->in6p_icmp6filt, inp->in6p_cksum,
1606 inp->in6p_hops);
1607 } else
1608 #endif
1609 {
1610 db_printf("inp_ip_tos: %d inp_ip_options: %p "
1611 "inp_ip_moptions: %p\n", inp->inp_ip_tos,
1612 inp->inp_options, inp->inp_moptions);
1613 }
1614
1615 db_print_indent(indent);
1616 db_printf("inp_phd: %p inp_gencnt: %ju\n", inp->inp_phd,
1617 (uintmax_t)inp->inp_gencnt);
1618 }
1619
1620 DB_SHOW_COMMAND(inpcb, db_show_inpcb)
1621 {
1622 struct inpcb *inp;
1623
1624 if (!have_addr) {
1625 db_printf("usage: show inpcb <addr>\n");
1626 return;
1627 }
1628 inp = (struct inpcb *)addr;
1629
1630 db_print_inpcb(inp, "inpcb", 0);
1631 }
1632 #endif
Cache object: 23187f0b834d6d502d8d352d53839907
|