FreeBSD/Linux Kernel Cross Reference
sys/netinet/in_pcb.c
1 /*
2 * Copyright (c) 1982, 1986, 1991, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
34 * $FreeBSD: releng/5.2/sys/netinet/in_pcb.c 125088 2004-01-27 15:54:05Z ume $
35 */
36
37 #include "opt_ipsec.h"
38 #include "opt_inet6.h"
39 #include "opt_mac.h"
40
41 #include <sys/param.h>
42 #include <sys/systm.h>
43 #include <sys/limits.h>
44 #include <sys/mac.h>
45 #include <sys/malloc.h>
46 #include <sys/mbuf.h>
47 #include <sys/domain.h>
48 #include <sys/protosw.h>
49 #include <sys/socket.h>
50 #include <sys/socketvar.h>
51 #include <sys/proc.h>
52 #include <sys/jail.h>
53 #include <sys/kernel.h>
54 #include <sys/sysctl.h>
55
56 #include <vm/uma.h>
57
58 #include <net/if.h>
59 #include <net/if_types.h>
60 #include <net/route.h>
61
62 #include <netinet/in.h>
63 #include <netinet/in_pcb.h>
64 #include <netinet/in_var.h>
65 #include <netinet/ip_var.h>
66 #include <netinet/tcp_var.h>
67 #ifdef INET6
68 #include <netinet/ip6.h>
69 #include <netinet6/ip6_var.h>
70 #endif /* INET6 */
71
72 #ifdef IPSEC
73 #include <netinet6/ipsec.h>
74 #include <netkey/key.h>
75 #endif /* IPSEC */
76
77 #ifdef FAST_IPSEC
78 #if defined(IPSEC) || defined(IPSEC_ESP)
79 #error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!"
80 #endif
81
82 #include <netipsec/ipsec.h>
83 #include <netipsec/key.h>
84 #endif /* FAST_IPSEC */
85
86 struct in_addr zeroin_addr;
87
88 /*
89 * These configure the range of local port addresses assigned to
90 * "unspecified" outgoing connections/packets/whatever.
91 */
92 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */
93 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */
94 int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
95 int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */
96 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
97 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */
98
99 /*
100 * Reserved ports accessible only to root. There are significant
101 * security considerations that must be accounted for when changing these,
102 * but the security benefits can be great. Please be careful.
103 */
104 int ipport_reservedhigh = IPPORT_RESERVED - 1; /* 1023 */
105 int ipport_reservedlow = 0;
106
107 #define RANGECHK(var, min, max) \
108 if ((var) < (min)) { (var) = (min); } \
109 else if ((var) > (max)) { (var) = (max); }
110
111 static int
112 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
113 {
114 int error = sysctl_handle_int(oidp,
115 oidp->oid_arg1, oidp->oid_arg2, req);
116 if (!error) {
117 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
118 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
119 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
120 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
121 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
122 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
123 }
124 return error;
125 }
126
127 #undef RANGECHK
128
129 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports");
130
131 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW,
132 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
133 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW,
134 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
135 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW,
136 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
137 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW,
138 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
139 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW,
140 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
141 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW,
142 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
143 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh,
144 CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedhigh, 0, "");
145 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow,
146 CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedlow, 0, "");
147
148 /*
149 * in_pcb.c: manage the Protocol Control Blocks.
150 *
151 * NOTE: It is assumed that most of these functions will be called at
152 * splnet(). XXX - There are, unfortunately, a few exceptions to this
153 * rule that should be fixed.
154 */
155
156 /*
157 * Allocate a PCB and associate it with the socket.
158 */
159 int
160 in_pcballoc(so, pcbinfo, td, type)
161 struct socket *so;
162 struct inpcbinfo *pcbinfo;
163 struct thread *td;
164 const char *type;
165 {
166 register struct inpcb *inp;
167 int error;
168
169 INP_INFO_WLOCK_ASSERT(pcbinfo);
170 error = 0;
171 inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT | M_ZERO);
172 if (inp == NULL)
173 return (ENOBUFS);
174 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
175 inp->inp_pcbinfo = pcbinfo;
176 inp->inp_socket = so;
177 #ifdef MAC
178 error = mac_init_inpcb(inp, M_NOWAIT);
179 if (error != 0)
180 goto out;
181 mac_create_inpcb_from_socket(so, inp);
182 #endif
183 #if defined(IPSEC) || defined(FAST_IPSEC)
184 #ifdef FAST_IPSEC
185 error = ipsec_init_policy(so, &inp->inp_sp);
186 #else
187 error = ipsec_init_pcbpolicy(so, &inp->inp_sp);
188 #endif
189 if (error != 0)
190 goto out;
191 #endif /*IPSEC*/
192 #if defined(INET6)
193 if (INP_SOCKAF(so) == AF_INET6) {
194 inp->inp_vflag |= INP_IPV6PROTO;
195 if (ip6_v6only)
196 inp->inp_flags |= IN6P_IPV6_V6ONLY;
197 }
198 #endif
199 LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list);
200 pcbinfo->ipi_count++;
201 so->so_pcb = (caddr_t)inp;
202 INP_LOCK_INIT(inp, "inp", type);
203 #ifdef INET6
204 if (ip6_auto_flowlabel)
205 inp->inp_flags |= IN6P_AUTOFLOWLABEL;
206 #endif
207 #if defined(IPSEC) || defined(FAST_IPSEC) || defined(MAC)
208 out:
209 if (error != 0)
210 uma_zfree(pcbinfo->ipi_zone, inp);
211 #endif
212 return (error);
213 }
214
215 int
216 in_pcbbind(inp, nam, td)
217 register struct inpcb *inp;
218 struct sockaddr *nam;
219 struct thread *td;
220 {
221 int anonport, error;
222
223 INP_INFO_WLOCK_ASSERT(inp->inp_pcbinfo);
224 INP_LOCK_ASSERT(inp);
225
226 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
227 return (EINVAL);
228 anonport = inp->inp_lport == 0 && (nam == NULL ||
229 ((struct sockaddr_in *)nam)->sin_port == 0);
230 error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr,
231 &inp->inp_lport, td);
232 if (error)
233 return (error);
234 if (in_pcbinshash(inp) != 0) {
235 inp->inp_laddr.s_addr = INADDR_ANY;
236 inp->inp_lport = 0;
237 return (EAGAIN);
238 }
239 if (anonport)
240 inp->inp_flags |= INP_ANONPORT;
241 return (0);
242 }
243
244 /*
245 * Set up a bind operation on a PCB, performing port allocation
246 * as required, but do not actually modify the PCB. Callers can
247 * either complete the bind by setting inp_laddr/inp_lport and
248 * calling in_pcbinshash(), or they can just use the resulting
249 * port and address to authorise the sending of a once-off packet.
250 *
251 * On error, the values of *laddrp and *lportp are not changed.
252 */
/*
 * Parameters:
 *   inp     PCB being bound; its socket options, inp_flags and pcbinfo are
 *           read.  The PCB's own address/port fields are only touched if
 *           the caller passes pointers to them as laddrp/lportp.
 *   nam     Requested local address as a sockaddr_in, or NULL to keep the
 *           current address and have a port chosen.  For a specific
 *           non-multicast address the caller's structure is modified:
 *           sin_port is set to 0 and sin_zero is cleared.
 *   laddrp  In: current local address.  Out (success only): address to use.
 *   lportp  In: current local port.     Out (success only): port to use.
 *   td      Thread whose credentials are used for the jail and privilege
 *           checks.
 *
 * Besides the outputs, the per-range "last port" cursor inside pcbinfo is
 * advanced whenever a port is chosen automatically.
 *
 * Returns 0, or one of EADDRNOTAVAIL, EINVAL, EACCES, EADDRINUSE, or the
 * error from suser_cred() for a low-port request.
 */
253 int
254 in_pcbbind_setup(inp, nam, laddrp, lportp, td)
255 struct inpcb *inp;
256 struct sockaddr *nam;
257 in_addr_t *laddrp;
258 u_short *lportp;
259 struct thread *td;
260 {
261 struct socket *so = inp->inp_socket;
262 unsigned short *lastport;
263 struct sockaddr_in *sin;
264 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
265 struct in_addr laddr;
266 u_short lport = 0;
267 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
268 int error, prison = 0;
269
270 INP_INFO_WLOCK_ASSERT(pcbinfo);
271 INP_LOCK_ASSERT(inp);
272
/* Nothing can be bound while the IPv4 interface address list is empty. */
273 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
274 return (EADDRNOTAVAIL);
275 laddr.s_addr = *laddrp;
/* An explicit address may only be given while no local address is set. */
276 if (nam != NULL && laddr.s_addr != INADDR_ANY)
277 return (EINVAL);
/* With neither reuse option set, the conflict lookups below use wild = 1. */
278 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
279 wild = 1;
280 if (nam) {
281 sin = (struct sockaddr_in *)nam;
282 if (nam->sa_len != sizeof (*sin))
283 return (EINVAL);
284 #ifdef notdef
285 /*
286 * We should check the family, but old programs
287 * incorrectly fail to initialize it.
288 */
289 if (sin->sin_family != AF_INET)
290 return (EAFNOSUPPORT);
291 #endif
/*
 * A non-zero return from prison_ip() rejects the address.  prison_ip() is
 * handed a pointer to the address -- presumably so it can rewrite it for a
 * jailed credential; confirm.
 * NOTE(review): td is dereferenced here without the NULL test that the
 * privilege checks further down apply -- confirm callers never pass NULL.
 */
292 if (sin->sin_addr.s_addr != INADDR_ANY)
293 if (prison_ip(td->td_ucred, 0, &sin->sin_addr.s_addr))
294 return(EINVAL);
295 if (sin->sin_port != *lportp) {
296 /* Don't allow the port to change. */
297 if (*lportp != 0)
298 return (EINVAL);
299 lport = sin->sin_port;
300 }
301 /* NB: lport is left as 0 if the port isn't being changed. */
302 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
303 /*
304 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
305 * allow complete duplication of binding if
306 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
307 * and a multicast address is bound on both
308 * new and duplicated sockets.
309 */
310 if (so->so_options & SO_REUSEADDR)
311 reuseport = SO_REUSEADDR|SO_REUSEPORT;
312 } else if (sin->sin_addr.s_addr != INADDR_ANY) {
/*
 * Specific non-multicast address: ifa_ifwithaddr() must find it, otherwise
 * EADDRNOTAVAIL.  The port and padding in the caller's sockaddr are zeroed
 * first, so the lookup sees only the address; the requested port was
 * already saved in lport above.
 */
313 sin->sin_port = 0; /* yech... */
314 bzero(&sin->sin_zero, sizeof(sin->sin_zero));
315 if (ifa_ifwithaddr((struct sockaddr *)sin) == 0)
316 return (EADDRNOTAVAIL);
317 }
318 laddr = sin->sin_addr;
319 if (lport) {
320 struct inpcb *t;
321 /* GROSS */
/*
 * Ports within [ipport_reservedlow, ipport_reservedhigh] need
 * suser_cred() to succeed (it returns 0 on success); the check is skipped
 * entirely when td is NULL.
 */
322 if (ntohs(lport) <= ipport_reservedhigh &&
323 ntohs(lport) >= ipport_reservedlow &&
324 td && suser_cred(td->td_ucred, PRISON_ROOT))
325 return (EACCES);
326 if (td && jailed(td->td_ucred))
327 prison = 1;
/*
 * First conflict check, only for sockets whose owner is not uid 0 and
 * only for non-multicast addresses: refuse with EADDRINUSE when an
 * existing PCB on this port belongs to a different uid, unless both
 * addresses are INADDR_ANY and the existing socket has SO_REUSEPORT set.
 * TIME_WAIT PCBs carry their options and credential in the tcptw
 * structure instead of a socket, hence the separate branch.
 */
328 if (so->so_cred->cr_uid != 0 &&
329 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
330 t = in_pcblookup_local(inp->inp_pcbinfo,
331 sin->sin_addr, lport,
332 prison ? 0 : INPLOOKUP_WILDCARD);
333 /*
334 * XXX
335 * This entire block sorely needs a rewrite.
336 */
337 if (t && (t->inp_vflag & INP_TIMEWAIT)) {
338 if ((ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
339 ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
340 (intotw(t)->tw_so_options & SO_REUSEPORT) == 0) &&
341 (so->so_cred->cr_uid != intotw(t)->tw_cred->cr_uid))
342 return (EADDRINUSE);
343 } else
344 if (t &&
345 (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
346 ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
347 (t->inp_socket->so_options &
348 SO_REUSEPORT) == 0) &&
349 (so->so_cred->cr_uid !=
350 t->inp_socket->so_cred->cr_uid)) {
351 #if defined(INET6)
352 if (ntohl(sin->sin_addr.s_addr) !=
353 INADDR_ANY ||
354 ntohl(t->inp_laddr.s_addr) !=
355 INADDR_ANY ||
356 INP_SOCKAF(so) ==
357 INP_SOCKAF(t->inp_socket))
358 #endif /* defined(INET6) */
359 return (EADDRINUSE);
360 }
361 }
362 if (prison &&
363 prison_ip(td->td_ucred, 0, &sin->sin_addr.s_addr))
364 return (EADDRNOTAVAIL);
/*
 * Second conflict check, for every caller: an existing PCB blocks the
 * bind unless it and the new socket share at least one of the option
 * bits collected in reuseport.  Jailed callers never use wildcard
 * matching for this lookup.
 */
365 t = in_pcblookup_local(pcbinfo, sin->sin_addr,
366 lport, prison ? 0 : wild);
367 if (t && (t->inp_vflag & INP_TIMEWAIT)) {
368 if ((reuseport & intotw(t)->tw_so_options) == 0)
369 return (EADDRINUSE);
370 } else
371 if (t &&
372 (reuseport & t->inp_socket->so_options) == 0) {
373 #if defined(INET6)
374 if (ntohl(sin->sin_addr.s_addr) !=
375 INADDR_ANY ||
376 ntohl(t->inp_laddr.s_addr) !=
377 INADDR_ANY ||
378 INP_SOCKAF(so) ==
379 INP_SOCKAF(t->inp_socket))
380 #endif /* defined(INET6) */
381 return (EADDRINUSE);
382 }
383 }
384 }
/* A port that is already assigned is kept as it is. */
385 if (*lportp != 0)
386 lport = *lportp;
/* Still no port: choose one automatically from the applicable range. */
387 if (lport == 0) {
388 u_short first, last;
389 int count;
390
391 if (laddr.s_addr != INADDR_ANY)
392 if (prison_ip(td->td_ucred, 0, &laddr.s_addr))
393 return (EINVAL);
394
/*
 * Select the range bounds and the matching cursor in pcbinfo.  The low
 * range additionally requires suser_cred() to succeed when td is set.
 * The numeric values in the trailing comments are the initial values of
 * the tunables, not fixed constants.
 */
395 if (inp->inp_flags & INP_HIGHPORT) {
396 first = ipport_hifirstauto; /* sysctl */
397 last = ipport_hilastauto;
398 lastport = &pcbinfo->lasthi;
399 } else if (inp->inp_flags & INP_LOWPORT) {
400 if (td && (error = suser_cred(td->td_ucred,
401 PRISON_ROOT)) != 0)
402 return error;
403 first = ipport_lowfirstauto; /* 1023 */
404 last = ipport_lowlastauto; /* 600 */
405 lastport = &pcbinfo->lastlow;
406 } else {
407 first = ipport_firstauto; /* sysctl */
408 last = ipport_lastauto;
409 lastport = &pcbinfo->lastport;
410 }
411 /*
412 * Simple check to ensure all ports are not used up causing
413 * a deadlock here.
414 *
415 * We split the two cases (up and down) so that the direction
416 * is not being tested on each round of the loop.
417 */
418 if (first > last) {
419 /*
420 * counting down
421 */
422 count = first - last;
423
/*
 * Step the cursor down, resetting it to `first' whenever it leaves
 * [last, first], until in_pcblookup_local() finds no PCB for the
 * candidate.  `count' bounds the number of candidates tried.
 */
424 do {
425 if (count-- < 0) /* completely used? */
426 return (EADDRNOTAVAIL);
427 --*lastport;
428 if (*lastport > first || *lastport < last)
429 *lastport = first;
430 lport = htons(*lastport);
431 } while (in_pcblookup_local(pcbinfo, laddr, lport,
432 wild));
433 } else {
434 /*
435 * counting up
436 */
437 count = last - first;
438
/* Same search as above with the cursor stepping upwards. */
439 do {
440 if (count-- < 0) /* completely used? */
441 return (EADDRNOTAVAIL);
442 ++*lastport;
443 if (*lastport < first || *lastport > last)
444 *lastport = first;
445 lport = htons(*lastport);
446 } while (in_pcblookup_local(pcbinfo, laddr, lport,
447 wild));
448 }
449 }
/*
 * Final jail check on the chosen address, then publish the results.
 * NOTE(review): td is dereferenced unconditionally here as well.
 */
450 if (prison_ip(td->td_ucred, 0, &laddr.s_addr))
451 return (EINVAL);
452 *laddrp = laddr.s_addr;
453 *lportp = lport;
454 return (0);
455 }
456
457 /*
458 * Connect from a socket to a specified address.
459 * Both address and port must be specified in argument sin.
460 * If don't have a local address for this socket yet,
461 * then pick one.
462 */
463 int
464 in_pcbconnect(inp, nam, td)
465 register struct inpcb *inp;
466 struct sockaddr *nam;
467 struct thread *td;
468 {
469 u_short lport, fport;
470 in_addr_t laddr, faddr;
471 int anonport, error;
472
473 lport = inp->inp_lport;
474 laddr = inp->inp_laddr.s_addr;
475 anonport = (lport == 0);
476 error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport,
477 NULL, td);
478 if (error)
479 return (error);
480
481 /* Do the initial binding of the local address if required. */
482 if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) {
483 inp->inp_lport = lport;
484 inp->inp_laddr.s_addr = laddr;
485 if (in_pcbinshash(inp) != 0) {
486 inp->inp_laddr.s_addr = INADDR_ANY;
487 inp->inp_lport = 0;
488 return (EAGAIN);
489 }
490 }
491
492 /* Commit the remaining changes. */
493 inp->inp_lport = lport;
494 inp->inp_laddr.s_addr = laddr;
495 inp->inp_faddr.s_addr = faddr;
496 inp->inp_fport = fport;
497 in_pcbrehash(inp);
498 #ifdef IPSEC
499 if (inp->inp_socket->so_type == SOCK_STREAM)
500 ipsec_pcbconn(inp->inp_sp);
501 #endif
502 if (anonport)
503 inp->inp_flags |= INP_ANONPORT;
504 return (0);
505 }
506
507 /*
508 * Set up for a connect from a socket to the specified address.
509 * On entry, *laddrp and *lportp should contain the current local
510 * address and port for the PCB; these are updated to the values
511 * that should be placed in inp_laddr and inp_lport to complete
512 * the connect.
513 *
514 * On success, *faddrp and *fportp will be set to the remote address
515 * and port. These are not updated in the error case.
516 *
517 * If the operation fails because the connection already exists,
518 * *oinpp will be set to the PCB of that connection so that the
519 * caller can decide to override it. In all other cases, *oinpp
520 * is set to NULL.
521 */
/*
 * Parameters:
 *   inp     PCB being connected; its socket (credential, options),
 *           multicast options and pcbinfo are read.  No field of the PCB
 *           is assigned directly by this function.
 *   nam     Destination as a sockaddr_in.  It is dereferenced without a
 *           NULL check, so it must be non-NULL.
 *   laddrp, lportp   In/out local address and port (network byte order,
 *           as stored in the PCB).
 *   faddrp, fportp   Out: foreign address and port, written on success.
 *   oinpp   Optional; receives the conflicting PCB on EADDRINUSE.
 *   td      Passed through to in_pcbbind_setup().
 *
 * Returns 0, EINVAL (bad sa_len), EAFNOSUPPORT (family not AF_INET),
 * EADDRNOTAVAIL (zero port or no usable source address), EADDRINUSE
 * (connection already exists), or an error from in_pcbbind_setup().
 */
522 int
523 in_pcbconnect_setup(inp, nam, laddrp, lportp, faddrp, fportp, oinpp, td)
524 register struct inpcb *inp;
525 struct sockaddr *nam;
526 in_addr_t *laddrp;
527 u_short *lportp;
528 in_addr_t *faddrp;
529 u_short *fportp;
530 struct inpcb **oinpp;
531 struct thread *td;
532 {
533 struct sockaddr_in *sin = (struct sockaddr_in *)nam;
534 struct in_ifaddr *ia;
535 struct sockaddr_in sa;
536 struct ucred *cred;
537 struct inpcb *oinp;
538 struct in_addr laddr, faddr;
539 u_short lport, fport;
540 int error;
541
542 if (oinpp != NULL)
543 *oinpp = NULL;
544 if (nam->sa_len != sizeof (*sin))
545 return (EINVAL);
546 if (sin->sin_family != AF_INET)
547 return (EAFNOSUPPORT);
548 if (sin->sin_port == 0)
549 return (EADDRNOTAVAIL);
/* Work on local copies; the caller's values are only written on success. */
550 laddr.s_addr = *laddrp;
551 lport = *lportp;
552 faddr = sin->sin_addr;
553 fport = sin->sin_port;
554 cred = inp->inp_socket->so_cred;
/*
 * A jailed socket without a local address is bound to the address
 * returned by prison_getip() (converted with htonl) before anything else
 * is done.  This also selects a local port when none is set.
 */
555 if (laddr.s_addr == INADDR_ANY && jailed(cred)) {
556 bzero(&sa, sizeof(sa));
557 sa.sin_addr.s_addr = htonl(prison_getip(cred));
558 sa.sin_len = sizeof(sa);
559 sa.sin_family = AF_INET;
560 error = in_pcbbind_setup(inp, (struct sockaddr *)&sa,
561 &laddr.s_addr, &lport, td);
562 if (error)
563 return (error);
564 }
565 if (!TAILQ_EMPTY(&in_ifaddrhead)) {
566 /*
567 * If the destination address is INADDR_ANY,
568 * use the primary local address.
569 * If the supplied address is INADDR_BROADCAST,
570 * and the primary interface supports broadcast,
571 * choose the broadcast address for that interface.
572 */
573 if (faddr.s_addr == INADDR_ANY)
574 faddr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr;
575 else if (faddr.s_addr == (u_long)INADDR_BROADCAST &&
576 (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags &
577 IFF_BROADCAST))
578 faddr = satosin(&TAILQ_FIRST(
579 &in_ifaddrhead)->ia_broadaddr)->sin_addr;
580 }
/*
 * No local address yet: derive one from the destination.  Order of
 * preference: interface address of the route to faddr (unless that
 * interface is a loopback), ifa_ifwithdstaddr(), ifa_ifwithnet(), then
 * the first entry of in_ifaddrhead.  A multicast destination with an
 * outgoing interface selected in the multicast options overrides all of
 * these.
 */
581 if (laddr.s_addr == INADDR_ANY) {
582 struct route sro;
583
584 bzero(&sro, sizeof(sro));
585 ia = (struct in_ifaddr *)0;
586 /*
587 * If route is known our src addr is taken from the i/f,
588 * else punt.
589 */
590 if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0) {
591 /* Find out route to destination */
592 sro.ro_dst.sa_family = AF_INET;
593 sro.ro_dst.sa_len = sizeof(struct sockaddr_in);
594 ((struct sockaddr_in *)&sro.ro_dst)->sin_addr = faddr;
595 rtalloc_ign(&sro, RTF_CLONING);
596 }
597 /*
598 * If we found a route, use the address
599 * corresponding to the outgoing interface
600 * unless it is the loopback (in case a route
601 * to our address on another net goes to loopback).
602 */
603 if (sro.ro_rt && !(sro.ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
604 ia = ifatoia(sro.ro_rt->rt_ifa);
/* The route is released here; only the ia pointer taken from it is kept. */
605 if (sro.ro_rt)
606 RTFREE(sro.ro_rt);
607 if (ia == 0) {
608 bzero(&sa, sizeof(sa));
609 sa.sin_addr = faddr;
610 sa.sin_len = sizeof(sa);
611 sa.sin_family = AF_INET;
612
613 ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sa)));
614 if (ia == 0)
615 ia = ifatoia(ifa_ifwithnet(sintosa(&sa)));
616 if (ia == 0)
617 ia = TAILQ_FIRST(&in_ifaddrhead);
618 if (ia == 0)
619 return (EADDRNOTAVAIL);
620 }
621 /*
622 * If the destination address is multicast and an outgoing
623 * interface has been set as a multicast option, use the
624 * address of that interface as our source address.
625 */
626 if (IN_MULTICAST(ntohl(faddr.s_addr)) &&
627 inp->inp_moptions != NULL) {
628 struct ip_moptions *imo;
629 struct ifnet *ifp;
630
631 imo = inp->inp_moptions;
632 if (imo->imo_multicast_ifp != NULL) {
633 ifp = imo->imo_multicast_ifp;
/* TAILQ_FOREACH leaves ia NULL (0) when no address is on that interface. */
634 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link)
635 if (ia->ia_ifp == ifp)
636 break;
637 if (ia == 0)
638 return (EADDRNOTAVAIL);
639 }
640 }
641 laddr = ia->ia_addr.sin_addr;
642 }
643
/*
 * Exact-match lookup (wildcard argument 0) for a PCB that already has
 * this local/foreign address and port combination.
 */
644 oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport,
645 0, NULL);
646 if (oinp != NULL) {
647 if (oinpp != NULL)
648 *oinpp = oinp;
649 return (EADDRINUSE);
650 }
/* No local port yet: have one chosen for the address selected above. */
651 if (lport == 0) {
652 error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport, td);
653 if (error)
654 return (error);
655 }
656 *laddrp = laddr.s_addr;
657 *lportp = lport;
658 *faddrp = faddr.s_addr;
659 *fportp = fport;
660 return (0);
661 }
662
663 void
664 in_pcbdisconnect(inp)
665 struct inpcb *inp;
666 {
667 INP_LOCK_ASSERT(inp);
668
669 inp->inp_faddr.s_addr = INADDR_ANY;
670 inp->inp_fport = 0;
671 in_pcbrehash(inp);
672 #ifdef IPSEC
673 ipsec_pcbdisconn(inp->inp_sp);
674 #endif
675 if (inp->inp_socket->so_state & SS_NOFDREF)
676 in_pcbdetach(inp);
677 }
678
679 void
680 in_pcbdetach(inp)
681 struct inpcb *inp;
682 {
683 struct socket *so = inp->inp_socket;
684 struct inpcbinfo *ipi = inp->inp_pcbinfo;
685
686 INP_LOCK_ASSERT(inp);
687
688 #if defined(IPSEC) || defined(FAST_IPSEC)
689 ipsec4_delete_pcbpolicy(inp);
690 #endif /*IPSEC*/
691 inp->inp_gencnt = ++ipi->ipi_gencnt;
692 in_pcbremlists(inp);
693 if (so) {
694 so->so_pcb = 0;
695 sotryfree(so);
696 }
697 if (inp->inp_options)
698 (void)m_free(inp->inp_options);
699 ip_freemoptions(inp->inp_moptions);
700 inp->inp_vflag = 0;
701 INP_LOCK_DESTROY(inp);
702 #ifdef MAC
703 mac_destroy_inpcb(inp);
704 #endif
705 uma_zfree(ipi->ipi_zone, inp);
706 }
707
708 struct sockaddr *
709 in_sockaddr(port, addr_p)
710 in_port_t port;
711 struct in_addr *addr_p;
712 {
713 struct sockaddr_in *sin;
714
715 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME,
716 M_WAITOK | M_ZERO);
717 sin->sin_family = AF_INET;
718 sin->sin_len = sizeof(*sin);
719 sin->sin_addr = *addr_p;
720 sin->sin_port = port;
721
722 return (struct sockaddr *)sin;
723 }
724
725 /*
726 * The wrapper function will pass down the pcbinfo for this function to lock.
727 * The socket must have a valid
728 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
729 * except through a kernel programming error, so it is acceptable to panic
730 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap
731 * because there actually /is/ a programming error somewhere... XXX)
732 */
733 int
734 in_setsockaddr(so, nam, pcbinfo)
735 struct socket *so;
736 struct sockaddr **nam;
737 struct inpcbinfo *pcbinfo;
738 {
739 int s;
740 register struct inpcb *inp;
741 struct in_addr addr;
742 in_port_t port;
743
744 s = splnet();
745 INP_INFO_RLOCK(pcbinfo);
746 inp = sotoinpcb(so);
747 if (!inp) {
748 INP_INFO_RUNLOCK(pcbinfo);
749 splx(s);
750 return ECONNRESET;
751 }
752 INP_LOCK(inp);
753 port = inp->inp_lport;
754 addr = inp->inp_laddr;
755 INP_UNLOCK(inp);
756 INP_INFO_RUNLOCK(pcbinfo);
757 splx(s);
758
759 *nam = in_sockaddr(port, &addr);
760 return 0;
761 }
762
763 /*
764 * The wrapper function will pass down the pcbinfo for this function to lock.
765 */
766 int
767 in_setpeeraddr(so, nam, pcbinfo)
768 struct socket *so;
769 struct sockaddr **nam;
770 struct inpcbinfo *pcbinfo;
771 {
772 int s;
773 register struct inpcb *inp;
774 struct in_addr addr;
775 in_port_t port;
776
777 s = splnet();
778 INP_INFO_RLOCK(pcbinfo);
779 inp = sotoinpcb(so);
780 if (!inp) {
781 INP_INFO_RUNLOCK(pcbinfo);
782 splx(s);
783 return ECONNRESET;
784 }
785 INP_LOCK(inp);
786 port = inp->inp_fport;
787 addr = inp->inp_faddr;
788 INP_UNLOCK(inp);
789 INP_INFO_RUNLOCK(pcbinfo);
790 splx(s);
791
792 *nam = in_sockaddr(port, &addr);
793 return 0;
794 }
795
796 void
797 in_pcbnotifyall(pcbinfo, faddr, errno, notify)
798 struct inpcbinfo *pcbinfo;
799 struct in_addr faddr;
800 int errno;
801 struct inpcb *(*notify)(struct inpcb *, int);
802 {
803 struct inpcb *inp, *ninp;
804 struct inpcbhead *head;
805 int s;
806
807 s = splnet();
808 INP_INFO_WLOCK(pcbinfo);
809 head = pcbinfo->listhead;
810 for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) {
811 INP_LOCK(inp);
812 ninp = LIST_NEXT(inp, inp_list);
813 #ifdef INET6
814 if ((inp->inp_vflag & INP_IPV4) == 0) {
815 INP_UNLOCK(inp);
816 continue;
817 }
818 #endif
819 if (inp->inp_faddr.s_addr != faddr.s_addr ||
820 inp->inp_socket == NULL) {
821 INP_UNLOCK(inp);
822 continue;
823 }
824 if ((*notify)(inp, errno))
825 INP_UNLOCK(inp);
826 }
827 INP_INFO_WUNLOCK(pcbinfo);
828 splx(s);
829 }
830
831 void
832 in_pcbpurgeif0(pcbinfo, ifp)
833 struct inpcbinfo *pcbinfo;
834 struct ifnet *ifp;
835 {
836 struct inpcb *inp;
837 struct ip_moptions *imo;
838 int i, gap;
839
840 /* why no splnet here? XXX */
841 INP_INFO_RLOCK(pcbinfo);
842 LIST_FOREACH(inp, pcbinfo->listhead, inp_list) {
843 INP_LOCK(inp);
844 imo = inp->inp_moptions;
845 if ((inp->inp_vflag & INP_IPV4) &&
846 imo != NULL) {
847 /*
848 * Unselect the outgoing interface if it is being
849 * detached.
850 */
851 if (imo->imo_multicast_ifp == ifp)
852 imo->imo_multicast_ifp = NULL;
853
854 /*
855 * Drop multicast group membership if we joined
856 * through the interface being detached.
857 */
858 for (i = 0, gap = 0; i < imo->imo_num_memberships;
859 i++) {
860 if (imo->imo_membership[i]->inm_ifp == ifp) {
861 in_delmulti(imo->imo_membership[i]);
862 gap++;
863 } else if (gap != 0)
864 imo->imo_membership[i - gap] =
865 imo->imo_membership[i];
866 }
867 imo->imo_num_memberships -= gap;
868 }
869 INP_UNLOCK(inp);
870 }
871 INP_INFO_RUNLOCK(pcbinfo);
872 }
873
874 /*
875 * Lookup a PCB based on the local address and port.
876 */
877 struct inpcb *
878 in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay)
879 struct inpcbinfo *pcbinfo;
880 struct in_addr laddr;
881 u_int lport_arg;
882 int wild_okay;
883 {
884 register struct inpcb *inp;
885 int matchwild = 3, wildcard;
886 u_short lport = lport_arg;
887
888 INP_INFO_WLOCK_ASSERT(pcbinfo);
889
890 if (!wild_okay) {
891 struct inpcbhead *head;
892 /*
893 * Look for an unconnected (wildcard foreign addr) PCB that
894 * matches the local address and port we're looking for.
895 */
896 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
897 LIST_FOREACH(inp, head, inp_hash) {
898 #ifdef INET6
899 if ((inp->inp_vflag & INP_IPV4) == 0)
900 continue;
901 #endif
902 if (inp->inp_faddr.s_addr == INADDR_ANY &&
903 inp->inp_laddr.s_addr == laddr.s_addr &&
904 inp->inp_lport == lport) {
905 /*
906 * Found.
907 */
908 return (inp);
909 }
910 }
911 /*
912 * Not found.
913 */
914 return (NULL);
915 } else {
916 struct inpcbporthead *porthash;
917 struct inpcbport *phd;
918 struct inpcb *match = NULL;
919 /*
920 * Best fit PCB lookup.
921 *
922 * First see if this local port is in use by looking on the
923 * port hash list.
924 */
925 retrylookup:
926 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
927 pcbinfo->porthashmask)];
928 LIST_FOREACH(phd, porthash, phd_hash) {
929 if (phd->phd_port == lport)
930 break;
931 }
932 if (phd != NULL) {
933 /*
934 * Port is in use by one or more PCBs. Look for best
935 * fit.
936 */
937 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
938 wildcard = 0;
939 #ifdef INET6
940 if ((inp->inp_vflag & INP_IPV4) == 0)
941 continue;
942 #endif
943 /*
944 * Clean out old time_wait sockets if they
945 * are clogging up needed local ports.
946 */
947 if ((inp->inp_vflag & INP_TIMEWAIT) != 0) {
948 if (tcp_twrecycleable((struct tcptw *)inp->inp_ppcb)) {
949 INP_LOCK(inp);
950 tcp_twclose((struct tcptw *)inp->inp_ppcb, 0);
951 match = NULL;
952 goto retrylookup;
953 }
954 }
955 if (inp->inp_faddr.s_addr != INADDR_ANY)
956 wildcard++;
957 if (inp->inp_laddr.s_addr != INADDR_ANY) {
958 if (laddr.s_addr == INADDR_ANY)
959 wildcard++;
960 else if (inp->inp_laddr.s_addr != laddr.s_addr)
961 continue;
962 } else {
963 if (laddr.s_addr != INADDR_ANY)
964 wildcard++;
965 }
966 if (wildcard < matchwild) {
967 match = inp;
968 matchwild = wildcard;
969 if (matchwild == 0) {
970 break;
971 }
972 }
973 }
974 }
975 return (match);
976 }
977 }
978
979 /*
980 * Lookup PCB in hash list.
981 */
982 struct inpcb *
983 in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard,
984 ifp)
985 struct inpcbinfo *pcbinfo;
986 struct in_addr faddr, laddr;
987 u_int fport_arg, lport_arg;
988 int wildcard;
989 struct ifnet *ifp;
990 {
991 struct inpcbhead *head;
992 register struct inpcb *inp;
993 u_short fport = fport_arg, lport = lport_arg;
994
995 INP_INFO_RLOCK_ASSERT(pcbinfo);
996 /*
997 * First look for an exact match.
998 */
999 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)];
1000 LIST_FOREACH(inp, head, inp_hash) {
1001 #ifdef INET6
1002 if ((inp->inp_vflag & INP_IPV4) == 0)
1003 continue;
1004 #endif
1005 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1006 inp->inp_laddr.s_addr == laddr.s_addr &&
1007 inp->inp_fport == fport &&
1008 inp->inp_lport == lport) {
1009 /*
1010 * Found.
1011 */
1012 return (inp);
1013 }
1014 }
1015 if (wildcard) {
1016 struct inpcb *local_wild = NULL;
1017 #if defined(INET6)
1018 struct inpcb *local_wild_mapped = NULL;
1019 #endif /* defined(INET6) */
1020
1021 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
1022 LIST_FOREACH(inp, head, inp_hash) {
1023 #ifdef INET6
1024 if ((inp->inp_vflag & INP_IPV4) == 0)
1025 continue;
1026 #endif
1027 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1028 inp->inp_lport == lport) {
1029 if (ifp && ifp->if_type == IFT_FAITH &&
1030 (inp->inp_flags & INP_FAITH) == 0)
1031 continue;
1032 if (inp->inp_laddr.s_addr == laddr.s_addr)
1033 return (inp);
1034 else if (inp->inp_laddr.s_addr == INADDR_ANY) {
1035 #if defined(INET6)
1036 if (INP_CHECK_SOCKAF(inp->inp_socket,
1037 AF_INET6))
1038 local_wild_mapped = inp;
1039 else
1040 #endif /* defined(INET6) */
1041 local_wild = inp;
1042 }
1043 }
1044 }
1045 #if defined(INET6)
1046 if (local_wild == NULL)
1047 return (local_wild_mapped);
1048 #endif /* defined(INET6) */
1049 return (local_wild);
1050 }
1051
1052 /*
1053 * Not found.
1054 */
1055 return (NULL);
1056 }
1057
1058 /*
1059 * Insert PCB onto various hash lists.
1060 */
1061 int
1062 in_pcbinshash(inp)
1063 struct inpcb *inp;
1064 {
1065 struct inpcbhead *pcbhash;
1066 struct inpcbporthead *pcbporthash;
1067 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1068 struct inpcbport *phd;
1069 u_int32_t hashkey_faddr;
1070
1071 INP_INFO_WLOCK_ASSERT(pcbinfo);
1072 #ifdef INET6
1073 if (inp->inp_vflag & INP_IPV6)
1074 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1075 else
1076 #endif /* INET6 */
1077 hashkey_faddr = inp->inp_faddr.s_addr;
1078
1079 pcbhash = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr,
1080 inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)];
1081
1082 pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport,
1083 pcbinfo->porthashmask)];
1084
1085 /*
1086 * Go through port list and look for a head for this lport.
1087 */
1088 LIST_FOREACH(phd, pcbporthash, phd_hash) {
1089 if (phd->phd_port == inp->inp_lport)
1090 break;
1091 }
1092 /*
1093 * If none exists, malloc one and tack it on.
1094 */
1095 if (phd == NULL) {
1096 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT);
1097 if (phd == NULL) {
1098 return (ENOBUFS); /* XXX */
1099 }
1100 phd->phd_port = inp->inp_lport;
1101 LIST_INIT(&phd->phd_pcblist);
1102 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
1103 }
1104 inp->inp_phd = phd;
1105 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
1106 LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
1107 return (0);
1108 }
1109
1110 /*
1111 * Move PCB to the proper hash bucket when { faddr, fport } have been
1112 * changed. NOTE: This does not handle the case of the lport changing (the
1113 * hashed port list would have to be updated as well), so the lport must
1114 * not change after in_pcbinshash() has been called.
1115 */
1116 void
1117 in_pcbrehash(inp)
1118 struct inpcb *inp;
1119 {
1120 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1121 struct inpcbhead *head;
1122 u_int32_t hashkey_faddr;
1123
1124 INP_INFO_WLOCK_ASSERT(pcbinfo);
1125 /* XXX? INP_LOCK_ASSERT(inp); */
1126 #ifdef INET6
1127 if (inp->inp_vflag & INP_IPV6)
1128 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1129 else
1130 #endif /* INET6 */
1131 hashkey_faddr = inp->inp_faddr.s_addr;
1132
1133 head = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr,
1134 inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)];
1135
1136 LIST_REMOVE(inp, inp_hash);
1137 LIST_INSERT_HEAD(head, inp, inp_hash);
1138 }
1139
1140 /*
1141 * Remove PCB from various lists.
1142 */
1143 void
1144 in_pcbremlists(inp)
1145 struct inpcb *inp;
1146 {
1147 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1148
1149 INP_INFO_WLOCK_ASSERT(pcbinfo);
1150 INP_LOCK_ASSERT(inp);
1151
1152 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
1153 if (inp->inp_lport) {
1154 struct inpcbport *phd = inp->inp_phd;
1155
1156 LIST_REMOVE(inp, inp_hash);
1157 LIST_REMOVE(inp, inp_portlist);
1158 if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
1159 LIST_REMOVE(phd, phd_hash);
1160 free(phd, M_PCB);
1161 }
1162 }
1163 LIST_REMOVE(inp, inp_list);
1164 pcbinfo->ipi_count--;
1165 }
1166
/*
 * A set label operation has occurred at the socket layer, propagate the
 * label change into the in_pcb for the socket.
 *
 * Compiles to an empty function when MAC is not configured.
 */
void
in_pcbsosetlabel(struct socket *so)
{
#ifdef MAC
	struct inpcb *inp = (struct inpcb *)so->so_pcb;

	/* XXX: Will assert socket lock when we have them. */
	INP_LOCK(inp);
	mac_inpcb_sosetlabel(so, inp);
	INP_UNLOCK(inp);
#endif
}
1185
1186 int
1187 prison_xinpcb(struct thread *td, struct inpcb *inp)
1188 {
1189 if (!jailed(td->td_ucred))
1190 return (0);
1191 if (ntohl(inp->inp_laddr.s_addr) == prison_getip(td->td_ucred))
1192 return (0);
1193 return (1);
1194 }
Cache object: 18f1b8eb172ce508fcd35fb61cf38779
|