FreeBSD/Linux Kernel Cross Reference
sys/netinet/in_pcb.c
1 /*
2 * Copyright (c) 1982, 1986, 1991, 1993, 1995
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)in_pcb.c 8.4 (Berkeley) 5/24/95
34 * $FreeBSD: releng/5.1/sys/netinet/in_pcb.c 114216 2003-04-29 13:36:06Z kan $
35 */
36
37 #include "opt_ipsec.h"
38 #include "opt_inet6.h"
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/limits.h>
43 #include <sys/malloc.h>
44 #include <sys/mbuf.h>
45 #include <sys/domain.h>
46 #include <sys/protosw.h>
47 #include <sys/socket.h>
48 #include <sys/socketvar.h>
49 #include <sys/proc.h>
50 #include <sys/jail.h>
51 #include <sys/kernel.h>
52 #include <sys/sysctl.h>
53
54 #include <vm/uma.h>
55
56 #include <net/if.h>
57 #include <net/if_types.h>
58 #include <net/route.h>
59
60 #include <netinet/in.h>
61 #include <netinet/in_pcb.h>
62 #include <netinet/in_var.h>
63 #include <netinet/ip_var.h>
64 #include <netinet/tcp_var.h>
65 #ifdef INET6
66 #include <netinet/ip6.h>
67 #include <netinet6/ip6_var.h>
68 #endif /* INET6 */
69
70 #ifdef IPSEC
71 #include <netinet6/ipsec.h>
72 #include <netkey/key.h>
73 #endif /* IPSEC */
74
75 #ifdef FAST_IPSEC
76 #if defined(IPSEC) || defined(IPSEC_ESP)
77 #error "Bad idea: don't compile with both IPSEC and FAST_IPSEC!"
78 #endif
79
80 #include <netipsec/ipsec.h>
81 #include <netipsec/key.h>
82 #define IPSEC
83 #endif /* FAST_IPSEC */
84
85 struct in_addr zeroin_addr;
86
87 /*
88 * These configure the range of local port addresses assigned to
89 * "unspecified" outgoing connections/packets/whatever.
90 */
91 int ipport_lowfirstauto = IPPORT_RESERVED - 1; /* 1023 */
92 int ipport_lowlastauto = IPPORT_RESERVEDSTART; /* 600 */
93 int ipport_firstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
94 int ipport_lastauto = IPPORT_HILASTAUTO; /* 65535 */
95 int ipport_hifirstauto = IPPORT_HIFIRSTAUTO; /* 49152 */
96 int ipport_hilastauto = IPPORT_HILASTAUTO; /* 65535 */
97
98 /*
99 * Reserved ports accessible only to root. There are significant
100 * security considerations that must be accounted for when changing these,
101 * but the security benefits can be great. Please be careful.
102 */
103 int ipport_reservedhigh = IPPORT_RESERVED - 1; /* 1023 */
104 int ipport_reservedlow = 0;
105
106 #define RANGECHK(var, min, max) \
107 if ((var) < (min)) { (var) = (min); } \
108 else if ((var) > (max)) { (var) = (max); }
109
110 static int
111 sysctl_net_ipport_check(SYSCTL_HANDLER_ARGS)
112 {
113 int error = sysctl_handle_int(oidp,
114 oidp->oid_arg1, oidp->oid_arg2, req);
115 if (!error) {
116 RANGECHK(ipport_lowfirstauto, 1, IPPORT_RESERVED - 1);
117 RANGECHK(ipport_lowlastauto, 1, IPPORT_RESERVED - 1);
118 RANGECHK(ipport_firstauto, IPPORT_RESERVED, USHRT_MAX);
119 RANGECHK(ipport_lastauto, IPPORT_RESERVED, USHRT_MAX);
120 RANGECHK(ipport_hifirstauto, IPPORT_RESERVED, USHRT_MAX);
121 RANGECHK(ipport_hilastauto, IPPORT_RESERVED, USHRT_MAX);
122 }
123 return error;
124 }
125
126 #undef RANGECHK
127
128 SYSCTL_NODE(_net_inet_ip, IPPROTO_IP, portrange, CTLFLAG_RW, 0, "IP Ports");
129
130 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowfirst, CTLTYPE_INT|CTLFLAG_RW,
131 &ipport_lowfirstauto, 0, &sysctl_net_ipport_check, "I", "");
132 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, lowlast, CTLTYPE_INT|CTLFLAG_RW,
133 &ipport_lowlastauto, 0, &sysctl_net_ipport_check, "I", "");
134 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, first, CTLTYPE_INT|CTLFLAG_RW,
135 &ipport_firstauto, 0, &sysctl_net_ipport_check, "I", "");
136 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, last, CTLTYPE_INT|CTLFLAG_RW,
137 &ipport_lastauto, 0, &sysctl_net_ipport_check, "I", "");
138 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hifirst, CTLTYPE_INT|CTLFLAG_RW,
139 &ipport_hifirstauto, 0, &sysctl_net_ipport_check, "I", "");
140 SYSCTL_PROC(_net_inet_ip_portrange, OID_AUTO, hilast, CTLTYPE_INT|CTLFLAG_RW,
141 &ipport_hilastauto, 0, &sysctl_net_ipport_check, "I", "");
142 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedhigh,
143 CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedhigh, 0, "");
144 SYSCTL_INT(_net_inet_ip_portrange, OID_AUTO, reservedlow,
145 CTLFLAG_RW|CTLFLAG_SECURE, &ipport_reservedlow, 0, "");
146
147 /*
148 * in_pcb.c: manage the Protocol Control Blocks.
149 *
150 * NOTE: It is assumed that most of these functions will be called at
151 * splnet(). XXX - There are, unfortunately, a few exceptions to this
152 * rule that should be fixed.
153 */
154
155 /*
156 * Allocate a PCB and associate it with the socket.
157 */
158 int
159 in_pcballoc(so, pcbinfo, td)
160 struct socket *so;
161 struct inpcbinfo *pcbinfo;
162 struct thread *td;
163 {
164 register struct inpcb *inp;
165 #ifdef IPSEC
166 int error;
167 #endif
168
169 inp = uma_zalloc(pcbinfo->ipi_zone, M_NOWAIT);
170 if (inp == NULL)
171 return (ENOBUFS);
172 bzero((caddr_t)inp, sizeof(*inp));
173 inp->inp_gencnt = ++pcbinfo->ipi_gencnt;
174 inp->inp_pcbinfo = pcbinfo;
175 inp->inp_socket = so;
176 #ifdef IPSEC
177 error = ipsec_init_policy(so, &inp->inp_sp);
178 if (error != 0) {
179 uma_zfree(pcbinfo->ipi_zone, inp);
180 return error;
181 }
182 #endif /*IPSEC*/
183 #if defined(INET6)
184 if (INP_SOCKAF(so) == AF_INET6) {
185 inp->inp_vflag |= INP_IPV6PROTO;
186 if (ip6_v6only)
187 inp->inp_flags |= IN6P_IPV6_V6ONLY;
188 }
189 #endif
190 LIST_INSERT_HEAD(pcbinfo->listhead, inp, inp_list);
191 pcbinfo->ipi_count++;
192 so->so_pcb = (caddr_t)inp;
193 INP_LOCK_INIT(inp, "inp");
194 #ifdef INET6
195 if (ip6_auto_flowlabel)
196 inp->inp_flags |= IN6P_AUTOFLOWLABEL;
197 #endif
198 return (0);
199 }
200
201 int
202 in_pcbbind(inp, nam, td)
203 register struct inpcb *inp;
204 struct sockaddr *nam;
205 struct thread *td;
206 {
207 int anonport, error;
208
209 if (inp->inp_lport != 0 || inp->inp_laddr.s_addr != INADDR_ANY)
210 return (EINVAL);
211 anonport = inp->inp_lport == 0 && (nam == NULL ||
212 ((struct sockaddr_in *)nam)->sin_port == 0);
213 error = in_pcbbind_setup(inp, nam, &inp->inp_laddr.s_addr,
214 &inp->inp_lport, td);
215 if (error)
216 return (error);
217 if (in_pcbinshash(inp) != 0) {
218 inp->inp_laddr.s_addr = INADDR_ANY;
219 inp->inp_lport = 0;
220 return (EAGAIN);
221 }
222 if (anonport)
223 inp->inp_flags |= INP_ANONPORT;
224 return (0);
225 }
226
227 /*
228 * Set up a bind operation on a PCB, performing port allocation
229 * as required, but do not actually modify the PCB. Callers can
230 * either complete the bind by setting inp_laddr/inp_lport and
231 * calling in_pcbinshash(), or they can just use the resulting
232 * port and address to authorise the sending of a once-off packet.
233 *
234 * On error, the values of *laddrp and *lportp are not changed.
235 */
/*
 * Parameters:
 *   inp     PCB the bind is for; its socket, pcbinfo and flags are read,
 *           none of its fields are written here.
 *   nam     requested address as a sockaddr_in, or NULL to request an
 *           automatically chosen port.  May be modified in place.
 *   laddrp  in: current local address; out: address to bind.
 *   lportp  in: current local port; out: chosen port, network byte order.
 *   td      calling thread, used for the jail and privilege checks.
 *
 * Returns 0, or EADDRNOTAVAIL, EINVAL, EACCES, EADDRINUSE, or the error
 * from suser_cred() when a low automatic port is requested.
 */
236 int
237 in_pcbbind_setup(inp, nam, laddrp, lportp, td)
238 struct inpcb *inp;
239 struct sockaddr *nam;
240 in_addr_t *laddrp;
241 u_short *lportp;
242 struct thread *td;
243 {
244 struct socket *so = inp->inp_socket;
245 unsigned short *lastport;
246 struct sockaddr_in *sin;
247 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
248 struct in_addr laddr;
249 u_short lport = 0;
250 int wild = 0, reuseport = (so->so_options & SO_REUSEPORT);
251 int error, prison = 0;
252
253 if (TAILQ_EMPTY(&in_ifaddrhead)) /* XXX broken! */
254 return (EADDRNOTAVAIL);
255 laddr.s_addr = *laddrp;
/* An explicit request is refused once a local address is already set. */
256 if (nam != NULL && laddr.s_addr != INADDR_ANY)
257 return (EINVAL);
/*
 * "wild" is passed as the wild_okay argument of in_pcblookup_local()
 * below; it is only enabled when neither reuse option is set.
 */
258 if ((so->so_options & (SO_REUSEADDR|SO_REUSEPORT)) == 0)
259 wild = 1;
260 if (nam) {
261 sin = (struct sockaddr_in *)nam;
262 if (nam->sa_len != sizeof (*sin))
263 return (EINVAL);
264 #ifdef notdef
265 /*
266 * We should check the family, but old programs
267 * incorrectly fail to initialize it.
268 */
269 if (sin->sin_family != AF_INET)
270 return (EAFNOSUPPORT);
271 #endif
/*
 * A non-wildcard address is rejected when prison_ip() returns non-zero.
 * NOTE(review): td is dereferenced here without a NULL test, although
 * later checks in this function are written as "td && ..." - confirm
 * whether td can be NULL.
 */
272 if (sin->sin_addr.s_addr != INADDR_ANY)
273 if (prison_ip(td->td_ucred, 0, &sin->sin_addr.s_addr))
274 return(EINVAL);
275 if (sin->sin_port != *lportp) {
276 /* Don't allow the port to change. */
277 if (*lportp != 0)
278 return (EINVAL);
279 lport = sin->sin_port;
280 }
281 /* NB: lport is left as 0 if the port isn't being changed. */
282 if (IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
283 /*
284 * Treat SO_REUSEADDR as SO_REUSEPORT for multicast;
285 * allow complete duplication of binding if
286 * SO_REUSEPORT is set, or if SO_REUSEADDR is set
287 * and a multicast address is bound on both
288 * new and duplicated sockets.
289 */
290 if (so->so_options & SO_REUSEADDR)
291 reuseport = SO_REUSEADDR|SO_REUSEPORT;
292 } else if (sin->sin_addr.s_addr != INADDR_ANY) {
/*
 * The caller's sockaddr is modified in place: port and sin_zero are
 * cleared before the interface lookup (the requested port was already
 * saved in lport above).  Presumably this lets ifa_ifwithaddr() match
 * on the address alone - confirm against its implementation.
 */
293 sin->sin_port = 0; /* yech... */
294 bzero(&sin->sin_zero, sizeof(sin->sin_zero));
295 if (ifa_ifwithaddr((struct sockaddr *)sin) == 0)
296 return (EADDRNOTAVAIL);
297 }
298 laddr = sin->sin_addr;
299 if (lport) {
300 struct inpcb *t;
301 /* GROSS */
/*
 * Ports in [ipport_reservedlow, ipport_reservedhigh] yield EACCES when
 * suser_cred() returns non-zero; the check is skipped when td is NULL.
 */
302 if (ntohs(lport) <= ipport_reservedhigh &&
303 ntohs(lport) >= ipport_reservedlow &&
304 td && suser_cred(td->td_ucred, PRISON_ROOT))
305 return (EACCES);
306 if (td && jailed(td->td_ucred))
307 prison = 1;
/*
 * Non-root, non-multicast bind: an existing PCB owned by a different uid
 * blocks the bind unless both addresses are INADDR_ANY and the existing
 * socket has SO_REUSEPORT set.  With INET6 the non-TIMEWAIT case is
 * further limited by the address/socket-family test below.
 */
308 if (so->so_cred->cr_uid != 0 &&
309 !IN_MULTICAST(ntohl(sin->sin_addr.s_addr))) {
310 t = in_pcblookup_local(inp->inp_pcbinfo,
311 sin->sin_addr, lport,
312 prison ? 0 : INPLOOKUP_WILDCARD);
313 /*
314 * XXX
315 * This entire block sorely needs a rewrite.
316 */
317 if (t && (t->inp_vflag & INP_TIMEWAIT)) {
318 if ((ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
319 ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
320 (intotw(t)->tw_so_options & SO_REUSEPORT) == 0) &&
321 (so->so_cred->cr_uid != intotw(t)->tw_cred->cr_uid))
322 return (EADDRINUSE);
323 } else
324 if (t &&
325 (ntohl(sin->sin_addr.s_addr) != INADDR_ANY ||
326 ntohl(t->inp_laddr.s_addr) != INADDR_ANY ||
327 (t->inp_socket->so_options &
328 SO_REUSEPORT) == 0) &&
329 (so->so_cred->cr_uid !=
330 t->inp_socket->so_cred->cr_uid)) {
331 #if defined(INET6)
332 if (ntohl(sin->sin_addr.s_addr) !=
333 INADDR_ANY ||
334 ntohl(t->inp_laddr.s_addr) !=
335 INADDR_ANY ||
336 INP_SOCKAF(so) ==
337 INP_SOCKAF(t->inp_socket))
338 #endif /* defined(INET6) */
339 return (EADDRINUSE);
340 }
341 }
342 if (prison &&
343 prison_ip(td->td_ucred, 0, &sin->sin_addr.s_addr))
344 return (EADDRNOTAVAIL);
/*
 * General conflict check: an existing PCB blocks the bind when it shares
 * none of the reuse options in "reuseport" with this socket.
 */
345 t = in_pcblookup_local(pcbinfo, sin->sin_addr,
346 lport, prison ? 0 : wild);
347 if (t && (t->inp_vflag & INP_TIMEWAIT)) {
348 if ((reuseport & intotw(t)->tw_so_options) == 0)
349 return (EADDRINUSE);
350 } else
351 if (t &&
352 (reuseport & t->inp_socket->so_options) == 0) {
353 #if defined(INET6)
354 if (ntohl(sin->sin_addr.s_addr) !=
355 INADDR_ANY ||
356 ntohl(t->inp_laddr.s_addr) !=
357 INADDR_ANY ||
358 INP_SOCKAF(so) ==
359 INP_SOCKAF(t->inp_socket))
360 #endif /* defined(INET6) */
361 return (EADDRINUSE);
362 }
363 }
364 }
/* A port that is already assigned is kept as is. */
365 if (*lportp != 0)
366 lport = *lportp;
/* Still no port: choose one from the range selected by the PCB flags. */
367 if (lport == 0) {
/*
 * first/last are ushort while the tunables are int; the sysctl handler
 * in this file clamps the tunables to at most USHRT_MAX.
 */
368 ushort first, last;
369 int count;
370
371 if (laddr.s_addr != INADDR_ANY)
372 if (prison_ip(td->td_ucred, 0, &laddr.s_addr))
373 return (EINVAL);
374
375 if (inp->inp_flags & INP_HIGHPORT) {
376 first = ipport_hifirstauto; /* sysctl */
377 last = ipport_hilastauto;
378 lastport = &pcbinfo->lasthi;
379 } else if (inp->inp_flags & INP_LOWPORT) {
380 if (td && (error = suser_cred(td->td_ucred,
381 PRISON_ROOT)) != 0)
382 return error;
383 first = ipport_lowfirstauto; /* 1023 */
384 last = ipport_lowlastauto; /* 600 */
385 lastport = &pcbinfo->lastlow;
386 } else {
387 first = ipport_firstauto; /* sysctl */
388 last = ipport_lastauto;
389 lastport = &pcbinfo->lastport;
390 }
391 /*
392 * Simple check to ensure all ports are not used up causing
393 * a deadlock here.
394 *
395 * We split the two cases (up and down) so that the direction
396 * is not being tested on each round of the loop.
397 */
398 if (first > last) {
399 /*
400 * counting down
401 */
402 count = first - last;
403
/*
 * *lastport is the per-pcbinfo cursor and is updated even when the
 * search fails.  A cursor outside [last, first] restarts at "first".
 */
404 do {
405 if (count-- < 0) /* completely used? */
406 return (EADDRNOTAVAIL);
407 --*lastport;
408 if (*lastport > first || *lastport < last)
409 *lastport = first;
410 lport = htons(*lastport);
411 } while (in_pcblookup_local(pcbinfo, laddr, lport,
412 wild));
413 } else {
414 /*
415 * counting up
416 */
417 count = last - first;
418
/* Same scheme as above with the cursor moving upwards. */
419 do {
420 if (count-- < 0) /* completely used? */
421 return (EADDRNOTAVAIL);
422 ++*lastport;
423 if (*lastport < first || *lastport > last)
424 *lastport = first;
425 lport = htons(*lastport);
426 } while (in_pcblookup_local(pcbinfo, laddr, lport,
427 wild));
428 }
429 }
/*
 * Final jail check on the chosen address.  The outputs are stored only
 * after every check has passed, so they are untouched on error.
 */
430 if (prison_ip(td->td_ucred, 0, &laddr.s_addr))
431 return (EINVAL);
432 *laddrp = laddr.s_addr;
433 *lportp = lport;
434 return (0);
435 }
436
437 /*
438 * Connect from a socket to a specified address.
439 * Both address and port must be specified in argument sin.
440 * If don't have a local address for this socket yet,
441 * then pick one.
442 */
443 int
444 in_pcbconnect(inp, nam, td)
445 register struct inpcb *inp;
446 struct sockaddr *nam;
447 struct thread *td;
448 {
449 u_short lport, fport;
450 in_addr_t laddr, faddr;
451 int anonport, error;
452
453 lport = inp->inp_lport;
454 laddr = inp->inp_laddr.s_addr;
455 anonport = (lport == 0);
456 error = in_pcbconnect_setup(inp, nam, &laddr, &lport, &faddr, &fport,
457 NULL, td);
458 if (error)
459 return (error);
460
461 /* Do the initial binding of the local address if required. */
462 if (inp->inp_laddr.s_addr == INADDR_ANY && inp->inp_lport == 0) {
463 inp->inp_lport = lport;
464 inp->inp_laddr.s_addr = laddr;
465 if (in_pcbinshash(inp) != 0) {
466 inp->inp_laddr.s_addr = INADDR_ANY;
467 inp->inp_lport = 0;
468 return (EAGAIN);
469 }
470 }
471
472 /* Commit the remaining changes. */
473 inp->inp_lport = lport;
474 inp->inp_laddr.s_addr = laddr;
475 inp->inp_faddr.s_addr = faddr;
476 inp->inp_fport = fport;
477 in_pcbrehash(inp);
478 if (anonport)
479 inp->inp_flags |= INP_ANONPORT;
480 return (0);
481 }
482
483 /*
484 * Set up for a connect from a socket to the specified address.
485 * On entry, *laddrp and *lportp should contain the current local
486 * address and port for the PCB; these are updated to the values
487 * that should be placed in inp_laddr and inp_lport to complete
488 * the connect.
489 *
490 * On success, *faddrp and *fportp will be set to the remote address
491 * and port. These are not updated in the error case.
492 *
493 * If the operation fails because the connection already exists,
494 * *oinpp will be set to the PCB of that connection so that the
495 * caller can decide to override it. In all other cases, *oinpp
496 * is set to NULL.
497 */
/*
 * Parameters:
 *   inp             PCB being connected; its cached route (inp_route) may
 *                   be released and re-acquired here.
 *   nam             destination, must be an AF_INET sockaddr_in with a
 *                   non-zero port.
 *   laddrp, lportp  in/out local address and port.
 *   faddrp, fportp  out foreign address and port (success only).
 *   oinpp           optional; receives the conflicting PCB on EADDRINUSE.
 *   td              calling thread, passed on to in_pcbbind_setup().
 *
 * Returns 0, EINVAL, EAFNOSUPPORT, EADDRNOTAVAIL, EADDRINUSE, or an error
 * from in_pcbbind_setup().
 */
498 int
499 in_pcbconnect_setup(inp, nam, laddrp, lportp, faddrp, fportp, oinpp, td)
500 register struct inpcb *inp;
501 struct sockaddr *nam;
502 in_addr_t *laddrp;
503 u_short *lportp;
504 in_addr_t *faddrp;
505 u_short *fportp;
506 struct inpcb **oinpp;
507 struct thread *td;
508 {
509 struct sockaddr_in *sin = (struct sockaddr_in *)nam;
510 struct in_ifaddr *ia;
511 struct sockaddr_in sa;
512 struct ucred *cred;
513 struct inpcb *oinp;
514 struct in_addr laddr, faddr;
515 u_short lport, fport;
516 int error;
517
518 if (oinpp != NULL)
519 *oinpp = NULL;
520 if (nam->sa_len != sizeof (*sin))
521 return (EINVAL);
522 if (sin->sin_family != AF_INET)
523 return (EAFNOSUPPORT);
524 if (sin->sin_port == 0)
525 return (EADDRNOTAVAIL);
/* Work on local copies; the caller's values are written back at the end. */
526 laddr.s_addr = *laddrp;
527 lport = *lportp;
528 faddr = sin->sin_addr;
529 fport = sin->sin_port;
530 cred = inp->inp_socket->so_cred;
/*
 * Jailed socket without a local address: run the bind logic with the
 * address returned by prison_getip().  sa carries port 0, so
 * in_pcbbind_setup() will also pick a port if lport is still 0.
 */
531 if (laddr.s_addr == INADDR_ANY && jailed(cred)) {
532 bzero(&sa, sizeof(sa));
533 sa.sin_addr.s_addr = htonl(prison_getip(cred));
534 sa.sin_len = sizeof(sa);
535 sa.sin_family = AF_INET;
536 error = in_pcbbind_setup(inp, (struct sockaddr *)&sa,
537 &laddr.s_addr, &lport, td);
538 if (error)
539 return (error);
540 }
541
542 if (!TAILQ_EMPTY(&in_ifaddrhead)) {
543 /*
544 * If the destination address is INADDR_ANY,
545 * use the primary local address.
546 * If the supplied address is INADDR_BROADCAST,
547 * and the primary interface supports broadcast,
548 * choose the broadcast address for that interface.
549 */
550 if (faddr.s_addr == INADDR_ANY)
551 faddr = IA_SIN(TAILQ_FIRST(&in_ifaddrhead))->sin_addr;
552 else if (faddr.s_addr == (u_long)INADDR_BROADCAST &&
553 (TAILQ_FIRST(&in_ifaddrhead)->ia_ifp->if_flags &
554 IFF_BROADCAST))
555 faddr = satosin(&TAILQ_FIRST(
556 &in_ifaddrhead)->ia_broadaddr)->sin_addr;
557 }
/* No local address yet: derive one from the route to the destination. */
558 if (laddr.s_addr == INADDR_ANY) {
559 register struct route *ro;
560
561 ia = (struct in_ifaddr *)0;
562 /*
563 * If route is known or can be allocated now,
564 * our src addr is taken from the i/f, else punt.
565 * Note that we should check the address family of the cached
566 * destination, in case of sharing the cache with IPv6.
567 */
568 ro = &inp->inp_route;
/*
 * Drop the cached route when it is for another family or destination,
 * or when SO_DONTROUTE is set on the socket.
 */
569 if (ro->ro_rt &&
570 (ro->ro_dst.sa_family != AF_INET ||
571 satosin(&ro->ro_dst)->sin_addr.s_addr != faddr.s_addr ||
572 inp->inp_socket->so_options & SO_DONTROUTE)) {
573 RTFREE(ro->ro_rt);
574 ro->ro_rt = (struct rtentry *)0;
575 }
576 if ((inp->inp_socket->so_options & SO_DONTROUTE) == 0 && /*XXX*/
577 (ro->ro_rt == (struct rtentry *)0 ||
578 ro->ro_rt->rt_ifp == (struct ifnet *)0)) {
579 /* No route yet, so try to acquire one */
580 bzero(&ro->ro_dst, sizeof(struct sockaddr_in));
581 ro->ro_dst.sa_family = AF_INET;
582 ro->ro_dst.sa_len = sizeof(struct sockaddr_in);
583 ((struct sockaddr_in *)&ro->ro_dst)->sin_addr = faddr;
584 rtalloc(ro);
585 }
586 /*
587 * If we found a route, use the address
588 * corresponding to the outgoing interface
589 * unless it is the loopback (in case a route
590 * to our address on another net goes to loopback).
591 */
/*
 * NOTE(review): rt_ifp is dereferenced here without a NULL test, while
 * the condition above treats a NULL rt_ifp as possible - confirm that
 * rtalloc() cannot leave a route with a NULL rt_ifp.
 */
592 if (ro->ro_rt && !(ro->ro_rt->rt_ifp->if_flags & IFF_LOOPBACK))
593 ia = ifatoia(ro->ro_rt->rt_ifa);
/*
 * No usable route: fall back, in order, to the interface whose
 * destination address matches, the interface on the same network, and
 * finally the first configured address.
 */
594 if (ia == 0) {
595 bzero(&sa, sizeof(sa));
596 sa.sin_addr = faddr;
597 sa.sin_len = sizeof(sa);
598 sa.sin_family = AF_INET;
599
600 ia = ifatoia(ifa_ifwithdstaddr(sintosa(&sa)));
601 if (ia == 0)
602 ia = ifatoia(ifa_ifwithnet(sintosa(&sa)));
603 if (ia == 0)
604 ia = TAILQ_FIRST(&in_ifaddrhead);
605 if (ia == 0)
606 return (EADDRNOTAVAIL);
607 }
608 /*
609 * If the destination address is multicast and an outgoing
610 * interface has been set as a multicast option, use the
611 * address of that interface as our source address.
612 */
613 if (IN_MULTICAST(ntohl(faddr.s_addr)) &&
614 inp->inp_moptions != NULL) {
615 struct ip_moptions *imo;
616 struct ifnet *ifp;
617
618 imo = inp->inp_moptions;
619 if (imo->imo_multicast_ifp != NULL) {
620 ifp = imo->imo_multicast_ifp;
/* This search overwrites the "ia" chosen above. */
621 TAILQ_FOREACH(ia, &in_ifaddrhead, ia_link)
622 if (ia->ia_ifp == ifp)
623 break;
624 if (ia == 0)
625 return (EADDRNOTAVAIL);
626 }
627 }
628 laddr = ia->ia_addr.sin_addr;
629 }
630
/*
 * Refuse the connect if a PCB with exactly this four-tuple already
 * exists (wildcard argument 0); report that PCB through oinpp.
 */
631 oinp = in_pcblookup_hash(inp->inp_pcbinfo, faddr, fport, laddr, lport,
632 0, NULL);
633 if (oinp != NULL) {
634 if (oinpp != NULL)
635 *oinpp = oinp;
636 return (EADDRINUSE);
637 }
/* Still no local port: have the bind logic choose one. */
638 if (lport == 0) {
639 error = in_pcbbind_setup(inp, NULL, &laddr.s_addr, &lport, td);
640 if (error)
641 return (error);
642 }
/* All checks passed; only now are the caller's variables updated. */
643 *laddrp = laddr.s_addr;
644 *lportp = lport;
645 *faddrp = faddr.s_addr;
646 *fportp = fport;
647 return (0);
648 }
649
650 void
651 in_pcbdisconnect(inp)
652 struct inpcb *inp;
653 {
654
655 inp->inp_faddr.s_addr = INADDR_ANY;
656 inp->inp_fport = 0;
657 in_pcbrehash(inp);
658 if (inp->inp_socket->so_state & SS_NOFDREF)
659 in_pcbdetach(inp);
660 }
661
662 void
663 in_pcbdetach(inp)
664 struct inpcb *inp;
665 {
666 struct socket *so = inp->inp_socket;
667 struct inpcbinfo *ipi = inp->inp_pcbinfo;
668
669 #ifdef IPSEC
670 ipsec4_delete_pcbpolicy(inp);
671 #endif /*IPSEC*/
672 inp->inp_gencnt = ++ipi->ipi_gencnt;
673 in_pcbremlists(inp);
674 if (so) {
675 so->so_pcb = 0;
676 sotryfree(so);
677 }
678 if (inp->inp_options)
679 (void)m_free(inp->inp_options);
680 if (inp->inp_route.ro_rt)
681 rtfree(inp->inp_route.ro_rt);
682 ip_freemoptions(inp->inp_moptions);
683 inp->inp_vflag = 0;
684 INP_LOCK_DESTROY(inp);
685 uma_zfree(ipi->ipi_zone, inp);
686 }
687
688 struct sockaddr *
689 in_sockaddr(port, addr_p)
690 in_port_t port;
691 struct in_addr *addr_p;
692 {
693 struct sockaddr_in *sin;
694
695 MALLOC(sin, struct sockaddr_in *, sizeof *sin, M_SONAME,
696 M_WAITOK | M_ZERO);
697 sin->sin_family = AF_INET;
698 sin->sin_len = sizeof(*sin);
699 sin->sin_addr = *addr_p;
700 sin->sin_port = port;
701
702 return (struct sockaddr *)sin;
703 }
704
705 /*
706 * The wrapper function will pass down the pcbinfo for this function to lock.
707 * The socket must have a valid
708 * (i.e., non-nil) PCB, but it should be impossible to get an invalid one
709 * except through a kernel programming error, so it is acceptable to panic
710 * (or in this case trap) if the PCB is invalid. (Actually, we don't trap
711 * because there actually /is/ a programming error somewhere... XXX)
712 */
713 int
714 in_setsockaddr(so, nam, pcbinfo)
715 struct socket *so;
716 struct sockaddr **nam;
717 struct inpcbinfo *pcbinfo;
718 {
719 int s;
720 register struct inpcb *inp;
721 struct in_addr addr;
722 in_port_t port;
723
724 s = splnet();
725 INP_INFO_RLOCK(pcbinfo);
726 inp = sotoinpcb(so);
727 if (!inp) {
728 INP_INFO_RUNLOCK(pcbinfo);
729 splx(s);
730 return ECONNRESET;
731 }
732 INP_LOCK(inp);
733 port = inp->inp_lport;
734 addr = inp->inp_laddr;
735 INP_UNLOCK(inp);
736 INP_INFO_RUNLOCK(pcbinfo);
737 splx(s);
738
739 *nam = in_sockaddr(port, &addr);
740 return 0;
741 }
742
743 /*
744 * The wrapper function will pass down the pcbinfo for this function to lock.
745 */
746 int
747 in_setpeeraddr(so, nam, pcbinfo)
748 struct socket *so;
749 struct sockaddr **nam;
750 struct inpcbinfo *pcbinfo;
751 {
752 int s;
753 register struct inpcb *inp;
754 struct in_addr addr;
755 in_port_t port;
756
757 s = splnet();
758 INP_INFO_RLOCK(pcbinfo);
759 inp = sotoinpcb(so);
760 if (!inp) {
761 INP_INFO_RUNLOCK(pcbinfo);
762 splx(s);
763 return ECONNRESET;
764 }
765 INP_LOCK(inp);
766 port = inp->inp_fport;
767 addr = inp->inp_faddr;
768 INP_UNLOCK(inp);
769 INP_INFO_RUNLOCK(pcbinfo);
770 splx(s);
771
772 *nam = in_sockaddr(port, &addr);
773 return 0;
774 }
775
776 void
777 in_pcbnotifyall(pcbinfo, faddr, errno, notify)
778 struct inpcbinfo *pcbinfo;
779 struct in_addr faddr;
780 int errno;
781 struct inpcb *(*notify)(struct inpcb *, int);
782 {
783 struct inpcb *inp, *ninp;
784 struct inpcbhead *head;
785 int s;
786
787 s = splnet();
788 INP_INFO_WLOCK(pcbinfo);
789 head = pcbinfo->listhead;
790 for (inp = LIST_FIRST(head); inp != NULL; inp = ninp) {
791 INP_LOCK(inp);
792 ninp = LIST_NEXT(inp, inp_list);
793 #ifdef INET6
794 if ((inp->inp_vflag & INP_IPV4) == 0) {
795 INP_UNLOCK(inp);
796 continue;
797 }
798 #endif
799 if (inp->inp_faddr.s_addr != faddr.s_addr ||
800 inp->inp_socket == NULL) {
801 INP_UNLOCK(inp);
802 continue;
803 }
804 if ((*notify)(inp, errno))
805 INP_UNLOCK(inp);
806 }
807 INP_INFO_WUNLOCK(pcbinfo);
808 splx(s);
809 }
810
811 void
812 in_pcbpurgeif0(pcbinfo, ifp)
813 struct inpcbinfo *pcbinfo;
814 struct ifnet *ifp;
815 {
816 struct inpcb *inp;
817 struct ip_moptions *imo;
818 int i, gap;
819
820 /* why no splnet here? XXX */
821 INP_INFO_RLOCK(pcbinfo);
822 LIST_FOREACH(inp, pcbinfo->listhead, inp_list) {
823 INP_LOCK(inp);
824 imo = inp->inp_moptions;
825 if ((inp->inp_vflag & INP_IPV4) &&
826 imo != NULL) {
827 /*
828 * Unselect the outgoing interface if it is being
829 * detached.
830 */
831 if (imo->imo_multicast_ifp == ifp)
832 imo->imo_multicast_ifp = NULL;
833
834 /*
835 * Drop multicast group membership if we joined
836 * through the interface being detached.
837 */
838 for (i = 0, gap = 0; i < imo->imo_num_memberships;
839 i++) {
840 if (imo->imo_membership[i]->inm_ifp == ifp) {
841 in_delmulti(imo->imo_membership[i]);
842 gap++;
843 } else if (gap != 0)
844 imo->imo_membership[i - gap] =
845 imo->imo_membership[i];
846 }
847 imo->imo_num_memberships -= gap;
848 }
849 INP_UNLOCK(inp);
850 }
851 INP_INFO_RUNLOCK(pcbinfo);
852 }
853
854 /*
855 * Check for alternatives when higher level complains
856 * about service problems. For now, invalidate cached
857 * routing information. If the route was created dynamically
858 * (by a redirect), time to try a default gateway again.
859 */
860 void
861 in_losing(inp)
862 struct inpcb *inp;
863 {
864 register struct rtentry *rt;
865 struct rt_addrinfo info;
866
867 if ((rt = inp->inp_route.ro_rt)) {
868 bzero((caddr_t)&info, sizeof(info));
869 info.rti_flags = rt->rt_flags;
870 info.rti_info[RTAX_DST] = rt_key(rt);
871 info.rti_info[RTAX_GATEWAY] = rt->rt_gateway;
872 info.rti_info[RTAX_NETMASK] = rt_mask(rt);
873 rt_missmsg(RTM_LOSING, &info, rt->rt_flags, 0);
874 if (rt->rt_flags & RTF_DYNAMIC)
875 (void) rtrequest1(RTM_DELETE, &info, NULL);
876 inp->inp_route.ro_rt = NULL;
877 rtfree(rt);
878 /*
879 * A new route can be allocated
880 * the next time output is attempted.
881 */
882 }
883 }
884
885 /*
886 * After a routing change, flush old routing
887 * and allocate a (hopefully) better one.
888 */
889 struct inpcb *
890 in_rtchange(inp, errno)
891 register struct inpcb *inp;
892 int errno;
893 {
894 if (inp->inp_route.ro_rt) {
895 rtfree(inp->inp_route.ro_rt);
896 inp->inp_route.ro_rt = 0;
897 /*
898 * A new route can be allocated the next time
899 * output is attempted.
900 */
901 }
902 return inp;
903 }
904
905 /*
906 * Lookup a PCB based on the local address and port.
907 */
908 struct inpcb *
909 in_pcblookup_local(pcbinfo, laddr, lport_arg, wild_okay)
910 struct inpcbinfo *pcbinfo;
911 struct in_addr laddr;
912 u_int lport_arg;
913 int wild_okay;
914 {
915 register struct inpcb *inp;
916 int matchwild = 3, wildcard;
917 u_short lport = lport_arg;
918
919 if (!wild_okay) {
920 struct inpcbhead *head;
921 /*
922 * Look for an unconnected (wildcard foreign addr) PCB that
923 * matches the local address and port we're looking for.
924 */
925 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
926 LIST_FOREACH(inp, head, inp_hash) {
927 #ifdef INET6
928 if ((inp->inp_vflag & INP_IPV4) == 0)
929 continue;
930 #endif
931 if (inp->inp_faddr.s_addr == INADDR_ANY &&
932 inp->inp_laddr.s_addr == laddr.s_addr &&
933 inp->inp_lport == lport) {
934 /*
935 * Found.
936 */
937 return (inp);
938 }
939 }
940 /*
941 * Not found.
942 */
943 return (NULL);
944 } else {
945 struct inpcbporthead *porthash;
946 struct inpcbport *phd;
947 struct inpcb *match = NULL;
948 /*
949 * Best fit PCB lookup.
950 *
951 * First see if this local port is in use by looking on the
952 * port hash list.
953 */
954 porthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(lport,
955 pcbinfo->porthashmask)];
956 LIST_FOREACH(phd, porthash, phd_hash) {
957 if (phd->phd_port == lport)
958 break;
959 }
960 if (phd != NULL) {
961 /*
962 * Port is in use by one or more PCBs. Look for best
963 * fit.
964 */
965 LIST_FOREACH(inp, &phd->phd_pcblist, inp_portlist) {
966 wildcard = 0;
967 #ifdef INET6
968 if ((inp->inp_vflag & INP_IPV4) == 0)
969 continue;
970 #endif
971 if (inp->inp_faddr.s_addr != INADDR_ANY)
972 wildcard++;
973 if (inp->inp_laddr.s_addr != INADDR_ANY) {
974 if (laddr.s_addr == INADDR_ANY)
975 wildcard++;
976 else if (inp->inp_laddr.s_addr != laddr.s_addr)
977 continue;
978 } else {
979 if (laddr.s_addr != INADDR_ANY)
980 wildcard++;
981 }
982 if (wildcard < matchwild) {
983 match = inp;
984 matchwild = wildcard;
985 if (matchwild == 0) {
986 break;
987 }
988 }
989 }
990 }
991 return (match);
992 }
993 }
994
995 /*
996 * Lookup PCB in hash list.
997 */
998 struct inpcb *
999 in_pcblookup_hash(pcbinfo, faddr, fport_arg, laddr, lport_arg, wildcard,
1000 ifp)
1001 struct inpcbinfo *pcbinfo;
1002 struct in_addr faddr, laddr;
1003 u_int fport_arg, lport_arg;
1004 int wildcard;
1005 struct ifnet *ifp;
1006 {
1007 struct inpcbhead *head;
1008 register struct inpcb *inp;
1009 u_short fport = fport_arg, lport = lport_arg;
1010
1011 /*
1012 * First look for an exact match.
1013 */
1014 head = &pcbinfo->hashbase[INP_PCBHASH(faddr.s_addr, lport, fport, pcbinfo->hashmask)];
1015 LIST_FOREACH(inp, head, inp_hash) {
1016 #ifdef INET6
1017 if ((inp->inp_vflag & INP_IPV4) == 0)
1018 continue;
1019 #endif
1020 if (inp->inp_faddr.s_addr == faddr.s_addr &&
1021 inp->inp_laddr.s_addr == laddr.s_addr &&
1022 inp->inp_fport == fport &&
1023 inp->inp_lport == lport) {
1024 /*
1025 * Found.
1026 */
1027 return (inp);
1028 }
1029 }
1030 if (wildcard) {
1031 struct inpcb *local_wild = NULL;
1032 #if defined(INET6)
1033 struct inpcb *local_wild_mapped = NULL;
1034 #endif /* defined(INET6) */
1035
1036 head = &pcbinfo->hashbase[INP_PCBHASH(INADDR_ANY, lport, 0, pcbinfo->hashmask)];
1037 LIST_FOREACH(inp, head, inp_hash) {
1038 #ifdef INET6
1039 if ((inp->inp_vflag & INP_IPV4) == 0)
1040 continue;
1041 #endif
1042 if (inp->inp_faddr.s_addr == INADDR_ANY &&
1043 inp->inp_lport == lport) {
1044 if (ifp && ifp->if_type == IFT_FAITH &&
1045 (inp->inp_flags & INP_FAITH) == 0)
1046 continue;
1047 if (inp->inp_laddr.s_addr == laddr.s_addr)
1048 return (inp);
1049 else if (inp->inp_laddr.s_addr == INADDR_ANY) {
1050 #if defined(INET6)
1051 if (INP_CHECK_SOCKAF(inp->inp_socket,
1052 AF_INET6))
1053 local_wild_mapped = inp;
1054 else
1055 #endif /* defined(INET6) */
1056 local_wild = inp;
1057 }
1058 }
1059 }
1060 #if defined(INET6)
1061 if (local_wild == NULL)
1062 return (local_wild_mapped);
1063 #endif /* defined(INET6) */
1064 return (local_wild);
1065 }
1066
1067 /*
1068 * Not found.
1069 */
1070 return (NULL);
1071 }
1072
1073 /*
1074 * Insert PCB onto various hash lists.
1075 */
1076 int
1077 in_pcbinshash(inp)
1078 struct inpcb *inp;
1079 {
1080 struct inpcbhead *pcbhash;
1081 struct inpcbporthead *pcbporthash;
1082 struct inpcbinfo *pcbinfo = inp->inp_pcbinfo;
1083 struct inpcbport *phd;
1084 u_int32_t hashkey_faddr;
1085
1086 #ifdef INET6
1087 if (inp->inp_vflag & INP_IPV6)
1088 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1089 else
1090 #endif /* INET6 */
1091 hashkey_faddr = inp->inp_faddr.s_addr;
1092
1093 pcbhash = &pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr,
1094 inp->inp_lport, inp->inp_fport, pcbinfo->hashmask)];
1095
1096 pcbporthash = &pcbinfo->porthashbase[INP_PCBPORTHASH(inp->inp_lport,
1097 pcbinfo->porthashmask)];
1098
1099 /*
1100 * Go through port list and look for a head for this lport.
1101 */
1102 LIST_FOREACH(phd, pcbporthash, phd_hash) {
1103 if (phd->phd_port == inp->inp_lport)
1104 break;
1105 }
1106 /*
1107 * If none exists, malloc one and tack it on.
1108 */
1109 if (phd == NULL) {
1110 MALLOC(phd, struct inpcbport *, sizeof(struct inpcbport), M_PCB, M_NOWAIT);
1111 if (phd == NULL) {
1112 return (ENOBUFS); /* XXX */
1113 }
1114 phd->phd_port = inp->inp_lport;
1115 LIST_INIT(&phd->phd_pcblist);
1116 LIST_INSERT_HEAD(pcbporthash, phd, phd_hash);
1117 }
1118 inp->inp_phd = phd;
1119 LIST_INSERT_HEAD(&phd->phd_pcblist, inp, inp_portlist);
1120 LIST_INSERT_HEAD(pcbhash, inp, inp_hash);
1121 return (0);
1122 }
1123
1124 /*
1125 * Move PCB to the proper hash bucket when { faddr, fport } have been
1126 * changed. NOTE: This does not handle the case of the lport changing (the
1127 * hashed port list would have to be updated as well), so the lport must
1128 * not change after in_pcbinshash() has been called.
1129 */
1130 void
1131 in_pcbrehash(inp)
1132 struct inpcb *inp;
1133 {
1134 struct inpcbhead *head;
1135 u_int32_t hashkey_faddr;
1136
1137 #ifdef INET6
1138 if (inp->inp_vflag & INP_IPV6)
1139 hashkey_faddr = inp->in6p_faddr.s6_addr32[3] /* XXX */;
1140 else
1141 #endif /* INET6 */
1142 hashkey_faddr = inp->inp_faddr.s_addr;
1143
1144 head = &inp->inp_pcbinfo->hashbase[INP_PCBHASH(hashkey_faddr,
1145 inp->inp_lport, inp->inp_fport, inp->inp_pcbinfo->hashmask)];
1146
1147 LIST_REMOVE(inp, inp_hash);
1148 LIST_INSERT_HEAD(head, inp, inp_hash);
1149 }
1150
1151 /*
1152 * Remove PCB from various lists.
1153 */
1154 void
1155 in_pcbremlists(inp)
1156 struct inpcb *inp;
1157 {
1158 inp->inp_gencnt = ++inp->inp_pcbinfo->ipi_gencnt;
1159 if (inp->inp_lport) {
1160 struct inpcbport *phd = inp->inp_phd;
1161
1162 LIST_REMOVE(inp, inp_hash);
1163 LIST_REMOVE(inp, inp_portlist);
1164 if (LIST_FIRST(&phd->phd_pcblist) == NULL) {
1165 LIST_REMOVE(phd, phd_hash);
1166 free(phd, M_PCB);
1167 }
1168 }
1169 LIST_REMOVE(inp, inp_list);
1170 inp->inp_pcbinfo->ipi_count--;
1171 }
1172
1173 int
1174 prison_xinpcb(struct thread *td, struct inpcb *inp)
1175 {
1176 if (!jailed(td->td_ucred))
1177 return (0);
1178 if (ntohl(inp->inp_laddr.s_addr) == prison_getip(td->td_ucred))
1179 return (0);
1180 return (1);
1181 }
Cache object: c54f612ac6fd610493f252773397f08f
|