1 /*
2 * Copyright (c) 1982, 1986, 1989, 1991, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * From: @(#)uipc_usrreq.c 8.3 (Berkeley) 1/4/94
34 * $FreeBSD$
35 */
36
37 #include <sys/param.h>
38 #include <sys/systm.h>
39 #include <sys/kernel.h>
40 #include <sys/domain.h>
41 #include <sys/fcntl.h>
42 #include <sys/malloc.h> /* XXX must be before <sys/file.h> */
43 #include <sys/file.h>
44 #include <sys/filedesc.h>
45 #include <sys/lock.h>
46 #include <sys/mbuf.h>
47 #include <sys/namei.h>
48 #include <sys/proc.h>
49 #include <sys/protosw.h>
50 #include <sys/socket.h>
51 #include <sys/socketvar.h>
52 #include <sys/stat.h>
53 #include <sys/sysctl.h>
54 #include <sys/un.h>
55 #include <sys/unpcb.h>
56 #include <sys/vnode.h>
57
58 #include <vm/vm_zone.h>
59
60 struct vm_zone *unp_zone;
61 static unp_gen_t unp_gencnt;
62 static u_int unp_count;
63
64 static struct unp_head unp_shead, unp_dhead;
65
66 /*
67 * Unix communications domain.
68 *
69 * TODO:
70 * SEQPACKET, RDM
71 * rethink name space problems
72 * need a proper out-of-band
73 * lock pushdown
74 */
75 static struct sockaddr sun_noname = { sizeof(sun_noname), AF_LOCAL };
76 static ino_t unp_ino; /* prototype for fake inode numbers */
77
78 static int unp_attach __P((struct socket *));
79 static void unp_detach __P((struct unpcb *));
80 static int unp_bind __P((struct unpcb *,struct sockaddr *, struct proc *));
81 static int unp_connect __P((struct socket *,struct sockaddr *,
82 struct proc *));
83 static void unp_disconnect __P((struct unpcb *));
84 static void unp_shutdown __P((struct unpcb *));
85 static void unp_drop __P((struct unpcb *, int));
86 static void unp_gc __P((void));
87 static void unp_scan __P((struct mbuf *, void (*)(struct file *)));
88 static void unp_mark __P((struct file *));
89 static void unp_discard __P((struct file *));
90 static int unp_internalize __P((struct mbuf *, struct proc *));
91
92 static int
93 uipc_abort(struct socket *so)
94 {
95 struct unpcb *unp = sotounpcb(so);
96
97 if (unp == 0)
98 return EINVAL;
99 unp_drop(unp, ECONNABORTED);
100 return 0;
101 }
102
103 static int
104 uipc_accept(struct socket *so, struct sockaddr **nam)
105 {
106 struct unpcb *unp = sotounpcb(so);
107
108 if (unp == 0)
109 return EINVAL;
110
111 /*
112 * Pass back name of connected socket,
113 * if it was bound and we are still connected
114 * (our peer may have closed already!).
115 */
116 if (unp->unp_conn && unp->unp_conn->unp_addr) {
117 *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr,
118 1);
119 } else {
120 *nam = dup_sockaddr((struct sockaddr *)&sun_noname, 1);
121 }
122 return 0;
123 }
124
125 static int
126 uipc_attach(struct socket *so, int proto, struct proc *p)
127 {
128 struct unpcb *unp = sotounpcb(so);
129
130 if (unp != 0)
131 return EISCONN;
132 return unp_attach(so);
133 }
134
135 static int
136 uipc_bind(struct socket *so, struct sockaddr *nam, struct proc *p)
137 {
138 struct unpcb *unp = sotounpcb(so);
139
140 if (unp == 0)
141 return EINVAL;
142
143 return unp_bind(unp, nam, p);
144 }
145
146 static int
147 uipc_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
148 {
149 struct unpcb *unp = sotounpcb(so);
150
151 if (unp == 0)
152 return EINVAL;
153 return unp_connect(so, nam, curproc);
154 }
155
156 static int
157 uipc_connect2(struct socket *so1, struct socket *so2)
158 {
159 struct unpcb *unp = sotounpcb(so1);
160
161 if (unp == 0)
162 return EINVAL;
163
164 return unp_connect2(so1, so2);
165 }
166
167 /* control is EOPNOTSUPP */
168
169 static int
170 uipc_detach(struct socket *so)
171 {
172 struct unpcb *unp = sotounpcb(so);
173
174 if (unp == 0)
175 return EINVAL;
176
177 unp_detach(unp);
178 return 0;
179 }
180
181 static int
182 uipc_disconnect(struct socket *so)
183 {
184 struct unpcb *unp = sotounpcb(so);
185
186 if (unp == 0)
187 return EINVAL;
188 unp_disconnect(unp);
189 return 0;
190 }
191
192 static int
193 uipc_listen(struct socket *so, struct proc *p)
194 {
195 struct unpcb *unp = sotounpcb(so);
196
197 if (unp == 0 || unp->unp_vnode == 0)
198 return EINVAL;
199 return 0;
200 }
201
202 static int
203 uipc_peeraddr(struct socket *so, struct sockaddr **nam)
204 {
205 struct unpcb *unp = sotounpcb(so);
206
207 if (unp == 0)
208 return EINVAL;
209 if (unp->unp_conn && unp->unp_conn->unp_addr)
210 *nam = dup_sockaddr((struct sockaddr *)unp->unp_conn->unp_addr,
211 1);
212 return 0;
213 }
214
215 static int
216 uipc_rcvd(struct socket *so, int flags)
217 {
218 struct unpcb *unp = sotounpcb(so);
219 struct socket *so2;
220
221 if (unp == 0)
222 return EINVAL;
223 switch (so->so_type) {
224 case SOCK_DGRAM:
225 panic("uipc_rcvd DGRAM?");
226 /*NOTREACHED*/
227
228 case SOCK_STREAM:
229 #define rcv (&so->so_rcv)
230 #define snd (&so2->so_snd)
231 if (unp->unp_conn == 0)
232 break;
233 so2 = unp->unp_conn->unp_socket;
234 /*
235 * Adjust backpressure on sender
236 * and wakeup any waiting to write.
237 */
238 snd->sb_mbmax += unp->unp_mbcnt - rcv->sb_mbcnt;
239 unp->unp_mbcnt = rcv->sb_mbcnt;
240 snd->sb_hiwat += unp->unp_cc - rcv->sb_cc;
241 unp->unp_cc = rcv->sb_cc;
242 sowwakeup(so2);
243 #undef snd
244 #undef rcv
245 break;
246
247 default:
248 panic("uipc_rcvd unknown socktype");
249 }
250 return 0;
251 }
252
253 /* pru_rcvoob is EOPNOTSUPP */
254
255 static int
256 uipc_send(struct socket *so, int flags, struct mbuf *m, struct sockaddr *nam,
257 struct mbuf *control, struct proc *p)
258 {
259 int error = 0;
260 struct unpcb *unp = sotounpcb(so);
261 struct socket *so2;
262
263 if (unp == 0) {
264 error = EINVAL;
265 goto release;
266 }
267 if (flags & PRUS_OOB) {
268 error = EOPNOTSUPP;
269 goto release;
270 }
271
272 if (control && (error = unp_internalize(control, p)))
273 goto release;
274
275 switch (so->so_type) {
276 case SOCK_DGRAM:
277 {
278 struct sockaddr *from;
279
280 if (nam) {
281 if (unp->unp_conn) {
282 error = EISCONN;
283 break;
284 }
285 error = unp_connect(so, nam, p);
286 if (error)
287 break;
288 } else {
289 if (unp->unp_conn == 0) {
290 error = ENOTCONN;
291 break;
292 }
293 }
294 so2 = unp->unp_conn->unp_socket;
295 if (unp->unp_addr)
296 from = (struct sockaddr *)unp->unp_addr;
297 else
298 from = &sun_noname;
299 if (sbappendaddr(&so2->so_rcv, from, m, control)) {
300 sorwakeup(so2);
301 m = 0;
302 control = 0;
303 } else
304 error = ENOBUFS;
305 if (nam)
306 unp_disconnect(unp);
307 break;
308 }
309
310 case SOCK_STREAM:
311 #define rcv (&so2->so_rcv)
312 #define snd (&so->so_snd)
313 /* Connect if not connected yet. */
314 /*
315 * Note: A better implementation would complain
316 * if not equal to the peer's address.
317 */
318 if ((so->so_state & SS_ISCONNECTED) == 0) {
319 if (nam) {
320 error = unp_connect(so, nam, p);
321 if (error)
322 break; /* XXX */
323 } else {
324 error = ENOTCONN;
325 break;
326 }
327 }
328
329 if (so->so_state & SS_CANTSENDMORE) {
330 error = EPIPE;
331 break;
332 }
333 if (unp->unp_conn == 0)
334 panic("uipc_send connected but no connection?");
335 so2 = unp->unp_conn->unp_socket;
336 /*
337 * Send to paired receive port, and then reduce
338 * send buffer hiwater marks to maintain backpressure.
339 * Wake up readers.
340 */
341 if (control) {
342 if (sbappendcontrol(rcv, m, control))
343 control = 0;
344 } else
345 sbappend(rcv, m);
346 snd->sb_mbmax -=
347 rcv->sb_mbcnt - unp->unp_conn->unp_mbcnt;
348 unp->unp_conn->unp_mbcnt = rcv->sb_mbcnt;
349 snd->sb_hiwat -= rcv->sb_cc - unp->unp_conn->unp_cc;
350 unp->unp_conn->unp_cc = rcv->sb_cc;
351 sorwakeup(so2);
352 m = 0;
353 #undef snd
354 #undef rcv
355 break;
356
357 default:
358 panic("uipc_send unknown socktype");
359 }
360
361 /*
362 * SEND_EOF is equivalent to a SEND followed by
363 * a SHUTDOWN.
364 */
365 if (flags & PRUS_EOF) {
366 socantsendmore(so);
367 unp_shutdown(unp);
368 }
369
370 if (control && error != 0)
371 unp_dispose(control);
372
373 release:
374 if (control)
375 m_freem(control);
376 if (m)
377 m_freem(m);
378 return error;
379 }
380
381 static int
382 uipc_sense(struct socket *so, struct stat *sb)
383 {
384 struct unpcb *unp = sotounpcb(so);
385 struct socket *so2;
386
387 if (unp == 0)
388 return EINVAL;
389 sb->st_blksize = so->so_snd.sb_hiwat;
390 if (so->so_type == SOCK_STREAM && unp->unp_conn != 0) {
391 so2 = unp->unp_conn->unp_socket;
392 sb->st_blksize += so2->so_rcv.sb_cc;
393 }
394 sb->st_dev = NODEV;
395 if (unp->unp_ino == 0)
396 unp->unp_ino = unp_ino++;
397 sb->st_ino = unp->unp_ino;
398 return (0);
399 }
400
401 static int
402 uipc_shutdown(struct socket *so)
403 {
404 struct unpcb *unp = sotounpcb(so);
405
406 if (unp == 0)
407 return EINVAL;
408 socantsendmore(so);
409 unp_shutdown(unp);
410 return 0;
411 }
412
413 static int
414 uipc_sockaddr(struct socket *so, struct sockaddr **nam)
415 {
416 struct unpcb *unp = sotounpcb(so);
417
418 if (unp == 0)
419 return EINVAL;
420 if (unp->unp_addr)
421 *nam = dup_sockaddr((struct sockaddr *)unp->unp_addr, 1);
422 return 0;
423 }
424
425 struct pr_usrreqs uipc_usrreqs = {
426 uipc_abort, uipc_accept, uipc_attach, uipc_bind, uipc_connect,
427 uipc_connect2, pru_control_notsupp, uipc_detach, uipc_disconnect,
428 uipc_listen, uipc_peeraddr, uipc_rcvd, pru_rcvoob_notsupp,
429 uipc_send, uipc_sense, uipc_shutdown, uipc_sockaddr,
430 sosend, soreceive, sopoll
431 };
432
433 /*
434 * Both send and receive buffers are allocated PIPSIZ bytes of buffering
435 * for stream sockets, although the total for sender and receiver is
436 * actually only PIPSIZ.
437 * Datagram sockets really use the sendspace as the maximum datagram size,
438 * and don't really want to reserve the sendspace. Their recvspace should
439 * be large enough for at least one max-size datagram plus address.
440 */
441 #ifndef PIPSIZ
442 #define PIPSIZ 8192
443 #endif
444 static u_long unpst_sendspace = PIPSIZ;
445 static u_long unpst_recvspace = PIPSIZ;
446 static u_long unpdg_sendspace = 2*1024; /* really max datagram size */
447 static u_long unpdg_recvspace = 4*1024;
448
449 static int unp_rights; /* file descriptors in flight */
450
451 SYSCTL_INT(_net_local_stream, OID_AUTO, sendspace, CTLFLAG_RW,
452 &unpst_sendspace, 0, "");
453 SYSCTL_INT(_net_local_stream, OID_AUTO, recvspace, CTLFLAG_RW,
454 &unpst_recvspace, 0, "");
455 SYSCTL_INT(_net_local_dgram, OID_AUTO, maxdgram, CTLFLAG_RW,
456 &unpdg_sendspace, 0, "");
457 SYSCTL_INT(_net_local_dgram, OID_AUTO, recvspace, CTLFLAG_RW,
458 &unpdg_recvspace, 0, "");
459 SYSCTL_INT(_net_local, OID_AUTO, inflight, CTLFLAG_RD, &unp_rights, 0, "");
460
461 static int
462 unp_attach(so)
463 struct socket *so;
464 {
465 register struct unpcb *unp;
466 int error;
467
468 if (so->so_snd.sb_hiwat == 0 || so->so_rcv.sb_hiwat == 0) {
469 switch (so->so_type) {
470
471 case SOCK_STREAM:
472 error = soreserve(so, unpst_sendspace, unpst_recvspace);
473 break;
474
475 case SOCK_DGRAM:
476 error = soreserve(so, unpdg_sendspace, unpdg_recvspace);
477 break;
478
479 default:
480 panic("unp_attach");
481 }
482 if (error)
483 return (error);
484 }
485 unp = zalloc(unp_zone);
486 if (unp == NULL)
487 return (ENOBUFS);
488 bzero(unp, sizeof *unp);
489 unp->unp_gencnt = ++unp_gencnt;
490 unp_count++;
491 LIST_INIT(&unp->unp_refs);
492 unp->unp_socket = so;
493 LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead
494 : &unp_shead, unp, unp_link);
495 so->so_pcb = (caddr_t)unp;
496 return (0);
497 }
498
499 static void
500 unp_detach(unp)
501 register struct unpcb *unp;
502 {
503 LIST_REMOVE(unp, unp_link);
504 unp->unp_gencnt = ++unp_gencnt;
505 --unp_count;
506 if (unp->unp_vnode) {
507 unp->unp_vnode->v_socket = 0;
508 vrele(unp->unp_vnode);
509 unp->unp_vnode = 0;
510 }
511 if (unp->unp_conn)
512 unp_disconnect(unp);
513 while (unp->unp_refs.lh_first)
514 unp_drop(unp->unp_refs.lh_first, ECONNRESET);
515 soisdisconnected(unp->unp_socket);
516 unp->unp_socket->so_pcb = 0;
517 if (unp_rights) {
518 /*
519 * Normally the receive buffer is flushed later,
520 * in sofree, but if our receive buffer holds references
521 * to descriptors that are now garbage, we will dispose
522 * of those descriptor references after the garbage collector
523 * gets them (resulting in a "panic: closef: count < 0").
524 */
525 sorflush(unp->unp_socket);
526 unp_gc();
527 }
528 if (unp->unp_addr)
529 FREE(unp->unp_addr, M_SONAME);
530 zfree(unp_zone, unp);
531 }
532
533 static int
534 unp_bind(unp, nam, p)
535 struct unpcb *unp;
536 struct sockaddr *nam;
537 struct proc *p;
538 {
539 struct sockaddr_un *soun = (struct sockaddr_un *)nam;
540 register struct vnode *vp;
541 struct vattr vattr;
542 int error, namelen;
543 struct nameidata nd;
544 char buf[SOCK_MAXADDRLEN];
545
546 if (unp->unp_vnode != NULL)
547 return (EINVAL);
548 #define offsetof(s, e) ((char *)&((s *)0)->e - (char *)((s *)0))
549 namelen = soun->sun_len - offsetof(struct sockaddr_un, sun_path);
550 if (namelen <= 0)
551 return EINVAL;
552 strncpy(buf, soun->sun_path, namelen);
553 buf[namelen] = 0; /* null-terminate the string */
554 NDINIT(&nd, CREATE, FOLLOW | LOCKPARENT, UIO_SYSSPACE,
555 buf, p);
556 /* SHOULD BE ABLE TO ADOPT EXISTING AND wakeup() ALA FIFO's */
557 error = namei(&nd);
558 if (error)
559 return (error);
560 vp = nd.ni_vp;
561 if (vp != NULL) {
562 VOP_ABORTOP(nd.ni_dvp, &nd.ni_cnd);
563 if (nd.ni_dvp == vp)
564 vrele(nd.ni_dvp);
565 else
566 vput(nd.ni_dvp);
567 vrele(vp);
568 return (EADDRINUSE);
569 }
570 VATTR_NULL(&vattr);
571 vattr.va_type = VSOCK;
572 vattr.va_mode = (ACCESSPERMS & ~p->p_fd->fd_cmask);
573 VOP_LEASE(nd.ni_dvp, p, p->p_ucred, LEASE_WRITE);
574 error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
575 vput(nd.ni_dvp);
576 if (error)
577 return (error);
578 vp = nd.ni_vp;
579 vp->v_socket = unp->unp_socket;
580 unp->unp_vnode = vp;
581 unp->unp_addr = (struct sockaddr_un *)dup_sockaddr(nam, 1);
582 VOP_UNLOCK(vp, 0, p);
583 return (0);
584 }
585
/*
 * Connect so to the socket bound to the filesystem name in nam.
 *
 * The name is looked up, must refer to a VSOCK node that the process may
 * write to, and must still have a socket of the same type behind it.
 * For protocols with PR_CONNREQUIRED the target must be accepting
 * connections; a new socket is spawned from it with sonewconn(), inherits
 * a copy of the listener's address, and becomes the actual peer.
 *
 * Returns 0 or EINVAL, ENOTSOCK, ECONNREFUSED, EPROTOTYPE, or the error
 * from namei()/VOP_ACCESS()/unp_connect2().
 */
static int
unp_connect(struct socket *so, struct sockaddr *nam, struct proc *p)
{
	struct sockaddr_un *soun = (struct sockaddr_un *)nam;
	struct vnode *vp;
	struct socket *peer, *newso;
	struct unpcb *listen_pcb, *new_pcb;
	struct nameidata nd;
	char path[SOCK_MAXADDRLEN];
	int error, pathlen;

	pathlen = nam->sa_len - offsetof(struct sockaddr_un, sun_path);
	if (pathlen <= 0)
		return (EINVAL);
	strncpy(path, soun->sun_path, pathlen);
	path[pathlen] = 0;

	NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, path, p);
	error = namei(&nd);
	if (error)
		return (error);
	vp = nd.ni_vp;

	if (vp->v_type != VSOCK) {
		error = ENOTSOCK;
		goto bad;
	}
	error = VOP_ACCESS(vp, VWRITE, p->p_ucred, p);
	if (error)
		goto bad;

	peer = vp->v_socket;
	if (peer == NULL) {
		error = ECONNREFUSED;
		goto bad;
	}
	if (so->so_type != peer->so_type) {
		error = EPROTOTYPE;
		goto bad;
	}

	if (so->so_proto->pr_flags & PR_CONNREQUIRED) {
		if ((peer->so_options & SO_ACCEPTCONN) == 0) {
			error = ECONNREFUSED;
			goto bad;
		}
		newso = sonewconn(peer, 0);
		if (newso == NULL) {
			error = ECONNREFUSED;
			goto bad;
		}
		listen_pcb = sotounpcb(peer);
		new_pcb = sotounpcb(newso);
		if (listen_pcb->unp_addr != NULL)
			new_pcb->unp_addr = (struct sockaddr_un *)
			    dup_sockaddr((struct sockaddr *)
			    listen_pcb->unp_addr, 1);
		peer = newso;
	}
	error = unp_connect2(so, peer);
bad:
	vput(vp);
	return (error);
}
646
647 int
648 unp_connect2(so, so2)
649 register struct socket *so;
650 register struct socket *so2;
651 {
652 register struct unpcb *unp = sotounpcb(so);
653 register struct unpcb *unp2;
654
655 if (so2->so_type != so->so_type)
656 return (EPROTOTYPE);
657 unp2 = sotounpcb(so2);
658 unp->unp_conn = unp2;
659 switch (so->so_type) {
660
661 case SOCK_DGRAM:
662 LIST_INSERT_HEAD(&unp2->unp_refs, unp, unp_reflink);
663 soisconnected(so);
664 break;
665
666 case SOCK_STREAM:
667 unp2->unp_conn = unp;
668 soisconnected(so);
669 soisconnected(so2);
670 break;
671
672 default:
673 panic("unp_connect2");
674 }
675 return (0);
676 }
677
678 static void
679 unp_disconnect(unp)
680 struct unpcb *unp;
681 {
682 register struct unpcb *unp2 = unp->unp_conn;
683
684 if (unp2 == 0)
685 return;
686 unp->unp_conn = 0;
687 switch (unp->unp_socket->so_type) {
688
689 case SOCK_DGRAM:
690 LIST_REMOVE(unp, unp_reflink);
691 unp->unp_socket->so_state &= ~SS_ISCONNECTED;
692 break;
693
694 case SOCK_STREAM:
695 soisdisconnected(unp->unp_socket);
696 unp2->unp_conn = 0;
697 soisdisconnected(unp2->unp_socket);
698 break;
699 }
700 }
701
#ifdef notdef
/*
 * Compiled out: abort is just a detach.
 */
void
unp_abort(struct unpcb *unp)
{

	unp_detach(unp);
}
#endif
711
712 static int
713 unp_pcblist SYSCTL_HANDLER_ARGS
714 {
715 int error, i, n;
716 struct unpcb *unp, **unp_list;
717 unp_gen_t gencnt;
718 struct xunpgen xug;
719 struct unp_head *head;
720
721 head = ((intptr_t)arg1 == SOCK_DGRAM ? &unp_dhead : &unp_shead);
722
723 /*
724 * The process of preparing the PCB list is too time-consuming and
725 * resource-intensive to repeat twice on every request.
726 */
727 if (req->oldptr == 0) {
728 n = unp_count;
729 req->oldidx = 2 * (sizeof xug)
730 + (n + n/8) * sizeof(struct xunpcb);
731 return 0;
732 }
733
734 if (req->newptr != 0)
735 return EPERM;
736
737 /*
738 * OK, now we're committed to doing something.
739 */
740 gencnt = unp_gencnt;
741 n = unp_count;
742
743 xug.xug_len = sizeof xug;
744 xug.xug_count = n;
745 xug.xug_gen = gencnt;
746 xug.xug_sogen = so_gencnt;
747 error = SYSCTL_OUT(req, &xug, sizeof xug);
748 if (error)
749 return error;
750
751 unp_list = malloc(n * sizeof *unp_list, M_TEMP, M_WAITOK);
752 if (unp_list == 0)
753 return ENOMEM;
754
755 for (unp = head->lh_first, i = 0; unp && i < n;
756 unp = unp->unp_link.le_next) {
757 if (unp->unp_gencnt <= gencnt)
758 unp_list[i++] = unp;
759 }
760 n = i; /* in case we lost some during malloc */
761
762 error = 0;
763 for (i = 0; i < n; i++) {
764 unp = unp_list[i];
765 if (unp->unp_gencnt <= gencnt) {
766 struct xunpcb xu;
767 xu.xu_len = sizeof xu;
768 xu.xu_unpp = unp;
769 /*
770 * XXX - need more locking here to protect against
771 * connect/disconnect races for SMP.
772 */
773 if (unp->unp_addr)
774 bcopy(unp->unp_addr, &xu.xu_addr,
775 unp->unp_addr->sun_len);
776 if (unp->unp_conn && unp->unp_conn->unp_addr)
777 bcopy(unp->unp_conn->unp_addr,
778 &xu.xu_caddr,
779 unp->unp_conn->unp_addr->sun_len);
780 bcopy(unp, &xu.xu_unp, sizeof *unp);
781 sotoxsocket(unp->unp_socket, &xu.xu_socket);
782 error = SYSCTL_OUT(req, &xu, sizeof xu);
783 }
784 }
785 if (!error) {
786 /*
787 * Give the user an updated idea of our state.
788 * If the generation differs from what we told
789 * her before, she knows that something happened
790 * while we were processing this request, and it
791 * might be necessary to retry.
792 */
793 xug.xug_gen = unp_gencnt;
794 xug.xug_sogen = so_gencnt;
795 xug.xug_count = unp_count;
796 error = SYSCTL_OUT(req, &xug, sizeof xug);
797 }
798 free(unp_list, M_TEMP);
799 return error;
800 }
801
802 SYSCTL_PROC(_net_local_dgram, OID_AUTO, pcblist, CTLFLAG_RD,
803 (caddr_t)(long)SOCK_DGRAM, 0, unp_pcblist, "S,xunpcb",
804 "List of active local datagram sockets");
805 SYSCTL_PROC(_net_local_stream, OID_AUTO, pcblist, CTLFLAG_RD,
806 (caddr_t)(long)SOCK_STREAM, 0, unp_pcblist, "S,xunpcb",
807 "List of active local stream sockets");
808
809 static void
810 unp_shutdown(unp)
811 struct unpcb *unp;
812 {
813 struct socket *so;
814
815 if (unp->unp_socket->so_type == SOCK_STREAM && unp->unp_conn &&
816 (so = unp->unp_conn->unp_socket))
817 socantrcvmore(so);
818 }
819
820 static void
821 unp_drop(unp, errno)
822 struct unpcb *unp;
823 int errno;
824 {
825 struct socket *so = unp->unp_socket;
826
827 so->so_error = errno;
828 unp_disconnect(unp);
829 if (so->so_head) {
830 LIST_REMOVE(unp, unp_link);
831 unp->unp_gencnt = ++unp_gencnt;
832 unp_count--;
833 so->so_pcb = (caddr_t) 0;
834 if (unp->unp_addr)
835 FREE(unp->unp_addr, M_SONAME);
836 zfree(unp_zone, unp);
837 sofree(so);
838 }
839 }
840
#ifdef notdef
/*
 * Compiled out: placeholder drain routine that does nothing.
 */
void
unp_drain(void)
{
}
#endif
848
849 int
850 unp_externalize(rights)
851 struct mbuf *rights;
852 {
853 struct proc *p = curproc; /* XXX */
854 register int i;
855 register struct cmsghdr *cm = mtod(rights, struct cmsghdr *);
856 register struct file **rp = (struct file **)(cm + 1);
857 register struct file *fp;
858 int newfds = (cm->cmsg_len - sizeof(*cm)) / sizeof (int);
859 int f;
860
861 /*
862 * if the new FD's will not fit, then we free them all
863 */
864 if (!fdavail(p, newfds)) {
865 for (i = 0; i < newfds; i++) {
866 fp = *rp;
867 unp_discard(fp);
868 *rp++ = 0;
869 }
870 return (EMSGSIZE);
871 }
872 /*
873 * now change each pointer to an fd in the global table to
874 * an integer that is the index to the local fd table entry
875 * that we set up to point to the global one we are transferring.
876 * XXX this assumes a pointer and int are the same size...!
877 */
878 for (i = 0; i < newfds; i++) {
879 if (fdalloc(p, 0, &f))
880 panic("unp_externalize");
881 fp = *rp;
882 p->p_fd->fd_ofiles[f] = fp;
883 fp->f_msgcount--;
884 unp_rights--;
885 *(int *)rp++ = f;
886 }
887 return (0);
888 }
889
890 void
891 unp_init(void)
892 {
893 unp_zone = zinit("unpcb", sizeof(struct unpcb), nmbclusters, 0, 0);
894 if (unp_zone == 0)
895 panic("unp_init");
896 LIST_INIT(&unp_dhead);
897 LIST_INIT(&unp_shead);
898 }
899
900 #ifndef MIN
901 #define MIN(a,b) (((a)<(b))?(a):(b))
902 #endif
903
904 static int
905 unp_internalize(control, p)
906 struct mbuf *control;
907 struct proc *p;
908 {
909 struct filedesc *fdp = p->p_fd;
910 register struct cmsghdr *cm = mtod(control, struct cmsghdr *);
911 register struct file **rp;
912 register struct file *fp;
913 register int i, fd;
914 register struct cmsgcred *cmcred;
915 int oldfds;
916
917 if ((cm->cmsg_type != SCM_RIGHTS && cm->cmsg_type != SCM_CREDS) ||
918 cm->cmsg_level != SOL_SOCKET || cm->cmsg_len != control->m_len)
919 return (EINVAL);
920
921 /*
922 * Fill in credential information.
923 */
924 if (cm->cmsg_type == SCM_CREDS) {
925 cmcred = (struct cmsgcred *)(cm + 1);
926 cmcred->cmcred_pid = p->p_pid;
927 cmcred->cmcred_uid = p->p_cred->p_ruid;
928 cmcred->cmcred_gid = p->p_cred->p_rgid;
929 cmcred->cmcred_euid = p->p_ucred->cr_uid;
930 cmcred->cmcred_ngroups = MIN(p->p_ucred->cr_ngroups,
931 CMGROUP_MAX);
932 for (i = 0; i < cmcred->cmcred_ngroups; i++)
933 cmcred->cmcred_groups[i] = p->p_ucred->cr_groups[i];
934 return(0);
935 }
936
937 oldfds = (cm->cmsg_len - sizeof (*cm)) / sizeof (int);
938 /*
939 * check that all the FDs passed in refer to legal OPEN files
940 * If not, reject the entire operation.
941 */
942 rp = (struct file **)(cm + 1);
943 for (i = 0; i < oldfds; i++) {
944 fd = *(int *)rp++;
945 if ((unsigned)fd >= fdp->fd_nfiles ||
946 fdp->fd_ofiles[fd] == NULL)
947 return (EBADF);
948 }
949 /*
950 * Now replace the integer FDs with pointers to
951 * the associated global file table entry..
952 * XXX this assumes a pointer and an int are the same size!
953 */
954 rp = (struct file **)(cm + 1);
955 for (i = 0; i < oldfds; i++) {
956 fp = fdp->fd_ofiles[*(int *)rp];
957 *rp++ = fp;
958 fp->f_count++;
959 fp->f_msgcount++;
960 unp_rights++;
961 }
962 return (0);
963 }
964
965 static int unp_defer, unp_gcing;
966
967 static void
968 unp_gc()
969 {
970 register struct file *fp, *nextfp;
971 register struct socket *so;
972 struct file **extra_ref, **fpp;
973 int nunref, i;
974
975 if (unp_gcing)
976 return;
977 unp_gcing = 1;
978 unp_defer = 0;
979 /*
980 * before going through all this, set all FDs to
981 * be NOT defered and NOT externally accessible
982 */
983 for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next)
984 fp->f_flag &= ~(FMARK|FDEFER);
985 do {
986 for (fp = filehead.lh_first; fp != 0; fp = fp->f_list.le_next) {
987 /*
988 * If the file is not open, skip it
989 */
990 if (fp->f_count == 0)
991 continue;
992 /*
993 * If we already marked it as 'defer' in a
994 * previous pass, then try process it this time
995 * and un-mark it
996 */
997 if (fp->f_flag & FDEFER) {
998 fp->f_flag &= ~FDEFER;
999 unp_defer--;
1000 } else {
1001 /*
1002 * if it's not defered, then check if it's
1003 * already marked.. if so skip it
1004 */
1005 if (fp->f_flag & FMARK)
1006 continue;
1007 /*
1008 * If all references are from messages
1009 * in transit, then skip it. it's not
1010 * externally accessible.
1011 */
1012 if (fp->f_count == fp->f_msgcount)
1013 continue;
1014 /*
1015 * If it got this far then it must be
1016 * externally accessible.
1017 */
1018 fp->f_flag |= FMARK;
1019 }
1020 /*
1021 * either it was defered, or it is externally
1022 * accessible and not already marked so.
1023 * Now check if it is possibly one of OUR sockets.
1024 */
1025 if (fp->f_type != DTYPE_SOCKET ||
1026 (so = (struct socket *)fp->f_data) == 0)
1027 continue;
1028 if (so->so_proto->pr_domain != &localdomain ||
1029 (so->so_proto->pr_flags&PR_RIGHTS) == 0)
1030 continue;
1031 #ifdef notdef
1032 if (so->so_rcv.sb_flags & SB_LOCK) {
1033 /*
1034 * This is problematical; it's not clear
1035 * we need to wait for the sockbuf to be
1036 * unlocked (on a uniprocessor, at least),
1037 * and it's also not clear what to do
1038 * if sbwait returns an error due to receipt
1039 * of a signal. If sbwait does return
1040 * an error, we'll go into an infinite
1041 * loop. Delete all of this for now.
1042 */
1043 (void) sbwait(&so->so_rcv);
1044 goto restart;
1045 }
1046 #endif
1047 /*
1048 * So, Ok, it's one of our sockets and it IS externally
1049 * accessible (or was defered). Now we look
1050 * to see if we hold any file descriptors in its
1051 * message buffers. Follow those links and mark them
1052 * as accessible too.
1053 */
1054 unp_scan(so->so_rcv.sb_mb, unp_mark);
1055 }
1056 } while (unp_defer);
1057 /*
1058 * We grab an extra reference to each of the file table entries
1059 * that are not otherwise accessible and then free the rights
1060 * that are stored in messages on them.
1061 *
1062 * The bug in the orginal code is a little tricky, so I'll describe
1063 * what's wrong with it here.
1064 *
1065 * It is incorrect to simply unp_discard each entry for f_msgcount
1066 * times -- consider the case of sockets A and B that contain
1067 * references to each other. On a last close of some other socket,
1068 * we trigger a gc since the number of outstanding rights (unp_rights)
1069 * is non-zero. If during the sweep phase the gc code un_discards,
1070 * we end up doing a (full) closef on the descriptor. A closef on A
1071 * results in the following chain. Closef calls soo_close, which
1072 * calls soclose. Soclose calls first (through the switch
1073 * uipc_usrreq) unp_detach, which re-invokes unp_gc. Unp_gc simply
1074 * returns because the previous instance had set unp_gcing, and
1075 * we return all the way back to soclose, which marks the socket
1076 * with SS_NOFDREF, and then calls sofree. Sofree calls sorflush
1077 * to free up the rights that are queued in messages on the socket A,
1078 * i.e., the reference on B. The sorflush calls via the dom_dispose
1079 * switch unp_dispose, which unp_scans with unp_discard. This second
1080 * instance of unp_discard just calls closef on B.
1081 *
1082 * Well, a similar chain occurs on B, resulting in a sorflush on B,
1083 * which results in another closef on A. Unfortunately, A is already
1084 * being closed, and the descriptor has already been marked with
1085 * SS_NOFDREF, and soclose panics at this point.
1086 *
1087 * Here, we first take an extra reference to each inaccessible
1088 * descriptor. Then, we call sorflush ourself, since we know
1089 * it is a Unix domain socket anyhow. After we destroy all the
1090 * rights carried in messages, we do a last closef to get rid
1091 * of our extra reference. This is the last close, and the
1092 * unp_detach etc will shut down the socket.
1093 *
1094 * 91/09/19, bsy@cs.cmu.edu
1095 */
1096 extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK);
1097 for (nunref = 0, fp = filehead.lh_first, fpp = extra_ref; fp != 0;
1098 fp = nextfp) {
1099 nextfp = fp->f_list.le_next;
1100 /*
1101 * If it's not open, skip it
1102 */
1103 if (fp->f_count == 0)
1104 continue;
1105 /*
1106 * If all refs are from msgs, and it's not marked accessible
1107 * then it must be referenced from some unreachable cycle
1108 * of (shut-down) FDs, so include it in our
1109 * list of FDs to remove
1110 */
1111 if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) {
1112 *fpp++ = fp;
1113 nunref++;
1114 fp->f_count++;
1115 }
1116 }
1117 /*
1118 * for each FD on our hit list, do the following two things
1119 */
1120 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) {
1121 struct file *tfp = *fpp;
1122 if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL)
1123 sorflush((struct socket *)(tfp->f_data));
1124 }
1125
1126
1127 for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp)
1128 closef(*fpp, (struct proc *) NULL);
1129 free((caddr_t)extra_ref, M_FILE);
1130 unp_gcing = 0;
1131 }
1132
1133 void
1134 unp_dispose(m)
1135 struct mbuf *m;
1136 {
1137
1138 if (m)
1139 unp_scan(m, unp_discard);
1140 }
1141
1142 static void
1143 unp_scan(m0, op)
1144 register struct mbuf *m0;
1145 void (*op) __P((struct file *));
1146 {
1147 register struct mbuf *m;
1148 register struct file **rp;
1149 register struct cmsghdr *cm;
1150 register int i;
1151 int qfds;
1152
1153 while (m0) {
1154 for (m = m0; m; m = m->m_next)
1155 if (m->m_type == MT_CONTROL &&
1156 m->m_len >= sizeof(*cm)) {
1157 cm = mtod(m, struct cmsghdr *);
1158 if (cm->cmsg_level != SOL_SOCKET ||
1159 cm->cmsg_type != SCM_RIGHTS)
1160 continue;
1161 qfds = (cm->cmsg_len - sizeof *cm)
1162 / sizeof (struct file *);
1163 rp = (struct file **)(cm + 1);
1164 for (i = 0; i < qfds; i++)
1165 (*op)(*rp++);
1166 break; /* XXX, but saves time */
1167 }
1168 m0 = m0->m_act;
1169 }
1170 }
1171
1172 static void
1173 unp_mark(fp)
1174 struct file *fp;
1175 {
1176
1177 if (fp->f_flag & FMARK)
1178 return;
1179 unp_defer++;
1180 fp->f_flag |= (FMARK|FDEFER);
1181 }
1182
1183 static void
1184 unp_discard(fp)
1185 struct file *fp;
1186 {
1187
1188 fp->f_msgcount--;
1189 unp_rights--;
1190 (void) closef(fp, (struct proc *)NULL);
1191 }
Cache object: e02fb7904faf3804365d81a10830979d
|