1 /*-
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
33 */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD: releng/6.0/sys/kern/uipc_syscalls.c 147784 2005-07-05 22:49:10Z rwatson $");
37
38 #include "opt_compat.h"
39 #include "opt_ktrace.h"
40 #include "opt_mac.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/lock.h>
46 #include <sys/mac.h>
47 #include <sys/mutex.h>
48 #include <sys/sysproto.h>
49 #include <sys/malloc.h>
50 #include <sys/filedesc.h>
51 #include <sys/event.h>
52 #include <sys/proc.h>
53 #include <sys/fcntl.h>
54 #include <sys/file.h>
55 #include <sys/filio.h>
56 #include <sys/mount.h>
57 #include <sys/mbuf.h>
58 #include <sys/protosw.h>
59 #include <sys/sf_buf.h>
60 #include <sys/socket.h>
61 #include <sys/socketvar.h>
62 #include <sys/signalvar.h>
63 #include <sys/syscallsubr.h>
64 #include <sys/sysctl.h>
65 #include <sys/uio.h>
66 #include <sys/vnode.h>
67 #ifdef KTRACE
68 #include <sys/ktrace.h>
69 #endif
70
71 #include <vm/vm.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_page.h>
74 #include <vm/vm_pageout.h>
75 #include <vm/vm_kern.h>
76 #include <vm/vm_extern.h>
77
/*
 * Common back ends shared by the send*() and recv*() system calls defined
 * in this file.
 */
static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);

/*
 * Shared implementations taking a `compat' flag.  accept1() is called with
 * compat == 1 by oaccept() and compat == 0 by accept(); the other helpers
 * presumably follow the same convention (their callers are defined
 * elsewhere in the file).
 */
static int accept1(struct thread *td, struct accept_args *uap, int compat);
static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
static int getsockname1(struct thread *td, struct getsockname_args *uap,
    int compat);
static int getpeername1(struct thread *td, struct getpeername_args *uap,
    int compat);
87
/*
 * NSFBUFS-related variables and associated sysctls.
 *
 * Each counter is exported under the kern.ipc sysctl node; the description
 * strings below state what each one tracks.
 */
int nsfbufs;		/* maximum number of sendfile(2) sf_bufs available */
int nsfbufspeak;	/* number of sf_bufs at peak usage */
int nsfbufsused;	/* number of sf_bufs currently in use */

SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
    "Maximum number of sendfile(2) sf_bufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
    "Number of sendfile(2) sf_bufs at peak usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
    "Number of sendfile(2) sf_bufs in use");
102
103 /*
104 * Convert a user file descriptor to a kernel file entry. A reference on the
105 * file entry is held upon returning. This is lighter weight than
106 * fgetsock(), which bumps the socket reference drops the file reference
107 * count instead, as this approach avoids several additional mutex operations
108 * associated with the additional reference count.
109 */
110 static int
111 getsock(struct filedesc *fdp, int fd, struct file **fpp)
112 {
113 struct file *fp;
114 int error;
115
116 fp = NULL;
117 if (fdp == NULL)
118 error = EBADF;
119 else {
120 FILEDESC_LOCK_FAST(fdp);
121 fp = fget_locked(fdp, fd);
122 if (fp == NULL)
123 error = EBADF;
124 else if (fp->f_type != DTYPE_SOCKET) {
125 fp = NULL;
126 error = ENOTSOCK;
127 } else {
128 fhold(fp);
129 error = 0;
130 }
131 FILEDESC_UNLOCK_FAST(fdp);
132 }
133 *fpp = fp;
134 return (error);
135 }
136
137 /*
138 * System call interface to the socket abstraction.
139 */
/*
 * COMPAT_OLDSOCK is defined whenever COMPAT_43 is configured.  It gates the
 * old-style entry points in this file (oaccept, osend, osendmsg, orecv,
 * orecvfrom, orecvmsg) and the MSG_COMPAT handling in sendit()/recvit().
 */
#if defined(COMPAT_43)
#define COMPAT_OLDSOCK
#endif
143
144 /*
145 * MPSAFE
146 */
147 int
148 socket(td, uap)
149 struct thread *td;
150 register struct socket_args /* {
151 int domain;
152 int type;
153 int protocol;
154 } */ *uap;
155 {
156 struct filedesc *fdp;
157 struct socket *so;
158 struct file *fp;
159 int fd, error;
160
161 #ifdef MAC
162 error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
163 uap->protocol);
164 if (error)
165 return (error);
166 #endif
167 fdp = td->td_proc->p_fd;
168 error = falloc(td, &fp, &fd);
169 if (error)
170 return (error);
171 /* An extra reference on `fp' has been held for us by falloc(). */
172 NET_LOCK_GIANT();
173 error = socreate(uap->domain, &so, uap->type, uap->protocol,
174 td->td_ucred, td);
175 NET_UNLOCK_GIANT();
176 if (error) {
177 fdclose(fdp, fp, fd, td);
178 } else {
179 FILEDESC_LOCK_FAST(fdp);
180 fp->f_data = so; /* already has ref count */
181 fp->f_flag = FREAD|FWRITE;
182 fp->f_ops = &socketops;
183 fp->f_type = DTYPE_SOCKET;
184 FILEDESC_UNLOCK_FAST(fdp);
185 td->td_retval[0] = fd;
186 }
187 fdrop(fp, td);
188 return (error);
189 }
190
191 /*
192 * MPSAFE
193 */
194 /* ARGSUSED */
195 int
196 bind(td, uap)
197 struct thread *td;
198 register struct bind_args /* {
199 int s;
200 caddr_t name;
201 int namelen;
202 } */ *uap;
203 {
204 struct sockaddr *sa;
205 int error;
206
207 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
208 return (error);
209
210 return (kern_bind(td, uap->s, sa));
211 }
212
213 int
214 kern_bind(td, fd, sa)
215 struct thread *td;
216 int fd;
217 struct sockaddr *sa;
218 {
219 struct socket *so;
220 struct file *fp;
221 int error;
222
223 NET_LOCK_GIANT();
224 error = getsock(td->td_proc->p_fd, fd, &fp);
225 if (error)
226 goto done2;
227 so = fp->f_data;
228 #ifdef MAC
229 SOCK_LOCK(so);
230 error = mac_check_socket_bind(td->td_ucred, so, sa);
231 SOCK_UNLOCK(so);
232 if (error)
233 goto done1;
234 #endif
235 error = sobind(so, sa, td);
236 #ifdef MAC
237 done1:
238 #endif
239 fdrop(fp, td);
240 done2:
241 NET_UNLOCK_GIANT();
242 FREE(sa, M_SONAME);
243 return (error);
244 }
245
246 /*
247 * MPSAFE
248 */
249 /* ARGSUSED */
250 int
251 listen(td, uap)
252 struct thread *td;
253 register struct listen_args /* {
254 int s;
255 int backlog;
256 } */ *uap;
257 {
258 struct socket *so;
259 struct file *fp;
260 int error;
261
262 NET_LOCK_GIANT();
263 error = getsock(td->td_proc->p_fd, uap->s, &fp);
264 if (error == 0) {
265 so = fp->f_data;
266 #ifdef MAC
267 SOCK_LOCK(so);
268 error = mac_check_socket_listen(td->td_ucred, so);
269 SOCK_UNLOCK(so);
270 if (error)
271 goto done;
272 #endif
273 error = solisten(so, uap->backlog, td);
274 #ifdef MAC
275 done:
276 #endif
277 fdrop(fp, td);
278 }
279 NET_UNLOCK_GIANT();
280 return(error);
281 }
282
/*
 * accept1()
 * MPSAFE
 *
 * Common implementation of accept() and oaccept().  Takes the first
 * completed connection off the listening socket's so_comp queue, installs
 * it in a newly allocated descriptor (returned in td->td_retval[0]) and,
 * when uap->name is non-NULL, copies the peer address and its length out
 * to user space.  `compat' selects the old osockaddr layout when
 * COMPAT_OLDSOCK is defined.
 *
 * Returns 0 or an errno value.  On failure after the descriptor has been
 * allocated, the descriptor is closed again.
 */
static int
accept1(td, uap, compat)
	struct thread *td;
	register struct accept_args /* {
		int	s;
		struct sockaddr	* __restrict name;
		socklen_t	* __restrict anamelen;
	} */ *uap;
	int compat;
{
	struct filedesc *fdp;
	struct file *nfp = NULL;
	struct sockaddr *sa = NULL;
	socklen_t namelen;
	int error;
	struct socket *head, *so;
	int fd;
	u_int fflag;
	pid_t pgid;
	int tmp;

	fdp = td->td_proc->p_fd;
	/* The caller-supplied length is only fetched when a name is wanted. */
	if (uap->name) {
		error = copyin(uap->anamelen, &namelen, sizeof (namelen));
		if(error)
			return (error);
		/*
		 * NOTE(review): if socklen_t is an unsigned type this test
		 * can never be true -- confirm against its definition.
		 */
		if (namelen < 0)
			return (EINVAL);
	}
	NET_LOCK_GIANT();
	/* Holds a reference on `head'; released with fputsock() at done:. */
	error = fgetsock(td, uap->s, &head, &fflag);
	if (error)
		goto done2;
	if ((head->so_options & SO_ACCEPTCONN) == 0) {
		error = EINVAL;
		goto done;
	}
#ifdef MAC
	SOCK_LOCK(head);
	error = mac_check_socket_accept(td->td_ucred, head);
	SOCK_UNLOCK(head);
	if (error != 0)
		goto done;
#endif
	error = falloc(td, &nfp, &fd);
	if (error)
		goto done;
	ACCEPT_LOCK();
	/* Non-blocking listener with nothing queued: fail immediately. */
	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
		ACCEPT_UNLOCK();
		error = EWOULDBLOCK;
		goto noconnection;
	}
	/*
	 * Sleep until a completed connection is queued or an error is
	 * posted on the listening socket.  A listener that can no longer
	 * receive is reported as ECONNABORTED.
	 */
	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
		if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
			head->so_error = ECONNABORTED;
			break;
		}
		error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
		    "accept", 0);
		if (error) {
			ACCEPT_UNLOCK();
			goto noconnection;
		}
	}
	/* Consume and report any pending error on the listening socket. */
	if (head->so_error) {
		error = head->so_error;
		head->so_error = 0;
		ACCEPT_UNLOCK();
		goto noconnection;
	}
	so = TAILQ_FIRST(&head->so_comp);
	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));

	/*
	 * Before changing the flags on the socket, we have to bump the
	 * reference count.  Otherwise, if the protocol calls sofree(),
	 * the socket will be released due to a zero refcount.
	 */
	SOCK_LOCK(so);			/* soref() and so_state update */
	soref(so);			/* file descriptor reference */

	/* Detach the connection from the listener's completed queue. */
	TAILQ_REMOVE(&head->so_comp, so, so_list);
	head->so_qlen--;
	/* The new socket inherits the listener's non-blocking state. */
	so->so_state |= (head->so_state & SS_NBIO);
	so->so_qstate &= ~SQ_COMP;
	so->so_head = NULL;

	SOCK_UNLOCK(so);
	ACCEPT_UNLOCK();

	/* An extra reference on `nfp' has been held for us by falloc(). */
	td->td_retval[0] = fd;

	/* connection has been removed from the listen queue */
	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);

	/* Propagate the listener's SIGIO owner, if any, to the new socket. */
	pgid = fgetown(&head->so_sigio);
	if (pgid != 0)
		fsetown(pgid, &so->so_sigio);

	FILE_LOCK(nfp);
	nfp->f_data = so;	/* nfp has ref count from falloc */
	nfp->f_flag = fflag;
	nfp->f_ops = &socketops;
	nfp->f_type = DTYPE_SOCKET;
	FILE_UNLOCK(nfp);
	/* Sync socket nonblocking/async state with file flags */
	tmp = fflag & FNONBLOCK;
	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
	tmp = fflag & FASYNC;
	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
	sa = 0;
	error = soaccept(so, &sa);
	if (error) {
		/*
		 * return a namelen of zero for older code which might
		 * ignore the return value from accept.
		 */
		if (uap->name != NULL) {
			namelen = 0;
			(void) copyout(&namelen,
			    uap->anamelen, sizeof(*uap->anamelen));
		}
		goto noconnection;
	}
	/* No address available: report a zero length if one was requested. */
	if (sa == NULL) {
		namelen = 0;
		if (uap->name)
			goto gotnoname;
		error = 0;
		goto done;
	}
	if (uap->name) {
		/* check sa_len before it is destroyed */
		if (namelen > sa->sa_len)
			namelen = sa->sa_len;
#ifdef COMPAT_OLDSOCK
		if (compat)
			((struct osockaddr *)sa)->sa_family =
			    sa->sa_family;
#endif
		error = copyout(sa, uap->name, (u_int)namelen);
		if (!error)
			/* Also entered by goto when there is no address. */
gotnoname:
			error = copyout(&namelen,
			    uap->anamelen, sizeof (*uap->anamelen));
	}
noconnection:
	if (sa)
		FREE(sa, M_SONAME);

	/*
	 * close the new descriptor, assuming someone hasn't ripped it
	 * out from under us.
	 */
	if (error)
		fdclose(fdp, nfp, fd, td);

	/*
	 * Release explicitly held references before returning.
	 */
done:
	if (nfp != NULL)
		fdrop(nfp, td);
	fputsock(head);
done2:
	NET_UNLOCK_GIANT();
	return (error);
}
458
/*
 * accept(2): thin wrapper that runs accept1() without old-socket
 * compatibility.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
accept(struct thread *td, struct accept_args *uap)
{
	int rc;

	rc = accept1(td, uap, 0);
	return (rc);
}
470
471 #ifdef COMPAT_OLDSOCK
/*
 * Old-style accept: thin wrapper that runs accept1() with the compat flag
 * set.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
oaccept(struct thread *td, struct accept_args *uap)
{
	int rc;

	rc = accept1(td, uap, 1);
	return (rc);
}
483 #endif /* COMPAT_OLDSOCK */
484
485 /*
486 * MPSAFE
487 */
488 /* ARGSUSED */
489 int
490 connect(td, uap)
491 struct thread *td;
492 register struct connect_args /* {
493 int s;
494 caddr_t name;
495 int namelen;
496 } */ *uap;
497 {
498 struct sockaddr *sa;
499 int error;
500
501 error = getsockaddr(&sa, uap->name, uap->namelen);
502 if (error)
503 return (error);
504
505 return (kern_connect(td, uap->s, sa));
506 }
507
508
/*
 * Connect the socket behind descriptor `fd' to the kernel-space address
 * `sa'.
 *
 * For a blocking socket this sleeps until the connection attempt leaves
 * the SS_ISCONNECTING state or an error is posted; a non-blocking socket
 * that is still connecting returns EINPROGRESS.  `sa' is always freed
 * (M_SONAME) before returning.  Returns 0 or an errno value; ERESTART from
 * the sleep is reported as EINTR.
 */
int
kern_connect(td, fd, sa)
	struct thread *td;
	int fd;
	struct sockaddr *sa;
{
	struct socket *so;
	struct file *fp;
	int error;
	int interrupted = 0;	/* set when the sleep was broken by a signal */

	NET_LOCK_GIANT();
	error = getsock(td->td_proc->p_fd, fd, &fp);
	if (error)
		goto done2;
	so = fp->f_data;
	/* A connection attempt is already in progress on this socket. */
	if (so->so_state & SS_ISCONNECTING) {
		error = EALREADY;
		goto done1;
	}
#ifdef MAC
	SOCK_LOCK(so);
	error = mac_check_socket_connect(td->td_ucred, so, sa);
	SOCK_UNLOCK(so);
	if (error)
		goto bad;
#endif
	error = soconnect(so, sa, td);
	if (error)
		goto bad;
	/* Non-blocking socket: report that the connect is under way. */
	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
		error = EINPROGRESS;
		goto done1;
	}
	SOCK_LOCK(so);
	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
		    "connec", 0);
		if (error) {
			if (error == EINTR || error == ERESTART)
				interrupted = 1;
			break;
		}
	}
	/* The sleep ended normally: pick up and clear the socket error. */
	if (error == 0) {
		error = so->so_error;
		so->so_error = 0;
	}
	SOCK_UNLOCK(so);
bad:
	/*
	 * Leave SS_ISCONNECTING set when a signal interrupted the wait;
	 * otherwise clear it.
	 */
	if (!interrupted)
		so->so_state &= ~SS_ISCONNECTING;
	if (error == ERESTART)
		error = EINTR;
done1:
	fdrop(fp, td);
done2:
	NET_UNLOCK_GIANT();
	FREE(sa, M_SONAME);
	return (error);
}
570
/*
 * socketpair(2): create two sockets, connect them to each other, install
 * each in a new descriptor and copy the two descriptor numbers out to
 * uap->rsv.
 *
 * The free4/free3/free2/free1 labels unwind, in reverse order, whatever had
 * been set up when a step failed.  Returns 0 or an errno value.
 *
 * MPSAFE
 */
int
socketpair(td, uap)
	struct thread *td;
	register struct socketpair_args /* {
		int	domain;
		int	type;
		int	protocol;
		int	*rsv;
	} */ *uap;
{
	register struct filedesc *fdp = td->td_proc->p_fd;
	struct file *fp1, *fp2;
	struct socket *so1, *so2;
	int fd, error, sv[2];	/* sv[] collects the two new descriptors */

#ifdef MAC
	/* We might want to have a separate check for socket pairs. */
	error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
	    uap->protocol);
	if (error)
		return (error);
#endif

	NET_LOCK_GIANT();
	error = socreate(uap->domain, &so1, uap->type, uap->protocol,
	    td->td_ucred, td);
	if (error)
		goto done2;
	error = socreate(uap->domain, &so2, uap->type, uap->protocol,
	    td->td_ucred, td);
	if (error)
		goto free1;
	/* On success extra reference to `fp1' and 'fp2' is set by falloc. */
	error = falloc(td, &fp1, &fd);
	if (error)
		goto free2;
	sv[0] = fd;
	fp1->f_data = so1;	/* so1 already has ref count */
	error = falloc(td, &fp2, &fd);
	if (error)
		goto free3;
	fp2->f_data = so2;	/* so2 already has ref count */
	sv[1] = fd;
	error = soconnect2(so1, so2);
	if (error)
		goto free4;
	if (uap->type == SOCK_DGRAM) {
		/*
		 * Datagram socket connection is asymmetric.
		 */
		 error = soconnect2(so2, so1);
		 if (error)
			goto free4;
	}
	/*
	 * Only now are the file entries given socket operations; until this
	 * point the error paths close the sockets themselves via soclose().
	 */
	FILE_LOCK(fp1);
	fp1->f_flag = FREAD|FWRITE;
	fp1->f_ops = &socketops;
	fp1->f_type = DTYPE_SOCKET;
	FILE_UNLOCK(fp1);
	FILE_LOCK(fp2);
	fp2->f_flag = FREAD|FWRITE;
	fp2->f_ops = &socketops;
	fp2->f_type = DTYPE_SOCKET;
	FILE_UNLOCK(fp2);
	/*
	 * NOTE(review): if this copyout fails both descriptors stay open
	 * and the error is returned -- confirm this is intended.
	 */
	error = copyout(sv, uap->rsv, 2 * sizeof (int));
	fdrop(fp1, td);
	fdrop(fp2, td);
	goto done2;
free4:
	fdclose(fdp, fp2, sv[1], td);
	fdrop(fp2, td);
free3:
	fdclose(fdp, fp1, sv[0], td);
	fdrop(fp1, td);
free2:
	(void)soclose(so2);
free1:
	(void)soclose(so1);
done2:
	NET_UNLOCK_GIANT();
	return (error);
}
656
657 static int
658 sendit(td, s, mp, flags)
659 register struct thread *td;
660 int s;
661 register struct msghdr *mp;
662 int flags;
663 {
664 struct mbuf *control;
665 struct sockaddr *to;
666 int error;
667
668 if (mp->msg_name != NULL) {
669 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
670 if (error) {
671 to = NULL;
672 goto bad;
673 }
674 mp->msg_name = to;
675 } else {
676 to = NULL;
677 }
678
679 if (mp->msg_control) {
680 if (mp->msg_controllen < sizeof(struct cmsghdr)
681 #ifdef COMPAT_OLDSOCK
682 && mp->msg_flags != MSG_COMPAT
683 #endif
684 ) {
685 error = EINVAL;
686 goto bad;
687 }
688 error = sockargs(&control, mp->msg_control,
689 mp->msg_controllen, MT_CONTROL);
690 if (error)
691 goto bad;
692 #ifdef COMPAT_OLDSOCK
693 if (mp->msg_flags == MSG_COMPAT) {
694 register struct cmsghdr *cm;
695
696 M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
697 if (control == 0) {
698 error = ENOBUFS;
699 goto bad;
700 } else {
701 cm = mtod(control, struct cmsghdr *);
702 cm->cmsg_len = control->m_len;
703 cm->cmsg_level = SOL_SOCKET;
704 cm->cmsg_type = SCM_RIGHTS;
705 }
706 }
707 #endif
708 } else {
709 control = NULL;
710 }
711
712 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
713
714 bad:
715 if (to)
716 FREE(to, M_SONAME);
717 return (error);
718 }
719
720 int
721 kern_sendit(td, s, mp, flags, control, segflg)
722 struct thread *td;
723 int s;
724 struct msghdr *mp;
725 int flags;
726 struct mbuf *control;
727 enum uio_seg segflg;
728 {
729 struct file *fp;
730 struct uio auio;
731 struct iovec *iov;
732 struct socket *so;
733 int i;
734 int len, error;
735 #ifdef KTRACE
736 struct uio *ktruio = NULL;
737 #endif
738
739 NET_LOCK_GIANT();
740 error = getsock(td->td_proc->p_fd, s, &fp);
741 if (error)
742 goto bad2;
743 so = (struct socket *)fp->f_data;
744
745 #ifdef MAC
746 SOCK_LOCK(so);
747 error = mac_check_socket_send(td->td_ucred, so);
748 SOCK_UNLOCK(so);
749 if (error)
750 goto bad;
751 #endif
752
753 auio.uio_iov = mp->msg_iov;
754 auio.uio_iovcnt = mp->msg_iovlen;
755 auio.uio_segflg = segflg;
756 auio.uio_rw = UIO_WRITE;
757 auio.uio_td = td;
758 auio.uio_offset = 0; /* XXX */
759 auio.uio_resid = 0;
760 iov = mp->msg_iov;
761 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
762 if ((auio.uio_resid += iov->iov_len) < 0) {
763 error = EINVAL;
764 goto bad;
765 }
766 }
767 #ifdef KTRACE
768 if (KTRPOINT(td, KTR_GENIO))
769 ktruio = cloneuio(&auio);
770 #endif
771 len = auio.uio_resid;
772 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio,
773 0, control, flags, td);
774 if (error) {
775 if (auio.uio_resid != len && (error == ERESTART ||
776 error == EINTR || error == EWOULDBLOCK))
777 error = 0;
778 /* Generation of SIGPIPE can be controlled per socket */
779 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
780 !(flags & MSG_NOSIGNAL)) {
781 PROC_LOCK(td->td_proc);
782 psignal(td->td_proc, SIGPIPE);
783 PROC_UNLOCK(td->td_proc);
784 }
785 }
786 if (error == 0)
787 td->td_retval[0] = len - auio.uio_resid;
788 #ifdef KTRACE
789 if (ktruio != NULL) {
790 ktruio->uio_resid = td->td_retval[0];
791 ktrgenio(s, UIO_WRITE, ktruio, error);
792 }
793 #endif
794 bad:
795 fdrop(fp, td);
796 bad2:
797 NET_UNLOCK_GIANT();
798 return (error);
799 }
800
801 /*
802 * MPSAFE
803 */
804 int
805 sendto(td, uap)
806 struct thread *td;
807 register struct sendto_args /* {
808 int s;
809 caddr_t buf;
810 size_t len;
811 int flags;
812 caddr_t to;
813 int tolen;
814 } */ *uap;
815 {
816 struct msghdr msg;
817 struct iovec aiov;
818 int error;
819
820 msg.msg_name = uap->to;
821 msg.msg_namelen = uap->tolen;
822 msg.msg_iov = &aiov;
823 msg.msg_iovlen = 1;
824 msg.msg_control = 0;
825 #ifdef COMPAT_OLDSOCK
826 msg.msg_flags = 0;
827 #endif
828 aiov.iov_base = uap->buf;
829 aiov.iov_len = uap->len;
830 error = sendit(td, uap->s, &msg, uap->flags);
831 return (error);
832 }
833
834 #ifdef COMPAT_OLDSOCK
835 /*
836 * MPSAFE
837 */
838 int
839 osend(td, uap)
840 struct thread *td;
841 register struct osend_args /* {
842 int s;
843 caddr_t buf;
844 int len;
845 int flags;
846 } */ *uap;
847 {
848 struct msghdr msg;
849 struct iovec aiov;
850 int error;
851
852 msg.msg_name = 0;
853 msg.msg_namelen = 0;
854 msg.msg_iov = &aiov;
855 msg.msg_iovlen = 1;
856 aiov.iov_base = uap->buf;
857 aiov.iov_len = uap->len;
858 msg.msg_control = 0;
859 msg.msg_flags = 0;
860 error = sendit(td, uap->s, &msg, uap->flags);
861 return (error);
862 }
863
864 /*
865 * MPSAFE
866 */
867 int
868 osendmsg(td, uap)
869 struct thread *td;
870 struct osendmsg_args /* {
871 int s;
872 caddr_t msg;
873 int flags;
874 } */ *uap;
875 {
876 struct msghdr msg;
877 struct iovec *iov;
878 int error;
879
880 error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
881 if (error)
882 return (error);
883 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
884 if (error)
885 return (error);
886 msg.msg_iov = iov;
887 msg.msg_flags = MSG_COMPAT;
888 error = sendit(td, uap->s, &msg, uap->flags);
889 free(iov, M_IOV);
890 return (error);
891 }
892 #endif
893
894 /*
895 * MPSAFE
896 */
897 int
898 sendmsg(td, uap)
899 struct thread *td;
900 struct sendmsg_args /* {
901 int s;
902 caddr_t msg;
903 int flags;
904 } */ *uap;
905 {
906 struct msghdr msg;
907 struct iovec *iov;
908 int error;
909
910 error = copyin(uap->msg, &msg, sizeof (msg));
911 if (error)
912 return (error);
913 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
914 if (error)
915 return (error);
916 msg.msg_iov = iov;
917 #ifdef COMPAT_OLDSOCK
918 msg.msg_flags = 0;
919 #endif
920 error = sendit(td, uap->s, &msg, uap->flags);
921 free(iov, M_IOV);
922 return (error);
923 }
924
/*
 * Common back end for the receive system calls.
 *
 * Receives into the buffers described by mp->msg_iov (a kernel copy of the
 * iovec array pointing at user-space buffers) from the socket behind
 * descriptor `s'.  On success the byte count is stored in td->td_retval[0],
 * and the source address and control data are copied out to the user
 * buffers named by mp->msg_name and mp->msg_control when those are set;
 * mp->msg_namelen, mp->msg_controllen and mp->msg_flags are updated.
 * `namelenp', when non-NULL, is a user-space address that also receives
 * the address length.
 *
 * Returns 0 or an errno value.
 */
static int
recvit(td, s, mp, namelenp)
	struct thread *td;
	int s;
	struct msghdr *mp;
	void *namelenp;
{
	struct uio auio;
	struct iovec *iov;
	int i;
	socklen_t len;
	int error;
	struct mbuf *m, *control = 0;
	caddr_t ctlbuf;
	struct file *fp;
	struct socket *so;
	struct sockaddr *fromsa = 0;
#ifdef KTRACE
	struct uio *ktruio = NULL;
#endif

	NET_LOCK_GIANT();
	error = getsock(td->td_proc->p_fd, s, &fp);
	if (error) {
		NET_UNLOCK_GIANT();
		return (error);
	}
	so = fp->f_data;

#ifdef MAC
	SOCK_LOCK(so);
	error = mac_check_socket_receive(td->td_ucred, so);
	SOCK_UNLOCK(so);
	if (error) {
		fdrop(fp, td);
		NET_UNLOCK_GIANT();
		return (error);
	}
#endif

	auio.uio_iov = mp->msg_iov;
	auio.uio_iovcnt = mp->msg_iovlen;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_td = td;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	iov = mp->msg_iov;
	/* Sum the segment lengths, rejecting a total that overflows. */
	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
		if ((auio.uio_resid += iov->iov_len) < 0) {
			fdrop(fp, td);
			NET_UNLOCK_GIANT();
			return (EINVAL);
		}
	}
#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO))
		ktruio = cloneuio(&auio);
#endif
	/* Remember the requested size to compute the byte count later. */
	len = auio.uio_resid;
	/* Control data is only requested when the caller gave a buffer. */
	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
	    (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
	    &mp->msg_flags);
	if (error) {
		/* Partial transfer before an interruption counts as success. */
		if (auio.uio_resid != (int)len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
#ifdef KTRACE
	if (ktruio != NULL) {
		ktruio->uio_resid = (int)len - auio.uio_resid;
		ktrgenio(s, UIO_READ, ktruio, error);
	}
#endif
	if (error)
		goto out;
	td->td_retval[0] = (int)len - auio.uio_resid;
	/* `len' is reused below for the address and control lengths. */
	if (mp->msg_name) {
		len = mp->msg_namelen;
		if (len <= 0 || fromsa == 0)
			len = 0;
		else {
			/* save sa_len before it is destroyed by MSG_COMPAT */
			len = MIN(len, fromsa->sa_len);
#ifdef COMPAT_OLDSOCK
			if (mp->msg_flags & MSG_COMPAT)
				((struct osockaddr *)fromsa)->sa_family =
				    fromsa->sa_family;
#endif
			error = copyout(fromsa, mp->msg_name, (unsigned)len);
			if (error)
				goto out;
		}
		mp->msg_namelen = len;
		if (namelenp &&
		    (error = copyout(&len, namelenp, sizeof (socklen_t)))) {
#ifdef COMPAT_OLDSOCK
			if (mp->msg_flags & MSG_COMPAT)
				error = 0;	/* old recvfrom didn't check */
			else
#endif
			goto out;
		}
	}
	if (mp->msg_control) {
#ifdef COMPAT_OLDSOCK
		/*
		 * We assume that old recvmsg calls won't receive access
		 * rights and other control info, esp. as control info
		 * is always optional and those options didn't exist in 4.3.
		 * If we receive rights, trim the cmsghdr; anything else
		 * is tossed.
		 */
		if (control && mp->msg_flags & MSG_COMPAT) {
			if (mtod(control, struct cmsghdr *)->cmsg_level !=
			    SOL_SOCKET ||
			    mtod(control, struct cmsghdr *)->cmsg_type !=
			    SCM_RIGHTS) {
				mp->msg_controllen = 0;
				goto out;
			}
			control->m_len -= sizeof (struct cmsghdr);
			control->m_data += sizeof (struct cmsghdr);
		}
#endif
		len = mp->msg_controllen;
		m = control;
		mp->msg_controllen = 0;
		ctlbuf = mp->msg_control;

		/*
		 * Copy the control mbuf chain out piece by piece until the
		 * chain or the user's buffer is exhausted; a piece that
		 * does not fit is truncated and MSG_CTRUNC is set.
		 */
		while (m && len > 0) {
			unsigned int tocopy;

			if (len >= m->m_len)
				tocopy = m->m_len;
			else {
				mp->msg_flags |= MSG_CTRUNC;
				tocopy = len;
			}

			if ((error = copyout(mtod(m, caddr_t),
					ctlbuf, tocopy)) != 0)
				goto out;

			ctlbuf += tocopy;
			len -= tocopy;
			m = m->m_next;
		}
		/* Report how many control bytes were actually copied out. */
		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
	}
out:
	fdrop(fp, td);
	NET_UNLOCK_GIANT();
	if (fromsa)
		FREE(fromsa, M_SONAME);
	if (control)
		m_freem(control);
	return (error);
}
1084
1085 /*
1086 * MPSAFE
1087 */
1088 int
1089 recvfrom(td, uap)
1090 struct thread *td;
1091 register struct recvfrom_args /* {
1092 int s;
1093 caddr_t buf;
1094 size_t len;
1095 int flags;
1096 struct sockaddr * __restrict from;
1097 socklen_t * __restrict fromlenaddr;
1098 } */ *uap;
1099 {
1100 struct msghdr msg;
1101 struct iovec aiov;
1102 int error;
1103
1104 if (uap->fromlenaddr) {
1105 error = copyin(uap->fromlenaddr,
1106 &msg.msg_namelen, sizeof (msg.msg_namelen));
1107 if (error)
1108 goto done2;
1109 } else {
1110 msg.msg_namelen = 0;
1111 }
1112 msg.msg_name = uap->from;
1113 msg.msg_iov = &aiov;
1114 msg.msg_iovlen = 1;
1115 aiov.iov_base = uap->buf;
1116 aiov.iov_len = uap->len;
1117 msg.msg_control = 0;
1118 msg.msg_flags = uap->flags;
1119 error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1120 done2:
1121 return(error);
1122 }
1123
1124 #ifdef COMPAT_OLDSOCK
1125 /*
1126 * MPSAFE
1127 */
1128 int
1129 orecvfrom(td, uap)
1130 struct thread *td;
1131 struct recvfrom_args *uap;
1132 {
1133
1134 uap->flags |= MSG_COMPAT;
1135 return (recvfrom(td, uap));
1136 }
1137 #endif
1138
1139
1140 #ifdef COMPAT_OLDSOCK
1141 /*
1142 * MPSAFE
1143 */
1144 int
1145 orecv(td, uap)
1146 struct thread *td;
1147 register struct orecv_args /* {
1148 int s;
1149 caddr_t buf;
1150 int len;
1151 int flags;
1152 } */ *uap;
1153 {
1154 struct msghdr msg;
1155 struct iovec aiov;
1156 int error;
1157
1158 msg.msg_name = 0;
1159 msg.msg_namelen = 0;
1160 msg.msg_iov = &aiov;
1161 msg.msg_iovlen = 1;
1162 aiov.iov_base = uap->buf;
1163 aiov.iov_len = uap->len;
1164 msg.msg_control = 0;
1165 msg.msg_flags = uap->flags;
1166 error = recvit(td, uap->s, &msg, NULL);
1167 return (error);
1168 }
1169
1170 /*
1171 * Old recvmsg. This code takes advantage of the fact that the old msghdr
1172 * overlays the new one, missing only the flags, and with the (old) access
1173 * rights where the control fields are now.
1174 *
1175 * MPSAFE
1176 */
1177 int
1178 orecvmsg(td, uap)
1179 struct thread *td;
1180 struct orecvmsg_args /* {
1181 int s;
1182 struct omsghdr *msg;
1183 int flags;
1184 } */ *uap;
1185 {
1186 struct msghdr msg;
1187 struct iovec *iov;
1188 int error;
1189
1190 error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1191 if (error)
1192 return (error);
1193 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1194 if (error)
1195 return (error);
1196 msg.msg_flags = uap->flags | MSG_COMPAT;
1197 msg.msg_iov = iov;
1198 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1199 if (msg.msg_controllen && error == 0)
1200 error = copyout(&msg.msg_controllen,
1201 &uap->msg->msg_accrightslen, sizeof (int));
1202 free(iov, M_IOV);
1203 return (error);
1204 }
1205 #endif
1206
1207 /*
1208 * MPSAFE
1209 */
1210 int
1211 recvmsg(td, uap)
1212 struct thread *td;
1213 struct recvmsg_args /* {
1214 int s;
1215 struct msghdr *msg;
1216 int flags;
1217 } */ *uap;
1218 {
1219 struct msghdr msg;
1220 struct iovec *uiov, *iov;
1221 int error;
1222
1223 error = copyin(uap->msg, &msg, sizeof (msg));
1224 if (error)
1225 return (error);
1226 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1227 if (error)
1228 return (error);
1229 msg.msg_flags = uap->flags;
1230 #ifdef COMPAT_OLDSOCK
1231 msg.msg_flags &= ~MSG_COMPAT;
1232 #endif
1233 uiov = msg.msg_iov;
1234 msg.msg_iov = iov;
1235 error = recvit(td, uap->s, &msg, NULL);
1236 if (error == 0) {
1237 msg.msg_iov = uiov;
1238 error = copyout(&msg, uap->msg, sizeof(msg));
1239 }
1240 free(iov, M_IOV);
1241 return (error);
1242 }
1243
1244 /*
1245 * MPSAFE
1246 */
1247 /* ARGSUSED */
1248 int
1249 shutdown(td, uap)
1250 struct thread *td;
1251 register struct shutdown_args /* {
1252 int s;
1253 int how;
1254 } */ *uap;
1255 {
1256 struct socket *so;
1257 struct file *fp;
1258 int error;
1259
1260 NET_LOCK_GIANT();
1261 error = getsock(td->td_proc->p_fd, uap->s, &fp);
1262 if (error == 0) {
1263 so = fp->f_data;
1264 error = soshutdown(so, uap->how);
1265 fdrop(fp, td);
1266 }
1267 NET_UNLOCK_GIANT();
1268 return (error);
1269 }
1270
1271 /*
1272 * MPSAFE
1273 */
1274 /* ARGSUSED */
1275 int
1276 setsockopt(td, uap)
1277 struct thread *td;
1278 register struct setsockopt_args /* {
1279 int s;
1280 int level;
1281 int name;
1282 caddr_t val;
1283 int valsize;
1284 } */ *uap;
1285 {
1286
1287 return (kern_setsockopt(td, uap->s, uap->level, uap->name,
1288 uap->val, UIO_USERSPACE, uap->valsize));
1289 }
1290
1291 int
1292 kern_setsockopt(td, s, level, name, val, valseg, valsize)
1293 struct thread *td;
1294 int s;
1295 int level;
1296 int name;
1297 void *val;
1298 enum uio_seg valseg;
1299 socklen_t valsize;
1300 {
1301 int error;
1302 struct socket *so;
1303 struct file *fp;
1304 struct sockopt sopt;
1305
1306 if (val == NULL && valsize != 0)
1307 return (EFAULT);
1308 if (valsize < 0)
1309 return (EINVAL);
1310
1311 sopt.sopt_dir = SOPT_SET;
1312 sopt.sopt_level = level;
1313 sopt.sopt_name = name;
1314 sopt.sopt_val = val;
1315 sopt.sopt_valsize = valsize;
1316 switch (valseg) {
1317 case UIO_USERSPACE:
1318 sopt.sopt_td = td;
1319 break;
1320 case UIO_SYSSPACE:
1321 sopt.sopt_td = NULL;
1322 break;
1323 default:
1324 panic("kern_setsockopt called with bad valseg");
1325 }
1326
1327 NET_LOCK_GIANT();
1328 error = getsock(td->td_proc->p_fd, s, &fp);
1329 if (error == 0) {
1330 so = fp->f_data;
1331 error = sosetopt(so, &sopt);
1332 fdrop(fp, td);
1333 }
1334 NET_UNLOCK_GIANT();
1335 return(error);
1336 }
1337
1338 /*
1339 * MPSAFE
1340 */
1341 /* ARGSUSED */
1342 int
1343 getsockopt(td, uap)
1344 struct thread *td;
1345 register struct getsockopt_args /* {
1346 int s;
1347 int level;
1348 int name;
1349 void * __restrict val;
1350 socklen_t * __restrict avalsize;
1351 } */ *uap;
1352 {
1353 socklen_t valsize;
1354 int error;
1355
1356 if (uap->val) {
1357 error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1358 if (error)
1359 return (error);
1360 }
1361
1362 error = kern_getsockopt(td, uap->s, uap->level, uap->name,
1363 uap->val, UIO_USERSPACE, &valsize);
1364
1365 if (error == 0)
1366 error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1367 return (error);
1368 }
1369
1370 /*
1371 * Kernel version of getsockopt.
1372 * optval can be a userland or userspace. optlen is always a kernel pointer.
1373 */
1374 int
1375 kern_getsockopt(td, s, level, name, val, valseg, valsize)
1376 struct thread *td;
1377 int s;
1378 int level;
1379 int name;
1380 void *val;
1381 enum uio_seg valseg;
1382 socklen_t *valsize;
1383 {
1384 int error;
1385 struct socket *so;
1386 struct file *fp;
1387 struct sockopt sopt;
1388
1389 if (val == NULL)
1390 *valsize = 0;
1391 if (*valsize < 0)
1392 return (EINVAL);
1393
1394 sopt.sopt_dir = SOPT_GET;
1395 sopt.sopt_level = level;
1396 sopt.sopt_name = name;
1397 sopt.sopt_val = val;
1398 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
1399 switch (valseg) {
1400 case UIO_USERSPACE:
1401 sopt.sopt_td = td;
1402 break;
1403 case UIO_SYSSPACE:
1404 sopt.sopt_td = NULL;
1405 break;
1406 default:
1407 panic("kern_getsockopt called with bad valseg");
1408 }
1409
1410 NET_LOCK_GIANT();
1411 error = getsock(td->td_proc->p_fd, s, &fp);
1412 if (error == 0) {
1413 so = fp->f_data;
1414 error = sogetopt(so, &sopt);
1415 *valsize = sopt.sopt_valsize;
1416 fdrop(fp, td);
1417 }
1418 NET_UNLOCK_GIANT();
1419 return (error);
1420 }
1421
1422 /*
1423 * getsockname1() - Get socket name.
1424 *
1425 * MPSAFE
1426 */
1427 /* ARGSUSED */
1428 static int
1429 getsockname1(td, uap, compat)
1430 struct thread *td;
1431 register struct getsockname_args /* {
1432 int fdes;
1433 struct sockaddr * __restrict asa;
1434 socklen_t * __restrict alen;
1435 } */ *uap;
1436 int compat;
1437 {
1438 struct socket *so;
1439 struct sockaddr *sa;
1440 struct file *fp;
1441 socklen_t len;
1442 int error;
1443
1444 NET_LOCK_GIANT();
1445 error = getsock(td->td_proc->p_fd, uap->fdes, &fp);
1446 if (error)
1447 goto done2;
1448 so = fp->f_data;
1449 error = copyin(uap->alen, &len, sizeof (len));
1450 if (error)
1451 goto done1;
1452 if (len < 0) {
1453 error = EINVAL;
1454 goto done1;
1455 }
1456 sa = 0;
1457 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1458 if (error)
1459 goto bad;
1460 if (sa == 0) {
1461 len = 0;
1462 goto gotnothing;
1463 }
1464
1465 len = MIN(len, sa->sa_len);
1466 #ifdef COMPAT_OLDSOCK
1467 if (compat)
1468 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1469 #endif
1470 error = copyout(sa, uap->asa, (u_int)len);
1471 if (error == 0)
1472 gotnothing:
1473 error = copyout(&len, uap->alen, sizeof (len));
1474 bad:
1475 if (sa)
1476 FREE(sa, M_SONAME);
1477 done1:
1478 fdrop(fp, td);
1479 done2:
1480 NET_UNLOCK_GIANT();
1481 return (error);
1482 }
1483
/*
 * getsockname() - system call entry point; calls getsockname1() with
 * compat set to 0.
 *
 * MPSAFE
 */
int
getsockname(struct thread *td, struct getsockname_args *uap)
{
	int error;

	error = getsockname1(td, uap, 0);
	return (error);
}
1495
#ifdef COMPAT_OLDSOCK
/*
 * ogetsockname() - COMPAT_OLDSOCK entry point; calls getsockname1() with
 * compat set to 1.
 *
 * MPSAFE
 */
int
ogetsockname(struct thread *td, struct getsockname_args *uap)
{
	int error;

	error = getsockname1(td, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
1509
1510 /*
1511 * getpeername1() - Get name of peer for connected socket.
1512 *
1513 * MPSAFE
1514 */
1515 /* ARGSUSED */
1516 static int
1517 getpeername1(td, uap, compat)
1518 struct thread *td;
1519 register struct getpeername_args /* {
1520 int fdes;
1521 struct sockaddr * __restrict asa;
1522 socklen_t * __restrict alen;
1523 } */ *uap;
1524 int compat;
1525 {
1526 struct socket *so;
1527 struct sockaddr *sa;
1528 struct file *fp;
1529 socklen_t len;
1530 int error;
1531
1532 NET_LOCK_GIANT();
1533 error = getsock(td->td_proc->p_fd, uap->fdes, &fp);
1534 if (error)
1535 goto done2;
1536 so = fp->f_data;
1537 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1538 error = ENOTCONN;
1539 goto done1;
1540 }
1541 error = copyin(uap->alen, &len, sizeof (len));
1542 if (error)
1543 goto done1;
1544 if (len < 0) {
1545 error = EINVAL;
1546 goto done1;
1547 }
1548 sa = 0;
1549 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1550 if (error)
1551 goto bad;
1552 if (sa == 0) {
1553 len = 0;
1554 goto gotnothing;
1555 }
1556 len = MIN(len, sa->sa_len);
1557 #ifdef COMPAT_OLDSOCK
1558 if (compat)
1559 ((struct osockaddr *)sa)->sa_family =
1560 sa->sa_family;
1561 #endif
1562 error = copyout(sa, uap->asa, (u_int)len);
1563 if (error)
1564 goto bad;
1565 gotnothing:
1566 error = copyout(&len, uap->alen, sizeof (len));
1567 bad:
1568 if (sa)
1569 FREE(sa, M_SONAME);
1570 done1:
1571 fdrop(fp, td);
1572 done2:
1573 NET_UNLOCK_GIANT();
1574 return (error);
1575 }
1576
/*
 * getpeername() - system call entry point; calls getpeername1() with
 * compat set to 0.
 *
 * MPSAFE
 */
int
getpeername(struct thread *td, struct getpeername_args *uap)
{
	int error;

	error = getpeername1(td, uap, 0);
	return (error);
}
1588
#ifdef COMPAT_OLDSOCK
/*
 * ogetpeername() - COMPAT_OLDSOCK entry point; calls getpeername1() with
 * compat set to 1.
 *
 * MPSAFE
 */
int
ogetpeername(struct thread *td, struct ogetpeername_args *uap)
{
	struct getpeername_args *gap;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	gap = (struct getpeername_args *)uap;
	return (getpeername1(td, gap, 1));
}
#endif /* COMPAT_OLDSOCK */
1603
1604 int
1605 sockargs(mp, buf, buflen, type)
1606 struct mbuf **mp;
1607 caddr_t buf;
1608 int buflen, type;
1609 {
1610 register struct sockaddr *sa;
1611 register struct mbuf *m;
1612 int error;
1613
1614 if ((u_int)buflen > MLEN) {
1615 #ifdef COMPAT_OLDSOCK
1616 if (type == MT_SONAME && (u_int)buflen <= 112)
1617 buflen = MLEN; /* unix domain compat. hack */
1618 else
1619 #endif
1620 if ((u_int)buflen > MCLBYTES)
1621 return (EINVAL);
1622 }
1623 m = m_get(M_TRYWAIT, type);
1624 if (m == NULL)
1625 return (ENOBUFS);
1626 if ((u_int)buflen > MLEN) {
1627 MCLGET(m, M_TRYWAIT);
1628 if ((m->m_flags & M_EXT) == 0) {
1629 m_free(m);
1630 return (ENOBUFS);
1631 }
1632 }
1633 m->m_len = buflen;
1634 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1635 if (error)
1636 (void) m_free(m);
1637 else {
1638 *mp = m;
1639 if (type == MT_SONAME) {
1640 sa = mtod(m, struct sockaddr *);
1641
1642 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1643 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1644 sa->sa_family = sa->sa_len;
1645 #endif
1646 sa->sa_len = buflen;
1647 }
1648 }
1649 return (error);
1650 }
1651
1652 int
1653 getsockaddr(namp, uaddr, len)
1654 struct sockaddr **namp;
1655 caddr_t uaddr;
1656 size_t len;
1657 {
1658 struct sockaddr *sa;
1659 int error;
1660
1661 if (len > SOCK_MAXADDRLEN)
1662 return (ENAMETOOLONG);
1663 if (len < offsetof(struct sockaddr, sa_data[0]))
1664 return (EINVAL);
1665 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1666 error = copyin(uaddr, sa, len);
1667 if (error) {
1668 FREE(sa, M_SONAME);
1669 } else {
1670 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1671 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1672 sa->sa_family = sa->sa_len;
1673 #endif
1674 sa->sa_len = len;
1675 *namp = sa;
1676 }
1677 return (error);
1678 }
1679
1680 /*
1681 * Detach mapped page and release resources back to the system.
1682 */
1683 void
1684 sf_buf_mext(void *addr, void *args)
1685 {
1686 vm_page_t m;
1687
1688 m = sf_buf_page(args);
1689 sf_buf_free(args);
1690 vm_page_lock_queues();
1691 vm_page_unwire(m, 0);
1692 /*
1693 * Check for the object going away on us. This can
1694 * happen since we don't hold a reference to it.
1695 * If so, we're responsible for freeing the page.
1696 */
1697 if (m->wire_count == 0 && m->object == NULL)
1698 vm_page_free(m);
1699 vm_page_unlock_queues();
1700 }
1701
/*
 * sendfile(2)
 *
 * MPSAFE
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Send the file referenced by 'fd', beginning at 'offset', to the socket
 * referenced by 's'.  At most 'nbytes' of the file are sent; nbytes == 0
 * means send until EOF.  A header and/or trailer may optionally be added
 * to the socket output.  If 'sbytes' is given, the total number of bytes
 * sent is written into *sbytes.
 *
 * This entry point calls do_sendfile() with compat set to 0.
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
	int error;

	error = do_sendfile(td, uap, 0);
	return (error);
}
1722
#ifdef COMPAT_FREEBSD4
/*
 * freebsd4_sendfile() - COMPAT_FREEBSD4 entry point for sendfile.
 *
 * Copies each field of the freebsd4 argument structure into a
 * struct sendfile_args and calls do_sendfile() with compat set to 1.
 */
int
freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
{
	struct sendfile_args nargs;
	int error;

	nargs.fd = uap->fd;
	nargs.s = uap->s;
	nargs.offset = uap->offset;
	nargs.nbytes = uap->nbytes;
	nargs.hdtr = uap->hdtr;
	nargs.sbytes = uap->sbytes;
	nargs.flags = uap->flags;

	error = do_sendfile(td, &nargs, 1);
	return (error);
}
#endif /* COMPAT_FREEBSD4 */
1740
1741 static int
1742 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1743 {
1744 struct vnode *vp;
1745 struct vm_object *obj;
1746 struct socket *so = NULL;
1747 struct mbuf *m, *m_header = NULL;
1748 struct sf_buf *sf;
1749 struct vm_page *pg;
1750 struct writev_args nuap;
1751 struct sf_hdtr hdtr;
1752 struct uio *hdr_uio = NULL;
1753 off_t off, xfsize, hdtr_size, sbytes = 0;
1754 int error, headersize = 0, headersent = 0;
1755
1756 mtx_lock(&Giant);
1757
1758 hdtr_size = 0;
1759
1760 /*
1761 * The descriptor must be a regular file and have a backing VM object.
1762 */
1763 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1764 goto done;
1765 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1766 obj = vp->v_object;
1767 VOP_UNLOCK(vp, 0, td);
1768 if (obj == NULL) {
1769 error = EINVAL;
1770 goto done;
1771 }
1772 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1773 goto done;
1774 if (so->so_type != SOCK_STREAM) {
1775 error = EINVAL;
1776 goto done;
1777 }
1778 if ((so->so_state & SS_ISCONNECTED) == 0) {
1779 error = ENOTCONN;
1780 goto done;
1781 }
1782 if (uap->offset < 0) {
1783 error = EINVAL;
1784 goto done;
1785 }
1786
1787 #ifdef MAC
1788 SOCK_LOCK(so);
1789 error = mac_check_socket_send(td->td_ucred, so);
1790 SOCK_UNLOCK(so);
1791 if (error)
1792 goto done;
1793 #endif
1794
1795 /*
1796 * If specified, get the pointer to the sf_hdtr struct for
1797 * any headers/trailers.
1798 */
1799 if (uap->hdtr != NULL) {
1800 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1801 if (error)
1802 goto done;
1803 /*
1804 * Send any headers.
1805 */
1806 if (hdtr.headers != NULL) {
1807 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1808 if (error)
1809 goto done;
1810 hdr_uio->uio_td = td;
1811 hdr_uio->uio_rw = UIO_WRITE;
1812 if (hdr_uio->uio_resid > 0) {
1813 m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0, 0);
1814 if (m_header == NULL)
1815 goto done;
1816 headersize = m_header->m_pkthdr.len;
1817 if (compat)
1818 sbytes += headersize;
1819 }
1820 }
1821 }
1822
1823 /*
1824 * Protect against multiple writers to the socket.
1825 */
1826 SOCKBUF_LOCK(&so->so_snd);
1827 (void) sblock(&so->so_snd, M_WAITOK);
1828 SOCKBUF_UNLOCK(&so->so_snd);
1829
1830 /*
1831 * Loop through the pages in the file, starting with the requested
1832 * offset. Get a file page (do I/O if necessary), map the file page
1833 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1834 * it on the socket.
1835 */
1836 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1837 vm_pindex_t pindex;
1838 vm_offset_t pgoff;
1839
1840 pindex = OFF_TO_IDX(off);
1841 VM_OBJECT_LOCK(obj);
1842 retry_lookup:
1843 /*
1844 * Calculate the amount to transfer. Not to exceed a page,
1845 * the EOF, or the passed in nbytes.
1846 */
1847 xfsize = obj->un_pager.vnp.vnp_size - off;
1848 VM_OBJECT_UNLOCK(obj);
1849 if (xfsize > PAGE_SIZE)
1850 xfsize = PAGE_SIZE;
1851 pgoff = (vm_offset_t)(off & PAGE_MASK);
1852 if (PAGE_SIZE - pgoff < xfsize)
1853 xfsize = PAGE_SIZE - pgoff;
1854 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1855 xfsize = uap->nbytes - sbytes;
1856 if (xfsize <= 0) {
1857 if (m_header != NULL) {
1858 m = m_header;
1859 m_header = NULL;
1860 SOCKBUF_LOCK(&so->so_snd);
1861 goto retry_space;
1862 } else
1863 break;
1864 }
1865 /*
1866 * Optimize the non-blocking case by looking at the socket space
1867 * before going to the extra work of constituting the sf_buf.
1868 */
1869 SOCKBUF_LOCK(&so->so_snd);
1870 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1871 if (so->so_snd.sb_state & SBS_CANTSENDMORE)
1872 error = EPIPE;
1873 else
1874 error = EAGAIN;
1875 sbunlock(&so->so_snd);
1876 SOCKBUF_UNLOCK(&so->so_snd);
1877 goto done;
1878 }
1879 SOCKBUF_UNLOCK(&so->so_snd);
1880 VM_OBJECT_LOCK(obj);
1881 /*
1882 * Attempt to look up the page.
1883 *
1884 * Allocate if not found
1885 *
1886 * Wait and loop if busy.
1887 */
1888 pg = vm_page_lookup(obj, pindex);
1889
1890 if (pg == NULL) {
1891 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NOBUSY |
1892 VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
1893 if (pg == NULL) {
1894 VM_OBJECT_UNLOCK(obj);
1895 VM_WAIT;
1896 VM_OBJECT_LOCK(obj);
1897 goto retry_lookup;
1898 }
1899 vm_page_lock_queues();
1900 } else {
1901 vm_page_lock_queues();
1902 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
1903 goto retry_lookup;
1904 /*
1905 * Wire the page so it does not get ripped out from
1906 * under us.
1907 */
1908 vm_page_wire(pg);
1909 }
1910
1911 /*
1912 * If page is not valid for what we need, initiate I/O
1913 */
1914
1915 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) {
1916 VM_OBJECT_UNLOCK(obj);
1917 } else if (uap->flags & SF_NODISKIO) {
1918 error = EBUSY;
1919 } else {
1920 int bsize, resid;
1921
1922 /*
1923 * Ensure that our page is still around when the I/O
1924 * completes.
1925 */
1926 vm_page_io_start(pg);
1927 vm_page_unlock_queues();
1928 VM_OBJECT_UNLOCK(obj);
1929
1930 /*
1931 * Get the page from backing store.
1932 */
1933 bsize = vp->v_mount->mnt_stat.f_iosize;
1934 vn_lock(vp, LK_SHARED | LK_RETRY, td);
1935 /*
1936 * XXXMAC: Because we don't have fp->f_cred here,
1937 * we pass in NOCRED. This is probably wrong, but
1938 * is consistent with our original implementation.
1939 */
1940 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
1941 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
1942 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
1943 td->td_ucred, NOCRED, &resid, td);
1944 VOP_UNLOCK(vp, 0, td);
1945 VM_OBJECT_LOCK(obj);
1946 vm_page_lock_queues();
1947 vm_page_io_finish(pg);
1948 if (!error)
1949 VM_OBJECT_UNLOCK(obj);
1950 mbstat.sf_iocnt++;
1951 }
1952
1953 if (error) {
1954 vm_page_unwire(pg, 0);
1955 /*
1956 * See if anyone else might know about this page.
1957 * If not and it is not valid, then free it.
1958 */
1959 if (pg->wire_count == 0 && pg->valid == 0 &&
1960 pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1961 pg->hold_count == 0) {
1962 vm_page_free(pg);
1963 }
1964 vm_page_unlock_queues();
1965 VM_OBJECT_UNLOCK(obj);
1966 SOCKBUF_LOCK(&so->so_snd);
1967 sbunlock(&so->so_snd);
1968 SOCKBUF_UNLOCK(&so->so_snd);
1969 goto done;
1970 }
1971 vm_page_unlock_queues();
1972
1973 /*
1974 * Get a sendfile buf. We usually wait as long as necessary,
1975 * but this wait can be interrupted.
1976 */
1977 if ((sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) {
1978 mbstat.sf_allocfail++;
1979 vm_page_lock_queues();
1980 vm_page_unwire(pg, 0);
1981 if (pg->wire_count == 0 && pg->object == NULL)
1982 vm_page_free(pg);
1983 vm_page_unlock_queues();
1984 SOCKBUF_LOCK(&so->so_snd);
1985 sbunlock(&so->so_snd);
1986 SOCKBUF_UNLOCK(&so->so_snd);
1987 error = EINTR;
1988 goto done;
1989 }
1990
1991 /*
1992 * Get an mbuf header and set it up as having external storage.
1993 */
1994 if (m_header)
1995 MGET(m, M_TRYWAIT, MT_DATA);
1996 else
1997 MGETHDR(m, M_TRYWAIT, MT_DATA);
1998 if (m == NULL) {
1999 error = ENOBUFS;
2000 sf_buf_mext((void *)sf_buf_kva(sf), sf);
2001 SOCKBUF_LOCK(&so->so_snd);
2002 sbunlock(&so->so_snd);
2003 SOCKBUF_UNLOCK(&so->so_snd);
2004 goto done;
2005 }
2006 /*
2007 * Setup external storage for mbuf.
2008 */
2009 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY,
2010 EXT_SFBUF);
2011 m->m_data = (char *)sf_buf_kva(sf) + pgoff;
2012 m->m_pkthdr.len = m->m_len = xfsize;
2013
2014 if (m_header) {
2015 m_cat(m_header, m);
2016 m = m_header;
2017 m_header = NULL;
2018 m_fixhdr(m);
2019 }
2020
2021 /*
2022 * Add the buffer to the socket buffer chain.
2023 */
2024 SOCKBUF_LOCK(&so->so_snd);
2025 retry_space:
2026 /*
2027 * Make sure that the socket is still able to take more data.
2028 * CANTSENDMORE being true usually means that the connection
2029 * was closed. so_error is true when an error was sensed after
2030 * a previous send.
2031 * The state is checked after the page mapping and buffer
2032 * allocation above since those operations may block and make
2033 * any socket checks stale. From this point forward, nothing
2034 * blocks before the pru_send (or more accurately, any blocking
2035 * results in a loop back to here to re-check).
2036 */
2037 SOCKBUF_LOCK_ASSERT(&so->so_snd);
2038 if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) {
2039 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2040 error = EPIPE;
2041 } else {
2042 error = so->so_error;
2043 so->so_error = 0;
2044 }
2045 m_freem(m);
2046 sbunlock(&so->so_snd);
2047 SOCKBUF_UNLOCK(&so->so_snd);
2048 goto done;
2049 }
2050 /*
2051 * Wait for socket space to become available. We do this just
2052 * after checking the connection state above in order to avoid
2053 * a race condition with sbwait().
2054 */
2055 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
2056 if (so->so_state & SS_NBIO) {
2057 m_freem(m);
2058 sbunlock(&so->so_snd);
2059 SOCKBUF_UNLOCK(&so->so_snd);
2060 error = EAGAIN;
2061 goto done;
2062 }
2063 error = sbwait(&so->so_snd);
2064 /*
2065 * An error from sbwait usually indicates that we've
2066 * been interrupted by a signal. If we've sent anything
2067 * then return bytes sent, otherwise return the error.
2068 */
2069 if (error) {
2070 m_freem(m);
2071 sbunlock(&so->so_snd);
2072 SOCKBUF_UNLOCK(&so->so_snd);
2073 goto done;
2074 }
2075 goto retry_space;
2076 }
2077 SOCKBUF_UNLOCK(&so->so_snd);
2078 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
2079 if (error) {
2080 SOCKBUF_LOCK(&so->so_snd);
2081 sbunlock(&so->so_snd);
2082 SOCKBUF_UNLOCK(&so->so_snd);
2083 goto done;
2084 }
2085 headersent = 1;
2086 }
2087 SOCKBUF_LOCK(&so->so_snd);
2088 sbunlock(&so->so_snd);
2089 SOCKBUF_UNLOCK(&so->so_snd);
2090
2091 /*
2092 * Send trailers. Wimp out and use writev(2).
2093 */
2094 if (uap->hdtr != NULL && hdtr.trailers != NULL) {
2095 nuap.fd = uap->s;
2096 nuap.iovp = hdtr.trailers;
2097 nuap.iovcnt = hdtr.trl_cnt;
2098 error = writev(td, &nuap);
2099 if (error)
2100 goto done;
2101 if (compat)
2102 sbytes += td->td_retval[0];
2103 else
2104 hdtr_size += td->td_retval[0];
2105 }
2106
2107 done:
2108 if (headersent) {
2109 if (!compat)
2110 hdtr_size += headersize;
2111 } else {
2112 if (compat)
2113 sbytes -= headersize;
2114 }
2115 /*
2116 * If there was no error we have to clear td->td_retval[0]
2117 * because it may have been set by writev.
2118 */
2119 if (error == 0) {
2120 td->td_retval[0] = 0;
2121 }
2122 if (uap->sbytes != NULL) {
2123 if (!compat)
2124 sbytes += hdtr_size;
2125 copyout(&sbytes, uap->sbytes, sizeof(off_t));
2126 }
2127 if (vp)
2128 vrele(vp);
2129 if (so)
2130 fputsock(so);
2131 if (hdr_uio != NULL)
2132 free(hdr_uio, M_IOV);
2133 if (m_header)
2134 m_freem(m_header);
2135
2136 mtx_unlock(&Giant);
2137
2138 if (error == ERESTART)
2139 error = EINTR;
2140
2141 return (error);
2142 }
Cache object: ffc31c511123920e9819d3da5aaf8f7f
|