1 /*-
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
33 */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD: releng/5.4/sys/kern/uipc_syscalls.c 145335 2005-04-20 19:11:07Z cvs2svn $");
37
38 #include "opt_compat.h"
39 #include "opt_ktrace.h"
40 #include "opt_mac.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/lock.h>
46 #include <sys/mac.h>
47 #include <sys/mutex.h>
48 #include <sys/sysproto.h>
49 #include <sys/malloc.h>
50 #include <sys/filedesc.h>
51 #include <sys/event.h>
52 #include <sys/proc.h>
53 #include <sys/fcntl.h>
54 #include <sys/file.h>
55 #include <sys/filio.h>
56 #include <sys/mount.h>
57 #include <sys/mbuf.h>
58 #include <sys/protosw.h>
59 #include <sys/sf_buf.h>
60 #include <sys/socket.h>
61 #include <sys/socketvar.h>
62 #include <sys/signalvar.h>
63 #include <sys/syscallsubr.h>
64 #include <sys/sysctl.h>
65 #include <sys/uio.h>
66 #include <sys/vnode.h>
67 #ifdef KTRACE
68 #include <sys/ktrace.h>
69 #endif
70
71 #include <vm/vm.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_page.h>
74 #include <vm/vm_pageout.h>
75 #include <vm/vm_kern.h>
76 #include <vm/vm_extern.h>
77
78 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
79 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
80
81 static int accept1(struct thread *td, struct accept_args *uap, int compat);
82 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
83 static int getsockname1(struct thread *td, struct getsockname_args *uap,
84 int compat);
85 static int getpeername1(struct thread *td, struct getpeername_args *uap,
86 int compat);
87
88 /*
89 * NSFBUFS-related variables and associated sysctls
90 */
91 int nsfbufs;
92 int nsfbufspeak;
93 int nsfbufsused;
94
95 SYSCTL_DECL(_kern_ipc);
96 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
97 "Maximum number of sendfile(2) sf_bufs available");
98 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
99 "Number of sendfile(2) sf_bufs at peak usage");
100 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
101 "Number of sendfile(2) sf_bufs in use");
102
103 /*
104 * Convert a user file descriptor to a kernel file entry. A reference on the
105 * file entry is held upon returning. This is lighter weight than
106 * fgetsock(), which bumps the socket reference drops the file reference
107 * count instead, as this approach avoids several additional mutex operations
108 * associated with the additional reference count.
109 */
110 static int
111 getsock(struct filedesc *fdp, int fd, struct file **fpp)
112 {
113 struct file *fp;
114 int error;
115
116 fp = NULL;
117 if (fdp == NULL)
118 error = EBADF;
119 else {
120 FILEDESC_LOCK_FAST(fdp);
121 fp = fget_locked(fdp, fd);
122 if (fp == NULL)
123 error = EBADF;
124 else if (fp->f_type != DTYPE_SOCKET) {
125 fp = NULL;
126 error = ENOTSOCK;
127 } else {
128 fhold(fp);
129 error = 0;
130 }
131 FILEDESC_UNLOCK_FAST(fdp);
132 }
133 *fpp = fp;
134 return (error);
135 }
136
137 /*
138 * System call interface to the socket abstraction.
139 */
140 #if defined(COMPAT_43)
141 #define COMPAT_OLDSOCK
142 #endif
143
144 /*
145 * MPSAFE
146 */
/*
 * socket(2): create a new socket of the given domain/type/protocol and
 * return its file descriptor in td->td_retval[0].
 */
int
socket(td, uap)
	struct thread *td;
	register struct socket_args /* {
		int	domain;
		int	type;
		int	protocol;
	} */ *uap;
{
	struct filedesc *fdp;
	struct socket *so;
	struct file *fp;
	int fd, error;

	fdp = td->td_proc->p_fd;
	/* Allocate the descriptor first so failure is cheap to undo. */
	error = falloc(td, &fp, &fd);
	if (error)
		return (error);
	/* An extra reference on `fp' has been held for us by falloc(). */
	NET_LOCK_GIANT();
	error = socreate(uap->domain, &so, uap->type, uap->protocol,
	    td->td_ucred, td);
	NET_UNLOCK_GIANT();
	if (error) {
		/* Undo the descriptor allocation on socreate() failure. */
		fdclose(fdp, fp, fd, td);
	} else {
		FILEDESC_LOCK_FAST(fdp);
		fp->f_data = so;	/* already has ref count */
		fp->f_flag = FREAD|FWRITE;
		fp->f_ops = &socketops;
		fp->f_type = DTYPE_SOCKET;
		FILEDESC_UNLOCK_FAST(fdp);
		td->td_retval[0] = fd;
	}
	/* Drop the extra falloc() reference in either case. */
	fdrop(fp, td);
	return (error);
}
184
185 /*
186 * MPSAFE
187 */
188 /* ARGSUSED */
189 int
190 bind(td, uap)
191 struct thread *td;
192 register struct bind_args /* {
193 int s;
194 caddr_t name;
195 int namelen;
196 } */ *uap;
197 {
198 struct sockaddr *sa;
199 int error;
200
201 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
202 return (error);
203
204 return (kern_bind(td, uap->s, sa));
205 }
206
/*
 * Bind address `sa' to the socket referenced by descriptor `fd'.
 * Consumes `sa': it is always freed before returning, on success and
 * failure alike.
 */
int
kern_bind(td, fd, sa)
	struct thread *td;
	int fd;
	struct sockaddr *sa;
{
	struct socket *so;
	struct file *fp;
	int error;

	NET_LOCK_GIANT();
	error = getsock(td->td_proc->p_fd, fd, &fp);
	if (error)
		goto done2;
	so = fp->f_data;
#ifdef MAC
	/* Mandatory access control check before the actual bind. */
	SOCK_LOCK(so);
	error = mac_check_socket_bind(td->td_ucred, so, sa);
	SOCK_UNLOCK(so);
	if (error)
		goto done1;
#endif
	error = sobind(so, sa, td);
#ifdef MAC
done1:
#endif
	fdrop(fp, td);
done2:
	NET_UNLOCK_GIANT();
	/* Ownership of `sa' was transferred to us by the caller. */
	FREE(sa, M_SONAME);
	return (error);
}
239
240 /*
241 * MPSAFE
242 */
243 /* ARGSUSED */
/*
 * listen(2): mark the socket as accepting connections with the given
 * backlog.
 */
/* ARGSUSED */
int
listen(td, uap)
	struct thread *td;
	register struct listen_args /* {
		int	s;
		int	backlog;
	} */ *uap;
{
	struct socket *so;
	struct file *fp;
	int error;

	NET_LOCK_GIANT();
	error = getsock(td->td_proc->p_fd, uap->s, &fp);
	if (error == 0) {
		so = fp->f_data;
#ifdef MAC
		/* MAC veto happens before solisten() changes socket state. */
		SOCK_LOCK(so);
		error = mac_check_socket_listen(td->td_ucred, so);
		SOCK_UNLOCK(so);
		if (error)
			goto done;
#endif
		error = solisten(so, uap->backlog, td);
#ifdef MAC
done:
#endif
		fdrop(fp, td);
	}
	NET_UNLOCK_GIANT();
	return(error);
}
276
277 /*
278 * accept1()
279 * MPSAFE
280 */
/*
 * Common implementation of accept(2) and the 4.3BSD-compat oaccept().
 * Waits (unless non-blocking) for a completed connection on the
 * listening socket `uap->s', detaches it from the accept queue, wires
 * it into a freshly allocated file descriptor, and optionally copies
 * the peer address out to `uap->name'/`uap->anamelen'.  `compat'
 * selects the old osockaddr address layout.
 */
static int
accept1(td, uap, compat)
	struct thread *td;
	register struct accept_args /* {
		int	s;
		struct sockaddr	* __restrict name;
		socklen_t	* __restrict anamelen;
	} */ *uap;
	int compat;
{
	struct filedesc *fdp;
	struct file *nfp = NULL;
	struct sockaddr *sa = NULL;
	socklen_t namelen;
	int error;
	struct socket *head, *so;
	int fd;
	u_int fflag;
	pid_t pgid;
	int tmp;

	fdp = td->td_proc->p_fd;
	if (uap->name) {
		/* Fetch the size of the caller's address buffer up front. */
		error = copyin(uap->anamelen, &namelen, sizeof (namelen));
		if(error)
			return (error);
		if (namelen < 0)
			return (EINVAL);
	}
	NET_LOCK_GIANT();
	/* fgetsock() gives us a socket reference that fputsock() drops. */
	error = fgetsock(td, uap->s, &head, &fflag);
	if (error)
		goto done2;
	if ((head->so_options & SO_ACCEPTCONN) == 0) {
		/* Socket was never marked listening via listen(2). */
		error = EINVAL;
		goto done;
	}
	error = falloc(td, &nfp, &fd);
	if (error)
		goto done;
	ACCEPT_LOCK();
	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
		/* Non-blocking socket with nothing ready to accept. */
		ACCEPT_UNLOCK();
		error = EWOULDBLOCK;
		goto noconnection;
	}
	/* Sleep until a completed connection arrives or an error is set. */
	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
		if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
			head->so_error = ECONNABORTED;
			break;
		}
		error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
		    "accept", 0);
		if (error) {
			/* Interrupted by a signal or similar. */
			ACCEPT_UNLOCK();
			goto noconnection;
		}
	}
	if (head->so_error) {
		/* Consume the pending error (one-shot semantics). */
		error = head->so_error;
		head->so_error = 0;
		ACCEPT_UNLOCK();
		goto noconnection;
	}
	so = TAILQ_FIRST(&head->so_comp);
	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));

	/*
	 * Before changing the flags on the socket, we have to bump the
	 * reference count.  Otherwise, if the protocol calls sofree(),
	 * the socket will be released due to a zero refcount.
	 */
	SOCK_LOCK(so);			/* soref() and so_state update */
	soref(so);			/* file descriptor reference */

	/* Detach the new socket from the listener's completed queue. */
	TAILQ_REMOVE(&head->so_comp, so, so_list);
	head->so_qlen--;
	/* New socket inherits the listener's non-blocking mode. */
	so->so_state |= (head->so_state & SS_NBIO);
	so->so_qstate &= ~SQ_COMP;
	so->so_head = NULL;

	SOCK_UNLOCK(so);
	ACCEPT_UNLOCK();

	/* An extra reference on `nfp' has been held for us by falloc(). */
	td->td_retval[0] = fd;

	/* connection has been removed from the listen queue */
	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);

	/* Propagate SIGIO ownership from the listener to the new socket. */
	pgid = fgetown(&head->so_sigio);
	if (pgid != 0)
		fsetown(pgid, &so->so_sigio);

	FILE_LOCK(nfp);
	nfp->f_data = so;	/* nfp has ref count from falloc */
	nfp->f_flag = fflag;
	nfp->f_ops = &socketops;
	nfp->f_type = DTYPE_SOCKET;
	FILE_UNLOCK(nfp);
	/* Sync socket nonblocking/async state with file flags */
	tmp = fflag & FNONBLOCK;
	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
	tmp = fflag & FASYNC;
	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
	sa = 0;
	error = soaccept(so, &sa);
	if (error) {
		/*
		 * return a namelen of zero for older code which might
		 * ignore the return value from accept.
		 */
		if (uap->name != NULL) {
			namelen = 0;
			(void) copyout(&namelen,
			    uap->anamelen, sizeof(*uap->anamelen));
		}
		goto noconnection;
	}
	if (sa == NULL) {
		/* No peer address available; report a zero-length name. */
		namelen = 0;
		if (uap->name)
			goto gotnoname;
		error = 0;
		goto done;
	}
	if (uap->name) {
		/* check sa_len before it is destroyed */
		if (namelen > sa->sa_len)
			namelen = sa->sa_len;
#ifdef COMPAT_OLDSOCK
		/* Old ABI stores the family in the osockaddr layout. */
		if (compat)
			((struct osockaddr *)sa)->sa_family =
			    sa->sa_family;
#endif
		error = copyout(sa, uap->name, (u_int)namelen);
		if (!error)
gotnoname:
			error = copyout(&namelen,
			    uap->anamelen, sizeof (*uap->anamelen));
	}
noconnection:
	if (sa)
		FREE(sa, M_SONAME);

	/*
	 * close the new descriptor, assuming someone hasn't ripped it
	 * out from under us.
	 */
	if (error)
		fdclose(fdp, nfp, fd, td);

	/*
	 * Release explicitly held references before returning.
	 */
done:
	if (nfp != NULL)
		fdrop(nfp, td);
	fputsock(head);
done2:
	NET_UNLOCK_GIANT();
	return (error);
}
445
446 /*
447 * MPSAFE (accept1() is MPSAFE)
448 */
int
accept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{

	/* Modern accept(2): shared implementation with compat == 0. */
	return (accept1(td, uap, 0));
}
457
458 #ifdef COMPAT_OLDSOCK
459 /*
460 * MPSAFE (accept1() is MPSAFE)
461 */
int
oaccept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{

	/* 4.3BSD-compat accept: osockaddr address layout (compat == 1). */
	return (accept1(td, uap, 1));
}
470 #endif /* COMPAT_OLDSOCK */
471
472 /*
473 * MPSAFE
474 */
475 /* ARGSUSED */
476 int
477 connect(td, uap)
478 struct thread *td;
479 register struct connect_args /* {
480 int s;
481 caddr_t name;
482 int namelen;
483 } */ *uap;
484 {
485 struct sockaddr *sa;
486 int error;
487
488 error = getsockaddr(&sa, uap->name, uap->namelen);
489 if (error)
490 return (error);
491
492 return (kern_connect(td, uap->s, sa));
493 }
494
495
/*
 * Connect the socket referenced by `fd' to address `sa'.  Consumes
 * `sa' (always freed before returning).  For blocking sockets, waits
 * for the connection to complete or fail; non-blocking sockets return
 * EINPROGRESS while the connection is still being established.
 */
int
kern_connect(td, fd, sa)
	struct thread *td;
	int fd;
	struct sockaddr *sa;
{
	struct socket *so;
	struct file *fp;
	int error, s;
	int interrupted = 0;

	NET_LOCK_GIANT();
	error = getsock(td->td_proc->p_fd, fd, &fp);
	if (error)
		goto done2;
	so = fp->f_data;
	if (so->so_state & SS_ISCONNECTING) {
		/* A previous non-blocking connect is still in flight. */
		error = EALREADY;
		goto done1;
	}
#ifdef MAC
	SOCK_LOCK(so);
	error = mac_check_socket_connect(td->td_ucred, so, sa);
	SOCK_UNLOCK(so);
	if (error)
		goto bad;
#endif
	error = soconnect(so, sa, td);
	if (error)
		goto bad;
	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
		/* Non-blocking: report the in-progress connection. */
		error = EINPROGRESS;
		goto done1;
	}
	s = splnet();
	SOCK_LOCK(so);
	/* Block until the connection completes or an error is posted. */
	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
		    "connec", 0);
		if (error) {
			if (error == EINTR || error == ERESTART)
				interrupted = 1;
			break;
		}
	}
	if (error == 0) {
		/* Consume any error the protocol posted (one-shot). */
		error = so->so_error;
		so->so_error = 0;
	}
	SOCK_UNLOCK(so);
	splx(s);
bad:
	/*
	 * If interrupted, leave SS_ISCONNECTING set: the connection may
	 * still complete asynchronously.
	 * NOTE(review): so_state is modified here without SOCK_LOCK —
	 * confirm this is safe under the Giant network lock.
	 */
	if (!interrupted)
		so->so_state &= ~SS_ISCONNECTING;
	if (error == ERESTART)
		error = EINTR;
done1:
	fdrop(fp, td);
done2:
	NET_UNLOCK_GIANT();
	/* Ownership of `sa' was transferred to us by the caller. */
	FREE(sa, M_SONAME);
	return (error);
}
559
560 /*
561 * MPSAFE
562 */
/*
 * socketpair(2): create two connected, unnamed sockets and copy their
 * descriptors out to uap->rsv.  Resources are unwound in reverse order
 * of acquisition via the goto-cleanup ladder below.
 */
int
socketpair(td, uap)
	struct thread *td;
	register struct socketpair_args /* {
		int	domain;
		int	type;
		int	protocol;
		int	*rsv;
	} */ *uap;
{
	register struct filedesc *fdp = td->td_proc->p_fd;
	struct file *fp1, *fp2;
	struct socket *so1, *so2;
	int fd, error, sv[2];

	NET_LOCK_GIANT();
	error = socreate(uap->domain, &so1, uap->type, uap->protocol,
	    td->td_ucred, td);
	if (error)
		goto done2;
	error = socreate(uap->domain, &so2, uap->type, uap->protocol,
	    td->td_ucred, td);
	if (error)
		goto free1;
	/* On success extra reference to `fp1' and 'fp2' is set by falloc */
	error = falloc(td, &fp1, &fd);
	if (error)
		goto free2;
	sv[0] = fd;
	fp1->f_data = so1;	/* so1 already has ref count */
	error = falloc(td, &fp2, &fd);
	if (error)
		goto free3;
	fp2->f_data = so2;	/* so2 already has ref count */
	sv[1] = fd;
	error = soconnect2(so1, so2);
	if (error)
		goto free4;
	if (uap->type == SOCK_DGRAM) {
		/*
		 * Datagram socket connection is asymmetric.
		 */
		 error = soconnect2(so2, so1);
		 if (error)
			goto free4;
	}
	FILE_LOCK(fp1);
	fp1->f_flag = FREAD|FWRITE;
	fp1->f_ops = &socketops;
	fp1->f_type = DTYPE_SOCKET;
	FILE_UNLOCK(fp1);
	FILE_LOCK(fp2);
	fp2->f_flag = FREAD|FWRITE;
	fp2->f_ops = &socketops;
	fp2->f_type = DTYPE_SOCKET;
	FILE_UNLOCK(fp2);
	/* Hand the two new descriptors back to the caller. */
	error = copyout(sv, uap->rsv, 2 * sizeof (int));
	fdrop(fp1, td);
	fdrop(fp2, td);
	goto done2;
free4:
	fdclose(fdp, fp2, sv[1], td);
	fdrop(fp2, td);
free3:
	fdclose(fdp, fp1, sv[0], td);
	fdrop(fp1, td);
free2:
	(void)soclose(so2);
free1:
	(void)soclose(so1);
done2:
	NET_UNLOCK_GIANT();
	return (error);
}
637
/*
 * Common front end for the send-family syscalls: copies in the
 * destination address and control data referenced by `mp', then hands
 * the prepared msghdr off to kern_sendit().  On return, any copied-in
 * address has been freed; control mbufs are consumed by kern_sendit().
 */
static int
sendit(td, s, mp, flags)
	register struct thread *td;
	int s;
	register struct msghdr *mp;
	int flags;
{
	struct mbuf *control;
	struct sockaddr *to;
	int error;

	if (mp->msg_name != NULL) {
		/* Replace the user pointer with the in-kernel copy. */
		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
		if (error) {
			to = NULL;
			goto bad;
		}
		mp->msg_name = to;
	} else {
		to = NULL;
	}

	if (mp->msg_control) {
		/* Control data must hold at least one cmsghdr ... */
		if (mp->msg_controllen < sizeof(struct cmsghdr)
#ifdef COMPAT_OLDSOCK
		    /* ... unless it is old-style access rights. */
		    && mp->msg_flags != MSG_COMPAT
#endif
		) {
			error = EINVAL;
			goto bad;
		}
		error = sockargs(&control, mp->msg_control,
		    mp->msg_controllen, MT_CONTROL);
		if (error)
			goto bad;
#ifdef COMPAT_OLDSOCK
		if (mp->msg_flags == MSG_COMPAT) {
			register struct cmsghdr *cm;

			/*
			 * Old access rights lack a cmsghdr; prepend one
			 * describing SCM_RIGHTS at SOL_SOCKET.
			 */
			M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
			if (control == 0) {
				error = ENOBUFS;
				goto bad;
			} else {
				cm = mtod(control, struct cmsghdr *);
				cm->cmsg_len = control->m_len;
				cm->cmsg_level = SOL_SOCKET;
				cm->cmsg_type = SCM_RIGHTS;
			}
		}
#endif
	} else {
		control = NULL;
	}

	error = kern_sendit(td, s, mp, flags, control);

bad:
	if (to)
		FREE(to, M_SONAME);
	return (error);
}
700
/*
 * Back end of the send-family syscalls: builds a uio over the caller's
 * iovec array and performs the protocol send.  `mp->msg_name' (if any)
 * must already be an in-kernel sockaddr; `control' (may be NULL) is
 * consumed by the protocol.  The number of bytes sent is returned in
 * td->td_retval[0].
 */
int
kern_sendit(td, s, mp, flags, control)
	struct thread *td;
	int s;
	struct msghdr *mp;
	int flags;
	struct mbuf *control;
{
	struct file *fp;
	struct uio auio;
	struct iovec *iov;
	struct socket *so;
	int i;
	int len, error;
#ifdef KTRACE
	struct uio *ktruio = NULL;
#endif

	NET_LOCK_GIANT();
	error = getsock(td->td_proc->p_fd, s, &fp);
	if (error)
		goto bad2;
	so = (struct socket *)fp->f_data;

#ifdef MAC
	SOCK_LOCK(so);
	error = mac_check_socket_send(td->td_ucred, so);
	SOCK_UNLOCK(so);
	if (error)
		goto bad;
#endif

	auio.uio_iov = mp->msg_iov;
	auio.uio_iovcnt = mp->msg_iovlen;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_WRITE;
	auio.uio_td = td;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	iov = mp->msg_iov;
	/* Sum the iovec lengths, rejecting overflow into negative resid. */
	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
		if ((auio.uio_resid += iov->iov_len) < 0) {
			error = EINVAL;
			goto bad;
		}
	}
#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO))
		ktruio = cloneuio(&auio);
#endif
	len = auio.uio_resid;
	error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio,
	    0, control, flags, td);
	if (error) {
		/* A partial send before interruption counts as success. */
		if (auio.uio_resid != len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* Generation of SIGPIPE can be controlled per socket */
		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) {
			PROC_LOCK(td->td_proc);
			psignal(td->td_proc, SIGPIPE);
			PROC_UNLOCK(td->td_proc);
		}
	}
	if (error == 0)
		td->td_retval[0] = len - auio.uio_resid;
#ifdef KTRACE
	if (ktruio != NULL) {
		ktruio->uio_resid = td->td_retval[0];
		ktrgenio(s, UIO_WRITE, ktruio, error);
	}
#endif
bad:
	fdrop(fp, td);
bad2:
	NET_UNLOCK_GIANT();
	return (error);
}
779
780 /*
781 * MPSAFE
782 */
783 int
784 sendto(td, uap)
785 struct thread *td;
786 register struct sendto_args /* {
787 int s;
788 caddr_t buf;
789 size_t len;
790 int flags;
791 caddr_t to;
792 int tolen;
793 } */ *uap;
794 {
795 struct msghdr msg;
796 struct iovec aiov;
797 int error;
798
799 msg.msg_name = uap->to;
800 msg.msg_namelen = uap->tolen;
801 msg.msg_iov = &aiov;
802 msg.msg_iovlen = 1;
803 msg.msg_control = 0;
804 #ifdef COMPAT_OLDSOCK
805 msg.msg_flags = 0;
806 #endif
807 aiov.iov_base = uap->buf;
808 aiov.iov_len = uap->len;
809 error = sendit(td, uap->s, &msg, uap->flags);
810 return (error);
811 }
812
813 #ifdef COMPAT_OLDSOCK
814 /*
815 * MPSAFE
816 */
817 int
818 osend(td, uap)
819 struct thread *td;
820 register struct osend_args /* {
821 int s;
822 caddr_t buf;
823 int len;
824 int flags;
825 } */ *uap;
826 {
827 struct msghdr msg;
828 struct iovec aiov;
829 int error;
830
831 msg.msg_name = 0;
832 msg.msg_namelen = 0;
833 msg.msg_iov = &aiov;
834 msg.msg_iovlen = 1;
835 aiov.iov_base = uap->buf;
836 aiov.iov_len = uap->len;
837 msg.msg_control = 0;
838 msg.msg_flags = 0;
839 error = sendit(td, uap->s, &msg, uap->flags);
840 return (error);
841 }
842
843 /*
844 * MPSAFE
845 */
int
osendmsg(td, uap)
	struct thread *td;
	struct osendmsg_args /* {
		int	s;
		caddr_t	msg;
		int	flags;
	} */ *uap;
{
	struct msghdr msg;
	struct iovec *iov;
	int error;

	/*
	 * The old omsghdr overlays the head of the new msghdr, so only
	 * copy in the old structure's size worth of fields.
	 */
	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
	if (error)
		return (error);
	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
	if (error)
		return (error);
	msg.msg_iov = iov;
	/* Flag old-style access-rights handling for sendit(). */
	msg.msg_flags = MSG_COMPAT;
	error = sendit(td, uap->s, &msg, uap->flags);
	free(iov, M_IOV);
	return (error);
}
871 #endif
872
873 /*
874 * MPSAFE
875 */
int
sendmsg(td, uap)
	struct thread *td;
	struct sendmsg_args /* {
		int	s;
		caddr_t	msg;
		int	flags;
	} */ *uap;
{
	struct msghdr msg;
	struct iovec *iov;
	int error;

	error = copyin(uap->msg, &msg, sizeof (msg));
	if (error)
		return (error);
	/* Copy the iovec array into a kernel allocation (freed below). */
	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
	if (error)
		return (error);
	msg.msg_iov = iov;
#ifdef COMPAT_OLDSOCK
	/* A new-style caller must not trigger compat handling. */
	msg.msg_flags = 0;
#endif
	error = sendit(td, uap->s, &msg, uap->flags);
	free(iov, M_IOV);
	return (error);
}
903
/*
 * Common back end for the receive-family syscalls.  Performs the
 * protocol receive described by `mp', then copies out the source
 * address (to mp->msg_name, with its length also written through
 * `namelenp' if non-NULL) and any control data (to mp->msg_control).
 * On success td->td_retval[0] holds the number of data bytes received
 * and mp->msg_namelen/msg_controllen are updated in place.
 */
static int
recvit(td, s, mp, namelenp)
	struct thread *td;
	int s;
	struct msghdr *mp;
	void *namelenp;
{
	struct uio auio;
	struct iovec *iov;
	int i;
	socklen_t len;
	int error;
	struct mbuf *m, *control = 0;
	caddr_t ctlbuf;
	struct file *fp;
	struct socket *so;
	struct sockaddr *fromsa = 0;
#ifdef KTRACE
	struct uio *ktruio = NULL;
#endif

	NET_LOCK_GIANT();
	error = getsock(td->td_proc->p_fd, s, &fp);
	if (error) {
		NET_UNLOCK_GIANT();
		return (error);
	}
	so = fp->f_data;

#ifdef MAC
	SOCK_LOCK(so);
	error = mac_check_socket_receive(td->td_ucred, so);
	SOCK_UNLOCK(so);
	if (error) {
		fdrop(fp, td);
		NET_UNLOCK_GIANT();
		return (error);
	}
#endif

	auio.uio_iov = mp->msg_iov;
	auio.uio_iovcnt = mp->msg_iovlen;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_td = td;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	iov = mp->msg_iov;
	/* Sum the iovec lengths, rejecting overflow into negative resid. */
	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
		if ((auio.uio_resid += iov->iov_len) < 0) {
			fdrop(fp, td);
			NET_UNLOCK_GIANT();
			return (EINVAL);
		}
	}
#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO))
		ktruio = cloneuio(&auio);
#endif
	len = auio.uio_resid;
	/* Only request control data if the caller supplied a buffer. */
	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
	    (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
	    &mp->msg_flags);
	if (error) {
		/* A partial receive before interruption counts as success. */
		if (auio.uio_resid != (int)len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
#ifdef KTRACE
	if (ktruio != NULL) {
		ktruio->uio_resid = (int)len - auio.uio_resid;
		ktrgenio(s, UIO_READ, ktruio, error);
	}
#endif
	if (error)
		goto out;
	td->td_retval[0] = (int)len - auio.uio_resid;
	if (mp->msg_name) {
		len = mp->msg_namelen;
		if (len <= 0 || fromsa == 0)
			len = 0;
		else {
			/* save sa_len before it is destroyed by MSG_COMPAT */
			len = MIN(len, fromsa->sa_len);
#ifdef COMPAT_OLDSOCK
			if (mp->msg_flags & MSG_COMPAT)
				((struct osockaddr *)fromsa)->sa_family =
				    fromsa->sa_family;
#endif
			error = copyout(fromsa, mp->msg_name, (unsigned)len);
			if (error)
				goto out;
		}
		mp->msg_namelen = len;
		if (namelenp &&
		    (error = copyout(&len, namelenp, sizeof (socklen_t)))) {
#ifdef COMPAT_OLDSOCK
			if (mp->msg_flags & MSG_COMPAT)
				error = 0;	/* old recvfrom didn't check */
			else
#endif
			goto out;
		}
	}
	if (mp->msg_control) {
#ifdef COMPAT_OLDSOCK
		/*
		 * We assume that old recvmsg calls won't receive access
		 * rights and other control info, esp. as control info
		 * is always optional and those options didn't exist in 4.3.
		 * If we receive rights, trim the cmsghdr; anything else
		 * is tossed.
		 */
		if (control && mp->msg_flags & MSG_COMPAT) {
			if (mtod(control, struct cmsghdr *)->cmsg_level !=
			    SOL_SOCKET ||
			    mtod(control, struct cmsghdr *)->cmsg_type !=
			    SCM_RIGHTS) {
				mp->msg_controllen = 0;
				goto out;
			}
			control->m_len -= sizeof (struct cmsghdr);
			control->m_data += sizeof (struct cmsghdr);
		}
#endif
		len = mp->msg_controllen;
		m = control;
		mp->msg_controllen = 0;
		ctlbuf = mp->msg_control;

		/* Copy the control mbuf chain out, truncating if needed. */
		while (m && len > 0) {
			unsigned int tocopy;

			if (len >= m->m_len)
				tocopy = m->m_len;
			else {
				mp->msg_flags |= MSG_CTRUNC;
				tocopy = len;
			}

			if ((error = copyout(mtod(m, caddr_t),
			    ctlbuf, tocopy)) != 0)
				goto out;

			ctlbuf += tocopy;
			len -= tocopy;
			m = m->m_next;
		}
		/* Report how much control data was actually delivered. */
		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
	}
out:
	fdrop(fp, td);
	NET_UNLOCK_GIANT();
	if (fromsa)
		FREE(fromsa, M_SONAME);
	if (control)
		m_freem(control);
	return (error);
}
1063
1064 /*
1065 * MPSAFE
1066 */
1067 int
1068 recvfrom(td, uap)
1069 struct thread *td;
1070 register struct recvfrom_args /* {
1071 int s;
1072 caddr_t buf;
1073 size_t len;
1074 int flags;
1075 struct sockaddr * __restrict from;
1076 socklen_t * __restrict fromlenaddr;
1077 } */ *uap;
1078 {
1079 struct msghdr msg;
1080 struct iovec aiov;
1081 int error;
1082
1083 if (uap->fromlenaddr) {
1084 error = copyin(uap->fromlenaddr,
1085 &msg.msg_namelen, sizeof (msg.msg_namelen));
1086 if (error)
1087 goto done2;
1088 } else {
1089 msg.msg_namelen = 0;
1090 }
1091 msg.msg_name = uap->from;
1092 msg.msg_iov = &aiov;
1093 msg.msg_iovlen = 1;
1094 aiov.iov_base = uap->buf;
1095 aiov.iov_len = uap->len;
1096 msg.msg_control = 0;
1097 msg.msg_flags = uap->flags;
1098 error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1099 done2:
1100 return(error);
1101 }
1102
1103 #ifdef COMPAT_OLDSOCK
1104 /*
1105 * MPSAFE
1106 */
int
orecvfrom(td, uap)
	struct thread *td;
	struct recvfrom_args *uap;
{

	/* Old-style recvfrom: mark for compat handling, then delegate. */
	uap->flags |= MSG_COMPAT;
	return (recvfrom(td, uap));
}
1116 #endif
1117
1118
1119 #ifdef COMPAT_OLDSOCK
1120 /*
1121 * MPSAFE
1122 */
1123 int
1124 orecv(td, uap)
1125 struct thread *td;
1126 register struct orecv_args /* {
1127 int s;
1128 caddr_t buf;
1129 int len;
1130 int flags;
1131 } */ *uap;
1132 {
1133 struct msghdr msg;
1134 struct iovec aiov;
1135 int error;
1136
1137 msg.msg_name = 0;
1138 msg.msg_namelen = 0;
1139 msg.msg_iov = &aiov;
1140 msg.msg_iovlen = 1;
1141 aiov.iov_base = uap->buf;
1142 aiov.iov_len = uap->len;
1143 msg.msg_control = 0;
1144 msg.msg_flags = uap->flags;
1145 error = recvit(td, uap->s, &msg, NULL);
1146 return (error);
1147 }
1148
1149 /*
1150 * Old recvmsg. This code takes advantage of the fact that the old msghdr
1151 * overlays the new one, missing only the flags, and with the (old) access
1152 * rights where the control fields are now.
1153 *
1154 * MPSAFE
1155 */
int
orecvmsg(td, uap)
	struct thread *td;
	struct orecvmsg_args /* {
		int	s;
		struct omsghdr *msg;
		int	flags;
	} */ *uap;
{
	struct msghdr msg;
	struct iovec *iov;
	int error;

	/* Old omsghdr overlays the new msghdr (see comment above). */
	error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
	if (error)
		return (error);
	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
	if (error)
		return (error);
	msg.msg_flags = uap->flags | MSG_COMPAT;
	msg.msg_iov = iov;
	/* recvit() writes the name length straight back to the user struct. */
	error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
	if (msg.msg_controllen && error == 0)
		/* Old ABI: report control length as "access rights" length. */
		error = copyout(&msg.msg_controllen,
		    &uap->msg->msg_accrightslen, sizeof (int));
	free(iov, M_IOV);
	return (error);
}
1184 #endif
1185
1186 /*
1187 * MPSAFE
1188 */
int
recvmsg(td, uap)
	struct thread *td;
	struct recvmsg_args /* {
		int	s;
		struct msghdr *msg;
		int	flags;
	} */ *uap;
{
	struct msghdr msg;
	struct iovec *uiov, *iov;
	int error;

	error = copyin(uap->msg, &msg, sizeof (msg));
	if (error)
		return (error);
	error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
	if (error)
		return (error);
	msg.msg_flags = uap->flags;
#ifdef COMPAT_OLDSOCK
	/* New-style callers must not get compat handling. */
	msg.msg_flags &= ~MSG_COMPAT;
#endif
	/* Remember the user's iov pointer so we can restore it on copyout. */
	uiov = msg.msg_iov;
	msg.msg_iov = iov;
	error = recvit(td, uap->s, &msg, NULL);
	if (error == 0) {
		/* Copy the updated msghdr (flags, lengths) back out. */
		msg.msg_iov = uiov;
		error = copyout(&msg, uap->msg, sizeof(msg));
	}
	free(iov, M_IOV);
	return (error);
}
1222
1223 /*
1224 * MPSAFE
1225 */
1226 /* ARGSUSED */
1227 int
1228 shutdown(td, uap)
1229 struct thread *td;
1230 register struct shutdown_args /* {
1231 int s;
1232 int how;
1233 } */ *uap;
1234 {
1235 struct socket *so;
1236 struct file *fp;
1237 int error;
1238
1239 NET_LOCK_GIANT();
1240 error = getsock(td->td_proc->p_fd, uap->s, &fp);
1241 if (error == 0) {
1242 so = fp->f_data;
1243 error = soshutdown(so, uap->how);
1244 fdrop(fp, td);
1245 }
1246 NET_UNLOCK_GIANT();
1247 return (error);
1248 }
1249
1250 /*
1251 * MPSAFE
1252 */
1253 /* ARGSUSED */
/* ARGSUSED */
int
setsockopt(td, uap)
	struct thread *td;
	register struct setsockopt_args /* {
		int	s;
		int	level;
		int	name;
		caddr_t	val;
		int	valsize;
	} */ *uap;
{

	/* Thin wrapper: the option value lives in userspace. */
	return (kern_setsockopt(td, uap->s, uap->level, uap->name,
	    uap->val, UIO_USERSPACE, uap->valsize));
}
1269
/*
 * Kernel version of setsockopt.  `val' may be a userland or kernel
 * pointer, as selected by `valseg'.
 */
int
kern_setsockopt(td, s, level, name, val, valseg, valsize)
	struct thread *td;
	int s;
	int level;
	int name;
	void *val;
	enum uio_seg valseg;
	socklen_t valsize;
{
	int error;
	struct socket *so;
	struct file *fp;
	struct sockopt sopt;

	if (val == NULL && valsize != 0)
		return (EFAULT);
	/*
	 * NOTE(review): if socklen_t is unsigned this check can never
	 * fire — confirm against the socklen_t definition.
	 */
	if (valsize < 0)
		return (EINVAL);

	sopt.sopt_dir = SOPT_SET;
	sopt.sopt_level = level;
	sopt.sopt_name = name;
	sopt.sopt_val = val;
	sopt.sopt_valsize = valsize;
	switch (valseg) {
	case UIO_USERSPACE:
		sopt.sopt_td = td;
		break;
	case UIO_SYSSPACE:
		/* NULL thread tells sosetopt() the value is in the kernel. */
		sopt.sopt_td = NULL;
		break;
	default:
		panic("kern_setsockopt called with bad valseg");
	}

	NET_LOCK_GIANT();
	error = getsock(td->td_proc->p_fd, s, &fp);
	if (error == 0) {
		so = fp->f_data;
		error = sosetopt(so, &sopt);
		fdrop(fp, td);
	}
	NET_UNLOCK_GIANT();
	return(error);
}
1316
1317 /*
1318 * MPSAFE
1319 */
1320 /* ARGSUSED */
/* ARGSUSED */
int
getsockopt(td, uap)
	struct thread *td;
	register struct getsockopt_args /* {
		int	s;
		int	level;
		int	name;
		void * __restrict	val;
		socklen_t * __restrict avalsize;
	} */ *uap;
{
	socklen_t valsize;
	int error;

	if (uap->val) {
		error = copyin(uap->avalsize, &valsize, sizeof (valsize));
		if (error)
			return (error);
	}

	/*
	 * When uap->val is NULL, valsize is left uninitialized here;
	 * kern_getsockopt() zeroes *valsize in that case before use.
	 */
	error = kern_getsockopt(td, uap->s, uap->level, uap->name,
	    uap->val, UIO_USERSPACE, &valsize);

	if (error == 0)
		error = copyout(&valsize, uap->avalsize, sizeof (valsize));
	return (error);
}
1348
1349 /*
1350 * Kernel version of getsockopt.
1351 * optval can be a userland or userspace. optlen is always a kernel pointer.
1352 */
int
kern_getsockopt(td, s, level, name, val, valseg, valsize)
	struct thread *td;
	int s;
	int level;
	int name;
	void *val;
	enum uio_seg valseg;
	socklen_t *valsize;
{
	int error;
	struct socket *so;
	struct file *fp;
	struct sockopt sopt;

	/* A NULL value buffer means the caller only wants the length. */
	if (val == NULL)
		*valsize = 0;
	/*
	 * NOTE(review): if socklen_t is unsigned this check can never
	 * fire — confirm against the socklen_t definition.
	 */
	if (*valsize < 0)
		return (EINVAL);

	sopt.sopt_dir = SOPT_GET;
	sopt.sopt_level = level;
	sopt.sopt_name = name;
	sopt.sopt_val = val;
	sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
	switch (valseg) {
	case UIO_USERSPACE:
		sopt.sopt_td = td;
		break;
	case UIO_SYSSPACE:
		/* NULL thread tells sogetopt() the buffer is in the kernel. */
		sopt.sopt_td = NULL;
		break;
	default:
		panic("kern_getsockopt called with bad valseg");
	}

	NET_LOCK_GIANT();
	error = getsock(td->td_proc->p_fd, s, &fp);
	if (error == 0) {
		so = fp->f_data;
		error = sogetopt(so, &sopt);
		/* Report back how much option data was actually returned. */
		*valsize = sopt.sopt_valsize;
		fdrop(fp, td);
	}
	NET_UNLOCK_GIANT();
	return (error);
}
1400
1401 /*
1402 * getsockname1() - Get socket name.
1403 *
1404 * MPSAFE
1405 */
1406 /* ARGSUSED */
1407 static int
1408 getsockname1(td, uap, compat)
1409 struct thread *td;
1410 register struct getsockname_args /* {
1411 int fdes;
1412 struct sockaddr * __restrict asa;
1413 socklen_t * __restrict alen;
1414 } */ *uap;
1415 int compat;
1416 {
1417 struct socket *so;
1418 struct sockaddr *sa;
1419 struct file *fp;
1420 socklen_t len;
1421 int error;
1422
1423 NET_LOCK_GIANT();
1424 error = getsock(td->td_proc->p_fd, uap->fdes, &fp);
1425 if (error)
1426 goto done2;
1427 so = fp->f_data;
1428 error = copyin(uap->alen, &len, sizeof (len));
1429 if (error)
1430 goto done1;
1431 if (len < 0) {
1432 error = EINVAL;
1433 goto done1;
1434 }
1435 sa = 0;
1436 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1437 if (error)
1438 goto bad;
1439 if (sa == 0) {
1440 len = 0;
1441 goto gotnothing;
1442 }
1443
1444 len = MIN(len, sa->sa_len);
1445 #ifdef COMPAT_OLDSOCK
1446 if (compat)
1447 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1448 #endif
1449 error = copyout(sa, uap->asa, (u_int)len);
1450 if (error == 0)
1451 gotnothing:
1452 error = copyout(&len, uap->alen, sizeof (len));
1453 bad:
1454 if (sa)
1455 FREE(sa, M_SONAME);
1456 done1:
1457 fdrop(fp, td);
1458 done2:
1459 NET_UNLOCK_GIANT();
1460 return (error);
1461 }
1462
1463 /*
1464 * MPSAFE
1465 */
1466 int
1467 getsockname(td, uap)
1468 struct thread *td;
1469 struct getsockname_args *uap;
1470 {
1471
1472 return (getsockname1(td, uap, 0));
1473 }
1474
#ifdef COMPAT_OLDSOCK
/*
 * Old (4.3BSD) getsockname(2): selects the osockaddr layout in
 * getsockname1() via compat == 1.
 *
 * MPSAFE
 */
int
ogetsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{

	return (getsockname1(td, uap, 1));
}
#endif /* COMPAT_OLDSOCK */
1488
1489 /*
1490 * getpeername1() - Get name of peer for connected socket.
1491 *
1492 * MPSAFE
1493 */
1494 /* ARGSUSED */
1495 static int
1496 getpeername1(td, uap, compat)
1497 struct thread *td;
1498 register struct getpeername_args /* {
1499 int fdes;
1500 struct sockaddr * __restrict asa;
1501 socklen_t * __restrict alen;
1502 } */ *uap;
1503 int compat;
1504 {
1505 struct socket *so;
1506 struct sockaddr *sa;
1507 struct file *fp;
1508 socklen_t len;
1509 int error;
1510
1511 NET_LOCK_GIANT();
1512 error = getsock(td->td_proc->p_fd, uap->fdes, &fp);
1513 if (error)
1514 goto done2;
1515 so = fp->f_data;
1516 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1517 error = ENOTCONN;
1518 goto done1;
1519 }
1520 error = copyin(uap->alen, &len, sizeof (len));
1521 if (error)
1522 goto done1;
1523 if (len < 0) {
1524 error = EINVAL;
1525 goto done1;
1526 }
1527 sa = 0;
1528 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1529 if (error)
1530 goto bad;
1531 if (sa == 0) {
1532 len = 0;
1533 goto gotnothing;
1534 }
1535 len = MIN(len, sa->sa_len);
1536 #ifdef COMPAT_OLDSOCK
1537 if (compat)
1538 ((struct osockaddr *)sa)->sa_family =
1539 sa->sa_family;
1540 #endif
1541 error = copyout(sa, uap->asa, (u_int)len);
1542 if (error)
1543 goto bad;
1544 gotnothing:
1545 error = copyout(&len, uap->alen, sizeof (len));
1546 bad:
1547 if (sa)
1548 FREE(sa, M_SONAME);
1549 done1:
1550 fdrop(fp, td);
1551 done2:
1552 NET_UNLOCK_GIANT();
1553 return (error);
1554 }
1555
1556 /*
1557 * MPSAFE
1558 */
1559 int
1560 getpeername(td, uap)
1561 struct thread *td;
1562 struct getpeername_args *uap;
1563 {
1564
1565 return (getpeername1(td, uap, 0));
1566 }
1567
#ifdef COMPAT_OLDSOCK
/*
 * Old (4.3BSD) getpeername(2): selects the osockaddr layout in
 * getpeername1() via compat == 1.
 *
 * MPSAFE
 */
int
ogetpeername(td, uap)
	struct thread *td;
	struct ogetpeername_args *uap;
{

	/* XXX uap should have type `getpeername_args *' to begin with. */
	return (getpeername1(td, (struct getpeername_args *)uap, 1));
}
#endif /* COMPAT_OLDSOCK */
1582
1583 int
1584 sockargs(mp, buf, buflen, type)
1585 struct mbuf **mp;
1586 caddr_t buf;
1587 int buflen, type;
1588 {
1589 register struct sockaddr *sa;
1590 register struct mbuf *m;
1591 int error;
1592
1593 if ((u_int)buflen > MLEN) {
1594 #ifdef COMPAT_OLDSOCK
1595 if (type == MT_SONAME && (u_int)buflen <= 112)
1596 buflen = MLEN; /* unix domain compat. hack */
1597 else
1598 #endif
1599 if ((u_int)buflen > MCLBYTES)
1600 return (EINVAL);
1601 }
1602 m = m_get(M_TRYWAIT, type);
1603 if (m == NULL)
1604 return (ENOBUFS);
1605 if ((u_int)buflen > MLEN) {
1606 MCLGET(m, M_TRYWAIT);
1607 if ((m->m_flags & M_EXT) == 0) {
1608 m_free(m);
1609 return (ENOBUFS);
1610 }
1611 }
1612 m->m_len = buflen;
1613 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1614 if (error)
1615 (void) m_free(m);
1616 else {
1617 *mp = m;
1618 if (type == MT_SONAME) {
1619 sa = mtod(m, struct sockaddr *);
1620
1621 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1622 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1623 sa->sa_family = sa->sa_len;
1624 #endif
1625 sa->sa_len = buflen;
1626 }
1627 }
1628 return (error);
1629 }
1630
1631 int
1632 getsockaddr(namp, uaddr, len)
1633 struct sockaddr **namp;
1634 caddr_t uaddr;
1635 size_t len;
1636 {
1637 struct sockaddr *sa;
1638 int error;
1639
1640 if (len > SOCK_MAXADDRLEN)
1641 return (ENAMETOOLONG);
1642 if (len < offsetof(struct sockaddr, sa_data[0]))
1643 return (EINVAL);
1644 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1645 error = copyin(uaddr, sa, len);
1646 if (error) {
1647 FREE(sa, M_SONAME);
1648 } else {
1649 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1650 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1651 sa->sa_family = sa->sa_len;
1652 #endif
1653 sa->sa_len = len;
1654 *namp = sa;
1655 }
1656 return (error);
1657 }
1658
1659 /*
1660 * Detach mapped page and release resources back to the system.
1661 */
1662 void
1663 sf_buf_mext(void *addr, void *args)
1664 {
1665 vm_page_t m;
1666
1667 m = sf_buf_page(args);
1668 sf_buf_free(args);
1669 vm_page_lock_queues();
1670 vm_page_unwire(m, 0);
1671 /*
1672 * Check for the object going away on us. This can
1673 * happen since we don't hold a reference to it.
1674 * If so, we're responsible for freeing the page.
1675 */
1676 if (m->wire_count == 0 && m->object == NULL)
1677 vm_page_free(m);
1678 vm_page_unlock_queues();
1679 }
1680
1681 /*
1682 * sendfile(2)
1683 *
1684 * MPSAFE
1685 *
1686 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1687 * struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1688 *
1689 * Send a file specified by 'fd' and starting at 'offset' to a socket
1690 * specified by 's'. Send only 'nbytes' of the file or until EOF if
1691 * nbytes == 0. Optionally add a header and/or trailer to the socket
1692 * output. If specified, write the total number of bytes sent into *sbytes.
1693 *
1694 */
1695 int
1696 sendfile(struct thread *td, struct sendfile_args *uap)
1697 {
1698
1699 return (do_sendfile(td, uap, 0));
1700 }
1701
#ifdef COMPAT_FREEBSD4
/*
 * FreeBSD 4.x compatible sendfile(2): repackage the old argument
 * structure and run the common implementation with compat == 1, which
 * selects the old *sbytes accounting in do_sendfile().
 */
int
freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
{
	struct sendfile_args args = {
		.fd = uap->fd,
		.s = uap->s,
		.offset = uap->offset,
		.nbytes = uap->nbytes,
		.hdtr = uap->hdtr,
		.sbytes = uap->sbytes,
		.flags = uap->flags,
	};

	return (do_sendfile(td, &args, 1));
}
#endif /* COMPAT_FREEBSD4 */
1719
1720 static int
1721 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1722 {
1723 struct vnode *vp;
1724 struct vm_object *obj;
1725 struct socket *so = NULL;
1726 struct mbuf *m, *m_header = NULL;
1727 struct sf_buf *sf;
1728 struct vm_page *pg;
1729 struct writev_args nuap;
1730 struct sf_hdtr hdtr;
1731 struct uio *hdr_uio = NULL;
1732 off_t off, xfsize, hdtr_size, sbytes = 0;
1733 int error, headersize = 0, headersent = 0;
1734
1735 mtx_lock(&Giant);
1736
1737 hdtr_size = 0;
1738
1739 /*
1740 * The descriptor must be a regular file and have a backing VM object.
1741 */
1742 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1743 goto done;
1744 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1745 VOP_UNLOCK(vp, 0, td);
1746 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
1747 error = EINVAL;
1748 goto done;
1749 }
1750 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1751 goto done;
1752 if (so->so_type != SOCK_STREAM) {
1753 error = EINVAL;
1754 goto done;
1755 }
1756 if ((so->so_state & SS_ISCONNECTED) == 0) {
1757 error = ENOTCONN;
1758 goto done;
1759 }
1760 if (uap->offset < 0) {
1761 error = EINVAL;
1762 goto done;
1763 }
1764
1765 #ifdef MAC
1766 SOCK_LOCK(so);
1767 error = mac_check_socket_send(td->td_ucred, so);
1768 SOCK_UNLOCK(so);
1769 if (error)
1770 goto done;
1771 #endif
1772
1773 /*
1774 * If specified, get the pointer to the sf_hdtr struct for
1775 * any headers/trailers.
1776 */
1777 if (uap->hdtr != NULL) {
1778 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1779 if (error)
1780 goto done;
1781 /*
1782 * Send any headers.
1783 */
1784 if (hdtr.headers != NULL) {
1785 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1786 if (error)
1787 goto done;
1788 hdr_uio->uio_td = td;
1789 hdr_uio->uio_rw = UIO_WRITE;
1790 if (hdr_uio->uio_resid > 0) {
1791 m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0);
1792 if (m_header == NULL)
1793 goto done;
1794 headersize = m_header->m_pkthdr.len;
1795 if (compat)
1796 sbytes += headersize;
1797 }
1798 }
1799 }
1800
1801 /*
1802 * Protect against multiple writers to the socket.
1803 */
1804 SOCKBUF_LOCK(&so->so_snd);
1805 (void) sblock(&so->so_snd, M_WAITOK);
1806 SOCKBUF_UNLOCK(&so->so_snd);
1807
1808 /*
1809 * Loop through the pages in the file, starting with the requested
1810 * offset. Get a file page (do I/O if necessary), map the file page
1811 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1812 * it on the socket.
1813 */
1814 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1815 vm_pindex_t pindex;
1816 vm_offset_t pgoff;
1817
1818 pindex = OFF_TO_IDX(off);
1819 VM_OBJECT_LOCK(obj);
1820 retry_lookup:
1821 /*
1822 * Calculate the amount to transfer. Not to exceed a page,
1823 * the EOF, or the passed in nbytes.
1824 */
1825 xfsize = obj->un_pager.vnp.vnp_size - off;
1826 VM_OBJECT_UNLOCK(obj);
1827 if (xfsize > PAGE_SIZE)
1828 xfsize = PAGE_SIZE;
1829 pgoff = (vm_offset_t)(off & PAGE_MASK);
1830 if (PAGE_SIZE - pgoff < xfsize)
1831 xfsize = PAGE_SIZE - pgoff;
1832 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1833 xfsize = uap->nbytes - sbytes;
1834 if (xfsize <= 0) {
1835 if (m_header != NULL) {
1836 m = m_header;
1837 m_header = NULL;
1838 SOCKBUF_LOCK(&so->so_snd);
1839 goto retry_space;
1840 } else
1841 break;
1842 }
1843 /*
1844 * Optimize the non-blocking case by looking at the socket space
1845 * before going to the extra work of constituting the sf_buf.
1846 */
1847 SOCKBUF_LOCK(&so->so_snd);
1848 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1849 if (so->so_snd.sb_state & SBS_CANTSENDMORE)
1850 error = EPIPE;
1851 else
1852 error = EAGAIN;
1853 sbunlock(&so->so_snd);
1854 SOCKBUF_UNLOCK(&so->so_snd);
1855 goto done;
1856 }
1857 SOCKBUF_UNLOCK(&so->so_snd);
1858 VM_OBJECT_LOCK(obj);
1859 /*
1860 * Attempt to look up the page.
1861 *
1862 * Allocate if not found
1863 *
1864 * Wait and loop if busy.
1865 */
1866 pg = vm_page_lookup(obj, pindex);
1867
1868 if (pg == NULL) {
1869 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NOBUSY |
1870 VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
1871 if (pg == NULL) {
1872 VM_OBJECT_UNLOCK(obj);
1873 VM_WAIT;
1874 VM_OBJECT_LOCK(obj);
1875 goto retry_lookup;
1876 }
1877 vm_page_lock_queues();
1878 } else {
1879 vm_page_lock_queues();
1880 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
1881 goto retry_lookup;
1882 /*
1883 * Wire the page so it does not get ripped out from
1884 * under us.
1885 */
1886 vm_page_wire(pg);
1887 }
1888
1889 /*
1890 * If page is not valid for what we need, initiate I/O
1891 */
1892
1893 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) {
1894 VM_OBJECT_UNLOCK(obj);
1895 } else if (uap->flags & SF_NODISKIO) {
1896 error = EBUSY;
1897 } else {
1898 int bsize, resid;
1899
1900 /*
1901 * Ensure that our page is still around when the I/O
1902 * completes.
1903 */
1904 vm_page_io_start(pg);
1905 vm_page_unlock_queues();
1906 VM_OBJECT_UNLOCK(obj);
1907
1908 /*
1909 * Get the page from backing store.
1910 */
1911 bsize = vp->v_mount->mnt_stat.f_iosize;
1912 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
1913 /*
1914 * XXXMAC: Because we don't have fp->f_cred here,
1915 * we pass in NOCRED. This is probably wrong, but
1916 * is consistent with our original implementation.
1917 */
1918 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
1919 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
1920 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
1921 td->td_ucred, NOCRED, &resid, td);
1922 VOP_UNLOCK(vp, 0, td);
1923 VM_OBJECT_LOCK(obj);
1924 vm_page_lock_queues();
1925 vm_page_io_finish(pg);
1926 if (!error)
1927 VM_OBJECT_UNLOCK(obj);
1928 mbstat.sf_iocnt++;
1929 }
1930
1931 if (error) {
1932 vm_page_unwire(pg, 0);
1933 /*
1934 * See if anyone else might know about this page.
1935 * If not and it is not valid, then free it.
1936 */
1937 if (pg->wire_count == 0 && pg->valid == 0 &&
1938 pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1939 pg->hold_count == 0) {
1940 vm_page_free(pg);
1941 }
1942 vm_page_unlock_queues();
1943 VM_OBJECT_UNLOCK(obj);
1944 SOCKBUF_LOCK(&so->so_snd);
1945 sbunlock(&so->so_snd);
1946 SOCKBUF_UNLOCK(&so->so_snd);
1947 goto done;
1948 }
1949 vm_page_unlock_queues();
1950
1951 /*
1952 * Get a sendfile buf. We usually wait as long as necessary,
1953 * but this wait can be interrupted.
1954 */
1955 if ((sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) {
1956 mbstat.sf_allocfail++;
1957 vm_page_lock_queues();
1958 vm_page_unwire(pg, 0);
1959 if (pg->wire_count == 0 && pg->object == NULL)
1960 vm_page_free(pg);
1961 vm_page_unlock_queues();
1962 SOCKBUF_LOCK(&so->so_snd);
1963 sbunlock(&so->so_snd);
1964 SOCKBUF_UNLOCK(&so->so_snd);
1965 error = EINTR;
1966 goto done;
1967 }
1968
1969 /*
1970 * Get an mbuf header and set it up as having external storage.
1971 */
1972 if (m_header)
1973 MGET(m, M_TRYWAIT, MT_DATA);
1974 else
1975 MGETHDR(m, M_TRYWAIT, MT_DATA);
1976 if (m == NULL) {
1977 error = ENOBUFS;
1978 sf_buf_mext((void *)sf_buf_kva(sf), sf);
1979 SOCKBUF_LOCK(&so->so_snd);
1980 sbunlock(&so->so_snd);
1981 SOCKBUF_UNLOCK(&so->so_snd);
1982 goto done;
1983 }
1984 /*
1985 * Setup external storage for mbuf.
1986 */
1987 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY,
1988 EXT_SFBUF);
1989 m->m_data = (char *)sf_buf_kva(sf) + pgoff;
1990 m->m_pkthdr.len = m->m_len = xfsize;
1991
1992 if (m_header) {
1993 m_cat(m_header, m);
1994 m = m_header;
1995 m_header = NULL;
1996 m_fixhdr(m);
1997 }
1998
1999 /*
2000 * Add the buffer to the socket buffer chain.
2001 */
2002 SOCKBUF_LOCK(&so->so_snd);
2003 retry_space:
2004 /*
2005 * Make sure that the socket is still able to take more data.
2006 * CANTSENDMORE being true usually means that the connection
2007 * was closed. so_error is true when an error was sensed after
2008 * a previous send.
2009 * The state is checked after the page mapping and buffer
2010 * allocation above since those operations may block and make
2011 * any socket checks stale. From this point forward, nothing
2012 * blocks before the pru_send (or more accurately, any blocking
2013 * results in a loop back to here to re-check).
2014 */
2015 SOCKBUF_LOCK_ASSERT(&so->so_snd);
2016 if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) {
2017 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2018 error = EPIPE;
2019 } else {
2020 error = so->so_error;
2021 so->so_error = 0;
2022 }
2023 m_freem(m);
2024 sbunlock(&so->so_snd);
2025 SOCKBUF_UNLOCK(&so->so_snd);
2026 goto done;
2027 }
2028 /*
2029 * Wait for socket space to become available. We do this just
2030 * after checking the connection state above in order to avoid
2031 * a race condition with sbwait().
2032 */
2033 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
2034 if (so->so_state & SS_NBIO) {
2035 m_freem(m);
2036 sbunlock(&so->so_snd);
2037 SOCKBUF_UNLOCK(&so->so_snd);
2038 error = EAGAIN;
2039 goto done;
2040 }
2041 error = sbwait(&so->so_snd);
2042 /*
2043 * An error from sbwait usually indicates that we've
2044 * been interrupted by a signal. If we've sent anything
2045 * then return bytes sent, otherwise return the error.
2046 */
2047 if (error) {
2048 m_freem(m);
2049 sbunlock(&so->so_snd);
2050 SOCKBUF_UNLOCK(&so->so_snd);
2051 goto done;
2052 }
2053 goto retry_space;
2054 }
2055 SOCKBUF_UNLOCK(&so->so_snd);
2056 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
2057 if (error) {
2058 SOCKBUF_LOCK(&so->so_snd);
2059 sbunlock(&so->so_snd);
2060 SOCKBUF_UNLOCK(&so->so_snd);
2061 goto done;
2062 }
2063 headersent = 1;
2064 }
2065 SOCKBUF_LOCK(&so->so_snd);
2066 sbunlock(&so->so_snd);
2067 SOCKBUF_UNLOCK(&so->so_snd);
2068
2069 /*
2070 * Send trailers. Wimp out and use writev(2).
2071 */
2072 if (uap->hdtr != NULL && hdtr.trailers != NULL) {
2073 nuap.fd = uap->s;
2074 nuap.iovp = hdtr.trailers;
2075 nuap.iovcnt = hdtr.trl_cnt;
2076 error = writev(td, &nuap);
2077 if (error)
2078 goto done;
2079 if (compat)
2080 sbytes += td->td_retval[0];
2081 else
2082 hdtr_size += td->td_retval[0];
2083 }
2084
2085 done:
2086 if (headersent) {
2087 if (!compat)
2088 hdtr_size += headersize;
2089 } else {
2090 if (compat)
2091 sbytes -= headersize;
2092 }
2093 /*
2094 * If there was no error we have to clear td->td_retval[0]
2095 * because it may have been set by writev.
2096 */
2097 if (error == 0) {
2098 td->td_retval[0] = 0;
2099 }
2100 if (uap->sbytes != NULL) {
2101 if (!compat)
2102 sbytes += hdtr_size;
2103 copyout(&sbytes, uap->sbytes, sizeof(off_t));
2104 }
2105 if (vp)
2106 vrele(vp);
2107 if (so)
2108 fputsock(so);
2109 if (hdr_uio != NULL)
2110 free(hdr_uio, M_IOV);
2111 if (m_header)
2112 m_freem(m_header);
2113
2114 mtx_unlock(&Giant);
2115
2116 if (error == ERESTART)
2117 error = EINTR;
2118
2119 return (error);
2120 }
/* Cache object: 77b7950c83995942540d7ad7fc30a630 */