1 /*-
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
33 */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD$");
37
38 #include "opt_compat.h"
39 #include "opt_ktrace.h"
40 #include "opt_mac.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/lock.h>
46 #include <sys/mac.h>
47 #include <sys/mutex.h>
48 #include <sys/sysproto.h>
49 #include <sys/malloc.h>
50 #include <sys/filedesc.h>
51 #include <sys/event.h>
52 #include <sys/proc.h>
53 #include <sys/fcntl.h>
54 #include <sys/file.h>
55 #include <sys/filio.h>
56 #include <sys/mount.h>
57 #include <sys/mbuf.h>
58 #include <sys/protosw.h>
59 #include <sys/sf_buf.h>
60 #include <sys/socket.h>
61 #include <sys/socketvar.h>
62 #include <sys/signalvar.h>
63 #include <sys/syscallsubr.h>
64 #include <sys/sysctl.h>
65 #include <sys/uio.h>
66 #include <sys/vnode.h>
67 #ifdef KTRACE
68 #include <sys/ktrace.h>
69 #endif
70
71 #include <vm/vm.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_page.h>
74 #include <vm/vm_pageout.h>
75 #include <vm/vm_kern.h>
76 #include <vm/vm_extern.h>
77
78 static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
79 static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);
80
81 static int accept1(struct thread *td, struct accept_args *uap, int compat);
82 static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
83 static int getsockname1(struct thread *td, struct getsockname_args *uap,
84 int compat);
85 static int getpeername1(struct thread *td, struct getpeername_args *uap,
86 int compat);
87
88 /*
89 * NSFBUFS-related variables and associated sysctls
90 */
91 int nsfbufs;
92 int nsfbufspeak;
93 int nsfbufsused;
94
95 SYSCTL_DECL(_kern_ipc);
96 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
97 "Maximum number of sendfile(2) sf_bufs available");
98 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
99 "Number of sendfile(2) sf_bufs at peak usage");
100 SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
101 "Number of sendfile(2) sf_bufs in use");
102
103 /*
104 * Convert a user file descriptor to a kernel file entry. A reference on the
105 * file entry is held upon returning. This is lighter weight than
106 * fgetsock(), which bumps the socket reference drops the file reference
107 * count instead, as this approach avoids several additional mutex operations
108 * associated with the additional reference count.
109 */
110 static int
111 getsock(struct filedesc *fdp, int fd, struct file **fpp)
112 {
113 struct file *fp;
114 int error;
115
116 fp = NULL;
117 if (fdp == NULL)
118 error = EBADF;
119 else {
120 FILEDESC_LOCK_FAST(fdp);
121 fp = fget_locked(fdp, fd);
122 if (fp == NULL)
123 error = EBADF;
124 else if (fp->f_type != DTYPE_SOCKET) {
125 fp = NULL;
126 error = ENOTSOCK;
127 } else {
128 fhold(fp);
129 error = 0;
130 }
131 FILEDESC_UNLOCK_FAST(fdp);
132 }
133 *fpp = fp;
134 return (error);
135 }
136
137 /*
138 * System call interface to the socket abstraction.
139 */
140 #if defined(COMPAT_43)
141 #define COMPAT_OLDSOCK
142 #endif
143
144 /*
145 * MPSAFE
146 */
147 int
148 socket(td, uap)
149 struct thread *td;
150 register struct socket_args /* {
151 int domain;
152 int type;
153 int protocol;
154 } */ *uap;
155 {
156 struct filedesc *fdp;
157 struct socket *so;
158 struct file *fp;
159 int fd, error;
160
161 fdp = td->td_proc->p_fd;
162 error = falloc(td, &fp, &fd);
163 if (error)
164 return (error);
165 /* An extra reference on `fp' has been held for us by falloc(). */
166 NET_LOCK_GIANT();
167 error = socreate(uap->domain, &so, uap->type, uap->protocol,
168 td->td_ucred, td);
169 NET_UNLOCK_GIANT();
170 if (error) {
171 fdclose(fdp, fp, fd, td);
172 } else {
173 FILEDESC_LOCK_FAST(fdp);
174 fp->f_data = so; /* already has ref count */
175 fp->f_flag = FREAD|FWRITE;
176 fp->f_ops = &socketops;
177 fp->f_type = DTYPE_SOCKET;
178 FILEDESC_UNLOCK_FAST(fdp);
179 td->td_retval[0] = fd;
180 }
181 fdrop(fp, td);
182 return (error);
183 }
184
185 /*
186 * MPSAFE
187 */
188 /* ARGSUSED */
189 int
190 bind(td, uap)
191 struct thread *td;
192 register struct bind_args /* {
193 int s;
194 caddr_t name;
195 int namelen;
196 } */ *uap;
197 {
198 struct sockaddr *sa;
199 int error;
200
201 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
202 return (error);
203
204 return (kern_bind(td, uap->s, sa));
205 }
206
/*
 * Kernel version of bind(2); `sa' has already been copied into kernel
 * space.  Consumes `sa': it is freed with FREE(M_SONAME) on every
 * return path, success or failure.
 */
int
kern_bind(td, fd, sa)
	struct thread *td;
	int fd;
	struct sockaddr *sa;
{
	struct socket *so;
	struct file *fp;
	int error;

	NET_LOCK_GIANT();
	/* Resolve fd to a held file entry; released via fdrop() below. */
	error = getsock(td->td_proc->p_fd, fd, &fp);
	if (error)
		goto done2;
	so = fp->f_data;
#ifdef MAC
	/* Mandatory access control check before binding the address. */
	SOCK_LOCK(so);
	error = mac_check_socket_bind(td->td_ucred, so, sa);
	SOCK_UNLOCK(so);
	if (error)
		goto done1;
#endif
	error = sobind(so, sa, td);
#ifdef MAC
done1:
#endif
	fdrop(fp, td);
done2:
	NET_UNLOCK_GIANT();
	FREE(sa, M_SONAME);
	return (error);
}
239
240 /*
241 * MPSAFE
242 */
243 /* ARGSUSED */
244 int
245 listen(td, uap)
246 struct thread *td;
247 register struct listen_args /* {
248 int s;
249 int backlog;
250 } */ *uap;
251 {
252 struct socket *so;
253 struct file *fp;
254 int error;
255
256 NET_LOCK_GIANT();
257 error = getsock(td->td_proc->p_fd, uap->s, &fp);
258 if (error == 0) {
259 so = fp->f_data;
260 #ifdef MAC
261 SOCK_LOCK(so);
262 error = mac_check_socket_listen(td->td_ucred, so);
263 SOCK_UNLOCK(so);
264 if (error)
265 goto done;
266 #endif
267 error = solisten(so, uap->backlog, td);
268 #ifdef MAC
269 done:
270 #endif
271 fdrop(fp, td);
272 }
273 NET_UNLOCK_GIANT();
274 return(error);
275 }
276
277 /*
278 * accept1()
279 * MPSAFE
280 */
281 static int
282 accept1(td, uap, compat)
283 struct thread *td;
284 register struct accept_args /* {
285 int s;
286 struct sockaddr * __restrict name;
287 socklen_t * __restrict anamelen;
288 } */ *uap;
289 int compat;
290 {
291 struct filedesc *fdp;
292 struct file *nfp = NULL;
293 struct sockaddr *sa = NULL;
294 socklen_t namelen;
295 int error;
296 struct socket *head, *so;
297 int fd;
298 u_int fflag;
299 pid_t pgid;
300 int tmp;
301
302 fdp = td->td_proc->p_fd;
303 if (uap->name) {
304 error = copyin(uap->anamelen, &namelen, sizeof (namelen));
305 if(error)
306 return (error);
307 if (namelen < 0)
308 return (EINVAL);
309 }
310 NET_LOCK_GIANT();
311 error = fgetsock(td, uap->s, &head, &fflag);
312 if (error)
313 goto done2;
314 if ((head->so_options & SO_ACCEPTCONN) == 0) {
315 error = EINVAL;
316 goto done;
317 }
318 error = falloc(td, &nfp, &fd);
319 if (error)
320 goto done;
321 ACCEPT_LOCK();
322 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
323 ACCEPT_UNLOCK();
324 error = EWOULDBLOCK;
325 goto noconnection;
326 }
327 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
328 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
329 head->so_error = ECONNABORTED;
330 break;
331 }
332 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
333 "accept", 0);
334 if (error) {
335 ACCEPT_UNLOCK();
336 goto noconnection;
337 }
338 }
339 if (head->so_error) {
340 error = head->so_error;
341 head->so_error = 0;
342 ACCEPT_UNLOCK();
343 goto noconnection;
344 }
345 so = TAILQ_FIRST(&head->so_comp);
346 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
347 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
348
349 /*
350 * Before changing the flags on the socket, we have to bump the
351 * reference count. Otherwise, if the protocol calls sofree(),
352 * the socket will be released due to a zero refcount.
353 */
354 SOCK_LOCK(so); /* soref() and so_state update */
355 soref(so); /* file descriptor reference */
356
357 TAILQ_REMOVE(&head->so_comp, so, so_list);
358 head->so_qlen--;
359 so->so_state |= (head->so_state & SS_NBIO);
360 so->so_qstate &= ~SQ_COMP;
361 so->so_head = NULL;
362
363 SOCK_UNLOCK(so);
364 ACCEPT_UNLOCK();
365
366 /* An extra reference on `nfp' has been held for us by falloc(). */
367 td->td_retval[0] = fd;
368
369 /* connection has been removed from the listen queue */
370 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
371
372 pgid = fgetown(&head->so_sigio);
373 if (pgid != 0)
374 fsetown(pgid, &so->so_sigio);
375
376 FILE_LOCK(nfp);
377 nfp->f_data = so; /* nfp has ref count from falloc */
378 nfp->f_flag = fflag;
379 nfp->f_ops = &socketops;
380 nfp->f_type = DTYPE_SOCKET;
381 FILE_UNLOCK(nfp);
382 /* Sync socket nonblocking/async state with file flags */
383 tmp = fflag & FNONBLOCK;
384 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
385 tmp = fflag & FASYNC;
386 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
387 sa = 0;
388 error = soaccept(so, &sa);
389 if (error) {
390 /*
391 * return a namelen of zero for older code which might
392 * ignore the return value from accept.
393 */
394 if (uap->name != NULL) {
395 namelen = 0;
396 (void) copyout(&namelen,
397 uap->anamelen, sizeof(*uap->anamelen));
398 }
399 goto noconnection;
400 }
401 if (sa == NULL) {
402 namelen = 0;
403 if (uap->name)
404 goto gotnoname;
405 error = 0;
406 goto done;
407 }
408 if (uap->name) {
409 /* check sa_len before it is destroyed */
410 if (namelen > sa->sa_len)
411 namelen = sa->sa_len;
412 #ifdef COMPAT_OLDSOCK
413 if (compat)
414 ((struct osockaddr *)sa)->sa_family =
415 sa->sa_family;
416 #endif
417 error = copyout(sa, uap->name, (u_int)namelen);
418 if (!error)
419 gotnoname:
420 error = copyout(&namelen,
421 uap->anamelen, sizeof (*uap->anamelen));
422 }
423 noconnection:
424 if (sa)
425 FREE(sa, M_SONAME);
426
427 /*
428 * close the new descriptor, assuming someone hasn't ripped it
429 * out from under us.
430 */
431 if (error)
432 fdclose(fdp, nfp, fd, td);
433
434 /*
435 * Release explicitly held references before returning.
436 */
437 done:
438 if (nfp != NULL)
439 fdrop(nfp, td);
440 fputsock(head);
441 done2:
442 NET_UNLOCK_GIANT();
443 return (error);
444 }
445
446 /*
447 * MPSAFE (accept1() is MPSAFE)
448 */
449 int
450 accept(td, uap)
451 struct thread *td;
452 struct accept_args *uap;
453 {
454
455 return (accept1(td, uap, 0));
456 }
457
#ifdef COMPAT_OLDSOCK
/*
 * 4.3BSD accept(2) compatibility entry: identical to accept(2) except
 * the peer address is returned in the old osockaddr format.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
oaccept(struct thread *td, struct accept_args *uap)
{

	return (accept1(td, uap, 1));
}
#endif /* COMPAT_OLDSOCK */
471
472 /*
473 * MPSAFE
474 */
475 /* ARGSUSED */
476 int
477 connect(td, uap)
478 struct thread *td;
479 register struct connect_args /* {
480 int s;
481 caddr_t name;
482 int namelen;
483 } */ *uap;
484 {
485 struct sockaddr *sa;
486 int error;
487
488 error = getsockaddr(&sa, uap->name, uap->namelen);
489 if (error)
490 return (error);
491
492 return (kern_connect(td, uap->s, sa));
493 }
494
495
/*
 * Kernel version of connect(2); `sa' has already been copied into
 * kernel space.  Consumes `sa': it is freed with FREE(M_SONAME) on all
 * return paths.  For blocking sockets, sleeps until the connection
 * completes, fails, or the wait is interrupted by a signal.
 */
int
kern_connect(td, fd, sa)
	struct thread *td;
	int fd;
	struct sockaddr *sa;
{
	struct socket *so;
	struct file *fp;
	int error;
	int interrupted = 0;

	NET_LOCK_GIANT();
	error = getsock(td->td_proc->p_fd, fd, &fp);
	if (error)
		goto done2;
	so = fp->f_data;
	if (so->so_state & SS_ISCONNECTING) {
		/* A connect is already in progress on this socket. */
		error = EALREADY;
		goto done1;
	}
#ifdef MAC
	/* Mandatory access control check before connecting. */
	SOCK_LOCK(so);
	error = mac_check_socket_connect(td->td_ucred, so, sa);
	SOCK_UNLOCK(so);
	if (error)
		goto bad;
#endif
	error = soconnect(so, sa, td);
	if (error)
		goto bad;
	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
		/* Non-blocking: report that the connect is in progress. */
		error = EINPROGRESS;
		goto done1;
	}
	SOCK_LOCK(so);
	/* Wait for the protocol to complete (or fail) the connection. */
	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
		    "connec", 0);
		if (error) {
			if (error == EINTR || error == ERESTART)
				interrupted = 1;
			break;
		}
	}
	if (error == 0) {
		/* Collect any asynchronous connection error. */
		error = so->so_error;
		so->so_error = 0;
	}
	SOCK_UNLOCK(so);
bad:
	/*
	 * Leave SS_ISCONNECTING set if the wait was interrupted, so the
	 * attempt can continue in the background.
	 * NOTE(review): this so_state update happens without SOCK_LOCK --
	 * confirm that is intentional.
	 */
	if (!interrupted)
		so->so_state &= ~SS_ISCONNECTING;
	if (error == ERESTART)
		error = EINTR;
done1:
	fdrop(fp, td);
done2:
	NET_UNLOCK_GIANT();
	FREE(sa, M_SONAME);
	return (error);
}
557
558 /*
559 * MPSAFE
560 */
561 int
562 socketpair(td, uap)
563 struct thread *td;
564 register struct socketpair_args /* {
565 int domain;
566 int type;
567 int protocol;
568 int *rsv;
569 } */ *uap;
570 {
571 register struct filedesc *fdp = td->td_proc->p_fd;
572 struct file *fp1, *fp2;
573 struct socket *so1, *so2;
574 int fd, error, sv[2];
575
576 NET_LOCK_GIANT();
577 error = socreate(uap->domain, &so1, uap->type, uap->protocol,
578 td->td_ucred, td);
579 if (error)
580 goto done2;
581 error = socreate(uap->domain, &so2, uap->type, uap->protocol,
582 td->td_ucred, td);
583 if (error)
584 goto free1;
585 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */
586 error = falloc(td, &fp1, &fd);
587 if (error)
588 goto free2;
589 sv[0] = fd;
590 fp1->f_data = so1; /* so1 already has ref count */
591 error = falloc(td, &fp2, &fd);
592 if (error)
593 goto free3;
594 fp2->f_data = so2; /* so2 already has ref count */
595 sv[1] = fd;
596 error = soconnect2(so1, so2);
597 if (error)
598 goto free4;
599 if (uap->type == SOCK_DGRAM) {
600 /*
601 * Datagram socket connection is asymmetric.
602 */
603 error = soconnect2(so2, so1);
604 if (error)
605 goto free4;
606 }
607 FILE_LOCK(fp1);
608 fp1->f_flag = FREAD|FWRITE;
609 fp1->f_ops = &socketops;
610 fp1->f_type = DTYPE_SOCKET;
611 FILE_UNLOCK(fp1);
612 FILE_LOCK(fp2);
613 fp2->f_flag = FREAD|FWRITE;
614 fp2->f_ops = &socketops;
615 fp2->f_type = DTYPE_SOCKET;
616 FILE_UNLOCK(fp2);
617 error = copyout(sv, uap->rsv, 2 * sizeof (int));
618 fdrop(fp1, td);
619 fdrop(fp2, td);
620 goto done2;
621 free4:
622 fdclose(fdp, fp2, sv[1], td);
623 fdrop(fp2, td);
624 free3:
625 fdclose(fdp, fp1, sv[0], td);
626 fdrop(fp1, td);
627 free2:
628 (void)soclose(so2);
629 free1:
630 (void)soclose(so1);
631 done2:
632 NET_UNLOCK_GIANT();
633 return (error);
634 }
635
/*
 * Common backend for the send family of system calls: copy in the
 * destination address and any control data referenced by the user
 * msghdr, then hand off to kern_sendit().  mp->msg_name is replaced by
 * the copied-in kernel sockaddr, which is freed here before returning.
 */
static int
sendit(td, s, mp, flags)
	register struct thread *td;
	int s;
	register struct msghdr *mp;
	int flags;
{
	struct mbuf *control;
	struct sockaddr *to;
	int error;

	if (mp->msg_name != NULL) {
		/* Copy in the destination address. */
		error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
		if (error) {
			to = NULL;
			goto bad;
		}
		mp->msg_name = to;
	} else {
		to = NULL;
	}

	if (mp->msg_control) {
		/* Control data must hold at least a cmsghdr ... */
		if (mp->msg_controllen < sizeof(struct cmsghdr)
#ifdef COMPAT_OLDSOCK
		    /* ... except old-style access rights, which carry none. */
		    && mp->msg_flags != MSG_COMPAT
#endif
		) {
			error = EINVAL;
			goto bad;
		}
		error = sockargs(&control, mp->msg_control,
		    mp->msg_controllen, MT_CONTROL);
		if (error)
			goto bad;
#ifdef COMPAT_OLDSOCK
		if (mp->msg_flags == MSG_COMPAT) {
			register struct cmsghdr *cm;

			/* Wrap old-style access rights in a cmsghdr. */
			M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
			if (control == 0) {
				error = ENOBUFS;
				goto bad;
			} else {
				cm = mtod(control, struct cmsghdr *);
				cm->cmsg_len = control->m_len;
				cm->cmsg_level = SOL_SOCKET;
				cm->cmsg_type = SCM_RIGHTS;
			}
		}
#endif
	} else {
		control = NULL;
	}

	error = kern_sendit(td, s, mp, flags, control);

bad:
	if (to)
		FREE(to, M_SONAME);
	return (error);
}
698
/*
 * Kernel version of sendmsg(2): gather the (user-space) iovec described
 * by `mp' and send it on socket `s', along with the optional `control'
 * mbuf chain.  The number of bytes sent is returned in
 * td->td_retval[0].  `control' is handed to pru_sosend.
 */
int
kern_sendit(td, s, mp, flags, control)
	struct thread *td;
	int s;
	struct msghdr *mp;
	int flags;
	struct mbuf *control;
{
	struct file *fp;
	struct uio auio;
	struct iovec *iov;
	struct socket *so;
	int i;
	int len, error;
#ifdef KTRACE
	struct uio *ktruio = NULL;
#endif

	NET_LOCK_GIANT();
	error = getsock(td->td_proc->p_fd, s, &fp);
	if (error)
		goto bad2;
	so = (struct socket *)fp->f_data;

#ifdef MAC
	/* Mandatory access control check before sending. */
	SOCK_LOCK(so);
	error = mac_check_socket_send(td->td_ucred, so);
	SOCK_UNLOCK(so);
	if (error)
		goto bad;
#endif

	/* Build a uio over the caller's iovec array (still user-space). */
	auio.uio_iov = mp->msg_iov;
	auio.uio_iovcnt = mp->msg_iovlen;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_WRITE;
	auio.uio_td = td;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	iov = mp->msg_iov;
	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
		/* Reject iovec sets whose total length overflows. */
		if ((auio.uio_resid += iov->iov_len) < 0) {
			error = EINVAL;
			goto bad;
		}
	}
#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO))
		ktruio = cloneuio(&auio);
#endif
	len = auio.uio_resid;
	error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio,
	    0, control, flags, td);
	if (error) {
		/* A partial write counts as success for these errors. */
		if (auio.uio_resid != len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* Generation of SIGPIPE can be controlled per socket */
		if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) {
			PROC_LOCK(td->td_proc);
			psignal(td->td_proc, SIGPIPE);
			PROC_UNLOCK(td->td_proc);
		}
	}
	if (error == 0)
		td->td_retval[0] = len - auio.uio_resid;
#ifdef KTRACE
	if (ktruio != NULL) {
		/* Record the number of bytes actually sent. */
		ktruio->uio_resid = td->td_retval[0];
		ktrgenio(s, UIO_WRITE, ktruio, error);
	}
#endif
bad:
	fdrop(fp, td);
bad2:
	NET_UNLOCK_GIANT();
	return (error);
}
777
778 /*
779 * MPSAFE
780 */
781 int
782 sendto(td, uap)
783 struct thread *td;
784 register struct sendto_args /* {
785 int s;
786 caddr_t buf;
787 size_t len;
788 int flags;
789 caddr_t to;
790 int tolen;
791 } */ *uap;
792 {
793 struct msghdr msg;
794 struct iovec aiov;
795 int error;
796
797 msg.msg_name = uap->to;
798 msg.msg_namelen = uap->tolen;
799 msg.msg_iov = &aiov;
800 msg.msg_iovlen = 1;
801 msg.msg_control = 0;
802 #ifdef COMPAT_OLDSOCK
803 msg.msg_flags = 0;
804 #endif
805 aiov.iov_base = uap->buf;
806 aiov.iov_len = uap->len;
807 error = sendit(td, uap->s, &msg, uap->flags);
808 return (error);
809 }
810
811 #ifdef COMPAT_OLDSOCK
812 /*
813 * MPSAFE
814 */
815 int
816 osend(td, uap)
817 struct thread *td;
818 register struct osend_args /* {
819 int s;
820 caddr_t buf;
821 int len;
822 int flags;
823 } */ *uap;
824 {
825 struct msghdr msg;
826 struct iovec aiov;
827 int error;
828
829 msg.msg_name = 0;
830 msg.msg_namelen = 0;
831 msg.msg_iov = &aiov;
832 msg.msg_iovlen = 1;
833 aiov.iov_base = uap->buf;
834 aiov.iov_len = uap->len;
835 msg.msg_control = 0;
836 msg.msg_flags = 0;
837 error = sendit(td, uap->s, &msg, uap->flags);
838 return (error);
839 }
840
841 /*
842 * MPSAFE
843 */
844 int
845 osendmsg(td, uap)
846 struct thread *td;
847 struct osendmsg_args /* {
848 int s;
849 caddr_t msg;
850 int flags;
851 } */ *uap;
852 {
853 struct msghdr msg;
854 struct iovec *iov;
855 int error;
856
857 error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
858 if (error)
859 return (error);
860 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
861 if (error)
862 return (error);
863 msg.msg_iov = iov;
864 msg.msg_flags = MSG_COMPAT;
865 error = sendit(td, uap->s, &msg, uap->flags);
866 free(iov, M_IOV);
867 return (error);
868 }
869 #endif
870
871 /*
872 * MPSAFE
873 */
874 int
875 sendmsg(td, uap)
876 struct thread *td;
877 struct sendmsg_args /* {
878 int s;
879 caddr_t msg;
880 int flags;
881 } */ *uap;
882 {
883 struct msghdr msg;
884 struct iovec *iov;
885 int error;
886
887 error = copyin(uap->msg, &msg, sizeof (msg));
888 if (error)
889 return (error);
890 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
891 if (error)
892 return (error);
893 msg.msg_iov = iov;
894 #ifdef COMPAT_OLDSOCK
895 msg.msg_flags = 0;
896 #endif
897 error = sendit(td, uap->s, &msg, uap->flags);
898 free(iov, M_IOV);
899 return (error);
900 }
901
/*
 * Common backend for the receive family of system calls: receive into
 * the (user-space) iovec described by `mp', then copy the source
 * address, control data, and updated lengths back out to user space.
 * `namelenp', when non-NULL, is a user pointer that receives the final
 * address length.  Bytes received are returned in td->td_retval[0].
 */
static int
recvit(td, s, mp, namelenp)
	struct thread *td;
	int s;
	struct msghdr *mp;
	void *namelenp;
{
	struct uio auio;
	struct iovec *iov;
	int i;
	socklen_t len;
	int error;
	struct mbuf *m, *control = 0;
	caddr_t ctlbuf;
	struct file *fp;
	struct socket *so;
	struct sockaddr *fromsa = 0;
#ifdef KTRACE
	struct uio *ktruio = NULL;
#endif

	NET_LOCK_GIANT();
	error = getsock(td->td_proc->p_fd, s, &fp);
	if (error) {
		NET_UNLOCK_GIANT();
		return (error);
	}
	so = fp->f_data;

#ifdef MAC
	/* Mandatory access control check before receiving. */
	SOCK_LOCK(so);
	error = mac_check_socket_receive(td->td_ucred, so);
	SOCK_UNLOCK(so);
	if (error) {
		fdrop(fp, td);
		NET_UNLOCK_GIANT();
		return (error);
	}
#endif

	/* Build a uio over the caller's iovec array (still user-space). */
	auio.uio_iov = mp->msg_iov;
	auio.uio_iovcnt = mp->msg_iovlen;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_td = td;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	iov = mp->msg_iov;
	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
		/* Reject iovec sets whose total length overflows. */
		if ((auio.uio_resid += iov->iov_len) < 0) {
			fdrop(fp, td);
			NET_UNLOCK_GIANT();
			return (EINVAL);
		}
	}
#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO))
		ktruio = cloneuio(&auio);
#endif
	len = auio.uio_resid;
	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
	    (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
	    &mp->msg_flags);
	if (error) {
		/* A partial read counts as success for these errors. */
		if (auio.uio_resid != (int)len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
#ifdef KTRACE
	if (ktruio != NULL) {
		/* Record the number of bytes actually received. */
		ktruio->uio_resid = (int)len - auio.uio_resid;
		ktrgenio(s, UIO_READ, ktruio, error);
	}
#endif
	if (error)
		goto out;
	td->td_retval[0] = (int)len - auio.uio_resid;
	if (mp->msg_name) {
		/* Copy the source address out, truncated to the buffer. */
		len = mp->msg_namelen;
		if (len <= 0 || fromsa == 0)
			len = 0;
		else {
			/* save sa_len before it is destroyed by MSG_COMPAT */
			len = MIN(len, fromsa->sa_len);
#ifdef COMPAT_OLDSOCK
			if (mp->msg_flags & MSG_COMPAT)
				((struct osockaddr *)fromsa)->sa_family =
				    fromsa->sa_family;
#endif
			error = copyout(fromsa, mp->msg_name, (unsigned)len);
			if (error)
				goto out;
		}
		mp->msg_namelen = len;
		if (namelenp &&
		    (error = copyout(&len, namelenp, sizeof (socklen_t)))) {
#ifdef COMPAT_OLDSOCK
			if (mp->msg_flags & MSG_COMPAT)
				error = 0;	/* old recvfrom didn't check */
			else
#endif
				goto out;
		}
	}
	if (mp->msg_control) {
#ifdef COMPAT_OLDSOCK
		/*
		 * We assume that old recvmsg calls won't receive access
		 * rights and other control info, esp. as control info
		 * is always optional and those options didn't exist in 4.3.
		 * If we receive rights, trim the cmsghdr; anything else
		 * is tossed.
		 */
		if (control && mp->msg_flags & MSG_COMPAT) {
			if (mtod(control, struct cmsghdr *)->cmsg_level !=
			    SOL_SOCKET ||
			    mtod(control, struct cmsghdr *)->cmsg_type !=
			    SCM_RIGHTS) {
				mp->msg_controllen = 0;
				goto out;
			}
			control->m_len -= sizeof (struct cmsghdr);
			control->m_data += sizeof (struct cmsghdr);
		}
#endif
		/* Copy the control mbuf chain out, flagging truncation. */
		len = mp->msg_controllen;
		m = control;
		mp->msg_controllen = 0;
		ctlbuf = mp->msg_control;

		while (m && len > 0) {
			unsigned int tocopy;

			if (len >= m->m_len)
				tocopy = m->m_len;
			else {
				mp->msg_flags |= MSG_CTRUNC;
				tocopy = len;
			}

			if ((error = copyout(mtod(m, caddr_t),
			    ctlbuf, tocopy)) != 0)
				goto out;

			ctlbuf += tocopy;
			len -= tocopy;
			m = m->m_next;
		}
		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
	}
out:
	fdrop(fp, td);
	NET_UNLOCK_GIANT();
	if (fromsa)
		FREE(fromsa, M_SONAME);
	if (control)
		m_freem(control);
	return (error);
}
1061
1062 /*
1063 * MPSAFE
1064 */
1065 int
1066 recvfrom(td, uap)
1067 struct thread *td;
1068 register struct recvfrom_args /* {
1069 int s;
1070 caddr_t buf;
1071 size_t len;
1072 int flags;
1073 struct sockaddr * __restrict from;
1074 socklen_t * __restrict fromlenaddr;
1075 } */ *uap;
1076 {
1077 struct msghdr msg;
1078 struct iovec aiov;
1079 int error;
1080
1081 if (uap->fromlenaddr) {
1082 error = copyin(uap->fromlenaddr,
1083 &msg.msg_namelen, sizeof (msg.msg_namelen));
1084 if (error)
1085 goto done2;
1086 } else {
1087 msg.msg_namelen = 0;
1088 }
1089 msg.msg_name = uap->from;
1090 msg.msg_iov = &aiov;
1091 msg.msg_iovlen = 1;
1092 aiov.iov_base = uap->buf;
1093 aiov.iov_len = uap->len;
1094 msg.msg_control = 0;
1095 msg.msg_flags = uap->flags;
1096 error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1097 done2:
1098 return(error);
1099 }
1100
1101 #ifdef COMPAT_OLDSOCK
1102 /*
1103 * MPSAFE
1104 */
1105 int
1106 orecvfrom(td, uap)
1107 struct thread *td;
1108 struct recvfrom_args *uap;
1109 {
1110
1111 uap->flags |= MSG_COMPAT;
1112 return (recvfrom(td, uap));
1113 }
1114 #endif
1115
1116
1117 #ifdef COMPAT_OLDSOCK
1118 /*
1119 * MPSAFE
1120 */
1121 int
1122 orecv(td, uap)
1123 struct thread *td;
1124 register struct orecv_args /* {
1125 int s;
1126 caddr_t buf;
1127 int len;
1128 int flags;
1129 } */ *uap;
1130 {
1131 struct msghdr msg;
1132 struct iovec aiov;
1133 int error;
1134
1135 msg.msg_name = 0;
1136 msg.msg_namelen = 0;
1137 msg.msg_iov = &aiov;
1138 msg.msg_iovlen = 1;
1139 aiov.iov_base = uap->buf;
1140 aiov.iov_len = uap->len;
1141 msg.msg_control = 0;
1142 msg.msg_flags = uap->flags;
1143 error = recvit(td, uap->s, &msg, NULL);
1144 return (error);
1145 }
1146
1147 /*
1148 * Old recvmsg. This code takes advantage of the fact that the old msghdr
1149 * overlays the new one, missing only the flags, and with the (old) access
1150 * rights where the control fields are now.
1151 *
1152 * MPSAFE
1153 */
1154 int
1155 orecvmsg(td, uap)
1156 struct thread *td;
1157 struct orecvmsg_args /* {
1158 int s;
1159 struct omsghdr *msg;
1160 int flags;
1161 } */ *uap;
1162 {
1163 struct msghdr msg;
1164 struct iovec *iov;
1165 int error;
1166
1167 error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1168 if (error)
1169 return (error);
1170 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1171 if (error)
1172 return (error);
1173 msg.msg_flags = uap->flags | MSG_COMPAT;
1174 msg.msg_iov = iov;
1175 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1176 if (msg.msg_controllen && error == 0)
1177 error = copyout(&msg.msg_controllen,
1178 &uap->msg->msg_accrightslen, sizeof (int));
1179 free(iov, M_IOV);
1180 return (error);
1181 }
1182 #endif
1183
1184 /*
1185 * MPSAFE
1186 */
1187 int
1188 recvmsg(td, uap)
1189 struct thread *td;
1190 struct recvmsg_args /* {
1191 int s;
1192 struct msghdr *msg;
1193 int flags;
1194 } */ *uap;
1195 {
1196 struct msghdr msg;
1197 struct iovec *uiov, *iov;
1198 int error;
1199
1200 error = copyin(uap->msg, &msg, sizeof (msg));
1201 if (error)
1202 return (error);
1203 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1204 if (error)
1205 return (error);
1206 msg.msg_flags = uap->flags;
1207 #ifdef COMPAT_OLDSOCK
1208 msg.msg_flags &= ~MSG_COMPAT;
1209 #endif
1210 uiov = msg.msg_iov;
1211 msg.msg_iov = iov;
1212 error = recvit(td, uap->s, &msg, NULL);
1213 if (error == 0) {
1214 msg.msg_iov = uiov;
1215 error = copyout(&msg, uap->msg, sizeof(msg));
1216 }
1217 free(iov, M_IOV);
1218 return (error);
1219 }
1220
1221 /*
1222 * MPSAFE
1223 */
1224 /* ARGSUSED */
1225 int
1226 shutdown(td, uap)
1227 struct thread *td;
1228 register struct shutdown_args /* {
1229 int s;
1230 int how;
1231 } */ *uap;
1232 {
1233 struct socket *so;
1234 struct file *fp;
1235 int error;
1236
1237 NET_LOCK_GIANT();
1238 error = getsock(td->td_proc->p_fd, uap->s, &fp);
1239 if (error == 0) {
1240 so = fp->f_data;
1241 error = soshutdown(so, uap->how);
1242 fdrop(fp, td);
1243 }
1244 NET_UNLOCK_GIANT();
1245 return (error);
1246 }
1247
1248 /*
1249 * MPSAFE
1250 */
1251 /* ARGSUSED */
1252 int
1253 setsockopt(td, uap)
1254 struct thread *td;
1255 register struct setsockopt_args /* {
1256 int s;
1257 int level;
1258 int name;
1259 caddr_t val;
1260 int valsize;
1261 } */ *uap;
1262 {
1263
1264 return (kern_setsockopt(td, uap->s, uap->level, uap->name,
1265 uap->val, UIO_USERSPACE, uap->valsize));
1266 }
1267
1268 int
1269 kern_setsockopt(td, s, level, name, val, valseg, valsize)
1270 struct thread *td;
1271 int s;
1272 int level;
1273 int name;
1274 void *val;
1275 enum uio_seg valseg;
1276 socklen_t valsize;
1277 {
1278 int error;
1279 struct socket *so;
1280 struct file *fp;
1281 struct sockopt sopt;
1282
1283 if (val == NULL && valsize != 0)
1284 return (EFAULT);
1285 if (valsize < 0)
1286 return (EINVAL);
1287
1288 sopt.sopt_dir = SOPT_SET;
1289 sopt.sopt_level = level;
1290 sopt.sopt_name = name;
1291 sopt.sopt_val = val;
1292 sopt.sopt_valsize = valsize;
1293 switch (valseg) {
1294 case UIO_USERSPACE:
1295 sopt.sopt_td = td;
1296 break;
1297 case UIO_SYSSPACE:
1298 sopt.sopt_td = NULL;
1299 break;
1300 default:
1301 panic("kern_setsockopt called with bad valseg");
1302 }
1303
1304 NET_LOCK_GIANT();
1305 error = getsock(td->td_proc->p_fd, s, &fp);
1306 if (error == 0) {
1307 so = fp->f_data;
1308 error = sosetopt(so, &sopt);
1309 fdrop(fp, td);
1310 }
1311 NET_UNLOCK_GIANT();
1312 return(error);
1313 }
1314
1315 /*
1316 * MPSAFE
1317 */
1318 /* ARGSUSED */
1319 int
1320 getsockopt(td, uap)
1321 struct thread *td;
1322 register struct getsockopt_args /* {
1323 int s;
1324 int level;
1325 int name;
1326 void * __restrict val;
1327 socklen_t * __restrict avalsize;
1328 } */ *uap;
1329 {
1330 socklen_t valsize;
1331 int error;
1332
1333 if (uap->val) {
1334 error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1335 if (error)
1336 return (error);
1337 }
1338
1339 error = kern_getsockopt(td, uap->s, uap->level, uap->name,
1340 uap->val, UIO_USERSPACE, &valsize);
1341
1342 if (error == 0)
1343 error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1344 return (error);
1345 }
1346
1347 /*
1348 * Kernel version of getsockopt.
1349 * optval can be a userland or userspace. optlen is always a kernel pointer.
1350 */
1351 int
1352 kern_getsockopt(td, s, level, name, val, valseg, valsize)
1353 struct thread *td;
1354 int s;
1355 int level;
1356 int name;
1357 void *val;
1358 enum uio_seg valseg;
1359 socklen_t *valsize;
1360 {
1361 int error;
1362 struct socket *so;
1363 struct file *fp;
1364 struct sockopt sopt;
1365
1366 if (val == NULL)
1367 *valsize = 0;
1368 if (*valsize < 0)
1369 return (EINVAL);
1370
1371 sopt.sopt_dir = SOPT_GET;
1372 sopt.sopt_level = level;
1373 sopt.sopt_name = name;
1374 sopt.sopt_val = val;
1375 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
1376 switch (valseg) {
1377 case UIO_USERSPACE:
1378 sopt.sopt_td = td;
1379 break;
1380 case UIO_SYSSPACE:
1381 sopt.sopt_td = NULL;
1382 break;
1383 default:
1384 panic("kern_getsockopt called with bad valseg");
1385 }
1386
1387 NET_LOCK_GIANT();
1388 error = getsock(td->td_proc->p_fd, s, &fp);
1389 if (error == 0) {
1390 so = fp->f_data;
1391 error = sogetopt(so, &sopt);
1392 *valsize = sopt.sopt_valsize;
1393 fdrop(fp, td);
1394 }
1395 NET_UNLOCK_GIANT();
1396 return (error);
1397 }
1398
1399 /*
1400 * getsockname1() - Get socket name.
1401 *
1402 * MPSAFE
1403 */
1404 /* ARGSUSED */
1405 static int
1406 getsockname1(td, uap, compat)
1407 struct thread *td;
1408 register struct getsockname_args /* {
1409 int fdes;
1410 struct sockaddr * __restrict asa;
1411 socklen_t * __restrict alen;
1412 } */ *uap;
1413 int compat;
1414 {
1415 struct socket *so;
1416 struct sockaddr *sa;
1417 struct file *fp;
1418 socklen_t len;
1419 int error;
1420
1421 NET_LOCK_GIANT();
1422 error = getsock(td->td_proc->p_fd, uap->fdes, &fp);
1423 if (error)
1424 goto done2;
1425 so = fp->f_data;
1426 error = copyin(uap->alen, &len, sizeof (len));
1427 if (error)
1428 goto done1;
1429 if (len < 0) {
1430 error = EINVAL;
1431 goto done1;
1432 }
1433 sa = 0;
1434 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1435 if (error)
1436 goto bad;
1437 if (sa == 0) {
1438 len = 0;
1439 goto gotnothing;
1440 }
1441
1442 len = MIN(len, sa->sa_len);
1443 #ifdef COMPAT_OLDSOCK
1444 if (compat)
1445 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1446 #endif
1447 error = copyout(sa, uap->asa, (u_int)len);
1448 if (error == 0)
1449 gotnothing:
1450 error = copyout(&len, uap->alen, sizeof (len));
1451 bad:
1452 if (sa)
1453 FREE(sa, M_SONAME);
1454 done1:
1455 fdrop(fp, td);
1456 done2:
1457 NET_UNLOCK_GIANT();
1458 return (error);
1459 }
1460
1461 /*
1462 * MPSAFE
1463 */
int
getsockname(struct thread *td, struct getsockname_args *uap)
{

	/* Current flavour: no old-style sockaddr conversion. */
	return (getsockname1(td, uap, 0));
}
1472
1473 #ifdef COMPAT_OLDSOCK
1474 /*
1475 * MPSAFE
1476 */
int
ogetsockname(struct thread *td, struct getsockname_args *uap)
{

	/* 4.3BSD compatibility flavour: emits an osockaddr-style name. */
	return (getsockname1(td, uap, 1));
}
1485 #endif /* COMPAT_OLDSOCK */
1486
1487 /*
1488 * getpeername1() - Get name of peer for connected socket.
1489 *
1490 * MPSAFE
1491 */
1492 /* ARGSUSED */
1493 static int
1494 getpeername1(td, uap, compat)
1495 struct thread *td;
1496 register struct getpeername_args /* {
1497 int fdes;
1498 struct sockaddr * __restrict asa;
1499 socklen_t * __restrict alen;
1500 } */ *uap;
1501 int compat;
1502 {
1503 struct socket *so;
1504 struct sockaddr *sa;
1505 struct file *fp;
1506 socklen_t len;
1507 int error;
1508
1509 NET_LOCK_GIANT();
1510 error = getsock(td->td_proc->p_fd, uap->fdes, &fp);
1511 if (error)
1512 goto done2;
1513 so = fp->f_data;
1514 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1515 error = ENOTCONN;
1516 goto done1;
1517 }
1518 error = copyin(uap->alen, &len, sizeof (len));
1519 if (error)
1520 goto done1;
1521 if (len < 0) {
1522 error = EINVAL;
1523 goto done1;
1524 }
1525 sa = 0;
1526 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1527 if (error)
1528 goto bad;
1529 if (sa == 0) {
1530 len = 0;
1531 goto gotnothing;
1532 }
1533 len = MIN(len, sa->sa_len);
1534 #ifdef COMPAT_OLDSOCK
1535 if (compat)
1536 ((struct osockaddr *)sa)->sa_family =
1537 sa->sa_family;
1538 #endif
1539 error = copyout(sa, uap->asa, (u_int)len);
1540 if (error)
1541 goto bad;
1542 gotnothing:
1543 error = copyout(&len, uap->alen, sizeof (len));
1544 bad:
1545 if (sa)
1546 FREE(sa, M_SONAME);
1547 done1:
1548 fdrop(fp, td);
1549 done2:
1550 NET_UNLOCK_GIANT();
1551 return (error);
1552 }
1553
1554 /*
1555 * MPSAFE
1556 */
int
getpeername(struct thread *td, struct getpeername_args *uap)
{

	/* Current flavour: no old-style sockaddr conversion. */
	return (getpeername1(td, uap, 0));
}
1565
1566 #ifdef COMPAT_OLDSOCK
1567 /*
1568 * MPSAFE
1569 */
int
ogetpeername(struct thread *td, struct ogetpeername_args *uap)
{

	/* XXX uap should have type `getpeername_args *' to begin with. */
	return (getpeername1(td, (struct getpeername_args *)uap, 1));
}
1579 #endif /* COMPAT_OLDSOCK */
1580
/*
 * Copy a socket argument blob ('buflen' bytes at userland address
 * 'buf') into a freshly allocated mbuf of the given 'type', returned
 * via '*mp' on success.  For MT_SONAME the copied sockaddr's sa_len
 * is forced to the copied length.  Returns 0 or an errno; no mbuf is
 * handed back on failure.
 */
int
sockargs(mp, buf, buflen, type)
	struct mbuf **mp;
	caddr_t buf;
	int buflen, type;
{
	register struct sockaddr *sa;
	register struct mbuf *m;
	int error;

	/* Anything larger than MLEN needs a cluster; cap at MCLBYTES. */
	if ((u_int)buflen > MLEN) {
#ifdef COMPAT_OLDSOCK
		if (type == MT_SONAME && (u_int)buflen <= 112)
			buflen = MLEN;		/* unix domain compat. hack */
		else
#endif
		if ((u_int)buflen > MCLBYTES)
			return (EINVAL);
	}
	m = m_get(M_TRYWAIT, type);
	if (m == NULL)
		return (ENOBUFS);
	if ((u_int)buflen > MLEN) {
		MCLGET(m, M_TRYWAIT);
		if ((m->m_flags & M_EXT) == 0) {
			/* Cluster attach failed; drop the bare mbuf. */
			m_free(m);
			return (ENOBUFS);
		}
	}
	m->m_len = buflen;
	error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
	if (error)
		(void) m_free(m);
	else {
		*mp = m;
		if (type == MT_SONAME) {
			sa = mtod(m, struct sockaddr *);

#if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
			/*
			 * Old-style sockaddrs carried the family where
			 * sa_len now lives; translate when it looks
			 * like one (family 0, plausible AF in sa_len).
			 */
			if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
				sa->sa_family = sa->sa_len;
#endif
			/*
			 * NOTE(review): buflen can exceed what sa_len's
			 * narrow field can represent after the compat
			 * hack above — confirm truncation is acceptable.
			 */
			sa->sa_len = buflen;
		}
	}
	return (error);
}
1628
1629 int
1630 getsockaddr(namp, uaddr, len)
1631 struct sockaddr **namp;
1632 caddr_t uaddr;
1633 size_t len;
1634 {
1635 struct sockaddr *sa;
1636 int error;
1637
1638 if (len > SOCK_MAXADDRLEN)
1639 return (ENAMETOOLONG);
1640 if (len < offsetof(struct sockaddr, sa_data[0]))
1641 return (EINVAL);
1642 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1643 error = copyin(uaddr, sa, len);
1644 if (error) {
1645 FREE(sa, M_SONAME);
1646 } else {
1647 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1648 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1649 sa->sa_family = sa->sa_len;
1650 #endif
1651 sa->sa_len = len;
1652 *namp = sa;
1653 }
1654 return (error);
1655 }
1656
1657 /*
1658 * Detach mapped page and release resources back to the system.
1659 */
void
sf_buf_mext(void *addr, void *args)
{
	vm_page_t m;

	/* 'addr' (the mapped KVA) is unused; 'args' is the sf_buf. */
	m = sf_buf_page(args);
	sf_buf_free(args);
	vm_page_lock_queues();
	/* Drop the wiring taken when the page was handed to sendfile(). */
	vm_page_unwire(m, 0);
	/*
	 * Check for the object going away on us. This can
	 * happen since we don't hold a reference to it.
	 * If so, we're responsible for freeing the page.
	 */
	if (m->wire_count == 0 && m->object == NULL)
		vm_page_free(m);
	vm_page_unlock_queues();
}
1678
1679 /*
1680 * sendfile(2)
1681 *
1682 * MPSAFE
1683 *
1684 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
1685 * struct sf_hdtr *hdtr, off_t *sbytes, int flags)
1686 *
1687 * Send a file specified by 'fd' and starting at 'offset' to a socket
1688 * specified by 's'. Send only 'nbytes' of the file or until EOF if
1689 * nbytes == 0. Optionally add a header and/or trailer to the socket
1690 * output. If specified, write the total number of bytes sent into *sbytes.
1691 *
1692 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{

	/* compat == 0 selects the current (non-FreeBSD4) *sbytes accounting. */
	return (do_sendfile(td, uap, 0));
}
1699
1700 #ifdef COMPAT_FREEBSD4
1701 int
1702 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
1703 {
1704 struct sendfile_args args;
1705
1706 args.fd = uap->fd;
1707 args.s = uap->s;
1708 args.offset = uap->offset;
1709 args.nbytes = uap->nbytes;
1710 args.hdtr = uap->hdtr;
1711 args.sbytes = uap->sbytes;
1712 args.flags = uap->flags;
1713
1714 return (do_sendfile(td, &args, 1));
1715 }
1716 #endif /* COMPAT_FREEBSD4 */
1717
1718 static int
1719 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1720 {
1721 struct vnode *vp;
1722 struct vm_object *obj;
1723 struct socket *so = NULL;
1724 struct mbuf *m, *m_header = NULL;
1725 struct sf_buf *sf;
1726 struct vm_page *pg;
1727 struct writev_args nuap;
1728 struct sf_hdtr hdtr;
1729 struct uio *hdr_uio = NULL;
1730 off_t off, xfsize, hdtr_size, sbytes = 0;
1731 int error, headersize = 0, headersent = 0;
1732
1733 mtx_lock(&Giant);
1734
1735 hdtr_size = 0;
1736
1737 /*
1738 * The descriptor must be a regular file and have a backing VM object.
1739 */
1740 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1741 goto done;
1742 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1743 VOP_UNLOCK(vp, 0, td);
1744 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
1745 error = EINVAL;
1746 goto done;
1747 }
1748 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1749 goto done;
1750 if (so->so_type != SOCK_STREAM) {
1751 error = EINVAL;
1752 goto done;
1753 }
1754 if ((so->so_state & SS_ISCONNECTED) == 0) {
1755 error = ENOTCONN;
1756 goto done;
1757 }
1758 if (uap->offset < 0) {
1759 error = EINVAL;
1760 goto done;
1761 }
1762
1763 #ifdef MAC
1764 SOCK_LOCK(so);
1765 error = mac_check_socket_send(td->td_ucred, so);
1766 SOCK_UNLOCK(so);
1767 if (error)
1768 goto done;
1769 #endif
1770
1771 /*
1772 * If specified, get the pointer to the sf_hdtr struct for
1773 * any headers/trailers.
1774 */
1775 if (uap->hdtr != NULL) {
1776 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1777 if (error)
1778 goto done;
1779 /*
1780 * Send any headers.
1781 */
1782 if (hdtr.headers != NULL) {
1783 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1784 if (error)
1785 goto done;
1786 hdr_uio->uio_td = td;
1787 hdr_uio->uio_rw = UIO_WRITE;
1788 if (hdr_uio->uio_resid > 0) {
1789 m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0, 0);
1790 if (m_header == NULL)
1791 goto done;
1792 headersize = m_header->m_pkthdr.len;
1793 if (compat)
1794 sbytes += headersize;
1795 }
1796 }
1797 }
1798
1799 /*
1800 * Protect against multiple writers to the socket.
1801 */
1802 SOCKBUF_LOCK(&so->so_snd);
1803 (void) sblock(&so->so_snd, M_WAITOK);
1804 SOCKBUF_UNLOCK(&so->so_snd);
1805
1806 /*
1807 * Loop through the pages in the file, starting with the requested
1808 * offset. Get a file page (do I/O if necessary), map the file page
1809 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1810 * it on the socket.
1811 */
1812 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1813 vm_pindex_t pindex;
1814 vm_offset_t pgoff;
1815
1816 pindex = OFF_TO_IDX(off);
1817 VM_OBJECT_LOCK(obj);
1818 retry_lookup:
1819 /*
1820 * Calculate the amount to transfer. Not to exceed a page,
1821 * the EOF, or the passed in nbytes.
1822 */
1823 xfsize = obj->un_pager.vnp.vnp_size - off;
1824 VM_OBJECT_UNLOCK(obj);
1825 if (xfsize > PAGE_SIZE)
1826 xfsize = PAGE_SIZE;
1827 pgoff = (vm_offset_t)(off & PAGE_MASK);
1828 if (PAGE_SIZE - pgoff < xfsize)
1829 xfsize = PAGE_SIZE - pgoff;
1830 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1831 xfsize = uap->nbytes - sbytes;
1832 if (xfsize <= 0) {
1833 if (m_header != NULL) {
1834 m = m_header;
1835 m_header = NULL;
1836 SOCKBUF_LOCK(&so->so_snd);
1837 goto retry_space;
1838 } else
1839 break;
1840 }
1841 /*
1842 * Optimize the non-blocking case by looking at the socket space
1843 * before going to the extra work of constituting the sf_buf.
1844 */
1845 SOCKBUF_LOCK(&so->so_snd);
1846 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1847 if (so->so_snd.sb_state & SBS_CANTSENDMORE)
1848 error = EPIPE;
1849 else
1850 error = EAGAIN;
1851 sbunlock(&so->so_snd);
1852 SOCKBUF_UNLOCK(&so->so_snd);
1853 goto done;
1854 }
1855 SOCKBUF_UNLOCK(&so->so_snd);
1856 VM_OBJECT_LOCK(obj);
1857 /*
1858 * Attempt to look up the page.
1859 *
1860 * Allocate if not found
1861 *
1862 * Wait and loop if busy.
1863 */
1864 pg = vm_page_lookup(obj, pindex);
1865
1866 if (pg == NULL) {
1867 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NOBUSY |
1868 VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
1869 if (pg == NULL) {
1870 VM_OBJECT_UNLOCK(obj);
1871 VM_WAIT;
1872 VM_OBJECT_LOCK(obj);
1873 goto retry_lookup;
1874 }
1875 vm_page_lock_queues();
1876 } else {
1877 vm_page_lock_queues();
1878 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
1879 goto retry_lookup;
1880 /*
1881 * Wire the page so it does not get ripped out from
1882 * under us.
1883 */
1884 vm_page_wire(pg);
1885 }
1886
1887 /*
1888 * If page is not valid for what we need, initiate I/O
1889 */
1890
1891 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) {
1892 VM_OBJECT_UNLOCK(obj);
1893 } else if (uap->flags & SF_NODISKIO) {
1894 error = EBUSY;
1895 } else {
1896 int bsize, resid;
1897
1898 /*
1899 * Ensure that our page is still around when the I/O
1900 * completes.
1901 */
1902 vm_page_io_start(pg);
1903 vm_page_unlock_queues();
1904 VM_OBJECT_UNLOCK(obj);
1905
1906 /*
1907 * Get the page from backing store.
1908 */
1909 bsize = vp->v_mount->mnt_stat.f_iosize;
1910 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
1911 /*
1912 * XXXMAC: Because we don't have fp->f_cred here,
1913 * we pass in NOCRED. This is probably wrong, but
1914 * is consistent with our original implementation.
1915 */
1916 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
1917 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
1918 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
1919 td->td_ucred, NOCRED, &resid, td);
1920 VOP_UNLOCK(vp, 0, td);
1921 VM_OBJECT_LOCK(obj);
1922 vm_page_lock_queues();
1923 vm_page_io_finish(pg);
1924 if (!error)
1925 VM_OBJECT_UNLOCK(obj);
1926 mbstat.sf_iocnt++;
1927 }
1928
1929 if (error) {
1930 vm_page_unwire(pg, 0);
1931 /*
1932 * See if anyone else might know about this page.
1933 * If not and it is not valid, then free it.
1934 */
1935 if (pg->wire_count == 0 && pg->valid == 0 &&
1936 pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1937 pg->hold_count == 0) {
1938 vm_page_free(pg);
1939 }
1940 vm_page_unlock_queues();
1941 VM_OBJECT_UNLOCK(obj);
1942 SOCKBUF_LOCK(&so->so_snd);
1943 sbunlock(&so->so_snd);
1944 SOCKBUF_UNLOCK(&so->so_snd);
1945 goto done;
1946 }
1947 vm_page_unlock_queues();
1948
1949 /*
1950 * Get a sendfile buf. We usually wait as long as necessary,
1951 * but this wait can be interrupted.
1952 */
1953 if ((sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) {
1954 mbstat.sf_allocfail++;
1955 vm_page_lock_queues();
1956 vm_page_unwire(pg, 0);
1957 if (pg->wire_count == 0 && pg->object == NULL)
1958 vm_page_free(pg);
1959 vm_page_unlock_queues();
1960 SOCKBUF_LOCK(&so->so_snd);
1961 sbunlock(&so->so_snd);
1962 SOCKBUF_UNLOCK(&so->so_snd);
1963 error = EINTR;
1964 goto done;
1965 }
1966
1967 /*
1968 * Get an mbuf header and set it up as having external storage.
1969 */
1970 if (m_header)
1971 MGET(m, M_TRYWAIT, MT_DATA);
1972 else
1973 MGETHDR(m, M_TRYWAIT, MT_DATA);
1974 if (m == NULL) {
1975 error = ENOBUFS;
1976 sf_buf_mext((void *)sf_buf_kva(sf), sf);
1977 SOCKBUF_LOCK(&so->so_snd);
1978 sbunlock(&so->so_snd);
1979 SOCKBUF_UNLOCK(&so->so_snd);
1980 goto done;
1981 }
1982 /*
1983 * Setup external storage for mbuf.
1984 */
1985 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY,
1986 EXT_SFBUF);
1987 m->m_data = (char *)sf_buf_kva(sf) + pgoff;
1988 m->m_pkthdr.len = m->m_len = xfsize;
1989
1990 if (m_header) {
1991 m_cat(m_header, m);
1992 m = m_header;
1993 m_header = NULL;
1994 m_fixhdr(m);
1995 }
1996
1997 /*
1998 * Add the buffer to the socket buffer chain.
1999 */
2000 SOCKBUF_LOCK(&so->so_snd);
2001 retry_space:
2002 /*
2003 * Make sure that the socket is still able to take more data.
2004 * CANTSENDMORE being true usually means that the connection
2005 * was closed. so_error is true when an error was sensed after
2006 * a previous send.
2007 * The state is checked after the page mapping and buffer
2008 * allocation above since those operations may block and make
2009 * any socket checks stale. From this point forward, nothing
2010 * blocks before the pru_send (or more accurately, any blocking
2011 * results in a loop back to here to re-check).
2012 */
2013 SOCKBUF_LOCK_ASSERT(&so->so_snd);
2014 if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) {
2015 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2016 error = EPIPE;
2017 } else {
2018 error = so->so_error;
2019 so->so_error = 0;
2020 }
2021 m_freem(m);
2022 sbunlock(&so->so_snd);
2023 SOCKBUF_UNLOCK(&so->so_snd);
2024 goto done;
2025 }
2026 /*
2027 * Wait for socket space to become available. We do this just
2028 * after checking the connection state above in order to avoid
2029 * a race condition with sbwait().
2030 */
2031 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
2032 if (so->so_state & SS_NBIO) {
2033 m_freem(m);
2034 sbunlock(&so->so_snd);
2035 SOCKBUF_UNLOCK(&so->so_snd);
2036 error = EAGAIN;
2037 goto done;
2038 }
2039 error = sbwait(&so->so_snd);
2040 /*
2041 * An error from sbwait usually indicates that we've
2042 * been interrupted by a signal. If we've sent anything
2043 * then return bytes sent, otherwise return the error.
2044 */
2045 if (error) {
2046 m_freem(m);
2047 sbunlock(&so->so_snd);
2048 SOCKBUF_UNLOCK(&so->so_snd);
2049 goto done;
2050 }
2051 goto retry_space;
2052 }
2053 SOCKBUF_UNLOCK(&so->so_snd);
2054 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
2055 if (error) {
2056 SOCKBUF_LOCK(&so->so_snd);
2057 sbunlock(&so->so_snd);
2058 SOCKBUF_UNLOCK(&so->so_snd);
2059 goto done;
2060 }
2061 headersent = 1;
2062 }
2063 SOCKBUF_LOCK(&so->so_snd);
2064 sbunlock(&so->so_snd);
2065 SOCKBUF_UNLOCK(&so->so_snd);
2066
2067 /*
2068 * Send trailers. Wimp out and use writev(2).
2069 */
2070 if (uap->hdtr != NULL && hdtr.trailers != NULL) {
2071 nuap.fd = uap->s;
2072 nuap.iovp = hdtr.trailers;
2073 nuap.iovcnt = hdtr.trl_cnt;
2074 error = writev(td, &nuap);
2075 if (error)
2076 goto done;
2077 if (compat)
2078 sbytes += td->td_retval[0];
2079 else
2080 hdtr_size += td->td_retval[0];
2081 }
2082
2083 done:
2084 if (headersent) {
2085 if (!compat)
2086 hdtr_size += headersize;
2087 } else {
2088 if (compat)
2089 sbytes -= headersize;
2090 }
2091 /*
2092 * If there was no error we have to clear td->td_retval[0]
2093 * because it may have been set by writev.
2094 */
2095 if (error == 0) {
2096 td->td_retval[0] = 0;
2097 }
2098 if (uap->sbytes != NULL) {
2099 if (!compat)
2100 sbytes += hdtr_size;
2101 copyout(&sbytes, uap->sbytes, sizeof(off_t));
2102 }
2103 if (vp)
2104 vrele(vp);
2105 if (so)
2106 fputsock(so);
2107 if (hdr_uio != NULL)
2108 free(hdr_uio, M_IOV);
2109 if (m_header)
2110 m_freem(m_header);
2111
2112 mtx_unlock(&Giant);
2113
2114 if (error == ERESTART)
2115 error = EINTR;
2116
2117 return (error);
2118 }
Cache object: 5cf52cfbf4e8171abde8cc3d08ea1183
|