1 /*
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
33 */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD: releng/5.3/sys/kern/uipc_syscalls.c 136588 2004-10-16 08:43:07Z cvs2svn $");
37
38 #include "opt_compat.h"
39 #include "opt_ktrace.h"
40 #include "opt_mac.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/lock.h>
46 #include <sys/mac.h>
47 #include <sys/mutex.h>
48 #include <sys/sysproto.h>
49 #include <sys/malloc.h>
50 #include <sys/filedesc.h>
51 #include <sys/event.h>
52 #include <sys/proc.h>
53 #include <sys/fcntl.h>
54 #include <sys/file.h>
55 #include <sys/filio.h>
56 #include <sys/mount.h>
57 #include <sys/mbuf.h>
58 #include <sys/protosw.h>
59 #include <sys/sf_buf.h>
60 #include <sys/socket.h>
61 #include <sys/socketvar.h>
62 #include <sys/signalvar.h>
63 #include <sys/syscallsubr.h>
64 #include <sys/sysctl.h>
65 #include <sys/uio.h>
66 #include <sys/vnode.h>
67 #ifdef KTRACE
68 #include <sys/ktrace.h>
69 #endif
70
71 #include <vm/vm.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_page.h>
74 #include <vm/vm_pageout.h>
75 #include <vm/vm_kern.h>
76 #include <vm/vm_extern.h>
77
/*
 * Forward declarations of the file-local workers.
 *
 * sendit() and recvit() are the shared back ends that the send and receive
 * system calls in this file funnel into.
 */
static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);

/*
 * Workers shared by a system call and its old-socket counterpart.  In
 * accept1() and getsockname1() a non-zero `compat' makes the returned
 * address use the old sockaddr layout when COMPAT_OLDSOCK is defined.
 * NOTE(review): confirm `compat' has the same meaning for do_sendfile()
 * and getpeername1().
 */
static int accept1(struct thread *td, struct accept_args *uap, int compat);
static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
static int getsockname1(struct thread *td, struct getsockname_args *uap,
    int compat);
static int getpeername1(struct thread *td, struct getpeername_args *uap,
    int compat);
87
/*
 * NSFBUFS-related variables and associated sysctls
 *
 * All three counters are exported under kern.ipc.  nsfbufs is registered
 * with CTLFLAG_RDTUN; the peak and in-use counters are registered
 * CTLFLAG_RD.
 */
int nsfbufs;		/* maximum number of sendfile(2) sf_bufs available */
int nsfbufspeak;	/* number of sf_bufs at peak usage */
int nsfbufsused;	/* number of sf_bufs currently in use */

SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
    "Maximum number of sendfile(2) sf_bufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
    "Number of sendfile(2) sf_bufs at peak usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
    "Number of sendfile(2) sf_bufs in use");
102
/*
 * System call interface to the socket abstraction.
 *
 * COMPAT_OLDSOCK gates the old-style entry points (oaccept(), osend(),
 * orecv(), ...) and the old sockaddr/msghdr handling in the shared
 * workers.  It is enabled whenever the kernel is built with COMPAT_43.
 */
#if defined(COMPAT_43)
#define COMPAT_OLDSOCK
#endif
109
110 /*
111 * MPSAFE
112 */
113 int
114 socket(td, uap)
115 struct thread *td;
116 register struct socket_args /* {
117 int domain;
118 int type;
119 int protocol;
120 } */ *uap;
121 {
122 struct filedesc *fdp;
123 struct socket *so;
124 struct file *fp;
125 int fd, error;
126
127 fdp = td->td_proc->p_fd;
128 error = falloc(td, &fp, &fd);
129 if (error)
130 return (error);
131 /* An extra reference on `fp' has been held for us by falloc(). */
132 NET_LOCK_GIANT();
133 error = socreate(uap->domain, &so, uap->type, uap->protocol,
134 td->td_ucred, td);
135 NET_UNLOCK_GIANT();
136 FILEDESC_LOCK(fdp);
137 if (error) {
138 if (fdp->fd_ofiles[fd] == fp) {
139 fdp->fd_ofiles[fd] = NULL;
140 fdunused(fdp, fd);
141 FILEDESC_UNLOCK(fdp);
142 fdrop(fp, td);
143 } else {
144 FILEDESC_UNLOCK(fdp);
145 }
146 } else {
147 fp->f_data = so; /* already has ref count */
148 fp->f_flag = FREAD|FWRITE;
149 fp->f_ops = &socketops;
150 fp->f_type = DTYPE_SOCKET;
151 FILEDESC_UNLOCK(fdp);
152 td->td_retval[0] = fd;
153 }
154 fdrop(fp, td);
155 return (error);
156 }
157
158 /*
159 * MPSAFE
160 */
161 /* ARGSUSED */
162 int
163 bind(td, uap)
164 struct thread *td;
165 register struct bind_args /* {
166 int s;
167 caddr_t name;
168 int namelen;
169 } */ *uap;
170 {
171 struct sockaddr *sa;
172 int error;
173
174 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
175 return (error);
176
177 return (kern_bind(td, uap->s, sa));
178 }
179
180 int
181 kern_bind(td, fd, sa)
182 struct thread *td;
183 int fd;
184 struct sockaddr *sa;
185 {
186 struct socket *so;
187 int error;
188
189 NET_LOCK_GIANT();
190 if ((error = fgetsock(td, fd, &so, NULL)) != 0)
191 goto done2;
192 #ifdef MAC
193 SOCK_LOCK(so);
194 error = mac_check_socket_bind(td->td_ucred, so, sa);
195 SOCK_UNLOCK(so);
196 if (error)
197 goto done1;
198 #endif
199 error = sobind(so, sa, td);
200 #ifdef MAC
201 done1:
202 #endif
203 fputsock(so);
204 done2:
205 NET_UNLOCK_GIANT();
206 FREE(sa, M_SONAME);
207 return (error);
208 }
209
210 /*
211 * MPSAFE
212 */
213 /* ARGSUSED */
214 int
215 listen(td, uap)
216 struct thread *td;
217 register struct listen_args /* {
218 int s;
219 int backlog;
220 } */ *uap;
221 {
222 struct socket *so;
223 int error;
224
225 NET_LOCK_GIANT();
226 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
227 #ifdef MAC
228 SOCK_LOCK(so);
229 error = mac_check_socket_listen(td->td_ucred, so);
230 SOCK_UNLOCK(so);
231 if (error)
232 goto done;
233 #endif
234 error = solisten(so, uap->backlog, td);
235 #ifdef MAC
236 done:
237 #endif
238 fputsock(so);
239 }
240 NET_UNLOCK_GIANT();
241 return(error);
242 }
243
244 /*
245 * accept1()
246 * MPSAFE
247 */
248 static int
249 accept1(td, uap, compat)
250 struct thread *td;
251 register struct accept_args /* {
252 int s;
253 struct sockaddr * __restrict name;
254 socklen_t * __restrict anamelen;
255 } */ *uap;
256 int compat;
257 {
258 struct filedesc *fdp;
259 struct file *nfp = NULL;
260 struct sockaddr *sa = NULL;
261 socklen_t namelen;
262 int error;
263 struct socket *head, *so;
264 int fd;
265 u_int fflag;
266 pid_t pgid;
267 int tmp;
268
269 fdp = td->td_proc->p_fd;
270 if (uap->name) {
271 error = copyin(uap->anamelen, &namelen, sizeof (namelen));
272 if(error)
273 return (error);
274 if (namelen < 0)
275 return (EINVAL);
276 }
277 NET_LOCK_GIANT();
278 error = fgetsock(td, uap->s, &head, &fflag);
279 if (error)
280 goto done2;
281 if ((head->so_options & SO_ACCEPTCONN) == 0) {
282 error = EINVAL;
283 goto done;
284 }
285 error = falloc(td, &nfp, &fd);
286 if (error)
287 goto done;
288 ACCEPT_LOCK();
289 if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
290 ACCEPT_UNLOCK();
291 error = EWOULDBLOCK;
292 goto noconnection;
293 }
294 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
295 if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
296 head->so_error = ECONNABORTED;
297 break;
298 }
299 error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
300 "accept", 0);
301 if (error) {
302 ACCEPT_UNLOCK();
303 goto noconnection;
304 }
305 }
306 if (head->so_error) {
307 error = head->so_error;
308 head->so_error = 0;
309 ACCEPT_UNLOCK();
310 goto noconnection;
311 }
312 so = TAILQ_FIRST(&head->so_comp);
313 KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
314 KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));
315
316 /*
317 * Before changing the flags on the socket, we have to bump the
318 * reference count. Otherwise, if the protocol calls sofree(),
319 * the socket will be released due to a zero refcount.
320 */
321 SOCK_LOCK(so);
322 soref(so); /* file descriptor reference */
323 SOCK_UNLOCK(so);
324
325 TAILQ_REMOVE(&head->so_comp, so, so_list);
326 head->so_qlen--;
327 so->so_state |= (head->so_state & SS_NBIO);
328 so->so_qstate &= ~SQ_COMP;
329 so->so_head = NULL;
330
331 ACCEPT_UNLOCK();
332
333 /* An extra reference on `nfp' has been held for us by falloc(). */
334 td->td_retval[0] = fd;
335
336 /* connection has been removed from the listen queue */
337 KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);
338
339 pgid = fgetown(&head->so_sigio);
340 if (pgid != 0)
341 fsetown(pgid, &so->so_sigio);
342
343 FILE_LOCK(nfp);
344 nfp->f_data = so; /* nfp has ref count from falloc */
345 nfp->f_flag = fflag;
346 nfp->f_ops = &socketops;
347 nfp->f_type = DTYPE_SOCKET;
348 FILE_UNLOCK(nfp);
349 /* Sync socket nonblocking/async state with file flags */
350 tmp = fflag & FNONBLOCK;
351 (void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
352 tmp = fflag & FASYNC;
353 (void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
354 sa = 0;
355 error = soaccept(so, &sa);
356 if (error) {
357 /*
358 * return a namelen of zero for older code which might
359 * ignore the return value from accept.
360 */
361 if (uap->name != NULL) {
362 namelen = 0;
363 (void) copyout(&namelen,
364 uap->anamelen, sizeof(*uap->anamelen));
365 }
366 goto noconnection;
367 }
368 if (sa == NULL) {
369 namelen = 0;
370 if (uap->name)
371 goto gotnoname;
372 error = 0;
373 goto done;
374 }
375 if (uap->name) {
376 /* check sa_len before it is destroyed */
377 if (namelen > sa->sa_len)
378 namelen = sa->sa_len;
379 #ifdef COMPAT_OLDSOCK
380 if (compat)
381 ((struct osockaddr *)sa)->sa_family =
382 sa->sa_family;
383 #endif
384 error = copyout(sa, uap->name, (u_int)namelen);
385 if (!error)
386 gotnoname:
387 error = copyout(&namelen,
388 uap->anamelen, sizeof (*uap->anamelen));
389 }
390 noconnection:
391 if (sa)
392 FREE(sa, M_SONAME);
393
394 /*
395 * close the new descriptor, assuming someone hasn't ripped it
396 * out from under us.
397 */
398 if (error) {
399 FILEDESC_LOCK(fdp);
400 if (fdp->fd_ofiles[fd] == nfp) {
401 fdp->fd_ofiles[fd] = NULL;
402 fdunused(fdp, fd);
403 FILEDESC_UNLOCK(fdp);
404 fdrop(nfp, td);
405 } else {
406 FILEDESC_UNLOCK(fdp);
407 }
408 }
409
410 /*
411 * Release explicitly held references before returning.
412 */
413 done:
414 if (nfp != NULL)
415 fdrop(nfp, td);
416 fputsock(head);
417 done2:
418 NET_UNLOCK_GIANT();
419 return (error);
420 }
421
/*
 * accept() system call: accept1() with the current sockaddr layout
 * (compat == 0).
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
accept(struct thread *td, struct accept_args *uap)
{
	const int compat = 0;

	return (accept1(td, uap, compat));
}
433
#ifdef COMPAT_OLDSOCK
/*
 * Old accept() system call: accept1() with the old sockaddr layout
 * (compat == 1).
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
oaccept(struct thread *td, struct accept_args *uap)
{
	const int compat = 1;

	return (accept1(td, uap, compat));
}
#endif /* COMPAT_OLDSOCK */
447
448 /*
449 * MPSAFE
450 */
451 /* ARGSUSED */
452 int
453 connect(td, uap)
454 struct thread *td;
455 register struct connect_args /* {
456 int s;
457 caddr_t name;
458 int namelen;
459 } */ *uap;
460 {
461 struct sockaddr *sa;
462 int error;
463
464 error = getsockaddr(&sa, uap->name, uap->namelen);
465 if (error)
466 return (error);
467
468 return (kern_connect(td, uap->s, sa));
469 }
470
471
472 int
473 kern_connect(td, fd, sa)
474 struct thread *td;
475 int fd;
476 struct sockaddr *sa;
477 {
478 struct socket *so;
479 int error, s;
480 int interrupted = 0;
481
482 NET_LOCK_GIANT();
483 if ((error = fgetsock(td, fd, &so, NULL)) != 0)
484 goto done2;
485 if (so->so_state & SS_ISCONNECTING) {
486 error = EALREADY;
487 goto done1;
488 }
489 #ifdef MAC
490 SOCK_LOCK(so);
491 error = mac_check_socket_connect(td->td_ucred, so, sa);
492 SOCK_UNLOCK(so);
493 if (error)
494 goto bad;
495 #endif
496 error = soconnect(so, sa, td);
497 if (error)
498 goto bad;
499 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
500 error = EINPROGRESS;
501 goto done1;
502 }
503 s = splnet();
504 SOCK_LOCK(so);
505 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
506 error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
507 "connec", 0);
508 if (error) {
509 if (error == EINTR || error == ERESTART)
510 interrupted = 1;
511 break;
512 }
513 }
514 if (error == 0) {
515 error = so->so_error;
516 so->so_error = 0;
517 }
518 SOCK_UNLOCK(so);
519 splx(s);
520 bad:
521 if (!interrupted)
522 so->so_state &= ~SS_ISCONNECTING;
523 if (error == ERESTART)
524 error = EINTR;
525 done1:
526 fputsock(so);
527 done2:
528 NET_UNLOCK_GIANT();
529 FREE(sa, M_SONAME);
530 return (error);
531 }
532
533 /*
534 * MPSAFE
535 */
536 int
537 socketpair(td, uap)
538 struct thread *td;
539 register struct socketpair_args /* {
540 int domain;
541 int type;
542 int protocol;
543 int *rsv;
544 } */ *uap;
545 {
546 register struct filedesc *fdp = td->td_proc->p_fd;
547 struct file *fp1, *fp2;
548 struct socket *so1, *so2;
549 int fd, error, sv[2];
550
551 NET_LOCK_GIANT();
552 error = socreate(uap->domain, &so1, uap->type, uap->protocol,
553 td->td_ucred, td);
554 if (error)
555 goto done2;
556 error = socreate(uap->domain, &so2, uap->type, uap->protocol,
557 td->td_ucred, td);
558 if (error)
559 goto free1;
560 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */
561 error = falloc(td, &fp1, &fd);
562 if (error)
563 goto free2;
564 sv[0] = fd;
565 fp1->f_data = so1; /* so1 already has ref count */
566 error = falloc(td, &fp2, &fd);
567 if (error)
568 goto free3;
569 fp2->f_data = so2; /* so2 already has ref count */
570 sv[1] = fd;
571 error = soconnect2(so1, so2);
572 if (error)
573 goto free4;
574 if (uap->type == SOCK_DGRAM) {
575 /*
576 * Datagram socket connection is asymmetric.
577 */
578 error = soconnect2(so2, so1);
579 if (error)
580 goto free4;
581 }
582 FILE_LOCK(fp1);
583 fp1->f_flag = FREAD|FWRITE;
584 fp1->f_ops = &socketops;
585 fp1->f_type = DTYPE_SOCKET;
586 FILE_UNLOCK(fp1);
587 FILE_LOCK(fp2);
588 fp2->f_flag = FREAD|FWRITE;
589 fp2->f_ops = &socketops;
590 fp2->f_type = DTYPE_SOCKET;
591 FILE_UNLOCK(fp2);
592 error = copyout(sv, uap->rsv, 2 * sizeof (int));
593 fdrop(fp1, td);
594 fdrop(fp2, td);
595 goto done2;
596 free4:
597 FILEDESC_LOCK(fdp);
598 if (fdp->fd_ofiles[sv[1]] == fp2) {
599 fdp->fd_ofiles[sv[1]] = NULL;
600 fdunused(fdp, sv[1]);
601 FILEDESC_UNLOCK(fdp);
602 fdrop(fp2, td);
603 } else {
604 FILEDESC_UNLOCK(fdp);
605 }
606 fdrop(fp2, td);
607 free3:
608 FILEDESC_LOCK(fdp);
609 if (fdp->fd_ofiles[sv[0]] == fp1) {
610 fdp->fd_ofiles[sv[0]] = NULL;
611 fdunused(fdp, sv[0]);
612 FILEDESC_UNLOCK(fdp);
613 fdrop(fp1, td);
614 } else {
615 FILEDESC_UNLOCK(fdp);
616 }
617 fdrop(fp1, td);
618 free2:
619 (void)soclose(so2);
620 free1:
621 (void)soclose(so1);
622 done2:
623 NET_UNLOCK_GIANT();
624 return (error);
625 }
626
627 static int
628 sendit(td, s, mp, flags)
629 register struct thread *td;
630 int s;
631 register struct msghdr *mp;
632 int flags;
633 {
634 struct mbuf *control;
635 struct sockaddr *to;
636 int error;
637
638 if (mp->msg_name != NULL) {
639 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
640 if (error) {
641 to = NULL;
642 goto bad;
643 }
644 mp->msg_name = to;
645 } else {
646 to = NULL;
647 }
648
649 if (mp->msg_control) {
650 if (mp->msg_controllen < sizeof(struct cmsghdr)
651 #ifdef COMPAT_OLDSOCK
652 && mp->msg_flags != MSG_COMPAT
653 #endif
654 ) {
655 error = EINVAL;
656 goto bad;
657 }
658 error = sockargs(&control, mp->msg_control,
659 mp->msg_controllen, MT_CONTROL);
660 if (error)
661 goto bad;
662 #ifdef COMPAT_OLDSOCK
663 if (mp->msg_flags == MSG_COMPAT) {
664 register struct cmsghdr *cm;
665
666 M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
667 if (control == 0) {
668 error = ENOBUFS;
669 goto bad;
670 } else {
671 cm = mtod(control, struct cmsghdr *);
672 cm->cmsg_len = control->m_len;
673 cm->cmsg_level = SOL_SOCKET;
674 cm->cmsg_type = SCM_RIGHTS;
675 }
676 }
677 #endif
678 } else {
679 control = NULL;
680 }
681
682 error = kern_sendit(td, s, mp, flags, control);
683
684 bad:
685 if (to)
686 FREE(to, M_SONAME);
687 return (error);
688 }
689
690 int
691 kern_sendit(td, s, mp, flags, control)
692 struct thread *td;
693 int s;
694 struct msghdr *mp;
695 int flags;
696 struct mbuf *control;
697 {
698 struct uio auio;
699 struct iovec *iov;
700 struct socket *so;
701 int i;
702 int len, error;
703 #ifdef KTRACE
704 struct uio *ktruio = NULL;
705 #endif
706
707 NET_LOCK_GIANT();
708 if ((error = fgetsock(td, s, &so, NULL)) != 0)
709 goto bad2;
710
711 #ifdef MAC
712 SOCK_LOCK(so);
713 error = mac_check_socket_send(td->td_ucred, so);
714 SOCK_UNLOCK(so);
715 if (error)
716 goto bad;
717 #endif
718
719 auio.uio_iov = mp->msg_iov;
720 auio.uio_iovcnt = mp->msg_iovlen;
721 auio.uio_segflg = UIO_USERSPACE;
722 auio.uio_rw = UIO_WRITE;
723 auio.uio_td = td;
724 auio.uio_offset = 0; /* XXX */
725 auio.uio_resid = 0;
726 iov = mp->msg_iov;
727 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
728 if ((auio.uio_resid += iov->iov_len) < 0) {
729 error = EINVAL;
730 goto bad;
731 }
732 }
733 #ifdef KTRACE
734 if (KTRPOINT(td, KTR_GENIO))
735 ktruio = cloneuio(&auio);
736 #endif
737 len = auio.uio_resid;
738 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio,
739 0, control, flags, td);
740 if (error) {
741 if (auio.uio_resid != len && (error == ERESTART ||
742 error == EINTR || error == EWOULDBLOCK))
743 error = 0;
744 /* Generation of SIGPIPE can be controlled per socket */
745 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) {
746 PROC_LOCK(td->td_proc);
747 psignal(td->td_proc, SIGPIPE);
748 PROC_UNLOCK(td->td_proc);
749 }
750 }
751 if (error == 0)
752 td->td_retval[0] = len - auio.uio_resid;
753 #ifdef KTRACE
754 if (ktruio != NULL) {
755 ktruio->uio_resid = td->td_retval[0];
756 ktrgenio(s, UIO_WRITE, ktruio, error);
757 }
758 #endif
759 bad:
760 fputsock(so);
761 bad2:
762 NET_UNLOCK_GIANT();
763 return (error);
764 }
765
766 /*
767 * MPSAFE
768 */
769 int
770 sendto(td, uap)
771 struct thread *td;
772 register struct sendto_args /* {
773 int s;
774 caddr_t buf;
775 size_t len;
776 int flags;
777 caddr_t to;
778 int tolen;
779 } */ *uap;
780 {
781 struct msghdr msg;
782 struct iovec aiov;
783 int error;
784
785 msg.msg_name = uap->to;
786 msg.msg_namelen = uap->tolen;
787 msg.msg_iov = &aiov;
788 msg.msg_iovlen = 1;
789 msg.msg_control = 0;
790 #ifdef COMPAT_OLDSOCK
791 msg.msg_flags = 0;
792 #endif
793 aiov.iov_base = uap->buf;
794 aiov.iov_len = uap->len;
795 error = sendit(td, uap->s, &msg, uap->flags);
796 return (error);
797 }
798
799 #ifdef COMPAT_OLDSOCK
800 /*
801 * MPSAFE
802 */
803 int
804 osend(td, uap)
805 struct thread *td;
806 register struct osend_args /* {
807 int s;
808 caddr_t buf;
809 int len;
810 int flags;
811 } */ *uap;
812 {
813 struct msghdr msg;
814 struct iovec aiov;
815 int error;
816
817 msg.msg_name = 0;
818 msg.msg_namelen = 0;
819 msg.msg_iov = &aiov;
820 msg.msg_iovlen = 1;
821 aiov.iov_base = uap->buf;
822 aiov.iov_len = uap->len;
823 msg.msg_control = 0;
824 msg.msg_flags = 0;
825 error = sendit(td, uap->s, &msg, uap->flags);
826 return (error);
827 }
828
829 /*
830 * MPSAFE
831 */
832 int
833 osendmsg(td, uap)
834 struct thread *td;
835 struct osendmsg_args /* {
836 int s;
837 caddr_t msg;
838 int flags;
839 } */ *uap;
840 {
841 struct msghdr msg;
842 struct iovec *iov;
843 int error;
844
845 error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
846 if (error)
847 return (error);
848 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
849 if (error)
850 return (error);
851 msg.msg_iov = iov;
852 msg.msg_flags = MSG_COMPAT;
853 error = sendit(td, uap->s, &msg, uap->flags);
854 free(iov, M_IOV);
855 return (error);
856 }
857 #endif
858
859 /*
860 * MPSAFE
861 */
862 int
863 sendmsg(td, uap)
864 struct thread *td;
865 struct sendmsg_args /* {
866 int s;
867 caddr_t msg;
868 int flags;
869 } */ *uap;
870 {
871 struct msghdr msg;
872 struct iovec *iov;
873 int error;
874
875 error = copyin(uap->msg, &msg, sizeof (msg));
876 if (error)
877 return (error);
878 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
879 if (error)
880 return (error);
881 msg.msg_iov = iov;
882 #ifdef COMPAT_OLDSOCK
883 msg.msg_flags = 0;
884 #endif
885 error = sendit(td, uap->s, &msg, uap->flags);
886 free(iov, M_IOV);
887 return (error);
888 }
889
890 static int
891 recvit(td, s, mp, namelenp)
892 struct thread *td;
893 int s;
894 struct msghdr *mp;
895 void *namelenp;
896 {
897 struct uio auio;
898 struct iovec *iov;
899 int i;
900 socklen_t len;
901 int error;
902 struct mbuf *m, *control = 0;
903 caddr_t ctlbuf;
904 struct socket *so;
905 struct sockaddr *fromsa = 0;
906 #ifdef KTRACE
907 struct uio *ktruio = NULL;
908 #endif
909
910 NET_LOCK_GIANT();
911 if ((error = fgetsock(td, s, &so, NULL)) != 0) {
912 NET_UNLOCK_GIANT();
913 return (error);
914 }
915
916 #ifdef MAC
917 SOCK_LOCK(so);
918 error = mac_check_socket_receive(td->td_ucred, so);
919 SOCK_UNLOCK(so);
920 if (error) {
921 fputsock(so);
922 NET_UNLOCK_GIANT();
923 return (error);
924 }
925 #endif
926
927 auio.uio_iov = mp->msg_iov;
928 auio.uio_iovcnt = mp->msg_iovlen;
929 auio.uio_segflg = UIO_USERSPACE;
930 auio.uio_rw = UIO_READ;
931 auio.uio_td = td;
932 auio.uio_offset = 0; /* XXX */
933 auio.uio_resid = 0;
934 iov = mp->msg_iov;
935 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
936 if ((auio.uio_resid += iov->iov_len) < 0) {
937 fputsock(so);
938 NET_UNLOCK_GIANT();
939 return (EINVAL);
940 }
941 }
942 #ifdef KTRACE
943 if (KTRPOINT(td, KTR_GENIO))
944 ktruio = cloneuio(&auio);
945 #endif
946 len = auio.uio_resid;
947 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
948 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
949 &mp->msg_flags);
950 if (error) {
951 if (auio.uio_resid != (int)len && (error == ERESTART ||
952 error == EINTR || error == EWOULDBLOCK))
953 error = 0;
954 }
955 #ifdef KTRACE
956 if (ktruio != NULL) {
957 ktruio->uio_resid = (int)len - auio.uio_resid;
958 ktrgenio(s, UIO_READ, ktruio, error);
959 }
960 #endif
961 if (error)
962 goto out;
963 td->td_retval[0] = (int)len - auio.uio_resid;
964 if (mp->msg_name) {
965 len = mp->msg_namelen;
966 if (len <= 0 || fromsa == 0)
967 len = 0;
968 else {
969 /* save sa_len before it is destroyed by MSG_COMPAT */
970 len = MIN(len, fromsa->sa_len);
971 #ifdef COMPAT_OLDSOCK
972 if (mp->msg_flags & MSG_COMPAT)
973 ((struct osockaddr *)fromsa)->sa_family =
974 fromsa->sa_family;
975 #endif
976 error = copyout(fromsa, mp->msg_name, (unsigned)len);
977 if (error)
978 goto out;
979 }
980 mp->msg_namelen = len;
981 if (namelenp &&
982 (error = copyout(&len, namelenp, sizeof (socklen_t)))) {
983 #ifdef COMPAT_OLDSOCK
984 if (mp->msg_flags & MSG_COMPAT)
985 error = 0; /* old recvfrom didn't check */
986 else
987 #endif
988 goto out;
989 }
990 }
991 if (mp->msg_control) {
992 #ifdef COMPAT_OLDSOCK
993 /*
994 * We assume that old recvmsg calls won't receive access
995 * rights and other control info, esp. as control info
996 * is always optional and those options didn't exist in 4.3.
997 * If we receive rights, trim the cmsghdr; anything else
998 * is tossed.
999 */
1000 if (control && mp->msg_flags & MSG_COMPAT) {
1001 if (mtod(control, struct cmsghdr *)->cmsg_level !=
1002 SOL_SOCKET ||
1003 mtod(control, struct cmsghdr *)->cmsg_type !=
1004 SCM_RIGHTS) {
1005 mp->msg_controllen = 0;
1006 goto out;
1007 }
1008 control->m_len -= sizeof (struct cmsghdr);
1009 control->m_data += sizeof (struct cmsghdr);
1010 }
1011 #endif
1012 len = mp->msg_controllen;
1013 m = control;
1014 mp->msg_controllen = 0;
1015 ctlbuf = mp->msg_control;
1016
1017 while (m && len > 0) {
1018 unsigned int tocopy;
1019
1020 if (len >= m->m_len)
1021 tocopy = m->m_len;
1022 else {
1023 mp->msg_flags |= MSG_CTRUNC;
1024 tocopy = len;
1025 }
1026
1027 if ((error = copyout(mtod(m, caddr_t),
1028 ctlbuf, tocopy)) != 0)
1029 goto out;
1030
1031 ctlbuf += tocopy;
1032 len -= tocopy;
1033 m = m->m_next;
1034 }
1035 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
1036 }
1037 out:
1038 fputsock(so);
1039 NET_UNLOCK_GIANT();
1040 if (fromsa)
1041 FREE(fromsa, M_SONAME);
1042 if (control)
1043 m_freem(control);
1044 return (error);
1045 }
1046
1047 /*
1048 * MPSAFE
1049 */
1050 int
1051 recvfrom(td, uap)
1052 struct thread *td;
1053 register struct recvfrom_args /* {
1054 int s;
1055 caddr_t buf;
1056 size_t len;
1057 int flags;
1058 struct sockaddr * __restrict from;
1059 socklen_t * __restrict fromlenaddr;
1060 } */ *uap;
1061 {
1062 struct msghdr msg;
1063 struct iovec aiov;
1064 int error;
1065
1066 if (uap->fromlenaddr) {
1067 error = copyin(uap->fromlenaddr,
1068 &msg.msg_namelen, sizeof (msg.msg_namelen));
1069 if (error)
1070 goto done2;
1071 } else {
1072 msg.msg_namelen = 0;
1073 }
1074 msg.msg_name = uap->from;
1075 msg.msg_iov = &aiov;
1076 msg.msg_iovlen = 1;
1077 aiov.iov_base = uap->buf;
1078 aiov.iov_len = uap->len;
1079 msg.msg_control = 0;
1080 msg.msg_flags = uap->flags;
1081 error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1082 done2:
1083 return(error);
1084 }
1085
#ifdef COMPAT_OLDSOCK
/*
 * Old recvfrom() system call: recvfrom() with MSG_COMPAT added to the
 * caller's flags.  Note that uap->flags is modified in place.
 *
 * MPSAFE
 */
int
orecvfrom(struct thread *td, struct recvfrom_args *uap)
{

	uap->flags = uap->flags | MSG_COMPAT;
	return (recvfrom(td, uap));
}
#endif
1100
1101
1102 #ifdef COMPAT_OLDSOCK
1103 /*
1104 * MPSAFE
1105 */
1106 int
1107 orecv(td, uap)
1108 struct thread *td;
1109 register struct orecv_args /* {
1110 int s;
1111 caddr_t buf;
1112 int len;
1113 int flags;
1114 } */ *uap;
1115 {
1116 struct msghdr msg;
1117 struct iovec aiov;
1118 int error;
1119
1120 msg.msg_name = 0;
1121 msg.msg_namelen = 0;
1122 msg.msg_iov = &aiov;
1123 msg.msg_iovlen = 1;
1124 aiov.iov_base = uap->buf;
1125 aiov.iov_len = uap->len;
1126 msg.msg_control = 0;
1127 msg.msg_flags = uap->flags;
1128 error = recvit(td, uap->s, &msg, NULL);
1129 return (error);
1130 }
1131
1132 /*
1133 * Old recvmsg. This code takes advantage of the fact that the old msghdr
1134 * overlays the new one, missing only the flags, and with the (old) access
1135 * rights where the control fields are now.
1136 *
1137 * MPSAFE
1138 */
1139 int
1140 orecvmsg(td, uap)
1141 struct thread *td;
1142 struct orecvmsg_args /* {
1143 int s;
1144 struct omsghdr *msg;
1145 int flags;
1146 } */ *uap;
1147 {
1148 struct msghdr msg;
1149 struct iovec *iov;
1150 int error;
1151
1152 error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1153 if (error)
1154 return (error);
1155 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1156 if (error)
1157 return (error);
1158 msg.msg_flags = uap->flags | MSG_COMPAT;
1159 msg.msg_iov = iov;
1160 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1161 if (msg.msg_controllen && error == 0)
1162 error = copyout(&msg.msg_controllen,
1163 &uap->msg->msg_accrightslen, sizeof (int));
1164 free(iov, M_IOV);
1165 return (error);
1166 }
1167 #endif
1168
1169 /*
1170 * MPSAFE
1171 */
1172 int
1173 recvmsg(td, uap)
1174 struct thread *td;
1175 struct recvmsg_args /* {
1176 int s;
1177 struct msghdr *msg;
1178 int flags;
1179 } */ *uap;
1180 {
1181 struct msghdr msg;
1182 struct iovec *uiov, *iov;
1183 int error;
1184
1185 error = copyin(uap->msg, &msg, sizeof (msg));
1186 if (error)
1187 return (error);
1188 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1189 if (error)
1190 return (error);
1191 msg.msg_flags = uap->flags;
1192 #ifdef COMPAT_OLDSOCK
1193 msg.msg_flags &= ~MSG_COMPAT;
1194 #endif
1195 uiov = msg.msg_iov;
1196 msg.msg_iov = iov;
1197 error = recvit(td, uap->s, &msg, NULL);
1198 if (error == 0) {
1199 msg.msg_iov = uiov;
1200 error = copyout(&msg, uap->msg, sizeof(msg));
1201 }
1202 free(iov, M_IOV);
1203 return (error);
1204 }
1205
1206 /*
1207 * MPSAFE
1208 */
1209 /* ARGSUSED */
1210 int
1211 shutdown(td, uap)
1212 struct thread *td;
1213 register struct shutdown_args /* {
1214 int s;
1215 int how;
1216 } */ *uap;
1217 {
1218 struct socket *so;
1219 int error;
1220
1221 NET_LOCK_GIANT();
1222 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1223 error = soshutdown(so, uap->how);
1224 fputsock(so);
1225 }
1226 NET_UNLOCK_GIANT();
1227 return(error);
1228 }
1229
1230 /*
1231 * MPSAFE
1232 */
1233 /* ARGSUSED */
1234 int
1235 setsockopt(td, uap)
1236 struct thread *td;
1237 register struct setsockopt_args /* {
1238 int s;
1239 int level;
1240 int name;
1241 caddr_t val;
1242 int valsize;
1243 } */ *uap;
1244 {
1245
1246 return (kern_setsockopt(td, uap->s, uap->level, uap->name,
1247 uap->val, UIO_USERSPACE, uap->valsize));
1248 }
1249
1250 int
1251 kern_setsockopt(td, s, level, name, val, valseg, valsize)
1252 struct thread *td;
1253 int s;
1254 int level;
1255 int name;
1256 void *val;
1257 enum uio_seg valseg;
1258 socklen_t valsize;
1259 {
1260 int error;
1261 struct socket *so;
1262 struct sockopt sopt;
1263
1264 if (val == NULL && valsize != 0)
1265 return (EFAULT);
1266 if (valsize < 0)
1267 return (EINVAL);
1268
1269 sopt.sopt_dir = SOPT_SET;
1270 sopt.sopt_level = level;
1271 sopt.sopt_name = name;
1272 sopt.sopt_val = val;
1273 sopt.sopt_valsize = valsize;
1274 switch (valseg) {
1275 case UIO_USERSPACE:
1276 sopt.sopt_td = td;
1277 break;
1278 case UIO_SYSSPACE:
1279 sopt.sopt_td = NULL;
1280 break;
1281 default:
1282 panic("kern_setsockopt called with bad valseg");
1283 }
1284
1285 NET_LOCK_GIANT();
1286 if ((error = fgetsock(td, s, &so, NULL)) == 0) {
1287 error = sosetopt(so, &sopt);
1288 fputsock(so);
1289 }
1290 NET_UNLOCK_GIANT();
1291 return(error);
1292 }
1293
1294 /*
1295 * MPSAFE
1296 */
1297 /* ARGSUSED */
1298 int
1299 getsockopt(td, uap)
1300 struct thread *td;
1301 register struct getsockopt_args /* {
1302 int s;
1303 int level;
1304 int name;
1305 void * __restrict val;
1306 socklen_t * __restrict avalsize;
1307 } */ *uap;
1308 {
1309 socklen_t valsize;
1310 int error;
1311
1312 if (uap->val) {
1313 error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1314 if (error)
1315 return (error);
1316 }
1317
1318 error = kern_getsockopt(td, uap->s, uap->level, uap->name,
1319 uap->val, UIO_USERSPACE, &valsize);
1320
1321 if (error == 0)
1322 error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1323 return (error);
1324 }
1325
1326 /*
1327 * Kernel version of getsockopt.
1328 * optval can be a userland or userspace. optlen is always a kernel pointer.
1329 */
1330 int
1331 kern_getsockopt(td, s, level, name, val, valseg, valsize)
1332 struct thread *td;
1333 int s;
1334 int level;
1335 int name;
1336 void *val;
1337 enum uio_seg valseg;
1338 socklen_t *valsize;
1339 {
1340 int error;
1341 struct socket *so;
1342 struct sockopt sopt;
1343
1344 if (val == NULL)
1345 *valsize = 0;
1346 if (*valsize < 0)
1347 return (EINVAL);
1348
1349 sopt.sopt_dir = SOPT_GET;
1350 sopt.sopt_level = level;
1351 sopt.sopt_name = name;
1352 sopt.sopt_val = val;
1353 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
1354 switch (valseg) {
1355 case UIO_USERSPACE:
1356 sopt.sopt_td = td;
1357 break;
1358 case UIO_SYSSPACE:
1359 sopt.sopt_td = NULL;
1360 break;
1361 default:
1362 panic("kern_getsockopt called with bad valseg");
1363 }
1364
1365 NET_LOCK_GIANT();
1366 if ((error = fgetsock(td, s, &so, NULL)) == 0) {
1367 error = sogetopt(so, &sopt);
1368 *valsize = sopt.sopt_valsize;
1369 fputsock(so);
1370 }
1371 NET_UNLOCK_GIANT();
1372 return (error);
1373 }
1374
1375 /*
1376 * getsockname1() - Get socket name.
1377 *
1378 * MPSAFE
1379 */
1380 /* ARGSUSED */
1381 static int
1382 getsockname1(td, uap, compat)
1383 struct thread *td;
1384 register struct getsockname_args /* {
1385 int fdes;
1386 struct sockaddr * __restrict asa;
1387 socklen_t * __restrict alen;
1388 } */ *uap;
1389 int compat;
1390 {
1391 struct socket *so;
1392 struct sockaddr *sa;
1393 socklen_t len;
1394 int error;
1395
1396 NET_LOCK_GIANT();
1397 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1398 goto done2;
1399 error = copyin(uap->alen, &len, sizeof (len));
1400 if (error)
1401 goto done1;
1402 if (len < 0) {
1403 error = EINVAL;
1404 goto done1;
1405 }
1406 sa = 0;
1407 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1408 if (error)
1409 goto bad;
1410 if (sa == 0) {
1411 len = 0;
1412 goto gotnothing;
1413 }
1414
1415 len = MIN(len, sa->sa_len);
1416 #ifdef COMPAT_OLDSOCK
1417 if (compat)
1418 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1419 #endif
1420 error = copyout(sa, uap->asa, (u_int)len);
1421 if (error == 0)
1422 gotnothing:
1423 error = copyout(&len, uap->alen, sizeof (len));
1424 bad:
1425 if (sa)
1426 FREE(sa, M_SONAME);
1427 done1:
1428 fputsock(so);
1429 done2:
1430 NET_UNLOCK_GIANT();
1431 return (error);
1432 }
1433
/*
 * getsockname(2) system call: front end to getsockname1() with the
 * compat flag cleared.
 *
 * MPSAFE
 */
int
getsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(td, uap, 0);
	return (error);
}
1445
#ifdef COMPAT_OLDSOCK
/*
 * Old-socket compat variant of getsockname(2): front end to
 * getsockname1() with the compat flag set.
 *
 * MPSAFE
 */
int
ogetsockname(td, uap)
	struct thread *td;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(td, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
1459
1460 /*
1461 * getpeername1() - Get name of peer for connected socket.
1462 *
1463 * MPSAFE
1464 */
1465 /* ARGSUSED */
1466 static int
1467 getpeername1(td, uap, compat)
1468 struct thread *td;
1469 register struct getpeername_args /* {
1470 int fdes;
1471 struct sockaddr * __restrict asa;
1472 socklen_t * __restrict alen;
1473 } */ *uap;
1474 int compat;
1475 {
1476 struct socket *so;
1477 struct sockaddr *sa;
1478 socklen_t len;
1479 int error;
1480
1481 NET_LOCK_GIANT();
1482 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1483 goto done2;
1484 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1485 error = ENOTCONN;
1486 goto done1;
1487 }
1488 error = copyin(uap->alen, &len, sizeof (len));
1489 if (error)
1490 goto done1;
1491 if (len < 0) {
1492 error = EINVAL;
1493 goto done1;
1494 }
1495 sa = 0;
1496 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1497 if (error)
1498 goto bad;
1499 if (sa == 0) {
1500 len = 0;
1501 goto gotnothing;
1502 }
1503 len = MIN(len, sa->sa_len);
1504 #ifdef COMPAT_OLDSOCK
1505 if (compat)
1506 ((struct osockaddr *)sa)->sa_family =
1507 sa->sa_family;
1508 #endif
1509 error = copyout(sa, uap->asa, (u_int)len);
1510 if (error)
1511 goto bad;
1512 gotnothing:
1513 error = copyout(&len, uap->alen, sizeof (len));
1514 bad:
1515 if (sa)
1516 FREE(sa, M_SONAME);
1517 done1:
1518 fputsock(so);
1519 done2:
1520 NET_UNLOCK_GIANT();
1521 return (error);
1522 }
1523
/*
 * getpeername(2) system call: front end to getpeername1() with the
 * compat flag cleared.
 *
 * MPSAFE
 */
int
getpeername(td, uap)
	struct thread *td;
	struct getpeername_args *uap;
{
	int error;

	error = getpeername1(td, uap, 0);
	return (error);
}
1535
#ifdef COMPAT_OLDSOCK
/*
 * Old-socket compat variant of getpeername(2): front end to
 * getpeername1() with the compat flag set.
 *
 * MPSAFE
 */
int
ogetpeername(td, uap)
	struct thread *td;
	struct ogetpeername_args *uap;
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(td, args, 1));
}
#endif /* COMPAT_OLDSOCK */
1550
1551 int
1552 sockargs(mp, buf, buflen, type)
1553 struct mbuf **mp;
1554 caddr_t buf;
1555 int buflen, type;
1556 {
1557 register struct sockaddr *sa;
1558 register struct mbuf *m;
1559 int error;
1560
1561 if ((u_int)buflen > MLEN) {
1562 #ifdef COMPAT_OLDSOCK
1563 if (type == MT_SONAME && (u_int)buflen <= 112)
1564 buflen = MLEN; /* unix domain compat. hack */
1565 else
1566 #endif
1567 if ((u_int)buflen > MCLBYTES)
1568 return (EINVAL);
1569 }
1570 m = m_get(M_TRYWAIT, type);
1571 if (m == NULL)
1572 return (ENOBUFS);
1573 if ((u_int)buflen > MLEN) {
1574 MCLGET(m, M_TRYWAIT);
1575 if ((m->m_flags & M_EXT) == 0) {
1576 m_free(m);
1577 return (ENOBUFS);
1578 }
1579 }
1580 m->m_len = buflen;
1581 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1582 if (error)
1583 (void) m_free(m);
1584 else {
1585 *mp = m;
1586 if (type == MT_SONAME) {
1587 sa = mtod(m, struct sockaddr *);
1588
1589 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1590 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1591 sa->sa_family = sa->sa_len;
1592 #endif
1593 sa->sa_len = buflen;
1594 }
1595 }
1596 return (error);
1597 }
1598
1599 int
1600 getsockaddr(namp, uaddr, len)
1601 struct sockaddr **namp;
1602 caddr_t uaddr;
1603 size_t len;
1604 {
1605 struct sockaddr *sa;
1606 int error;
1607
1608 if (len > SOCK_MAXADDRLEN)
1609 return (ENAMETOOLONG);
1610 if (len < offsetof(struct sockaddr, sa_data[0]))
1611 return (EINVAL);
1612 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1613 error = copyin(uaddr, sa, len);
1614 if (error) {
1615 FREE(sa, M_SONAME);
1616 } else {
1617 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1618 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1619 sa->sa_family = sa->sa_len;
1620 #endif
1621 sa->sa_len = len;
1622 *namp = sa;
1623 }
1624 return (error);
1625 }
1626
1627 /*
1628 * Detach mapped page and release resources back to the system.
1629 */
1630 void
1631 sf_buf_mext(void *addr, void *args)
1632 {
1633 vm_page_t m;
1634
1635 m = sf_buf_page(args);
1636 sf_buf_free(args);
1637 vm_page_lock_queues();
1638 vm_page_unwire(m, 0);
1639 /*
1640 * Check for the object going away on us. This can
1641 * happen since we don't hold a reference to it.
1642 * If so, we're responsible for freeing the page.
1643 */
1644 if (m->wire_count == 0 && m->object == NULL)
1645 vm_page_free(m);
1646 vm_page_unlock_queues();
1647 }
1648
/*
 * sendfile(2)
 *
 * MPSAFE
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Send a file specified by 'fd' and starting at 'offset' to a socket
 * specified by 's'. Send only 'nbytes' of the file or until EOF if
 * nbytes == 0. Optionally add a header and/or trailer to the socket
 * output. If specified, write the total number of bytes sent into *sbytes.
 *
 * This entry point simply runs do_sendfile() with the compat flag
 * cleared.
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
	int error;

	error = do_sendfile(td, uap, 0);
	return (error);
}
1669
#ifdef COMPAT_FREEBSD4
/*
 * FreeBSD 4 compat entry point for sendfile(2).
 *
 * Repackages the old argument structure field by field into a struct
 * sendfile_args and runs do_sendfile() with the compat flag set.
 */
int
freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
{
	struct sendfile_args sfargs;
	int error;

	sfargs.fd = uap->fd;
	sfargs.s = uap->s;
	sfargs.offset = uap->offset;
	sfargs.nbytes = uap->nbytes;
	sfargs.hdtr = uap->hdtr;
	sfargs.sbytes = uap->sbytes;
	sfargs.flags = uap->flags;

	error = do_sendfile(td, &sfargs, 1);
	return (error);
}
#endif /* COMPAT_FREEBSD4 */
1687
1688 static int
1689 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1690 {
1691 struct vnode *vp;
1692 struct vm_object *obj;
1693 struct socket *so = NULL;
1694 struct mbuf *m, *m_header = NULL;
1695 struct sf_buf *sf;
1696 struct vm_page *pg;
1697 struct writev_args nuap;
1698 struct sf_hdtr hdtr;
1699 struct uio *hdr_uio = NULL;
1700 off_t off, xfsize, hdtr_size, sbytes = 0;
1701 int error, headersize = 0, headersent = 0;
1702
1703 mtx_lock(&Giant);
1704
1705 hdtr_size = 0;
1706
1707 /*
1708 * The descriptor must be a regular file and have a backing VM object.
1709 */
1710 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1711 goto done;
1712 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1713 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
1714 error = EINVAL;
1715 VOP_UNLOCK(vp, 0, td);
1716 goto done;
1717 }
1718 VOP_UNLOCK(vp, 0, td);
1719 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1720 goto done;
1721 if (so->so_type != SOCK_STREAM) {
1722 error = EINVAL;
1723 goto done;
1724 }
1725 if ((so->so_state & SS_ISCONNECTED) == 0) {
1726 error = ENOTCONN;
1727 goto done;
1728 }
1729 if (uap->offset < 0) {
1730 error = EINVAL;
1731 goto done;
1732 }
1733
1734 #ifdef MAC
1735 SOCK_LOCK(so);
1736 error = mac_check_socket_send(td->td_ucred, so);
1737 SOCK_UNLOCK(so);
1738 if (error)
1739 goto done;
1740 #endif
1741
1742 /*
1743 * If specified, get the pointer to the sf_hdtr struct for
1744 * any headers/trailers.
1745 */
1746 if (uap->hdtr != NULL) {
1747 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1748 if (error)
1749 goto done;
1750 /*
1751 * Send any headers.
1752 */
1753 if (hdtr.headers != NULL) {
1754 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1755 if (error)
1756 goto done;
1757 hdr_uio->uio_td = td;
1758 hdr_uio->uio_rw = UIO_WRITE;
1759 if (hdr_uio->uio_resid > 0) {
1760 m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0);
1761 if (m_header == NULL)
1762 goto done;
1763 headersize = m_header->m_pkthdr.len;
1764 if (compat)
1765 sbytes += headersize;
1766 }
1767 }
1768 }
1769
1770 /*
1771 * Protect against multiple writers to the socket.
1772 */
1773 SOCKBUF_LOCK(&so->so_snd);
1774 (void) sblock(&so->so_snd, M_WAITOK);
1775 SOCKBUF_UNLOCK(&so->so_snd);
1776
1777 /*
1778 * Loop through the pages in the file, starting with the requested
1779 * offset. Get a file page (do I/O if necessary), map the file page
1780 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1781 * it on the socket.
1782 */
1783 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1784 vm_pindex_t pindex;
1785 vm_offset_t pgoff;
1786
1787 pindex = OFF_TO_IDX(off);
1788 VM_OBJECT_LOCK(obj);
1789 retry_lookup:
1790 /*
1791 * Calculate the amount to transfer. Not to exceed a page,
1792 * the EOF, or the passed in nbytes.
1793 */
1794 xfsize = obj->un_pager.vnp.vnp_size - off;
1795 VM_OBJECT_UNLOCK(obj);
1796 if (xfsize > PAGE_SIZE)
1797 xfsize = PAGE_SIZE;
1798 pgoff = (vm_offset_t)(off & PAGE_MASK);
1799 if (PAGE_SIZE - pgoff < xfsize)
1800 xfsize = PAGE_SIZE - pgoff;
1801 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1802 xfsize = uap->nbytes - sbytes;
1803 if (xfsize <= 0) {
1804 if (m_header != NULL) {
1805 m = m_header;
1806 m_header = NULL;
1807 SOCKBUF_LOCK(&so->so_snd);
1808 goto retry_space;
1809 } else
1810 break;
1811 }
1812 /*
1813 * Optimize the non-blocking case by looking at the socket space
1814 * before going to the extra work of constituting the sf_buf.
1815 */
1816 SOCKBUF_LOCK(&so->so_snd);
1817 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1818 if (so->so_snd.sb_state & SBS_CANTSENDMORE)
1819 error = EPIPE;
1820 else
1821 error = EAGAIN;
1822 sbunlock(&so->so_snd);
1823 SOCKBUF_UNLOCK(&so->so_snd);
1824 goto done;
1825 }
1826 SOCKBUF_UNLOCK(&so->so_snd);
1827 VM_OBJECT_LOCK(obj);
1828 /*
1829 * Attempt to look up the page.
1830 *
1831 * Allocate if not found
1832 *
1833 * Wait and loop if busy.
1834 */
1835 pg = vm_page_lookup(obj, pindex);
1836
1837 if (pg == NULL) {
1838 pg = vm_page_alloc(obj, pindex,
1839 VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
1840 if (pg == NULL) {
1841 VM_OBJECT_UNLOCK(obj);
1842 VM_WAIT;
1843 VM_OBJECT_LOCK(obj);
1844 goto retry_lookup;
1845 }
1846 vm_page_lock_queues();
1847 vm_page_wakeup(pg);
1848 } else {
1849 vm_page_lock_queues();
1850 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
1851 goto retry_lookup;
1852 /*
1853 * Wire the page so it does not get ripped out from
1854 * under us.
1855 */
1856 vm_page_wire(pg);
1857 }
1858
1859 /*
1860 * If page is not valid for what we need, initiate I/O
1861 */
1862
1863 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) {
1864 VM_OBJECT_UNLOCK(obj);
1865 } else if (uap->flags & SF_NODISKIO) {
1866 error = EBUSY;
1867 } else {
1868 int bsize, resid;
1869
1870 /*
1871 * Ensure that our page is still around when the I/O
1872 * completes.
1873 */
1874 vm_page_io_start(pg);
1875 vm_page_unlock_queues();
1876 VM_OBJECT_UNLOCK(obj);
1877
1878 /*
1879 * Get the page from backing store.
1880 */
1881 bsize = vp->v_mount->mnt_stat.f_iosize;
1882 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
1883 /*
1884 * XXXMAC: Because we don't have fp->f_cred here,
1885 * we pass in NOCRED. This is probably wrong, but
1886 * is consistent with our original implementation.
1887 */
1888 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
1889 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
1890 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
1891 td->td_ucred, NOCRED, &resid, td);
1892 VOP_UNLOCK(vp, 0, td);
1893 if (error)
1894 VM_OBJECT_LOCK(obj);
1895 vm_page_lock_queues();
1896 vm_page_io_finish(pg);
1897 mbstat.sf_iocnt++;
1898 }
1899
1900 if (error) {
1901 vm_page_unwire(pg, 0);
1902 /*
1903 * See if anyone else might know about this page.
1904 * If not and it is not valid, then free it.
1905 */
1906 if (pg->wire_count == 0 && pg->valid == 0 &&
1907 pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1908 pg->hold_count == 0) {
1909 vm_page_busy(pg);
1910 vm_page_free(pg);
1911 }
1912 vm_page_unlock_queues();
1913 VM_OBJECT_UNLOCK(obj);
1914 SOCKBUF_LOCK(&so->so_snd);
1915 sbunlock(&so->so_snd);
1916 SOCKBUF_UNLOCK(&so->so_snd);
1917 goto done;
1918 }
1919 vm_page_unlock_queues();
1920
1921 /*
1922 * Get a sendfile buf. We usually wait as long as necessary,
1923 * but this wait can be interrupted.
1924 */
1925 if ((sf = sf_buf_alloc(pg, PCATCH)) == NULL) {
1926 mbstat.sf_allocfail++;
1927 vm_page_lock_queues();
1928 vm_page_unwire(pg, 0);
1929 if (pg->wire_count == 0 && pg->object == NULL)
1930 vm_page_free(pg);
1931 vm_page_unlock_queues();
1932 SOCKBUF_LOCK(&so->so_snd);
1933 sbunlock(&so->so_snd);
1934 SOCKBUF_UNLOCK(&so->so_snd);
1935 error = EINTR;
1936 goto done;
1937 }
1938
1939 /*
1940 * Get an mbuf header and set it up as having external storage.
1941 */
1942 if (m_header)
1943 MGET(m, M_TRYWAIT, MT_DATA);
1944 else
1945 MGETHDR(m, M_TRYWAIT, MT_DATA);
1946 if (m == NULL) {
1947 error = ENOBUFS;
1948 sf_buf_mext((void *)sf_buf_kva(sf), sf);
1949 SOCKBUF_LOCK(&so->so_snd);
1950 sbunlock(&so->so_snd);
1951 SOCKBUF_UNLOCK(&so->so_snd);
1952 goto done;
1953 }
1954 /*
1955 * Setup external storage for mbuf.
1956 */
1957 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY,
1958 EXT_SFBUF);
1959 m->m_data = (char *)sf_buf_kva(sf) + pgoff;
1960 m->m_pkthdr.len = m->m_len = xfsize;
1961
1962 if (m_header) {
1963 m_cat(m_header, m);
1964 m = m_header;
1965 m_header = NULL;
1966 m_fixhdr(m);
1967 }
1968
1969 /*
1970 * Add the buffer to the socket buffer chain.
1971 */
1972 SOCKBUF_LOCK(&so->so_snd);
1973 retry_space:
1974 /*
1975 * Make sure that the socket is still able to take more data.
1976 * CANTSENDMORE being true usually means that the connection
1977 * was closed. so_error is true when an error was sensed after
1978 * a previous send.
1979 * The state is checked after the page mapping and buffer
1980 * allocation above since those operations may block and make
1981 * any socket checks stale. From this point forward, nothing
1982 * blocks before the pru_send (or more accurately, any blocking
1983 * results in a loop back to here to re-check).
1984 */
1985 SOCKBUF_LOCK_ASSERT(&so->so_snd);
1986 if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) {
1987 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
1988 error = EPIPE;
1989 } else {
1990 error = so->so_error;
1991 so->so_error = 0;
1992 }
1993 m_freem(m);
1994 sbunlock(&so->so_snd);
1995 SOCKBUF_UNLOCK(&so->so_snd);
1996 goto done;
1997 }
1998 /*
1999 * Wait for socket space to become available. We do this just
2000 * after checking the connection state above in order to avoid
2001 * a race condition with sbwait().
2002 */
2003 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
2004 if (so->so_state & SS_NBIO) {
2005 m_freem(m);
2006 sbunlock(&so->so_snd);
2007 SOCKBUF_UNLOCK(&so->so_snd);
2008 error = EAGAIN;
2009 goto done;
2010 }
2011 error = sbwait(&so->so_snd);
2012 /*
2013 * An error from sbwait usually indicates that we've
2014 * been interrupted by a signal. If we've sent anything
2015 * then return bytes sent, otherwise return the error.
2016 */
2017 if (error) {
2018 m_freem(m);
2019 sbunlock(&so->so_snd);
2020 SOCKBUF_UNLOCK(&so->so_snd);
2021 goto done;
2022 }
2023 goto retry_space;
2024 }
2025 SOCKBUF_UNLOCK(&so->so_snd);
2026 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
2027 if (error) {
2028 SOCKBUF_LOCK(&so->so_snd);
2029 sbunlock(&so->so_snd);
2030 SOCKBUF_UNLOCK(&so->so_snd);
2031 goto done;
2032 }
2033 headersent = 1;
2034 }
2035 SOCKBUF_LOCK(&so->so_snd);
2036 sbunlock(&so->so_snd);
2037 SOCKBUF_UNLOCK(&so->so_snd);
2038
2039 /*
2040 * Send trailers. Wimp out and use writev(2).
2041 */
2042 if (uap->hdtr != NULL && hdtr.trailers != NULL) {
2043 nuap.fd = uap->s;
2044 nuap.iovp = hdtr.trailers;
2045 nuap.iovcnt = hdtr.trl_cnt;
2046 error = writev(td, &nuap);
2047 if (error)
2048 goto done;
2049 if (compat)
2050 sbytes += td->td_retval[0];
2051 else
2052 hdtr_size += td->td_retval[0];
2053 }
2054
2055 done:
2056 if (headersent) {
2057 if (!compat)
2058 hdtr_size += headersize;
2059 } else {
2060 if (compat)
2061 sbytes -= headersize;
2062 }
2063 /*
2064 * If there was no error we have to clear td->td_retval[0]
2065 * because it may have been set by writev.
2066 */
2067 if (error == 0) {
2068 td->td_retval[0] = 0;
2069 }
2070 if (uap->sbytes != NULL) {
2071 if (!compat)
2072 sbytes += hdtr_size;
2073 copyout(&sbytes, uap->sbytes, sizeof(off_t));
2074 }
2075 if (vp)
2076 vrele(vp);
2077 if (so)
2078 fputsock(so);
2079 if (hdr_uio != NULL)
2080 free(hdr_uio, M_IOV);
2081 if (m_header)
2082 m_freem(m_header);
2083
2084 mtx_unlock(&Giant);
2085
2086 if (error == ERESTART)
2087 error = EINTR;
2088
2089 return (error);
2090 }
Cache object: 58cd70ba825cb829b2c1d197a8521ac9
|