1 /*-
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
33 */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD: releng/6.1/sys/kern/uipc_syscalls.c 176272 2008-02-14 11:47:39Z simon $");
37
38 #include "opt_compat.h"
39 #include "opt_ktrace.h"
40 #include "opt_mac.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/lock.h>
46 #include <sys/mac.h>
47 #include <sys/mutex.h>
48 #include <sys/sysproto.h>
49 #include <sys/malloc.h>
50 #include <sys/filedesc.h>
51 #include <sys/event.h>
52 #include <sys/proc.h>
53 #include <sys/fcntl.h>
54 #include <sys/file.h>
55 #include <sys/filio.h>
56 #include <sys/mount.h>
57 #include <sys/mbuf.h>
58 #include <sys/protosw.h>
59 #include <sys/sf_buf.h>
60 #include <sys/socket.h>
61 #include <sys/socketvar.h>
62 #include <sys/signalvar.h>
63 #include <sys/syscallsubr.h>
64 #include <sys/sysctl.h>
65 #include <sys/uio.h>
66 #include <sys/vnode.h>
67 #ifdef KTRACE
68 #include <sys/ktrace.h>
69 #endif
70
71 #include <vm/vm.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_page.h>
74 #include <vm/vm_pageout.h>
75 #include <vm/vm_kern.h>
76 #include <vm/vm_extern.h>
77
/*
 * Forward declarations for file-local helpers that are defined further
 * down in this file.  sendit() and recvit() are the shared back ends for
 * the send and receive system call families; the remaining helpers take a
 * trailing `compat' argument supplied by their wrappers.
 */
static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);

static int accept1(struct thread *td, struct accept_args *uap, int compat);
static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
static int getsockname1(struct thread *td, struct getsockname_args *uap,
	int compat);
static int getpeername1(struct thread *td, struct getpeername_args *uap,
	int compat);
87
/*
 * NSFBUFS-related variables and associated sysctls.
 *
 * Each counter is exported read-only below the kern.ipc sysctl node; the
 * description string attached to each SYSCTL_INT states what it counts.
 */
int nsfbufs;		/* exported as kern.ipc.nsfbufs (CTLFLAG_RDTUN) */
int nsfbufspeak;	/* exported as kern.ipc.nsfbufspeak (CTLFLAG_RD) */
int nsfbufsused;	/* exported as kern.ipc.nsfbufsused (CTLFLAG_RD) */

SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
	"Maximum number of sendfile(2) sf_bufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
	"Number of sendfile(2) sf_bufs at peak usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
	"Number of sendfile(2) sf_bufs in use");
102
103 /*
104 * Convert a user file descriptor to a kernel file entry. A reference on the
105 * file entry is held upon returning. This is lighter weight than
106 * fgetsock(), which bumps the socket reference drops the file reference
107 * count instead, as this approach avoids several additional mutex operations
108 * associated with the additional reference count.
109 */
110 static int
111 getsock(struct filedesc *fdp, int fd, struct file **fpp)
112 {
113 struct file *fp;
114 int error;
115
116 fp = NULL;
117 if (fdp == NULL)
118 error = EBADF;
119 else {
120 FILEDESC_LOCK_FAST(fdp);
121 fp = fget_locked(fdp, fd);
122 if (fp == NULL)
123 error = EBADF;
124 else if (fp->f_type != DTYPE_SOCKET) {
125 fp = NULL;
126 error = ENOTSOCK;
127 } else {
128 fhold(fp);
129 error = 0;
130 }
131 FILEDESC_UNLOCK_FAST(fdp);
132 }
133 *fpp = fp;
134 return (error);
135 }
136
/*
 * System call interface to the socket abstraction.
 *
 * COMPAT_OLDSOCK is defined whenever the kernel is built with COMPAT_43;
 * it gates the old-style socket entry points and the MSG_COMPAT handling
 * in this file.
 */
#if defined(COMPAT_43)
#define COMPAT_OLDSOCK
#endif
143
144 /*
145 * MPSAFE
146 */
147 int
148 socket(td, uap)
149 struct thread *td;
150 register struct socket_args /* {
151 int domain;
152 int type;
153 int protocol;
154 } */ *uap;
155 {
156 struct filedesc *fdp;
157 struct socket *so;
158 struct file *fp;
159 int fd, error;
160
161 #ifdef MAC
162 error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
163 uap->protocol);
164 if (error)
165 return (error);
166 #endif
167 fdp = td->td_proc->p_fd;
168 error = falloc(td, &fp, &fd);
169 if (error)
170 return (error);
171 /* An extra reference on `fp' has been held for us by falloc(). */
172 NET_LOCK_GIANT();
173 error = socreate(uap->domain, &so, uap->type, uap->protocol,
174 td->td_ucred, td);
175 NET_UNLOCK_GIANT();
176 if (error) {
177 fdclose(fdp, fp, fd, td);
178 } else {
179 FILEDESC_LOCK_FAST(fdp);
180 fp->f_data = so; /* already has ref count */
181 fp->f_flag = FREAD|FWRITE;
182 fp->f_ops = &socketops;
183 fp->f_type = DTYPE_SOCKET;
184 FILEDESC_UNLOCK_FAST(fdp);
185 td->td_retval[0] = fd;
186 }
187 fdrop(fp, td);
188 return (error);
189 }
190
191 /*
192 * MPSAFE
193 */
194 /* ARGSUSED */
195 int
196 bind(td, uap)
197 struct thread *td;
198 register struct bind_args /* {
199 int s;
200 caddr_t name;
201 int namelen;
202 } */ *uap;
203 {
204 struct sockaddr *sa;
205 int error;
206
207 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
208 return (error);
209
210 return (kern_bind(td, uap->s, sa));
211 }
212
213 int
214 kern_bind(td, fd, sa)
215 struct thread *td;
216 int fd;
217 struct sockaddr *sa;
218 {
219 struct socket *so;
220 struct file *fp;
221 int error;
222
223 NET_LOCK_GIANT();
224 error = getsock(td->td_proc->p_fd, fd, &fp);
225 if (error)
226 goto done2;
227 so = fp->f_data;
228 #ifdef MAC
229 SOCK_LOCK(so);
230 error = mac_check_socket_bind(td->td_ucred, so, sa);
231 SOCK_UNLOCK(so);
232 if (error)
233 goto done1;
234 #endif
235 error = sobind(so, sa, td);
236 #ifdef MAC
237 done1:
238 #endif
239 fdrop(fp, td);
240 done2:
241 NET_UNLOCK_GIANT();
242 FREE(sa, M_SONAME);
243 return (error);
244 }
245
246 /*
247 * MPSAFE
248 */
249 /* ARGSUSED */
250 int
251 listen(td, uap)
252 struct thread *td;
253 register struct listen_args /* {
254 int s;
255 int backlog;
256 } */ *uap;
257 {
258 struct socket *so;
259 struct file *fp;
260 int error;
261
262 NET_LOCK_GIANT();
263 error = getsock(td->td_proc->p_fd, uap->s, &fp);
264 if (error == 0) {
265 so = fp->f_data;
266 #ifdef MAC
267 SOCK_LOCK(so);
268 error = mac_check_socket_listen(td->td_ucred, so);
269 SOCK_UNLOCK(so);
270 if (error)
271 goto done;
272 #endif
273 error = solisten(so, uap->backlog, td);
274 #ifdef MAC
275 done:
276 #endif
277 fdrop(fp, td);
278 }
279 NET_UNLOCK_GIANT();
280 return(error);
281 }
282
/*
 * accept1() - common implementation behind accept() and oaccept().
 *
 * Takes the first socket off the completed-connection queue of the
 * listening socket `uap->s', installs it on a newly allocated file
 * descriptor and returns that descriptor in td->td_retval[0].  When
 * `uap->name' is non-NULL the peer address is copied out to it, truncated
 * to the length read from `uap->anamelen', and the length actually used is
 * written back through `uap->anamelen'.
 *
 * `compat' is only consulted under COMPAT_OLDSOCK, where a non-zero value
 * rewrites the address header into the old osockaddr layout before it is
 * copied out.
 *
 * Errors visible here: EINVAL if the socket is not accepting connections,
 * EWOULDBLOCK if it is non-blocking and nothing is queued, plus whatever
 * copyin(), fgetsock(), the MAC check, falloc(), msleep(), the pending
 * so_error, soaccept() or copyout() report.  On any error raised after
 * falloc() succeeded the new descriptor is closed again.
 *
 * MPSAFE
 */
static int
accept1(td, uap, compat)
	struct thread *td;
	register struct accept_args /* {
		int	s;
		struct sockaddr	* __restrict name;
		socklen_t	* __restrict anamelen;
	} */ *uap;
	int compat;
{
	struct filedesc *fdp;
	struct file *nfp = NULL;
	struct sockaddr *sa = NULL;
	socklen_t namelen;
	int error;
	struct socket *head, *so;
	int fd;
	u_int fflag;
	pid_t pgid;
	int tmp;

	fdp = td->td_proc->p_fd;
	/* The caller's buffer length is only fetched if an address is wanted. */
	if (uap->name) {
		error = copyin(uap->anamelen, &namelen, sizeof (namelen));
		if(error)
			return (error);
		if (namelen < 0)
			return (EINVAL);
	}
	NET_LOCK_GIANT();
	/* Obtain the listening socket and the file flags of its descriptor. */
	error = fgetsock(td, uap->s, &head, &fflag);
	if (error)
		goto done2;
	if ((head->so_options & SO_ACCEPTCONN) == 0) {
		error = EINVAL;
		goto done;
	}
#ifdef MAC
	SOCK_LOCK(head);
	error = mac_check_socket_accept(td->td_ucred, head);
	SOCK_UNLOCK(head);
	if (error != 0)
		goto done;
#endif
	/* Reserve the descriptor before waiting for a connection. */
	error = falloc(td, &nfp, &fd);
	if (error)
		goto done;
	ACCEPT_LOCK();
	/* Non-blocking listener with an empty queue: fail immediately. */
	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
		ACCEPT_UNLOCK();
		error = EWOULDBLOCK;
		goto noconnection;
	}
	/* Sleep until a connection completes or an error is posted. */
	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
		if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
			head->so_error = ECONNABORTED;
			break;
		}
		error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
		    "accept", 0);
		if (error) {
			ACCEPT_UNLOCK();
			goto noconnection;
		}
	}
	/* A pending error on the listener is reported once and then cleared. */
	if (head->so_error) {
		error = head->so_error;
		head->so_error = 0;
		ACCEPT_UNLOCK();
		goto noconnection;
	}
	so = TAILQ_FIRST(&head->so_comp);
	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));

	/*
	 * Before changing the flags on the socket, we have to bump the
	 * reference count.  Otherwise, if the protocol calls sofree(),
	 * the socket will be released due to a zero refcount.
	 */
	SOCK_LOCK(so);			/* soref() and so_state update */
	soref(so);			/* file descriptor reference */

	/* Detach the new socket from the listener's completed queue. */
	TAILQ_REMOVE(&head->so_comp, so, so_list);
	head->so_qlen--;
	/* The new socket inherits the listener's non-blocking state. */
	so->so_state |= (head->so_state & SS_NBIO);
	so->so_qstate &= ~SQ_COMP;
	so->so_head = NULL;

	SOCK_UNLOCK(so);
	ACCEPT_UNLOCK();

	/* An extra reference on `nfp' has been held for us by falloc(). */
	td->td_retval[0] = fd;

	/* connection has been removed from the listen queue */
	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);

	/* Propagate the listener's SIGIO owner, if any, to the new socket. */
	pgid = fgetown(&head->so_sigio);
	if (pgid != 0)
		fsetown(pgid, &so->so_sigio);

	/* Turn the blank file entry into a socket file for `so'. */
	FILE_LOCK(nfp);
	nfp->f_data = so;	/* nfp has ref count from falloc */
	nfp->f_flag = fflag;
	nfp->f_ops = &socketops;
	nfp->f_type = DTYPE_SOCKET;
	FILE_UNLOCK(nfp);
	/* Sync socket nonblocking/async state with file flags */
	tmp = fflag & FNONBLOCK;
	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
	tmp = fflag & FASYNC;
	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
	sa = 0;
	error = soaccept(so, &sa);
	if (error) {
		/*
		 * return a namelen of zero for older code which might
		 * ignore the return value from accept.
		 */
		if (uap->name != NULL) {
			namelen = 0;
			(void) copyout(&namelen,
			    uap->anamelen, sizeof(*uap->anamelen));
		}
		goto noconnection;
	}
	/* No address available: report a zero length if one was requested. */
	if (sa == NULL) {
		namelen = 0;
		if (uap->name)
			goto gotnoname;
		error = 0;
		goto done;
	}
	if (uap->name) {
		/* check sa_len before it is destroyed */
		if (namelen > sa->sa_len)
			namelen = sa->sa_len;
#ifdef COMPAT_OLDSOCK
		if (compat)
			((struct osockaddr *)sa)->sa_family =
			    sa->sa_family;
#endif
		error = copyout(sa, uap->name, (u_int)namelen);
		if (!error)
			/*
			 * `gotnoname' is also entered directly from the
			 * sa == NULL case above to write back a zero length.
			 */
gotnoname:
			error = copyout(&namelen,
			    uap->anamelen, sizeof (*uap->anamelen));
	}
noconnection:
	if (sa)
		FREE(sa, M_SONAME);

	/*
	 * close the new descriptor, assuming someone hasn't ripped it
	 * out from under us.
	 */
	if (error)
		fdclose(fdp, nfp, fd, td);

	/*
	 * Release explicitly held references before returning.
	 */
done:
	if (nfp != NULL)
		fdrop(nfp, td);
	fputsock(head);
done2:
	NET_UNLOCK_GIANT();
	return (error);
}
458
/*
 * accept(2): thin wrapper that runs accept1() with the compat flag clear.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
accept(struct thread *td, struct accept_args *uap)
{
	const int compat = 0;

	return (accept1(td, uap, compat));
}
470
#ifdef COMPAT_OLDSOCK
/*
 * Old-style accept: thin wrapper that runs accept1() with the compat flag
 * set.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
oaccept(struct thread *td, struct accept_args *uap)
{
	const int compat = 1;

	return (accept1(td, uap, compat));
}
#endif /* COMPAT_OLDSOCK */
484
485 /*
486 * MPSAFE
487 */
488 /* ARGSUSED */
489 int
490 connect(td, uap)
491 struct thread *td;
492 register struct connect_args /* {
493 int s;
494 caddr_t name;
495 int namelen;
496 } */ *uap;
497 {
498 struct sockaddr *sa;
499 int error;
500
501 error = getsockaddr(&sa, uap->name, uap->namelen);
502 if (error)
503 return (error);
504
505 return (kern_connect(td, uap->s, sa));
506 }
507
508
/*
 * Connect the socket behind descriptor `fd' to the kernel-space address
 * `sa'.
 *
 * `sa' is always freed (M_SONAME) before returning.  Results visible here:
 *	EALREADY	a connection attempt is already in progress
 *	EINPROGRESS	the socket is non-blocking and the connection has
 *			been started but is not yet complete
 *	EINTR		the wait was interrupted (ERESTART is mapped to EINTR)
 * otherwise the error from getsock(), the MAC check, soconnect() or the
 * socket's pending so_error (which is cleared once reported).
 */
int
kern_connect(td, fd, sa)
	struct thread *td;
	int fd;
	struct sockaddr *sa;
{
	struct socket *so;
	struct file *fp;
	int error;
	int interrupted = 0;	/* set when the sleep below was interrupted */

	NET_LOCK_GIANT();
	error = getsock(td->td_proc->p_fd, fd, &fp);
	if (error)
		goto done2;
	so = fp->f_data;
	/* Refuse a second connect while one is still pending. */
	if (so->so_state & SS_ISCONNECTING) {
		error = EALREADY;
		goto done1;
	}
#ifdef MAC
	SOCK_LOCK(so);
	error = mac_check_socket_connect(td->td_ucred, so, sa);
	SOCK_UNLOCK(so);
	if (error)
		goto bad;
#endif
	error = soconnect(so, sa, td);
	if (error)
		goto bad;
	/* Non-blocking socket: report progress without waiting. */
	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
		error = EINPROGRESS;
		goto done1;
	}
	SOCK_LOCK(so);
	/* Wait until the attempt finishes or an error is posted. */
	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
		    "connec", 0);
		if (error) {
			if (error == EINTR || error == ERESTART)
				interrupted = 1;
			break;
		}
	}
	/* If the sleep itself did not fail, report the pending socket error. */
	if (error == 0) {
		error = so->so_error;
		so->so_error = 0;
	}
	SOCK_UNLOCK(so);
bad:
	/*
	 * SS_ISCONNECTING is left set when the wait was interrupted; in
	 * every other case reaching here it is cleared.
	 */
	if (!interrupted)
		so->so_state &= ~SS_ISCONNECTING;
	if (error == ERESTART)
		error = EINTR;
done1:
	fdrop(fp, td);
done2:
	NET_UNLOCK_GIANT();
	FREE(sa, M_SONAME);
	return (error);
}
570
571 /*
572 * MPSAFE
573 */
574 int
575 socketpair(td, uap)
576 struct thread *td;
577 register struct socketpair_args /* {
578 int domain;
579 int type;
580 int protocol;
581 int *rsv;
582 } */ *uap;
583 {
584 register struct filedesc *fdp = td->td_proc->p_fd;
585 struct file *fp1, *fp2;
586 struct socket *so1, *so2;
587 int fd, error, sv[2];
588
589 #ifdef MAC
590 /* We might want to have a separate check for socket pairs. */
591 error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
592 uap->protocol);
593 if (error)
594 return (error);
595 #endif
596
597 NET_LOCK_GIANT();
598 error = socreate(uap->domain, &so1, uap->type, uap->protocol,
599 td->td_ucred, td);
600 if (error)
601 goto done2;
602 error = socreate(uap->domain, &so2, uap->type, uap->protocol,
603 td->td_ucred, td);
604 if (error)
605 goto free1;
606 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */
607 error = falloc(td, &fp1, &fd);
608 if (error)
609 goto free2;
610 sv[0] = fd;
611 fp1->f_data = so1; /* so1 already has ref count */
612 error = falloc(td, &fp2, &fd);
613 if (error)
614 goto free3;
615 fp2->f_data = so2; /* so2 already has ref count */
616 sv[1] = fd;
617 error = soconnect2(so1, so2);
618 if (error)
619 goto free4;
620 if (uap->type == SOCK_DGRAM) {
621 /*
622 * Datagram socket connection is asymmetric.
623 */
624 error = soconnect2(so2, so1);
625 if (error)
626 goto free4;
627 }
628 FILE_LOCK(fp1);
629 fp1->f_flag = FREAD|FWRITE;
630 fp1->f_ops = &socketops;
631 fp1->f_type = DTYPE_SOCKET;
632 FILE_UNLOCK(fp1);
633 FILE_LOCK(fp2);
634 fp2->f_flag = FREAD|FWRITE;
635 fp2->f_ops = &socketops;
636 fp2->f_type = DTYPE_SOCKET;
637 FILE_UNLOCK(fp2);
638 error = copyout(sv, uap->rsv, 2 * sizeof (int));
639 fdrop(fp1, td);
640 fdrop(fp2, td);
641 goto done2;
642 free4:
643 fdclose(fdp, fp2, sv[1], td);
644 fdrop(fp2, td);
645 free3:
646 fdclose(fdp, fp1, sv[0], td);
647 fdrop(fp1, td);
648 free2:
649 (void)soclose(so2);
650 free1:
651 (void)soclose(so1);
652 done2:
653 NET_UNLOCK_GIANT();
654 return (error);
655 }
656
657 static int
658 sendit(td, s, mp, flags)
659 register struct thread *td;
660 int s;
661 register struct msghdr *mp;
662 int flags;
663 {
664 struct mbuf *control;
665 struct sockaddr *to;
666 int error;
667
668 if (mp->msg_name != NULL) {
669 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
670 if (error) {
671 to = NULL;
672 goto bad;
673 }
674 mp->msg_name = to;
675 } else {
676 to = NULL;
677 }
678
679 if (mp->msg_control) {
680 if (mp->msg_controllen < sizeof(struct cmsghdr)
681 #ifdef COMPAT_OLDSOCK
682 && mp->msg_flags != MSG_COMPAT
683 #endif
684 ) {
685 error = EINVAL;
686 goto bad;
687 }
688 error = sockargs(&control, mp->msg_control,
689 mp->msg_controllen, MT_CONTROL);
690 if (error)
691 goto bad;
692 #ifdef COMPAT_OLDSOCK
693 if (mp->msg_flags == MSG_COMPAT) {
694 register struct cmsghdr *cm;
695
696 M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
697 if (control == 0) {
698 error = ENOBUFS;
699 goto bad;
700 } else {
701 cm = mtod(control, struct cmsghdr *);
702 cm->cmsg_len = control->m_len;
703 cm->cmsg_level = SOL_SOCKET;
704 cm->cmsg_type = SCM_RIGHTS;
705 }
706 }
707 #endif
708 } else {
709 control = NULL;
710 }
711
712 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
713
714 bad:
715 if (to)
716 FREE(to, M_SONAME);
717 return (error);
718 }
719
720 int
721 kern_sendit(td, s, mp, flags, control, segflg)
722 struct thread *td;
723 int s;
724 struct msghdr *mp;
725 int flags;
726 struct mbuf *control;
727 enum uio_seg segflg;
728 {
729 struct file *fp;
730 struct uio auio;
731 struct iovec *iov;
732 struct socket *so;
733 int i;
734 int len, error;
735 #ifdef KTRACE
736 struct uio *ktruio = NULL;
737 #endif
738
739 NET_LOCK_GIANT();
740 error = getsock(td->td_proc->p_fd, s, &fp);
741 if (error)
742 goto bad2;
743 so = (struct socket *)fp->f_data;
744
745 #ifdef MAC
746 SOCK_LOCK(so);
747 error = mac_check_socket_send(td->td_ucred, so);
748 SOCK_UNLOCK(so);
749 if (error)
750 goto bad;
751 #endif
752
753 auio.uio_iov = mp->msg_iov;
754 auio.uio_iovcnt = mp->msg_iovlen;
755 auio.uio_segflg = segflg;
756 auio.uio_rw = UIO_WRITE;
757 auio.uio_td = td;
758 auio.uio_offset = 0; /* XXX */
759 auio.uio_resid = 0;
760 iov = mp->msg_iov;
761 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
762 if ((auio.uio_resid += iov->iov_len) < 0) {
763 error = EINVAL;
764 goto bad;
765 }
766 }
767 #ifdef KTRACE
768 if (KTRPOINT(td, KTR_GENIO))
769 ktruio = cloneuio(&auio);
770 #endif
771 len = auio.uio_resid;
772 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio,
773 0, control, flags, td);
774 if (error) {
775 if (auio.uio_resid != len && (error == ERESTART ||
776 error == EINTR || error == EWOULDBLOCK))
777 error = 0;
778 /* Generation of SIGPIPE can be controlled per socket */
779 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
780 !(flags & MSG_NOSIGNAL)) {
781 PROC_LOCK(td->td_proc);
782 psignal(td->td_proc, SIGPIPE);
783 PROC_UNLOCK(td->td_proc);
784 }
785 }
786 if (error == 0)
787 td->td_retval[0] = len - auio.uio_resid;
788 #ifdef KTRACE
789 if (ktruio != NULL) {
790 ktruio->uio_resid = td->td_retval[0];
791 ktrgenio(s, UIO_WRITE, ktruio, error);
792 }
793 #endif
794 bad:
795 fdrop(fp, td);
796 bad2:
797 NET_UNLOCK_GIANT();
798 return (error);
799 }
800
801 /*
802 * MPSAFE
803 */
804 int
805 sendto(td, uap)
806 struct thread *td;
807 register struct sendto_args /* {
808 int s;
809 caddr_t buf;
810 size_t len;
811 int flags;
812 caddr_t to;
813 int tolen;
814 } */ *uap;
815 {
816 struct msghdr msg;
817 struct iovec aiov;
818 int error;
819
820 msg.msg_name = uap->to;
821 msg.msg_namelen = uap->tolen;
822 msg.msg_iov = &aiov;
823 msg.msg_iovlen = 1;
824 msg.msg_control = 0;
825 #ifdef COMPAT_OLDSOCK
826 msg.msg_flags = 0;
827 #endif
828 aiov.iov_base = uap->buf;
829 aiov.iov_len = uap->len;
830 error = sendit(td, uap->s, &msg, uap->flags);
831 return (error);
832 }
833
834 #ifdef COMPAT_OLDSOCK
835 /*
836 * MPSAFE
837 */
838 int
839 osend(td, uap)
840 struct thread *td;
841 register struct osend_args /* {
842 int s;
843 caddr_t buf;
844 int len;
845 int flags;
846 } */ *uap;
847 {
848 struct msghdr msg;
849 struct iovec aiov;
850 int error;
851
852 msg.msg_name = 0;
853 msg.msg_namelen = 0;
854 msg.msg_iov = &aiov;
855 msg.msg_iovlen = 1;
856 aiov.iov_base = uap->buf;
857 aiov.iov_len = uap->len;
858 msg.msg_control = 0;
859 msg.msg_flags = 0;
860 error = sendit(td, uap->s, &msg, uap->flags);
861 return (error);
862 }
863
864 /*
865 * MPSAFE
866 */
867 int
868 osendmsg(td, uap)
869 struct thread *td;
870 struct osendmsg_args /* {
871 int s;
872 caddr_t msg;
873 int flags;
874 } */ *uap;
875 {
876 struct msghdr msg;
877 struct iovec *iov;
878 int error;
879
880 error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
881 if (error)
882 return (error);
883 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
884 if (error)
885 return (error);
886 msg.msg_iov = iov;
887 msg.msg_flags = MSG_COMPAT;
888 error = sendit(td, uap->s, &msg, uap->flags);
889 free(iov, M_IOV);
890 return (error);
891 }
892 #endif
893
894 /*
895 * MPSAFE
896 */
897 int
898 sendmsg(td, uap)
899 struct thread *td;
900 struct sendmsg_args /* {
901 int s;
902 caddr_t msg;
903 int flags;
904 } */ *uap;
905 {
906 struct msghdr msg;
907 struct iovec *iov;
908 int error;
909
910 error = copyin(uap->msg, &msg, sizeof (msg));
911 if (error)
912 return (error);
913 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
914 if (error)
915 return (error);
916 msg.msg_iov = iov;
917 #ifdef COMPAT_OLDSOCK
918 msg.msg_flags = 0;
919 #endif
920 error = sendit(td, uap->s, &msg, uap->flags);
921 free(iov, M_IOV);
922 return (error);
923 }
924
/*
 * Common back end of the receive system calls: receive a message on the
 * socket behind descriptor `s' into the buffers described by mp->msg_iov /
 * mp->msg_iovlen.
 *
 *	mp		message header; msg_flags is passed by address to
 *			pru_soreceive(), and msg_namelen / msg_controllen are
 *			rewritten with the lengths actually stored
 *	namelenp	if non-NULL, user address that receives the source
 *			address length via copyout()
 *	segflg		address space of the iovec buffers
 *	controlp	if non-NULL, receives the control mbuf chain on
 *			success instead of it being copied to mp->msg_control
 *
 * On success td->td_retval[0] is the number of bytes received.  A receive
 * cut short by ERESTART, EINTR or EWOULDBLOCK after some data was already
 * transferred is reported as success.  The source address, when one is
 * requested through mp->msg_name, is always written with copyout().
 *
 * Any control chain that is not handed back through `controlp' is freed
 * before returning.
 */
int
kern_recvit(td, s, mp, namelenp, segflg, controlp)
	struct thread *td;
	int s;
	struct msghdr *mp;
	void *namelenp;
	enum uio_seg segflg;
	struct mbuf **controlp;
{
	struct uio auio;
	struct iovec *iov;
	int i;
	socklen_t len;
	int error;
	struct mbuf *m, *control = 0;
	caddr_t ctlbuf;
	struct file *fp;
	struct socket *so;
	struct sockaddr *fromsa = 0;
#ifdef KTRACE
	struct uio *ktruio = NULL;
#endif

	/* Start from a known value so the caller never sees a stale chain. */
	if(controlp != NULL)
		*controlp = 0;

	NET_LOCK_GIANT();
	error = getsock(td->td_proc->p_fd, s, &fp);
	if (error) {
		NET_UNLOCK_GIANT();
		return (error);
	}
	so = fp->f_data;

#ifdef MAC
	SOCK_LOCK(so);
	error = mac_check_socket_receive(td->td_ucred, so);
	SOCK_UNLOCK(so);
	if (error) {
		fdrop(fp, td);
		NET_UNLOCK_GIANT();
		return (error);
	}
#endif

	auio.uio_iov = mp->msg_iov;
	auio.uio_iovcnt = mp->msg_iovlen;
	auio.uio_segflg = segflg;
	auio.uio_rw = UIO_READ;
	auio.uio_td = td;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	iov = mp->msg_iov;
	/* Sum the iovec lengths, rejecting a total that overflows. */
	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
		if ((auio.uio_resid += iov->iov_len) < 0) {
			fdrop(fp, td);
			NET_UNLOCK_GIANT();
			return (EINVAL);
		}
	}
#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO))
		ktruio = cloneuio(&auio);
#endif
	len = auio.uio_resid;
	/* Control data is only requested if somebody is going to take it. */
	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
	    (struct mbuf **)0, (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
	    &mp->msg_flags);
	if (error) {
		/* A partial transfer that was interrupted counts as success. */
		if (auio.uio_resid != (int)len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
#ifdef KTRACE
	if (ktruio != NULL) {
		ktruio->uio_resid = (int)len - auio.uio_resid;
		ktrgenio(s, UIO_READ, ktruio, error);
	}
#endif
	if (error)
		goto out;
	td->td_retval[0] = (int)len - auio.uio_resid;
	/* From here on `len' is reused for the address length. */
	if (mp->msg_name) {
		len = mp->msg_namelen;
		if (len <= 0 || fromsa == 0)
			len = 0;
		else {
			/* save sa_len before it is destroyed by MSG_COMPAT */
			len = MIN(len, fromsa->sa_len);
#ifdef COMPAT_OLDSOCK
			if (mp->msg_flags & MSG_COMPAT)
				((struct osockaddr *)fromsa)->sa_family =
				    fromsa->sa_family;
#endif
			error = copyout(fromsa, mp->msg_name, (unsigned)len);
			if (error)
				goto out;
		}
		mp->msg_namelen = len;
		if (namelenp &&
		    (error = copyout(&len, namelenp, sizeof (socklen_t)))) {
#ifdef COMPAT_OLDSOCK
			if (mp->msg_flags & MSG_COMPAT)
				error = 0;	/* old recvfrom didn't check */
			else
#endif
			goto out;
		}
	}
	/* Copy control data out only when the caller did not ask for the chain. */
	if (mp->msg_control && controlp == NULL) {
#ifdef COMPAT_OLDSOCK
		/*
		 * We assume that old recvmsg calls won't receive access
		 * rights and other control info, esp. as control info
		 * is always optional and those options didn't exist in 4.3.
		 * If we receive rights, trim the cmsghdr; anything else
		 * is tossed.
		 */
		if (control && mp->msg_flags & MSG_COMPAT) {
			if (mtod(control, struct cmsghdr *)->cmsg_level !=
			    SOL_SOCKET ||
			    mtod(control, struct cmsghdr *)->cmsg_type !=
			    SCM_RIGHTS) {
				mp->msg_controllen = 0;
				goto out;
			}
			control->m_len -= sizeof (struct cmsghdr);
			control->m_data += sizeof (struct cmsghdr);
		}
#endif
		/* `len' now tracks the space left in the caller's buffer. */
		len = mp->msg_controllen;
		m = control;
		mp->msg_controllen = 0;
		ctlbuf = mp->msg_control;

		/* Walk the chain, flagging MSG_CTRUNC if it does not fit. */
		while (m && len > 0) {
			unsigned int tocopy;

			if (len >= m->m_len)
				tocopy = m->m_len;
			else {
				mp->msg_flags |= MSG_CTRUNC;
				tocopy = len;
			}

			if ((error = copyout(mtod(m, caddr_t),
					ctlbuf, tocopy)) != 0)
				goto out;

			ctlbuf += tocopy;
			len -= tocopy;
			m = m->m_next;
		}
		/* Report how many control bytes were actually stored. */
		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
	}
out:
	fdrop(fp, td);
	NET_UNLOCK_GIANT();
	if (fromsa)
		FREE(fromsa, M_SONAME);

	/* Hand the chain to the caller on success, otherwise dispose of it. */
	if (error == 0 && controlp != NULL)
		*controlp = control;
	else  if (control)
		m_freem(control);

	return (error);
}
1093
1094 static int
1095 recvit(td, s, mp, namelenp)
1096 struct thread *td;
1097 int s;
1098 struct msghdr *mp;
1099 void *namelenp;
1100 {
1101
1102 return (kern_recvit(td, s, mp, namelenp, UIO_USERSPACE, NULL));
1103 }
1104
1105 /*
1106 * MPSAFE
1107 */
1108 int
1109 recvfrom(td, uap)
1110 struct thread *td;
1111 register struct recvfrom_args /* {
1112 int s;
1113 caddr_t buf;
1114 size_t len;
1115 int flags;
1116 struct sockaddr * __restrict from;
1117 socklen_t * __restrict fromlenaddr;
1118 } */ *uap;
1119 {
1120 struct msghdr msg;
1121 struct iovec aiov;
1122 int error;
1123
1124 if (uap->fromlenaddr) {
1125 error = copyin(uap->fromlenaddr,
1126 &msg.msg_namelen, sizeof (msg.msg_namelen));
1127 if (error)
1128 goto done2;
1129 } else {
1130 msg.msg_namelen = 0;
1131 }
1132 msg.msg_name = uap->from;
1133 msg.msg_iov = &aiov;
1134 msg.msg_iovlen = 1;
1135 aiov.iov_base = uap->buf;
1136 aiov.iov_len = uap->len;
1137 msg.msg_control = 0;
1138 msg.msg_flags = uap->flags;
1139 error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1140 done2:
1141 return(error);
1142 }
1143
#ifdef COMPAT_OLDSOCK
/*
 * Old-style recvfrom: sets MSG_COMPAT in the caller's flags (modifying
 * `uap' in place) and otherwise behaves like recvfrom().
 *
 * MPSAFE
 */
int
orecvfrom(struct thread *td, struct recvfrom_args *uap)
{

	uap->flags = uap->flags | MSG_COMPAT;
	return (recvfrom(td, uap));
}
#endif
1158
1159
1160 #ifdef COMPAT_OLDSOCK
1161 /*
1162 * MPSAFE
1163 */
1164 int
1165 orecv(td, uap)
1166 struct thread *td;
1167 register struct orecv_args /* {
1168 int s;
1169 caddr_t buf;
1170 int len;
1171 int flags;
1172 } */ *uap;
1173 {
1174 struct msghdr msg;
1175 struct iovec aiov;
1176 int error;
1177
1178 msg.msg_name = 0;
1179 msg.msg_namelen = 0;
1180 msg.msg_iov = &aiov;
1181 msg.msg_iovlen = 1;
1182 aiov.iov_base = uap->buf;
1183 aiov.iov_len = uap->len;
1184 msg.msg_control = 0;
1185 msg.msg_flags = uap->flags;
1186 error = recvit(td, uap->s, &msg, NULL);
1187 return (error);
1188 }
1189
1190 /*
1191 * Old recvmsg. This code takes advantage of the fact that the old msghdr
1192 * overlays the new one, missing only the flags, and with the (old) access
1193 * rights where the control fields are now.
1194 *
1195 * MPSAFE
1196 */
1197 int
1198 orecvmsg(td, uap)
1199 struct thread *td;
1200 struct orecvmsg_args /* {
1201 int s;
1202 struct omsghdr *msg;
1203 int flags;
1204 } */ *uap;
1205 {
1206 struct msghdr msg;
1207 struct iovec *iov;
1208 int error;
1209
1210 error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1211 if (error)
1212 return (error);
1213 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1214 if (error)
1215 return (error);
1216 msg.msg_flags = uap->flags | MSG_COMPAT;
1217 msg.msg_iov = iov;
1218 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1219 if (msg.msg_controllen && error == 0)
1220 error = copyout(&msg.msg_controllen,
1221 &uap->msg->msg_accrightslen, sizeof (int));
1222 free(iov, M_IOV);
1223 return (error);
1224 }
1225 #endif
1226
1227 /*
1228 * MPSAFE
1229 */
1230 int
1231 recvmsg(td, uap)
1232 struct thread *td;
1233 struct recvmsg_args /* {
1234 int s;
1235 struct msghdr *msg;
1236 int flags;
1237 } */ *uap;
1238 {
1239 struct msghdr msg;
1240 struct iovec *uiov, *iov;
1241 int error;
1242
1243 error = copyin(uap->msg, &msg, sizeof (msg));
1244 if (error)
1245 return (error);
1246 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1247 if (error)
1248 return (error);
1249 msg.msg_flags = uap->flags;
1250 #ifdef COMPAT_OLDSOCK
1251 msg.msg_flags &= ~MSG_COMPAT;
1252 #endif
1253 uiov = msg.msg_iov;
1254 msg.msg_iov = iov;
1255 error = recvit(td, uap->s, &msg, NULL);
1256 if (error == 0) {
1257 msg.msg_iov = uiov;
1258 error = copyout(&msg, uap->msg, sizeof(msg));
1259 }
1260 free(iov, M_IOV);
1261 return (error);
1262 }
1263
1264 /*
1265 * MPSAFE
1266 */
1267 /* ARGSUSED */
1268 int
1269 shutdown(td, uap)
1270 struct thread *td;
1271 register struct shutdown_args /* {
1272 int s;
1273 int how;
1274 } */ *uap;
1275 {
1276 struct socket *so;
1277 struct file *fp;
1278 int error;
1279
1280 NET_LOCK_GIANT();
1281 error = getsock(td->td_proc->p_fd, uap->s, &fp);
1282 if (error == 0) {
1283 so = fp->f_data;
1284 error = soshutdown(so, uap->how);
1285 fdrop(fp, td);
1286 }
1287 NET_UNLOCK_GIANT();
1288 return (error);
1289 }
1290
1291 /*
1292 * MPSAFE
1293 */
1294 /* ARGSUSED */
1295 int
1296 setsockopt(td, uap)
1297 struct thread *td;
1298 register struct setsockopt_args /* {
1299 int s;
1300 int level;
1301 int name;
1302 caddr_t val;
1303 int valsize;
1304 } */ *uap;
1305 {
1306
1307 return (kern_setsockopt(td, uap->s, uap->level, uap->name,
1308 uap->val, UIO_USERSPACE, uap->valsize));
1309 }
1310
1311 int
1312 kern_setsockopt(td, s, level, name, val, valseg, valsize)
1313 struct thread *td;
1314 int s;
1315 int level;
1316 int name;
1317 void *val;
1318 enum uio_seg valseg;
1319 socklen_t valsize;
1320 {
1321 int error;
1322 struct socket *so;
1323 struct file *fp;
1324 struct sockopt sopt;
1325
1326 if (val == NULL && valsize != 0)
1327 return (EFAULT);
1328 if (valsize < 0)
1329 return (EINVAL);
1330
1331 sopt.sopt_dir = SOPT_SET;
1332 sopt.sopt_level = level;
1333 sopt.sopt_name = name;
1334 sopt.sopt_val = val;
1335 sopt.sopt_valsize = valsize;
1336 switch (valseg) {
1337 case UIO_USERSPACE:
1338 sopt.sopt_td = td;
1339 break;
1340 case UIO_SYSSPACE:
1341 sopt.sopt_td = NULL;
1342 break;
1343 default:
1344 panic("kern_setsockopt called with bad valseg");
1345 }
1346
1347 NET_LOCK_GIANT();
1348 error = getsock(td->td_proc->p_fd, s, &fp);
1349 if (error == 0) {
1350 so = fp->f_data;
1351 error = sosetopt(so, &sopt);
1352 fdrop(fp, td);
1353 }
1354 NET_UNLOCK_GIANT();
1355 return(error);
1356 }
1357
1358 /*
1359 * MPSAFE
1360 */
1361 /* ARGSUSED */
1362 int
1363 getsockopt(td, uap)
1364 struct thread *td;
1365 register struct getsockopt_args /* {
1366 int s;
1367 int level;
1368 int name;
1369 void * __restrict val;
1370 socklen_t * __restrict avalsize;
1371 } */ *uap;
1372 {
1373 socklen_t valsize;
1374 int error;
1375
1376 if (uap->val) {
1377 error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1378 if (error)
1379 return (error);
1380 }
1381
1382 error = kern_getsockopt(td, uap->s, uap->level, uap->name,
1383 uap->val, UIO_USERSPACE, &valsize);
1384
1385 if (error == 0)
1386 error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1387 return (error);
1388 }
1389
1390 /*
1391 * Kernel version of getsockopt.
1392 * optval can be a userland or userspace. optlen is always a kernel pointer.
1393 */
1394 int
1395 kern_getsockopt(td, s, level, name, val, valseg, valsize)
1396 struct thread *td;
1397 int s;
1398 int level;
1399 int name;
1400 void *val;
1401 enum uio_seg valseg;
1402 socklen_t *valsize;
1403 {
1404 int error;
1405 struct socket *so;
1406 struct file *fp;
1407 struct sockopt sopt;
1408
1409 if (val == NULL)
1410 *valsize = 0;
1411 if (*valsize < 0)
1412 return (EINVAL);
1413
1414 sopt.sopt_dir = SOPT_GET;
1415 sopt.sopt_level = level;
1416 sopt.sopt_name = name;
1417 sopt.sopt_val = val;
1418 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
1419 switch (valseg) {
1420 case UIO_USERSPACE:
1421 sopt.sopt_td = td;
1422 break;
1423 case UIO_SYSSPACE:
1424 sopt.sopt_td = NULL;
1425 break;
1426 default:
1427 panic("kern_getsockopt called with bad valseg");
1428 }
1429
1430 NET_LOCK_GIANT();
1431 error = getsock(td->td_proc->p_fd, s, &fp);
1432 if (error == 0) {
1433 so = fp->f_data;
1434 error = sogetopt(so, &sopt);
1435 *valsize = sopt.sopt_valsize;
1436 fdrop(fp, td);
1437 }
1438 NET_UNLOCK_GIANT();
1439 return (error);
1440 }
1441
1442 /*
1443 * getsockname1() - Get socket name.
1444 *
1445 * MPSAFE
1446 */
1447 /* ARGSUSED */
1448 static int
1449 getsockname1(td, uap, compat)
1450 struct thread *td;
1451 register struct getsockname_args /* {
1452 int fdes;
1453 struct sockaddr * __restrict asa;
1454 socklen_t * __restrict alen;
1455 } */ *uap;
1456 int compat;
1457 {
1458 struct socket *so;
1459 struct sockaddr *sa;
1460 struct file *fp;
1461 socklen_t len;
1462 int error;
1463
1464 NET_LOCK_GIANT();
1465 error = getsock(td->td_proc->p_fd, uap->fdes, &fp);
1466 if (error)
1467 goto done2;
1468 so = fp->f_data;
1469 error = copyin(uap->alen, &len, sizeof (len));
1470 if (error)
1471 goto done1;
1472 if (len < 0) {
1473 error = EINVAL;
1474 goto done1;
1475 }
1476 sa = 0;
1477 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1478 if (error)
1479 goto bad;
1480 if (sa == 0) {
1481 len = 0;
1482 goto gotnothing;
1483 }
1484
1485 len = MIN(len, sa->sa_len);
1486 #ifdef COMPAT_OLDSOCK
1487 if (compat)
1488 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1489 #endif
1490 error = copyout(sa, uap->asa, (u_int)len);
1491 if (error == 0)
1492 gotnothing:
1493 error = copyout(&len, uap->alen, sizeof (len));
1494 bad:
1495 if (sa)
1496 FREE(sa, M_SONAME);
1497 done1:
1498 fdrop(fp, td);
1499 done2:
1500 NET_UNLOCK_GIANT();
1501 return (error);
1502 }
1503
/*
 * getsockname(2) system call: getsockname1() without the old-sockaddr
 * compatibility rewrite.
 *
 * MPSAFE
 */
int
getsockname(struct thread *td, struct getsockname_args *uap)
{
	int rv;

	rv = getsockname1(td, uap, 0 /* compat */);
	return (rv);
}
1515
#ifdef COMPAT_OLDSOCK
/*
 * Old-style getsockname entry point: getsockname1() with the
 * old-sockaddr compatibility rewrite enabled.
 *
 * MPSAFE
 */
int
ogetsockname(struct thread *td, struct getsockname_args *uap)
{
	int rv;

	rv = getsockname1(td, uap, 1 /* compat */);
	return (rv);
}
#endif /* COMPAT_OLDSOCK */
1529
1530 /*
1531 * getpeername1() - Get name of peer for connected socket.
1532 *
1533 * MPSAFE
1534 */
1535 /* ARGSUSED */
1536 static int
1537 getpeername1(td, uap, compat)
1538 struct thread *td;
1539 register struct getpeername_args /* {
1540 int fdes;
1541 struct sockaddr * __restrict asa;
1542 socklen_t * __restrict alen;
1543 } */ *uap;
1544 int compat;
1545 {
1546 struct socket *so;
1547 struct sockaddr *sa;
1548 struct file *fp;
1549 socklen_t len;
1550 int error;
1551
1552 NET_LOCK_GIANT();
1553 error = getsock(td->td_proc->p_fd, uap->fdes, &fp);
1554 if (error)
1555 goto done2;
1556 so = fp->f_data;
1557 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1558 error = ENOTCONN;
1559 goto done1;
1560 }
1561 error = copyin(uap->alen, &len, sizeof (len));
1562 if (error)
1563 goto done1;
1564 if (len < 0) {
1565 error = EINVAL;
1566 goto done1;
1567 }
1568 sa = 0;
1569 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1570 if (error)
1571 goto bad;
1572 if (sa == 0) {
1573 len = 0;
1574 goto gotnothing;
1575 }
1576 len = MIN(len, sa->sa_len);
1577 #ifdef COMPAT_OLDSOCK
1578 if (compat)
1579 ((struct osockaddr *)sa)->sa_family =
1580 sa->sa_family;
1581 #endif
1582 error = copyout(sa, uap->asa, (u_int)len);
1583 if (error)
1584 goto bad;
1585 gotnothing:
1586 error = copyout(&len, uap->alen, sizeof (len));
1587 bad:
1588 if (sa)
1589 FREE(sa, M_SONAME);
1590 done1:
1591 fdrop(fp, td);
1592 done2:
1593 NET_UNLOCK_GIANT();
1594 return (error);
1595 }
1596
/*
 * getpeername(2) system call: getpeername1() without the old-sockaddr
 * compatibility rewrite.
 *
 * MPSAFE
 */
int
getpeername(struct thread *td, struct getpeername_args *uap)
{
	int rv;

	rv = getpeername1(td, uap, 0 /* compat */);
	return (rv);
}
1608
#ifdef COMPAT_OLDSOCK
/*
 * Old-style getpeername entry point: getpeername1() with the
 * old-sockaddr compatibility rewrite enabled.
 *
 * MPSAFE
 */
int
ogetpeername(struct thread *td, struct ogetpeername_args *uap)
{
	struct getpeername_args *gp_uap;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	gp_uap = (struct getpeername_args *)uap;
	return (getpeername1(td, gp_uap, 1 /* compat */));
}
#endif /* COMPAT_OLDSOCK */
1623
1624 int
1625 sockargs(mp, buf, buflen, type)
1626 struct mbuf **mp;
1627 caddr_t buf;
1628 int buflen, type;
1629 {
1630 register struct sockaddr *sa;
1631 register struct mbuf *m;
1632 int error;
1633
1634 if ((u_int)buflen > MLEN) {
1635 #ifdef COMPAT_OLDSOCK
1636 if (type == MT_SONAME && (u_int)buflen <= 112)
1637 buflen = MLEN; /* unix domain compat. hack */
1638 else
1639 #endif
1640 if ((u_int)buflen > MCLBYTES)
1641 return (EINVAL);
1642 }
1643 m = m_get(M_TRYWAIT, type);
1644 if (m == NULL)
1645 return (ENOBUFS);
1646 if ((u_int)buflen > MLEN) {
1647 MCLGET(m, M_TRYWAIT);
1648 if ((m->m_flags & M_EXT) == 0) {
1649 m_free(m);
1650 return (ENOBUFS);
1651 }
1652 }
1653 m->m_len = buflen;
1654 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1655 if (error)
1656 (void) m_free(m);
1657 else {
1658 *mp = m;
1659 if (type == MT_SONAME) {
1660 sa = mtod(m, struct sockaddr *);
1661
1662 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1663 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1664 sa->sa_family = sa->sa_len;
1665 #endif
1666 sa->sa_len = buflen;
1667 }
1668 }
1669 return (error);
1670 }
1671
1672 int
1673 getsockaddr(namp, uaddr, len)
1674 struct sockaddr **namp;
1675 caddr_t uaddr;
1676 size_t len;
1677 {
1678 struct sockaddr *sa;
1679 int error;
1680
1681 if (len > SOCK_MAXADDRLEN)
1682 return (ENAMETOOLONG);
1683 if (len < offsetof(struct sockaddr, sa_data[0]))
1684 return (EINVAL);
1685 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1686 error = copyin(uaddr, sa, len);
1687 if (error) {
1688 FREE(sa, M_SONAME);
1689 } else {
1690 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1691 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1692 sa->sa_family = sa->sa_len;
1693 #endif
1694 sa->sa_len = len;
1695 *namp = sa;
1696 }
1697 return (error);
1698 }
1699
1700 /*
1701 * Detach mapped page and release resources back to the system.
1702 */
1703 void
1704 sf_buf_mext(void *addr, void *args)
1705 {
1706 vm_page_t m;
1707
1708 m = sf_buf_page(args);
1709 sf_buf_free(args);
1710 vm_page_lock_queues();
1711 vm_page_unwire(m, 0);
1712 /*
1713 * Check for the object going away on us. This can
1714 * happen since we don't hold a reference to it.
1715 * If so, we're responsible for freeing the page.
1716 */
1717 if (m->wire_count == 0 && m->object == NULL)
1718 vm_page_free(m);
1719 vm_page_unlock_queues();
1720 }
1721
/*
 * sendfile(2)
 *
 * MPSAFE
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Transmit the file behind 'fd', beginning at 'offset', over the socket
 * 's'.  At most 'nbytes' bytes of the file are sent; nbytes == 0 means
 * send until EOF.  A header and/or trailer may be supplied through
 * 'hdtr'.  When 'sbytes' is non-NULL the total number of bytes sent is
 * stored there.
 *
 * All of the work is done by do_sendfile() with compat set to 0.
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
	int rv;

	rv = do_sendfile(td, uap, 0 /* compat */);
	return (rv);
}
1742
#ifdef COMPAT_FREEBSD4
/*
 * FreeBSD 4 flavour of sendfile(2).
 *
 * Repackages the freebsd4_sendfile_args field by field into a
 * sendfile_args and runs do_sendfile() with compat set to 1.
 */
int
freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
{
	struct sendfile_args sf_uap;
	int rv;

	sf_uap.fd = uap->fd;
	sf_uap.s = uap->s;
	sf_uap.offset = uap->offset;
	sf_uap.nbytes = uap->nbytes;
	sf_uap.hdtr = uap->hdtr;
	sf_uap.sbytes = uap->sbytes;
	sf_uap.flags = uap->flags;

	rv = do_sendfile(td, &sf_uap, 1 /* compat */);
	return (rv);
}
#endif /* COMPAT_FREEBSD4 */
1760
1761 static int
1762 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1763 {
1764 struct vnode *vp;
1765 struct vm_object *obj = NULL;
1766 struct socket *so = NULL;
1767 struct mbuf *m, *m_header = NULL;
1768 struct sf_buf *sf;
1769 struct vm_page *pg;
1770 struct writev_args nuap;
1771 struct sf_hdtr hdtr;
1772 struct uio *hdr_uio = NULL;
1773 off_t off, xfsize, hdtr_size, sbytes = 0;
1774 int error, headersize = 0, headersent = 0;
1775
1776 mtx_lock(&Giant);
1777
1778 hdtr_size = 0;
1779
1780 /*
1781 * The descriptor must be a regular file and have a backing VM object.
1782 */
1783 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1784 goto done;
1785 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1786 if (vp->v_type == VREG)
1787 obj = vp->v_object;
1788 VOP_UNLOCK(vp, 0, td);
1789 if (obj == NULL) {
1790 error = EINVAL;
1791 goto done;
1792 }
1793 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1794 goto done;
1795 if (so->so_type != SOCK_STREAM) {
1796 error = EINVAL;
1797 goto done;
1798 }
1799 if ((so->so_state & SS_ISCONNECTED) == 0) {
1800 error = ENOTCONN;
1801 goto done;
1802 }
1803 if (uap->offset < 0) {
1804 error = EINVAL;
1805 goto done;
1806 }
1807
1808 #ifdef MAC
1809 SOCK_LOCK(so);
1810 error = mac_check_socket_send(td->td_ucred, so);
1811 SOCK_UNLOCK(so);
1812 if (error)
1813 goto done;
1814 #endif
1815
1816 /*
1817 * If specified, get the pointer to the sf_hdtr struct for
1818 * any headers/trailers.
1819 */
1820 if (uap->hdtr != NULL) {
1821 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1822 if (error)
1823 goto done;
1824 /*
1825 * Send any headers.
1826 */
1827 if (hdtr.headers != NULL) {
1828 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1829 if (error)
1830 goto done;
1831 hdr_uio->uio_td = td;
1832 hdr_uio->uio_rw = UIO_WRITE;
1833 if (hdr_uio->uio_resid > 0) {
1834 m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0, 0);
1835 if (m_header == NULL)
1836 goto done;
1837 headersize = m_header->m_pkthdr.len;
1838 if (compat)
1839 sbytes += headersize;
1840 }
1841 }
1842 }
1843
1844 /*
1845 * Protect against multiple writers to the socket.
1846 */
1847 SOCKBUF_LOCK(&so->so_snd);
1848 (void) sblock(&so->so_snd, M_WAITOK);
1849 SOCKBUF_UNLOCK(&so->so_snd);
1850
1851 /*
1852 * Loop through the pages in the file, starting with the requested
1853 * offset. Get a file page (do I/O if necessary), map the file page
1854 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1855 * it on the socket.
1856 */
1857 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1858 vm_pindex_t pindex;
1859 vm_offset_t pgoff;
1860
1861 pindex = OFF_TO_IDX(off);
1862 VM_OBJECT_LOCK(obj);
1863 retry_lookup:
1864 /*
1865 * Calculate the amount to transfer. Not to exceed a page,
1866 * the EOF, or the passed in nbytes.
1867 */
1868 xfsize = obj->un_pager.vnp.vnp_size - off;
1869 VM_OBJECT_UNLOCK(obj);
1870 if (xfsize > PAGE_SIZE)
1871 xfsize = PAGE_SIZE;
1872 pgoff = (vm_offset_t)(off & PAGE_MASK);
1873 if (PAGE_SIZE - pgoff < xfsize)
1874 xfsize = PAGE_SIZE - pgoff;
1875 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1876 xfsize = uap->nbytes - sbytes;
1877 if (xfsize <= 0) {
1878 if (m_header != NULL) {
1879 m = m_header;
1880 m_header = NULL;
1881 SOCKBUF_LOCK(&so->so_snd);
1882 goto retry_space;
1883 } else
1884 break;
1885 }
1886 /*
1887 * Optimize the non-blocking case by looking at the socket space
1888 * before going to the extra work of constituting the sf_buf.
1889 */
1890 SOCKBUF_LOCK(&so->so_snd);
1891 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1892 if (so->so_snd.sb_state & SBS_CANTSENDMORE)
1893 error = EPIPE;
1894 else
1895 error = EAGAIN;
1896 sbunlock(&so->so_snd);
1897 SOCKBUF_UNLOCK(&so->so_snd);
1898 goto done;
1899 }
1900 SOCKBUF_UNLOCK(&so->so_snd);
1901 VM_OBJECT_LOCK(obj);
1902 /*
1903 * Attempt to look up the page.
1904 *
1905 * Allocate if not found
1906 *
1907 * Wait and loop if busy.
1908 */
1909 pg = vm_page_lookup(obj, pindex);
1910
1911 if (pg == NULL) {
1912 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NOBUSY |
1913 VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
1914 if (pg == NULL) {
1915 VM_OBJECT_UNLOCK(obj);
1916 VM_WAIT;
1917 VM_OBJECT_LOCK(obj);
1918 goto retry_lookup;
1919 }
1920 vm_page_lock_queues();
1921 } else {
1922 vm_page_lock_queues();
1923 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
1924 goto retry_lookup;
1925 /*
1926 * Wire the page so it does not get ripped out from
1927 * under us.
1928 */
1929 vm_page_wire(pg);
1930 }
1931
1932 /*
1933 * If page is not valid for what we need, initiate I/O
1934 */
1935
1936 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) {
1937 VM_OBJECT_UNLOCK(obj);
1938 } else if (uap->flags & SF_NODISKIO) {
1939 error = EBUSY;
1940 } else {
1941 int bsize, resid;
1942
1943 /*
1944 * Ensure that our page is still around when the I/O
1945 * completes.
1946 */
1947 vm_page_io_start(pg);
1948 vm_page_unlock_queues();
1949 VM_OBJECT_UNLOCK(obj);
1950
1951 /*
1952 * Get the page from backing store.
1953 */
1954 bsize = vp->v_mount->mnt_stat.f_iosize;
1955 vn_lock(vp, LK_SHARED | LK_RETRY, td);
1956 /*
1957 * XXXMAC: Because we don't have fp->f_cred here,
1958 * we pass in NOCRED. This is probably wrong, but
1959 * is consistent with our original implementation.
1960 */
1961 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
1962 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
1963 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
1964 td->td_ucred, NOCRED, &resid, td);
1965 VOP_UNLOCK(vp, 0, td);
1966 VM_OBJECT_LOCK(obj);
1967 vm_page_lock_queues();
1968 vm_page_io_finish(pg);
1969 if (!error)
1970 VM_OBJECT_UNLOCK(obj);
1971 mbstat.sf_iocnt++;
1972 }
1973
1974 if (error) {
1975 vm_page_unwire(pg, 0);
1976 /*
1977 * See if anyone else might know about this page.
1978 * If not and it is not valid, then free it.
1979 */
1980 if (pg->wire_count == 0 && pg->valid == 0 &&
1981 pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1982 pg->hold_count == 0) {
1983 vm_page_free(pg);
1984 }
1985 vm_page_unlock_queues();
1986 VM_OBJECT_UNLOCK(obj);
1987 SOCKBUF_LOCK(&so->so_snd);
1988 sbunlock(&so->so_snd);
1989 SOCKBUF_UNLOCK(&so->so_snd);
1990 goto done;
1991 }
1992 vm_page_unlock_queues();
1993
1994 /*
1995 * Get a sendfile buf. We usually wait as long as necessary,
1996 * but this wait can be interrupted.
1997 */
1998 if ((sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) {
1999 mbstat.sf_allocfail++;
2000 vm_page_lock_queues();
2001 vm_page_unwire(pg, 0);
2002 if (pg->wire_count == 0 && pg->object == NULL)
2003 vm_page_free(pg);
2004 vm_page_unlock_queues();
2005 SOCKBUF_LOCK(&so->so_snd);
2006 sbunlock(&so->so_snd);
2007 SOCKBUF_UNLOCK(&so->so_snd);
2008 error = EINTR;
2009 goto done;
2010 }
2011
2012 /*
2013 * Get an mbuf header and set it up as having external storage.
2014 */
2015 if (m_header)
2016 MGET(m, M_TRYWAIT, MT_DATA);
2017 else
2018 MGETHDR(m, M_TRYWAIT, MT_DATA);
2019 if (m == NULL) {
2020 error = ENOBUFS;
2021 sf_buf_mext((void *)sf_buf_kva(sf), sf);
2022 SOCKBUF_LOCK(&so->so_snd);
2023 sbunlock(&so->so_snd);
2024 SOCKBUF_UNLOCK(&so->so_snd);
2025 goto done;
2026 }
2027 /*
2028 * Setup external storage for mbuf.
2029 */
2030 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY,
2031 EXT_SFBUF);
2032 m->m_data = (char *)sf_buf_kva(sf) + pgoff;
2033 m->m_pkthdr.len = m->m_len = xfsize;
2034
2035 if (m_header) {
2036 m_cat(m_header, m);
2037 m = m_header;
2038 m_header = NULL;
2039 m_fixhdr(m);
2040 }
2041
2042 /*
2043 * Add the buffer to the socket buffer chain.
2044 */
2045 SOCKBUF_LOCK(&so->so_snd);
2046 retry_space:
2047 /*
2048 * Make sure that the socket is still able to take more data.
2049 * CANTSENDMORE being true usually means that the connection
2050 * was closed. so_error is true when an error was sensed after
2051 * a previous send.
2052 * The state is checked after the page mapping and buffer
2053 * allocation above since those operations may block and make
2054 * any socket checks stale. From this point forward, nothing
2055 * blocks before the pru_send (or more accurately, any blocking
2056 * results in a loop back to here to re-check).
2057 */
2058 SOCKBUF_LOCK_ASSERT(&so->so_snd);
2059 if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) {
2060 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2061 error = EPIPE;
2062 } else {
2063 error = so->so_error;
2064 so->so_error = 0;
2065 }
2066 m_freem(m);
2067 sbunlock(&so->so_snd);
2068 SOCKBUF_UNLOCK(&so->so_snd);
2069 goto done;
2070 }
2071 /*
2072 * Wait for socket space to become available. We do this just
2073 * after checking the connection state above in order to avoid
2074 * a race condition with sbwait().
2075 */
2076 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
2077 if (so->so_state & SS_NBIO) {
2078 m_freem(m);
2079 sbunlock(&so->so_snd);
2080 SOCKBUF_UNLOCK(&so->so_snd);
2081 error = EAGAIN;
2082 goto done;
2083 }
2084 error = sbwait(&so->so_snd);
2085 /*
2086 * An error from sbwait usually indicates that we've
2087 * been interrupted by a signal. If we've sent anything
2088 * then return bytes sent, otherwise return the error.
2089 */
2090 if (error) {
2091 m_freem(m);
2092 sbunlock(&so->so_snd);
2093 SOCKBUF_UNLOCK(&so->so_snd);
2094 goto done;
2095 }
2096 goto retry_space;
2097 }
2098 SOCKBUF_UNLOCK(&so->so_snd);
2099 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
2100 if (error) {
2101 SOCKBUF_LOCK(&so->so_snd);
2102 sbunlock(&so->so_snd);
2103 SOCKBUF_UNLOCK(&so->so_snd);
2104 goto done;
2105 }
2106 headersent = 1;
2107 }
2108 SOCKBUF_LOCK(&so->so_snd);
2109 sbunlock(&so->so_snd);
2110 SOCKBUF_UNLOCK(&so->so_snd);
2111
2112 /*
2113 * Send trailers. Wimp out and use writev(2).
2114 */
2115 if (uap->hdtr != NULL && hdtr.trailers != NULL) {
2116 nuap.fd = uap->s;
2117 nuap.iovp = hdtr.trailers;
2118 nuap.iovcnt = hdtr.trl_cnt;
2119 error = writev(td, &nuap);
2120 if (error)
2121 goto done;
2122 if (compat)
2123 sbytes += td->td_retval[0];
2124 else
2125 hdtr_size += td->td_retval[0];
2126 }
2127
2128 done:
2129 if (headersent) {
2130 if (!compat)
2131 hdtr_size += headersize;
2132 } else {
2133 if (compat)
2134 sbytes -= headersize;
2135 }
2136 /*
2137 * If there was no error we have to clear td->td_retval[0]
2138 * because it may have been set by writev.
2139 */
2140 if (error == 0) {
2141 td->td_retval[0] = 0;
2142 }
2143 if (uap->sbytes != NULL) {
2144 if (!compat)
2145 sbytes += hdtr_size;
2146 copyout(&sbytes, uap->sbytes, sizeof(off_t));
2147 }
2148 if (vp)
2149 vrele(vp);
2150 if (so)
2151 fputsock(so);
2152 if (hdr_uio != NULL)
2153 free(hdr_uio, M_IOV);
2154 if (m_header)
2155 m_freem(m_header);
2156
2157 mtx_unlock(&Giant);
2158
2159 if (error == ERESTART)
2160 error = EINTR;
2161
2162 return (error);
2163 }
Cache object: 62ef3d03b07bcf535251fdbc9076ad1d
|