1 /*-
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 4. Neither the name of the University nor the names of its contributors
17 * may be used to endorse or promote products derived from this software
18 * without specific prior written permission.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
21 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
22 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
23 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
24 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
25 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
26 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
27 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
28 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
29 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
30 * SUCH DAMAGE.
31 *
32 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
33 */
34
35 #include <sys/cdefs.h>
36 __FBSDID("$FreeBSD: releng/6.4/sys/kern/uipc_syscalls.c 176271 2008-02-14 11:46:08Z simon $");
37
38 #include "opt_compat.h"
39 #include "opt_ktrace.h"
40 #include "opt_mac.h"
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/kernel.h>
45 #include <sys/lock.h>
46 #include <sys/mac.h>
47 #include <sys/mutex.h>
48 #include <sys/sysproto.h>
49 #include <sys/malloc.h>
50 #include <sys/filedesc.h>
51 #include <sys/event.h>
52 #include <sys/proc.h>
53 #include <sys/fcntl.h>
54 #include <sys/file.h>
55 #include <sys/filio.h>
56 #include <sys/mount.h>
57 #include <sys/mbuf.h>
58 #include <sys/protosw.h>
59 #include <sys/sf_buf.h>
60 #include <sys/socket.h>
61 #include <sys/socketvar.h>
62 #include <sys/signalvar.h>
63 #include <sys/syscallsubr.h>
64 #include <sys/sysctl.h>
65 #include <sys/uio.h>
66 #include <sys/vnode.h>
67 #ifdef KTRACE
68 #include <sys/ktrace.h>
69 #endif
70
71 #include <vm/vm.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_page.h>
74 #include <vm/vm_pageout.h>
75 #include <vm/vm_kern.h>
76 #include <vm/vm_extern.h>
77
/*
 * Forward declarations for file-local helpers.  sendit(), recvit() and
 * accept1() are shared by the native system calls and their
 * COMPAT_OLDSOCK counterparts defined in this file; the trailing `compat'
 * argument of the *1() helpers selects the old-style behaviour.
 */
static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);

static int accept1(struct thread *td, struct accept_args *uap, int compat);
static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
static int getsockname1(struct thread *td, struct getsockname_args *uap,
	    int compat);
static int getpeername1(struct thread *td, struct getpeername_args *uap,
	    int compat);
87
88 /*
89 * NSFBUFS-related variables and associated sysctls
90 */
int nsfbufs;		/* maximum number of sendfile(2) sf_bufs available */
int nsfbufspeak;	/* number of sf_bufs in use at peak usage */
int nsfbufsused;	/* number of sf_bufs currently in use */

/* Export the three counters above under the kern.ipc sysctl node. */
SYSCTL_DECL(_kern_ipc);
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufs, CTLFLAG_RDTUN, &nsfbufs, 0,
    "Maximum number of sendfile(2) sf_bufs available");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufspeak, CTLFLAG_RD, &nsfbufspeak, 0,
    "Number of sendfile(2) sf_bufs at peak usage");
SYSCTL_INT(_kern_ipc, OID_AUTO, nsfbufsused, CTLFLAG_RD, &nsfbufsused, 0,
    "Number of sendfile(2) sf_bufs in use");
102
/*
 * Convert a user file descriptor to a kernel file entry.  A reference on the
 * file entry is held upon returning.  This is lighter weight than
 * fgetsock(), which bumps the socket reference count and drops the file
 * reference count instead; holding only the file reference avoids several
 * additional mutex operations associated with the additional reference count.
 */
110 static int
111 getsock(struct filedesc *fdp, int fd, struct file **fpp)
112 {
113 struct file *fp;
114 int error;
115
116 fp = NULL;
117 if (fdp == NULL)
118 error = EBADF;
119 else {
120 FILEDESC_LOCK_FAST(fdp);
121 fp = fget_locked(fdp, fd);
122 if (fp == NULL)
123 error = EBADF;
124 else if (fp->f_type != DTYPE_SOCKET) {
125 fp = NULL;
126 error = ENOTSOCK;
127 } else {
128 fhold(fp);
129 error = 0;
130 }
131 FILEDESC_UNLOCK_FAST(fdp);
132 }
133 *fpp = fp;
134 return (error);
135 }
136
137 /*
138 * System call interface to the socket abstraction.
139 */
/*
 * COMPAT_43 kernels also build the old socket entry points in this file
 * (the o*() functions and the MSG_COMPAT handling), all of which are
 * guarded by COMPAT_OLDSOCK.
 */
#if defined(COMPAT_43)
#define COMPAT_OLDSOCK
#endif
143
144 /*
145 * MPSAFE
146 */
147 int
148 socket(td, uap)
149 struct thread *td;
150 register struct socket_args /* {
151 int domain;
152 int type;
153 int protocol;
154 } */ *uap;
155 {
156 struct filedesc *fdp;
157 struct socket *so;
158 struct file *fp;
159 int fd, error;
160
161 #ifdef MAC
162 error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
163 uap->protocol);
164 if (error)
165 return (error);
166 #endif
167 fdp = td->td_proc->p_fd;
168 error = falloc(td, &fp, &fd);
169 if (error)
170 return (error);
171 /* An extra reference on `fp' has been held for us by falloc(). */
172 NET_LOCK_GIANT();
173 error = socreate(uap->domain, &so, uap->type, uap->protocol,
174 td->td_ucred, td);
175 NET_UNLOCK_GIANT();
176 if (error) {
177 fdclose(fdp, fp, fd, td);
178 } else {
179 FILEDESC_LOCK_FAST(fdp);
180 fp->f_data = so; /* already has ref count */
181 fp->f_flag = FREAD|FWRITE;
182 fp->f_ops = &socketops;
183 fp->f_type = DTYPE_SOCKET;
184 FILEDESC_UNLOCK_FAST(fdp);
185 td->td_retval[0] = fd;
186 }
187 fdrop(fp, td);
188 return (error);
189 }
190
191 /*
192 * MPSAFE
193 */
194 /* ARGSUSED */
195 int
196 bind(td, uap)
197 struct thread *td;
198 register struct bind_args /* {
199 int s;
200 caddr_t name;
201 int namelen;
202 } */ *uap;
203 {
204 struct sockaddr *sa;
205 int error;
206
207 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
208 return (error);
209
210 return (kern_bind(td, uap->s, sa));
211 }
212
213 int
214 kern_bind(td, fd, sa)
215 struct thread *td;
216 int fd;
217 struct sockaddr *sa;
218 {
219 struct socket *so;
220 struct file *fp;
221 int error;
222
223 NET_LOCK_GIANT();
224 error = getsock(td->td_proc->p_fd, fd, &fp);
225 if (error)
226 goto done2;
227 so = fp->f_data;
228 #ifdef MAC
229 SOCK_LOCK(so);
230 error = mac_check_socket_bind(td->td_ucred, so, sa);
231 SOCK_UNLOCK(so);
232 if (error)
233 goto done1;
234 #endif
235 error = sobind(so, sa, td);
236 #ifdef MAC
237 done1:
238 #endif
239 fdrop(fp, td);
240 done2:
241 NET_UNLOCK_GIANT();
242 FREE(sa, M_SONAME);
243 return (error);
244 }
245
246 /*
247 * MPSAFE
248 */
249 /* ARGSUSED */
250 int
251 listen(td, uap)
252 struct thread *td;
253 register struct listen_args /* {
254 int s;
255 int backlog;
256 } */ *uap;
257 {
258 struct socket *so;
259 struct file *fp;
260 int error;
261
262 NET_LOCK_GIANT();
263 error = getsock(td->td_proc->p_fd, uap->s, &fp);
264 if (error == 0) {
265 so = fp->f_data;
266 #ifdef MAC
267 SOCK_LOCK(so);
268 error = mac_check_socket_listen(td->td_ucred, so);
269 SOCK_UNLOCK(so);
270 if (error)
271 goto done;
272 #endif
273 error = solisten(so, uap->backlog, td);
274 #ifdef MAC
275 done:
276 #endif
277 fdrop(fp, td);
278 }
279 NET_UNLOCK_GIANT();
280 return(error);
281 }
282
283 /*
284 * accept1()
285 * MPSAFE
286 */
/*
 * Common implementation of accept() and, under COMPAT_OLDSOCK, oaccept().
 *
 * Takes the first socket off the completed-connection queue (so_comp) of
 * the listening socket behind uap->s, installs it in a newly allocated
 * descriptor and, when uap->name is non-NULL, copies the peer address to
 * uap->name and its length to uap->anamelen.  A non-zero `compat' rewrites
 * the address family field in the old osockaddr layout before the copyout.
 *
 * On success returns 0 with the new descriptor in td->td_retval[0];
 * otherwise returns an errno value and closes the new descriptor again.
 */
static int
accept1(td, uap, compat)
	struct thread *td;
	register struct accept_args /* {
		int	s;
		struct sockaddr	* __restrict name;
		socklen_t	* __restrict anamelen;
	} */ *uap;
	int compat;
{
	struct filedesc *fdp;
	struct file *nfp = NULL;
	struct sockaddr *sa = NULL;
	socklen_t namelen;
	int error;
	struct socket *head, *so;
	int fd;
	u_int fflag;
	pid_t pgid;
	int tmp;

	fdp = td->td_proc->p_fd;
	/* The caller's buffer length is only fetched when a name is wanted. */
	if (uap->name) {
		error = copyin(uap->anamelen, &namelen, sizeof (namelen));
		if(error)
			return (error);
		/*
		 * NOTE(review): namelen is a socklen_t; if that type is
		 * unsigned this test can never be true -- confirm.
		 */
		if (namelen < 0)
			return (EINVAL);
	}
	NET_LOCK_GIANT();
	/* Reference the listening socket; released by fputsock() at `done'. */
	error = fgetsock(td, uap->s, &head, &fflag);
	if (error)
		goto done2;
	if ((head->so_options & SO_ACCEPTCONN) == 0) {
		error = EINVAL;
		goto done;
	}
#ifdef MAC
	SOCK_LOCK(head);
	error = mac_check_socket_accept(td->td_ucred, head);
	SOCK_UNLOCK(head);
	if (error != 0)
		goto done;
#endif
	error = falloc(td, &nfp, &fd);
	if (error)
		goto done;
	ACCEPT_LOCK();
	/* Non-blocking listener with nothing queued: fail immediately. */
	if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) {
		ACCEPT_UNLOCK();
		error = EWOULDBLOCK;
		goto noconnection;
	}
	/* Sleep until a connection completes or the listener reports an error. */
	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
		if (head->so_rcv.sb_state & SBS_CANTRCVMORE) {
			head->so_error = ECONNABORTED;
			break;
		}
		error = msleep(&head->so_timeo, &accept_mtx, PSOCK | PCATCH,
		    "accept", 0);
		if (error) {
			ACCEPT_UNLOCK();
			goto noconnection;
		}
	}
	/* A pending listener error is reported once and then cleared. */
	if (head->so_error) {
		error = head->so_error;
		head->so_error = 0;
		ACCEPT_UNLOCK();
		goto noconnection;
	}
	so = TAILQ_FIRST(&head->so_comp);
	KASSERT(!(so->so_qstate & SQ_INCOMP), ("accept1: so SQ_INCOMP"));
	KASSERT(so->so_qstate & SQ_COMP, ("accept1: so not SQ_COMP"));

	/*
	 * Before changing the flags on the socket, we have to bump the
	 * reference count.  Otherwise, if the protocol calls sofree(),
	 * the socket will be released due to a zero refcount.
	 */
	SOCK_LOCK(so);			/* soref() and so_state update */
	soref(so);			/* file descriptor reference */

	/* Detach the new socket from the listener's completed queue. */
	TAILQ_REMOVE(&head->so_comp, so, so_list);
	head->so_qlen--;
	so->so_state |= (head->so_state & SS_NBIO);
	so->so_qstate &= ~SQ_COMP;
	so->so_head = NULL;

	SOCK_UNLOCK(so);
	ACCEPT_UNLOCK();

	/* An extra reference on `nfp' has been held for us by falloc(). */
	td->td_retval[0] = fd;

	/* connection has been removed from the listen queue */
	KNOTE_UNLOCKED(&head->so_rcv.sb_sel.si_note, 0);

	/* Propagate the listener's SIGIO owner, if any, to the new socket. */
	pgid = fgetown(&head->so_sigio);
	if (pgid != 0)
		fsetown(pgid, &so->so_sigio);

	FILE_LOCK(nfp);
	nfp->f_data = so;	/* nfp has ref count from falloc */
	nfp->f_flag = fflag;
	nfp->f_ops = &socketops;
	nfp->f_type = DTYPE_SOCKET;
	FILE_UNLOCK(nfp);
	/* Sync socket nonblocking/async state with file flags */
	tmp = fflag & FNONBLOCK;
	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
	tmp = fflag & FASYNC;
	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
	sa = 0;
	error = soaccept(so, &sa);
	if (error) {
		/*
		 * return a namelen of zero for older code which might
		 * ignore the return value from accept.
		 */
		if (uap->name != NULL) {
			namelen = 0;
			(void) copyout(&namelen,
			    uap->anamelen, sizeof(*uap->anamelen));
		}
		goto noconnection;
	}
	/* No address came back: report a zero length if one was requested. */
	if (sa == NULL) {
		namelen = 0;
		if (uap->name)
			goto gotnoname;
		error = 0;
		goto done;
	}
	if (uap->name) {
		/* check sa_len before it is destroyed */
		if (namelen > sa->sa_len)
			namelen = sa->sa_len;
#ifdef COMPAT_OLDSOCK
		if (compat)
			((struct osockaddr *)sa)->sa_family =
			    sa->sa_family;
#endif
		error = copyout(sa, uap->name, (u_int)namelen);
		/* `gotnoname' jumps into this if-body to copy out the length only. */
		if (!error)
gotnoname:
			error = copyout(&namelen,
			    uap->anamelen, sizeof (*uap->anamelen));
	}
noconnection:
	if (sa)
		FREE(sa, M_SONAME);

	/*
	 * close the new descriptor, assuming someone hasn't ripped it
	 * out from under us.
	 */
	if (error)
		fdclose(fdp, nfp, fd, td);

	/*
	 * Release explicitly held references before returning.
	 */
done:
	if (nfp != NULL)
		fdrop(nfp, td);
	fputsock(head);
done2:
	NET_UNLOCK_GIANT();
	return (error);
}
458
459 /*
460 * MPSAFE (accept1() is MPSAFE)
461 */
/*
 * accept(2) entry point: accept1() with the compat flag cleared.
 */
int
accept(struct thread *td, struct accept_args *uap)
{
	int error;

	error = accept1(td, uap, 0);
	return (error);
}
470
471 #ifdef COMPAT_OLDSOCK
472 /*
473 * MPSAFE (accept1() is MPSAFE)
474 */
/*
 * Old-style accept entry point: accept1() with the compat flag set, so the
 * returned address uses the osockaddr layout.
 */
int
oaccept(struct thread *td, struct accept_args *uap)
{
	int error;

	error = accept1(td, uap, 1);
	return (error);
}
483 #endif /* COMPAT_OLDSOCK */
484
485 /*
486 * MPSAFE
487 */
488 /* ARGSUSED */
489 int
490 connect(td, uap)
491 struct thread *td;
492 register struct connect_args /* {
493 int s;
494 caddr_t name;
495 int namelen;
496 } */ *uap;
497 {
498 struct sockaddr *sa;
499 int error;
500
501 error = getsockaddr(&sa, uap->name, uap->namelen);
502 if (error)
503 return (error);
504
505 return (kern_connect(td, uap->s, sa));
506 }
507
508
/*
 * Connect the socket behind descriptor `fd' to address `sa'.
 *
 * For a blocking socket this sleeps until the connection attempt leaves
 * the SS_ISCONNECTING state or so_error is set.  For a non-blocking socket
 * still connecting it returns EINPROGRESS.  `sa' is an M_SONAME allocation
 * and is freed here on every path.
 *
 * Returns 0 or an errno value; ERESTART from the sleep is reported as
 * EINTR.
 */
int
kern_connect(td, fd, sa)
	struct thread *td;
	int fd;
	struct sockaddr *sa;
{
	struct socket *so;
	struct file *fp;
	int error;
	int interrupted = 0;

	NET_LOCK_GIANT();
	error = getsock(td->td_proc->p_fd, fd, &fp);
	if (error)
		goto done2;
	so = fp->f_data;
	/* A connection attempt is already in flight on this socket. */
	if (so->so_state & SS_ISCONNECTING) {
		error = EALREADY;
		goto done1;
	}
#ifdef MAC
	SOCK_LOCK(so);
	error = mac_check_socket_connect(td->td_ucred, so, sa);
	SOCK_UNLOCK(so);
	if (error)
		goto bad;
#endif
	error = soconnect(so, sa, td);
	if (error)
		goto bad;
	/* Non-blocking socket: leave SS_ISCONNECTING set and return now. */
	if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
		error = EINPROGRESS;
		goto done1;
	}
	SOCK_LOCK(so);
	/* Wait for the attempt to finish, fail, or be interrupted by a signal. */
	while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
		error = msleep(&so->so_timeo, SOCK_MTX(so), PSOCK | PCATCH,
		    "connec", 0);
		if (error) {
			if (error == EINTR || error == ERESTART)
				interrupted = 1;
			break;
		}
	}
	/* The sleep ended normally: collect and clear the socket error. */
	if (error == 0) {
		error = so->so_error;
		so->so_error = 0;
	}
	SOCK_UNLOCK(so);
bad:
	/* An interrupted attempt keeps SS_ISCONNECTING set. */
	if (!interrupted)
		so->so_state &= ~SS_ISCONNECTING;
	if (error == ERESTART)
		error = EINTR;
done1:
	fdrop(fp, td);
done2:
	NET_UNLOCK_GIANT();
	FREE(sa, M_SONAME);
	return (error);
}
570
571 /*
572 * MPSAFE
573 */
574 int
575 socketpair(td, uap)
576 struct thread *td;
577 register struct socketpair_args /* {
578 int domain;
579 int type;
580 int protocol;
581 int *rsv;
582 } */ *uap;
583 {
584 register struct filedesc *fdp = td->td_proc->p_fd;
585 struct file *fp1, *fp2;
586 struct socket *so1, *so2;
587 int fd, error, sv[2];
588
589 #ifdef MAC
590 /* We might want to have a separate check for socket pairs. */
591 error = mac_check_socket_create(td->td_ucred, uap->domain, uap->type,
592 uap->protocol);
593 if (error)
594 return (error);
595 #endif
596
597 NET_LOCK_GIANT();
598 error = socreate(uap->domain, &so1, uap->type, uap->protocol,
599 td->td_ucred, td);
600 if (error)
601 goto done2;
602 error = socreate(uap->domain, &so2, uap->type, uap->protocol,
603 td->td_ucred, td);
604 if (error)
605 goto free1;
606 /* On success extra reference to `fp1' and 'fp2' is set by falloc. */
607 error = falloc(td, &fp1, &fd);
608 if (error)
609 goto free2;
610 sv[0] = fd;
611 fp1->f_data = so1; /* so1 already has ref count */
612 error = falloc(td, &fp2, &fd);
613 if (error)
614 goto free3;
615 fp2->f_data = so2; /* so2 already has ref count */
616 sv[1] = fd;
617 error = soconnect2(so1, so2);
618 if (error)
619 goto free4;
620 if (uap->type == SOCK_DGRAM) {
621 /*
622 * Datagram socket connection is asymmetric.
623 */
624 error = soconnect2(so2, so1);
625 if (error)
626 goto free4;
627 }
628 FILE_LOCK(fp1);
629 fp1->f_flag = FREAD|FWRITE;
630 fp1->f_ops = &socketops;
631 fp1->f_type = DTYPE_SOCKET;
632 FILE_UNLOCK(fp1);
633 FILE_LOCK(fp2);
634 fp2->f_flag = FREAD|FWRITE;
635 fp2->f_ops = &socketops;
636 fp2->f_type = DTYPE_SOCKET;
637 FILE_UNLOCK(fp2);
638 error = copyout(sv, uap->rsv, 2 * sizeof (int));
639 fdrop(fp1, td);
640 fdrop(fp2, td);
641 goto done2;
642 free4:
643 fdclose(fdp, fp2, sv[1], td);
644 fdrop(fp2, td);
645 free3:
646 fdclose(fdp, fp1, sv[0], td);
647 fdrop(fp1, td);
648 free2:
649 (void)soclose(so2);
650 free1:
651 (void)soclose(so1);
652 done2:
653 NET_UNLOCK_GIANT();
654 return (error);
655 }
656
657 static int
658 sendit(td, s, mp, flags)
659 register struct thread *td;
660 int s;
661 register struct msghdr *mp;
662 int flags;
663 {
664 struct mbuf *control;
665 struct sockaddr *to;
666 int error;
667
668 if (mp->msg_name != NULL) {
669 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
670 if (error) {
671 to = NULL;
672 goto bad;
673 }
674 mp->msg_name = to;
675 } else {
676 to = NULL;
677 }
678
679 if (mp->msg_control) {
680 if (mp->msg_controllen < sizeof(struct cmsghdr)
681 #ifdef COMPAT_OLDSOCK
682 && mp->msg_flags != MSG_COMPAT
683 #endif
684 ) {
685 error = EINVAL;
686 goto bad;
687 }
688 error = sockargs(&control, mp->msg_control,
689 mp->msg_controllen, MT_CONTROL);
690 if (error)
691 goto bad;
692 #ifdef COMPAT_OLDSOCK
693 if (mp->msg_flags == MSG_COMPAT) {
694 register struct cmsghdr *cm;
695
696 M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
697 if (control == 0) {
698 error = ENOBUFS;
699 goto bad;
700 } else {
701 cm = mtod(control, struct cmsghdr *);
702 cm->cmsg_len = control->m_len;
703 cm->cmsg_level = SOL_SOCKET;
704 cm->cmsg_type = SCM_RIGHTS;
705 }
706 }
707 #endif
708 } else {
709 control = NULL;
710 }
711
712 error = kern_sendit(td, s, mp, flags, control, UIO_USERSPACE);
713
714 bad:
715 if (to)
716 FREE(to, M_SONAME);
717 return (error);
718 }
719
720 int
721 kern_sendit(td, s, mp, flags, control, segflg)
722 struct thread *td;
723 int s;
724 struct msghdr *mp;
725 int flags;
726 struct mbuf *control;
727 enum uio_seg segflg;
728 {
729 struct file *fp;
730 struct uio auio;
731 struct iovec *iov;
732 struct socket *so;
733 int i;
734 int len, error;
735 #ifdef KTRACE
736 struct uio *ktruio = NULL;
737 #endif
738
739 NET_LOCK_GIANT();
740 error = getsock(td->td_proc->p_fd, s, &fp);
741 if (error)
742 goto bad2;
743 so = (struct socket *)fp->f_data;
744
745 #ifdef MAC
746 SOCK_LOCK(so);
747 error = mac_check_socket_send(td->td_ucred, so);
748 SOCK_UNLOCK(so);
749 if (error)
750 goto bad;
751 #endif
752
753 auio.uio_iov = mp->msg_iov;
754 auio.uio_iovcnt = mp->msg_iovlen;
755 auio.uio_segflg = segflg;
756 auio.uio_rw = UIO_WRITE;
757 auio.uio_td = td;
758 auio.uio_offset = 0; /* XXX */
759 auio.uio_resid = 0;
760 iov = mp->msg_iov;
761 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
762 if ((auio.uio_resid += iov->iov_len) < 0) {
763 error = EINVAL;
764 goto bad;
765 }
766 }
767 #ifdef KTRACE
768 if (KTRPOINT(td, KTR_GENIO))
769 ktruio = cloneuio(&auio);
770 #endif
771 len = auio.uio_resid;
772 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio,
773 0, control, flags, td);
774 if (error) {
775 if (auio.uio_resid != len && (error == ERESTART ||
776 error == EINTR || error == EWOULDBLOCK))
777 error = 0;
778 /* Generation of SIGPIPE can be controlled per socket */
779 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE) &&
780 !(flags & MSG_NOSIGNAL)) {
781 PROC_LOCK(td->td_proc);
782 psignal(td->td_proc, SIGPIPE);
783 PROC_UNLOCK(td->td_proc);
784 }
785 }
786 if (error == 0)
787 td->td_retval[0] = len - auio.uio_resid;
788 #ifdef KTRACE
789 if (ktruio != NULL) {
790 ktruio->uio_resid = td->td_retval[0];
791 ktrgenio(s, UIO_WRITE, ktruio, error);
792 }
793 #endif
794 bad:
795 fdrop(fp, td);
796 bad2:
797 NET_UNLOCK_GIANT();
798 return (error);
799 }
800
801 /*
802 * MPSAFE
803 */
804 int
805 sendto(td, uap)
806 struct thread *td;
807 register struct sendto_args /* {
808 int s;
809 caddr_t buf;
810 size_t len;
811 int flags;
812 caddr_t to;
813 int tolen;
814 } */ *uap;
815 {
816 struct msghdr msg;
817 struct iovec aiov;
818 int error;
819
820 msg.msg_name = uap->to;
821 msg.msg_namelen = uap->tolen;
822 msg.msg_iov = &aiov;
823 msg.msg_iovlen = 1;
824 msg.msg_control = 0;
825 #ifdef COMPAT_OLDSOCK
826 msg.msg_flags = 0;
827 #endif
828 aiov.iov_base = uap->buf;
829 aiov.iov_len = uap->len;
830 error = sendit(td, uap->s, &msg, uap->flags);
831 return (error);
832 }
833
834 #ifdef COMPAT_OLDSOCK
835 /*
836 * MPSAFE
837 */
838 int
839 osend(td, uap)
840 struct thread *td;
841 register struct osend_args /* {
842 int s;
843 caddr_t buf;
844 int len;
845 int flags;
846 } */ *uap;
847 {
848 struct msghdr msg;
849 struct iovec aiov;
850 int error;
851
852 msg.msg_name = 0;
853 msg.msg_namelen = 0;
854 msg.msg_iov = &aiov;
855 msg.msg_iovlen = 1;
856 aiov.iov_base = uap->buf;
857 aiov.iov_len = uap->len;
858 msg.msg_control = 0;
859 msg.msg_flags = 0;
860 error = sendit(td, uap->s, &msg, uap->flags);
861 return (error);
862 }
863
864 /*
865 * MPSAFE
866 */
867 int
868 osendmsg(td, uap)
869 struct thread *td;
870 struct osendmsg_args /* {
871 int s;
872 caddr_t msg;
873 int flags;
874 } */ *uap;
875 {
876 struct msghdr msg;
877 struct iovec *iov;
878 int error;
879
880 error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
881 if (error)
882 return (error);
883 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
884 if (error)
885 return (error);
886 msg.msg_iov = iov;
887 msg.msg_flags = MSG_COMPAT;
888 error = sendit(td, uap->s, &msg, uap->flags);
889 free(iov, M_IOV);
890 return (error);
891 }
892 #endif
893
894 /*
895 * MPSAFE
896 */
897 int
898 sendmsg(td, uap)
899 struct thread *td;
900 struct sendmsg_args /* {
901 int s;
902 caddr_t msg;
903 int flags;
904 } */ *uap;
905 {
906 struct msghdr msg;
907 struct iovec *iov;
908 int error;
909
910 error = copyin(uap->msg, &msg, sizeof (msg));
911 if (error)
912 return (error);
913 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
914 if (error)
915 return (error);
916 msg.msg_iov = iov;
917 #ifdef COMPAT_OLDSOCK
918 msg.msg_flags = 0;
919 #endif
920 error = sendit(td, uap->s, &msg, uap->flags);
921 free(iov, M_IOV);
922 return (error);
923 }
924
/*
 * Receive data on the socket behind descriptor `s' into the buffers
 * described by `mp'.
 *
 * mp->msg_iov / msg_iovlen describe the destination, which lives in the
 * address space selected by `segflg'.  If mp->msg_name is set, the source
 * address is copied out to it (truncated to mp->msg_namelen) and the
 * resulting length is stored in mp->msg_namelen and, when `namelenp' is
 * non-NULL, copied out there as well.  Control data is either returned as
 * an mbuf chain through `controlp' (when non-NULL) or copied out to
 * mp->msg_control, setting MSG_CTRUNC in mp->msg_flags on truncation.
 *
 * On success returns 0 with the byte count in td->td_retval[0].  A
 * transfer cut short by ERESTART, EINTR or EWOULDBLOCK after some data was
 * received is reported as success.
 */
int
kern_recvit(td, s, mp, namelenp, segflg, controlp)
	struct thread *td;
	int s;
	struct msghdr *mp;
	void *namelenp;
	enum uio_seg segflg;
	struct mbuf **controlp;
{
	struct uio auio;
	struct iovec *iov;
	int i;
	socklen_t len;
	int error;
	struct mbuf *m, *control = 0;
	caddr_t ctlbuf;
	struct file *fp;
	struct socket *so;
	struct sockaddr *fromsa = 0;
#ifdef KTRACE
	struct uio *ktruio = NULL;
#endif

	/* The caller's out-pointer starts empty so it is valid on any return. */
	if(controlp != NULL)
		*controlp = 0;

	NET_LOCK_GIANT();
	error = getsock(td->td_proc->p_fd, s, &fp);
	if (error) {
		NET_UNLOCK_GIANT();
		return (error);
	}
	so = fp->f_data;

#ifdef MAC
	SOCK_LOCK(so);
	error = mac_check_socket_receive(td->td_ucred, so);
	SOCK_UNLOCK(so);
	if (error) {
		fdrop(fp, td);
		NET_UNLOCK_GIANT();
		return (error);
	}
#endif

	auio.uio_iov = mp->msg_iov;
	auio.uio_iovcnt = mp->msg_iovlen;
	auio.uio_segflg = segflg;
	auio.uio_rw = UIO_READ;
	auio.uio_td = td;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	iov = mp->msg_iov;
	/* Total the buffer space, rejecting a sum that goes negative. */
	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
		if ((auio.uio_resid += iov->iov_len) < 0) {
			fdrop(fp, td);
			NET_UNLOCK_GIANT();
			return (EINVAL);
		}
	}
#ifdef KTRACE
	if (KTRPOINT(td, KTR_GENIO))
		ktruio = cloneuio(&auio);
#endif
	/* `len' first holds the requested byte count; it is reused below. */
	len = auio.uio_resid;
	/* Control data is only requested when the caller has somewhere to put it. */
	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
	    (struct mbuf **)0, (mp->msg_control || controlp) ? &control : (struct mbuf **)0,
	    &mp->msg_flags);
	if (error) {
		/* Some data arrived before the interruption: not an error. */
		if (auio.uio_resid != (int)len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
#ifdef KTRACE
	if (ktruio != NULL) {
		ktruio->uio_resid = (int)len - auio.uio_resid;
		ktrgenio(s, UIO_READ, ktruio, error);
	}
#endif
	if (error)
		goto out;
	td->td_retval[0] = (int)len - auio.uio_resid;
	if (mp->msg_name) {
		/* `len' now holds the caller's address buffer size. */
		len = mp->msg_namelen;
		if (len <= 0 || fromsa == 0)
			len = 0;
		else {
			/* save sa_len before it is destroyed by MSG_COMPAT */
			len = MIN(len, fromsa->sa_len);
#ifdef COMPAT_OLDSOCK
			if (mp->msg_flags & MSG_COMPAT)
				((struct osockaddr *)fromsa)->sa_family =
				    fromsa->sa_family;
#endif
			error = copyout(fromsa, mp->msg_name, (unsigned)len);
			if (error)
				goto out;
		}
		mp->msg_namelen = len;
		if (namelenp &&
		    (error = copyout(&len, namelenp, sizeof (socklen_t)))) {
#ifdef COMPAT_OLDSOCK
			if (mp->msg_flags & MSG_COMPAT)
				error = 0;	/* old recvfrom didn't check */
			else
#endif
			goto out;
		}
	}
	/* Copy control data out only when it is not returned via controlp. */
	if (mp->msg_control && controlp == NULL) {
#ifdef COMPAT_OLDSOCK
		/*
		 * We assume that old recvmsg calls won't receive access
		 * rights and other control info, esp. as control info
		 * is always optional and those options didn't exist in 4.3.
		 * If we receive rights, trim the cmsghdr; anything else
		 * is tossed.
		 */
		if (control && mp->msg_flags & MSG_COMPAT) {
			if (mtod(control, struct cmsghdr *)->cmsg_level !=
			    SOL_SOCKET ||
			    mtod(control, struct cmsghdr *)->cmsg_type !=
			    SCM_RIGHTS) {
				mp->msg_controllen = 0;
				goto out;
			}
			control->m_len -= sizeof (struct cmsghdr);
			control->m_data += sizeof (struct cmsghdr);
		}
#endif
		/* `len' now holds the space left in the caller's control buffer. */
		len = mp->msg_controllen;
		m = control;
		mp->msg_controllen = 0;
		ctlbuf = mp->msg_control;

		/* Copy one mbuf at a time until the chain or the buffer runs out. */
		while (m && len > 0) {
			unsigned int tocopy;

			if (len >= m->m_len)
				tocopy = m->m_len;
			else {
				mp->msg_flags |= MSG_CTRUNC;
				tocopy = len;
			}

			if ((error = copyout(mtod(m, caddr_t),
			    ctlbuf, tocopy)) != 0)
				goto out;

			ctlbuf += tocopy;
			len -= tocopy;
			m = m->m_next;
		}
		/* Report how many control bytes were actually copied out. */
		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
	}
out:
	fdrop(fp, td);
	NET_UNLOCK_GIANT();
	if (fromsa)
		FREE(fromsa, M_SONAME);

	/* Hand the control chain to the caller on success, else free it. */
	if (error == 0 && controlp != NULL)
		*controlp = control;
	else if (control)
		m_freem(control);

	return (error);
}
1093
1094 static int
1095 recvit(td, s, mp, namelenp)
1096 struct thread *td;
1097 int s;
1098 struct msghdr *mp;
1099 void *namelenp;
1100 {
1101
1102 return (kern_recvit(td, s, mp, namelenp, UIO_USERSPACE, NULL));
1103 }
1104
1105 /*
1106 * MPSAFE
1107 */
1108 int
1109 recvfrom(td, uap)
1110 struct thread *td;
1111 register struct recvfrom_args /* {
1112 int s;
1113 caddr_t buf;
1114 size_t len;
1115 int flags;
1116 struct sockaddr * __restrict from;
1117 socklen_t * __restrict fromlenaddr;
1118 } */ *uap;
1119 {
1120 struct msghdr msg;
1121 struct iovec aiov;
1122 int error;
1123
1124 if (uap->fromlenaddr) {
1125 error = copyin(uap->fromlenaddr,
1126 &msg.msg_namelen, sizeof (msg.msg_namelen));
1127 if (error)
1128 goto done2;
1129 } else {
1130 msg.msg_namelen = 0;
1131 }
1132 msg.msg_name = uap->from;
1133 msg.msg_iov = &aiov;
1134 msg.msg_iovlen = 1;
1135 aiov.iov_base = uap->buf;
1136 aiov.iov_len = uap->len;
1137 msg.msg_control = 0;
1138 msg.msg_flags = uap->flags;
1139 error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1140 done2:
1141 return(error);
1142 }
1143
1144 #ifdef COMPAT_OLDSOCK
1145 /*
1146 * MPSAFE
1147 */
1148 int
1149 orecvfrom(td, uap)
1150 struct thread *td;
1151 struct recvfrom_args *uap;
1152 {
1153
1154 uap->flags |= MSG_COMPAT;
1155 return (recvfrom(td, uap));
1156 }
1157 #endif
1158
1159
1160 #ifdef COMPAT_OLDSOCK
1161 /*
1162 * MPSAFE
1163 */
1164 int
1165 orecv(td, uap)
1166 struct thread *td;
1167 register struct orecv_args /* {
1168 int s;
1169 caddr_t buf;
1170 int len;
1171 int flags;
1172 } */ *uap;
1173 {
1174 struct msghdr msg;
1175 struct iovec aiov;
1176 int error;
1177
1178 msg.msg_name = 0;
1179 msg.msg_namelen = 0;
1180 msg.msg_iov = &aiov;
1181 msg.msg_iovlen = 1;
1182 aiov.iov_base = uap->buf;
1183 aiov.iov_len = uap->len;
1184 msg.msg_control = 0;
1185 msg.msg_flags = uap->flags;
1186 error = recvit(td, uap->s, &msg, NULL);
1187 return (error);
1188 }
1189
1190 /*
1191 * Old recvmsg. This code takes advantage of the fact that the old msghdr
1192 * overlays the new one, missing only the flags, and with the (old) access
1193 * rights where the control fields are now.
1194 *
1195 * MPSAFE
1196 */
1197 int
1198 orecvmsg(td, uap)
1199 struct thread *td;
1200 struct orecvmsg_args /* {
1201 int s;
1202 struct omsghdr *msg;
1203 int flags;
1204 } */ *uap;
1205 {
1206 struct msghdr msg;
1207 struct iovec *iov;
1208 int error;
1209
1210 error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1211 if (error)
1212 return (error);
1213 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1214 if (error)
1215 return (error);
1216 msg.msg_flags = uap->flags | MSG_COMPAT;
1217 msg.msg_iov = iov;
1218 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1219 if (msg.msg_controllen && error == 0)
1220 error = copyout(&msg.msg_controllen,
1221 &uap->msg->msg_accrightslen, sizeof (int));
1222 free(iov, M_IOV);
1223 return (error);
1224 }
1225 #endif
1226
1227 /*
1228 * MPSAFE
1229 */
1230 int
1231 recvmsg(td, uap)
1232 struct thread *td;
1233 struct recvmsg_args /* {
1234 int s;
1235 struct msghdr *msg;
1236 int flags;
1237 } */ *uap;
1238 {
1239 struct msghdr msg;
1240 struct iovec *uiov, *iov;
1241 int error;
1242
1243 error = copyin(uap->msg, &msg, sizeof (msg));
1244 if (error)
1245 return (error);
1246 error = copyiniov(msg.msg_iov, msg.msg_iovlen, &iov, EMSGSIZE);
1247 if (error)
1248 return (error);
1249 msg.msg_flags = uap->flags;
1250 #ifdef COMPAT_OLDSOCK
1251 msg.msg_flags &= ~MSG_COMPAT;
1252 #endif
1253 uiov = msg.msg_iov;
1254 msg.msg_iov = iov;
1255 error = recvit(td, uap->s, &msg, NULL);
1256 if (error == 0) {
1257 msg.msg_iov = uiov;
1258 error = copyout(&msg, uap->msg, sizeof(msg));
1259 }
1260 free(iov, M_IOV);
1261 return (error);
1262 }
1263
1264 /*
1265 * MPSAFE
1266 */
1267 /* ARGSUSED */
1268 int
1269 shutdown(td, uap)
1270 struct thread *td;
1271 register struct shutdown_args /* {
1272 int s;
1273 int how;
1274 } */ *uap;
1275 {
1276 struct socket *so;
1277 struct file *fp;
1278 int error;
1279
1280 NET_LOCK_GIANT();
1281 error = getsock(td->td_proc->p_fd, uap->s, &fp);
1282 if (error == 0) {
1283 so = fp->f_data;
1284 error = soshutdown(so, uap->how);
1285 fdrop(fp, td);
1286 }
1287 NET_UNLOCK_GIANT();
1288 return (error);
1289 }
1290
1291 /*
1292 * MPSAFE
1293 */
1294 /* ARGSUSED */
1295 int
1296 setsockopt(td, uap)
1297 struct thread *td;
1298 register struct setsockopt_args /* {
1299 int s;
1300 int level;
1301 int name;
1302 caddr_t val;
1303 int valsize;
1304 } */ *uap;
1305 {
1306
1307 return (kern_setsockopt(td, uap->s, uap->level, uap->name,
1308 uap->val, UIO_USERSPACE, uap->valsize));
1309 }
1310
1311 int
1312 kern_setsockopt(td, s, level, name, val, valseg, valsize)
1313 struct thread *td;
1314 int s;
1315 int level;
1316 int name;
1317 void *val;
1318 enum uio_seg valseg;
1319 socklen_t valsize;
1320 {
1321 int error;
1322 struct socket *so;
1323 struct file *fp;
1324 struct sockopt sopt;
1325
1326 if (val == NULL && valsize != 0)
1327 return (EFAULT);
1328 if ((int)valsize < 0)
1329 return (EINVAL);
1330
1331 sopt.sopt_dir = SOPT_SET;
1332 sopt.sopt_level = level;
1333 sopt.sopt_name = name;
1334 sopt.sopt_val = val;
1335 sopt.sopt_valsize = valsize;
1336 switch (valseg) {
1337 case UIO_USERSPACE:
1338 sopt.sopt_td = td;
1339 break;
1340 case UIO_SYSSPACE:
1341 sopt.sopt_td = NULL;
1342 break;
1343 default:
1344 panic("kern_setsockopt called with bad valseg");
1345 }
1346
1347 NET_LOCK_GIANT();
1348 error = getsock(td->td_proc->p_fd, s, &fp);
1349 if (error == 0) {
1350 so = fp->f_data;
1351 error = sosetopt(so, &sopt);
1352 fdrop(fp, td);
1353 }
1354 NET_UNLOCK_GIANT();
1355 return(error);
1356 }
1357
1358 /*
1359 * MPSAFE
1360 */
1361 /* ARGSUSED */
1362 int
1363 getsockopt(td, uap)
1364 struct thread *td;
1365 register struct getsockopt_args /* {
1366 int s;
1367 int level;
1368 int name;
1369 void * __restrict val;
1370 socklen_t * __restrict avalsize;
1371 } */ *uap;
1372 {
1373 socklen_t valsize;
1374 int error;
1375
1376 if (uap->val) {
1377 error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1378 if (error)
1379 return (error);
1380 }
1381
1382 error = kern_getsockopt(td, uap->s, uap->level, uap->name,
1383 uap->val, UIO_USERSPACE, &valsize);
1384
1385 if (error == 0)
1386 error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1387 return (error);
1388 }
1389
1390 /*
1391 * Kernel version of getsockopt.
1392 * optval can be a userland or userspace. optlen is always a kernel pointer.
1393 */
1394 int
1395 kern_getsockopt(td, s, level, name, val, valseg, valsize)
1396 struct thread *td;
1397 int s;
1398 int level;
1399 int name;
1400 void *val;
1401 enum uio_seg valseg;
1402 socklen_t *valsize;
1403 {
1404 int error;
1405 struct socket *so;
1406 struct file *fp;
1407 struct sockopt sopt;
1408
1409 if (val == NULL)
1410 *valsize = 0;
1411 if ((int)*valsize < 0)
1412 return (EINVAL);
1413
1414 sopt.sopt_dir = SOPT_GET;
1415 sopt.sopt_level = level;
1416 sopt.sopt_name = name;
1417 sopt.sopt_val = val;
1418 sopt.sopt_valsize = (size_t)*valsize; /* checked non-negative above */
1419 switch (valseg) {
1420 case UIO_USERSPACE:
1421 sopt.sopt_td = td;
1422 break;
1423 case UIO_SYSSPACE:
1424 sopt.sopt_td = NULL;
1425 break;
1426 default:
1427 panic("kern_getsockopt called with bad valseg");
1428 }
1429
1430 NET_LOCK_GIANT();
1431 error = getsock(td->td_proc->p_fd, s, &fp);
1432 if (error == 0) {
1433 so = fp->f_data;
1434 error = sogetopt(so, &sopt);
1435 *valsize = sopt.sopt_valsize;
1436 fdrop(fp, td);
1437 }
1438 NET_UNLOCK_GIANT();
1439 return (error);
1440 }
1441
1442 /*
1443 * getsockname1() - Get socket name.
1444 *
1445 * MPSAFE
1446 */
1447 /* ARGSUSED */
1448 static int
1449 getsockname1(td, uap, compat)
1450 struct thread *td;
1451 register struct getsockname_args /* {
1452 int fdes;
1453 struct sockaddr * __restrict asa;
1454 socklen_t * __restrict alen;
1455 } */ *uap;
1456 int compat;
1457 {
1458 struct socket *so;
1459 struct sockaddr *sa;
1460 struct file *fp;
1461 socklen_t len;
1462 int error;
1463
1464 NET_LOCK_GIANT();
1465 error = getsock(td->td_proc->p_fd, uap->fdes, &fp);
1466 if (error)
1467 goto done2;
1468 so = fp->f_data;
1469 error = copyin(uap->alen, &len, sizeof (len));
1470 if (error)
1471 goto done1;
1472 if (len < 0) {
1473 error = EINVAL;
1474 goto done1;
1475 }
1476 sa = 0;
1477 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1478 if (error)
1479 goto bad;
1480 if (sa == 0) {
1481 len = 0;
1482 goto gotnothing;
1483 }
1484
1485 len = MIN(len, sa->sa_len);
1486 #ifdef COMPAT_OLDSOCK
1487 if (compat)
1488 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1489 #endif
1490 error = copyout(sa, uap->asa, (u_int)len);
1491 if (error == 0)
1492 gotnothing:
1493 error = copyout(&len, uap->alen, sizeof (len));
1494 bad:
1495 if (sa)
1496 FREE(sa, M_SONAME);
1497 done1:
1498 fdrop(fp, td);
1499 done2:
1500 NET_UNLOCK_GIANT();
1501 return (error);
1502 }
1503
/*
 * getsockname() system call handler: runs getsockname1() with the
 * compat flag cleared, i.e. without the osockaddr family rewrite.
 *
 * MPSAFE
 */
int
getsockname(struct thread *td, struct getsockname_args *uap)
{
	const int compat = 0;

	return (getsockname1(td, uap, compat));
}
1515
#ifdef COMPAT_OLDSOCK
/*
 * Old-socket-API flavour of getsockname(): runs getsockname1() with the
 * compat flag set so that the address family is stored through the
 * struct osockaddr layout.
 *
 * MPSAFE
 */
int
ogetsockname(struct thread *td, struct getsockname_args *uap)
{
	const int compat = 1;

	return (getsockname1(td, uap, compat));
}
#endif /* COMPAT_OLDSOCK */
1529
1530 /*
1531 * getpeername1() - Get name of peer for connected socket.
1532 *
1533 * MPSAFE
1534 */
1535 /* ARGSUSED */
1536 static int
1537 getpeername1(td, uap, compat)
1538 struct thread *td;
1539 register struct getpeername_args /* {
1540 int fdes;
1541 struct sockaddr * __restrict asa;
1542 socklen_t * __restrict alen;
1543 } */ *uap;
1544 int compat;
1545 {
1546 struct socket *so;
1547 struct sockaddr *sa;
1548 struct file *fp;
1549 socklen_t len;
1550 int error;
1551
1552 NET_LOCK_GIANT();
1553 error = getsock(td->td_proc->p_fd, uap->fdes, &fp);
1554 if (error)
1555 goto done2;
1556 so = fp->f_data;
1557 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1558 error = ENOTCONN;
1559 goto done1;
1560 }
1561 error = copyin(uap->alen, &len, sizeof (len));
1562 if (error)
1563 goto done1;
1564 if (len < 0) {
1565 error = EINVAL;
1566 goto done1;
1567 }
1568 sa = 0;
1569 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1570 if (error)
1571 goto bad;
1572 if (sa == 0) {
1573 len = 0;
1574 goto gotnothing;
1575 }
1576 len = MIN(len, sa->sa_len);
1577 #ifdef COMPAT_OLDSOCK
1578 if (compat)
1579 ((struct osockaddr *)sa)->sa_family =
1580 sa->sa_family;
1581 #endif
1582 error = copyout(sa, uap->asa, (u_int)len);
1583 if (error)
1584 goto bad;
1585 gotnothing:
1586 error = copyout(&len, uap->alen, sizeof (len));
1587 bad:
1588 if (sa)
1589 FREE(sa, M_SONAME);
1590 done1:
1591 fdrop(fp, td);
1592 done2:
1593 NET_UNLOCK_GIANT();
1594 return (error);
1595 }
1596
/*
 * getpeername() system call handler: runs getpeername1() with the
 * compat flag cleared, i.e. without the osockaddr family rewrite.
 *
 * MPSAFE
 */
int
getpeername(struct thread *td, struct getpeername_args *uap)
{
	const int compat = 0;

	return (getpeername1(td, uap, compat));
}
1608
#ifdef COMPAT_OLDSOCK
/*
 * Old-socket-API flavour of getpeername(): runs getpeername1() with the
 * compat flag set so that the address family is stored through the
 * struct osockaddr layout.
 *
 * MPSAFE
 */
int
ogetpeername(struct thread *td, struct ogetpeername_args *uap)
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(td, args, 1));
}
#endif /* COMPAT_OLDSOCK */
1623
1624 int
1625 sockargs(mp, buf, buflen, type)
1626 struct mbuf **mp;
1627 caddr_t buf;
1628 int buflen, type;
1629 {
1630 register struct sockaddr *sa;
1631 register struct mbuf *m;
1632 int error;
1633
1634 if ((u_int)buflen > MLEN) {
1635 #ifdef COMPAT_OLDSOCK
1636 if (type == MT_SONAME && (u_int)buflen <= 112)
1637 buflen = MLEN; /* unix domain compat. hack */
1638 else
1639 #endif
1640 if ((u_int)buflen > MCLBYTES)
1641 return (EINVAL);
1642 }
1643 m = m_get(M_TRYWAIT, type);
1644 if (m == NULL)
1645 return (ENOBUFS);
1646 if ((u_int)buflen > MLEN) {
1647 MCLGET(m, M_TRYWAIT);
1648 if ((m->m_flags & M_EXT) == 0) {
1649 m_free(m);
1650 return (ENOBUFS);
1651 }
1652 }
1653 m->m_len = buflen;
1654 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1655 if (error)
1656 (void) m_free(m);
1657 else {
1658 *mp = m;
1659 if (type == MT_SONAME) {
1660 sa = mtod(m, struct sockaddr *);
1661
1662 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1663 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1664 sa->sa_family = sa->sa_len;
1665 #endif
1666 sa->sa_len = buflen;
1667 }
1668 }
1669 return (error);
1670 }
1671
1672 int
1673 getsockaddr(namp, uaddr, len)
1674 struct sockaddr **namp;
1675 caddr_t uaddr;
1676 size_t len;
1677 {
1678 struct sockaddr *sa;
1679 int error;
1680
1681 if (len > SOCK_MAXADDRLEN)
1682 return (ENAMETOOLONG);
1683 if (len < offsetof(struct sockaddr, sa_data[0]))
1684 return (EINVAL);
1685 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1686 error = copyin(uaddr, sa, len);
1687 if (error) {
1688 FREE(sa, M_SONAME);
1689 } else {
1690 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1691 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1692 sa->sa_family = sa->sa_len;
1693 #endif
1694 sa->sa_len = len;
1695 *namp = sa;
1696 }
1697 return (error);
1698 }
1699
1700 /*
1701 * Detach mapped page and release resources back to the system.
1702 */
1703 void
1704 sf_buf_mext(void *addr, void *args)
1705 {
1706 vm_page_t m;
1707
1708 m = sf_buf_page(args);
1709 sf_buf_free(args);
1710 vm_page_lock_queues();
1711 vm_page_unwire(m, 0);
1712 /*
1713 * Check for the object going away on us. This can
1714 * happen since we don't hold a reference to it.
1715 * If so, we're responsible for freeing the page.
1716 */
1717 if (m->wire_count == 0 && m->object == NULL)
1718 vm_page_free(m);
1719 vm_page_unlock_queues();
1720 }
1721
/*
 * sendfile(2)
 *
 * MPSAFE
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Send a file specified by 'fd' and starting at 'offset' to a socket
 * specified by 's'. Send only 'nbytes' of the file or until EOF if
 * nbytes == 0. Optionally add a header and/or trailer to the socket
 * output. If specified, write the total number of bytes sent into *sbytes.
 *
 * This entry point simply runs do_sendfile() with the compat flag
 * cleared.
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
	const int compat = 0;

	return (do_sendfile(td, uap, compat));
}
1742
1743 static int
1744 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1745 {
1746 struct sf_hdtr hdtr;
1747 struct uio *hdr_uio, *trl_uio;
1748 int error;
1749
1750 hdr_uio = trl_uio = NULL;
1751
1752 if (uap->hdtr != NULL) {
1753 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1754 if (error)
1755 goto out;
1756 if (hdtr.headers != NULL) {
1757 error = copyinuio(hdtr.headers, hdtr.hdr_cnt, &hdr_uio);
1758 if (error)
1759 goto out;
1760 }
1761 if (hdtr.trailers != NULL) {
1762 error = copyinuio(hdtr.trailers, hdtr.trl_cnt, &trl_uio);
1763 if (error)
1764 goto out;
1765
1766 }
1767 }
1768
1769 error = kern_sendfile(td, uap, hdr_uio, trl_uio, compat);
1770 out:
1771 if (hdr_uio)
1772 free(hdr_uio, M_IOV);
1773 if (trl_uio)
1774 free(trl_uio, M_IOV);
1775 return (error);
1776 }
1777
#ifdef COMPAT_FREEBSD4
/*
 * FreeBSD 4 compatible sendfile entry point.
 *
 * Repackages the freebsd4_sendfile_args fields one-to-one into a
 * struct sendfile_args and runs do_sendfile() with the compat flag set.
 */
int
freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
{
	struct sendfile_args modern;

	/* Source file and destination socket. */
	modern.fd = uap->fd;
	modern.s = uap->s;

	/* Window of the file to transmit. */
	modern.offset = uap->offset;
	modern.nbytes = uap->nbytes;

	/* Optional header/trailer, byte-count result and flags. */
	modern.hdtr = uap->hdtr;
	modern.sbytes = uap->sbytes;
	modern.flags = uap->flags;

	return (do_sendfile(td, &modern, 1));
}
#endif /* COMPAT_FREEBSD4 */
1795
1796 int
1797 kern_sendfile(struct thread *td, struct sendfile_args *uap,
1798 struct uio *hdr_uio, struct uio *trl_uio, int compat)
1799 {
1800 struct vnode *vp;
1801 struct vm_object *obj = NULL;
1802 struct socket *so = NULL;
1803 struct mbuf *m, *m_header = NULL;
1804 struct sf_buf *sf;
1805 struct vm_page *pg;
1806 off_t off, xfsize, hdtr_size, sbytes = 0;
1807 int error, headersize = 0, headersent = 0;
1808 int vfslocked;
1809
1810 NET_LOCK_GIANT();
1811
1812 hdtr_size = 0;
1813
1814 /*
1815 * The descriptor must be a regular file and have a backing VM object.
1816 */
1817 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1818 goto done;
1819 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1820 vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
1821 if (vp->v_type == VREG) {
1822 obj = vp->v_object;
1823 if (obj != NULL) {
1824 /*
1825 * Temporarily increase the backing VM
1826 * object's reference count so that a forced
1827 * reclamation of its vnode does not
1828 * immediately destroy it.
1829 */
1830 VM_OBJECT_LOCK(obj);
1831 if ((obj->flags & OBJ_DEAD) == 0) {
1832 vm_object_reference_locked(obj);
1833 VM_OBJECT_UNLOCK(obj);
1834 } else {
1835 VM_OBJECT_UNLOCK(obj);
1836 obj = NULL;
1837 }
1838 }
1839 }
1840 VOP_UNLOCK(vp, 0, td);
1841 VFS_UNLOCK_GIANT(vfslocked);
1842 if (obj == NULL) {
1843 error = EINVAL;
1844 goto done;
1845 }
1846 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1847 goto done;
1848 if (so->so_type != SOCK_STREAM) {
1849 error = EINVAL;
1850 goto done;
1851 }
1852 if ((so->so_state & SS_ISCONNECTED) == 0) {
1853 error = ENOTCONN;
1854 goto done;
1855 }
1856 if (uap->offset < 0) {
1857 error = EINVAL;
1858 goto done;
1859 }
1860
1861 #ifdef MAC
1862 SOCK_LOCK(so);
1863 error = mac_check_socket_send(td->td_ucred, so);
1864 SOCK_UNLOCK(so);
1865 if (error)
1866 goto done;
1867 #endif
1868
1869 /*
1870 * If specified, get the pointer to the sf_hdtr struct for
1871 * any headers/trailers.
1872 */
1873 if (hdr_uio != NULL) {
1874 hdr_uio->uio_td = td;
1875 hdr_uio->uio_rw = UIO_WRITE;
1876 if (hdr_uio->uio_resid > 0) {
1877 m_header = m_uiotombuf(hdr_uio, M_DONTWAIT, 0, 0);
1878 if (m_header == NULL)
1879 goto done;
1880 headersize = m_header->m_pkthdr.len;
1881 if (compat)
1882 sbytes += headersize;
1883 }
1884 }
1885
1886 /*
1887 * Protect against multiple writers to the socket.
1888 */
1889 SOCKBUF_LOCK(&so->so_snd);
1890 (void) sblock(&so->so_snd, M_WAITOK);
1891 SOCKBUF_UNLOCK(&so->so_snd);
1892
1893 /*
1894 * Loop through the pages in the file, starting with the requested
1895 * offset. Get a file page (do I/O if necessary), map the file page
1896 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1897 * it on the socket.
1898 */
1899 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1900 vm_pindex_t pindex;
1901 vm_offset_t pgoff;
1902
1903 pindex = OFF_TO_IDX(off);
1904 VM_OBJECT_LOCK(obj);
1905 retry_lookup:
1906 /*
1907 * Calculate the amount to transfer. Not to exceed a page,
1908 * the EOF, or the passed in nbytes.
1909 */
1910 xfsize = obj->un_pager.vnp.vnp_size - off;
1911 VM_OBJECT_UNLOCK(obj);
1912 if (xfsize > PAGE_SIZE)
1913 xfsize = PAGE_SIZE;
1914 pgoff = (vm_offset_t)(off & PAGE_MASK);
1915 if (PAGE_SIZE - pgoff < xfsize)
1916 xfsize = PAGE_SIZE - pgoff;
1917 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1918 xfsize = uap->nbytes - sbytes;
1919 if (xfsize <= 0) {
1920 if (m_header != NULL) {
1921 m = m_header;
1922 m_header = NULL;
1923 SOCKBUF_LOCK(&so->so_snd);
1924 goto retry_space;
1925 } else
1926 break;
1927 }
1928 /*
1929 * Optimize the non-blocking case by looking at the socket space
1930 * before going to the extra work of constituting the sf_buf.
1931 */
1932 SOCKBUF_LOCK(&so->so_snd);
1933 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1934 if (so->so_snd.sb_state & SBS_CANTSENDMORE)
1935 error = EPIPE;
1936 else
1937 error = EAGAIN;
1938 sbunlock(&so->so_snd);
1939 SOCKBUF_UNLOCK(&so->so_snd);
1940 goto done;
1941 }
1942 SOCKBUF_UNLOCK(&so->so_snd);
1943 VM_OBJECT_LOCK(obj);
1944 /*
1945 * Attempt to look up the page.
1946 *
1947 * Allocate if not found
1948 *
1949 * Wait and loop if busy.
1950 */
1951 pg = vm_page_lookup(obj, pindex);
1952
1953 if (pg == NULL) {
1954 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NOBUSY |
1955 VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
1956 if (pg == NULL) {
1957 VM_OBJECT_UNLOCK(obj);
1958 VM_WAIT;
1959 VM_OBJECT_LOCK(obj);
1960 goto retry_lookup;
1961 }
1962 vm_page_lock_queues();
1963 } else {
1964 vm_page_lock_queues();
1965 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
1966 goto retry_lookup;
1967 /*
1968 * Wire the page so it does not get ripped out from
1969 * under us.
1970 */
1971 vm_page_wire(pg);
1972 }
1973
1974 /*
1975 * If page is not valid for what we need, initiate I/O
1976 */
1977
1978 if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize)) {
1979 VM_OBJECT_UNLOCK(obj);
1980 } else if (uap->flags & SF_NODISKIO) {
1981 error = EBUSY;
1982 } else {
1983 int bsize, resid;
1984
1985 /*
1986 * Ensure that our page is still around when the I/O
1987 * completes.
1988 */
1989 vm_page_io_start(pg);
1990 vm_page_unlock_queues();
1991 VM_OBJECT_UNLOCK(obj);
1992
1993 /*
1994 * Get the page from backing store.
1995 */
1996 bsize = vp->v_mount->mnt_stat.f_iosize;
1997 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
1998 vn_lock(vp, LK_SHARED | LK_RETRY, td);
1999 /*
2000 * XXXMAC: Because we don't have fp->f_cred here,
2001 * we pass in NOCRED. This is probably wrong, but
2002 * is consistent with our original implementation.
2003 */
2004 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
2005 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
2006 IO_VMIO | ((MAXBSIZE / bsize) << IO_SEQSHIFT),
2007 td->td_ucred, NOCRED, &resid, td);
2008 VOP_UNLOCK(vp, 0, td);
2009 VFS_UNLOCK_GIANT(vfslocked);
2010 VM_OBJECT_LOCK(obj);
2011 vm_page_lock_queues();
2012 vm_page_io_finish(pg);
2013 if (!error)
2014 VM_OBJECT_UNLOCK(obj);
2015 mbstat.sf_iocnt++;
2016 }
2017
2018 if (error) {
2019 vm_page_unwire(pg, 0);
2020 /*
2021 * See if anyone else might know about this page.
2022 * If not and it is not valid, then free it.
2023 */
2024 if (pg->wire_count == 0 && pg->valid == 0 &&
2025 pg->busy == 0 && !(pg->flags & PG_BUSY) &&
2026 pg->hold_count == 0) {
2027 vm_page_free(pg);
2028 }
2029 vm_page_unlock_queues();
2030 VM_OBJECT_UNLOCK(obj);
2031 SOCKBUF_LOCK(&so->so_snd);
2032 sbunlock(&so->so_snd);
2033 SOCKBUF_UNLOCK(&so->so_snd);
2034 goto done;
2035 }
2036 vm_page_unlock_queues();
2037
2038 /*
2039 * Get a sendfile buf. We usually wait as long as necessary,
2040 * but this wait can be interrupted.
2041 */
2042 if ((sf = sf_buf_alloc(pg, SFB_CATCH)) == NULL) {
2043 mbstat.sf_allocfail++;
2044 vm_page_lock_queues();
2045 vm_page_unwire(pg, 0);
2046 if (pg->wire_count == 0 && pg->object == NULL)
2047 vm_page_free(pg);
2048 vm_page_unlock_queues();
2049 SOCKBUF_LOCK(&so->so_snd);
2050 sbunlock(&so->so_snd);
2051 SOCKBUF_UNLOCK(&so->so_snd);
2052 error = EINTR;
2053 goto done;
2054 }
2055
2056 /*
2057 * Get an mbuf header and set it up as having external storage.
2058 */
2059 if (m_header)
2060 MGET(m, M_TRYWAIT, MT_DATA);
2061 else
2062 MGETHDR(m, M_TRYWAIT, MT_DATA);
2063 if (m == NULL) {
2064 error = ENOBUFS;
2065 sf_buf_mext((void *)sf_buf_kva(sf), sf);
2066 SOCKBUF_LOCK(&so->so_snd);
2067 sbunlock(&so->so_snd);
2068 SOCKBUF_UNLOCK(&so->so_snd);
2069 goto done;
2070 }
2071 /*
2072 * Setup external storage for mbuf.
2073 */
2074 MEXTADD(m, sf_buf_kva(sf), PAGE_SIZE, sf_buf_mext, sf, M_RDONLY,
2075 EXT_SFBUF);
2076 m->m_data = (char *)sf_buf_kva(sf) + pgoff;
2077 m->m_pkthdr.len = m->m_len = xfsize;
2078
2079 if (m_header) {
2080 m_cat(m_header, m);
2081 m = m_header;
2082 m_header = NULL;
2083 m_fixhdr(m);
2084 }
2085
2086 /*
2087 * Add the buffer to the socket buffer chain.
2088 */
2089 SOCKBUF_LOCK(&so->so_snd);
2090 retry_space:
2091 /*
2092 * Make sure that the socket is still able to take more data.
2093 * CANTSENDMORE being true usually means that the connection
2094 * was closed. so_error is true when an error was sensed after
2095 * a previous send.
2096 * The state is checked after the page mapping and buffer
2097 * allocation above since those operations may block and make
2098 * any socket checks stale. From this point forward, nothing
2099 * blocks before the pru_send (or more accurately, any blocking
2100 * results in a loop back to here to re-check).
2101 */
2102 SOCKBUF_LOCK_ASSERT(&so->so_snd);
2103 if ((so->so_snd.sb_state & SBS_CANTSENDMORE) || so->so_error) {
2104 if (so->so_snd.sb_state & SBS_CANTSENDMORE) {
2105 error = EPIPE;
2106 } else {
2107 error = so->so_error;
2108 so->so_error = 0;
2109 }
2110 m_freem(m);
2111 sbunlock(&so->so_snd);
2112 SOCKBUF_UNLOCK(&so->so_snd);
2113 goto done;
2114 }
2115 /*
2116 * Wait for socket space to become available. We do this just
2117 * after checking the connection state above in order to avoid
2118 * a race condition with sbwait().
2119 */
2120 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
2121 if (so->so_state & SS_NBIO) {
2122 m_freem(m);
2123 sbunlock(&so->so_snd);
2124 SOCKBUF_UNLOCK(&so->so_snd);
2125 error = EAGAIN;
2126 goto done;
2127 }
2128 error = sbwait(&so->so_snd);
2129 /*
2130 * An error from sbwait usually indicates that we've
2131 * been interrupted by a signal. If we've sent anything
2132 * then return bytes sent, otherwise return the error.
2133 */
2134 if (error) {
2135 m_freem(m);
2136 sbunlock(&so->so_snd);
2137 SOCKBUF_UNLOCK(&so->so_snd);
2138 goto done;
2139 }
2140 goto retry_space;
2141 }
2142 SOCKBUF_UNLOCK(&so->so_snd);
2143 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
2144 if (error) {
2145 SOCKBUF_LOCK(&so->so_snd);
2146 sbunlock(&so->so_snd);
2147 SOCKBUF_UNLOCK(&so->so_snd);
2148 goto done;
2149 }
2150 headersent = 1;
2151 }
2152 SOCKBUF_LOCK(&so->so_snd);
2153 sbunlock(&so->so_snd);
2154 SOCKBUF_UNLOCK(&so->so_snd);
2155
2156 /*
2157 * Send trailers. Wimp out and use writev(2).
2158 */
2159 if (trl_uio != NULL) {
2160 error = kern_writev(td, uap->s, trl_uio);
2161 if (error)
2162 goto done;
2163 if (compat)
2164 sbytes += td->td_retval[0];
2165 else
2166 hdtr_size += td->td_retval[0];
2167 }
2168
2169 done:
2170 if (headersent) {
2171 if (!compat)
2172 hdtr_size += headersize;
2173 } else {
2174 if (compat)
2175 sbytes -= headersize;
2176 }
2177 /*
2178 * If there was no error we have to clear td->td_retval[0]
2179 * because it may have been set by writev.
2180 */
2181 if (error == 0) {
2182 td->td_retval[0] = 0;
2183 }
2184 if (uap->sbytes != NULL) {
2185 if (!compat)
2186 sbytes += hdtr_size;
2187 copyout(&sbytes, uap->sbytes, sizeof(off_t));
2188 }
2189 if (obj != NULL)
2190 vm_object_deallocate(obj);
2191 if (vp != NULL) {
2192 vfslocked = VFS_LOCK_GIANT(vp->v_mount);
2193 vrele(vp);
2194 VFS_UNLOCK_GIANT(vfslocked);
2195 }
2196 if (so)
2197 fputsock(so);
2198 if (m_header)
2199 m_freem(m_header);
2200
2201 NET_UNLOCK_GIANT();
2202
2203 if (error == ERESTART)
2204 error = EINTR;
2205
2206 return (error);
2207 }
Cache object: 627112e79075c9cc6da5ec364ef2ce29
|