1 /*
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
37 * $FreeBSD: releng/5.1/sys/kern/uipc_syscalls.c 115383 2003-05-29 18:36:26Z dwmalone $
38 */
39
40 #include "opt_compat.h"
41 #include "opt_ktrace.h"
42 #include "opt_mac.h"
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/kernel.h>
47 #include <sys/lock.h>
48 #include <sys/mac.h>
49 #include <sys/mutex.h>
50 #include <sys/sysproto.h>
51 #include <sys/malloc.h>
52 #include <sys/filedesc.h>
53 #include <sys/event.h>
54 #include <sys/proc.h>
55 #include <sys/fcntl.h>
56 #include <sys/file.h>
57 #include <sys/filio.h>
58 #include <sys/mount.h>
59 #include <sys/mbuf.h>
60 #include <sys/protosw.h>
61 #include <sys/socket.h>
62 #include <sys/socketvar.h>
63 #include <sys/signalvar.h>
64 #include <sys/syscallsubr.h>
65 #include <sys/uio.h>
66 #include <sys/vnode.h>
67 #ifdef KTRACE
68 #include <sys/ktrace.h>
69 #endif
70
71 #include <vm/vm.h>
72 #include <vm/vm_object.h>
73 #include <vm/vm_page.h>
74 #include <vm/vm_pageout.h>
75 #include <vm/vm_kern.h>
76 #include <vm/vm_extern.h>
77
/* Initializer for the sf_buf pool, registered with SYSINIT below. */
static void sf_buf_init(void *arg);
SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)

/* Common back ends for the send*() and recv*() system call families. */
static int sendit(struct thread *td, int s, struct msghdr *mp, int flags);
static int recvit(struct thread *td, int s, struct msghdr *mp, void *namelenp);

/*
 * Worker routines behind paired native/old-style entry points.  For
 * accept1() and getsockname1() a non-zero "compat" selects the old
 * (struct osockaddr) address layout; presumably the same convention
 * applies to do_sendfile() and getpeername1() -- confirm at their
 * definitions.
 */
static int accept1(struct thread *td, struct accept_args *uap, int compat);
static int do_sendfile(struct thread *td, struct sendfile_args *uap, int compat);
static int getsockname1(struct thread *td, struct getsockname_args *uap,
    int compat);
static int getpeername1(struct thread *td, struct getpeername_args *uap,
    int compat);

/*
 * Expanded sf_freelist head.  Really an SLIST_HEAD() in disguise: the
 * list head for free sf_bufs is bundled together with the sf_lock mutex
 * in a single structure.
 */
static struct {
	SLIST_HEAD(, sf_buf) sf_head;
	struct mtx sf_lock;
} sf_freelist;

static u_int sf_buf_alloc_want;

/*
 * System call interface to the socket abstraction.
 *
 * COMPAT_OLDSOCK is defined whenever either COMPAT_43 or COMPAT_SUNOS is
 * configured; it guards the old-style socket entry points and the
 * osockaddr/omsghdr conversions in this file.
 */
#if defined(COMPAT_43) || defined(COMPAT_SUNOS)
#define COMPAT_OLDSOCK
#endif
108
109 /*
110 * MPSAFE
111 */
112 int
113 socket(td, uap)
114 struct thread *td;
115 register struct socket_args /* {
116 int domain;
117 int type;
118 int protocol;
119 } */ *uap;
120 {
121 struct filedesc *fdp;
122 struct socket *so;
123 struct file *fp;
124 int fd, error;
125
126 mtx_lock(&Giant);
127 fdp = td->td_proc->p_fd;
128 error = falloc(td, &fp, &fd);
129 if (error)
130 goto done2;
131 fhold(fp);
132 error = socreate(uap->domain, &so, uap->type, uap->protocol,
133 td->td_ucred, td);
134 FILEDESC_LOCK(fdp);
135 if (error) {
136 if (fdp->fd_ofiles[fd] == fp) {
137 fdp->fd_ofiles[fd] = NULL;
138 FILEDESC_UNLOCK(fdp);
139 fdrop(fp, td);
140 } else
141 FILEDESC_UNLOCK(fdp);
142 } else {
143 fp->f_data = so; /* already has ref count */
144 fp->f_flag = FREAD|FWRITE;
145 fp->f_ops = &socketops;
146 fp->f_type = DTYPE_SOCKET;
147 FILEDESC_UNLOCK(fdp);
148 td->td_retval[0] = fd;
149 }
150 fdrop(fp, td);
151 done2:
152 mtx_unlock(&Giant);
153 return (error);
154 }
155
156 /*
157 * MPSAFE
158 */
159 /* ARGSUSED */
160 int
161 bind(td, uap)
162 struct thread *td;
163 register struct bind_args /* {
164 int s;
165 caddr_t name;
166 int namelen;
167 } */ *uap;
168 {
169 struct sockaddr *sa;
170 int error;
171
172 if ((error = getsockaddr(&sa, uap->name, uap->namelen)) != 0)
173 return (error);
174
175 return (kern_bind(td, uap->s, sa));
176 }
177
178 int
179 kern_bind(td, fd, sa)
180 struct thread *td;
181 int fd;
182 struct sockaddr *sa;
183 {
184 struct socket *so;
185 int error;
186
187 mtx_lock(&Giant);
188 if ((error = fgetsock(td, fd, &so, NULL)) != 0)
189 goto done2;
190 #ifdef MAC
191 error = mac_check_socket_bind(td->td_ucred, so, sa);
192 if (error)
193 goto done1;
194 #endif
195 error = sobind(so, sa, td);
196 #ifdef MAC
197 done1:
198 #endif
199 fputsock(so);
200 done2:
201 mtx_unlock(&Giant);
202 FREE(sa, M_SONAME);
203 return (error);
204 }
205
206 /*
207 * MPSAFE
208 */
209 /* ARGSUSED */
210 int
211 listen(td, uap)
212 struct thread *td;
213 register struct listen_args /* {
214 int s;
215 int backlog;
216 } */ *uap;
217 {
218 struct socket *so;
219 int error;
220
221 mtx_lock(&Giant);
222 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
223 #ifdef MAC
224 error = mac_check_socket_listen(td->td_ucred, so);
225 if (error)
226 goto done;
227 #endif
228 error = solisten(so, uap->backlog, td);
229 #ifdef MAC
230 done:
231 #endif
232 fputsock(so);
233 }
234 mtx_unlock(&Giant);
235 return(error);
236 }
237
/*
 * accept1() - common implementation behind accept() and oaccept().
 *
 * Takes the first completed connection off the listening socket named by
 * uap->s, sleeping for one unless the socket is non-blocking, and installs
 * it on a newly allocated descriptor.  When uap->name is non-NULL the
 * peer address is copied out to uap->name (truncated to the length read
 * from uap->anamelen) and the resulting length is written back to
 * uap->anamelen.  A non-zero "compat" stores the address family in the
 * struct osockaddr layout before the copyout (COMPAT_OLDSOCK only).
 *
 * On success the new descriptor is left in td->td_retval[0] and 0 is
 * returned; otherwise an errno value is returned.
 *
 * MPSAFE
 */
static int
accept1(td, uap, compat)
	struct thread *td;
	register struct accept_args /* {
		int	s;
		caddr_t	name;
		int	*anamelen;
	} */ *uap;
	int compat;
{
	struct filedesc *fdp;
	struct file *nfp = NULL;
	struct sockaddr *sa;
	int namelen, error, s;
	struct socket *head, *so;
	int fd;
	u_int fflag;
	pid_t pgid;
	int tmp;

	mtx_lock(&Giant);
	fdp = td->td_proc->p_fd;
	/* The caller's buffer length is only needed if an address is wanted. */
	if (uap->name) {
		error = copyin(uap->anamelen, &namelen, sizeof (namelen));
		if(error)
			goto done2;
		if (namelen < 0) {
			error = EINVAL;
			goto done2;
		}
	}
	error = fgetsock(td, uap->s, &head, &fflag);
	if (error)
		goto done2;
	/* Every exit from here on must restore the level saved in s. */
	s = splnet();
	if ((head->so_options & SO_ACCEPTCONN) == 0) {
		splx(s);
		error = EINVAL;
		goto done;
	}
	/*
	 * Wait for a completed connection.  Shutdown-for-receive and
	 * non-blocking mode are reported through so_error so that they are
	 * handled by the common so_error check below.
	 */
	while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
		if (head->so_state & SS_CANTRCVMORE) {
			head->so_error = ECONNABORTED;
			break;
		}
		if ((head->so_state & SS_NBIO) != 0) {
			head->so_error = EWOULDBLOCK;
			break;
		}
		error = tsleep(&head->so_timeo, PSOCK | PCATCH,
		    "accept", 0);
		if (error) {
			splx(s);
			goto done;
		}
	}
	/* Report and clear any pending error on the listening socket. */
	if (head->so_error) {
		error = head->so_error;
		head->so_error = 0;
		splx(s);
		goto done;
	}

	/*
	 * At this point we know that there is at least one connection
	 * ready to be accepted. Remove it from the queue prior to
	 * allocating the file descriptor for it since falloc() may
	 * block allowing another process to accept the connection
	 * instead.
	 */
	so = TAILQ_FIRST(&head->so_comp);
	TAILQ_REMOVE(&head->so_comp, so, so_list);
	head->so_qlen--;

	error = falloc(td, &nfp, &fd);
	if (error) {
		/*
		 * Probably ran out of file descriptors. Put the
		 * unaccepted connection back onto the queue and
		 * do another wakeup so some other process might
		 * have a chance at it.
		 */
		TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
		head->so_qlen++;
		wakeup_one(&head->so_timeo);
		splx(s);
		goto done;
	}
	/* Extra hold on nfp; it is dropped at "done". */
	fhold(nfp);
	td->td_retval[0] = fd;

	/* connection has been removed from the listen queue */
	KNOTE(&head->so_rcv.sb_sel.si_note, 0);

	/* Detach the new socket from its listening head. */
	so->so_state &= ~SS_COMP;
	so->so_head = NULL;
	/* Carry the listening socket's SIGIO owner, if any, over to so. */
	pgid = fgetown(&head->so_sigio);
	if (pgid != 0)
		fsetown(pgid, &so->so_sigio);

	FILE_LOCK(nfp);
	soref(so);			/* file descriptor reference */
	nfp->f_data = so;	/* nfp has ref count from falloc */
	nfp->f_flag = fflag;
	nfp->f_ops = &socketops;
	nfp->f_type = DTYPE_SOCKET;
	FILE_UNLOCK(nfp);
	/* Sync socket nonblocking/async state with file flags */
	tmp = fflag & FNONBLOCK;
	(void) fo_ioctl(nfp, FIONBIO, &tmp, td->td_ucred, td);
	tmp = fflag & FASYNC;
	(void) fo_ioctl(nfp, FIOASYNC, &tmp, td->td_ucred, td);
	sa = 0;
	error = soaccept(so, &sa);
	if (error) {
		/*
		 * return a namelen of zero for older code which might
		 * ignore the return value from accept.
		 */
		if (uap->name != NULL) {
			namelen = 0;
			(void) copyout(&namelen,
			    uap->anamelen, sizeof(*uap->anamelen));
		}
		goto noconnection;
	}
	if (sa == NULL) {
		/* No address available: report a zero length if one was asked for. */
		namelen = 0;
		if (uap->name)
			goto gotnoname;
		splx(s);
		error = 0;
		goto done;
	}
	if (uap->name) {
		/* check sa_len before it is destroyed */
		if (namelen > sa->sa_len)
			namelen = sa->sa_len;
#ifdef COMPAT_OLDSOCK
		if (compat)
			((struct osockaddr *)sa)->sa_family =
			    sa->sa_family;
#endif
		error = copyout(sa, uap->name, (u_int)namelen);
		if (!error)
			/*
			 * "gotnoname" is also entered directly from the
			 * sa == NULL case above, bypassing the address
			 * copyout.
			 */
gotnoname:
			error = copyout(&namelen,
			    uap->anamelen, sizeof (*uap->anamelen));
	}
noconnection:
	if (sa)
		FREE(sa, M_SONAME);

	/*
	 * close the new descriptor, assuming someone hasn't ripped it
	 * out from under us.
	 */
	if (error) {
		FILEDESC_LOCK(fdp);
		if (fdp->fd_ofiles[fd] == nfp) {
			fdp->fd_ofiles[fd] = NULL;
			FILEDESC_UNLOCK(fdp);
			fdrop(nfp, td);
		} else {
			FILEDESC_UNLOCK(fdp);
		}
	}
	splx(s);

	/*
	 * Release explicitly held references before returning.
	 */
done:
	/* nfp is still NULL if we never got as far as falloc(). */
	if (nfp != NULL)
		fdrop(nfp, td);
	fputsock(head);
done2:
	mtx_unlock(&Giant);
	return (error);
}
422
/*
 * accept() - native entry point; calls accept1() with compat == 0.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
accept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{
	int rv;

	rv = accept1(td, uap, 0);
	return (rv);
}
434
435 #ifdef COMPAT_OLDSOCK
/*
 * oaccept() - old-style entry point; calls accept1() with compat == 1.
 *
 * MPSAFE (accept1() is MPSAFE)
 */
int
oaccept(td, uap)
	struct thread *td;
	struct accept_args *uap;
{
	int rv;

	rv = accept1(td, uap, 1);
	return (rv);
}
447 #endif /* COMPAT_OLDSOCK */
448
449 /*
450 * MPSAFE
451 */
452 /* ARGSUSED */
453 int
454 connect(td, uap)
455 struct thread *td;
456 register struct connect_args /* {
457 int s;
458 caddr_t name;
459 int namelen;
460 } */ *uap;
461 {
462 struct sockaddr *sa;
463 int error;
464
465 error = getsockaddr(&sa, uap->name, uap->namelen);
466 if (error)
467 return error;
468
469 return (kern_connect(td, uap->s, sa));
470 }
471
472
473 int
474 kern_connect(td, fd, sa)
475 struct thread *td;
476 int fd;
477 struct sockaddr *sa;
478 {
479 struct socket *so;
480 int error, s;
481
482 mtx_lock(&Giant);
483 if ((error = fgetsock(td, fd, &so, NULL)) != 0)
484 goto done2;
485 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
486 error = EALREADY;
487 goto done1;
488 }
489 #ifdef MAC
490 error = mac_check_socket_connect(td->td_ucred, so, sa);
491 if (error)
492 goto bad;
493 #endif
494 error = soconnect(so, sa, td);
495 if (error)
496 goto bad;
497 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
498 error = EINPROGRESS;
499 goto done1;
500 }
501 s = splnet();
502 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
503 error = tsleep(&so->so_timeo, PSOCK | PCATCH, "connec", 0);
504 if (error)
505 break;
506 }
507 if (error == 0) {
508 error = so->so_error;
509 so->so_error = 0;
510 }
511 splx(s);
512 bad:
513 so->so_state &= ~SS_ISCONNECTING;
514 if (error == ERESTART)
515 error = EINTR;
516 done1:
517 fputsock(so);
518 done2:
519 mtx_unlock(&Giant);
520 FREE(sa, M_SONAME);
521 return (error);
522 }
523
524 /*
525 * MPSAFE
526 */
527 int
528 socketpair(td, uap)
529 struct thread *td;
530 register struct socketpair_args /* {
531 int domain;
532 int type;
533 int protocol;
534 int *rsv;
535 } */ *uap;
536 {
537 register struct filedesc *fdp = td->td_proc->p_fd;
538 struct file *fp1, *fp2;
539 struct socket *so1, *so2;
540 int fd, error, sv[2];
541
542 mtx_lock(&Giant);
543 error = socreate(uap->domain, &so1, uap->type, uap->protocol,
544 td->td_ucred, td);
545 if (error)
546 goto done2;
547 error = socreate(uap->domain, &so2, uap->type, uap->protocol,
548 td->td_ucred, td);
549 if (error)
550 goto free1;
551 error = falloc(td, &fp1, &fd);
552 if (error)
553 goto free2;
554 fhold(fp1);
555 sv[0] = fd;
556 fp1->f_data = so1; /* so1 already has ref count */
557 error = falloc(td, &fp2, &fd);
558 if (error)
559 goto free3;
560 fhold(fp2);
561 fp2->f_data = so2; /* so2 already has ref count */
562 sv[1] = fd;
563 error = soconnect2(so1, so2);
564 if (error)
565 goto free4;
566 if (uap->type == SOCK_DGRAM) {
567 /*
568 * Datagram socket connection is asymmetric.
569 */
570 error = soconnect2(so2, so1);
571 if (error)
572 goto free4;
573 }
574 FILE_LOCK(fp1);
575 fp1->f_flag = FREAD|FWRITE;
576 fp1->f_ops = &socketops;
577 fp1->f_type = DTYPE_SOCKET;
578 FILE_UNLOCK(fp1);
579 FILE_LOCK(fp2);
580 fp2->f_flag = FREAD|FWRITE;
581 fp2->f_ops = &socketops;
582 fp2->f_type = DTYPE_SOCKET;
583 FILE_UNLOCK(fp2);
584 error = copyout(sv, uap->rsv, 2 * sizeof (int));
585 fdrop(fp1, td);
586 fdrop(fp2, td);
587 goto done2;
588 free4:
589 FILEDESC_LOCK(fdp);
590 if (fdp->fd_ofiles[sv[1]] == fp2) {
591 fdp->fd_ofiles[sv[1]] = NULL;
592 FILEDESC_UNLOCK(fdp);
593 fdrop(fp2, td);
594 } else
595 FILEDESC_UNLOCK(fdp);
596 fdrop(fp2, td);
597 free3:
598 FILEDESC_LOCK(fdp);
599 if (fdp->fd_ofiles[sv[0]] == fp1) {
600 fdp->fd_ofiles[sv[0]] = NULL;
601 FILEDESC_UNLOCK(fdp);
602 fdrop(fp1, td);
603 } else
604 FILEDESC_UNLOCK(fdp);
605 fdrop(fp1, td);
606 free2:
607 (void)soclose(so2);
608 free1:
609 (void)soclose(so1);
610 done2:
611 mtx_unlock(&Giant);
612 return (error);
613 }
614
615 static int
616 sendit(td, s, mp, flags)
617 register struct thread *td;
618 int s;
619 register struct msghdr *mp;
620 int flags;
621 {
622 struct mbuf *control;
623 struct sockaddr *to;
624 int error;
625
626 mtx_lock(&Giant);
627 if (mp->msg_name != NULL) {
628 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
629 if (error) {
630 to = NULL;
631 goto bad;
632 }
633 mp->msg_name = to;
634 } else
635 to = NULL;
636
637 if (mp->msg_control) {
638 if (mp->msg_controllen < sizeof(struct cmsghdr)
639 #ifdef COMPAT_OLDSOCK
640 && mp->msg_flags != MSG_COMPAT
641 #endif
642 ) {
643 error = EINVAL;
644 goto bad;
645 }
646 error = sockargs(&control, mp->msg_control,
647 mp->msg_controllen, MT_CONTROL);
648 if (error)
649 goto bad;
650 #ifdef COMPAT_OLDSOCK
651 if (mp->msg_flags == MSG_COMPAT) {
652 register struct cmsghdr *cm;
653
654 M_PREPEND(control, sizeof(*cm), M_TRYWAIT);
655 if (control == 0) {
656 error = ENOBUFS;
657 goto bad;
658 } else {
659 cm = mtod(control, struct cmsghdr *);
660 cm->cmsg_len = control->m_len;
661 cm->cmsg_level = SOL_SOCKET;
662 cm->cmsg_type = SCM_RIGHTS;
663 }
664 }
665 #endif
666 } else {
667 control = NULL;
668 }
669
670 error = kern_sendit(td, s, mp, flags, control);
671
672 bad:
673 if (to)
674 FREE(to, M_SONAME);
675 mtx_unlock(&Giant);
676 return (error);
677 }
678
679 int
680 kern_sendit(td, s, mp, flags, control)
681 struct thread *td;
682 int s;
683 struct msghdr *mp;
684 int flags;
685 struct mbuf *control;
686 {
687 struct uio auio;
688 struct iovec *iov;
689 struct socket *so;
690 int i;
691 int len, error;
692 #ifdef KTRACE
693 struct iovec *ktriov = NULL;
694 struct uio ktruio;
695 int iovlen;
696 #endif
697
698 if ((error = fgetsock(td, s, &so, NULL)) != 0)
699 goto bad2;
700
701 #ifdef MAC
702 error = mac_check_socket_send(td->td_ucred, so);
703 if (error)
704 goto bad;
705 #endif
706
707 auio.uio_iov = mp->msg_iov;
708 auio.uio_iovcnt = mp->msg_iovlen;
709 auio.uio_segflg = UIO_USERSPACE;
710 auio.uio_rw = UIO_WRITE;
711 auio.uio_td = td;
712 auio.uio_offset = 0; /* XXX */
713 auio.uio_resid = 0;
714 iov = mp->msg_iov;
715 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
716 if ((auio.uio_resid += iov->iov_len) < 0) {
717 error = EINVAL;
718 goto bad;
719 }
720 }
721 #ifdef KTRACE
722 if (KTRPOINT(td, KTR_GENIO)) {
723 iovlen = auio.uio_iovcnt * sizeof (struct iovec);
724 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
725 bcopy(auio.uio_iov, ktriov, iovlen);
726 ktruio = auio;
727 }
728 #endif
729 len = auio.uio_resid;
730 error = so->so_proto->pr_usrreqs->pru_sosend(so, mp->msg_name, &auio,
731 0, control, flags, td);
732 if (error) {
733 if (auio.uio_resid != len && (error == ERESTART ||
734 error == EINTR || error == EWOULDBLOCK))
735 error = 0;
736 /* Generation of SIGPIPE can be controlled per socket */
737 if (error == EPIPE && !(so->so_options & SO_NOSIGPIPE)) {
738 PROC_LOCK(td->td_proc);
739 psignal(td->td_proc, SIGPIPE);
740 PROC_UNLOCK(td->td_proc);
741 }
742 }
743 if (error == 0)
744 td->td_retval[0] = len - auio.uio_resid;
745 #ifdef KTRACE
746 if (ktriov != NULL) {
747 if (error == 0) {
748 ktruio.uio_iov = ktriov;
749 ktruio.uio_resid = td->td_retval[0];
750 ktrgenio(s, UIO_WRITE, &ktruio, error);
751 }
752 FREE(ktriov, M_TEMP);
753 }
754 #endif
755 bad:
756 fputsock(so);
757 bad2:
758 return (error);
759 }
760
761 /*
762 * MPSAFE
763 */
764 int
765 sendto(td, uap)
766 struct thread *td;
767 register struct sendto_args /* {
768 int s;
769 caddr_t buf;
770 size_t len;
771 int flags;
772 caddr_t to;
773 int tolen;
774 } */ *uap;
775 {
776 struct msghdr msg;
777 struct iovec aiov;
778 int error;
779
780 msg.msg_name = uap->to;
781 msg.msg_namelen = uap->tolen;
782 msg.msg_iov = &aiov;
783 msg.msg_iovlen = 1;
784 msg.msg_control = 0;
785 #ifdef COMPAT_OLDSOCK
786 msg.msg_flags = 0;
787 #endif
788 aiov.iov_base = uap->buf;
789 aiov.iov_len = uap->len;
790 error = sendit(td, uap->s, &msg, uap->flags);
791 return (error);
792 }
793
794 #ifdef COMPAT_OLDSOCK
795 /*
796 * MPSAFE
797 */
798 int
799 osend(td, uap)
800 struct thread *td;
801 register struct osend_args /* {
802 int s;
803 caddr_t buf;
804 int len;
805 int flags;
806 } */ *uap;
807 {
808 struct msghdr msg;
809 struct iovec aiov;
810 int error;
811
812 msg.msg_name = 0;
813 msg.msg_namelen = 0;
814 msg.msg_iov = &aiov;
815 msg.msg_iovlen = 1;
816 aiov.iov_base = uap->buf;
817 aiov.iov_len = uap->len;
818 msg.msg_control = 0;
819 msg.msg_flags = 0;
820 error = sendit(td, uap->s, &msg, uap->flags);
821 return (error);
822 }
823
824 /*
825 * MPSAFE
826 */
827 int
828 osendmsg(td, uap)
829 struct thread *td;
830 register struct osendmsg_args /* {
831 int s;
832 caddr_t msg;
833 int flags;
834 } */ *uap;
835 {
836 struct msghdr msg;
837 struct iovec aiov[UIO_SMALLIOV], *iov;
838 int error;
839
840 error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
841 if (error)
842 goto done2;
843 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
844 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
845 error = EMSGSIZE;
846 goto done2;
847 }
848 MALLOC(iov, struct iovec *,
849 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
850 M_WAITOK);
851 } else {
852 iov = aiov;
853 }
854 error = copyin(msg.msg_iov, iov,
855 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
856 if (error)
857 goto done;
858 msg.msg_flags = MSG_COMPAT;
859 msg.msg_iov = iov;
860 error = sendit(td, uap->s, &msg, uap->flags);
861 done:
862 if (iov != aiov)
863 FREE(iov, M_IOV);
864 done2:
865 return (error);
866 }
867 #endif
868
869 /*
870 * MPSAFE
871 */
872 int
873 sendmsg(td, uap)
874 struct thread *td;
875 register struct sendmsg_args /* {
876 int s;
877 caddr_t msg;
878 int flags;
879 } */ *uap;
880 {
881 struct msghdr msg;
882 struct iovec aiov[UIO_SMALLIOV], *iov;
883 int error;
884
885 error = copyin(uap->msg, &msg, sizeof (msg));
886 if (error)
887 goto done2;
888 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
889 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
890 error = EMSGSIZE;
891 goto done2;
892 }
893 MALLOC(iov, struct iovec *,
894 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
895 M_WAITOK);
896 } else {
897 iov = aiov;
898 }
899 if (msg.msg_iovlen &&
900 (error = copyin(msg.msg_iov, iov,
901 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
902 goto done;
903 msg.msg_iov = iov;
904 #ifdef COMPAT_OLDSOCK
905 msg.msg_flags = 0;
906 #endif
907 error = sendit(td, uap->s, &msg, uap->flags);
908 done:
909 if (iov != aiov)
910 FREE(iov, M_IOV);
911 done2:
912 return (error);
913 }
914
/*
 * recvit() - common back end for the recv*() system calls.
 *
 * Receives into the user buffers described by mp->msg_iov/msg_iovlen from
 * the socket behind descriptor s.  On success the byte count is stored in
 * td->td_retval[0].  If mp->msg_name is set, the source address is copied
 * out to it (truncated to mp->msg_namelen), mp->msg_namelen is updated,
 * and the length is also copied out to namelenp when that is non-NULL.
 * If mp->msg_control is set, received control data is copied out to it,
 * mp->msg_controllen is updated, and MSG_CTRUNC is set in mp->msg_flags
 * when the data did not fit.
 *
 * A partial transfer interrupted by ERESTART, EINTR or EWOULDBLOCK is
 * reported as success.  This function does not take Giant itself; the
 * callers in this file hold it across the call.  Returns 0 or an errno
 * value.
 */
static int
recvit(td, s, mp, namelenp)
	register struct thread *td;
	int s;
	register struct msghdr *mp;
	void *namelenp;
{
	struct uio auio;
	register struct iovec *iov;
	register int i;
	int len, error;
	struct mbuf *m, *control = 0;
	caddr_t ctlbuf;
	struct socket *so;
	struct sockaddr *fromsa = 0;
#ifdef KTRACE
	struct iovec *ktriov = NULL;
	struct uio ktruio;
	int iovlen;
#endif

	if ((error = fgetsock(td, s, &so, NULL)) != 0)
		return (error);

#ifdef MAC
	error = mac_check_socket_receive(td->td_ucred, so);
	if (error) {
		fputsock(so);
		return (error);
	}
#endif

	auio.uio_iov = mp->msg_iov;
	auio.uio_iovcnt = mp->msg_iovlen;
	auio.uio_segflg = UIO_USERSPACE;
	auio.uio_rw = UIO_READ;
	auio.uio_td = td;
	auio.uio_offset = 0;			/* XXX */
	auio.uio_resid = 0;
	iov = mp->msg_iov;
	/* Total the request size, rejecting a sum that goes negative. */
	for (i = 0; i < mp->msg_iovlen; i++, iov++) {
		if ((auio.uio_resid += iov->iov_len) < 0) {
			fputsock(so);
			return (EINVAL);
		}
	}
#ifdef KTRACE
	/* Snapshot the iovec array for tracing before the receive modifies it. */
	if (KTRPOINT(td, KTR_GENIO)) {
		iovlen = auio.uio_iovcnt * sizeof (struct iovec);
		MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
		bcopy(auio.uio_iov, ktriov, iovlen);
		ktruio = auio;
	}
#endif
	len = auio.uio_resid;
	/* Control data is only requested when the caller gave a buffer for it. */
	error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
	    (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
	    &mp->msg_flags);
	if (error) {
		/* Some data arrived before the interruption: not an error. */
		if (auio.uio_resid != len && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	}
#ifdef KTRACE
	if (ktriov != NULL) {
		if (error == 0) {
			ktruio.uio_iov = ktriov;
			ktruio.uio_resid = len - auio.uio_resid;
			ktrgenio(s, UIO_READ, &ktruio, error);
		}
		FREE(ktriov, M_TEMP);
	}
#endif
	if (error)
		goto out;
	td->td_retval[0] = len - auio.uio_resid;
	/* From here on "len" is reused for the address and control lengths. */
	if (mp->msg_name) {
		len = mp->msg_namelen;
		if (len <= 0 || fromsa == 0)
			len = 0;
		else {
			/* save sa_len before it is destroyed by MSG_COMPAT */
			len = MIN(len, fromsa->sa_len);
#ifdef COMPAT_OLDSOCK
			if (mp->msg_flags & MSG_COMPAT)
				((struct osockaddr *)fromsa)->sa_family =
				    fromsa->sa_family;
#endif
			error = copyout(fromsa, mp->msg_name, (unsigned)len);
			if (error)
				goto out;
		}
		mp->msg_namelen = len;
		if (namelenp &&
		    (error = copyout(&len, namelenp, sizeof (int)))) {
#ifdef COMPAT_OLDSOCK
			if (mp->msg_flags & MSG_COMPAT)
				error = 0;	/* old recvfrom didn't check */
			else
#endif
			goto out;
		}
	}
	if (mp->msg_control) {
#ifdef COMPAT_OLDSOCK
		/*
		 * We assume that old recvmsg calls won't receive access
		 * rights and other control info, esp. as control info
		 * is always optional and those options didn't exist in 4.3.
		 * If we receive rights, trim the cmsghdr; anything else
		 * is tossed.
		 */
		if (control && mp->msg_flags & MSG_COMPAT) {
			if (mtod(control, struct cmsghdr *)->cmsg_level !=
			    SOL_SOCKET ||
			    mtod(control, struct cmsghdr *)->cmsg_type !=
			    SCM_RIGHTS) {
				mp->msg_controllen = 0;
				goto out;
			}
			control->m_len -= sizeof (struct cmsghdr);
			control->m_data += sizeof (struct cmsghdr);
		}
#endif
		len = mp->msg_controllen;
		m = control;
		mp->msg_controllen = 0;
		ctlbuf = mp->msg_control;

		/*
		 * Copy the control chain out mbuf by mbuf until either the
		 * chain or the caller's buffer space is exhausted.
		 */
		while (m && len > 0) {
			unsigned int tocopy;

			if (len >= m->m_len)
				tocopy = m->m_len;
			else {
				mp->msg_flags |= MSG_CTRUNC;
				tocopy = len;
			}

			if ((error = copyout(mtod(m, caddr_t),
			    ctlbuf, tocopy)) != 0)
				goto out;

			ctlbuf += tocopy;
			len -= tocopy;
			m = m->m_next;
		}
		/* Report how many bytes of control data were actually copied. */
		mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
	}
out:
	/* Common exit: release the socket, the source address and control data. */
	fputsock(so);
	if (fromsa)
		FREE(fromsa, M_SONAME);
	if (control)
		m_freem(control);
	return (error);
}
1072
1073 /*
1074 * MPSAFE
1075 */
1076 int
1077 recvfrom(td, uap)
1078 struct thread *td;
1079 register struct recvfrom_args /* {
1080 int s;
1081 caddr_t buf;
1082 size_t len;
1083 int flags;
1084 caddr_t from;
1085 int *fromlenaddr;
1086 } */ *uap;
1087 {
1088 struct msghdr msg;
1089 struct iovec aiov;
1090 int error;
1091
1092 mtx_lock(&Giant);
1093 if (uap->fromlenaddr) {
1094 error = copyin(uap->fromlenaddr,
1095 &msg.msg_namelen, sizeof (msg.msg_namelen));
1096 if (error)
1097 goto done2;
1098 } else {
1099 msg.msg_namelen = 0;
1100 }
1101 msg.msg_name = uap->from;
1102 msg.msg_iov = &aiov;
1103 msg.msg_iovlen = 1;
1104 aiov.iov_base = uap->buf;
1105 aiov.iov_len = uap->len;
1106 msg.msg_control = 0;
1107 msg.msg_flags = uap->flags;
1108 error = recvit(td, uap->s, &msg, uap->fromlenaddr);
1109 done2:
1110 mtx_unlock(&Giant);
1111 return(error);
1112 }
1113
1114 #ifdef COMPAT_OLDSOCK
1115 /*
1116 * MPSAFE
1117 */
1118 int
1119 orecvfrom(td, uap)
1120 struct thread *td;
1121 struct recvfrom_args *uap;
1122 {
1123
1124 uap->flags |= MSG_COMPAT;
1125 return (recvfrom(td, uap));
1126 }
1127 #endif
1128
1129
1130 #ifdef COMPAT_OLDSOCK
1131 /*
1132 * MPSAFE
1133 */
1134 int
1135 orecv(td, uap)
1136 struct thread *td;
1137 register struct orecv_args /* {
1138 int s;
1139 caddr_t buf;
1140 int len;
1141 int flags;
1142 } */ *uap;
1143 {
1144 struct msghdr msg;
1145 struct iovec aiov;
1146 int error;
1147
1148 mtx_lock(&Giant);
1149 msg.msg_name = 0;
1150 msg.msg_namelen = 0;
1151 msg.msg_iov = &aiov;
1152 msg.msg_iovlen = 1;
1153 aiov.iov_base = uap->buf;
1154 aiov.iov_len = uap->len;
1155 msg.msg_control = 0;
1156 msg.msg_flags = uap->flags;
1157 error = recvit(td, uap->s, &msg, NULL);
1158 mtx_unlock(&Giant);
1159 return (error);
1160 }
1161
1162 /*
1163 * Old recvmsg. This code takes advantage of the fact that the old msghdr
1164 * overlays the new one, missing only the flags, and with the (old) access
1165 * rights where the control fields are now.
1166 *
1167 * MPSAFE
1168 */
1169 int
1170 orecvmsg(td, uap)
1171 struct thread *td;
1172 register struct orecvmsg_args /* {
1173 int s;
1174 struct omsghdr *msg;
1175 int flags;
1176 } */ *uap;
1177 {
1178 struct msghdr msg;
1179 struct iovec aiov[UIO_SMALLIOV], *iov;
1180 int error;
1181
1182 error = copyin(uap->msg, &msg, sizeof (struct omsghdr));
1183 if (error)
1184 return (error);
1185
1186 mtx_lock(&Giant);
1187 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1188 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1189 error = EMSGSIZE;
1190 goto done2;
1191 }
1192 MALLOC(iov, struct iovec *,
1193 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1194 M_WAITOK);
1195 } else {
1196 iov = aiov;
1197 }
1198 msg.msg_flags = uap->flags | MSG_COMPAT;
1199 error = copyin(msg.msg_iov, iov,
1200 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1201 if (error)
1202 goto done;
1203 msg.msg_iov = iov;
1204 error = recvit(td, uap->s, &msg, &uap->msg->msg_namelen);
1205
1206 if (msg.msg_controllen && error == 0)
1207 error = copyout(&msg.msg_controllen,
1208 &uap->msg->msg_accrightslen, sizeof (int));
1209 done:
1210 if (iov != aiov)
1211 FREE(iov, M_IOV);
1212 done2:
1213 mtx_unlock(&Giant);
1214 return (error);
1215 }
1216 #endif
1217
1218 /*
1219 * MPSAFE
1220 */
1221 int
1222 recvmsg(td, uap)
1223 struct thread *td;
1224 register struct recvmsg_args /* {
1225 int s;
1226 struct msghdr *msg;
1227 int flags;
1228 } */ *uap;
1229 {
1230 struct msghdr msg;
1231 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
1232 register int error;
1233
1234 mtx_lock(&Giant);
1235 error = copyin(uap->msg, &msg, sizeof (msg));
1236 if (error)
1237 goto done2;
1238 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1239 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV) {
1240 error = EMSGSIZE;
1241 goto done2;
1242 }
1243 MALLOC(iov, struct iovec *,
1244 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1245 M_WAITOK);
1246 } else {
1247 iov = aiov;
1248 }
1249 #ifdef COMPAT_OLDSOCK
1250 msg.msg_flags = uap->flags &~ MSG_COMPAT;
1251 #else
1252 msg.msg_flags = uap->flags;
1253 #endif
1254 uiov = msg.msg_iov;
1255 msg.msg_iov = iov;
1256 error = copyin(uiov, iov,
1257 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1258 if (error)
1259 goto done;
1260 error = recvit(td, uap->s, &msg, NULL);
1261 if (!error) {
1262 msg.msg_iov = uiov;
1263 error = copyout(&msg, uap->msg, sizeof(msg));
1264 }
1265 done:
1266 if (iov != aiov)
1267 FREE(iov, M_IOV);
1268 done2:
1269 mtx_unlock(&Giant);
1270 return (error);
1271 }
1272
1273 /*
1274 * MPSAFE
1275 */
1276 /* ARGSUSED */
1277 int
1278 shutdown(td, uap)
1279 struct thread *td;
1280 register struct shutdown_args /* {
1281 int s;
1282 int how;
1283 } */ *uap;
1284 {
1285 struct socket *so;
1286 int error;
1287
1288 mtx_lock(&Giant);
1289 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1290 error = soshutdown(so, uap->how);
1291 fputsock(so);
1292 }
1293 mtx_unlock(&Giant);
1294 return(error);
1295 }
1296
1297 /*
1298 * MPSAFE
1299 */
1300 /* ARGSUSED */
1301 int
1302 setsockopt(td, uap)
1303 struct thread *td;
1304 register struct setsockopt_args /* {
1305 int s;
1306 int level;
1307 int name;
1308 caddr_t val;
1309 int valsize;
1310 } */ *uap;
1311 {
1312 struct socket *so;
1313 struct sockopt sopt;
1314 int error;
1315
1316 if (uap->val == 0 && uap->valsize != 0)
1317 return (EFAULT);
1318 if (uap->valsize < 0)
1319 return (EINVAL);
1320
1321 mtx_lock(&Giant);
1322 if ((error = fgetsock(td, uap->s, &so, NULL)) == 0) {
1323 sopt.sopt_dir = SOPT_SET;
1324 sopt.sopt_level = uap->level;
1325 sopt.sopt_name = uap->name;
1326 sopt.sopt_val = uap->val;
1327 sopt.sopt_valsize = uap->valsize;
1328 sopt.sopt_td = td;
1329 error = sosetopt(so, &sopt);
1330 fputsock(so);
1331 }
1332 mtx_unlock(&Giant);
1333 return(error);
1334 }
1335
1336 /*
1337 * MPSAFE
1338 */
1339 /* ARGSUSED */
1340 int
1341 getsockopt(td, uap)
1342 struct thread *td;
1343 register struct getsockopt_args /* {
1344 int s;
1345 int level;
1346 int name;
1347 caddr_t val;
1348 int *avalsize;
1349 } */ *uap;
1350 {
1351 int valsize, error;
1352 struct socket *so;
1353 struct sockopt sopt;
1354
1355 mtx_lock(&Giant);
1356 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1357 goto done2;
1358 if (uap->val) {
1359 error = copyin(uap->avalsize, &valsize, sizeof (valsize));
1360 if (error)
1361 goto done1;
1362 if (valsize < 0) {
1363 error = EINVAL;
1364 goto done1;
1365 }
1366 } else {
1367 valsize = 0;
1368 }
1369
1370 sopt.sopt_dir = SOPT_GET;
1371 sopt.sopt_level = uap->level;
1372 sopt.sopt_name = uap->name;
1373 sopt.sopt_val = uap->val;
1374 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1375 sopt.sopt_td = td;
1376
1377 error = sogetopt(so, &sopt);
1378 if (error == 0) {
1379 valsize = sopt.sopt_valsize;
1380 error = copyout(&valsize, uap->avalsize, sizeof (valsize));
1381 }
1382 done1:
1383 fputsock(so);
1384 done2:
1385 mtx_unlock(&Giant);
1386 return (error);
1387 }
1388
1389 /*
1390 * getsockname1() - Get socket name.
1391 *
1392 * MPSAFE
1393 */
1394 /* ARGSUSED */
1395 static int
1396 getsockname1(td, uap, compat)
1397 struct thread *td;
1398 register struct getsockname_args /* {
1399 int fdes;
1400 caddr_t asa;
1401 int *alen;
1402 } */ *uap;
1403 int compat;
1404 {
1405 struct socket *so;
1406 struct sockaddr *sa;
1407 int len, error;
1408
1409 mtx_lock(&Giant);
1410 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1411 goto done2;
1412 error = copyin(uap->alen, &len, sizeof (len));
1413 if (error)
1414 goto done1;
1415 if (len < 0) {
1416 error = EINVAL;
1417 goto done1;
1418 }
1419 sa = 0;
1420 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1421 if (error)
1422 goto bad;
1423 if (sa == 0) {
1424 len = 0;
1425 goto gotnothing;
1426 }
1427
1428 len = MIN(len, sa->sa_len);
1429 #ifdef COMPAT_OLDSOCK
1430 if (compat)
1431 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1432 #endif
1433 error = copyout(sa, uap->asa, (u_int)len);
1434 if (error == 0)
1435 gotnothing:
1436 error = copyout(&len, uap->alen, sizeof (len));
1437 bad:
1438 if (sa)
1439 FREE(sa, M_SONAME);
1440 done1:
1441 fputsock(so);
1442 done2:
1443 mtx_unlock(&Giant);
1444 return (error);
1445 }
1446
/*
 * getsockname(2) system call: getsockname1() with compat disabled, i.e.
 * without the struct osockaddr family rewrite.
 *
 * MPSAFE
 */
int
getsockname(struct thread *td, struct getsockname_args *uap)
{
	const int compat = 0;

	return (getsockname1(td, uap, compat));
}
1458
1459 #ifdef COMPAT_OLDSOCK
/*
 * Old-socket-ABI getsockname: getsockname1() with compat enabled, so the
 * returned address has its family stored via struct osockaddr.
 *
 * MPSAFE
 */
int
ogetsockname(struct thread *td, struct getsockname_args *uap)
{
	const int compat = 1;

	return (getsockname1(td, uap, compat));
}
1471 #endif /* COMPAT_OLDSOCK */
1472
1473 /*
1474 * getpeername1() - Get name of peer for connected socket.
1475 *
1476 * MPSAFE
1477 */
1478 /* ARGSUSED */
1479 static int
1480 getpeername1(td, uap, compat)
1481 struct thread *td;
1482 register struct getpeername_args /* {
1483 int fdes;
1484 caddr_t asa;
1485 int *alen;
1486 } */ *uap;
1487 int compat;
1488 {
1489 struct socket *so;
1490 struct sockaddr *sa;
1491 int len, error;
1492
1493 mtx_lock(&Giant);
1494 if ((error = fgetsock(td, uap->fdes, &so, NULL)) != 0)
1495 goto done2;
1496 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1497 error = ENOTCONN;
1498 goto done1;
1499 }
1500 error = copyin(uap->alen, &len, sizeof (len));
1501 if (error)
1502 goto done1;
1503 if (len < 0) {
1504 error = EINVAL;
1505 goto done1;
1506 }
1507 sa = 0;
1508 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1509 if (error)
1510 goto bad;
1511 if (sa == 0) {
1512 len = 0;
1513 goto gotnothing;
1514 }
1515 len = MIN(len, sa->sa_len);
1516 #ifdef COMPAT_OLDSOCK
1517 if (compat)
1518 ((struct osockaddr *)sa)->sa_family =
1519 sa->sa_family;
1520 #endif
1521 error = copyout(sa, uap->asa, (u_int)len);
1522 if (error)
1523 goto bad;
1524 gotnothing:
1525 error = copyout(&len, uap->alen, sizeof (len));
1526 bad:
1527 if (sa)
1528 FREE(sa, M_SONAME);
1529 done1:
1530 fputsock(so);
1531 done2:
1532 mtx_unlock(&Giant);
1533 return (error);
1534 }
1535
/*
 * getpeername(2) system call: getpeername1() with compat disabled, i.e.
 * without the struct osockaddr family rewrite.
 *
 * MPSAFE
 */
int
getpeername(struct thread *td, struct getpeername_args *uap)
{
	const int compat = 0;

	return (getpeername1(td, uap, compat));
}
1547
1548 #ifdef COMPAT_OLDSOCK
/*
 * Old-socket-ABI getpeername: getpeername1() with compat enabled, so the
 * returned address has its family stored via struct osockaddr.
 *
 * MPSAFE
 */
int
ogetpeername(struct thread *td, struct ogetpeername_args *uap)
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(td, args, 1));
}
1561 #endif /* COMPAT_OLDSOCK */
1562
1563 int
1564 sockargs(mp, buf, buflen, type)
1565 struct mbuf **mp;
1566 caddr_t buf;
1567 int buflen, type;
1568 {
1569 register struct sockaddr *sa;
1570 register struct mbuf *m;
1571 int error;
1572
1573 if ((u_int)buflen > MLEN) {
1574 #ifdef COMPAT_OLDSOCK
1575 if (type == MT_SONAME && (u_int)buflen <= 112)
1576 buflen = MLEN; /* unix domain compat. hack */
1577 else
1578 #endif
1579 return (EINVAL);
1580 }
1581 m = m_get(M_TRYWAIT, type);
1582 if (m == NULL)
1583 return (ENOBUFS);
1584 m->m_len = buflen;
1585 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1586 if (error)
1587 (void) m_free(m);
1588 else {
1589 *mp = m;
1590 if (type == MT_SONAME) {
1591 sa = mtod(m, struct sockaddr *);
1592
1593 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1594 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1595 sa->sa_family = sa->sa_len;
1596 #endif
1597 sa->sa_len = buflen;
1598 }
1599 }
1600 return (error);
1601 }
1602
1603 int
1604 getsockaddr(namp, uaddr, len)
1605 struct sockaddr **namp;
1606 caddr_t uaddr;
1607 size_t len;
1608 {
1609 struct sockaddr *sa;
1610 int error;
1611
1612 if (len > SOCK_MAXADDRLEN)
1613 return ENAMETOOLONG;
1614 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1615 error = copyin(uaddr, sa, len);
1616 if (error) {
1617 FREE(sa, M_SONAME);
1618 } else {
1619 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1620 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1621 sa->sa_family = sa->sa_len;
1622 #endif
1623 sa->sa_len = len;
1624 *namp = sa;
1625 }
1626 return error;
1627 }
1628
1629 /*
1630 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
1631 */
1632 static void
1633 sf_buf_init(void *arg)
1634 {
1635 struct sf_buf *sf_bufs;
1636 vm_offset_t sf_base;
1637 int i;
1638
1639 mtx_init(&sf_freelist.sf_lock, "sf_bufs list lock", NULL, MTX_DEF);
1640 mtx_lock(&sf_freelist.sf_lock);
1641 SLIST_INIT(&sf_freelist.sf_head);
1642 sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE);
1643 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP,
1644 M_NOWAIT | M_ZERO);
1645 for (i = 0; i < nsfbufs; i++) {
1646 sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
1647 SLIST_INSERT_HEAD(&sf_freelist.sf_head, &sf_bufs[i], free_list);
1648 }
1649 sf_buf_alloc_want = 0;
1650 mtx_unlock(&sf_freelist.sf_lock);
1651 }
1652
1653 /*
1654 * Get an sf_buf from the freelist. Will block if none are available.
1655 */
1656 struct sf_buf *
1657 sf_buf_alloc(struct vm_page *m)
1658 {
1659 struct sf_buf *sf;
1660 int error;
1661
1662 mtx_lock(&sf_freelist.sf_lock);
1663 while ((sf = SLIST_FIRST(&sf_freelist.sf_head)) == NULL) {
1664 sf_buf_alloc_want++;
1665 error = msleep(&sf_freelist, &sf_freelist.sf_lock, PVM|PCATCH,
1666 "sfbufa", 0);
1667 sf_buf_alloc_want--;
1668
1669 /*
1670 * If we got a signal, don't risk going back to sleep.
1671 */
1672 if (error)
1673 break;
1674 }
1675 if (sf != NULL) {
1676 SLIST_REMOVE_HEAD(&sf_freelist.sf_head, free_list);
1677 sf->m = m;
1678 pmap_qenter(sf->kva, &sf->m, 1);
1679 }
1680 mtx_unlock(&sf_freelist.sf_lock);
1681 return (sf);
1682 }
1683
1684 /*
1685 * Detatch mapped page and release resources back to the system.
1686 */
1687 void
1688 sf_buf_free(void *addr, void *args)
1689 {
1690 struct sf_buf *sf;
1691 struct vm_page *m;
1692
1693 sf = args;
1694 pmap_qremove((vm_offset_t)addr, 1);
1695 m = sf->m;
1696 vm_page_lock_queues();
1697 vm_page_unwire(m, 0);
1698 /*
1699 * Check for the object going away on us. This can
1700 * happen since we don't hold a reference to it.
1701 * If so, we're responsible for freeing the page.
1702 */
1703 if (m->wire_count == 0 && m->object == NULL)
1704 vm_page_free(m);
1705 vm_page_unlock_queues();
1706 sf->m = NULL;
1707 mtx_lock(&sf_freelist.sf_lock);
1708 SLIST_INSERT_HEAD(&sf_freelist.sf_head, sf, free_list);
1709 if (sf_buf_alloc_want > 0)
1710 wakeup_one(&sf_freelist);
1711 mtx_unlock(&sf_freelist.sf_lock);
1712 }
1713
/*
 * sendfile(2)
 *
 * MPSAFE
 *
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	 struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Send a file specified by 'fd' and starting at 'offset' to a socket
 * specified by 's'. Send only 'nbytes' of the file or until EOF if
 * nbytes == 0. Optionally add a header and/or trailer to the socket
 * output. If specified, write the total number of bytes sent into *sbytes.
 *
 * This entry point simply runs do_sendfile() with compat disabled.
 */
int
sendfile(struct thread *td, struct sendfile_args *uap)
{
	const int compat = 0;

	return (do_sendfile(td, uap, compat));
}
1734
1735 #ifdef COMPAT_FREEBSD4
1736 int
1737 freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
1738 {
1739 struct sendfile_args args;
1740
1741 args.fd = uap->fd;
1742 args.s = uap->s;
1743 args.offset = uap->offset;
1744 args.nbytes = uap->nbytes;
1745 args.hdtr = uap->hdtr;
1746 args.sbytes = uap->sbytes;
1747 args.flags = uap->flags;
1748
1749 return (do_sendfile(td, &args, 1));
1750 }
1751 #endif /* COMPAT_FREEBSD4 */
1752
1753 static int
1754 do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
1755 {
1756 struct vnode *vp;
1757 struct vm_object *obj;
1758 struct socket *so = NULL;
1759 struct mbuf *m;
1760 struct sf_buf *sf;
1761 struct vm_page *pg;
1762 struct writev_args nuap;
1763 struct sf_hdtr hdtr;
1764 off_t off, xfsize, hdtr_size, sbytes = 0;
1765 int error, s;
1766
1767 mtx_lock(&Giant);
1768
1769 hdtr_size = 0;
1770
1771 /*
1772 * The descriptor must be a regular file and have a backing VM object.
1773 */
1774 if ((error = fgetvp_read(td, uap->fd, &vp)) != 0)
1775 goto done;
1776 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
1777 error = EINVAL;
1778 goto done;
1779 }
1780 if ((error = fgetsock(td, uap->s, &so, NULL)) != 0)
1781 goto done;
1782 if (so->so_type != SOCK_STREAM) {
1783 error = EINVAL;
1784 goto done;
1785 }
1786 if ((so->so_state & SS_ISCONNECTED) == 0) {
1787 error = ENOTCONN;
1788 goto done;
1789 }
1790 if (uap->offset < 0) {
1791 error = EINVAL;
1792 goto done;
1793 }
1794
1795 #ifdef MAC
1796 error = mac_check_socket_send(td->td_ucred, so);
1797 if (error)
1798 goto done;
1799 #endif
1800
1801 /*
1802 * If specified, get the pointer to the sf_hdtr struct for
1803 * any headers/trailers.
1804 */
1805 if (uap->hdtr != NULL) {
1806 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1807 if (error)
1808 goto done;
1809 /*
1810 * Send any headers. Wimp out and use writev(2).
1811 */
1812 if (hdtr.headers != NULL) {
1813 nuap.fd = uap->s;
1814 nuap.iovp = hdtr.headers;
1815 nuap.iovcnt = hdtr.hdr_cnt;
1816 error = writev(td, &nuap);
1817 if (error)
1818 goto done;
1819 if (compat)
1820 sbytes += td->td_retval[0];
1821 else
1822 hdtr_size += td->td_retval[0];
1823 }
1824 }
1825
1826 /*
1827 * Protect against multiple writers to the socket.
1828 */
1829 (void) sblock(&so->so_snd, M_WAITOK);
1830
1831 /*
1832 * Loop through the pages in the file, starting with the requested
1833 * offset. Get a file page (do I/O if necessary), map the file page
1834 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1835 * it on the socket.
1836 */
1837 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1838 vm_pindex_t pindex;
1839 vm_offset_t pgoff;
1840
1841 pindex = OFF_TO_IDX(off);
1842 retry_lookup:
1843 /*
1844 * Calculate the amount to transfer. Not to exceed a page,
1845 * the EOF, or the passed in nbytes.
1846 */
1847 xfsize = obj->un_pager.vnp.vnp_size - off;
1848 if (xfsize > PAGE_SIZE)
1849 xfsize = PAGE_SIZE;
1850 pgoff = (vm_offset_t)(off & PAGE_MASK);
1851 if (PAGE_SIZE - pgoff < xfsize)
1852 xfsize = PAGE_SIZE - pgoff;
1853 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1854 xfsize = uap->nbytes - sbytes;
1855 if (xfsize <= 0)
1856 break;
1857 /*
1858 * Optimize the non-blocking case by looking at the socket space
1859 * before going to the extra work of constituting the sf_buf.
1860 */
1861 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1862 if (so->so_state & SS_CANTSENDMORE)
1863 error = EPIPE;
1864 else
1865 error = EAGAIN;
1866 sbunlock(&so->so_snd);
1867 goto done;
1868 }
1869 /*
1870 * Attempt to look up the page.
1871 *
1872 * Allocate if not found
1873 *
1874 * Wait and loop if busy.
1875 */
1876 pg = vm_page_lookup(obj, pindex);
1877
1878 if (pg == NULL) {
1879 pg = vm_page_alloc(obj, pindex,
1880 VM_ALLOC_NORMAL | VM_ALLOC_WIRED);
1881 if (pg == NULL) {
1882 VM_WAIT;
1883 goto retry_lookup;
1884 }
1885 vm_page_lock_queues();
1886 vm_page_wakeup(pg);
1887 } else {
1888 vm_page_lock_queues();
1889 if (vm_page_sleep_if_busy(pg, TRUE, "sfpbsy"))
1890 goto retry_lookup;
1891 /*
1892 * Wire the page so it does not get ripped out from
1893 * under us.
1894 */
1895 vm_page_wire(pg);
1896 }
1897
1898 /*
1899 * If page is not valid for what we need, initiate I/O
1900 */
1901
1902 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) {
1903 int bsize, resid;
1904
1905 /*
1906 * Ensure that our page is still around when the I/O
1907 * completes.
1908 */
1909 vm_page_io_start(pg);
1910 vm_page_unlock_queues();
1911
1912 /*
1913 * Get the page from backing store.
1914 */
1915 bsize = vp->v_mount->mnt_stat.f_iosize;
1916 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, td);
1917 /*
1918 * XXXMAC: Because we don't have fp->f_cred here,
1919 * we pass in NOCRED. This is probably wrong, but
1920 * is consistent with our original implementation.
1921 */
1922 error = vn_rdwr(UIO_READ, vp, NULL, MAXBSIZE,
1923 trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
1924 IO_VMIO | ((MAXBSIZE / bsize) << 16),
1925 td->td_ucred, NOCRED, &resid, td);
1926 VOP_UNLOCK(vp, 0, td);
1927 vm_page_lock_queues();
1928 vm_page_flag_clear(pg, PG_ZERO);
1929 vm_page_io_finish(pg);
1930 if (error) {
1931 vm_page_unwire(pg, 0);
1932 /*
1933 * See if anyone else might know about this page.
1934 * If not and it is not valid, then free it.
1935 */
1936 if (pg->wire_count == 0 && pg->valid == 0 &&
1937 pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1938 pg->hold_count == 0) {
1939 vm_page_busy(pg);
1940 vm_page_free(pg);
1941 }
1942 vm_page_unlock_queues();
1943 sbunlock(&so->so_snd);
1944 goto done;
1945 }
1946 }
1947 vm_page_unlock_queues();
1948
1949 /*
1950 * Get a sendfile buf. We usually wait as long as necessary,
1951 * but this wait can be interrupted.
1952 */
1953 if ((sf = sf_buf_alloc(pg)) == NULL) {
1954 vm_page_lock_queues();
1955 vm_page_unwire(pg, 0);
1956 if (pg->wire_count == 0 && pg->object == NULL)
1957 vm_page_free(pg);
1958 vm_page_unlock_queues();
1959 sbunlock(&so->so_snd);
1960 error = EINTR;
1961 goto done;
1962 }
1963
1964 /*
1965 * Get an mbuf header and set it up as having external storage.
1966 */
1967 MGETHDR(m, M_TRYWAIT, MT_DATA);
1968 if (m == NULL) {
1969 error = ENOBUFS;
1970 sf_buf_free((void *)sf->kva, sf);
1971 sbunlock(&so->so_snd);
1972 goto done;
1973 }
1974 /*
1975 * Setup external storage for mbuf.
1976 */
1977 MEXTADD(m, sf->kva, PAGE_SIZE, sf_buf_free, sf, M_RDONLY,
1978 EXT_SFBUF);
1979 m->m_data = (char *) sf->kva + pgoff;
1980 m->m_pkthdr.len = m->m_len = xfsize;
1981 /*
1982 * Add the buffer to the socket buffer chain.
1983 */
1984 s = splnet();
1985 retry_space:
1986 /*
1987 * Make sure that the socket is still able to take more data.
1988 * CANTSENDMORE being true usually means that the connection
1989 * was closed. so_error is true when an error was sensed after
1990 * a previous send.
1991 * The state is checked after the page mapping and buffer
1992 * allocation above since those operations may block and make
1993 * any socket checks stale. From this point forward, nothing
1994 * blocks before the pru_send (or more accurately, any blocking
1995 * results in a loop back to here to re-check).
1996 */
1997 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1998 if (so->so_state & SS_CANTSENDMORE) {
1999 error = EPIPE;
2000 } else {
2001 error = so->so_error;
2002 so->so_error = 0;
2003 }
2004 m_freem(m);
2005 sbunlock(&so->so_snd);
2006 splx(s);
2007 goto done;
2008 }
2009 /*
2010 * Wait for socket space to become available. We do this just
2011 * after checking the connection state above in order to avoid
2012 * a race condition with sbwait().
2013 */
2014 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
2015 if (so->so_state & SS_NBIO) {
2016 m_freem(m);
2017 sbunlock(&so->so_snd);
2018 splx(s);
2019 error = EAGAIN;
2020 goto done;
2021 }
2022 error = sbwait(&so->so_snd);
2023 /*
2024 * An error from sbwait usually indicates that we've
2025 * been interrupted by a signal. If we've sent anything
2026 * then return bytes sent, otherwise return the error.
2027 */
2028 if (error) {
2029 m_freem(m);
2030 sbunlock(&so->so_snd);
2031 splx(s);
2032 goto done;
2033 }
2034 goto retry_space;
2035 }
2036 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, td);
2037 splx(s);
2038 if (error) {
2039 sbunlock(&so->so_snd);
2040 goto done;
2041 }
2042 }
2043 sbunlock(&so->so_snd);
2044
2045 /*
2046 * Send trailers. Wimp out and use writev(2).
2047 */
2048 if (uap->hdtr != NULL && hdtr.trailers != NULL) {
2049 nuap.fd = uap->s;
2050 nuap.iovp = hdtr.trailers;
2051 nuap.iovcnt = hdtr.trl_cnt;
2052 error = writev(td, &nuap);
2053 if (error)
2054 goto done;
2055 if (compat)
2056 sbytes += td->td_retval[0];
2057 else
2058 hdtr_size += td->td_retval[0];
2059 }
2060
2061 done:
2062 /*
2063 * If there was no error we have to clear td->td_retval[0]
2064 * because it may have been set by writev.
2065 */
2066 if (error == 0) {
2067 td->td_retval[0] = 0;
2068 }
2069 if (uap->sbytes != NULL) {
2070 if (!compat)
2071 sbytes += hdtr_size;
2072 copyout(&sbytes, uap->sbytes, sizeof(off_t));
2073 }
2074 if (vp)
2075 vrele(vp);
2076 if (so)
2077 fputsock(so);
2078 mtx_unlock(&Giant);
2079 return (error);
2080 }
Cache object: 6a4784f827da2977710ce028fc62f374
|