1 /*
2 * Copyright (c) 1982, 1986, 1989, 1990, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * sendfile(2) and related extensions:
6 * Copyright (c) 1998, David Greenman. All rights reserved.
7 *
8 * Redistribution and use in source and binary forms, with or without
9 * modification, are permitted provided that the following conditions
10 * are met:
11 * 1. Redistributions of source code must retain the above copyright
12 * notice, this list of conditions and the following disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 * 3. All advertising materials mentioning features or use of this software
17 * must display the following acknowledgement:
18 * This product includes software developed by the University of
19 * California, Berkeley and its contributors.
20 * 4. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)uipc_syscalls.c 8.4 (Berkeley) 2/21/94
37 * $FreeBSD$
38 */
39
40 #include "opt_compat.h"
41 #include "opt_ktrace.h"
42
43 #include <sys/param.h>
44 #include <sys/systm.h>
45 #include <sys/kernel.h>
46 #include <sys/sysproto.h>
47 #include <sys/malloc.h>
48 #include <sys/filedesc.h>
49 #include <sys/event.h>
50 #include <sys/proc.h>
51 #include <sys/fcntl.h>
52 #include <sys/file.h>
53 #include <sys/filio.h>
54 #include <sys/mbuf.h>
55 #include <sys/protosw.h>
56 #include <sys/socket.h>
57 #include <sys/socketvar.h>
58 #include <sys/signalvar.h>
59 #include <sys/uio.h>
60 #include <sys/vnode.h>
61 #include <sys/lock.h>
62 #include <sys/mount.h>
63 #ifdef KTRACE
64 #include <sys/ktrace.h>
65 #endif
66 #include <vm/vm.h>
67 #include <vm/vm_object.h>
68 #include <vm/vm_page.h>
69 #include <vm/vm_pageout.h>
70 #include <vm/vm_kern.h>
71 #include <vm/vm_extern.h>
72
73 static void sf_buf_init(void *arg);
74 SYSINIT(sock_sf, SI_SUB_MBUF, SI_ORDER_ANY, sf_buf_init, NULL)
75
76 static int sendit __P((struct proc *p, int s, struct msghdr *mp, int flags));
77 static int recvit __P((struct proc *p, int s, struct msghdr *mp,
78 caddr_t namelenp));
79
80 static int accept1 __P((struct proc *p, struct accept_args *uap, int compat));
81 static int do_sendfile __P((struct proc *p, struct sendfile_args *uap,
82 int compat));
83 static int getsockname1 __P((struct proc *p, struct getsockname_args *uap,
84 int compat));
85 static int getpeername1 __P((struct proc *p, struct getpeername_args *uap,
86 int compat));
87
88 static SLIST_HEAD(, sf_buf) sf_freelist;
89 static vm_offset_t sf_base;
90 static struct sf_buf *sf_bufs;
91 static int sf_buf_alloc_want;
92
93 /*
94 * System call interface to the socket abstraction.
95 */
96 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
97 #define COMPAT_OLDSOCK
98 #endif
99
100 extern struct fileops socketops;
101
102 int
103 socket(p, uap)
104 struct proc *p;
105 register struct socket_args /* {
106 int domain;
107 int type;
108 int protocol;
109 } */ *uap;
110 {
111 struct filedesc *fdp = p->p_fd;
112 struct socket *so;
113 struct file *fp;
114 int fd, error;
115
116 error = falloc(p, &fp, &fd);
117 if (error)
118 return (error);
119 fhold(fp);
120 error = socreate(uap->domain, &so, uap->type, uap->protocol, p);
121 if (error) {
122 if (fdp->fd_ofiles[fd] == fp) {
123 fdp->fd_ofiles[fd] = NULL;
124 fdrop(fp, p);
125 }
126 } else {
127 fp->f_data = (caddr_t)so;
128 fp->f_flag = FREAD|FWRITE;
129 fp->f_ops = &socketops;
130 fp->f_type = DTYPE_SOCKET;
131 p->p_retval[0] = fd;
132 }
133 fdrop(fp, p);
134 return (error);
135 }
136
137 /* ARGSUSED */
138 int
139 bind(p, uap)
140 struct proc *p;
141 register struct bind_args /* {
142 int s;
143 caddr_t name;
144 int namelen;
145 } */ *uap;
146 {
147 struct file *fp;
148 struct sockaddr *sa;
149 int error;
150
151 error = holdsock(p->p_fd, uap->s, &fp);
152 if (error)
153 return (error);
154 error = getsockaddr(&sa, uap->name, uap->namelen);
155 if (error) {
156 fdrop(fp, p);
157 return (error);
158 }
159 error = sobind((struct socket *)fp->f_data, sa, p);
160 FREE(sa, M_SONAME);
161 fdrop(fp, p);
162 return (error);
163 }
164
165 /* ARGSUSED */
166 int
167 listen(p, uap)
168 struct proc *p;
169 register struct listen_args /* {
170 int s;
171 int backlog;
172 } */ *uap;
173 {
174 struct file *fp;
175 int error;
176
177 error = holdsock(p->p_fd, uap->s, &fp);
178 if (error)
179 return (error);
180 error = solisten((struct socket *)fp->f_data, uap->backlog, p);
181 fdrop(fp, p);
182 return(error);
183 }
184
185 static int
186 accept1(p, uap, compat)
187 struct proc *p;
188 register struct accept_args /* {
189 int s;
190 caddr_t name;
191 int *anamelen;
192 } */ *uap;
193 int compat;
194 {
195 struct filedesc *fdp = p->p_fd;
196 struct file *lfp = NULL;
197 struct file *nfp = NULL;
198 struct sockaddr *sa;
199 int namelen, error, s;
200 struct socket *head, *so;
201 int fd;
202 u_int fflag; /* type must match fp->f_flag */
203 int tmp;
204
205 if (uap->name) {
206 error = copyin((caddr_t)uap->anamelen, (caddr_t)&namelen,
207 sizeof (namelen));
208 if(error)
209 return (error);
210 if (namelen < 0)
211 return (EINVAL);
212 }
213 error = holdsock(fdp, uap->s, &lfp);
214 if (error)
215 return (error);
216 s = splnet();
217 head = (struct socket *)lfp->f_data;
218 if ((head->so_options & SO_ACCEPTCONN) == 0) {
219 splx(s);
220 error = EINVAL;
221 goto done;
222 }
223 while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) {
224 if (head->so_state & SS_CANTRCVMORE) {
225 head->so_error = ECONNABORTED;
226 break;
227 }
228 if ((head->so_state & SS_NBIO) != 0) {
229 head->so_error = EWOULDBLOCK;
230 break;
231 }
232 error = tsleep((caddr_t)&head->so_timeo, PSOCK | PCATCH,
233 "accept", 0);
234 if (error) {
235 splx(s);
236 goto done;
237 }
238 }
239 if (head->so_error) {
240 error = head->so_error;
241 head->so_error = 0;
242 splx(s);
243 goto done;
244 }
245
246 /*
247 * At this point we know that there is at least one connection
248 * ready to be accepted. Remove it from the queue prior to
249 * allocating the file descriptor for it since falloc() may
250 * block allowing another process to accept the connection
251 * instead.
252 */
253 so = TAILQ_FIRST(&head->so_comp);
254 TAILQ_REMOVE(&head->so_comp, so, so_list);
255 head->so_qlen--;
256
257 fflag = lfp->f_flag;
258 error = falloc(p, &nfp, &fd);
259 if (error) {
260 /*
261 * Probably ran out of file descriptors. Put the
262 * unaccepted connection back onto the queue and
263 * do another wakeup so some other process might
264 * have a chance at it.
265 */
266 TAILQ_INSERT_HEAD(&head->so_comp, so, so_list);
267 head->so_qlen++;
268 wakeup_one(&head->so_timeo);
269 splx(s);
270 goto done;
271 }
272 fhold(nfp);
273 p->p_retval[0] = fd;
274
275 /* connection has been removed from the listen queue */
276 KNOTE(&head->so_rcv.sb_sel.si_note, 0);
277
278 so->so_state &= ~SS_COMP;
279 so->so_head = NULL;
280 if (head->so_sigio != NULL)
281 fsetown(fgetown(head->so_sigio), &so->so_sigio);
282
283 nfp->f_data = (caddr_t)so;
284 nfp->f_flag = fflag;
285 nfp->f_ops = &socketops;
286 nfp->f_type = DTYPE_SOCKET;
287 /* Sync socket nonblocking/async state with file flags */
288 tmp = fflag & FNONBLOCK;
289 (void) fo_ioctl(nfp, FIONBIO, (caddr_t)&tmp, p);
290 tmp = fflag & FASYNC;
291 (void) fo_ioctl(nfp, FIOASYNC, (caddr_t)&tmp, p);
292 sa = 0;
293 error = soaccept(so, &sa);
294 if (error) {
295 /*
296 * return a namelen of zero for older code which might
297 * ignore the return value from accept.
298 */
299 if (uap->name != NULL) {
300 namelen = 0;
301 (void) copyout((caddr_t)&namelen,
302 (caddr_t)uap->anamelen, sizeof(*uap->anamelen));
303 }
304 goto noconnection;
305 }
306 if (sa == NULL) {
307 namelen = 0;
308 if (uap->name)
309 goto gotnoname;
310 splx(s);
311 error = 0;
312 goto done;
313 }
314 if (uap->name) {
315 /* check sa_len before it is destroyed */
316 if (namelen > sa->sa_len)
317 namelen = sa->sa_len;
318 #ifdef COMPAT_OLDSOCK
319 if (compat)
320 ((struct osockaddr *)sa)->sa_family =
321 sa->sa_family;
322 #endif
323 error = copyout(sa, (caddr_t)uap->name, (u_int)namelen);
324 if (!error)
325 gotnoname:
326 error = copyout((caddr_t)&namelen,
327 (caddr_t)uap->anamelen, sizeof (*uap->anamelen));
328 }
329 noconnection:
330 if (sa)
331 FREE(sa, M_SONAME);
332
333 /*
334 * close the new descriptor, assuming someone hasn't ripped it
335 * out from under us.
336 */
337 if (error) {
338 if (fdp->fd_ofiles[fd] == nfp) {
339 fdp->fd_ofiles[fd] = NULL;
340 fdrop(nfp, p);
341 }
342 }
343 splx(s);
344
345 /*
346 * Release explicitly held references before returning.
347 */
348 done:
349 if (nfp != NULL)
350 fdrop(nfp, p);
351 fdrop(lfp, p);
352 return (error);
353 }
354
/*
 * accept(2): accept1() with the compat flag clear.
 */
int
accept(p, uap)
	struct proc *p;
	struct accept_args *uap;
{
	int error;

	error = accept1(p, uap, 0);
	return (error);
}
363
#ifdef COMPAT_OLDSOCK
/*
 * Old-socket accept: accept1() with the compat flag set, which makes it
 * store the address family in the osockaddr layout before copying out.
 */
int
oaccept(p, uap)
	struct proc *p;
	struct accept_args *uap;
{
	int error;

	error = accept1(p, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
374
375 /* ARGSUSED */
376 int
377 connect(p, uap)
378 struct proc *p;
379 register struct connect_args /* {
380 int s;
381 caddr_t name;
382 int namelen;
383 } */ *uap;
384 {
385 struct file *fp;
386 register struct socket *so;
387 struct sockaddr *sa;
388 int error, s;
389
390 error = holdsock(p->p_fd, uap->s, &fp);
391 if (error)
392 return (error);
393 so = (struct socket *)fp->f_data;
394 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
395 error = EALREADY;
396 goto done;
397 }
398 error = getsockaddr(&sa, uap->name, uap->namelen);
399 if (error)
400 goto done;
401 error = soconnect(so, sa, p);
402 if (error)
403 goto bad;
404 if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) {
405 FREE(sa, M_SONAME);
406 error = EINPROGRESS;
407 goto done;
408 }
409 s = splnet();
410 while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) {
411 error = tsleep((caddr_t)&so->so_timeo, PSOCK | PCATCH,
412 "connec", 0);
413 if (error)
414 break;
415 }
416 if (error == 0) {
417 error = so->so_error;
418 so->so_error = 0;
419 }
420 splx(s);
421 bad:
422 so->so_state &= ~SS_ISCONNECTING;
423 FREE(sa, M_SONAME);
424 if (error == ERESTART)
425 error = EINTR;
426 done:
427 fdrop(fp, p);
428 return (error);
429 }
430
431 int
432 socketpair(p, uap)
433 struct proc *p;
434 register struct socketpair_args /* {
435 int domain;
436 int type;
437 int protocol;
438 int *rsv;
439 } */ *uap;
440 {
441 register struct filedesc *fdp = p->p_fd;
442 struct file *fp1, *fp2;
443 struct socket *so1, *so2;
444 int fd, error, sv[2];
445
446 error = socreate(uap->domain, &so1, uap->type, uap->protocol, p);
447 if (error)
448 return (error);
449 error = socreate(uap->domain, &so2, uap->type, uap->protocol, p);
450 if (error)
451 goto free1;
452 error = falloc(p, &fp1, &fd);
453 if (error)
454 goto free2;
455 fhold(fp1);
456 sv[0] = fd;
457 fp1->f_data = (caddr_t)so1;
458 error = falloc(p, &fp2, &fd);
459 if (error)
460 goto free3;
461 fhold(fp2);
462 fp2->f_data = (caddr_t)so2;
463 sv[1] = fd;
464 error = soconnect2(so1, so2);
465 if (error)
466 goto free4;
467 if (uap->type == SOCK_DGRAM) {
468 /*
469 * Datagram socket connection is asymmetric.
470 */
471 error = soconnect2(so2, so1);
472 if (error)
473 goto free4;
474 }
475 fp1->f_flag = fp2->f_flag = FREAD|FWRITE;
476 fp1->f_ops = fp2->f_ops = &socketops;
477 fp1->f_type = fp2->f_type = DTYPE_SOCKET;
478 error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int));
479 fdrop(fp1, p);
480 fdrop(fp2, p);
481 return (error);
482 free4:
483 if (fdp->fd_ofiles[sv[1]] == fp2) {
484 fdp->fd_ofiles[sv[1]] = NULL;
485 fdrop(fp2, p);
486 }
487 fdrop(fp2, p);
488 free3:
489 if (fdp->fd_ofiles[sv[0]] == fp1) {
490 fdp->fd_ofiles[sv[0]] = NULL;
491 fdrop(fp1, p);
492 }
493 fdrop(fp1, p);
494 free2:
495 (void)soclose(so2);
496 free1:
497 (void)soclose(so1);
498 return (error);
499 }
500
501 static int
502 sendit(p, s, mp, flags)
503 register struct proc *p;
504 int s;
505 register struct msghdr *mp;
506 int flags;
507 {
508 struct file *fp;
509 struct uio auio;
510 register struct iovec *iov;
511 register int i;
512 struct mbuf *control;
513 struct sockaddr *to;
514 int len, error;
515 struct socket *so;
516 #ifdef KTRACE
517 struct iovec *ktriov = NULL;
518 struct uio ktruio;
519 #endif
520
521 error = holdsock(p->p_fd, s, &fp);
522 if (error)
523 return (error);
524 auio.uio_iov = mp->msg_iov;
525 auio.uio_iovcnt = mp->msg_iovlen;
526 auio.uio_segflg = UIO_USERSPACE;
527 auio.uio_rw = UIO_WRITE;
528 auio.uio_procp = p;
529 auio.uio_offset = 0; /* XXX */
530 auio.uio_resid = 0;
531 iov = mp->msg_iov;
532 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
533 if ((auio.uio_resid += iov->iov_len) < 0) {
534 fdrop(fp, p);
535 return (EINVAL);
536 }
537 }
538 if (mp->msg_name) {
539 error = getsockaddr(&to, mp->msg_name, mp->msg_namelen);
540 if (error) {
541 fdrop(fp, p);
542 return (error);
543 }
544 } else {
545 to = 0;
546 }
547 if (mp->msg_control) {
548 if (mp->msg_controllen < sizeof(struct cmsghdr)
549 #ifdef COMPAT_OLDSOCK
550 && mp->msg_flags != MSG_COMPAT
551 #endif
552 ) {
553 error = EINVAL;
554 goto bad;
555 }
556 error = sockargs(&control, mp->msg_control,
557 mp->msg_controllen, MT_CONTROL);
558 if (error)
559 goto bad;
560 #ifdef COMPAT_OLDSOCK
561 if (mp->msg_flags == MSG_COMPAT) {
562 register struct cmsghdr *cm;
563
564 M_PREPEND(control, sizeof(*cm), M_WAIT);
565 if (control == 0) {
566 error = ENOBUFS;
567 goto bad;
568 } else {
569 cm = mtod(control, struct cmsghdr *);
570 cm->cmsg_len = control->m_len;
571 cm->cmsg_level = SOL_SOCKET;
572 cm->cmsg_type = SCM_RIGHTS;
573 }
574 }
575 #endif
576 } else {
577 control = 0;
578 }
579 #ifdef KTRACE
580 if (KTRPOINT(p, KTR_GENIO)) {
581 int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
582
583 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
584 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
585 ktruio = auio;
586 }
587 #endif
588 len = auio.uio_resid;
589 so = (struct socket *)fp->f_data;
590 error = so->so_proto->pr_usrreqs->pru_sosend(so, to, &auio, 0, control,
591 flags, p);
592 if (error) {
593 if (auio.uio_resid != len && (error == ERESTART ||
594 error == EINTR || error == EWOULDBLOCK))
595 error = 0;
596 if (error == EPIPE)
597 psignal(p, SIGPIPE);
598 }
599 if (error == 0)
600 p->p_retval[0] = len - auio.uio_resid;
601 #ifdef KTRACE
602 if (ktriov != NULL) {
603 if (error == 0) {
604 ktruio.uio_iov = ktriov;
605 ktruio.uio_resid = p->p_retval[0];
606 ktrgenio(p->p_tracep, s, UIO_WRITE, &ktruio, error);
607 }
608 FREE(ktriov, M_TEMP);
609 }
610 #endif
611 bad:
612 fdrop(fp, p);
613 if (to)
614 FREE(to, M_SONAME);
615 return (error);
616 }
617
618 int
619 sendto(p, uap)
620 struct proc *p;
621 register struct sendto_args /* {
622 int s;
623 caddr_t buf;
624 size_t len;
625 int flags;
626 caddr_t to;
627 int tolen;
628 } */ *uap;
629 {
630 struct msghdr msg;
631 struct iovec aiov;
632
633 msg.msg_name = uap->to;
634 msg.msg_namelen = uap->tolen;
635 msg.msg_iov = &aiov;
636 msg.msg_iovlen = 1;
637 msg.msg_control = 0;
638 #ifdef COMPAT_OLDSOCK
639 msg.msg_flags = 0;
640 #endif
641 aiov.iov_base = uap->buf;
642 aiov.iov_len = uap->len;
643 return (sendit(p, uap->s, &msg, uap->flags));
644 }
645
646 #ifdef COMPAT_OLDSOCK
647 int
648 osend(p, uap)
649 struct proc *p;
650 register struct osend_args /* {
651 int s;
652 caddr_t buf;
653 int len;
654 int flags;
655 } */ *uap;
656 {
657 struct msghdr msg;
658 struct iovec aiov;
659
660 msg.msg_name = 0;
661 msg.msg_namelen = 0;
662 msg.msg_iov = &aiov;
663 msg.msg_iovlen = 1;
664 aiov.iov_base = uap->buf;
665 aiov.iov_len = uap->len;
666 msg.msg_control = 0;
667 msg.msg_flags = 0;
668 return (sendit(p, uap->s, &msg, uap->flags));
669 }
670
671 int
672 osendmsg(p, uap)
673 struct proc *p;
674 register struct osendmsg_args /* {
675 int s;
676 caddr_t msg;
677 int flags;
678 } */ *uap;
679 {
680 struct msghdr msg;
681 struct iovec aiov[UIO_SMALLIOV], *iov;
682 int error;
683
684 error = copyin(uap->msg, (caddr_t)&msg, sizeof (struct omsghdr));
685 if (error)
686 return (error);
687 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
688 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
689 return (EMSGSIZE);
690 MALLOC(iov, struct iovec *,
691 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
692 M_WAITOK);
693 } else
694 iov = aiov;
695 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
696 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
697 if (error)
698 goto done;
699 msg.msg_flags = MSG_COMPAT;
700 msg.msg_iov = iov;
701 error = sendit(p, uap->s, &msg, uap->flags);
702 done:
703 if (iov != aiov)
704 FREE(iov, M_IOV);
705 return (error);
706 }
707 #endif
708
709 int
710 sendmsg(p, uap)
711 struct proc *p;
712 register struct sendmsg_args /* {
713 int s;
714 caddr_t msg;
715 int flags;
716 } */ *uap;
717 {
718 struct msghdr msg;
719 struct iovec aiov[UIO_SMALLIOV], *iov;
720 int error;
721
722 error = copyin(uap->msg, (caddr_t)&msg, sizeof (msg));
723 if (error)
724 return (error);
725 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
726 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
727 return (EMSGSIZE);
728 MALLOC(iov, struct iovec *,
729 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
730 M_WAITOK);
731 } else
732 iov = aiov;
733 if (msg.msg_iovlen &&
734 (error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
735 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)))))
736 goto done;
737 msg.msg_iov = iov;
738 #ifdef COMPAT_OLDSOCK
739 msg.msg_flags = 0;
740 #endif
741 error = sendit(p, uap->s, &msg, uap->flags);
742 done:
743 if (iov != aiov)
744 FREE(iov, M_IOV);
745 return (error);
746 }
747
748 static int
749 recvit(p, s, mp, namelenp)
750 register struct proc *p;
751 int s;
752 register struct msghdr *mp;
753 caddr_t namelenp;
754 {
755 struct file *fp;
756 struct uio auio;
757 register struct iovec *iov;
758 register int i;
759 int len, error;
760 struct mbuf *m, *control = 0;
761 caddr_t ctlbuf;
762 struct socket *so;
763 struct sockaddr *fromsa = 0;
764 #ifdef KTRACE
765 struct iovec *ktriov = NULL;
766 struct uio ktruio;
767 #endif
768
769 error = holdsock(p->p_fd, s, &fp);
770 if (error)
771 return (error);
772 auio.uio_iov = mp->msg_iov;
773 auio.uio_iovcnt = mp->msg_iovlen;
774 auio.uio_segflg = UIO_USERSPACE;
775 auio.uio_rw = UIO_READ;
776 auio.uio_procp = p;
777 auio.uio_offset = 0; /* XXX */
778 auio.uio_resid = 0;
779 iov = mp->msg_iov;
780 for (i = 0; i < mp->msg_iovlen; i++, iov++) {
781 if ((auio.uio_resid += iov->iov_len) < 0) {
782 fdrop(fp, p);
783 return (EINVAL);
784 }
785 }
786 #ifdef KTRACE
787 if (KTRPOINT(p, KTR_GENIO)) {
788 int iovlen = auio.uio_iovcnt * sizeof (struct iovec);
789
790 MALLOC(ktriov, struct iovec *, iovlen, M_TEMP, M_WAITOK);
791 bcopy((caddr_t)auio.uio_iov, (caddr_t)ktriov, iovlen);
792 ktruio = auio;
793 }
794 #endif
795 len = auio.uio_resid;
796 so = (struct socket *)fp->f_data;
797 error = so->so_proto->pr_usrreqs->pru_soreceive(so, &fromsa, &auio,
798 (struct mbuf **)0, mp->msg_control ? &control : (struct mbuf **)0,
799 &mp->msg_flags);
800 if (error) {
801 if (auio.uio_resid != len && (error == ERESTART ||
802 error == EINTR || error == EWOULDBLOCK))
803 error = 0;
804 }
805 #ifdef KTRACE
806 if (ktriov != NULL) {
807 if (error == 0) {
808 ktruio.uio_iov = ktriov;
809 ktruio.uio_resid = len - auio.uio_resid;
810 ktrgenio(p->p_tracep, s, UIO_READ, &ktruio, error);
811 }
812 FREE(ktriov, M_TEMP);
813 }
814 #endif
815 if (error)
816 goto out;
817 p->p_retval[0] = len - auio.uio_resid;
818 if (mp->msg_name) {
819 len = mp->msg_namelen;
820 if (len <= 0 || fromsa == 0)
821 len = 0;
822 else {
823 #ifndef MIN
824 #define MIN(a,b) ((a)>(b)?(b):(a))
825 #endif
826 /* save sa_len before it is destroyed by MSG_COMPAT */
827 len = MIN(len, fromsa->sa_len);
828 #ifdef COMPAT_OLDSOCK
829 if (mp->msg_flags & MSG_COMPAT)
830 ((struct osockaddr *)fromsa)->sa_family =
831 fromsa->sa_family;
832 #endif
833 error = copyout(fromsa,
834 (caddr_t)mp->msg_name, (unsigned)len);
835 if (error)
836 goto out;
837 }
838 mp->msg_namelen = len;
839 if (namelenp &&
840 (error = copyout((caddr_t)&len, namelenp, sizeof (int)))) {
841 #ifdef COMPAT_OLDSOCK
842 if (mp->msg_flags & MSG_COMPAT)
843 error = 0; /* old recvfrom didn't check */
844 else
845 #endif
846 goto out;
847 }
848 }
849 if (mp->msg_control) {
850 #ifdef COMPAT_OLDSOCK
851 /*
852 * We assume that old recvmsg calls won't receive access
853 * rights and other control info, esp. as control info
854 * is always optional and those options didn't exist in 4.3.
855 * If we receive rights, trim the cmsghdr; anything else
856 * is tossed.
857 */
858 if (control && mp->msg_flags & MSG_COMPAT) {
859 if (mtod(control, struct cmsghdr *)->cmsg_level !=
860 SOL_SOCKET ||
861 mtod(control, struct cmsghdr *)->cmsg_type !=
862 SCM_RIGHTS) {
863 mp->msg_controllen = 0;
864 goto out;
865 }
866 control->m_len -= sizeof (struct cmsghdr);
867 control->m_data += sizeof (struct cmsghdr);
868 }
869 #endif
870 len = mp->msg_controllen;
871 m = control;
872 mp->msg_controllen = 0;
873 ctlbuf = (caddr_t) mp->msg_control;
874
875 while (m && len > 0) {
876 unsigned int tocopy;
877
878 if (len >= m->m_len)
879 tocopy = m->m_len;
880 else {
881 mp->msg_flags |= MSG_CTRUNC;
882 tocopy = len;
883 }
884
885 if ((error = copyout((caddr_t)mtod(m, caddr_t),
886 ctlbuf, tocopy)) != 0)
887 goto out;
888
889 ctlbuf += tocopy;
890 len -= tocopy;
891 m = m->m_next;
892 }
893 mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control;
894 }
895 out:
896 fdrop(fp, p);
897 if (fromsa)
898 FREE(fromsa, M_SONAME);
899 if (control)
900 m_freem(control);
901 return (error);
902 }
903
904 int
905 recvfrom(p, uap)
906 struct proc *p;
907 register struct recvfrom_args /* {
908 int s;
909 caddr_t buf;
910 size_t len;
911 int flags;
912 caddr_t from;
913 int *fromlenaddr;
914 } */ *uap;
915 {
916 struct msghdr msg;
917 struct iovec aiov;
918 int error;
919
920 if (uap->fromlenaddr) {
921 error = copyin((caddr_t)uap->fromlenaddr,
922 (caddr_t)&msg.msg_namelen, sizeof (msg.msg_namelen));
923 if (error)
924 return (error);
925 } else
926 msg.msg_namelen = 0;
927 msg.msg_name = uap->from;
928 msg.msg_iov = &aiov;
929 msg.msg_iovlen = 1;
930 aiov.iov_base = uap->buf;
931 aiov.iov_len = uap->len;
932 msg.msg_control = 0;
933 msg.msg_flags = uap->flags;
934 return (recvit(p, uap->s, &msg, (caddr_t)uap->fromlenaddr));
935 }
936
#ifdef COMPAT_OLDSOCK
/*
 * Old-socket recvfrom: recvfrom() with MSG_COMPAT added to the flags.
 */
int
orecvfrom(p, uap)
	struct proc *p;
	struct recvfrom_args *uap;
{

	uap->flags = uap->flags | MSG_COMPAT;
	return (recvfrom(p, uap));
}
#endif
948
949
950 #ifdef COMPAT_OLDSOCK
951 int
952 orecv(p, uap)
953 struct proc *p;
954 register struct orecv_args /* {
955 int s;
956 caddr_t buf;
957 int len;
958 int flags;
959 } */ *uap;
960 {
961 struct msghdr msg;
962 struct iovec aiov;
963
964 msg.msg_name = 0;
965 msg.msg_namelen = 0;
966 msg.msg_iov = &aiov;
967 msg.msg_iovlen = 1;
968 aiov.iov_base = uap->buf;
969 aiov.iov_len = uap->len;
970 msg.msg_control = 0;
971 msg.msg_flags = uap->flags;
972 return (recvit(p, uap->s, &msg, (caddr_t)0));
973 }
974
975 /*
976 * Old recvmsg. This code takes advantage of the fact that the old msghdr
977 * overlays the new one, missing only the flags, and with the (old) access
978 * rights where the control fields are now.
979 */
980 int
981 orecvmsg(p, uap)
982 struct proc *p;
983 register struct orecvmsg_args /* {
984 int s;
985 struct omsghdr *msg;
986 int flags;
987 } */ *uap;
988 {
989 struct msghdr msg;
990 struct iovec aiov[UIO_SMALLIOV], *iov;
991 int error;
992
993 error = copyin((caddr_t)uap->msg, (caddr_t)&msg,
994 sizeof (struct omsghdr));
995 if (error)
996 return (error);
997 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
998 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
999 return (EMSGSIZE);
1000 MALLOC(iov, struct iovec *,
1001 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1002 M_WAITOK);
1003 } else
1004 iov = aiov;
1005 msg.msg_flags = uap->flags | MSG_COMPAT;
1006 error = copyin((caddr_t)msg.msg_iov, (caddr_t)iov,
1007 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1008 if (error)
1009 goto done;
1010 msg.msg_iov = iov;
1011 error = recvit(p, uap->s, &msg, (caddr_t)&uap->msg->msg_namelen);
1012
1013 if (msg.msg_controllen && error == 0)
1014 error = copyout((caddr_t)&msg.msg_controllen,
1015 (caddr_t)&uap->msg->msg_accrightslen, sizeof (int));
1016 done:
1017 if (iov != aiov)
1018 FREE(iov, M_IOV);
1019 return (error);
1020 }
1021 #endif
1022
1023 int
1024 recvmsg(p, uap)
1025 struct proc *p;
1026 register struct recvmsg_args /* {
1027 int s;
1028 struct msghdr *msg;
1029 int flags;
1030 } */ *uap;
1031 {
1032 struct msghdr msg;
1033 struct iovec aiov[UIO_SMALLIOV], *uiov, *iov;
1034 register int error;
1035
1036 error = copyin((caddr_t)uap->msg, (caddr_t)&msg, sizeof (msg));
1037 if (error)
1038 return (error);
1039 if ((u_int)msg.msg_iovlen >= UIO_SMALLIOV) {
1040 if ((u_int)msg.msg_iovlen >= UIO_MAXIOV)
1041 return (EMSGSIZE);
1042 MALLOC(iov, struct iovec *,
1043 sizeof(struct iovec) * (u_int)msg.msg_iovlen, M_IOV,
1044 M_WAITOK);
1045 } else
1046 iov = aiov;
1047 #ifdef COMPAT_OLDSOCK
1048 msg.msg_flags = uap->flags &~ MSG_COMPAT;
1049 #else
1050 msg.msg_flags = uap->flags;
1051 #endif
1052 uiov = msg.msg_iov;
1053 msg.msg_iov = iov;
1054 error = copyin((caddr_t)uiov, (caddr_t)iov,
1055 (unsigned)(msg.msg_iovlen * sizeof (struct iovec)));
1056 if (error)
1057 goto done;
1058 error = recvit(p, uap->s, &msg, (caddr_t)0);
1059 if (!error) {
1060 msg.msg_iov = uiov;
1061 error = copyout((caddr_t)&msg, (caddr_t)uap->msg, sizeof(msg));
1062 }
1063 done:
1064 if (iov != aiov)
1065 FREE(iov, M_IOV);
1066 return (error);
1067 }
1068
1069 /* ARGSUSED */
1070 int
1071 shutdown(p, uap)
1072 struct proc *p;
1073 register struct shutdown_args /* {
1074 int s;
1075 int how;
1076 } */ *uap;
1077 {
1078 struct file *fp;
1079 int error;
1080
1081 error = holdsock(p->p_fd, uap->s, &fp);
1082 if (error)
1083 return (error);
1084 error = soshutdown((struct socket *)fp->f_data, uap->how);
1085 fdrop(fp, p);
1086 return(error);
1087 }
1088
1089 /* ARGSUSED */
1090 int
1091 setsockopt(p, uap)
1092 struct proc *p;
1093 register struct setsockopt_args /* {
1094 int s;
1095 int level;
1096 int name;
1097 caddr_t val;
1098 int valsize;
1099 } */ *uap;
1100 {
1101 struct file *fp;
1102 struct sockopt sopt;
1103 int error;
1104
1105 if (uap->val == 0 && uap->valsize != 0)
1106 return (EFAULT);
1107 if (uap->valsize < 0)
1108 return (EINVAL);
1109
1110 error = holdsock(p->p_fd, uap->s, &fp);
1111 if (error)
1112 return (error);
1113
1114 sopt.sopt_dir = SOPT_SET;
1115 sopt.sopt_level = uap->level;
1116 sopt.sopt_name = uap->name;
1117 sopt.sopt_val = uap->val;
1118 sopt.sopt_valsize = uap->valsize;
1119 sopt.sopt_p = p;
1120 error = sosetopt((struct socket *)fp->f_data, &sopt);
1121 fdrop(fp, p);
1122 return(error);
1123 }
1124
1125 /* ARGSUSED */
1126 int
1127 getsockopt(p, uap)
1128 struct proc *p;
1129 register struct getsockopt_args /* {
1130 int s;
1131 int level;
1132 int name;
1133 caddr_t val;
1134 int *avalsize;
1135 } */ *uap;
1136 {
1137 int valsize, error;
1138 struct file *fp;
1139 struct sockopt sopt;
1140
1141 error = holdsock(p->p_fd, uap->s, &fp);
1142 if (error)
1143 return (error);
1144 if (uap->val) {
1145 error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize,
1146 sizeof (valsize));
1147 if (error) {
1148 fdrop(fp, p);
1149 return (error);
1150 }
1151 if (valsize < 0) {
1152 fdrop(fp, p);
1153 return (EINVAL);
1154 }
1155 } else {
1156 valsize = 0;
1157 }
1158
1159 sopt.sopt_dir = SOPT_GET;
1160 sopt.sopt_level = uap->level;
1161 sopt.sopt_name = uap->name;
1162 sopt.sopt_val = uap->val;
1163 sopt.sopt_valsize = (size_t)valsize; /* checked non-negative above */
1164 sopt.sopt_p = p;
1165
1166 error = sogetopt((struct socket *)fp->f_data, &sopt);
1167 if (error == 0) {
1168 valsize = sopt.sopt_valsize;
1169 error = copyout((caddr_t)&valsize,
1170 (caddr_t)uap->avalsize, sizeof (valsize));
1171 }
1172 fdrop(fp, p);
1173 return (error);
1174 }
1175
1176 /*
1177 * Get socket name.
1178 */
1179 /* ARGSUSED */
1180 static int
1181 getsockname1(p, uap, compat)
1182 struct proc *p;
1183 register struct getsockname_args /* {
1184 int fdes;
1185 caddr_t asa;
1186 int *alen;
1187 } */ *uap;
1188 int compat;
1189 {
1190 struct file *fp;
1191 register struct socket *so;
1192 struct sockaddr *sa;
1193 int len, error;
1194
1195 error = holdsock(p->p_fd, uap->fdes, &fp);
1196 if (error)
1197 return (error);
1198 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1199 if (error) {
1200 fdrop(fp, p);
1201 return (error);
1202 }
1203 if (len < 0) {
1204 fdrop(fp, p);
1205 return (EINVAL);
1206 }
1207 so = (struct socket *)fp->f_data;
1208 sa = 0;
1209 error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa);
1210 if (error)
1211 goto bad;
1212 if (sa == 0) {
1213 len = 0;
1214 goto gotnothing;
1215 }
1216
1217 len = MIN(len, sa->sa_len);
1218 #ifdef COMPAT_OLDSOCK
1219 if (compat)
1220 ((struct osockaddr *)sa)->sa_family = sa->sa_family;
1221 #endif
1222 error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1223 if (error == 0)
1224 gotnothing:
1225 error = copyout((caddr_t)&len, (caddr_t)uap->alen,
1226 sizeof (len));
1227 bad:
1228 if (sa)
1229 FREE(sa, M_SONAME);
1230 fdrop(fp, p);
1231 return (error);
1232 }
1233
/*
 * getsockname(2): getsockname1() with the compat flag clear.
 */
int
getsockname(p, uap)
	struct proc *p;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(p, uap, 0);
	return (error);
}
1242
#ifdef COMPAT_OLDSOCK
/*
 * Old-socket getsockname: getsockname1() with the compat flag set.
 */
int
ogetsockname(p, uap)
	struct proc *p;
	struct getsockname_args *uap;
{
	int error;

	error = getsockname1(p, uap, 1);
	return (error);
}
#endif /* COMPAT_OLDSOCK */
1253
1254 /*
1255 * Get name of peer for connected socket.
1256 */
1257 /* ARGSUSED */
1258 static int
1259 getpeername1(p, uap, compat)
1260 struct proc *p;
1261 register struct getpeername_args /* {
1262 int fdes;
1263 caddr_t asa;
1264 int *alen;
1265 } */ *uap;
1266 int compat;
1267 {
1268 struct file *fp;
1269 register struct socket *so;
1270 struct sockaddr *sa;
1271 int len, error;
1272
1273 error = holdsock(p->p_fd, uap->fdes, &fp);
1274 if (error)
1275 return (error);
1276 so = (struct socket *)fp->f_data;
1277 if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) {
1278 fdrop(fp, p);
1279 return (ENOTCONN);
1280 }
1281 error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len));
1282 if (error) {
1283 fdrop(fp, p);
1284 return (error);
1285 }
1286 if (len < 0) {
1287 fdrop(fp, p);
1288 return (EINVAL);
1289 }
1290 sa = 0;
1291 error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa);
1292 if (error)
1293 goto bad;
1294 if (sa == 0) {
1295 len = 0;
1296 goto gotnothing;
1297 }
1298 len = MIN(len, sa->sa_len);
1299 #ifdef COMPAT_OLDSOCK
1300 if (compat)
1301 ((struct osockaddr *)sa)->sa_family =
1302 sa->sa_family;
1303 #endif
1304 error = copyout(sa, (caddr_t)uap->asa, (u_int)len);
1305 if (error)
1306 goto bad;
1307 gotnothing:
1308 error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len));
1309 bad:
1310 if (sa)
1311 FREE(sa, M_SONAME);
1312 fdrop(fp, p);
1313 return (error);
1314 }
1315
/*
 * getpeername(2) system call entry point.
 *
 * Calls getpeername1() with the compat flag set to 0 and returns its
 * result unchanged.
 */
int
getpeername(struct proc *p, struct getpeername_args *uap)
{
	return (getpeername1(p, uap, 0));
}
1324
#ifdef COMPAT_OLDSOCK
/*
 * Old-socket-ABI flavour of getpeername(2); only built when
 * COMPAT_OLDSOCK is defined.
 *
 * Calls getpeername1() with the compat flag set to 1, which makes it
 * rewrite the returned address as a struct osockaddr.
 */
int
ogetpeername(struct proc *p, struct ogetpeername_args *uap)
{
	struct getpeername_args *args;

	/* XXX uap should have type `getpeername_args *' to begin with. */
	args = (struct getpeername_args *)uap;
	return (getpeername1(p, args, 1));
}
#endif /* COMPAT_OLDSOCK */
1336
1337 int
1338 sockargs(mp, buf, buflen, type)
1339 struct mbuf **mp;
1340 caddr_t buf;
1341 int buflen, type;
1342 {
1343 register struct sockaddr *sa;
1344 register struct mbuf *m;
1345 int error;
1346
1347 if ((u_int)buflen > MLEN) {
1348 #ifdef COMPAT_OLDSOCK
1349 if (type == MT_SONAME && (u_int)buflen <= 112)
1350 buflen = MLEN; /* unix domain compat. hack */
1351 else
1352 #endif
1353 if ((u_int)buflen > MCLBYTES)
1354 return (EINVAL);
1355 }
1356 m = m_get(M_WAIT, type);
1357 if (m == NULL)
1358 return (ENOBUFS);
1359 if ((u_int)buflen > MLEN)
1360 MCLGET(m, M_WAIT);
1361 m->m_len = buflen;
1362 error = copyin(buf, mtod(m, caddr_t), (u_int)buflen);
1363 if (error)
1364 (void) m_free(m);
1365 else {
1366 *mp = m;
1367 if (type == MT_SONAME) {
1368 sa = mtod(m, struct sockaddr *);
1369
1370 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1371 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1372 sa->sa_family = sa->sa_len;
1373 #endif
1374 sa->sa_len = buflen;
1375 }
1376 }
1377 return (error);
1378 }
1379
1380 int
1381 getsockaddr(namp, uaddr, len)
1382 struct sockaddr **namp;
1383 caddr_t uaddr;
1384 size_t len;
1385 {
1386 struct sockaddr *sa;
1387 int error;
1388
1389 if (len > SOCK_MAXADDRLEN)
1390 return ENAMETOOLONG;
1391 if (len < offsetof(struct sockaddr, sa_data[0]))
1392 return EINVAL;
1393 MALLOC(sa, struct sockaddr *, len, M_SONAME, M_WAITOK);
1394 error = copyin(uaddr, sa, len);
1395 if (error) {
1396 FREE(sa, M_SONAME);
1397 } else {
1398 #if defined(COMPAT_OLDSOCK) && BYTE_ORDER != BIG_ENDIAN
1399 if (sa->sa_family == 0 && sa->sa_len < AF_MAX)
1400 sa->sa_family = sa->sa_len;
1401 #endif
1402 sa->sa_len = len;
1403 *namp = sa;
1404 }
1405 return error;
1406 }
1407
1408 /*
1409 * holdsock() - load the struct file pointer associated
1410 * with a socket into *fpp. If an error occurs, non-zero
1411 * will be returned and *fpp will be set to NULL.
1412 */
1413 int
1414 holdsock(fdp, fdes, fpp)
1415 struct filedesc *fdp;
1416 int fdes;
1417 struct file **fpp;
1418 {
1419 register struct file *fp = NULL;
1420 int error = 0;
1421
1422 if ((unsigned)fdes >= fdp->fd_nfiles ||
1423 (fp = fdp->fd_ofiles[fdes]) == NULL) {
1424 error = EBADF;
1425 } else if (fp->f_type != DTYPE_SOCKET) {
1426 error = ENOTSOCK;
1427 fp = NULL;
1428 } else {
1429 fhold(fp);
1430 }
1431 *fpp = fp;
1432 return(error);
1433 }
1434
1435 /*
1436 * Allocate a pool of sf_bufs (sendfile(2) or "super-fast" if you prefer. :-))
1437 */
1438 static void
1439 sf_buf_init(void *arg)
1440 {
1441 int i;
1442
1443 SLIST_INIT(&sf_freelist);
1444 sf_base = kmem_alloc_pageable(kernel_map, nsfbufs * PAGE_SIZE);
1445 sf_bufs = malloc(nsfbufs * sizeof(struct sf_buf), M_TEMP, M_NOWAIT);
1446 bzero(sf_bufs, nsfbufs * sizeof(struct sf_buf));
1447 for (i = 0; i < nsfbufs; i++) {
1448 sf_bufs[i].kva = sf_base + i * PAGE_SIZE;
1449 SLIST_INSERT_HEAD(&sf_freelist, &sf_bufs[i], free_list);
1450 }
1451 }
1452
1453 /*
1454 * Get an sf_buf from the freelist. Will block if none are available.
1455 */
1456 struct sf_buf *
1457 sf_buf_alloc()
1458 {
1459 struct sf_buf *sf;
1460 int s;
1461 int error;
1462
1463 s = splimp();
1464 while ((sf = SLIST_FIRST(&sf_freelist)) == NULL) {
1465 sf_buf_alloc_want = 1;
1466 mbstat.sf_allocwait++;
1467 error = tsleep(&sf_freelist, PVM|PCATCH, "sfbufa", 0);
1468 if (error)
1469 break;
1470 }
1471 if (sf != NULL) {
1472 SLIST_REMOVE_HEAD(&sf_freelist, free_list);
1473 sf->refcnt = 1;
1474 nsfbufsused++;
1475 nsfbufspeak = imax(nsfbufspeak, nsfbufsused);
1476 }
1477 splx(s);
1478 return (sf);
1479 }
1480
1481 #define dtosf(x) (&sf_bufs[((uintptr_t)(x) - (uintptr_t)sf_base) >> PAGE_SHIFT])
1482 void
1483 sf_buf_ref(caddr_t addr, u_int size)
1484 {
1485 struct sf_buf *sf;
1486
1487 sf = dtosf(addr);
1488 if (sf->refcnt == 0)
1489 panic("sf_buf_ref: referencing a free sf_buf");
1490 sf->refcnt++;
1491 }
1492
1493 /*
1494 * Lose a reference to an sf_buf. When none left, detach mapped page
1495 * and release resources back to the system.
1496 *
1497 * Must be called at splimp.
1498 */
1499 void
1500 sf_buf_free(caddr_t addr, u_int size)
1501 {
1502 struct sf_buf *sf;
1503 struct vm_page *m;
1504 int s;
1505
1506 sf = dtosf(addr);
1507 if (sf->refcnt == 0)
1508 panic("sf_buf_free: freeing free sf_buf");
1509 sf->refcnt--;
1510 if (sf->refcnt == 0) {
1511 nsfbufsused--;
1512 pmap_qremove((vm_offset_t)addr, 1);
1513 m = sf->m;
1514 s = splvm();
1515 vm_page_unwire(m, 0);
1516 /*
1517 * Check for the object going away on us. This can
1518 * happen since we don't hold a reference to it.
1519 * If so, we're responsible for freeing the page.
1520 */
1521 if (m->wire_count == 0 && m->object == NULL)
1522 vm_page_free(m);
1523 splx(s);
1524 sf->m = NULL;
1525 SLIST_INSERT_HEAD(&sf_freelist, sf, free_list);
1526 if (sf_buf_alloc_want) {
1527 sf_buf_alloc_want = 0;
1528 wakeup(&sf_freelist);
1529 }
1530 }
1531 }
1532
/*
 * sendfile(2).
 * int sendfile(int fd, int s, off_t offset, size_t nbytes,
 *	struct sf_hdtr *hdtr, off_t *sbytes, int flags)
 *
 * Send a file specified by 'fd' and starting at 'offset' to a socket
 * specified by 's'.  Send only 'nbytes' of the file or until EOF if
 * nbytes == 0.  Optionally add a header and/or trailer to the socket
 * output.  If specified, write the total number of bytes sent into *sbytes.
 *
 * This entry point only forwards to do_sendfile() with compat == 0.
 */
int
sendfile(struct proc *p, struct sendfile_args *uap)
{
	return (do_sendfile(p, uap, 0));
}
1549
#ifdef COMPAT_43
/*
 * Compatibility entry point for the older sendfile system call; only
 * built when COMPAT_43 is defined.
 *
 * Copies each field of the osendfile_args structure into a
 * sendfile_args structure and runs do_sendfile() with compat == 1.
 */
int
osendfile(struct proc *p, struct osendfile_args *uap)
{
	struct sendfile_args nargs;

	/* Field-by-field translation to the current argument layout. */
	nargs.fd = uap->fd;
	nargs.s = uap->s;
	nargs.offset = uap->offset;
	nargs.nbytes = uap->nbytes;
	nargs.hdtr = uap->hdtr;
	nargs.sbytes = uap->sbytes;
	nargs.flags = uap->flags;

	return (do_sendfile(p, &nargs, 1));
}
#endif
1567
1568 int
1569 do_sendfile(struct proc *p, struct sendfile_args *uap, int compat)
1570 {
1571 struct file *fp;
1572 struct filedesc *fdp = p->p_fd;
1573 struct vnode *vp;
1574 struct vm_object *obj;
1575 struct socket *so;
1576 struct mbuf *m;
1577 struct sf_buf *sf;
1578 struct vm_page *pg;
1579 struct writev_args nuap;
1580 struct sf_hdtr hdtr;
1581 off_t off, xfsize, hdtr_size, sbytes = 0;
1582 int error = 0, s;
1583
1584 vp = NULL;
1585 hdtr_size = 0;
1586 /*
1587 * Do argument checking. Must be a regular file in, stream
1588 * type and connected socket out, positive offset.
1589 */
1590 fp = holdfp(fdp, uap->fd, FREAD);
1591 if (fp == NULL) {
1592 error = EBADF;
1593 goto done;
1594 }
1595 if (fp->f_type != DTYPE_VNODE) {
1596 error = EINVAL;
1597 goto done;
1598 }
1599 vp = (struct vnode *)fp->f_data;
1600 vref(vp);
1601 if (vp->v_type != VREG || VOP_GETVOBJECT(vp, &obj) != 0) {
1602 error = EINVAL;
1603 goto done;
1604 }
1605 fdrop(fp, p);
1606 error = holdsock(p->p_fd, uap->s, &fp);
1607 if (error)
1608 goto done;
1609 so = (struct socket *)fp->f_data;
1610 if (so->so_type != SOCK_STREAM) {
1611 error = EINVAL;
1612 goto done;
1613 }
1614 if ((so->so_state & SS_ISCONNECTED) == 0) {
1615 error = ENOTCONN;
1616 goto done;
1617 }
1618 if (uap->offset < 0) {
1619 error = EINVAL;
1620 goto done;
1621 }
1622
1623 /*
1624 * If specified, get the pointer to the sf_hdtr struct for
1625 * any headers/trailers.
1626 */
1627 if (uap->hdtr != NULL) {
1628 error = copyin(uap->hdtr, &hdtr, sizeof(hdtr));
1629 if (error)
1630 goto done;
1631 /*
1632 * Send any headers. Wimp out and use writev(2).
1633 */
1634 if (hdtr.headers != NULL) {
1635 nuap.fd = uap->s;
1636 nuap.iovp = hdtr.headers;
1637 nuap.iovcnt = hdtr.hdr_cnt;
1638 error = writev(p, &nuap);
1639 if (error)
1640 goto done;
1641 if (compat)
1642 sbytes += p->p_retval[0];
1643 else
1644 hdtr_size += p->p_retval[0];
1645 }
1646 }
1647
1648 /*
1649 * Protect against multiple writers to the socket.
1650 */
1651 (void) sblock(&so->so_snd, M_WAITOK);
1652
1653 /*
1654 * Loop through the pages in the file, starting with the requested
1655 * offset. Get a file page (do I/O if necessary), map the file page
1656 * into an sf_buf, attach an mbuf header to the sf_buf, and queue
1657 * it on the socket.
1658 */
1659 for (off = uap->offset; ; off += xfsize, sbytes += xfsize) {
1660 vm_pindex_t pindex;
1661 vm_offset_t pgoff;
1662
1663 pindex = OFF_TO_IDX(off);
1664 retry_lookup:
1665 /*
1666 * Calculate the amount to transfer. Not to exceed a page,
1667 * the EOF, or the passed in nbytes.
1668 */
1669 xfsize = obj->un_pager.vnp.vnp_size - off;
1670 if (xfsize > PAGE_SIZE)
1671 xfsize = PAGE_SIZE;
1672 pgoff = (vm_offset_t)(off & PAGE_MASK);
1673 if (PAGE_SIZE - pgoff < xfsize)
1674 xfsize = PAGE_SIZE - pgoff;
1675 if (uap->nbytes && xfsize > (uap->nbytes - sbytes))
1676 xfsize = uap->nbytes - sbytes;
1677 if (xfsize <= 0)
1678 break;
1679 /*
1680 * Optimize the non-blocking case by looking at the socket space
1681 * before going to the extra work of constituting the sf_buf.
1682 */
1683 if ((so->so_state & SS_NBIO) && sbspace(&so->so_snd) <= 0) {
1684 if (so->so_state & SS_CANTSENDMORE)
1685 error = EPIPE;
1686 else
1687 error = EAGAIN;
1688 sbunlock(&so->so_snd);
1689 goto done;
1690 }
1691 /*
1692 * Attempt to look up the page.
1693 *
1694 * Allocate if not found
1695 *
1696 * Wait and loop if busy.
1697 */
1698 pg = vm_page_lookup(obj, pindex);
1699
1700 if (pg == NULL) {
1701 pg = vm_page_alloc(obj, pindex, VM_ALLOC_NORMAL);
1702 if (pg == NULL) {
1703 VM_WAIT;
1704 goto retry_lookup;
1705 }
1706 vm_page_wakeup(pg);
1707 } else if (vm_page_sleep_busy(pg, TRUE, "sfpbsy")) {
1708 goto retry_lookup;
1709 }
1710
1711 /*
1712 * Wire the page so it does not get ripped out from under
1713 * us.
1714 */
1715
1716 vm_page_wire(pg);
1717
1718 /*
1719 * If page is not valid for what we need, initiate I/O
1720 */
1721
1722 if (!pg->valid || !vm_page_is_valid(pg, pgoff, xfsize)) {
1723 struct uio auio;
1724 struct iovec aiov;
1725 int bsize;
1726
1727 /*
1728 * Ensure that our page is still around when the I/O
1729 * completes.
1730 */
1731 vm_page_io_start(pg);
1732
1733 /*
1734 * Get the page from backing store.
1735 */
1736 bsize = vp->v_mount->mnt_stat.f_iosize;
1737 auio.uio_iov = &aiov;
1738 auio.uio_iovcnt = 1;
1739 aiov.iov_base = 0;
1740 aiov.iov_len = MAXBSIZE;
1741 auio.uio_resid = MAXBSIZE;
1742 auio.uio_offset = trunc_page(off);
1743 auio.uio_segflg = UIO_NOCOPY;
1744 auio.uio_rw = UIO_READ;
1745 auio.uio_procp = p;
1746 vn_lock(vp, LK_SHARED | LK_NOPAUSE | LK_RETRY, p);
1747 error = VOP_READ(vp, &auio, IO_VMIO | ((MAXBSIZE / bsize) << 16),
1748 p->p_ucred);
1749 VOP_UNLOCK(vp, 0, p);
1750 vm_page_flag_clear(pg, PG_ZERO);
1751 vm_page_io_finish(pg);
1752 if (error) {
1753 vm_page_unwire(pg, 0);
1754 /*
1755 * See if anyone else might know about this page.
1756 * If not and it is not valid, then free it.
1757 */
1758 if (pg->wire_count == 0 && pg->valid == 0 &&
1759 pg->busy == 0 && !(pg->flags & PG_BUSY) &&
1760 pg->hold_count == 0) {
1761 vm_page_busy(pg);
1762 vm_page_free(pg);
1763 }
1764 sbunlock(&so->so_snd);
1765 goto done;
1766 }
1767 mbstat.sf_iocnt++;
1768 }
1769
1770
1771 /*
1772 * Get a sendfile buf. We usually wait as long as necessary,
1773 * but this wait can be interrupted.
1774 */
1775 if ((sf = sf_buf_alloc()) == NULL) {
1776 mbstat.sf_allocfail++;
1777 s = splvm();
1778 vm_page_unwire(pg, 0);
1779 if (pg->wire_count == 0 && pg->object == NULL)
1780 vm_page_free(pg);
1781 splx(s);
1782 sbunlock(&so->so_snd);
1783 error = EINTR;
1784 goto done;
1785 }
1786
1787
1788 /*
1789 * Allocate a kernel virtual page and insert the physical page
1790 * into it.
1791 */
1792
1793 sf->m = pg;
1794 pmap_qenter(sf->kva, &pg, 1);
1795 /*
1796 * Get an mbuf header and set it up as having external storage.
1797 */
1798 MGETHDR(m, M_WAIT, MT_DATA);
1799 if (m == NULL) {
1800 error = ENOBUFS;
1801 sf_buf_free((void *)sf->kva, PAGE_SIZE);
1802 sbunlock(&so->so_snd);
1803 goto done;
1804 }
1805 m->m_ext.ext_free = sf_buf_free;
1806 m->m_ext.ext_ref = sf_buf_ref;
1807 m->m_ext.ext_buf = (void *)sf->kva;
1808 m->m_ext.ext_size = PAGE_SIZE;
1809 m->m_data = (char *) sf->kva + pgoff;
1810 m->m_flags |= M_EXT;
1811 m->m_pkthdr.len = m->m_len = xfsize;
1812 /*
1813 * Add the buffer to the socket buffer chain.
1814 */
1815 s = splnet();
1816 retry_space:
1817 /*
1818 * Make sure that the socket is still able to take more data.
1819 * CANTSENDMORE being true usually means that the connection
1820 * was closed. so_error is true when an error was sensed after
1821 * a previous send.
1822 * The state is checked after the page mapping and buffer
1823 * allocation above since those operations may block and make
1824 * any socket checks stale. From this point forward, nothing
1825 * blocks before the pru_send (or more accurately, any blocking
1826 * results in a loop back to here to re-check).
1827 */
1828 if ((so->so_state & SS_CANTSENDMORE) || so->so_error) {
1829 if (so->so_state & SS_CANTSENDMORE) {
1830 error = EPIPE;
1831 } else {
1832 error = so->so_error;
1833 so->so_error = 0;
1834 }
1835 m_freem(m);
1836 sbunlock(&so->so_snd);
1837 splx(s);
1838 goto done;
1839 }
1840 /*
1841 * Wait for socket space to become available. We do this just
1842 * after checking the connection state above in order to avoid
1843 * a race condition with sbwait().
1844 */
1845 if (sbspace(&so->so_snd) < so->so_snd.sb_lowat) {
1846 if (so->so_state & SS_NBIO) {
1847 m_freem(m);
1848 sbunlock(&so->so_snd);
1849 splx(s);
1850 error = EAGAIN;
1851 goto done;
1852 }
1853 error = sbwait(&so->so_snd);
1854 /*
1855 * An error from sbwait usually indicates that we've
1856 * been interrupted by a signal. If we've sent anything
1857 * then return bytes sent, otherwise return the error.
1858 */
1859 if (error) {
1860 m_freem(m);
1861 sbunlock(&so->so_snd);
1862 splx(s);
1863 goto done;
1864 }
1865 goto retry_space;
1866 }
1867 error = (*so->so_proto->pr_usrreqs->pru_send)(so, 0, m, 0, 0, p);
1868 splx(s);
1869 if (error) {
1870 sbunlock(&so->so_snd);
1871 goto done;
1872 }
1873 }
1874 sbunlock(&so->so_snd);
1875
1876 /*
1877 * Send trailers. Wimp out and use writev(2).
1878 */
1879 if (uap->hdtr != NULL && hdtr.trailers != NULL) {
1880 nuap.fd = uap->s;
1881 nuap.iovp = hdtr.trailers;
1882 nuap.iovcnt = hdtr.trl_cnt;
1883 error = writev(p, &nuap);
1884 if (error)
1885 goto done;
1886 if (compat)
1887 sbytes += p->p_retval[0];
1888 else
1889 hdtr_size += p->p_retval[0];
1890 }
1891
1892 done:
1893 if (uap->sbytes != NULL) {
1894 if (compat == 0)
1895 sbytes += hdtr_size;
1896 copyout(&sbytes, uap->sbytes, sizeof(off_t));
1897 }
1898 if (vp)
1899 vrele(vp);
1900 if (fp)
1901 fdrop(fp, p);
1902 /*
1903 * sendfile cannot be restarted.
1904 */
1905 if (error == ERESTART)
1906 error = EINTR;
1907 return (error);
1908 }
Cache object: 6a7e1eeb4fc4f0e591698f78d0b45f24
|