1 /* $NetBSD: sys_generic.c,v 1.97.2.1 2007/09/11 09:58:24 xtraeme Exp $ */
2
3 /*
4 * Copyright (c) 1982, 1986, 1989, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Redistribution and use in source and binary forms, with or without
13 * modification, are permitted provided that the following conditions
14 * are met:
15 * 1. Redistributions of source code must retain the above copyright
16 * notice, this list of conditions and the following disclaimer.
17 * 2. Redistributions in binary form must reproduce the above copyright
18 * notice, this list of conditions and the following disclaimer in the
19 * documentation and/or other materials provided with the distribution.
20 * 3. Neither the name of the University nor the names of its contributors
21 * may be used to endorse or promote products derived from this software
22 * without specific prior written permission.
23 *
24 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
25 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
28 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
29 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
30 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
31 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
32 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
33 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
34 * SUCH DAMAGE.
35 *
36 * @(#)sys_generic.c 8.9 (Berkeley) 2/14/95
37 */
38
39 #include <sys/cdefs.h>
40 __KERNEL_RCSID(0, "$NetBSD: sys_generic.c,v 1.97.2.1 2007/09/11 09:58:24 xtraeme Exp $");
41
42 #include "opt_ktrace.h"
43
44 #include <sys/param.h>
45 #include <sys/systm.h>
46 #include <sys/filedesc.h>
47 #include <sys/ioctl.h>
48 #include <sys/file.h>
49 #include <sys/proc.h>
50 #include <sys/socketvar.h>
51 #include <sys/signalvar.h>
52 #include <sys/uio.h>
53 #include <sys/kernel.h>
54 #include <sys/stat.h>
55 #include <sys/malloc.h>
56 #include <sys/poll.h>
57 #ifdef KTRACE
58 #include <sys/ktrace.h>
59 #endif
60
61 #include <sys/mount.h>
62 #include <sys/sa.h>
63 #include <sys/syscallargs.h>
64
65 #include <uvm/uvm_extern.h>
66
67 int selscan(struct lwp *, fd_mask *, fd_mask *, int, register_t *);
68 int pollscan(struct lwp *, struct pollfd *, int, register_t *);
69
70
71 /*
72 * Read system call.
73 */
74 /* ARGSUSED */
75 int
76 sys_read(struct lwp *l, void *v, register_t *retval)
77 {
78 struct sys_read_args /* {
79 syscallarg(int) fd;
80 syscallarg(void *) buf;
81 syscallarg(size_t) nbyte;
82 } */ *uap = v;
83 int fd;
84 struct file *fp;
85 struct proc *p;
86 struct filedesc *fdp;
87
88 fd = SCARG(uap, fd);
89 p = l->l_proc;
90 fdp = p->p_fd;
91
92 if ((fp = fd_getfile(fdp, fd)) == NULL)
93 return (EBADF);
94
95 if ((fp->f_flag & FREAD) == 0) {
96 simple_unlock(&fp->f_slock);
97 return (EBADF);
98 }
99
100 FILE_USE(fp);
101
102 /* dofileread() will unuse the descriptor for us */
103 return (dofileread(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
104 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
105 }
106
/*
 * Common code for read(2)-style calls: perform a single-iovec read
 * from "fp" into the user buffer "buf".  Consumes the FILE_USE()
 * reference on "fp" on every path.  On success *retval holds the
 * number of bytes actually transferred.
 */
int
dofileread(struct lwp *l, int fd, struct file *fp, void *buf, size_t nbyte,
    off_t *offset, int flags, register_t *retval)
{
	struct iovec aiov;
	struct uio auio;
	struct proc *p;
	struct vmspace *vm;
	size_t cnt;
	int error;
#ifdef KTRACE
	struct iovec ktriov = { .iov_base = NULL, };
#endif
	p = l->l_proc;

	/* Hold a reference on the process address space for the transfer. */
	error = proc_vmspace_getref(p, &vm);
	if (error) {
		FILE_UNUSE(fp, l);
		return error;
	}

	/* Build a single-element uio describing the user buffer. */
	aiov.iov_base = (caddr_t)buf;
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_READ;
	auio.uio_vmspace = vm;

	/*
	 * Reads return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error)
		/* A partial transfer counts as success for these errors. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;		/* bytes actually transferred */
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(l, fd, UIO_READ, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, l);
	uvmspace_free(vm);
	return (error);
}
170
171 /*
172 * Scatter read system call.
173 */
174 int
175 sys_readv(struct lwp *l, void *v, register_t *retval)
176 {
177 struct sys_readv_args /* {
178 syscallarg(int) fd;
179 syscallarg(const struct iovec *) iovp;
180 syscallarg(int) iovcnt;
181 } */ *uap = v;
182 struct filedesc *fdp;
183 struct file *fp;
184 struct proc *p;
185 int fd;
186
187 fd = SCARG(uap, fd);
188 p = l->l_proc;
189 fdp = p->p_fd;
190
191 if ((fp = fd_getfile(fdp, fd)) == NULL)
192 return (EBADF);
193
194 if ((fp->f_flag & FREAD) == 0) {
195 simple_unlock(&fp->f_slock);
196 return (EBADF);
197 }
198
199 FILE_USE(fp);
200
201 /* dofilereadv() will unuse the descriptor for us */
202 return (dofilereadv(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
203 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
204 }
205
/*
 * Common code for readv(2)-style calls: scatter-read from "fp" into
 * the user's iovec array.  Consumes the FILE_USE() reference on "fp"
 * on every path.  On success *retval holds the bytes transferred.
 */
int
dofilereadv(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp,
    int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct proc *p;
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	struct vmspace *vm;
	int i, error;
	size_t cnt;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;
#endif

	p = l->l_proc;
	/* Hold a reference on the process address space for the transfer. */
	error = proc_vmspace_getref(p, &vm);
	if (error) {
		FILE_UNUSE(fp, l);
		return error;
	}

#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		/* Too many entries for the on-stack array: allocate. */
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_READ;
	auio.uio_vmspace = vm;
	/* Fetch the iovec array itself from userland. */
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Reads return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_read)(fp, offset, &auio, fp->f_cred, flags);
	if (error)
		/* A partial transfer counts as success for these errors. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
	cnt -= auio.uio_resid;		/* bytes actually transferred */
#ifdef KTRACE
	if (ktriov != NULL) {
		if (KTRPOINT(p, KTR_GENIO) && (error == 0))
			ktrgenio(l, fd, UIO_READ, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, l);
	uvmspace_free(vm);
	return (error);
}
301
302 /*
303 * Write system call
304 */
305 int
306 sys_write(struct lwp *l, void *v, register_t *retval)
307 {
308 struct sys_write_args /* {
309 syscallarg(int) fd;
310 syscallarg(const void *) buf;
311 syscallarg(size_t) nbyte;
312 } */ *uap = v;
313 int fd;
314 struct file *fp;
315 struct proc *p;
316 struct filedesc *fdp;
317
318 fd = SCARG(uap, fd);
319 p = l->l_proc;
320 fdp = p->p_fd;
321
322 if ((fp = fd_getfile(fdp, fd)) == NULL)
323 return (EBADF);
324
325 if ((fp->f_flag & FWRITE) == 0) {
326 simple_unlock(&fp->f_slock);
327 return (EBADF);
328 }
329
330 FILE_USE(fp);
331
332 /* dofilewrite() will unuse the descriptor for us */
333 return (dofilewrite(l, fd, fp, SCARG(uap, buf), SCARG(uap, nbyte),
334 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
335 }
336
/*
 * Common code for write(2)-style calls: perform a single-iovec write
 * of the user buffer "buf" to "fp".  Consumes the FILE_USE()
 * reference on "fp" on every path.  Delivers SIGPIPE on EPIPE.  On
 * success *retval holds the number of bytes actually transferred.
 */
int
dofilewrite(struct lwp *l, int fd, struct file *fp, const void *buf,
    size_t nbyte, off_t *offset, int flags, register_t *retval)
{
	struct iovec aiov;
	struct uio auio;
	struct proc *p;
	struct vmspace *vm;
	size_t cnt;
	int error;
#ifdef KTRACE
	struct iovec ktriov = { .iov_base = NULL, };
#endif

	p = l->l_proc;
	/* Hold a reference on the process address space for the transfer. */
	error = proc_vmspace_getref(p, &vm);
	if (error) {
		FILE_UNUSE(fp, l);
		return error;
	}
	/* Build a single-element uio describing the user buffer. */
	aiov.iov_base = __UNCONST(buf);		/* XXXUNCONST kills const */
	aiov.iov_len = nbyte;
	auio.uio_iov = &aiov;
	auio.uio_iovcnt = 1;
	auio.uio_resid = nbyte;
	auio.uio_rw = UIO_WRITE;
	auio.uio_vmspace = vm;

	/*
	 * Writes return ssize_t because -1 is returned on error.  Therefore
	 * we must restrict the length to SSIZE_MAX to avoid garbage return
	 * values.
	 */
	if (auio.uio_resid > SSIZE_MAX) {
		error = EINVAL;
		goto out;
	}

#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO))
		ktriov = aiov;
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		/* A partial transfer counts as success for these errors. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* Writing to a broken pipe also raises SIGPIPE. */
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;		/* bytes actually transferred */
#ifdef KTRACE
	if (KTRPOINT(p, KTR_GENIO) && error == 0)
		ktrgenio(l, fd, UIO_WRITE, &ktriov, cnt, error);
#endif
	*retval = cnt;
 out:
	FILE_UNUSE(fp, l);
	uvmspace_free(vm);
	return (error);
}
402
403 /*
404 * Gather write system call
405 */
406 int
407 sys_writev(struct lwp *l, void *v, register_t *retval)
408 {
409 struct sys_writev_args /* {
410 syscallarg(int) fd;
411 syscallarg(const struct iovec *) iovp;
412 syscallarg(int) iovcnt;
413 } */ *uap = v;
414 int fd;
415 struct file *fp;
416 struct proc *p;
417 struct filedesc *fdp;
418
419 fd = SCARG(uap, fd);
420 p = l->l_proc;
421 fdp = p->p_fd;
422
423 if ((fp = fd_getfile(fdp, fd)) == NULL)
424 return (EBADF);
425
426 if ((fp->f_flag & FWRITE) == 0) {
427 simple_unlock(&fp->f_slock);
428 return (EBADF);
429 }
430
431 FILE_USE(fp);
432
433 /* dofilewritev() will unuse the descriptor for us */
434 return (dofilewritev(l, fd, fp, SCARG(uap, iovp), SCARG(uap, iovcnt),
435 &fp->f_offset, FOF_UPDATE_OFFSET, retval));
436 }
437
/*
 * Common code for writev(2)-style calls: gather-write the user's
 * iovec array to "fp".  Consumes the FILE_USE() reference on "fp"
 * on every path.  Delivers SIGPIPE on EPIPE.  On success *retval
 * holds the bytes transferred.
 */
int
dofilewritev(struct lwp *l, int fd, struct file *fp, const struct iovec *iovp,
    int iovcnt, off_t *offset, int flags, register_t *retval)
{
	struct proc *p;
	struct uio auio;
	struct iovec *iov, *needfree, aiov[UIO_SMALLIOV];
	struct vmspace *vm;
	int i, error;
	size_t cnt;
	u_int iovlen;
#ifdef KTRACE
	struct iovec *ktriov;
#endif

	p = l->l_proc;
	/* Hold a reference on the process address space for the transfer. */
	error = proc_vmspace_getref(p, &vm);
	if (error) {
		FILE_UNUSE(fp, l);
		return error;
	}
#ifdef KTRACE
	ktriov = NULL;
#endif
	/* note: can't use iovlen until iovcnt is validated */
	iovlen = iovcnt * sizeof(struct iovec);
	if ((u_int)iovcnt > UIO_SMALLIOV) {
		if ((u_int)iovcnt > IOV_MAX) {
			error = EINVAL;
			goto out;
		}
		/* Too many entries for the on-stack array: allocate. */
		iov = malloc(iovlen, M_IOV, M_WAITOK);
		needfree = iov;
	} else if ((u_int)iovcnt > 0) {
		iov = aiov;
		needfree = NULL;
	} else {
		error = EINVAL;
		goto out;
	}

	auio.uio_iov = iov;
	auio.uio_iovcnt = iovcnt;
	auio.uio_rw = UIO_WRITE;
	auio.uio_vmspace = vm;
	/* Fetch the iovec array itself from userland. */
	error = copyin(iovp, iov, iovlen);
	if (error)
		goto done;
	auio.uio_resid = 0;
	for (i = 0; i < iovcnt; i++) {
		auio.uio_resid += iov->iov_len;
		/*
		 * Writes return ssize_t because -1 is returned on error.
		 * Therefore we must restrict the length to SSIZE_MAX to
		 * avoid garbage return values.
		 */
		if (iov->iov_len > SSIZE_MAX || auio.uio_resid > SSIZE_MAX) {
			error = EINVAL;
			goto done;
		}
		iov++;
	}
#ifdef KTRACE
	/*
	 * if tracing, save a copy of iovec
	 */
	if (KTRPOINT(p, KTR_GENIO)) {
		ktriov = malloc(iovlen, M_TEMP, M_WAITOK);
		memcpy((caddr_t)ktriov, (caddr_t)auio.uio_iov, iovlen);
	}
#endif
	cnt = auio.uio_resid;
	error = (*fp->f_ops->fo_write)(fp, offset, &auio, fp->f_cred, flags);
	if (error) {
		/* A partial transfer counts as success for these errors. */
		if (auio.uio_resid != cnt && (error == ERESTART ||
		    error == EINTR || error == EWOULDBLOCK))
			error = 0;
		/* Writing to a broken pipe also raises SIGPIPE. */
		if (error == EPIPE)
			psignal(p, SIGPIPE);
	}
	cnt -= auio.uio_resid;		/* bytes actually transferred */
#ifdef KTRACE
	if (ktriov != NULL) {
		if (KTRPOINT(p, KTR_GENIO) && (error == 0))
			ktrgenio(l, fd, UIO_WRITE, ktriov, cnt, error);
		free(ktriov, M_TEMP);
	}
#endif
	*retval = cnt;
 done:
	if (needfree)
		free(needfree, M_IOV);
 out:
	FILE_UNUSE(fp, l);
	uvmspace_free(vm);
	return (error);
}
535
536 /*
537 * Ioctl system call
538 */
/* ARGSUSED */
int
sys_ioctl(struct lwp *l, void *v, register_t *retval)
{
	struct sys_ioctl_args /* {
		syscallarg(int) fd;
		syscallarg(u_long) com;
		syscallarg(caddr_t) data;
	} */ *uap = v;
	struct file *fp;
	struct proc *p;
	struct filedesc *fdp;
	u_long com;		/* ioctl command word */
	int error;
	u_int size;		/* size of the in/out parameter block */
	caddr_t data, memp;	/* parameter buffer; memp != NULL if malloc'd */
#define	STK_PARAMS	128	/* blocks up to this size use the stack buffer */
	u_long stkbuf[STK_PARAMS/sizeof(u_long)];

	error = 0;
	p = l->l_proc;
	fdp = p->p_fd;

	if ((fp = fd_getfile(fdp, SCARG(uap, fd))) == NULL)
		return (EBADF);

	FILE_USE(fp);

	/* The descriptor must be open for reading or writing. */
	if ((fp->f_flag & (FREAD | FWRITE)) == 0) {
		error = EBADF;
		com = 0;	/* keep the error reporting below well-defined */
		goto out;
	}

	/*
	 * FIONCLEX/FIOCLEX manipulate the descriptor table entry, not
	 * the file itself, so handle them before dispatching.
	 */
	switch (com = SCARG(uap, com)) {
	case FIONCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] &= ~UF_EXCLOSE;
		goto out;

	case FIOCLEX:
		fdp->fd_ofileflags[SCARG(uap, fd)] |= UF_EXCLOSE;
		goto out;
	}

	/*
	 * Interpret high order word to find amount of data to be
	 * copied to/from the user's address space.
	 */
	size = IOCPARM_LEN(com);
	if (size > IOCPARM_MAX) {
		error = ENOTTY;
		goto out;
	}
	memp = NULL;
	if (size > sizeof(stkbuf)) {
		/* Parameter block too big for the stack buffer. */
		memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK);
		data = memp;
	} else
		data = (caddr_t)stkbuf;
	if (com&IOC_IN) {
		if (size) {
			/* Copy the input parameter block in from userland. */
			error = copyin(SCARG(uap, data), data, size);
			if (error) {
				if (memp)
					free(memp, M_IOCTLOPS);
				goto out;
			}
#ifdef KTRACE
			if (KTRPOINT(p, KTR_GENIO)) {
				struct iovec iov;
				iov.iov_base = SCARG(uap, data);
				iov.iov_len = size;
				ktrgenio(l, SCARG(uap, fd), UIO_WRITE, &iov,
					size, 0);
			}
#endif
		} else
			/* Zero-size IOC_IN: pass the user pointer itself. */
			*(caddr_t *)data = SCARG(uap, data);
	} else if ((com&IOC_OUT) && size)
		/*
		 * Zero the buffer so the user always
		 * gets back something deterministic.
		 */
		memset(data, 0, size);
	else if (com&IOC_VOID)
		*(caddr_t *)data = SCARG(uap, data);

	switch (com) {

	case FIONBIO:
		/* Track the non-blocking flag in f_flag, then notify. */
		if (*(int *)data != 0)
			fp->f_flag |= FNONBLOCK;
		else
			fp->f_flag &= ~FNONBLOCK;
		error = (*fp->f_ops->fo_ioctl)(fp, FIONBIO, data, l);
		break;

	case FIOASYNC:
		/* Likewise for async I/O notification. */
		if (*(int *)data != 0)
			fp->f_flag |= FASYNC;
		else
			fp->f_flag &= ~FASYNC;
		error = (*fp->f_ops->fo_ioctl)(fp, FIOASYNC, data, l);
		break;

	default:
		error = (*fp->f_ops->fo_ioctl)(fp, com, data, l);
		/*
		 * Copy any data to user, size was
		 * already set and checked above.
		 */
		if (error == 0 && (com&IOC_OUT) && size) {
			error = copyout(data, SCARG(uap, data), size);
#ifdef KTRACE
			if (KTRPOINT(p, KTR_GENIO)) {
				struct iovec iov;
				iov.iov_base = SCARG(uap, data);
				iov.iov_len = size;
				ktrgenio(l, SCARG(uap, fd), UIO_READ, &iov,
					size, error);
			}
#endif
		}
		break;
	}
	if (memp)
		free(memp, M_IOCTLOPS);
 out:
	FILE_UNUSE(fp, l);
	switch (error) {
	case -1:
		/* A driver returned -1 instead of EPASSTHROUGH: complain. */
		printf("sys_ioctl: _IO%s%s('%c', %lu, %lu) returned -1: "
		    "pid=%d comm=%s\n",
		    (com & IOC_IN) ? "W" : "", (com & IOC_OUT) ? "R" : "",
		    (char)IOCGROUP(com), (com & 0xff), IOCPARM_LEN(com),
		    p->p_pid, p->p_comm);
		/* FALLTHROUGH */
	case EPASSTHROUGH:
		/* Nobody recognized the command: inappropriate ioctl. */
		error = ENOTTY;
		/* FALLTHROUGH */
	default:
		return (error);
	}
}
683
684 int selwait, nselcoll;
685
686 /*
687 * Select system call.
688 */
689 int
690 sys_pselect(struct lwp *l, void *v, register_t *retval)
691 {
692 struct sys_pselect_args /* {
693 syscallarg(int) nd;
694 syscallarg(fd_set *) in;
695 syscallarg(fd_set *) ou;
696 syscallarg(fd_set *) ex;
697 syscallarg(const struct timespec *) ts;
698 syscallarg(sigset_t *) mask;
699 } */ * const uap = v;
700 struct timespec ats;
701 struct timeval atv, *tv = NULL;
702 sigset_t amask, *mask = NULL;
703 int error;
704
705 if (SCARG(uap, ts)) {
706 error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
707 if (error)
708 return error;
709 atv.tv_sec = ats.tv_sec;
710 atv.tv_usec = ats.tv_nsec / 1000;
711 tv = &atv;
712 }
713 if (SCARG(uap, mask) != NULL) {
714 error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
715 if (error)
716 return error;
717 mask = &amask;
718 }
719
720 return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in),
721 SCARG(uap, ou), SCARG(uap, ex), tv, mask);
722 }
723
/*
 * Validate a timeout and record the (monotonic) time at which the
 * wait begins.  Returns -1 if the timeout is invalid, else 0.
 */
int
inittimeleft(struct timeval *tv, struct timeval *sleeptv)
{
	if (itimerfix(tv) != 0)
		return -1;
	getmicrouptime(sleeptv);
	return 0;
}
732
733 int
734 gettimeleft(struct timeval *tv, struct timeval *sleeptv)
735 {
736 /*
737 * We have to recalculate the timeout on every retry.
738 */
739 struct timeval slepttv;
740 /*
741 * reduce tv by elapsed time
742 * based on monotonic time scale
743 */
744 getmicrouptime(&slepttv);
745 timeradd(tv, sleeptv, tv);
746 timersub(tv, &slepttv, tv);
747 *sleeptv = slepttv;
748 return tvtohz(tv);
749 }
750
751 int
752 sys_select(struct lwp *l, void *v, register_t *retval)
753 {
754 struct sys_select_args /* {
755 syscallarg(int) nd;
756 syscallarg(fd_set *) in;
757 syscallarg(fd_set *) ou;
758 syscallarg(fd_set *) ex;
759 syscallarg(struct timeval *) tv;
760 } */ * const uap = v;
761 struct timeval atv, *tv = NULL;
762 int error;
763
764 if (SCARG(uap, tv)) {
765 error = copyin(SCARG(uap, tv), (caddr_t)&atv,
766 sizeof(atv));
767 if (error)
768 return error;
769 tv = &atv;
770 }
771
772 return selcommon(l, retval, SCARG(uap, nd), SCARG(uap, in),
773 SCARG(uap, ou), SCARG(uap, ex), tv, NULL);
774 }
775
/*
 * Common code for select(2) and pselect(2): copy in the three
 * descriptor sets, scan them (optionally under a temporary signal
 * mask and with a timeout), and copy the result sets back out.
 */
int
selcommon(struct lwp *l, register_t *retval, int nd, fd_set *u_in,
    fd_set *u_ou, fd_set *u_ex, struct timeval *tv, sigset_t *mask)
{
	/* On-stack room for three input sets plus three output sets. */
	char smallbits[howmany(FD_SETSIZE, NFDBITS) *
	    sizeof(fd_mask) * 6];
	struct proc * const p = l->l_proc;
	caddr_t bits;
	int s, ncoll, error, timo;
	size_t ni;		/* bytes per descriptor set */
	sigset_t oldmask;
	struct timeval sleeptv;

	error = 0;
	if (nd < 0)
		return (EINVAL);
	if (nd > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		nd = p->p_fd->fd_nfiles;
	}
	ni = howmany(nd, NFDBITS) * sizeof(fd_mask);
	if (ni * 6 > sizeof(smallbits))
		bits = malloc(ni * 6, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	/* Copy a user set in, or zero the slot if the pointer is NULL. */
#define	getbits(name, x) \
	if (u_ ## name) { \
		error = copyin(u_ ## name, bits + ni * x, ni); \
		if (error) \
			goto done; \
	} else \
		memset(bits + ni * x, 0, ni);
	getbits(in, 0);
	getbits(ou, 1);
	getbits(ex, 2);
#undef	getbits

	timo = 0;
	if (tv && inittimeleft(tv, &sleeptv) == -1) {
		error = EINVAL;
		goto done;
	}

	/* Install the caller's signal mask for the duration of the wait. */
	if (mask)
		(void)sigprocmask1(p, SIG_SETMASK, mask, &oldmask);

 retry:
	ncoll = nselcoll;
	l->l_flag |= L_SELECT;
	error = selscan(l, (fd_mask *)(bits + ni * 0),
	    (fd_mask *)(bits + ni * 3), nd, retval);
	if (error || *retval)
		goto donemask;
	if (tv && (timo = gettimeleft(tv, &sleeptv)) <= 0)
		goto donemask;
	s = splsched();
	/* If a wakeup or collision raced with the scan, scan again. */
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "select", timo);
	splx(s);
	if (error == 0)
		goto retry;
 donemask:
	if (mask)
		(void)sigprocmask1(p, SIG_SETMASK, &oldmask, NULL);
	l->l_flag &= ~L_SELECT;
 done:
	/* select is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {

		/* Copy an output set back out to userland. */
#define	putbits(name, x) \
		if (u_ ## name) { \
			error = copyout(bits + ni * x, u_ ## name, ni); \
			if (error) \
				goto out; \
		}
		putbits(in, 3);
		putbits(ou, 4);
		putbits(ex, 5);
#undef	putbits
	}
 out:
	if (ni * 6 > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
870
/*
 * Scan the three input bit masks, polling each named descriptor, and
 * set the corresponding bit in the output masks for those ready.
 * Returns EBADF if a set bit names an invalid descriptor; the count
 * of ready descriptors is stored in *retval.
 */
int
selscan(struct lwp *l, fd_mask *ibitp, fd_mask *obitp, int nfd,
    register_t *retval)
{
	/* poll(2) events checked for the read/write/except sets. */
	static const int flag[3] = { POLLRDNORM | POLLHUP | POLLERR,
			       POLLWRNORM | POLLHUP | POLLERR,
			       POLLRDBAND };
	struct proc *p = l->l_proc;
	struct filedesc *fdp;
	int msk, i, j, fd, n;
	fd_mask ibits, obits;
	struct file *fp;

	fdp = p->p_fd;
	n = 0;
	for (msk = 0; msk < 3; msk++) {
		for (i = 0; i < nfd; i += NFDBITS) {
			ibits = *ibitp++;
			obits = 0;
			/* Walk the set bits of this word, lowest first. */
			while ((j = ffs(ibits)) && (fd = i + --j) < nfd) {
				ibits &= ~(1 << j);
				if ((fp = fd_getfile(fdp, fd)) == NULL)
					return (EBADF);
				FILE_USE(fp);
				if ((*fp->f_ops->fo_poll)(fp, flag[msk], l)) {
					obits |= (1 << j);
					n++;
				}
				FILE_UNUSE(fp, l);
			}
			*obitp++ = obits;
		}
	}
	*retval = n;
	return (0);
}
907
908 /*
909 * Poll system call.
910 */
911 int
912 sys_poll(struct lwp *l, void *v, register_t *retval)
913 {
914 struct sys_poll_args /* {
915 syscallarg(struct pollfd *) fds;
916 syscallarg(u_int) nfds;
917 syscallarg(int) timeout;
918 } */ * const uap = v;
919 struct timeval atv, *tv = NULL;
920
921 if (SCARG(uap, timeout) != INFTIM) {
922 atv.tv_sec = SCARG(uap, timeout) / 1000;
923 atv.tv_usec = (SCARG(uap, timeout) % 1000) * 1000;
924 tv = &atv;
925 }
926
927 return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds),
928 tv, NULL);
929 }
930
931 /*
932 * Poll system call.
933 */
934 int
935 sys_pollts(struct lwp *l, void *v, register_t *retval)
936 {
937 struct sys_pollts_args /* {
938 syscallarg(struct pollfd *) fds;
939 syscallarg(u_int) nfds;
940 syscallarg(const struct timespec *) ts;
941 syscallarg(const sigset_t *) mask;
942 } */ * const uap = v;
943 struct timespec ats;
944 struct timeval atv, *tv = NULL;
945 sigset_t amask, *mask = NULL;
946 int error;
947
948 if (SCARG(uap, ts)) {
949 error = copyin(SCARG(uap, ts), &ats, sizeof(ats));
950 if (error)
951 return error;
952 atv.tv_sec = ats.tv_sec;
953 atv.tv_usec = ats.tv_nsec / 1000;
954 tv = &atv;
955 }
956 if (SCARG(uap, mask)) {
957 error = copyin(SCARG(uap, mask), &amask, sizeof(amask));
958 if (error)
959 return error;
960 mask = &amask;
961 }
962
963 return pollcommon(l, retval, SCARG(uap, fds), SCARG(uap, nfds),
964 tv, mask);
965 }
966
/*
 * Common code for poll(2) and pollts(2): copy in the pollfd array,
 * scan it (optionally under a temporary signal mask and with a
 * timeout), and copy the updated revents back out.
 */
int
pollcommon(struct lwp *l, register_t *retval,
	struct pollfd *u_fds, u_int nfds,
	struct timeval *tv, sigset_t *mask)
{
	/* On-stack space for 32 pollfds; larger arrays are malloc'd. */
	char smallbits[32 * sizeof(struct pollfd)];
	struct proc * const p = l->l_proc;
	caddr_t bits;
	sigset_t oldmask;
	int s, ncoll, error, timo;
	size_t ni;		/* size of the pollfd array in bytes */
	struct timeval sleeptv;

	if (nfds > p->p_fd->fd_nfiles) {
		/* forgiving; slightly wrong */
		nfds = p->p_fd->fd_nfiles;
	}
	ni = nfds * sizeof(struct pollfd);
	if (ni > sizeof(smallbits))
		bits = malloc(ni, M_TEMP, M_WAITOK);
	else
		bits = smallbits;

	error = copyin(u_fds, bits, ni);
	if (error)
		goto done;

	timo = 0;
	if (tv && inittimeleft(tv, &sleeptv) == -1) {
		error = EINVAL;
		goto done;
	}

	/* Install the caller's signal mask for the duration of the wait. */
	if (mask != NULL)
		(void)sigprocmask1(p, SIG_SETMASK, mask, &oldmask);

 retry:
	ncoll = nselcoll;
	l->l_flag |= L_SELECT;
	error = pollscan(l, (struct pollfd *)bits, nfds, retval);
	if (error || *retval)
		goto donemask;
	if (tv && (timo = gettimeleft(tv, &sleeptv)) <= 0)
		goto donemask;
	s = splsched();
	/* If a wakeup or collision raced with the scan, scan again. */
	if ((l->l_flag & L_SELECT) == 0 || nselcoll != ncoll) {
		splx(s);
		goto retry;
	}
	l->l_flag &= ~L_SELECT;
	error = tsleep((caddr_t)&selwait, PSOCK | PCATCH, "poll", timo);
	splx(s);
	if (error == 0)
		goto retry;
 donemask:
	if (mask != NULL)
		(void)sigprocmask1(p, SIG_SETMASK, &oldmask, NULL);
	l->l_flag &= ~L_SELECT;
 done:
	/* poll is not restarted after signals... */
	if (error == ERESTART)
		error = EINTR;
	if (error == EWOULDBLOCK)
		error = 0;
	if (error == 0) {
		/* Copy the updated revents back to the user. */
		error = copyout(bits, u_fds, ni);
		if (error)
			goto out;
	}
 out:
	if (ni > sizeof(smallbits))
		free(bits, M_TEMP);
	return (error);
}
1041
/*
 * Poll every entry of the fds array, filling in each revents field.
 * The count of descriptors with pending events goes in *retval.
 */
int
pollscan(struct lwp *l, struct pollfd *fds, int nfd, register_t *retval)
{
	struct proc *p = l->l_proc;
	struct filedesc *fdp;
	int i, n;
	struct file *fp;

	fdp = p->p_fd;
	n = 0;
	for (i = 0; i < nfd; i++, fds++) {
		if (fds->fd >= fdp->fd_nfiles) {
			/* Descriptor out of range for this process. */
			fds->revents = POLLNVAL;
			n++;
		} else if (fds->fd < 0) {
			/* Negative descriptors are ignored, per poll(2). */
			fds->revents = 0;
		} else {
			if ((fp = fd_getfile(fdp, fds->fd)) == NULL) {
				fds->revents = POLLNVAL;
				n++;
			} else {
				FILE_USE(fp);
				/* POLLERR/POLLHUP may always be reported. */
				fds->revents = (*fp->f_ops->fo_poll)(fp,
				    fds->events | POLLERR | POLLHUP, l);
				if (fds->revents != 0)
					n++;
				FILE_UNUSE(fp, l);
			}
		}
	}
	*retval = n;
	return (0);
}
1075
/*
 * Generic poll routine for devices that are always ready: report
 * whichever of the normal read/write events the caller asked for.
 */
/*ARGSUSED*/
int
seltrue(dev_t dev, int events, struct lwp *l)
{

	return (events & (POLLIN | POLLOUT | POLLRDNORM | POLLWRNORM));
}
1083
1084 /*
1085 * Record a select request.
1086 */
/*
 * Record a select request.
 *
 * Remember the selecting process in *sip so selwakeup() can find it.
 * If a different process is already recorded and still waiting on
 * selwait, set the collision flag so selwakeup() wakes everybody.
 */
void
selrecord(struct lwp *selector, struct selinfo *sip)
{
	struct lwp *l;
	struct proc *p;
	pid_t mypid;

	mypid = selector->l_proc->p_pid;
	if (sip->sel_pid == mypid)
		return;		/* already recorded for this process */
	if (sip->sel_pid && (p = pfind(sip->sel_pid))) {
		LIST_FOREACH(l, &p->p_lwps, l_sibling) {
			if (l->l_wchan == (caddr_t)&selwait) {
				/* Another selector is asleep: collision. */
				sip->sel_collision = 1;
				return;
			}
		}
	}

	sip->sel_pid = mypid;
}
1108
1109 /*
1110 * Do a wakeup when a selectable event occurs.
1111 */
1112 void
1113 selwakeup(sip)
1114 struct selinfo *sip;
1115 {
1116 struct lwp *l;
1117 struct proc *p;
1118 int s;
1119
1120 if (sip->sel_pid == 0)
1121 return;
1122 if (sip->sel_collision) {
1123 sip->sel_pid = 0;
1124 nselcoll++;
1125 sip->sel_collision = 0;
1126 wakeup((caddr_t)&selwait);
1127 return;
1128 }
1129 p = pfind(sip->sel_pid);
1130 sip->sel_pid = 0;
1131 if (p != NULL) {
1132 LIST_FOREACH(l, &p->p_lwps, l_sibling) {
1133 SCHED_LOCK(s);
1134 if (l->l_wchan == (caddr_t)&selwait) {
1135 if (l->l_stat == LSSLEEP)
1136 setrunnable(l);
1137 else
1138 unsleep(l);
1139 } else if (l->l_flag & L_SELECT)
1140 l->l_flag &= ~L_SELECT;
1141 SCHED_UNLOCK(s);
1142 }
1143 }
1144 }
/* Cache object: a7699b55cc447f9be5c2246be9250d02 */