FreeBSD/Linux Kernel Cross Reference
sys/kern/subr_uio.c
1 /*-
2 * SPDX-License-Identifier: BSD-3-Clause
3 *
4 * Copyright (c) 1982, 1986, 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 * (c) UNIX System Laboratories, Inc.
7 * All or some portions of this file are derived from material licensed
8 * to the University of California by American Telephone and Telegraph
9 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
10 * the permission of UNIX System Laboratories, Inc.
11 *
12 * Copyright (c) 2014 The FreeBSD Foundation
13 *
14 * Portions of this software were developed by Konstantin Belousov
15 * under sponsorship from the FreeBSD Foundation.
16 *
17 * Redistribution and use in source and binary forms, with or without
18 * modification, are permitted provided that the following conditions
19 * are met:
20 * 1. Redistributions of source code must retain the above copyright
21 * notice, this list of conditions and the following disclaimer.
22 * 2. Redistributions in binary form must reproduce the above copyright
23 * notice, this list of conditions and the following disclaimer in the
24 * documentation and/or other materials provided with the distribution.
25 * 3. Neither the name of the University nor the names of its contributors
26 * may be used to endorse or promote products derived from this software
27 * without specific prior written permission.
28 *
29 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
30 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
31 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
32 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
33 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
34 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
35 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
36 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
37 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
38 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
39 * SUCH DAMAGE.
40 *
41 * @(#)kern_subr.c 8.3 (Berkeley) 1/21/94
42 */
43
44 #include <sys/cdefs.h>
45 __FBSDID("$FreeBSD$");
46
47 #include <sys/param.h>
48 #include <sys/systm.h>
49 #include <sys/kernel.h>
50 #include <sys/limits.h>
51 #include <sys/lock.h>
52 #include <sys/mman.h>
53 #include <sys/proc.h>
54 #include <sys/resourcevar.h>
55 #include <sys/rwlock.h>
56 #include <sys/sched.h>
57 #include <sys/sysctl.h>
58 #include <sys/vnode.h>
59
60 #include <vm/vm.h>
61 #include <vm/vm_param.h>
62 #include <vm/vm_extern.h>
63 #include <vm/vm_page.h>
64 #include <vm/vm_pageout.h>
65 #include <vm/vm_map.h>
66
67 #include <machine/bus.h>
68
69 SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, SYSCTL_NULL_INT_PTR, UIO_MAXIOV,
70 "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)");
71
72 static int uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault);
73
/*
 * copyin() bracketed by vm_fault_disable_pagefaults() and
 * vm_fault_enable_pagefaults().
 *
 * udaddr: source address handed to copyin().
 * kaddr:  destination buffer handed to copyin().
 * len:    number of bytes to copy.
 *
 * Returns whatever copyin() returned.  The state token obtained from the
 * disable call is always passed back to the enable call before returning.
 */
int
copyin_nofault(const void *udaddr, void *kaddr, size_t len)
{
	int prev, rv;

	prev = vm_fault_disable_pagefaults();
	rv = copyin(udaddr, kaddr, len);
	vm_fault_enable_pagefaults(prev);

	return (rv);
}
84
/*
 * copyout() bracketed by vm_fault_disable_pagefaults() and
 * vm_fault_enable_pagefaults().
 *
 * kaddr:  source buffer handed to copyout().
 * udaddr: destination address handed to copyout().
 * len:    number of bytes to copy.
 *
 * Returns whatever copyout() returned.  The state token obtained from the
 * disable call is always passed back to the enable call before returning.
 */
int
copyout_nofault(const void *kaddr, void *udaddr, size_t len)
{
	int prev, rv;

	prev = vm_fault_disable_pagefaults();
	rv = copyout(kaddr, udaddr, len);
	vm_fault_enable_pagefaults(prev);

	return (rv);
}
95
96 #define PHYS_PAGE_COUNT(len) (howmany(len, PAGE_SIZE) + 1)
97
98 int
99 physcopyin(void *src, vm_paddr_t dst, size_t len)
100 {
101 vm_page_t m[PHYS_PAGE_COUNT(len)];
102 struct iovec iov[1];
103 struct uio uio;
104 int i;
105
106 iov[0].iov_base = src;
107 iov[0].iov_len = len;
108 uio.uio_iov = iov;
109 uio.uio_iovcnt = 1;
110 uio.uio_offset = 0;
111 uio.uio_resid = len;
112 uio.uio_segflg = UIO_SYSSPACE;
113 uio.uio_rw = UIO_WRITE;
114 for (i = 0; i < PHYS_PAGE_COUNT(len); i++, dst += PAGE_SIZE)
115 m[i] = PHYS_TO_VM_PAGE(dst);
116 return (uiomove_fromphys(m, dst & PAGE_MASK, len, &uio));
117 }
118
119 int
120 physcopyout(vm_paddr_t src, void *dst, size_t len)
121 {
122 vm_page_t m[PHYS_PAGE_COUNT(len)];
123 struct iovec iov[1];
124 struct uio uio;
125 int i;
126
127 iov[0].iov_base = dst;
128 iov[0].iov_len = len;
129 uio.uio_iov = iov;
130 uio.uio_iovcnt = 1;
131 uio.uio_offset = 0;
132 uio.uio_resid = len;
133 uio.uio_segflg = UIO_SYSSPACE;
134 uio.uio_rw = UIO_READ;
135 for (i = 0; i < PHYS_PAGE_COUNT(len); i++, src += PAGE_SIZE)
136 m[i] = PHYS_TO_VM_PAGE(src);
137 return (uiomove_fromphys(m, src & PAGE_MASK, len, &uio));
138 }
139
140 #undef PHYS_PAGE_COUNT
141
142 int
143 physcopyin_vlist(bus_dma_segment_t *src, off_t offset, vm_paddr_t dst,
144 size_t len)
145 {
146 size_t seg_len;
147 int error;
148
149 error = 0;
150 while (offset >= src->ds_len) {
151 offset -= src->ds_len;
152 src++;
153 }
154
155 while (len > 0 && error == 0) {
156 seg_len = MIN(src->ds_len - offset, len);
157 error = physcopyin((void *)(uintptr_t)(src->ds_addr + offset),
158 dst, seg_len);
159 offset = 0;
160 src++;
161 len -= seg_len;
162 dst += seg_len;
163 }
164
165 return (error);
166 }
167
168 int
169 physcopyout_vlist(vm_paddr_t src, bus_dma_segment_t *dst, off_t offset,
170 size_t len)
171 {
172 size_t seg_len;
173 int error;
174
175 error = 0;
176 while (offset >= dst->ds_len) {
177 offset -= dst->ds_len;
178 dst++;
179 }
180
181 while (len > 0 && error == 0) {
182 seg_len = MIN(dst->ds_len - offset, len);
183 error = physcopyout(src, (void *)(uintptr_t)(dst->ds_addr +
184 offset), seg_len);
185 offset = 0;
186 dst++;
187 len -= seg_len;
188 src += seg_len;
189 }
190
191 return (error);
192 }
193
/*
 * Move up to "n" bytes between the kernel buffer "cp" and the I/O vector
 * described by "uio".  This is uiomove_faultflag() with the nofault flag
 * cleared; see that function for the transfer rules and the return value.
 */
int
uiomove(void *cp, int n, struct uio *uio)
{
	const int nofault = 0;

	return (uiomove_faultflag(cp, n, uio, nofault));
}
200
/*
 * Move up to "n" bytes between the kernel buffer "cp" and the I/O vector
 * described by "uio".  This is uiomove_faultflag() with the nofault flag
 * set; see that function for the transfer rules and the return value.
 */
int
uiomove_nofault(void *cp, int n, struct uio *uio)
{
	const int nofault = 1;

	return (uiomove_faultflag(cp, n, uio, nofault));
}
207
208 static int
209 uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault)
210 {
211 struct iovec *iov;
212 size_t cnt;
213 int error, newflags, save;
214
215 save = error = 0;
216
217 KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
218 ("uiomove: mode"));
219 KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread,
220 ("uiomove proc"));
221
222 if (uio->uio_segflg == UIO_USERSPACE) {
223 newflags = TDP_DEADLKTREAT;
224 if (nofault) {
225 /*
226 * Fail if a non-spurious page fault occurs.
227 */
228 newflags |= TDP_NOFAULTING | TDP_RESETSPUR;
229 } else {
230 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
231 "Calling uiomove()");
232 }
233 save = curthread_pflags_set(newflags);
234 } else {
235 KASSERT(nofault == 0, ("uiomove: nofault"));
236 }
237
238 while (n > 0 && uio->uio_resid) {
239 iov = uio->uio_iov;
240 cnt = iov->iov_len;
241 if (cnt == 0) {
242 uio->uio_iov++;
243 uio->uio_iovcnt--;
244 continue;
245 }
246 if (cnt > n)
247 cnt = n;
248
249 switch (uio->uio_segflg) {
250
251 case UIO_USERSPACE:
252 maybe_yield();
253 if (uio->uio_rw == UIO_READ)
254 error = copyout(cp, iov->iov_base, cnt);
255 else
256 error = copyin(iov->iov_base, cp, cnt);
257 if (error)
258 goto out;
259 break;
260
261 case UIO_SYSSPACE:
262 if (uio->uio_rw == UIO_READ)
263 bcopy(cp, iov->iov_base, cnt);
264 else
265 bcopy(iov->iov_base, cp, cnt);
266 break;
267 case UIO_NOCOPY:
268 break;
269 }
270 iov->iov_base = (char *)iov->iov_base + cnt;
271 iov->iov_len -= cnt;
272 uio->uio_resid -= cnt;
273 uio->uio_offset += cnt;
274 cp = (char *)cp + cnt;
275 n -= cnt;
276 }
277 out:
278 if (save)
279 curthread_pflags_restore(save);
280 return (error);
281 }
282
283 /*
284 * Wrapper for uiomove() that validates the arguments against a known-good
285 * kernel buffer. Currently, uiomove accepts a signed (n) argument, which
286 * is almost definitely a bad thing, so we catch that here as well. We
287 * return a runtime failure, but it might be desirable to generate a runtime
288 * assertion failure instead.
289 */
290 int
291 uiomove_frombuf(void *buf, int buflen, struct uio *uio)
292 {
293 size_t offset, n;
294
295 if (uio->uio_offset < 0 || uio->uio_resid < 0 ||
296 (offset = uio->uio_offset) != uio->uio_offset)
297 return (EINVAL);
298 if (buflen <= 0 || offset >= buflen)
299 return (0);
300 if ((n = buflen - offset) > IOSIZE_MAX)
301 return (EINVAL);
302 return (uiomove((char *)buf + offset, n, uio));
303 }
304
305 /*
306 * Give next character to user as result of read.
307 */
308 int
309 ureadc(int c, struct uio *uio)
310 {
311 struct iovec *iov;
312 char *iov_base;
313
314 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
315 "Calling ureadc()");
316
317 again:
318 if (uio->uio_iovcnt == 0 || uio->uio_resid == 0)
319 panic("ureadc");
320 iov = uio->uio_iov;
321 if (iov->iov_len == 0) {
322 uio->uio_iovcnt--;
323 uio->uio_iov++;
324 goto again;
325 }
326 switch (uio->uio_segflg) {
327
328 case UIO_USERSPACE:
329 if (subyte(iov->iov_base, c) < 0)
330 return (EFAULT);
331 break;
332
333 case UIO_SYSSPACE:
334 iov_base = iov->iov_base;
335 *iov_base = c;
336 break;
337
338 case UIO_NOCOPY:
339 break;
340 }
341 iov->iov_base = (char *)iov->iov_base + 1;
342 iov->iov_len--;
343 uio->uio_resid--;
344 uio->uio_offset++;
345 return (0);
346 }
347
348 int
349 copyinfrom(const void * __restrict src, void * __restrict dst, size_t len,
350 int seg)
351 {
352 int error = 0;
353
354 switch (seg) {
355 case UIO_USERSPACE:
356 error = copyin(src, dst, len);
357 break;
358 case UIO_SYSSPACE:
359 bcopy(src, dst, len);
360 break;
361 default:
362 panic("copyinfrom: bad seg %d\n", seg);
363 }
364 return (error);
365 }
366
367 int
368 copyinstrfrom(const void * __restrict src, void * __restrict dst, size_t len,
369 size_t * __restrict copied, int seg)
370 {
371 int error = 0;
372
373 switch (seg) {
374 case UIO_USERSPACE:
375 error = copyinstr(src, dst, len, copied);
376 break;
377 case UIO_SYSSPACE:
378 error = copystr(src, dst, len, copied);
379 break;
380 default:
381 panic("copyinstrfrom: bad seg %d\n", seg);
382 }
383 return (error);
384 }
385
386 int
387 copyiniov(const struct iovec *iovp, u_int iovcnt, struct iovec **iov, int error)
388 {
389 u_int iovlen;
390
391 *iov = NULL;
392 if (iovcnt > UIO_MAXIOV)
393 return (error);
394 iovlen = iovcnt * sizeof (struct iovec);
395 *iov = malloc(iovlen, M_IOV, M_WAITOK);
396 error = copyin(iovp, *iov, iovlen);
397 if (error) {
398 free(*iov, M_IOV);
399 *iov = NULL;
400 }
401 return (error);
402 }
403
404 int
405 copyinuio(const struct iovec *iovp, u_int iovcnt, struct uio **uiop)
406 {
407 struct iovec *iov;
408 struct uio *uio;
409 u_int iovlen;
410 int error, i;
411
412 *uiop = NULL;
413 if (iovcnt > UIO_MAXIOV)
414 return (EINVAL);
415 iovlen = iovcnt * sizeof (struct iovec);
416 uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK);
417 iov = (struct iovec *)(uio + 1);
418 error = copyin(iovp, iov, iovlen);
419 if (error) {
420 free(uio, M_IOV);
421 return (error);
422 }
423 uio->uio_iov = iov;
424 uio->uio_iovcnt = iovcnt;
425 uio->uio_segflg = UIO_USERSPACE;
426 uio->uio_offset = -1;
427 uio->uio_resid = 0;
428 for (i = 0; i < iovcnt; i++) {
429 if (iov->iov_len > IOSIZE_MAX - uio->uio_resid) {
430 free(uio, M_IOV);
431 return (EINVAL);
432 }
433 uio->uio_resid += iov->iov_len;
434 iov++;
435 }
436 *uiop = uio;
437 return (0);
438 }
439
440 struct uio *
441 cloneuio(struct uio *uiop)
442 {
443 struct uio *uio;
444 int iovlen;
445
446 iovlen = uiop->uio_iovcnt * sizeof (struct iovec);
447 uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK);
448 *uio = *uiop;
449 uio->uio_iov = (struct iovec *)(uio + 1);
450 bcopy(uiop->uio_iov, uio->uio_iov, iovlen);
451 return (uio);
452 }
453
454 /*
455 * Map some anonymous memory in user space of size sz, rounded up to the page
456 * boundary.
457 */
458 int
459 copyout_map(struct thread *td, vm_offset_t *addr, size_t sz)
460 {
461 struct vmspace *vms;
462 int error;
463 vm_size_t size;
464
465 vms = td->td_proc->p_vmspace;
466
467 /*
468 * Map somewhere after heap in process memory.
469 */
470 *addr = round_page((vm_offset_t)vms->vm_daddr +
471 lim_max(td, RLIMIT_DATA));
472
473 /* round size up to page boundary */
474 size = (vm_size_t)round_page(sz);
475 if (size == 0)
476 return (EINVAL);
477 error = vm_mmap_object(&vms->vm_map, addr, size, VM_PROT_READ |
478 VM_PROT_WRITE, VM_PROT_ALL, MAP_PRIVATE | MAP_ANON, NULL, 0,
479 FALSE, td);
480 return (error);
481 }
482
483 /*
484 * Unmap memory in user space.
485 */
486 int
487 copyout_unmap(struct thread *td, vm_offset_t addr, size_t sz)
488 {
489 vm_map_t map;
490 vm_size_t size;
491
492 if (sz == 0)
493 return (0);
494
495 map = &td->td_proc->p_vmspace->vm_map;
496 size = (vm_size_t)round_page(sz);
497
498 if (vm_map_remove(map, addr, addr + size) != KERN_SUCCESS)
499 return (EINVAL);
500
501 return (0);
502 }
503
504 #ifdef NO_FUEWORD
505 /*
506 * XXXKIB Temporary implementation of the fue*() functions, which does
507 * not handle a usermode value of -1 properly: it cannot be told apart
508 * from the fault code. Keep this until MD code is written. Currently
509 * sparc64 does not have a proper implementation.
510 */
511
/*
 * Fallback fueword() layered on fuword(): fetch a long from "base" and
 * store it through "val".
 *
 * Returns 0 on success and -1 when fuword() returns -1.  A word that really
 * contains -1 is therefore reported as a failure, and "*val" is left
 * untouched in that case.
 */
int
fueword(volatile const void *base, long *val)
{
	long fetched;

	fetched = fuword(base);
	if (fetched != -1) {
		*val = fetched;
		return (0);
	}
	return (-1);
}
523
/*
 * Fallback fueword32() layered on fuword32(): fetch a 32-bit word from
 * "base" and store it through "val".
 *
 * Returns 0 on success and -1 when fuword32() returns -1.  A word that
 * really contains -1 is therefore reported as a failure, and "*val" is left
 * untouched in that case.
 */
int
fueword32(volatile const void *base, int32_t *val)
{
	int32_t fetched;

	fetched = fuword32(base);
	if (fetched != -1) {
		*val = fetched;
		return (0);
	}
	return (-1);
}
535
536 #ifdef _LP64
/*
 * Fallback fueword64() layered on fuword64(): fetch a 64-bit word from
 * "base" and store it through "val".
 *
 * Returns 0 on success and -1 when fuword64() returns -1.  A word that
 * really contains -1 is therefore reported as a failure, and "*val" is left
 * untouched in that case.
 */
int
fueword64(volatile const void *base, int64_t *val)
{
	int64_t fetched;

	fetched = fuword64(base);
	if (fetched != -1) {
		*val = fetched;
		return (0);
	}
	return (-1);
}
548 #endif
549
/*
 * Fallback casueword32() layered on casuword32().
 *
 * Calls casuword32(base, oldval, newval) and stores its result through
 * "oldvalp".  Returns 0 on success and -1 when that result, viewed as a
 * signed 32-bit value, equals -1; a stored word of all ones is therefore
 * reported as a failure and "*oldvalp" is left untouched.
 */
int
casueword32(volatile uint32_t *base, uint32_t oldval, uint32_t *oldvalp,
    uint32_t newval)
{
	int32_t seen;

	seen = casuword32(base, oldval, newval);
	if (seen != -1) {
		*oldvalp = seen;
		return (0);
	}
	return (-1);
}
562
563 int
564 casueword(volatile u_long *p, u_long oldval, u_long *oldvalp, u_long newval)
565 {
566 u_long ov;
567
568 ov = casuword(p, oldval, newval);
569 if (ov == -1)
570 return (-1);
571 *oldvalp = ov;
572 return (0);
573 }
574 #else /* NO_FUEWORD */
/*
 * Legacy fetch interface implemented on top of fueword32().
 *
 * Returns the 32-bit word fetched from "addr", or -1 when fueword32()
 * reports -1.  Callers cannot tell a failure from a stored value of -1.
 */
int32_t
fuword32(volatile const void *addr)
{
	int32_t word;

	if (fueword32(addr, &word) == -1)
		return (-1);
	return (word);
}
584
585 #ifdef _LP64
/*
 * Legacy fetch interface implemented on top of fueword64().
 *
 * Returns the 64-bit word fetched from "addr", or -1 when fueword64()
 * reports -1.  Callers cannot tell a failure from a stored value of -1.
 */
int64_t
fuword64(volatile const void *addr)
{
	int64_t word;

	if (fueword64(addr, &word) == -1)
		return (-1);
	return (word);
}
595 #endif /* _LP64 */
596
/*
 * Legacy fetch interface implemented on top of fueword().
 *
 * Returns the long fetched from "addr", or -1 when fueword() reports -1.
 * Callers cannot tell a failure from a stored value of -1.
 */
long
fuword(volatile const void *addr)
{
	long word;

	if (fueword(addr, &word) == -1)
		return (-1);
	return (word);
}
606
/*
 * Legacy compare-and-set interface implemented on top of casueword32().
 *
 * Returns the value casueword32() stored as the old contents of "addr", or
 * -1 (all ones once converted to uint32_t) when casueword32() reports -1.
 * Callers cannot tell a failure from an old value of all ones.
 */
uint32_t
casuword32(volatile uint32_t *addr, uint32_t old, uint32_t new)
{
	uint32_t seen;

	if (casueword32(addr, old, &seen, new) == -1)
		return (-1);
	return (seen);
}
616
617 u_long
618 casuword(volatile u_long *addr, u_long old, u_long new)
619 {
620 int rv;
621 u_long val;
622
623 rv = casueword(addr, old, &val, new);
624 return (rv == -1 ? -1 : val);
625 }
626
627 #endif /* NO_FUEWORD */
Cache object: a898992783e44507225fd0390bb5b591
|