The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/subr_uio.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*-
    2  * Copyright (c) 1982, 1986, 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 4. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)kern_subr.c 8.3 (Berkeley) 1/21/94
   35  */
   36 
   37 #include <sys/cdefs.h>
   38 __FBSDID("$FreeBSD: releng/9.0/sys/kern/subr_uio.c 223889 2011-07-09 15:21:10Z kib $");
   39 
   40 #include "opt_zero.h"
   41 
   42 #include <sys/param.h>
   43 #include <sys/systm.h>
   44 #include <sys/kernel.h>
   45 #include <sys/limits.h>
   46 #include <sys/lock.h>
   47 #include <sys/mman.h>
   48 #include <sys/mutex.h>
   49 #include <sys/proc.h>
   50 #include <sys/resourcevar.h>
   51 #include <sys/sched.h>
   52 #include <sys/sysctl.h>
   53 #include <sys/vnode.h>
   54 
   55 #include <vm/vm.h>
   56 #include <vm/vm_extern.h>
   57 #include <vm/vm_page.h>
   58 #include <vm/vm_map.h>
   59 #ifdef ZERO_COPY_SOCKETS
   60 #include <vm/vm_param.h>
   61 #include <vm/vm_object.h>
   62 #endif
   63 
/*
 * Export the compile-time limit UIO_MAXIOV as the read-only (CTLFLAG_RD)
 * integer sysctl iov_max under the _kern node.  No backing variable is
 * supplied (the pointer argument is NULL); the constant itself is passed
 * as the value argument.
 */
SYSCTL_INT(_kern, KERN_IOV_MAX, iov_max, CTLFLAG_RD, NULL, UIO_MAXIOV,
        "Maximum number of elements in an I/O vector; sysconf(_SC_IOV_MAX)");

/*
 * Forward declaration: uiomove() and uiomove_nofault() call this helper
 * before its definition appears later in the file.
 */
static int uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault);
   68 
   69 #ifdef ZERO_COPY_SOCKETS
   70 /* Declared in uipc_socket.c */
   71 extern int so_zero_copy_receive;
   72 
/*
 * Identify the physical page mapped at the given kernel virtual
 * address.  Insert this physical page into the given address space at
 * the given virtual address, replacing the physical page, if any,
 * that already exists there.
 *
 * mapa:  address space (VM map) that receives the page.
 * kaddr: kernel virtual address whose backing page is given away.
 * uaddr: destination virtual address in mapa; must be page aligned
 *        (asserted below).
 *
 * Returns EFAULT when vm_map_lookup() cannot resolve uaddr for write
 * access, and KERN_SUCCESS otherwise.  The only caller in this file,
 * userspaceco(), just tests the result against zero.
 */
static int
vm_pgmoveco(vm_map_t mapa, vm_offset_t kaddr, vm_offset_t uaddr)
{
        /* Local copy: vm_map_lookup() is handed a pointer to the map. */
        vm_map_t map = mapa;
        vm_page_t kern_pg, user_pg;
        vm_object_t uobject;
        vm_map_entry_t entry;
        vm_pindex_t upindex;
        vm_prot_t prot;
        boolean_t wired;

        KASSERT((uaddr & PAGE_MASK) == 0,
            ("vm_pgmoveco: uaddr is not page aligned"));

        /*
         * Herein the physical page is validated and dirtied.  It is
         * unwired in sf_buf_mext().
         */
        kern_pg = PHYS_TO_VM_PAGE(vtophys(kaddr));
        kern_pg->valid = VM_PAGE_BITS_ALL;
        KASSERT(kern_pg->queue == PQ_NONE && kern_pg->wire_count == 1,
            ("vm_pgmoveco: kern_pg is not correctly wired"));

        /*
         * Resolve uaddr, requesting write access, to the object and page
         * index that back it.  A failed lookup is reported as EFAULT.
         */
        if ((vm_map_lookup(&map, uaddr,
                           VM_PROT_WRITE, &entry, &uobject,
                           &upindex, &prot, &wired)) != KERN_SUCCESS) {
                return(EFAULT);
        }
        VM_OBJECT_LOCK(uobject);
retry:
        if ((user_pg = vm_page_lookup(uobject, upindex)) != NULL) {
                /*
                 * A page is already resident at this index.  If
                 * vm_page_sleep_if_busy() returns nonzero (presumably
                 * because it slept on a busy page), redo the lookup from
                 * scratch; otherwise drop every mapping of the old page
                 * and free it.
                 */
                if (vm_page_sleep_if_busy(user_pg, TRUE, "vm_pgmoveco"))
                        goto retry;
                vm_page_lock(user_pg);
                pmap_remove_all(user_pg);
                vm_page_free(user_pg);
                vm_page_unlock(user_pg);
        } else {
                /*
                 * Even if a physical page does not exist in the
                 * object chain's first object, a physical page from a
                 * backing object may be mapped read only.
                 */
                if (uobject->backing_object != NULL)
                        pmap_remove(map->pmap, uaddr, uaddr + PAGE_SIZE);
        }
        /* Hand the kernel page to the user object and mark it dirty. */
        vm_page_insert(kern_pg, uobject, upindex);
        vm_page_dirty(kern_pg);
        VM_OBJECT_UNLOCK(uobject);
        /* Finish the lookup started by vm_map_lookup() above. */
        vm_map_lookup_done(map, entry);
        return(KERN_SUCCESS);
}
  131 #endif /* ZERO_COPY_SOCKETS */
  132 
  133 int
  134 copyin_nofault(const void *udaddr, void *kaddr, size_t len)
  135 {
  136         int error, save;
  137 
  138         save = vm_fault_disable_pagefaults();
  139         error = copyin(udaddr, kaddr, len);
  140         vm_fault_enable_pagefaults(save);
  141         return (error);
  142 }
  143 
  144 int
  145 copyout_nofault(const void *kaddr, void *udaddr, size_t len)
  146 {
  147         int error, save;
  148 
  149         save = vm_fault_disable_pagefaults();
  150         error = copyout(kaddr, udaddr, len);
  151         vm_fault_enable_pagefaults(save);
  152         return (error);
  153 }
  154 
  155 int
  156 uiomove(void *cp, int n, struct uio *uio)
  157 {
  158 
  159         return (uiomove_faultflag(cp, n, uio, 0));
  160 }
  161 
  162 int
  163 uiomove_nofault(void *cp, int n, struct uio *uio)
  164 {
  165 
  166         return (uiomove_faultflag(cp, n, uio, 1));
  167 }
  168 
  169 static int
  170 uiomove_faultflag(void *cp, int n, struct uio *uio, int nofault)
  171 {
  172         struct thread *td;
  173         struct iovec *iov;
  174         u_int cnt;
  175         int error, newflags, save;
  176 
  177         td = curthread;
  178         error = 0;
  179 
  180         KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
  181             ("uiomove: mode"));
  182         KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == td,
  183             ("uiomove proc"));
  184         if (!nofault)
  185                 WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
  186                     "Calling uiomove()");
  187 
  188         /* XXX does it make a sense to set TDP_DEADLKTREAT for UIO_SYSSPACE ? */
  189         newflags = TDP_DEADLKTREAT;
  190         if (uio->uio_segflg == UIO_USERSPACE && nofault)
  191                 newflags |= TDP_NOFAULTING;
  192         save = curthread_pflags_set(newflags);
  193 
  194         while (n > 0 && uio->uio_resid) {
  195                 iov = uio->uio_iov;
  196                 cnt = iov->iov_len;
  197                 if (cnt == 0) {
  198                         uio->uio_iov++;
  199                         uio->uio_iovcnt--;
  200                         continue;
  201                 }
  202                 if (cnt > n)
  203                         cnt = n;
  204 
  205                 switch (uio->uio_segflg) {
  206 
  207                 case UIO_USERSPACE:
  208                         maybe_yield();
  209                         if (uio->uio_rw == UIO_READ)
  210                                 error = copyout(cp, iov->iov_base, cnt);
  211                         else
  212                                 error = copyin(iov->iov_base, cp, cnt);
  213                         if (error)
  214                                 goto out;
  215                         break;
  216 
  217                 case UIO_SYSSPACE:
  218                         if (uio->uio_rw == UIO_READ)
  219                                 bcopy(cp, iov->iov_base, cnt);
  220                         else
  221                                 bcopy(iov->iov_base, cp, cnt);
  222                         break;
  223                 case UIO_NOCOPY:
  224                         break;
  225                 }
  226                 iov->iov_base = (char *)iov->iov_base + cnt;
  227                 iov->iov_len -= cnt;
  228                 uio->uio_resid -= cnt;
  229                 uio->uio_offset += cnt;
  230                 cp = (char *)cp + cnt;
  231                 n -= cnt;
  232         }
  233 out:
  234         curthread_pflags_restore(save);
  235         return (error);
  236 }
  237 
  238 /*
  239  * Wrapper for uiomove() that validates the arguments against a known-good
  240  * kernel buffer.  Currently, uiomove accepts a signed (n) argument, which
  241  * is almost definitely a bad thing, so we catch that here as well.  We
  242  * return a runtime failure, but it might be desirable to generate a runtime
  243  * assertion failure instead.
  244  */
  245 int
  246 uiomove_frombuf(void *buf, int buflen, struct uio *uio)
  247 {
  248         unsigned int offset, n;
  249 
  250         if (uio->uio_offset < 0 || uio->uio_resid < 0 ||
  251             (offset = uio->uio_offset) != uio->uio_offset)
  252                 return (EINVAL);
  253         if (buflen <= 0 || offset >= buflen)
  254                 return (0);
  255         if ((n = buflen - offset) > INT_MAX)
  256                 return (EINVAL);
  257         return (uiomove((char *)buf + offset, n, uio));
  258 }
  259 
  260 #ifdef ZERO_COPY_SOCKETS
  261 /*
  262  * Experimental support for zero-copy I/O
  263  */
  264 static int
  265 userspaceco(void *cp, u_int cnt, struct uio *uio, int disposable)
  266 {
  267         struct iovec *iov;
  268         int error;
  269 
  270         iov = uio->uio_iov;
  271         if (uio->uio_rw == UIO_READ) {
  272                 if ((so_zero_copy_receive != 0)
  273                  && ((cnt & PAGE_MASK) == 0)
  274                  && ((((intptr_t) iov->iov_base) & PAGE_MASK) == 0)
  275                  && ((uio->uio_offset & PAGE_MASK) == 0)
  276                  && ((((intptr_t) cp) & PAGE_MASK) == 0)
  277                  && (disposable != 0)) {
  278                         /* SOCKET: use page-trading */
  279                         /*
  280                          * We only want to call vm_pgmoveco() on
  281                          * disposeable pages, since it gives the
  282                          * kernel page to the userland process.
  283                          */
  284                         error = vm_pgmoveco(&curproc->p_vmspace->vm_map,
  285                             (vm_offset_t)cp, (vm_offset_t)iov->iov_base);
  286 
  287                         /*
  288                          * If we get an error back, attempt
  289                          * to use copyout() instead.  The
  290                          * disposable page should be freed
  291                          * automatically if we weren't able to move
  292                          * it into userland.
  293                          */
  294                         if (error != 0)
  295                                 error = copyout(cp, iov->iov_base, cnt);
  296                 } else {
  297                         error = copyout(cp, iov->iov_base, cnt);
  298                 }
  299         } else {
  300                 error = copyin(iov->iov_base, cp, cnt);
  301         }
  302         return (error);
  303 }
  304 
  305 int
  306 uiomoveco(void *cp, int n, struct uio *uio, int disposable)
  307 {
  308         struct iovec *iov;
  309         u_int cnt;
  310         int error;
  311 
  312         KASSERT(uio->uio_rw == UIO_READ || uio->uio_rw == UIO_WRITE,
  313             ("uiomoveco: mode"));
  314         KASSERT(uio->uio_segflg != UIO_USERSPACE || uio->uio_td == curthread,
  315             ("uiomoveco proc"));
  316 
  317         while (n > 0 && uio->uio_resid) {
  318                 iov = uio->uio_iov;
  319                 cnt = iov->iov_len;
  320                 if (cnt == 0) {
  321                         uio->uio_iov++;
  322                         uio->uio_iovcnt--;
  323                         continue;
  324                 }
  325                 if (cnt > n)
  326                         cnt = n;
  327 
  328                 switch (uio->uio_segflg) {
  329 
  330                 case UIO_USERSPACE:
  331                         maybe_yield();
  332                         error = userspaceco(cp, cnt, uio, disposable);
  333                         if (error)
  334                                 return (error);
  335                         break;
  336 
  337                 case UIO_SYSSPACE:
  338                         if (uio->uio_rw == UIO_READ)
  339                                 bcopy(cp, iov->iov_base, cnt);
  340                         else
  341                                 bcopy(iov->iov_base, cp, cnt);
  342                         break;
  343                 case UIO_NOCOPY:
  344                         break;
  345                 }
  346                 iov->iov_base = (char *)iov->iov_base + cnt;
  347                 iov->iov_len -= cnt;
  348                 uio->uio_resid -= cnt;
  349                 uio->uio_offset += cnt;
  350                 cp = (char *)cp + cnt;
  351                 n -= cnt;
  352         }
  353         return (0);
  354 }
  355 #endif /* ZERO_COPY_SOCKETS */
  356 
  357 /*
  358  * Give next character to user as result of read.
  359  */
  360 int
  361 ureadc(int c, struct uio *uio)
  362 {
  363         struct iovec *iov;
  364         char *iov_base;
  365 
  366         WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL,
  367             "Calling ureadc()");
  368 
  369 again:
  370         if (uio->uio_iovcnt == 0 || uio->uio_resid == 0)
  371                 panic("ureadc");
  372         iov = uio->uio_iov;
  373         if (iov->iov_len == 0) {
  374                 uio->uio_iovcnt--;
  375                 uio->uio_iov++;
  376                 goto again;
  377         }
  378         switch (uio->uio_segflg) {
  379 
  380         case UIO_USERSPACE:
  381                 if (subyte(iov->iov_base, c) < 0)
  382                         return (EFAULT);
  383                 break;
  384 
  385         case UIO_SYSSPACE:
  386                 iov_base = iov->iov_base;
  387                 *iov_base = c;
  388                 iov->iov_base = iov_base;
  389                 break;
  390 
  391         case UIO_NOCOPY:
  392                 break;
  393         }
  394         iov->iov_base = (char *)iov->iov_base + 1;
  395         iov->iov_len--;
  396         uio->uio_resid--;
  397         uio->uio_offset++;
  398         return (0);
  399 }
  400 
  401 int
  402 copyinfrom(const void * __restrict src, void * __restrict dst, size_t len,
  403     int seg)
  404 {
  405         int error = 0;
  406 
  407         switch (seg) {
  408         case UIO_USERSPACE:
  409                 error = copyin(src, dst, len);
  410                 break;
  411         case UIO_SYSSPACE:
  412                 bcopy(src, dst, len);
  413                 break;
  414         default:
  415                 panic("copyinfrom: bad seg %d\n", seg);
  416         }
  417         return (error);
  418 }
  419 
  420 int
  421 copyinstrfrom(const void * __restrict src, void * __restrict dst, size_t len,
  422     size_t * __restrict copied, int seg)
  423 {
  424         int error = 0;
  425 
  426         switch (seg) {
  427         case UIO_USERSPACE:
  428                 error = copyinstr(src, dst, len, copied);
  429                 break;
  430         case UIO_SYSSPACE:
  431                 error = copystr(src, dst, len, copied);
  432                 break;
  433         default:
  434                 panic("copyinstrfrom: bad seg %d\n", seg);
  435         }
  436         return (error);
  437 }
  438 
  439 int
  440 copyiniov(struct iovec *iovp, u_int iovcnt, struct iovec **iov, int error)
  441 {
  442         u_int iovlen;
  443 
  444         *iov = NULL;
  445         if (iovcnt > UIO_MAXIOV)
  446                 return (error);
  447         iovlen = iovcnt * sizeof (struct iovec);
  448         *iov = malloc(iovlen, M_IOV, M_WAITOK);
  449         error = copyin(iovp, *iov, iovlen);
  450         if (error) {
  451                 free(*iov, M_IOV);
  452                 *iov = NULL;
  453         }
  454         return (error);
  455 }
  456 
  457 int
  458 copyinuio(struct iovec *iovp, u_int iovcnt, struct uio **uiop)
  459 {
  460         struct iovec *iov;
  461         struct uio *uio;
  462         u_int iovlen;
  463         int error, i;
  464 
  465         *uiop = NULL;
  466         if (iovcnt > UIO_MAXIOV)
  467                 return (EINVAL);
  468         iovlen = iovcnt * sizeof (struct iovec);
  469         uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK);
  470         iov = (struct iovec *)(uio + 1);
  471         error = copyin(iovp, iov, iovlen);
  472         if (error) {
  473                 free(uio, M_IOV);
  474                 return (error);
  475         }
  476         uio->uio_iov = iov;
  477         uio->uio_iovcnt = iovcnt;
  478         uio->uio_segflg = UIO_USERSPACE;
  479         uio->uio_offset = -1;
  480         uio->uio_resid = 0;
  481         for (i = 0; i < iovcnt; i++) {
  482                 if (iov->iov_len > INT_MAX - uio->uio_resid) {
  483                         free(uio, M_IOV);
  484                         return (EINVAL);
  485                 }
  486                 uio->uio_resid += iov->iov_len;
  487                 iov++;
  488         }
  489         *uiop = uio;
  490         return (0);
  491 }
  492 
  493 struct uio *
  494 cloneuio(struct uio *uiop)
  495 {
  496         struct uio *uio;
  497         int iovlen;
  498 
  499         iovlen = uiop->uio_iovcnt * sizeof (struct iovec);
  500         uio = malloc(iovlen + sizeof *uio, M_IOV, M_WAITOK);
  501         *uio = *uiop;
  502         uio->uio_iov = (struct iovec *)(uio + 1);
  503         bcopy(uiop->uio_iov, uio->uio_iov, iovlen);
  504         return (uio);
  505 }
  506 
  507 /*
  508  * Map some anonymous memory in user space of size sz, rounded up to the page
  509  * boundary.
  510  */
  511 int
  512 copyout_map(struct thread *td, vm_offset_t *addr, size_t sz)
  513 {
  514         struct vmspace *vms;
  515         int error;
  516         vm_size_t size;
  517 
  518         vms = td->td_proc->p_vmspace;
  519 
  520         /*
  521          * Map somewhere after heap in process memory.
  522          */
  523         PROC_LOCK(td->td_proc);
  524         *addr = round_page((vm_offset_t)vms->vm_daddr +
  525             lim_max(td->td_proc, RLIMIT_DATA));
  526         PROC_UNLOCK(td->td_proc);
  527 
  528         /* round size up to page boundry */
  529         size = (vm_size_t)round_page(sz);
  530 
  531         error = vm_mmap(&vms->vm_map, addr, size, PROT_READ | PROT_WRITE,
  532             VM_PROT_ALL, MAP_PRIVATE | MAP_ANON, OBJT_DEFAULT, NULL, 0);
  533 
  534         return (error);
  535 }
  536 
  537 /*
  538  * Unmap memory in user space.
  539  */
  540 int
  541 copyout_unmap(struct thread *td, vm_offset_t addr, size_t sz)
  542 {
  543         vm_map_t map;
  544         vm_size_t size;
  545 
  546         if (sz == 0)
  547                 return (0);
  548 
  549         map = &td->td_proc->p_vmspace->vm_map;
  550         size = (vm_size_t)round_page(sz);
  551 
  552         if (vm_map_remove(map, addr, addr + size) != KERN_SUCCESS)
  553                 return (EINVAL);
  554 
  555         return (0);
  556 }

Cache object: de60b077f9a4185ce399d0ccca4b57a7


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.