FreeBSD/Linux Kernel Cross Reference
sys/uvm/uvm_mmap.c


    1 /*      $OpenBSD: uvm_mmap.c,v 1.177 2023/01/16 07:09:11 guenther Exp $ */
    2 /*      $NetBSD: uvm_mmap.c,v 1.49 2001/02/18 21:19:08 chs Exp $        */
    3 
    4 /*
    5  * Copyright (c) 1997 Charles D. Cranor and Washington University.
    6  * Copyright (c) 1991, 1993 The Regents of the University of California.
    7  * Copyright (c) 1988 University of Utah.
    8  *
    9  * All rights reserved.
   10  *
   11  * This code is derived from software contributed to Berkeley by
   12  * the Systems Programming Group of the University of Utah Computer
   13  * Science Department.
   14  *
   15  * Redistribution and use in source and binary forms, with or without
   16  * modification, are permitted provided that the following conditions
   17  * are met:
   18  * 1. Redistributions of source code must retain the above copyright
   19  *    notice, this list of conditions and the following disclaimer.
   20  * 2. Redistributions in binary form must reproduce the above copyright
   21  *    notice, this list of conditions and the following disclaimer in the
   22  *    documentation and/or other materials provided with the distribution.
   23  * 3. Neither the name of the University nor the names of its contributors
   24  *    may be used to endorse or promote products derived from this software
   25  *    without specific prior written permission.
   26  *
   27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   37  * SUCH DAMAGE.
   38  *
   39  * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
   40  *      @(#)vm_mmap.c   8.5 (Berkeley) 5/19/94
   41  * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
   42  */
   43 
   44 /*
   45  * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
   46  * function.
   47  */
   48 #include <sys/param.h>
   49 #include <sys/systm.h>
   50 #include <sys/fcntl.h>
   51 #include <sys/file.h>
   52 #include <sys/filedesc.h>
   53 #include <sys/resourcevar.h>
   54 #include <sys/mman.h>
   55 #include <sys/mount.h>
   56 #include <sys/proc.h>
   57 #include <sys/malloc.h>
   58 #include <sys/vnode.h>
   59 #include <sys/conf.h>
   60 #include <sys/signalvar.h>
   61 #include <sys/syslog.h>
   62 #include <sys/stat.h>
   63 #include <sys/specdev.h>
   64 #include <sys/stdint.h>
   65 #include <sys/pledge.h>
   66 #include <sys/unistd.h>         /* for KBIND* */
   67 #include <sys/user.h>
   68 
   69 #include <machine/exec.h>       /* for __LDPGSZ */
   70 
   71 #include <sys/syscallargs.h>
   72 
   73 #include <uvm/uvm.h>
   74 #include <uvm/uvm_device.h>
   75 #include <uvm/uvm_vnode.h>
   76 
   77 int uvm_mmapanon(vm_map_t, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t, int,
   78     vsize_t, struct proc *);
   79 int uvm_mmapfile(vm_map_t, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t, int,
   80     struct vnode *, voff_t, vsize_t, struct proc *);
   81 
   82 
   83 /*
   84  * Page align addr and size, returning EINVAL on wraparound.
   85  */
   86 #define ALIGN_ADDR(addr, size, pageoff) do {                            \
   87         pageoff = (addr & PAGE_MASK);                                   \
   88         if (pageoff != 0) {                                             \
   89                 if (size > SIZE_MAX - pageoff)                          \
   90                         return EINVAL;  /* wraparound */        \
   91                 addr -= pageoff;                                        \
   92                 size += pageoff;                                        \
   93         }                                                               \
   94         if (size != 0) {                                                \
   95                 size = (vsize_t)round_page(size);                       \
   96                 if (size == 0)                                          \
   97                         return EINVAL;  /* wraparound */        \
   98         }                                                               \
   99 } while (0)
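
/*
 * Worked example (illustrative, assuming a 4 KB page size): for
 * addr = 0x10234 and size = 0x100, ALIGN_ADDR computes pageoff = 0x234,
 * rewinds addr to 0x10000, grows size to 0x334 and rounds it up to
 * 0x1000, so the aligned range covers exactly one page.  Callers such as
 * sys_mmap() below add pageoff back into the address they return.
 */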
  100 
  101 /*
  102  * sys_mquery: provide mapping hints to applications that do fixed mappings
  103  *
  104  * flags: 0 or MAP_FIXED (MAP_FIXED - means that we insist on this addr and
  105  *      don't care about PMAP_PREFER or such)
  106  * addr: hint where we'd like to place the mapping.
  107  * size: size of the mapping
  108  * fd: fd of the file we want to map
  109  * off: offset within the file
  110  */
  111 int
  112 sys_mquery(struct proc *p, void *v, register_t *retval)
  113 {
  114         struct sys_mquery_args /* {
  115                 syscallarg(void *) addr;
  116                 syscallarg(size_t) len;
  117                 syscallarg(int) prot;
  118                 syscallarg(int) flags;
  119                 syscallarg(int) fd;
  120                 syscallarg(off_t) pos;
  121         } */ *uap = v;
  122         struct file *fp;
  123         voff_t uoff;
  124         int error;
  125         vaddr_t vaddr;
  126         int flags = 0;
  127         vsize_t size;
  128         vm_prot_t prot;
  129         int fd;
  130 
  131         vaddr = (vaddr_t) SCARG(uap, addr);
  132         prot = SCARG(uap, prot);
  133         size = (vsize_t) SCARG(uap, len);
  134         fd = SCARG(uap, fd);
  135 
  136         if ((prot & PROT_MASK) != prot)
  137                 return EINVAL;
  138 
  139         if (SCARG(uap, flags) & MAP_FIXED)
  140                 flags |= UVM_FLAG_FIXED;
  141 
  142         if (fd >= 0) {
  143                 if ((error = getvnode(p, fd, &fp)) != 0)
  144                         return error;
  145                 uoff = SCARG(uap, pos);
  146         } else {
  147                 fp = NULL;
  148                 uoff = UVM_UNKNOWN_OFFSET;
  149         }
  150 
  151         if (vaddr == 0)
  152                 vaddr = uvm_map_hint(p->p_vmspace, prot, VM_MIN_ADDRESS,
  153                     VM_MAXUSER_ADDRESS);
  154 
  155         error = uvm_map_mquery(&p->p_vmspace->vm_map, &vaddr, size, uoff,
  156             flags);
  157         if (error == 0)
  158                 *retval = (register_t)(vaddr);
  159 
  160         if (fp != NULL)
  161                 FRELE(fp, p);
  162         return error;
  163 }
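
/*
 * Illustrative userland sketch (not part of the original file): probing
 * with mquery(2) for a place where a 64 KB mapping of `fd` could go near
 * a hint address.  The hint value and the descriptor are assumptions.
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <err.h>

static void *
probe_placement(int fd)
{
        void *hint = (void *)0x20000000UL;      /* hypothetical hint */
        void *where;

        /* flags 0: the kernel may move the hint; MAP_FIXED would insist */
        where = mquery(hint, 65536, PROT_READ, 0, fd, 0);
        if (where == MAP_FAILED)
                err(1, "mquery");
        return where;
}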
  164 
  165 int     uvm_wxabort;
  166 
  167 /*
  168  * W^X violations are only allowed on permitted filesystems.
  169  */
  170 static inline int
  171 uvm_wxcheck(struct proc *p, char *call)
  172 {
  173         struct process *pr = p->p_p;
  174         int wxallowed = (pr->ps_textvp->v_mount &&
  175             (pr->ps_textvp->v_mount->mnt_flag & MNT_WXALLOWED));
  176 
  177         if (wxallowed && (pr->ps_flags & PS_WXNEEDED))
  178                 return 0;
  179 
  180         if (uvm_wxabort) {
  181                 KERNEL_LOCK();
  182                 /* Report W^X failures */
  183                 if (pr->ps_wxcounter++ == 0)
  184                         log(LOG_NOTICE, "%s(%d): %s W^X violation\n",
  185                             pr->ps_comm, pr->ps_pid, call);
  186                 /* Send uncatchable SIGABRT for coredump */
  187                 sigexit(p, SIGABRT);
  188                 KERNEL_UNLOCK();
  189         }
  190 
  191         return ENOTSUP;
  192 }
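
/*
 * Illustrative userland sketch (not part of the original file): a request
 * for a writable and executable mapping, which reaches uvm_wxcheck()
 * above via sys_mmap().  Unless the binary is marked wxneeded and its
 * filesystem is mounted wxallowed, this fails with ENOTSUP.
 */
#include <sys/types.h>
#include <sys/mman.h>

static void *
try_wx(size_t len)
{
        /* PROT_WRITE|PROT_EXEC together trigger the W^X policy check */
        return mmap(NULL, len, PROT_READ | PROT_WRITE | PROT_EXEC,
            MAP_ANON | MAP_PRIVATE, -1, 0);
}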
  193 
  194 /*
  195  * sys_mmap: mmap system call.
  196  *
  197  * => file offset and address may not be page aligned
   198  *    - if MAP_FIXED, offset and address must have the same remainder mod PAGE_SIZE
  199  *    - if address isn't page aligned the mapping starts at trunc_page(addr)
  200  *      and the return value is adjusted up by the page offset.
  201  */
  202 int
  203 sys_mmap(struct proc *p, void *v, register_t *retval)
  204 {
  205         struct sys_mmap_args /* {
  206                 syscallarg(void *) addr;
  207                 syscallarg(size_t) len;
  208                 syscallarg(int) prot;
  209                 syscallarg(int) flags;
  210                 syscallarg(int) fd;
  211                 syscallarg(off_t) pos;
  212         } */ *uap = v;
  213         vaddr_t addr;
  214         struct vattr va;
  215         off_t pos;
  216         vsize_t limit, pageoff, size;
  217         vm_prot_t prot, maxprot;
  218         int flags, fd;
  219         vaddr_t vm_min_address = VM_MIN_ADDRESS;
  220         struct filedesc *fdp = p->p_fd;
  221         struct file *fp = NULL;
  222         struct vnode *vp;
  223         int error;
  224 
  225         /* first, extract syscall args from the uap. */
  226         addr = (vaddr_t) SCARG(uap, addr);
  227         size = (vsize_t) SCARG(uap, len);
  228         prot = SCARG(uap, prot);
  229         flags = SCARG(uap, flags);
  230         fd = SCARG(uap, fd);
  231         pos = SCARG(uap, pos);
  232 
  233         /*
  234          * Validate the flags.
  235          */
  236         if ((prot & PROT_MASK) != prot)
  237                 return EINVAL;
  238         if ((prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC) &&
  239             (error = uvm_wxcheck(p, "mmap")))
  240                 return error;
  241 
  242         if ((flags & MAP_FLAGMASK) != flags)
  243                 return EINVAL;
  244         if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
  245                 return EINVAL;
  246         if ((flags & (MAP_FIXED|__MAP_NOREPLACE)) == __MAP_NOREPLACE)
  247                 return EINVAL;
  248         if (flags & MAP_STACK) {
  249                 if ((flags & (MAP_ANON|MAP_PRIVATE)) != (MAP_ANON|MAP_PRIVATE))
  250                         return EINVAL;
  251                 if (flags & ~(MAP_STACK|MAP_FIXED|MAP_ANON|MAP_PRIVATE))
  252                         return EINVAL;
  253                 if (pos != 0)
  254                         return EINVAL;
  255                 if ((prot & (PROT_READ|PROT_WRITE)) != (PROT_READ|PROT_WRITE))
  256                         return EINVAL;
  257         }
  258         if (size == 0)
  259                 return EINVAL;
  260 
  261         error = pledge_protexec(p, prot);
  262         if (error)
  263                 return error;
  264 
  265         /* align file position and save offset.  adjust size. */
  266         ALIGN_ADDR(pos, size, pageoff);
  267 
  268         /* now check (MAP_FIXED) or get (!MAP_FIXED) the "addr" */
  269         if (flags & MAP_FIXED) {
  270                 /* adjust address by the same amount as we did the offset */
  271                 addr -= pageoff;
  272                 if (addr & PAGE_MASK)
  273                         return EINVAL;          /* not page aligned */
  274 
  275                 if (addr > SIZE_MAX - size)
  276                         return EINVAL;          /* no wrapping! */
  277                 if (VM_MAXUSER_ADDRESS > 0 &&
  278                     (addr + size) > VM_MAXUSER_ADDRESS)
  279                         return EINVAL;
  280                 if (vm_min_address > 0 && addr < vm_min_address)
  281                         return EINVAL;
  282         }
  283 
  284         /* check for file mappings (i.e. not anonymous) and verify file. */
  285         if ((flags & MAP_ANON) == 0) {
  286                 KERNEL_LOCK();
  287                 if ((fp = fd_getfile(fdp, fd)) == NULL) {
  288                         error = EBADF;
  289                         goto out;
  290                 }
  291 
  292                 if (fp->f_type != DTYPE_VNODE) {
  293                         error = ENODEV;         /* only mmap vnodes! */
  294                         goto out;
  295                 }
  296                 vp = (struct vnode *)fp->f_data;        /* convert to vnode */
  297 
  298                 if (vp->v_type != VREG && vp->v_type != VCHR &&
  299                     vp->v_type != VBLK) {
  300                         error = ENODEV; /* only REG/CHR/BLK support mmap */
  301                         goto out;
  302                 }
  303 
  304                 if (vp->v_type == VREG && (pos + size) < pos) {
  305                         error = EINVAL;         /* no offset wrapping */
  306                         goto out;
  307                 }
  308 
  309                 /* special case: catch SunOS style /dev/zero */
  310                 if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
  311                         flags |= MAP_ANON;
  312                         FRELE(fp, p);
  313                         fp = NULL;
  314                         KERNEL_UNLOCK();
  315                         goto is_anon;
  316                 }
  317 
  318                 /*
  319                  * Old programs may not select a specific sharing type, so
  320                  * default to an appropriate one.
  321                  */
  322                 if ((flags & (MAP_SHARED|MAP_PRIVATE)) == 0) {
  323 #if defined(DEBUG)
  324                         printf("WARNING: defaulted mmap() share type to"
  325                             " %s (pid %d comm %s)\n",
  326                             vp->v_type == VCHR ? "MAP_SHARED" : "MAP_PRIVATE",
  327                             p->p_p->ps_pid, p->p_p->ps_comm);
  328 #endif
  329                         if (vp->v_type == VCHR)
  330                                 flags |= MAP_SHARED;    /* for a device */
  331                         else
  332                                 flags |= MAP_PRIVATE;   /* for a file */
  333                 }
  334 
  335                 /*
  336                  * MAP_PRIVATE device mappings don't make sense (and aren't
  337                  * supported anyway).  However, some programs rely on this,
  338                  * so just change it to MAP_SHARED.
  339                  */
  340                 if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
  341                         flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
  342                 }
  343 
  344                 /* now check protection */
  345                 maxprot = PROT_EXEC;
  346 
  347                 /* check read access */
  348                 if (fp->f_flag & FREAD)
  349                         maxprot |= PROT_READ;
  350                 else if (prot & PROT_READ) {
  351                         error = EACCES;
  352                         goto out;
  353                 }
  354 
  355                 /* check write access, shared case first */
  356                 if (flags & MAP_SHARED) {
  357                         /*
  358                          * if the file is writable, only add PROT_WRITE to
  359                          * maxprot if the file is not immutable, append-only.
  360                          * otherwise, if we have asked for PROT_WRITE, return
  361                          * EPERM.
  362                          */
  363                         if (fp->f_flag & FWRITE) {
  364                                 error = VOP_GETATTR(vp, &va, p->p_ucred, p);
  365                                 if (error)
  366                                         goto out;
  367                                 if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
  368                                         maxprot |= PROT_WRITE;
  369                                 else if (prot & PROT_WRITE) {
  370                                         error = EPERM;
  371                                         goto out;
  372                                 }
  373                         } else if (prot & PROT_WRITE) {
  374                                 error = EACCES;
  375                                 goto out;
  376                         }
  377                 } else {
   378                         /* MAP_PRIVATE mappings can always be written to */
  379                         maxprot |= PROT_WRITE;
  380                 }
  381                 if ((flags & __MAP_NOFAULT) != 0 ||
  382                     ((flags & MAP_PRIVATE) != 0 && (prot & PROT_WRITE) != 0)) {
  383                         limit = lim_cur(RLIMIT_DATA);
  384                         if (limit < size ||
  385                             limit - size < ptoa(p->p_vmspace->vm_dused)) {
  386                                 error = ENOMEM;
  387                                 goto out;
  388                         }
  389                 }
  390                 error = uvm_mmapfile(&p->p_vmspace->vm_map, &addr, size, prot,
  391                     maxprot, flags, vp, pos, lim_cur(RLIMIT_MEMLOCK), p);
  392                 FRELE(fp, p);
  393                 KERNEL_UNLOCK();
  394         } else {                /* MAP_ANON case */
  395                 if (fd != -1)
  396                         return EINVAL;
  397 
  398 is_anon:        /* label for SunOS style /dev/zero */
  399 
  400                 /* __MAP_NOFAULT only makes sense with a backing object */
  401                 if ((flags & __MAP_NOFAULT) != 0)
  402                         return EINVAL;
  403 
  404                 if (prot != PROT_NONE || (flags & MAP_SHARED)) {
  405                         limit = lim_cur(RLIMIT_DATA);
  406                         if (limit < size ||
  407                             limit - size < ptoa(p->p_vmspace->vm_dused)) {
  408                                 return ENOMEM;
  409                         }
  410                 }
  411 
  412                 /*
  413                  * We've been treating (MAP_SHARED|MAP_PRIVATE) == 0 as
  414                  * MAP_PRIVATE, so make that clear.
  415                  */
  416                 if ((flags & MAP_SHARED) == 0)
  417                         flags |= MAP_PRIVATE;
  418 
  419                 maxprot = PROT_MASK;
  420                 error = uvm_mmapanon(&p->p_vmspace->vm_map, &addr, size, prot,
  421                     maxprot, flags, lim_cur(RLIMIT_MEMLOCK), p);
  422         }
  423 
  424         if (error == 0)
  425                 /* remember to add offset */
  426                 *retval = (register_t)(addr + pageoff);
  427 
  428         return error;
  429 
  430 out:
  431         KERNEL_UNLOCK();
  432         if (fp)
  433                 FRELE(fp, p);
  434         return error;
  435 }
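
/*
 * Illustrative userland sketch (not part of the original file): the common
 * anonymous-mapping path through sys_mmap() above, followed by an unmap.
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <err.h>
#include <string.h>

static void
anon_roundtrip(size_t len)
{
        char *p;

        p = mmap(NULL, len, PROT_READ | PROT_WRITE,
            MAP_ANON | MAP_PRIVATE, -1, 0);
        if (p == MAP_FAILED)
                err(1, "mmap");
        memset(p, 0xa5, len);
        if (munmap(p, len) == -1)
                err(1, "munmap");
}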
  436 
  437 #if 1
  438 int
  439 sys_pad_mquery(struct proc *p, void *v, register_t *retval)
  440 {
  441         struct sys_pad_mquery_args *uap = v;
  442         struct sys_mquery_args unpad;
  443 
  444         SCARG(&unpad, addr) = SCARG(uap, addr);
  445         SCARG(&unpad, len) = SCARG(uap, len);
  446         SCARG(&unpad, prot) = SCARG(uap, prot);
  447         SCARG(&unpad, flags) = SCARG(uap, flags);
  448         SCARG(&unpad, fd) = SCARG(uap, fd);
  449         SCARG(&unpad, pos) = SCARG(uap, pos);
  450         return sys_mquery(p, &unpad, retval);
  451 }
  452 
  453 int
  454 sys_pad_mmap(struct proc *p, void *v, register_t *retval)
  455 {
  456         struct sys_pad_mmap_args *uap = v;
  457         struct sys_mmap_args unpad;
  458 
  459         SCARG(&unpad, addr) = SCARG(uap, addr);
  460         SCARG(&unpad, len) = SCARG(uap, len);
  461         SCARG(&unpad, prot) = SCARG(uap, prot);
  462         SCARG(&unpad, flags) = SCARG(uap, flags);
  463         SCARG(&unpad, fd) = SCARG(uap, fd);
  464         SCARG(&unpad, pos) = SCARG(uap, pos);
  465         return sys_mmap(p, &unpad, retval);
  466 }
  467 #endif
  468 
  469 /*
  470  * sys_msync: the msync system call (a front-end for flush)
  471  */
  472 
  473 int
  474 sys_msync(struct proc *p, void *v, register_t *retval)
  475 {
  476         struct sys_msync_args /* {
  477                 syscallarg(void *) addr;
  478                 syscallarg(size_t) len;
  479                 syscallarg(int) flags;
  480         } */ *uap = v;
  481         vaddr_t addr;
  482         vsize_t size, pageoff;
  483         vm_map_t map;
  484         int flags, uvmflags;
  485 
  486         /* extract syscall args from the uap */
  487         addr = (vaddr_t)SCARG(uap, addr);
  488         size = (vsize_t)SCARG(uap, len);
  489         flags = SCARG(uap, flags);
  490 
  491         /* sanity check flags */
  492         if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
  493                         (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
  494                         (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
  495                 return EINVAL;
  496         if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
  497                 flags |= MS_SYNC;
  498 
  499         /* align the address to a page boundary, and adjust the size accordingly */
  500         ALIGN_ADDR(addr, size, pageoff);
  501         if (addr > SIZE_MAX - size)
  502                 return EINVAL;          /* disallow wrap-around. */
  503 
  504         /* get map */
  505         map = &p->p_vmspace->vm_map;
  506 
  507         /* translate MS_ flags into PGO_ flags */
  508         uvmflags = PGO_CLEANIT;
  509         if (flags & MS_INVALIDATE)
  510                 uvmflags |= PGO_FREE;
  511         if (flags & MS_SYNC)
  512                 uvmflags |= PGO_SYNCIO;
  513         else
  514                 uvmflags |= PGO_SYNCIO;  /* XXXCDC: force sync for now! */
  515 
  516         return uvm_map_clean(map, addr, addr+size, uvmflags);
  517 }
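
/*
 * Illustrative userland sketch (not part of the original file): dirtying a
 * shared file mapping and flushing it back with msync(2).  Assumes `fd`
 * names a regular file at least `len` bytes long.
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <err.h>

static void
flush_mapping(int fd, size_t len)
{
        char *p;

        p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, 0);
        if (p == MAP_FAILED)
                err(1, "mmap");
        p[0] = 1;                               /* dirty the first page */
        if (msync(p, len, MS_SYNC) == -1)       /* synchronous clean */
                err(1, "msync");
        if (munmap(p, len) == -1)
                err(1, "munmap");
}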
  518 
  519 /*
   520  * sys_munmap: unmap a user's memory
  521  */
  522 int
  523 sys_munmap(struct proc *p, void *v, register_t *retval)
  524 {
  525         struct sys_munmap_args /* {
  526                 syscallarg(void *) addr;
  527                 syscallarg(size_t) len;
  528         } */ *uap = v;
  529         vaddr_t addr;
  530         vsize_t size, pageoff;
  531         vm_map_t map;
  532         vaddr_t vm_min_address = VM_MIN_ADDRESS;
  533         struct uvm_map_deadq dead_entries;
  534 
  535         /* get syscall args... */
  536         addr = (vaddr_t) SCARG(uap, addr);
  537         size = (vsize_t) SCARG(uap, len);
  538 
  539         /* align address to a page boundary, and adjust size accordingly */
  540         ALIGN_ADDR(addr, size, pageoff);
  541 
  542         /*
  543          * Check for illegal addresses.  Watch out for address wrap...
  544          * Note that VM_*_ADDRESS are not constants due to casts (argh).
  545          */
  546         if (addr > SIZE_MAX - size)
  547                 return EINVAL;
  548         if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
  549                 return EINVAL;
  550         if (vm_min_address > 0 && addr < vm_min_address)
  551                 return EINVAL;
  552         map = &p->p_vmspace->vm_map;
  553 
  554 
  555         vm_map_lock(map);       /* lock map so we can checkprot */
  556 
  557         /*
  558          * interesting system call semantic: make sure entire range is
  559          * allocated before allowing an unmap.
  560          */
  561         if (!uvm_map_checkprot(map, addr, addr + size, PROT_NONE)) {
  562                 vm_map_unlock(map);
  563                 return EINVAL;
  564         }
  565 
  566         TAILQ_INIT(&dead_entries);
  567         if (uvm_unmap_remove(map, addr, addr + size, &dead_entries,
  568             FALSE, TRUE, TRUE) != 0) {
  569                 vm_map_unlock(map);
  570                 return EPERM;   /* immutable entries found */
  571         }
  572         vm_map_unlock(map);     /* and unlock */
  573 
  574         uvm_unmap_detach(&dead_entries, 0);
  575 
  576         return 0;
  577 }
  578 
  579 /*
  580  * sys_mprotect: the mprotect system call
  581  */
  582 int
  583 sys_mprotect(struct proc *p, void *v, register_t *retval)
  584 {
  585         struct sys_mprotect_args /* {
  586                 syscallarg(void *) addr;
  587                 syscallarg(size_t) len;
  588                 syscallarg(int) prot;
  589         } */ *uap = v;
  590         vaddr_t addr;
  591         vsize_t size, pageoff;
  592         vm_prot_t prot;
  593         int error;
  594 
  595         /*
  596          * extract syscall args from uap
  597          */
  598 
  599         addr = (vaddr_t)SCARG(uap, addr);
  600         size = (vsize_t)SCARG(uap, len);
  601         prot = SCARG(uap, prot);
  602 
  603         if ((prot & PROT_MASK) != prot)
  604                 return EINVAL;
  605         if ((prot & (PROT_WRITE | PROT_EXEC)) == (PROT_WRITE | PROT_EXEC) &&
  606             (error = uvm_wxcheck(p, "mprotect")))
  607                 return error;
  608 
  609         error = pledge_protexec(p, prot);
  610         if (error)
  611                 return error;
  612 
  613         /*
  614          * align the address to a page boundary, and adjust the size accordingly
  615          */
  616         ALIGN_ADDR(addr, size, pageoff);
  617         if (addr > SIZE_MAX - size)
  618                 return EINVAL;          /* disallow wrap-around. */
  619 
  620         return (uvm_map_protect(&p->p_vmspace->vm_map, addr, addr+size,
  621             prot, 0, FALSE, TRUE));
  622 }
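
/*
 * Illustrative userland sketch (not part of the original file): dropping
 * write permission from a region once it has been initialized.  Adding
 * PROT_EXEC alongside PROT_WRITE here would hit uvm_wxcheck() instead.
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <err.h>

static void
seal_readonly(void *p, size_t len)
{
        if (mprotect(p, len, PROT_READ) == -1)
                err(1, "mprotect");
}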
  623 
  624 /*
  625  * sys_msyscall: the msyscall system call
  626  */
  627 int
  628 sys_msyscall(struct proc *p, void *v, register_t *retval)
  629 {
  630         struct sys_msyscall_args /* {
  631                 syscallarg(void *) addr;
  632                 syscallarg(size_t) len;
  633         } */ *uap = v;
  634         vaddr_t addr;
  635         vsize_t size, pageoff;
  636 
  637         addr = (vaddr_t)SCARG(uap, addr);
  638         size = (vsize_t)SCARG(uap, len);
  639 
  640         /*
  641          * align the address to a page boundary, and adjust the size accordingly
  642          */
  643         ALIGN_ADDR(addr, size, pageoff);
  644         if (addr > SIZE_MAX - size)
  645                 return EINVAL;          /* disallow wrap-around. */
  646 
  647         return uvm_map_syscall(&p->p_vmspace->vm_map, addr, addr+size);
  648 }
  649 
  650 /*
  651  * sys_mimmutable: the mimmutable system call
  652  */
  653 int
  654 sys_mimmutable(struct proc *p, void *v, register_t *retval)
  655 {
  656         struct sys_mimmutable_args /* {
   657                 syscallarg(void *) addr;
   658                 syscallarg(size_t) len;
  659         } */ *uap = v;
  660         vaddr_t addr;
  661         vsize_t size, pageoff;
  662 
  663         addr = (vaddr_t)SCARG(uap, addr);
  664         size = (vsize_t)SCARG(uap, len);
  665 
  666         /*
  667          * align the address to a page boundary, and adjust the size accordingly
  668          */
  669         ALIGN_ADDR(addr, size, pageoff);
  670         if (addr > SIZE_MAX - size)
  671                 return EINVAL;          /* disallow wrap-around. */
  672 
  673         return uvm_map_immutable(&p->p_vmspace->vm_map, addr, addr+size, 1);
  674 }
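
/*
 * Illustrative userland sketch (not part of the original file): marking a
 * region immutable.  Later mprotect(2) or munmap(2) calls over it fail;
 * sys_munmap() above returns EPERM when it finds immutable entries.
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <err.h>

static void
lock_down(void *p, size_t len)
{
        if (mimmutable(p, len) == -1)
                err(1, "mimmutable");
}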
  675 
  676 /*
  677  * sys_minherit: the minherit system call
  678  */
  679 int
  680 sys_minherit(struct proc *p, void *v, register_t *retval)
  681 {
  682         struct sys_minherit_args /* {
  683                 syscallarg(void *) addr;
  684                 syscallarg(size_t) len;
  685                 syscallarg(int) inherit;
  686         } */ *uap = v;
  687         vaddr_t addr;
  688         vsize_t size, pageoff;
  689         vm_inherit_t inherit;
  690 
  691         addr = (vaddr_t)SCARG(uap, addr);
  692         size = (vsize_t)SCARG(uap, len);
  693         inherit = SCARG(uap, inherit);
  694 
  695         /*
  696          * align the address to a page boundary, and adjust the size accordingly
  697          */
  698         ALIGN_ADDR(addr, size, pageoff);
  699         if (addr > SIZE_MAX - size)
  700                 return EINVAL;          /* disallow wrap-around. */
  701 
  702         return (uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
  703             inherit));
  704 }
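
/*
 * Illustrative userland sketch (not part of the original file): asking
 * that a buffer holding secrets appear zero-filled in children created by
 * fork(2), one of the inheritance modes accepted by sys_minherit() above.
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <err.h>

static void
zero_in_children(void *p, size_t len)
{
        if (minherit(p, len, MAP_INHERIT_ZERO) == -1)
                err(1, "minherit");
}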
  705 
  706 /*
  707  * sys_madvise: give advice about memory usage.
  708  */
  709 /* ARGSUSED */
  710 int
  711 sys_madvise(struct proc *p, void *v, register_t *retval)
  712 {
  713         struct sys_madvise_args /* {
  714                 syscallarg(void *) addr;
  715                 syscallarg(size_t) len;
  716                 syscallarg(int) behav;
  717         } */ *uap = v;
  718         vaddr_t addr;
  719         vsize_t size, pageoff;
  720         int advice, error;
  721 
  722         addr = (vaddr_t)SCARG(uap, addr);
  723         size = (vsize_t)SCARG(uap, len);
  724         advice = SCARG(uap, behav);
  725 
  726         /*
  727          * align the address to a page boundary, and adjust the size accordingly
  728          */
  729         ALIGN_ADDR(addr, size, pageoff);
  730         if (addr > SIZE_MAX - size)
  731                 return EINVAL;          /* disallow wrap-around. */
  732 
  733         switch (advice) {
  734         case MADV_NORMAL:
  735         case MADV_RANDOM:
  736         case MADV_SEQUENTIAL:
  737                 error = uvm_map_advice(&p->p_vmspace->vm_map, addr,
  738                     addr + size, advice);
  739                 break;
  740 
  741         case MADV_WILLNEED:
  742                 /*
  743                  * Activate all these pages, pre-faulting them in if
  744                  * necessary.
  745                  */
  746                 /*
  747                  * XXX IMPLEMENT ME.
  748                  * Should invent a "weak" mode for uvm_fault()
  749                  * which would only do the PGO_LOCKED pgo_get().
  750                  */
  751                 return 0;
  752 
  753         case MADV_DONTNEED:
  754                 /*
  755                  * Deactivate all these pages.  We don't need them
  756                  * any more.  We don't, however, toss the data in
  757                  * the pages.
  758                  */
  759                 error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
  760                     PGO_DEACTIVATE);
  761                 break;
  762 
  763         case MADV_FREE:
  764                 /*
  765                  * These pages contain no valid data, and may be
  766                  * garbage-collected.  Toss all resources, including
  767                  * any swap space in use.
  768                  */
  769                 error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
  770                     PGO_FREE);
  771                 break;
  772 
  773         case MADV_SPACEAVAIL:
  774                 /*
  775                  * XXXMRG What is this?  I think it's:
  776                  *
  777                  *      Ensure that we have allocated backing-store
  778                  *      for these pages.
  779                  *
  780                  * This is going to require changes to the page daemon,
  781                  * as it will free swap space allocated to pages in core.
  782                  * There's also what to do for device/file/anonymous memory.
  783                  */
  784                 return EINVAL;
  785 
  786         default:
  787                 return EINVAL;
  788         }
  789 
  790         return error;
  791 }
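
/*
 * Illustrative userland sketch (not part of the original file): advising
 * sequential access over a region, then telling the kernel its contents
 * are no longer needed (MADV_DONTNEED above deactivates the pages without
 * discarding the data).
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <err.h>

static void
advise_region(void *p, size_t len)
{
        if (madvise(p, len, MADV_SEQUENTIAL) == -1)
                err(1, "madvise");
        /* ... stream through the region ... */
        if (madvise(p, len, MADV_DONTNEED) == -1)
                err(1, "madvise");
}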
  792 
  793 /*
  794  * sys_mlock: memory lock
  795  */
  796 
  797 int
  798 sys_mlock(struct proc *p, void *v, register_t *retval)
  799 {
  800         struct sys_mlock_args /* {
  801                 syscallarg(const void *) addr;
  802                 syscallarg(size_t) len;
  803         } */ *uap = v;
  804         vaddr_t addr;
  805         vsize_t size, pageoff;
  806         int error;
  807 
  808         /* extract syscall args from uap */
  809         addr = (vaddr_t)SCARG(uap, addr);
  810         size = (vsize_t)SCARG(uap, len);
  811 
  812         /* align address to a page boundary and adjust size accordingly */
  813         ALIGN_ADDR(addr, size, pageoff);
  814         if (addr > SIZE_MAX - size)
  815                 return EINVAL;          /* disallow wrap-around. */
  816 
  817         if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
  818                 return EAGAIN;
  819 
  820 #ifdef pmap_wired_count
  821         if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
  822                         lim_cur(RLIMIT_MEMLOCK))
  823                 return EAGAIN;
  824 #else
  825         if ((error = suser(p)) != 0)
  826                 return error;
  827 #endif
  828 
  829         error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE,
  830             0);
  831         return error == 0 ? 0 : ENOMEM;
  832 }
  833 
  834 /*
  835  * sys_munlock: unlock wired pages
  836  */
  837 
  838 int
  839 sys_munlock(struct proc *p, void *v, register_t *retval)
  840 {
  841         struct sys_munlock_args /* {
  842                 syscallarg(const void *) addr;
  843                 syscallarg(size_t) len;
  844         } */ *uap = v;
  845         vaddr_t addr;
  846         vsize_t size, pageoff;
  847         int error;
  848 
  849         /* extract syscall args from uap */
  850         addr = (vaddr_t)SCARG(uap, addr);
  851         size = (vsize_t)SCARG(uap, len);
  852 
  853         /* align address to a page boundary, and adjust size accordingly */
  854         ALIGN_ADDR(addr, size, pageoff);
  855         if (addr > SIZE_MAX - size)
  856                 return EINVAL;          /* disallow wrap-around. */
  857 
  858 #ifndef pmap_wired_count
  859         if ((error = suser(p)) != 0)
  860                 return error;
  861 #endif
  862 
  863         error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE,
  864             0);
  865         return error == 0 ? 0 : ENOMEM;
  866 }
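
/*
 * Illustrative userland sketch (not part of the original file): wiring a
 * small buffer (say, key material) so it cannot be paged out, subject to
 * RLIMIT_MEMLOCK and the global wired-page limit checked above.
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <err.h>

static void
wire_buffer(void *key, size_t len)
{
        if (mlock(key, len) == -1)
                err(1, "mlock");
        /* ... use the key ... */
        if (munlock(key, len) == -1)
                err(1, "munlock");
}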
  867 
  868 /*
  869  * sys_mlockall: lock all pages mapped into an address space.
  870  */
  871 int
  872 sys_mlockall(struct proc *p, void *v, register_t *retval)
  873 {
  874         struct sys_mlockall_args /* {
  875                 syscallarg(int) flags;
  876         } */ *uap = v;
  877         int error, flags;
  878 
  879         flags = SCARG(uap, flags);
  880 
  881         if (flags == 0 ||
  882             (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
  883                 return EINVAL;
  884 
  885 #ifndef pmap_wired_count
  886         if ((error = suser(p)) != 0)
  887                 return error;
  888 #endif
  889 
  890         error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
  891             lim_cur(RLIMIT_MEMLOCK));
  892         if (error != 0 && error != ENOMEM)
  893                 return EAGAIN;
  894         return error;
  895 }
  896 
  897 /*
  898  * sys_munlockall: unlock all pages mapped into an address space.
  899  */
  900 int
  901 sys_munlockall(struct proc *p, void *v, register_t *retval)
  902 {
  903 
  904         (void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
  905         return 0;
  906 }
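
/*
 * Illustrative userland sketch (not part of the original file): locking
 * every current and future mapping, as a latency-sensitive process might,
 * then releasing everything.
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <err.h>

static void
lock_everything(void)
{
        if (mlockall(MCL_CURRENT | MCL_FUTURE) == -1)
                err(1, "mlockall");
        /* ... time-critical work ... */
        if (munlockall() == -1)
                err(1, "munlockall");
}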
  907 
  908 /*
   909  * common code for mmapanon and mmapfile to lock a mapping
  910  */
  911 int
  912 uvm_mmaplock(vm_map_t map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
  913     vsize_t locklimit)
  914 {
  915         int error;
  916 
  917         /*
  918          * POSIX 1003.1b -- if our address space was configured
  919          * to lock all future mappings, wire the one we just made.
  920          */
  921         if (prot == PROT_NONE) {
  922                 /*
  923                  * No more work to do in this case.
  924                  */
  925                 return 0;
  926         }
  927 
  928         vm_map_lock(map);
  929         if (map->flags & VM_MAP_WIREFUTURE) {
  930                 KERNEL_LOCK();
  931                 if ((atop(size) + uvmexp.wired) > uvmexp.wiredmax
  932 #ifdef pmap_wired_count
  933                     || (locklimit != 0 && (size +
  934                          ptoa(pmap_wired_count(vm_map_pmap(map)))) >
  935                         locklimit)
  936 #endif
  937                 ) {
  938                         error = ENOMEM;
  939                         vm_map_unlock(map);
  940                         /* unmap the region! */
  941                         uvm_unmap(map, *addr, *addr + size);
  942                         KERNEL_UNLOCK();
  943                         return error;
  944                 }
  945                 /*
  946                  * uvm_map_pageable() always returns the map
  947                  * unlocked.
  948                  */
  949                 error = uvm_map_pageable(map, *addr, *addr + size,
  950                     FALSE, UVM_LK_ENTER);
  951                 if (error != 0) {
  952                         /* unmap the region! */
  953                         uvm_unmap(map, *addr, *addr + size);
  954                         KERNEL_UNLOCK();
  955                         return error;
  956                 }
  957                 KERNEL_UNLOCK();
  958                 return 0;
  959         }
  960         vm_map_unlock(map);
  961         return 0;
  962 }
  963 
  964 /*
  965  * uvm_mmapanon: internal version of mmap for anons
  966  *
  967  * - used by sys_mmap
  968  */
  969 int
  970 uvm_mmapanon(vm_map_t map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
  971     vm_prot_t maxprot, int flags, vsize_t locklimit, struct proc *p)
  972 {
  973         int error;
  974         int advice = MADV_NORMAL;
  975         unsigned int uvmflag = 0;
  976         vsize_t align = 0;      /* userland page size */
  977 
  978         /*
  979          * for non-fixed mappings, round off the suggested address.
  980          * for fixed mappings, check alignment and zap old mappings.
  981          */
  982         if ((flags & MAP_FIXED) == 0) {
  983                 *addr = round_page(*addr);      /* round */
  984         } else {
  985                 if (*addr & PAGE_MASK)
  986                         return EINVAL;
  987 
  988                 uvmflag |= UVM_FLAG_FIXED;
  989                 if ((flags & __MAP_NOREPLACE) == 0)
  990                         uvmflag |= UVM_FLAG_UNMAP;
  991         }
  992 
  993         if ((flags & MAP_FIXED) == 0 && size >= __LDPGSZ)
  994                 align = __LDPGSZ;
  995         if ((flags & MAP_SHARED) == 0)
  996                 /* XXX: defer amap create */
  997                 uvmflag |= UVM_FLAG_COPYONW;
  998         else
  999                 /* shared: create amap now */
 1000                 uvmflag |= UVM_FLAG_OVERLAY;
 1001         if (flags & MAP_STACK)
 1002                 uvmflag |= UVM_FLAG_STACK;
 1003         if (flags & MAP_CONCEAL)
 1004                 uvmflag |= UVM_FLAG_CONCEAL;
 1005 
 1006         /* set up mapping flags */
 1007         uvmflag = UVM_MAPFLAG(prot, maxprot,
 1008             (flags & MAP_SHARED) ? MAP_INHERIT_SHARE : MAP_INHERIT_COPY,
 1009             advice, uvmflag);
 1010 
 1011         error = uvm_mapanon(map, addr, size, align, uvmflag);
 1012 
 1013         if (error == 0)
 1014                 error = uvm_mmaplock(map, addr, size, prot, locklimit);
 1015         return error;
 1016 }
 1017 
 1018 /*
 1019  * uvm_mmapfile: internal version of mmap for non-anons
 1020  *
 1021  * - used by sys_mmap
 1022  * - caller must page-align the file offset
 1023  */
 1024 int
 1025 uvm_mmapfile(vm_map_t map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
 1026     vm_prot_t maxprot, int flags, struct vnode *vp, voff_t foff,
 1027     vsize_t locklimit, struct proc *p)
 1028 {
 1029         struct uvm_object *uobj;
 1030         int error;
 1031         int advice = MADV_NORMAL;
 1032         unsigned int uvmflag = 0;
 1033         vsize_t align = 0;      /* userland page size */
 1034 
 1035         /*
 1036          * for non-fixed mappings, round off the suggested address.
 1037          * for fixed mappings, check alignment and zap old mappings.
 1038          */
 1039         if ((flags & MAP_FIXED) == 0) {
 1040                 *addr = round_page(*addr);      /* round */
 1041         } else {
 1042                 if (*addr & PAGE_MASK)
 1043                         return EINVAL;
 1044 
 1045                 uvmflag |= UVM_FLAG_FIXED;
 1046                 if ((flags & __MAP_NOREPLACE) == 0)
 1047                         uvmflag |= UVM_FLAG_UNMAP;
 1048         }
 1049 
 1050         /*
 1051          * attach to underlying vm object.
 1052          */
 1053         if (vp->v_type != VCHR) {
 1054                 uobj = uvn_attach(vp, (flags & MAP_SHARED) ?
 1055                    maxprot : (maxprot & ~PROT_WRITE));
 1056 
 1057                 /*
 1058                  * XXXCDC: hack from old code
 1059                  * don't allow vnodes which have been mapped
 1060                  * shared-writeable to persist [forces them to be
 1061                  * flushed out when last reference goes].
 1062                  * XXXCDC: interesting side effect: avoids a bug.
 1063                  * note that in WRITE [ufs_readwrite.c] that we
 1064                  * allocate buffer, uncache, and then do the write.
 1065                  * the problem with this is that if the uncache causes
 1066                  * VM data to be flushed to the same area of the file
 1067                  * we are writing to... in that case we've got the
 1068                  * buffer locked and our process goes to sleep forever.
 1069                  *
 1070                  * XXXCDC: checking maxprot protects us from the
 1071                  * "persistbug" program but this is not a long term
 1072                  * solution.
 1073                  *
 1074                  * XXXCDC: we don't bother calling uncache with the vp
 1075                  * VOP_LOCKed since we know that we are already
 1076                  * holding a valid reference to the uvn (from the
 1077                  * uvn_attach above), and thus it is impossible for
 1078                  * the uncache to kill the uvn and trigger I/O.
 1079                  */
 1080                 if (flags & MAP_SHARED) {
 1081                         if ((prot & PROT_WRITE) ||
 1082                             (maxprot & PROT_WRITE)) {
 1083                                 uvm_vnp_uncache(vp);
 1084                         }
 1085                 }
 1086         } else {
 1087                 uobj = udv_attach(vp->v_rdev,
 1088                     (flags & MAP_SHARED) ? maxprot :
 1089                     (maxprot & ~PROT_WRITE), foff, size);
 1090                 /*
 1091                  * XXX Some devices don't like to be mapped with
 1092                  * XXX PROT_EXEC, but we don't really have a
 1093                  * XXX better way of handling this, right now
 1094                  */
 1095                 if (uobj == NULL && (prot & PROT_EXEC) == 0) {
 1096                         maxprot &= ~PROT_EXEC;
 1097                         uobj = udv_attach(vp->v_rdev,
 1098                             (flags & MAP_SHARED) ? maxprot :
 1099                             (maxprot & ~PROT_WRITE), foff, size);
 1100                 }
 1101                 advice = MADV_RANDOM;
 1102         }
 1103 
 1104         if (uobj == NULL)
 1105                 return vp->v_type == VREG ? ENOMEM : EINVAL;
 1106 
 1107         if ((flags & MAP_SHARED) == 0)
 1108                 uvmflag |= UVM_FLAG_COPYONW;
 1109         if (flags & __MAP_NOFAULT)
 1110                 uvmflag |= (UVM_FLAG_NOFAULT | UVM_FLAG_OVERLAY);
 1111         if (flags & MAP_STACK)
 1112                 uvmflag |= UVM_FLAG_STACK;
 1113         if (flags & MAP_CONCEAL)
 1114                 uvmflag |= UVM_FLAG_CONCEAL;
 1115 
 1116         /* set up mapping flags */
 1117         uvmflag = UVM_MAPFLAG(prot, maxprot,
 1118             (flags & MAP_SHARED) ? MAP_INHERIT_SHARE : MAP_INHERIT_COPY,
 1119             advice, uvmflag);
 1120 
 1121         error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
 1122 
 1123         if (error == 0)
 1124                 return uvm_mmaplock(map, addr, size, prot, locklimit);
 1125 
 1126         /* errors: first detach from the uobj, if any.  */
 1127         if (uobj)
 1128                 uobj->pgops->pgo_detach(uobj);
 1129 
 1130         return error;
 1131 }
 1132 
 1133 int
 1134 sys_kbind(struct proc *p, void *v, register_t *retval)
 1135 {
 1136         struct sys_kbind_args /* {
 1137                 syscallarg(const struct __kbind *) param;
 1138                 syscallarg(size_t) psize;
 1139                 syscallarg(uint64_t) proc_cookie;
 1140         } */ *uap = v;
 1141         const struct __kbind *paramp;
 1142         union {
 1143                 struct __kbind uk[KBIND_BLOCK_MAX];
 1144                 char upad[KBIND_BLOCK_MAX * sizeof(*paramp) + KBIND_DATA_MAX];
 1145         } param;
 1146         struct uvm_map_deadq dead_entries;
 1147         struct process *pr = p->p_p;
 1148         const char *data;
 1149         vaddr_t baseva, last_baseva, endva, pageoffset, kva;
 1150         size_t psize, s;
 1151         u_long pc;
 1152         int count, i, extra;
 1153         int error, sigill = 0;
 1154 
 1155         /*
 1156          * extract syscall args from uap
 1157          */
 1158         paramp = SCARG(uap, param);
 1159         psize = SCARG(uap, psize);
 1160 
 1161         /*
 1162          * If paramp is NULL and we're uninitialized, disable the syscall
 1163          * for the process.  Raise SIGILL if paramp is NULL and we're
 1164          * already initialized.
 1165          *
 1166          * If paramp is non-NULL and we're uninitialized, do initialization.
 1167          * Otherwise, do security checks and raise SIGILL on failure.
 1168          */
 1169         pc = PROC_PC(p);
 1170         mtx_enter(&pr->ps_mtx);
 1171         if (paramp == NULL) {
 1172                 /* ld.so disables kbind() when lazy binding is disabled */
 1173                 if (pr->ps_kbind_addr == 0)
 1174                         pr->ps_kbind_addr = BOGO_PC;
 1175                 /* pre-7.3 static binaries disable kbind */
 1176                 /* XXX delete check in 2026 */
 1177                 else if (pr->ps_kbind_addr != BOGO_PC)
 1178                         sigill = 1;
 1179         } else if (pr->ps_kbind_addr == 0) {
 1180                 pr->ps_kbind_addr = pc;
 1181                 pr->ps_kbind_cookie = SCARG(uap, proc_cookie);
 1182         } else if (pc != pr->ps_kbind_addr || pc == BOGO_PC ||
 1183             pr->ps_kbind_cookie != SCARG(uap, proc_cookie)) {
 1184                 sigill = 1;
 1185         }
 1186         mtx_leave(&pr->ps_mtx);
 1187 
 1188         /* Raise SIGILL if something is off. */
 1189         if (sigill) {
 1190                 KERNEL_LOCK();
 1191                 sigexit(p, SIGILL);
 1192                 /* NOTREACHED */
 1193                 KERNEL_UNLOCK();
 1194         }
 1195 
 1196         /* We're done if we were disabling the syscall. */
 1197         if (paramp == NULL)
 1198                 return 0;
 1199 
 1200         if (psize < sizeof(struct __kbind) || psize > sizeof(param))
 1201                 return EINVAL;
 1202         if ((error = copyin(paramp, &param, psize)))
 1203                 return error;
 1204 
 1205         /*
 1206          * The param argument points to an array of __kbind structures
 1207          * followed by the corresponding new data areas for them.  Verify
 1208          * that the sizes in the __kbind structures add up to the total
 1209          * size and find the start of the new area.
 1210          */
 1211         paramp = &param.uk[0];
 1212         s = psize;
 1213         for (count = 0; s > 0 && count < KBIND_BLOCK_MAX; count++) {
 1214                 if (s < sizeof(*paramp))
 1215                         return EINVAL;
 1216                 s -= sizeof(*paramp);
 1217 
 1218                 baseva = (vaddr_t)paramp[count].kb_addr;
 1219                 endva = baseva + paramp[count].kb_size - 1;
 1220                 if (paramp[count].kb_addr == NULL ||
 1221                     paramp[count].kb_size == 0 ||
 1222                     paramp[count].kb_size > KBIND_DATA_MAX ||
 1223                     baseva >= VM_MAXUSER_ADDRESS ||
 1224                     endva >= VM_MAXUSER_ADDRESS ||
 1225                     s < paramp[count].kb_size)
 1226                         return EINVAL;
 1227 
 1228                 s -= paramp[count].kb_size;
 1229         }
 1230         if (s > 0)
 1231                 return EINVAL;
 1232         data = (const char *)&paramp[count];
 1233 
 1234         /* all looks good, so do the bindings */
 1235         last_baseva = VM_MAXUSER_ADDRESS;
 1236         kva = 0;
 1237         TAILQ_INIT(&dead_entries);
 1238         KERNEL_LOCK();
 1239         for (i = 0; i < count; i++) {
 1240                 baseva = (vaddr_t)paramp[i].kb_addr;
 1241                 s = paramp[i].kb_size;
 1242                 pageoffset = baseva & PAGE_MASK;
 1243                 baseva = trunc_page(baseva);
 1244 
 1245                 /* hppa at least runs PLT entries over page edge */
 1246                 extra = (pageoffset + s) & PAGE_MASK;
 1247                 if (extra > pageoffset)
 1248                         extra = 0;
 1249                 else
 1250                         s -= extra;
 1251 redo:
  1252                 /* make sure the desired page is mapped into kernel_map */
 1253                 if (baseva != last_baseva) {
 1254                         if (kva != 0) {
 1255                                 vm_map_lock(kernel_map);
 1256                                 uvm_unmap_remove(kernel_map, kva,
 1257                                     kva+PAGE_SIZE, &dead_entries,
 1258                                     FALSE, TRUE, FALSE);        /* XXX */
 1259                                 vm_map_unlock(kernel_map);
 1260                                 kva = 0;
 1261                         }
 1262                         if ((error = uvm_map_extract(&p->p_vmspace->vm_map,
 1263                             baseva, PAGE_SIZE, &kva, UVM_EXTRACT_FIXPROT)))
 1264                                 break;
 1265                         last_baseva = baseva;
 1266                 }
 1267 
 1268                 /* do the update */
 1269                 if ((error = kcopy(data, (char *)kva + pageoffset, s)))
 1270                         break;
 1271                 data += s;
 1272 
 1273                 if (extra > 0) {
 1274                         baseva += PAGE_SIZE;
 1275                         s = extra;
 1276                         pageoffset = 0;
 1277                         extra = 0;
 1278                         goto redo;
 1279                 }
 1280         }
 1281 
 1282         if (kva != 0) {
 1283                 vm_map_lock(kernel_map);
 1284                 uvm_unmap_remove(kernel_map, kva, kva+PAGE_SIZE,
 1285                     &dead_entries, FALSE, TRUE, FALSE);         /* XXX */
 1286                 vm_map_unlock(kernel_map);
 1287         }
 1288         uvm_unmap_detach(&dead_entries, AMAP_REFALL);
 1289         KERNEL_UNLOCK();
 1290 
 1291         return error;
 1292 }
