FreeBSD/Linux Kernel Cross Reference
sys/uvm/uvm_mmap.c


    1 /*      $NetBSD: uvm_mmap.c,v 1.184 2022/07/07 11:29:18 rin Exp $       */
    2 
    3 /*
    4  * Copyright (c) 1997 Charles D. Cranor and Washington University.
    5  * Copyright (c) 1991, 1993 The Regents of the University of California.
    6  * Copyright (c) 1988 University of Utah.
    7  *
    8  * All rights reserved.
    9  *
   10  * This code is derived from software contributed to Berkeley by
   11  * the Systems Programming Group of the University of Utah Computer
   12  * Science Department.
   13  *
   14  * Redistribution and use in source and binary forms, with or without
   15  * modification, are permitted provided that the following conditions
   16  * are met:
   17  * 1. Redistributions of source code must retain the above copyright
   18  *    notice, this list of conditions and the following disclaimer.
   19  * 2. Redistributions in binary form must reproduce the above copyright
   20  *    notice, this list of conditions and the following disclaimer in the
   21  *    documentation and/or other materials provided with the distribution.
   22  * 3. Neither the name of the University nor the names of its contributors
   23  *    may be used to endorse or promote products derived from this software
   24  *    without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36  * SUCH DAMAGE.
   37  *
   38  * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
   39  *      @(#)vm_mmap.c   8.5 (Berkeley) 5/19/94
   40  * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
   41  */
   42 
   43 /*
   44  * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
   45  * function.
   46  */
   47 
   48 #include <sys/cdefs.h>
   49 __KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.184 2022/07/07 11:29:18 rin Exp $");
   50 
   51 #include "opt_compat_netbsd.h"
   52 #include "opt_pax.h"
   53 
   54 #include <sys/param.h>
   55 #include <sys/types.h>
   56 #include <sys/file.h>
   57 #include <sys/filedesc.h>
   58 #include <sys/resourcevar.h>
   59 #include <sys/mman.h>
   60 #include <sys/pax.h>
   61 
   62 #include <sys/syscallargs.h>
   63 
   64 #include <uvm/uvm.h>
   65 #include <uvm/uvm_device.h>
   66 
   67 static int uvm_mmap(struct vm_map *, vaddr_t *, vsize_t, vm_prot_t, vm_prot_t,
   68     int, int, struct uvm_object *, voff_t, vsize_t);
   69 
   70 static int
   71 range_test(const struct vm_map *map, vaddr_t addr, vsize_t size, bool ismmap)
   72 {
   73         vaddr_t vm_min_address = vm_map_min(map);
   74         vaddr_t vm_max_address = vm_map_max(map);
   75         vaddr_t eaddr = addr + size;
   76         int res = 0;
   77 
   78         if (addr < vm_min_address)
   79                 return EINVAL;
   80         if (eaddr > vm_max_address)
   81                 return ismmap ? EFBIG : EINVAL;
   82         if (addr > eaddr) /* no wrapping! */
   83                 return ismmap ? EOVERFLOW : EINVAL;
   84 
   85 #ifdef MD_MMAP_RANGE_TEST
   86         res = MD_MMAP_RANGE_TEST(addr, eaddr);
   87 #endif
   88 
   89         return res;
   90 }
   91 
   92 /*
   93  * align the address to a page boundary, and adjust the size accordingly
   94  */
   95 static int
   96 round_and_check(const struct vm_map *map, vaddr_t *addr, vsize_t *size)
   97 {
   98         const vsize_t pageoff = (vsize_t)(*addr & PAGE_MASK);
   99 
  100         *addr -= pageoff;
  101 
  102         if (*size != 0) {
  103                 *size += pageoff;
  104                 *size = (vsize_t)round_page(*size);
  105         } else if (*addr + *size < *addr) {
  106                 return ENOMEM;
  107         }
  108 
  109         return range_test(map, *addr, *size, false);
  110 }
  111 
  112 /*
  113  * sys_mincore: determine if pages are in core or not.
  114  */
  115 
  116 /* ARGSUSED */
  117 int
  118 sys_mincore(struct lwp *l, const struct sys_mincore_args *uap,
  119     register_t *retval)
  120 {
  121         /* {
  122                 syscallarg(void *) addr;
  123                 syscallarg(size_t) len;
  124                 syscallarg(char *) vec;
  125         } */
  126         struct proc *p = l->l_proc;
  127         struct vm_page *pg;
  128         char *vec, pgi;
  129         struct uvm_object *uobj;
  130         struct vm_amap *amap;
  131         struct vm_anon *anon;
  132         struct vm_map_entry *entry;
  133         vaddr_t start, end, lim;
  134         struct vm_map *map;
  135         vsize_t len;
  136         int error = 0;
  137         size_t npgs;
  138 
  139         map = &p->p_vmspace->vm_map;
  140 
  141         start = (vaddr_t)SCARG(uap, addr);
  142         len = SCARG(uap, len);
  143         vec = SCARG(uap, vec);
  144 
  145         if (start & PAGE_MASK)
  146                 return EINVAL;
  147         len = round_page(len);
  148         end = start + len;
  149         if (end <= start)
  150                 return EINVAL;
  151 
  152         /*
  153          * Lock down vec, so that a page fault taken while storing a
  154          * status byte cannot outdate the status we have already stored.
  155          */
  156 
  157         npgs = len >> PAGE_SHIFT;
  158         error = uvm_vslock(p->p_vmspace, vec, npgs, VM_PROT_WRITE);
  159         if (error) {
  160                 return error;
  161         }
  162         vm_map_lock_read(map);
  163 
  164         if (uvm_map_lookup_entry(map, start, &entry) == false) {
  165                 error = ENOMEM;
  166                 goto out;
  167         }
  168 
  169         for (/* nothing */;
  170              entry != &map->header && entry->start < end;
  171              entry = entry->next) {
  172                 KASSERT(!UVM_ET_ISSUBMAP(entry));
  173                 KASSERT(start >= entry->start);
  174 
  175                 /* Make sure there are no holes. */
  176                 if (entry->end < end &&
  177                      (entry->next == &map->header ||
  178                       entry->next->start > entry->end)) {
  179                         error = ENOMEM;
  180                         goto out;
  181                 }
  182 
  183                 lim = end < entry->end ? end : entry->end;
  184 
  185                 /*
  186                  * Special case for objects with no "real" pages.  Those
  187                  * are always considered resident (mapped devices).
  188                  */
  189 
  190                 if (UVM_ET_ISOBJ(entry)) {
  191                         KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
  192                         if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
  193                                 for (/* nothing */; start < lim;
  194                                      start += PAGE_SIZE, vec++)
  195                                         ustore_char(vec, 1);
  196                                 continue;
  197                         }
  198                 }
  199 
  200                 amap = entry->aref.ar_amap;     /* upper layer */
  201                 uobj = entry->object.uvm_obj;   /* lower layer */
  202 
  203                 if (amap != NULL)
  204                         amap_lock(amap, RW_READER);
  205                 if (uobj != NULL)
  206                         rw_enter(uobj->vmobjlock, RW_READER);
  207 
  208                 for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
  209                         pgi = 0;
  210                         if (amap != NULL) {
  211                                 /* Check the upper layer first. */
  212                                 anon = amap_lookup(&entry->aref,
  213                                     start - entry->start);
  214                                 /* Don't need to lock anon here. */
  215                                 if (anon != NULL && anon->an_page != NULL) {
  216 
  217                                         /*
  218                                          * Anon has the page for this entry
  219                                          * offset.
  220                                          */
  221 
  222                                         pgi = 1;
  223                                 }
  224                         }
  225                         if (uobj != NULL && pgi == 0) {
  226                                 /* Check the lower layer. */
  227                                 pg = uvm_pagelookup(uobj,
  228                                     entry->offset + (start - entry->start));
  229                                 if (pg != NULL) {
  230 
  231                                         /*
  232                                          * Object has the page for this entry
  233                                          * offset.
  234                                          */
  235 
  236                                         pgi = 1;
  237                                 }
  238                         }
  239                         (void) ustore_char(vec, pgi);
  240                 }
  241                 if (uobj != NULL)
  242                         rw_exit(uobj->vmobjlock);
  243                 if (amap != NULL)
  244                         amap_unlock(amap);
  245         }
  246 
  247  out:
  248         vm_map_unlock_read(map);
  249         uvm_vsunlock(p->p_vmspace, SCARG(uap, vec), npgs);
  250         return error;
  251 }
  252 
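/*
 * Editor's sketch (not part of this file): a minimal userland use of the
 * mincore(2) interface implemented above.  It assumes the standard
 * prototype int mincore(void *, size_t, char *) from <sys/mman.h>, with
 * one status byte per page and a nonzero byte meaning "resident".
 */
#if 0   /* illustrative userland sketch only -- never compiled with the kernel */
#include <sys/mman.h>

#include <stdlib.h>
#include <unistd.h>

/* Count how many pages of [base, base+len) are currently resident. */
static size_t
count_resident(void *base, size_t len)
{
        long pgsz = sysconf(_SC_PAGESIZE);
        size_t npgs = (len + (size_t)pgsz - 1) / (size_t)pgsz;
        char *vec = malloc(npgs);
        size_t i, resident = 0;

        if (vec == NULL)
                return 0;
        if (mincore(base, len, vec) == -1) {
                free(vec);
                return 0;
        }
        for (i = 0; i < npgs; i++)
                if (vec[i] != 0)        /* nonzero status byte => in core */
                        resident++;
        free(vec);
        return resident;
}
#endif
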
  253 /*
  254  * sys_mmap: mmap system call.
  255  *
  256  * => file offset and address may not be page aligned
  257  *    - if MAP_FIXED, offset and address must have the same remainder mod PAGE_SIZE
  258  *    - if address isn't page aligned the mapping starts at trunc_page(addr)
  259  *      and the return value is adjusted up by the page offset.
  260  */
  261 
  262 int
  263 sys_mmap(struct lwp *l, const struct sys_mmap_args *uap, register_t *retval)
  264 {
  265         /* {
  266                 syscallarg(void *) addr;
  267                 syscallarg(size_t) len;
  268                 syscallarg(int) prot;
  269                 syscallarg(int) flags;
  270                 syscallarg(int) fd;
  271                 syscallarg(long) pad;
  272                 syscallarg(off_t) pos;
  273         } */
  274         struct proc *p = l->l_proc;
  275         vaddr_t addr;
  276         off_t pos;
  277         vsize_t size, pageoff;
  278         vm_prot_t prot, maxprot, extraprot;
  279         int flags, fd, advice;
  280         vaddr_t defaddr = 0;    /* XXXGCC */
  281         bool addrhint = false;
  282         struct file *fp = NULL;
  283         struct uvm_object *uobj;
  284         int error;
  285 #ifdef PAX_ASLR
  286         vaddr_t orig_addr;
  287 #endif /* PAX_ASLR */
  288 
  289         /*
  290          * first, extract syscall args from the uap.
  291          */
  292 
  293         addr = (vaddr_t)SCARG(uap, addr);
  294         size = (vsize_t)SCARG(uap, len);
  295         prot = SCARG(uap, prot) & VM_PROT_ALL;
  296         extraprot = PROT_MPROTECT_EXTRACT(SCARG(uap, prot));
  297         flags = SCARG(uap, flags);
  298         fd = SCARG(uap, fd);
  299         pos = SCARG(uap, pos);
  300 
  301 #ifdef PAX_ASLR
  302         orig_addr = addr;
  303 #endif /* PAX_ASLR */
  304 
  305         if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
  306                 return EINVAL;
  307 
  308         if (size == 0 && (flags & MAP_ANON) == 0)
  309                 return EINVAL;
  310 
  311         /*
  312          * Align file position and save offset into page.  Adjust size
  313          * so that it is an integral multiple of the page size.
  314          */
  315         pageoff = pos & PAGE_MASK;
  316         pos -= pageoff;
  317         KASSERT(PAGE_MASK <= __type_max(vsize_t));
  318         KASSERT((__type_max(vsize_t) - PAGE_SIZE + 1) % PAGE_SIZE == 0);
  319         if (size > __type_max(vsize_t) - PAGE_SIZE + 1 - pageoff)
  320                 return ENOMEM;
  321         /*
  322          * size + pageoff <= VSIZE_MAX + 1 - PAGE_SIZE, and the
  323          * right-hand side is an integral multiple of the page size, so
  324          * round_page(size + pageoff) <= VSIZE_MAX + 1 - PAGE_SIZE.
  325          */
  326         size = round_page(size + pageoff);
  327 
  328         /*
  329          * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
  330          */
  331         if (flags & MAP_FIXED) {
  332                 /* ensure address and file offset are aligned properly */
  333                 addr -= pageoff;
  334                 if (addr & PAGE_MASK)
  335                         return EINVAL;
  336 
  337                 error = range_test(&p->p_vmspace->vm_map, addr, size, true);
  338                 if (error) {
  339                         return error;
  340                 }
  341         } else if (addr == 0 || !(flags & MAP_TRYFIXED)) {
  342                 /*
  343                  * not fixed: make sure we skip over the largest
  344                  * possible heap for non-topdown mapping arrangements.
  345                  * we will refine our guess later (e.g. to account for
  346                  * VAC, etc)
  347                  */
  348 
  349                 defaddr = p->p_emul->e_vm_default_addr(p,
  350                     (vaddr_t)p->p_vmspace->vm_daddr, size,
  351                     p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);
  352 
  353                 if (addr == 0 || !(p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN))
  354                         addr = MAX(addr, defaddr);
  355                 else
  356                         addr = MIN(addr, defaddr);
  357 
  358                 /*
  359                  * If addr is nonzero and not the default, then the
  360                  * address is a hint.
  361                  */
  362                 addrhint = (addr != 0 && addr != defaddr);
  363         }
  364 
  365         /*
  366          * check for file mappings (i.e. not anonymous) and verify file.
  367          */
  368 
  369         advice = UVM_ADV_NORMAL;
  370         if ((flags & MAP_ANON) == 0) {
  371                 KASSERT(size != 0);
  372 
  373                 if ((fp = fd_getfile(fd)) == NULL)
  374                         return EBADF;
  375 
  376                 if (fp->f_ops->fo_mmap == NULL) {
  377                         error = ENODEV;
  378                         goto out;
  379                 }
  380                 error = (*fp->f_ops->fo_mmap)(fp, &pos, size, prot, &flags,
  381                     &advice, &uobj, &maxprot);
  382                 if (error) {
  383                         goto out;
  384                 }
  385                 if (uobj == NULL) {
  386                         flags |= MAP_ANON;
  387                         fd_putfile(fd);
  388                         fp = NULL;
  389                         goto is_anon;
  390                 }
  391         } else {                /* MAP_ANON case */
  392                 /*
  393                  * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
  394                  */
  395                 if (fd != -1)
  396                         return EINVAL;
  397 
  398  is_anon:               /* label for SunOS style /dev/zero */
  399                 uobj = NULL;
  400                 maxprot = VM_PROT_ALL;
  401                 pos = 0;
  402         }
  403 
  404         maxprot = PAX_MPROTECT_MAXPROTECT(l, prot, extraprot, maxprot);
  405         if (((prot | extraprot) & maxprot) != (prot | extraprot)) {
  406                 error = EACCES;
  407                 goto out;
  408         }
  409         if ((error = PAX_MPROTECT_VALIDATE(l, prot)))
  410                 goto out;
  411 
  412         pax_aslr_mmap(l, &addr, orig_addr, flags);
  413 
  414         /*
  415          * Now let kernel internal function uvm_mmap do the work.
  416          *
  417          * If the user provided a hint, take a reference to uobj in
  418          * case the first attempt to satisfy the hint fails, so we can
  419          * try again with the default address.
  420          */
  421         if (addrhint) {
  422                 if (uobj)
  423                         (*uobj->pgops->pgo_reference)(uobj);
  424         }
  425         error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
  426             flags, advice, uobj, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
  427         if (addrhint) {
  428                 if (error) {
  429                         addr = defaddr;
  430                         pax_aslr_mmap(l, &addr, orig_addr, flags);
  431                         error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size,
  432                             prot, maxprot, flags, advice, uobj, pos,
  433                             p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
  434                 } else if (uobj) {
  435                         /* Release the extra reference we took.  */
  436                         (*uobj->pgops->pgo_detach)(uobj);
  437                 }
  438         }
  439 
  440         /* remember to add offset */
  441         *retval = (register_t)(addr + pageoff);
  442 
  443  out:
  444         if (fp != NULL)
  445                 fd_putfile(fd);
  446 
  447         return error;
  448 }
  449 
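/*
 * Editor's sketch (not part of this file): the extraprot bits that
 * sys_mmap() extracts with PROT_MPROTECT_EXTRACT() above back NetBSD's
 * PROT_MPROTECT() mmap(2) extension.  This assumes the PROT_MPROTECT()
 * macro from <sys/mman.h>; it reserves protections for a later
 * mprotect(2) that PaX MPROTECT (see PAX_MPROTECT_MAXPROTECT above)
 * might otherwise refuse to add.
 */
#if 0   /* illustrative userland sketch only -- never compiled with the kernel */
#include <sys/mman.h>

/* Map a writable buffer while reserving the right to make it executable. */
static void *
map_code_buffer(size_t len)
{
        void *p = mmap(NULL, len,
            PROT_READ | PROT_WRITE | PROT_MPROTECT(PROT_EXEC),
            MAP_ANON | MAP_PRIVATE, -1, 0);

        if (p == MAP_FAILED)
                return NULL;
        /* ... emit code into p ... */
        if (mprotect(p, len, PROT_READ | PROT_EXEC) == -1) {
                (void)munmap(p, len);
                return NULL;
        }
        return p;
}
#endif
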
  450 /*
  451  * sys___msync13: the msync system call (a front-end for flush)
  452  */
  453 
  454 int
  455 sys___msync13(struct lwp *l, const struct sys___msync13_args *uap,
  456     register_t *retval)
  457 {
  458         /* {
  459                 syscallarg(void *) addr;
  460                 syscallarg(size_t) len;
  461                 syscallarg(int) flags;
  462         } */
  463         struct proc *p = l->l_proc;
  464         vaddr_t addr;
  465         vsize_t size;
  466         struct vm_map *map;
  467         int error, flags, uvmflags;
  468         bool rv;
  469 
  470         /*
  471          * extract syscall args from the uap
  472          */
  473 
  474         addr = (vaddr_t)SCARG(uap, addr);
  475         size = (vsize_t)SCARG(uap, len);
  476         flags = SCARG(uap, flags);
  477 
  478         /* sanity check flags */
  479         if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
  480             (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
  481             (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
  482                 return EINVAL;
  483         if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
  484                 flags |= MS_SYNC;
  485 
  486         /*
  487          * get map
  488          */
  489         map = &p->p_vmspace->vm_map;
  490 
  491         if (round_and_check(map, &addr, &size))
  492                 return ENOMEM;
  493 
  494         /*
  495          * XXXCDC: do we really need this semantic?
  496          *
  497          * XXX Gak!  If size is zero we are supposed to sync "all modified
  498          * pages with the region containing addr".  Unfortunately, we
  499          * don't really keep track of individual mmaps so we approximate
  500          * by flushing the range of the map entry containing addr.
  501          * This can be incorrect if the region splits or is coalesced
  502          * with a neighbor.
  503          */
  504 
  505         if (size == 0) {
  506                 struct vm_map_entry *entry;
  507 
  508                 vm_map_lock_read(map);
  509                 rv = uvm_map_lookup_entry(map, addr, &entry);
  510                 if (rv == true) {
  511                         addr = entry->start;
  512                         size = entry->end - entry->start;
  513                 }
  514                 vm_map_unlock_read(map);
  515                 if (rv == false)
  516                         return EINVAL;
  517         }
  518 
  519         /*
  520          * translate MS_ flags into PGO_ flags
  521          */
  522 
  523         uvmflags = PGO_CLEANIT;
  524         if (flags & MS_INVALIDATE)
  525                 uvmflags |= PGO_FREE;
  526         if (flags & MS_SYNC)
  527                 uvmflags |= PGO_SYNCIO;
  528 
  529         error = uvm_map_clean(map, addr, addr+size, uvmflags);
  530         return error;
  531 }
  532 
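/*
 * Editor's sketch (not part of this file): a minimal userland call into
 * the msync(2) front end below.  As the flag checks below enforce,
 * MS_SYNC and MS_ASYNC are mutually exclusive, and MS_INVALIDATE maps
 * onto PGO_FREE.
 */
#if 0   /* illustrative userland sketch only -- never compiled with the kernel */
#include <sys/mman.h>

/* Write back dirty pages of a file-backed mapping and wait for the I/O. */
static int
flush_mapping(void *addr, size_t len)
{
        return msync(addr, len, MS_SYNC);
}
#endif
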
  533 /*
  534  * sys_munmap: unmap a user's memory
  535  */
  536 
  537 int
  538 sys_munmap(struct lwp *l, const struct sys_munmap_args *uap, register_t *retval)
  539 {
  540         /* {
  541                 syscallarg(void *) addr;
  542                 syscallarg(size_t) len;
  543         } */
  544         struct proc *p = l->l_proc;
  545         vaddr_t addr;
  546         vsize_t size;
  547         struct vm_map *map;
  548         struct vm_map_entry *dead_entries;
  549 
  550         /*
  551          * get syscall args.
  552          */
  553 
  554         addr = (vaddr_t)SCARG(uap, addr);
  555         size = (vsize_t)SCARG(uap, len);
  556 
  557         map = &p->p_vmspace->vm_map;
  558 
  559         if (round_and_check(map, &addr, &size))
  560                 return EINVAL;
  561 
  562         if (size == 0)
  563                 return 0;
  564 
  565         vm_map_lock(map);
  566 #if 0
  567         /*
  568          * interesting system call semantic: make sure entire range is
  569          * allocated before allowing an unmap.
  570          */
  571         if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
  572                 vm_map_unlock(map);
  573                 return EINVAL;
  574         }
  575 #endif
  576         uvm_unmap_remove(map, addr, addr + size, &dead_entries, 0);
  577         vm_map_unlock(map);
  578         if (dead_entries != NULL)
  579                 uvm_unmap_detach(dead_entries, 0);
  580         return 0;
  581 }
  582 
  583 /*
  584  * sys_mprotect: the mprotect system call
  585  */
  586 
  587 int
  588 sys_mprotect(struct lwp *l, const struct sys_mprotect_args *uap,
  589     register_t *retval)
  590 {
  591         /* {
  592                 syscallarg(void *) addr;
  593                 syscallarg(size_t) len;
  594                 syscallarg(int) prot;
  595         } */
  596         struct proc *p = l->l_proc;
  597         vaddr_t addr;
  598         vsize_t size;
  599         vm_prot_t prot;
  600         int error;
  601 
  602         /*
  603          * extract syscall args from uap
  604          */
  605 
  606         addr = (vaddr_t)SCARG(uap, addr);
  607         size = (vsize_t)SCARG(uap, len);
  608         prot = SCARG(uap, prot) & VM_PROT_ALL;
  609 
  610         if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
  611                 return EINVAL;
  612 
  613         error = uvm_map_protect_user(l, addr, addr + size, prot);
  614         return error;
  615 }
  616 
  617 /*
  618  * sys_minherit: the minherit system call
  619  */
  620 
  621 int
  622 sys_minherit(struct lwp *l, const struct sys_minherit_args *uap,
  623    register_t *retval)
  624 {
  625         /* {
  626                 syscallarg(void *) addr;
  627                 syscallarg(int) len;
  628                 syscallarg(int) inherit;
  629         } */
  630         struct proc *p = l->l_proc;
  631         vaddr_t addr;
  632         vsize_t size;
  633         vm_inherit_t inherit;
  634         int error;
  635 
  636         addr = (vaddr_t)SCARG(uap, addr);
  637         size = (vsize_t)SCARG(uap, len);
  638         inherit = SCARG(uap, inherit);
  639 
  640         if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
  641                 return EINVAL;
  642 
  643         error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
  644             inherit);
  645         return error;
  646 }
  647 
  648 /*
  649  * sys_madvise: give advice about memory usage.
  650  */
  651 
  652 /* ARGSUSED */
  653 int
  654 sys_madvise(struct lwp *l, const struct sys_madvise_args *uap,
  655    register_t *retval)
  656 {
  657         /* {
  658                 syscallarg(void *) addr;
  659                 syscallarg(size_t) len;
  660                 syscallarg(int) behav;
  661         } */
  662         struct proc *p = l->l_proc;
  663         vaddr_t addr;
  664         vsize_t size;
  665         int advice, error;
  666 
  667         addr = (vaddr_t)SCARG(uap, addr);
  668         size = (vsize_t)SCARG(uap, len);
  669         advice = SCARG(uap, behav);
  670 
  671         if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
  672                 return EINVAL;
  673 
  674         switch (advice) {
  675         case MADV_NORMAL:
  676         case MADV_RANDOM:
  677         case MADV_SEQUENTIAL:
  678                 error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
  679                     advice);
  680                 break;
  681 
  682         case MADV_WILLNEED:
  683 
  684                 /*
  685                  * Activate all these pages, pre-faulting them in if
  686                  * necessary.
  687                  */
  688                 error = uvm_map_willneed(&p->p_vmspace->vm_map,
  689                     addr, addr + size);
  690                 break;
  691 
  692         case MADV_DONTNEED:
  693 
  694                 /*
  695                  * Deactivate all these pages.  We don't need them
  696                  * any more.  We don't, however, toss the data in
  697                  * the pages.
  698                  */
  699 
  700                 error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
  701                     PGO_DEACTIVATE);
  702                 break;
  703 
  704         case MADV_FREE:
  705 
  706                 /*
  707                  * These pages contain no valid data, and may be
  708                  * garbage-collected.  Toss all resources, including
  709                  * any swap space in use.
  710                  */
  711 
  712                 error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
  713                     PGO_FREE);
  714                 break;
  715 
  716         case MADV_SPACEAVAIL:
  717 
  718                 /*
  719                  * XXXMRG What is this?  I think it's:
  720                  *
  721                  *      Ensure that we have allocated backing-store
  722                  *      for these pages.
  723                  *
  724                  * This is going to require changes to the page daemon,
  725                  * as it will free swap space allocated to pages in core.
  726                  * There's also what to do for device/file/anonymous memory.
  727                  */
  728 
  729                 return EINVAL;
  730 
  731         default:
  732                 return EINVAL;
  733         }
  734 
  735         return error;
  736 }
  737 
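/*
 * Editor's sketch (not part of this file): a minimal userland use of two
 * of the advice values handled above -- MADV_WILLNEED goes through
 * uvm_map_willneed(), MADV_DONTNEED through uvm_map_clean(...,
 * PGO_DEACTIVATE) (pages are deactivated, the data is kept).
 */
#if 0   /* illustrative userland sketch only -- never compiled with the kernel */
#include <sys/mman.h>

/* Pre-fault a window before a scan, then deactivate it when finished. */
static void
scan_window(void *base, size_t len)
{
        (void)madvise(base, len, MADV_WILLNEED);        /* activate / pre-fault */
        /* ... read through the range ... */
        (void)madvise(base, len, MADV_DONTNEED);        /* deactivate, data kept */
}
#endif
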
  738 /*
  739  * sys_mlock: memory lock
  740  */
  741 
  742 int
  743 sys_mlock(struct lwp *l, const struct sys_mlock_args *uap, register_t *retval)
  744 {
  745         /* {
  746                 syscallarg(const void *) addr;
  747                 syscallarg(size_t) len;
  748         } */
  749         struct proc *p = l->l_proc;
  750         vaddr_t addr;
  751         vsize_t size;
  752         int error;
  753 
  754         /*
  755          * extract syscall args from uap
  756          */
  757 
  758         addr = (vaddr_t)SCARG(uap, addr);
  759         size = (vsize_t)SCARG(uap, len);
  760 
  761         if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
  762                 return ENOMEM;
  763 
  764         if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
  765                 return EAGAIN;
  766 
  767         if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
  768             p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
  769                 return EAGAIN;
  770 
  771         error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, false,
  772             0);
  773         if (error == EFAULT)
  774                 error = ENOMEM;
  775         return error;
  776 }
  777 
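/*
 * Editor's sketch (not part of this file): a minimal userland use of
 * mlock(2) as implemented above.  The wiring is charged against
 * RLIMIT_MEMLOCK and the global uvmexp.wiredmax, and EFAULT from
 * uvm_map_pageable() is reported to the caller as ENOMEM.
 */
#if 0   /* illustrative userland sketch only -- never compiled with the kernel */
#include <sys/mman.h>
#include <err.h>

/* Wire a buffer that must never be paged out (e.g. key material). */
static void
wire_secret(void *buf, size_t len)
{
        if (mlock(buf, len) == -1)
                err(1, "mlock");        /* ENOMEM/EAGAIN: over a lock limit */
}
#endif
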
  778 /*
  779  * sys_munlock: unlock wired pages
  780  */
  781 
  782 int
  783 sys_munlock(struct lwp *l, const struct sys_munlock_args *uap,
  784     register_t *retval)
  785 {
  786         /* {
  787                 syscallarg(const void *) addr;
  788                 syscallarg(size_t) len;
  789         } */
  790         struct proc *p = l->l_proc;
  791         vaddr_t addr;
  792         vsize_t size;
  793 
  794         /*
  795          * extract syscall args from uap
  796          */
  797 
  798         addr = (vaddr_t)SCARG(uap, addr);
  799         size = (vsize_t)SCARG(uap, len);
  800 
  801         if (round_and_check(&p->p_vmspace->vm_map, &addr, &size))
  802                 return ENOMEM;
  803 
  804         if (uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, true, 0))
  805                 return ENOMEM;
  806 
  807         return 0;
  808 }
  809 
  810 /*
  811  * sys_mlockall: lock all pages mapped into an address space.
  812  */
  813 
  814 int
  815 sys_mlockall(struct lwp *l, const struct sys_mlockall_args *uap,
  816     register_t *retval)
  817 {
  818         /* {
  819                 syscallarg(int) flags;
  820         } */
  821         struct proc *p = l->l_proc;
  822         int error, flags;
  823 
  824         flags = SCARG(uap, flags);
  825 
  826         if (flags == 0 || (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
  827                 return EINVAL;
  828 
  829         error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
  830             p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
  831         return error;
  832 }
  833 
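/*
 * Editor's sketch (not part of this file): a minimal userland use of
 * mlockall(2) as implemented above.  MCL_CURRENT wires existing
 * mappings; MCL_FUTURE arranges for future mappings to be wired as they
 * are created (the VM_MAP_WIREFUTURE case handled in uvm_mmap() below).
 */
#if 0   /* illustrative userland sketch only -- never compiled with the kernel */
#include <sys/mman.h>
#include <err.h>

/* Wire everything mapped now and everything mapped later. */
static void
lock_everything(void)
{
        if (mlockall(MCL_CURRENT | MCL_FUTURE) == -1)
                err(1, "mlockall");
}
#endif
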
  834 /*
  835  * sys_munlockall: unlock all pages mapped into an address space.
  836  */
  837 
  838 int
  839 sys_munlockall(struct lwp *l, const void *v, register_t *retval)
  840 {
  841         struct proc *p = l->l_proc;
  842 
  843         (void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
  844         return 0;
  845 }
  846 
  847 /*
  848  * uvm_mmap: internal version of mmap
  849  *
  850  * - used by sys_mmap and various framebuffers
  851  * - uobj is a struct uvm_object pointer or NULL for MAP_ANON
  852  * - caller must page-align the file offset
  853  *
  854  * XXX This appears to leak the uobj in various error branches?  Need
  855  * to clean up the contract around uobj reference.
  856  */
  857 
  858 static int
  859 uvm_mmap(struct vm_map *map, vaddr_t *addr, vsize_t size, vm_prot_t prot,
  860     vm_prot_t maxprot, int flags, int advice, struct uvm_object *uobj,
  861     voff_t foff, vsize_t locklimit)
  862 {
  863         vaddr_t align = 0;
  864         int error;
  865         uvm_flag_t uvmflag = 0;
  866 
  867         /*
  868          * check params
  869          */
  870 
  871         if (size == 0)
  872                 return 0;
  873         if (foff & PAGE_MASK)
  874                 return EINVAL;
  875         if ((prot & maxprot) != prot)
  876                 return EINVAL;
  877 
  878         /*
  879          * for non-fixed mappings, round off the suggested address.
  880          * for fixed mappings, check alignment.
  881          */
  882 
  883         if ((flags & MAP_FIXED) == 0) {
  884                 *addr = round_page(*addr);
  885         } else {
  886                 if (*addr & PAGE_MASK)
  887                         return EINVAL;
  888                 uvmflag |= UVM_FLAG_FIXED | UVM_FLAG_UNMAP;
  889         }
  890 
  891         /*
  892          * Try to see if any requested alignment can even be attempted.
  893          * Make sure we can express the alignment (asking for a >= 4GB
  894          * alignment on an ILP32 architecture makes no sense) and that the
  895          * alignment is at least a page-sized quantity.  If the
  896          * request was for a fixed mapping, make sure the supplied address
  897          * adheres to the requested alignment.
  898          */
  899         align = (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT;
  900         if (align) {
  901                 if (align >= sizeof(vaddr_t) * NBBY)
  902                         return EINVAL;
  903                 align = 1UL << align;
  904                 if (align < PAGE_SIZE)
  905                         return EINVAL;
  906                 if (align >= vm_map_max(map))
  907                         return ENOMEM;
  908                 if (flags & MAP_FIXED) {
  909                         if ((*addr & (align-1)) != 0)
  910                                 return EINVAL;
  911                         align = 0;
  912                 }
  913         }
  914 
  915         /*
  916          * check resource limits
  917          */
  918 
  919         if (!VM_MAP_IS_KERNEL(map) &&
  920             (((rlim_t)curproc->p_vmspace->vm_map.size + (rlim_t)size) >
  921             curproc->p_rlimit[RLIMIT_AS].rlim_cur))
  922                 return ENOMEM;
  923 
  924         /*
  925          * handle anon vs. non-anon mappings.   for non-anon mappings attach
  926          * to underlying vm object.
  927          */
  928 
  929         if (flags & MAP_ANON) {
  930                 KASSERT(uobj == NULL);
  931                 foff = UVM_UNKNOWN_OFFSET;
  932                 if ((flags & MAP_SHARED) == 0)
  933                         /* XXX: defer amap create */
  934                         uvmflag |= UVM_FLAG_COPYONW;
  935                 else
  936                         /* shared: create amap now */
  937                         uvmflag |= UVM_FLAG_OVERLAY;
  938 
  939         } else {
  940                 KASSERT(uobj != NULL);
  941                 if ((flags & MAP_SHARED) == 0) {
  942                         uvmflag |= UVM_FLAG_COPYONW;
  943                 }
  944         }
  945 
  946         uvmflag = UVM_MAPFLAG(prot, maxprot,
  947             (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY, advice,
  948             uvmflag);
  949         error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
  950         if (error) {
  951                 if (uobj)
  952                         uobj->pgops->pgo_detach(uobj);
  953                 return error;
  954         }
  955 
  956         /*
  957          * POSIX 1003.1b -- if our address space was configured
  958          * to lock all future mappings, wire the one we just made.
  959          *
  960          * Also handle the MAP_WIRED flag here.
  961          */
  962 
  963         if (prot == VM_PROT_NONE) {
  964 
  965                 /*
  966                  * No more work to do in this case.
  967                  */
  968 
  969                 return 0;
  970         }
  971         if ((flags & MAP_WIRED) != 0 || (map->flags & VM_MAP_WIREFUTURE) != 0) {
  972                 vm_map_lock(map);
  973                 if (atop(size) + uvmexp.wired > uvmexp.wiredmax ||
  974                     (locklimit != 0 &&
  975                      size + ptoa(pmap_wired_count(vm_map_pmap(map))) >
  976                      locklimit)) {
  977                         vm_map_unlock(map);
  978                         uvm_unmap(map, *addr, *addr + size);
  979                         return ENOMEM;
  980                 }
  981 
  982                 /*
  983                  * uvm_map_pageable() always returns the map unlocked.
  984                  */
  985 
  986                 error = uvm_map_pageable(map, *addr, *addr + size,
  987                     false, UVM_LK_ENTER);
  988                 if (error) {
  989                         uvm_unmap(map, *addr, *addr + size);
  990                         return error;
  991                 }
  992                 return 0;
  993         }
  994         return 0;
  995 }
  996 
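/*
 * Editor's sketch (not part of this file): the MAP_ALIGNMENT_MASK
 * handling in uvm_mmap() above serves mmap(2)'s MAP_ALIGNED(n) flag,
 * where n is the log2 of the requested alignment in bytes.  This assumes
 * the MAP_ALIGNED() macro from <sys/mman.h>.
 */
#if 0   /* illustrative userland sketch only -- never compiled with the kernel */
#include <sys/mman.h>

/* Ask for an anonymous mapping aligned to 1 MiB (2^20 bytes). */
static void *
map_1m_aligned(size_t len)
{
        void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
            MAP_ANON | MAP_PRIVATE | MAP_ALIGNED(20), -1, 0);

        return (p == MAP_FAILED) ? NULL : p;
}
#endif
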
  997 vaddr_t
  998 uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz, int topdown)
  999 {
 1000 
 1001         if (topdown)
 1002                 return VM_DEFAULT_ADDRESS_TOPDOWN(base, sz);
 1003         else
 1004                 return VM_DEFAULT_ADDRESS_BOTTOMUP(base, sz);
 1005 }
 1006 
 1007 int
 1008 uvm_mmap_dev(struct proc *p, void **addrp, size_t len, dev_t dev,
 1009     off_t off)
 1010 {
 1011         struct uvm_object *uobj;
 1012         int error, flags, prot;
 1013 
 1014         KASSERT(len > 0);
 1015 
 1016         flags = MAP_SHARED;
 1017         prot = VM_PROT_READ | VM_PROT_WRITE;
 1018         if (*addrp)
 1019                 flags |= MAP_FIXED;
 1020         else
 1021                 *addrp = (void *)p->p_emul->e_vm_default_addr(p,
 1022                     (vaddr_t)p->p_vmspace->vm_daddr, len,
 1023                     p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);
 1024 
 1025         uobj = udv_attach(dev, prot, off, len);
 1026         if (uobj == NULL)
 1027                 return EINVAL;
 1028 
 1029         error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
 1030             (vsize_t)len, prot, prot, flags, UVM_ADV_RANDOM, uobj, off,
 1031             p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
 1032         return error;
 1033 }
 1034 
 1035 int
 1036 uvm_mmap_anon(struct proc *p, void **addrp, size_t len)
 1037 {
 1038         int error, flags, prot;
 1039 
 1040         flags = MAP_PRIVATE | MAP_ANON;
 1041         prot = VM_PROT_READ | VM_PROT_WRITE;
 1042         if (*addrp)
 1043                 flags |= MAP_FIXED;
 1044         else
 1045                 *addrp = (void *)p->p_emul->e_vm_default_addr(p,
 1046                     (vaddr_t)p->p_vmspace->vm_daddr, len,
 1047                     p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN);
 1048 
 1049         error = uvm_mmap(&p->p_vmspace->vm_map, (vaddr_t *)addrp,
 1050             (vsize_t)len, prot, prot, flags, UVM_ADV_NORMAL, NULL, 0,
 1051             p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
 1052         return error;
 1053 }

This page is part of the FreeBSD/Linux Kernel Cross-Reference and was automatically generated using a modified version of the LXR engine.