FreeBSD/Linux Kernel Cross Reference
sys/uvm/uvm_mmap.c


    1 /*      $NetBSD: uvm_mmap.c,v 1.82 2004/03/24 07:47:33 junyoung Exp $   */
    2 
    3 /*
    4  * Copyright (c) 1997 Charles D. Cranor and Washington University.
    5  * Copyright (c) 1991, 1993 The Regents of the University of California.
    6  * Copyright (c) 1988 University of Utah.
    7  *
    8  * All rights reserved.
    9  *
   10  * This code is derived from software contributed to Berkeley by
   11  * the Systems Programming Group of the University of Utah Computer
   12  * Science Department.
   13  *
   14  * Redistribution and use in source and binary forms, with or without
   15  * modification, are permitted provided that the following conditions
   16  * are met:
   17  * 1. Redistributions of source code must retain the above copyright
   18  *    notice, this list of conditions and the following disclaimer.
   19  * 2. Redistributions in binary form must reproduce the above copyright
   20  *    notice, this list of conditions and the following disclaimer in the
   21  *    documentation and/or other materials provided with the distribution.
   22  * 3. All advertising materials mentioning features or use of this software
   23  *    must display the following acknowledgement:
   24  *      This product includes software developed by the Charles D. Cranor,
   25  *      Washington University, University of California, Berkeley and
   26  *      its contributors.
   27  * 4. Neither the name of the University nor the names of its contributors
   28  *    may be used to endorse or promote products derived from this software
   29  *    without specific prior written permission.
   30  *
   31  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   32  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   33  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   34  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   35  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   36  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   37  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   38  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   39  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   40  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   41  * SUCH DAMAGE.
   42  *
   43  * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
   44  *      @(#)vm_mmap.c   8.5 (Berkeley) 5/19/94
   45  * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
   46  */
   47 
   48 /*
   49  * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
   50  * function.
   51  */
   52 
   53 #include <sys/cdefs.h>
   54 __KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.82 2004/03/24 07:47:33 junyoung Exp $");
   55 
   56 #include "opt_compat_netbsd.h"
   57 
   58 #include <sys/param.h>
   59 #include <sys/systm.h>
   60 #include <sys/file.h>
   61 #include <sys/filedesc.h>
   62 #include <sys/resourcevar.h>
   63 #include <sys/mman.h>
   64 #include <sys/mount.h>
   65 #include <sys/proc.h>
   66 #include <sys/malloc.h>
   67 #include <sys/vnode.h>
   68 #include <sys/conf.h>
   69 #include <sys/stat.h>
   70 
   71 #include <miscfs/specfs/specdev.h>
   72 
   73 #include <sys/sa.h>
   74 #include <sys/syscallargs.h>
   75 
   76 #include <uvm/uvm.h>
   77 #include <uvm/uvm_device.h>
   78 
   79 #ifndef COMPAT_ZERODEV
   80 #define COMPAT_ZERODEV(dev)     (0)
   81 #endif
   82 
   83 /*
   84  * unimplemented VM system calls:
   85  */
   86 
   87 /*
   88  * sys_sbrk: sbrk system call.
   89  */
   90 
   91 /* ARGSUSED */
   92 int
   93 sys_sbrk(l, v, retval)
   94         struct lwp *l;
   95         void *v;
   96         register_t *retval;
   97 {
   98 #if 0
   99         struct sys_sbrk_args /* {
  100                 syscallarg(intptr_t) incr;
  101         } */ *uap = v;
  102 #endif
  103 
  104         return (ENOSYS);
  105 }
  106 
  107 /*
  108  * sys_sstk: sstk system call.
  109  */
  110 
  111 /* ARGSUSED */
  112 int
  113 sys_sstk(l, v, retval)
  114         struct lwp *l;
  115         void *v;
  116         register_t *retval;
  117 {
  118 #if 0
  119         struct sys_sstk_args /* {
  120                 syscallarg(int) incr;
  121         } */ *uap = v;
  122 #endif
  123 
  124         return (ENOSYS);
  125 }
  126 
  127 /*
  128  * sys_mincore: determine if pages are in core or not.
  129  */
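
/*
 * Userland usage sketch (not part of this source file): mincore(2)
 * writes one status byte per page of the queried range into "vec"; a
 * low bit of 1 means the page is resident.  A minimal example,
 * assuming a typical anonymous mapping where only touched pages are
 * resident:
 *
 *	#include <sys/mman.h>
 *	#include <err.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		long pgsz = sysconf(_SC_PAGESIZE);
 *		size_t len = 4 * pgsz;
 *		char vec[4], *p;
 *		int i;
 *
 *		p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *		    MAP_ANON | MAP_PRIVATE, -1, 0);
 *		if (p == MAP_FAILED)
 *			err(1, "mmap");
 *		p[0] = 1;			// fault in the first page only
 *		if (mincore(p, len, vec) == -1)
 *			err(1, "mincore");
 *		for (i = 0; i < 4; i++)
 *			printf("page %d: %s\n", i,
 *			    (vec[i] & 1) ? "resident" : "not resident");
 *		munmap(p, len);
 *		return 0;
 *	}
 */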
  130 
  131 /* ARGSUSED */
  132 int
  133 sys_mincore(l, v, retval)
  134         struct lwp *l;
  135         void *v;
  136         register_t *retval;
  137 {
  138         struct sys_mincore_args /* {
  139                 syscallarg(void *) addr;
  140                 syscallarg(size_t) len;
  141                 syscallarg(char *) vec;
  142         } */ *uap = v;
  143         struct proc *p = l->l_proc;
  144         struct vm_page *pg;
  145         char *vec, pgi;
  146         struct uvm_object *uobj;
  147         struct vm_amap *amap;
  148         struct vm_anon *anon;
  149         struct vm_map_entry *entry;
  150         vaddr_t start, end, lim;
  151         struct vm_map *map;
  152         vsize_t len;
  153         int error = 0, npgs;
  154 
  155         map = &p->p_vmspace->vm_map;
  156 
  157         start = (vaddr_t)SCARG(uap, addr);
  158         len = SCARG(uap, len);
  159         vec = SCARG(uap, vec);
  160 
  161         if (start & PAGE_MASK)
  162                 return (EINVAL);
  163         len = round_page(len);
  164         end = start + len;
  165         if (end <= start)
  166                 return (EINVAL);
  167 
  168         /*
   169          * Lock down vec, so that storing a status byte cannot itself
   170          * fault and thereby outdate the status we are returning.
  171          */
  172 
  173         npgs = len >> PAGE_SHIFT;
  174         error = uvm_vslock(p, vec, npgs, VM_PROT_WRITE);
  175         if (error) {
  176                 return error;
  177         }
  178         vm_map_lock_read(map);
  179 
  180         if (uvm_map_lookup_entry(map, start, &entry) == FALSE) {
  181                 error = ENOMEM;
  182                 goto out;
  183         }
  184 
  185         for (/* nothing */;
  186              entry != &map->header && entry->start < end;
  187              entry = entry->next) {
  188                 KASSERT(!UVM_ET_ISSUBMAP(entry));
  189                 KASSERT(start >= entry->start);
  190 
  191                 /* Make sure there are no holes. */
  192                 if (entry->end < end &&
  193                      (entry->next == &map->header ||
  194                       entry->next->start > entry->end)) {
  195                         error = ENOMEM;
  196                         goto out;
  197                 }
  198 
  199                 lim = end < entry->end ? end : entry->end;
  200 
  201                 /*
  202                  * Special case for objects with no "real" pages.  Those
  203                  * are always considered resident (mapped devices).
  204                  */
  205 
  206                 if (UVM_ET_ISOBJ(entry)) {
  207                         KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
  208                         if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
  209                                 for (/* nothing */; start < lim;
  210                                      start += PAGE_SIZE, vec++)
  211                                         subyte(vec, 1);
  212                                 continue;
  213                         }
  214                 }
  215 
  216                 amap = entry->aref.ar_amap;     /* top layer */
  217                 uobj = entry->object.uvm_obj;   /* bottom layer */
  218 
  219                 if (amap != NULL)
  220                         amap_lock(amap);
  221                 if (uobj != NULL)
  222                         simple_lock(&uobj->vmobjlock);
  223 
  224                 for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
  225                         pgi = 0;
  226                         if (amap != NULL) {
  227                                 /* Check the top layer first. */
  228                                 anon = amap_lookup(&entry->aref,
  229                                     start - entry->start);
  230                                 /* Don't need to lock anon here. */
  231                                 if (anon != NULL && anon->u.an_page != NULL) {
  232 
  233                                         /*
  234                                          * Anon has the page for this entry
  235                                          * offset.
  236                                          */
  237 
  238                                         pgi = 1;
  239                                 }
  240                         }
  241                         if (uobj != NULL && pgi == 0) {
  242                                 /* Check the bottom layer. */
  243                                 pg = uvm_pagelookup(uobj,
  244                                     entry->offset + (start - entry->start));
  245                                 if (pg != NULL) {
  246 
  247                                         /*
  248                                          * Object has the page for this entry
  249                                          * offset.
  250                                          */
  251 
  252                                         pgi = 1;
  253                                 }
  254                         }
  255                         (void) subyte(vec, pgi);
  256                 }
  257                 if (uobj != NULL)
  258                         simple_unlock(&uobj->vmobjlock);
  259                 if (amap != NULL)
  260                         amap_unlock(amap);
  261         }
  262 
  263  out:
  264         vm_map_unlock_read(map);
  265         uvm_vsunlock(p, SCARG(uap, vec), npgs);
  266         return (error);
  267 }
  268 
  269 /*
  270  * sys_mmap: mmap system call.
  271  *
  272  * => file offset and address may not be page aligned
   273  *    - if MAP_FIXED, offset and address must have the same remainder mod PAGE_SIZE
  274  *    - if address isn't page aligned the mapping starts at trunc_page(addr)
  275  *      and the return value is adjusted up by the page offset.
  276  */
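
/*
 * Userland usage sketch (not part of this source file): the portable
 * way to map a region that starts at a byte offset which is not page
 * aligned is to trim the offset down to a page boundary and index past
 * the slop, which mirrors the pageoff fixup done below.  The file name
 * and offset are illustrative assumptions.
 *
 *	#include <sys/types.h>
 *	#include <sys/mman.h>
 *	#include <err.h>
 *	#include <fcntl.h>
 *	#include <stdio.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		off_t pos = 100;		// desired byte offset
 *		size_t want = 512;		// bytes we care about
 *		long pgsz = sysconf(_SC_PAGESIZE);
 *		off_t pageoff = pos & (pgsz - 1);
 *		char *base;
 *		int fd;
 *
 *		if ((fd = open("example.dat", O_RDONLY)) == -1)
 *			err(1, "open");
 *		base = mmap(NULL, want + pageoff, PROT_READ, MAP_PRIVATE,
 *		    fd, pos - pageoff);
 *		if (base == MAP_FAILED)
 *			err(1, "mmap");
 *		printf("byte at offset %lld: %c\n", (long long)pos,
 *		    base[pageoff]);
 *		munmap(base, want + pageoff);
 *		close(fd);
 *		return 0;
 *	}
 */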
  277 
  278 int
  279 sys_mmap(l, v, retval)
  280         struct lwp *l;
  281         void *v;
  282         register_t *retval;
  283 {
  284         struct sys_mmap_args /* {
  285                 syscallarg(caddr_t) addr;
  286                 syscallarg(size_t) len;
  287                 syscallarg(int) prot;
  288                 syscallarg(int) flags;
  289                 syscallarg(int) fd;
  290                 syscallarg(long) pad;
  291                 syscallarg(off_t) pos;
  292         } */ *uap = v;
  293         struct proc *p = l->l_proc;
  294         vaddr_t addr;
  295         struct vattr va;
  296         off_t pos;
  297         vsize_t size, pageoff;
  298         vm_prot_t prot, maxprot;
  299         int flags, fd;
  300         vaddr_t vm_min_address = VM_MIN_ADDRESS;
  301         struct filedesc *fdp = p->p_fd;
  302         struct file *fp;
  303         struct vnode *vp;
  304         void *handle;
  305         int error;
  306 
  307         /*
  308          * first, extract syscall args from the uap.
  309          */
  310 
  311         addr = (vaddr_t)SCARG(uap, addr);
  312         size = (vsize_t)SCARG(uap, len);
  313         prot = SCARG(uap, prot) & VM_PROT_ALL;
  314         flags = SCARG(uap, flags);
  315         fd = SCARG(uap, fd);
  316         pos = SCARG(uap, pos);
  317 
  318         /*
  319          * Fixup the old deprecated MAP_COPY into MAP_PRIVATE, and
  320          * validate the flags.
  321          */
  322         if (flags & MAP_COPY)
  323                 flags = (flags & ~MAP_COPY) | MAP_PRIVATE;
  324         if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
  325                 return (EINVAL);
  326 
  327         /*
  328          * align file position and save offset.  adjust size.
  329          */
  330 
  331         pageoff = (pos & PAGE_MASK);
  332         pos  -= pageoff;
  333         size += pageoff;                        /* add offset */
  334         size = (vsize_t)round_page(size);       /* round up */
  335         if ((ssize_t) size < 0)
  336                 return (EINVAL);                        /* don't allow wrap */
  337 
  338 #ifndef pmap_wired_count
  339         /*
  340          * if we're going to wire the mapping, restrict it to superuser.
  341          */
  342 
  343         if ((flags & MAP_WIRED) != 0 &&
  344             (error = suser(p->p_ucred, &p->p_acflag)) != 0)
  345                 return (error);
  346 #endif
  347 
  348         /*
  349          * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
  350          */
  351 
  352         if (flags & MAP_FIXED) {
  353 
  354                 /* ensure address and file offset are aligned properly */
  355                 addr -= pageoff;
  356                 if (addr & PAGE_MASK)
  357                         return (EINVAL);
  358 
  359                 if (VM_MAXUSER_ADDRESS > 0 &&
  360                     (addr + size) > VM_MAXUSER_ADDRESS)
  361                         return (EFBIG);
  362                 if (vm_min_address > 0 && addr < vm_min_address)
  363                         return (EINVAL);
  364                 if (addr > addr + size)
  365                         return (EOVERFLOW);             /* no wrapping! */
  366 
  367         } else if (addr == 0 || !(flags & MAP_TRYFIXED)) {
  368 
  369                 /*
  370                  * not fixed: make sure we skip over the largest
  371                  * possible heap for non-topdown mapping arrangements.
  372                  * we will refine our guess later (e.g. to account for
  373                  * VAC, etc)
  374                  */
  375 
  376                 if (addr == 0 ||
  377                     !(p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN))
  378                         addr = MAX(addr,
  379                             VM_DEFAULT_ADDRESS(p->p_vmspace->vm_daddr, size));
  380                 else
  381                         addr = MIN(addr,
  382                             VM_DEFAULT_ADDRESS(p->p_vmspace->vm_daddr, size));
  383         }
  384 
  385         /*
  386          * check for file mappings (i.e. not anonymous) and verify file.
  387          */
  388 
  389         if ((flags & MAP_ANON) == 0) {
  390 
  391                 if ((fp = fd_getfile(fdp, fd)) == NULL)
  392                         return (EBADF);
  393 
  394                 simple_unlock(&fp->f_slock);
  395 
  396                 if (fp->f_type != DTYPE_VNODE)
  397                         return (ENODEV);                /* only mmap vnodes! */
  398                 vp = (struct vnode *)fp->f_data;        /* convert to vnode */
  399 
  400                 if (vp->v_type != VREG && vp->v_type != VCHR &&
  401                     vp->v_type != VBLK)
  402                         return (ENODEV);  /* only REG/CHR/BLK support mmap */
  403 
  404                 if (vp->v_type != VCHR && pos < 0)
  405                         return (EINVAL);
  406 
  407                 if (vp->v_type != VCHR && (pos + size) < pos)
  408                         return (EOVERFLOW);             /* no offset wrapping */
  409 
  410                 /* special case: catch SunOS style /dev/zero */
  411                 if (vp->v_type == VCHR
  412                     && (vp->v_rdev == zerodev || COMPAT_ZERODEV(vp->v_rdev))) {
  413                         flags |= MAP_ANON;
  414                         goto is_anon;
  415                 }
  416 
  417                 /*
  418                  * Old programs may not select a specific sharing type, so
  419                  * default to an appropriate one.
  420                  *
  421                  * XXX: how does MAP_ANON fit in the picture?
  422                  */
  423                 if ((flags & (MAP_SHARED|MAP_PRIVATE)) == 0) {
  424 #if defined(DEBUG)
  425                         printf("WARNING: defaulted mmap() share type to "
  426                            "%s (pid %d command %s)\n", vp->v_type == VCHR ?
  427                            "MAP_SHARED" : "MAP_PRIVATE", p->p_pid,
  428                             p->p_comm);
  429 #endif
  430                         if (vp->v_type == VCHR)
  431                                 flags |= MAP_SHARED;    /* for a device */
  432                         else
  433                                 flags |= MAP_PRIVATE;   /* for a file */
  434                 }
  435 
  436                 /*
  437                  * MAP_PRIVATE device mappings don't make sense (and aren't
  438                  * supported anyway).  However, some programs rely on this,
  439                  * so just change it to MAP_SHARED.
  440                  */
  441                 if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
  442                         flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
  443                 }
  444 
  445                 /*
  446                  * now check protection
  447                  */
  448 
  449                 maxprot = VM_PROT_EXECUTE;
  450 
  451                 /* check read access */
  452                 if (fp->f_flag & FREAD)
  453                         maxprot |= VM_PROT_READ;
  454                 else if (prot & PROT_READ)
  455                         return (EACCES);
  456 
  457                 /* check write access, shared case first */
  458                 if (flags & MAP_SHARED) {
  459                         /*
  460                          * if the file is writable, only add PROT_WRITE to
   461                          * maxprot if the file is not immutable or append-only.
  462                          * otherwise, if we have asked for PROT_WRITE, return
  463                          * EPERM.
  464                          */
  465                         if (fp->f_flag & FWRITE) {
  466                                 if ((error =
  467                                     VOP_GETATTR(vp, &va, p->p_ucred, p)))
  468                                         return (error);
  469                                 if ((va.va_flags & (IMMUTABLE|APPEND)) == 0)
  470                                         maxprot |= VM_PROT_WRITE;
  471                                 else if (prot & PROT_WRITE)
  472                                         return (EPERM);
  473                         }
  474                         else if (prot & PROT_WRITE)
  475                                 return (EACCES);
  476                 } else {
   477                         /* MAP_PRIVATE mappings can always be written to */
  478                         maxprot |= VM_PROT_WRITE;
  479                 }
  480                 handle = vp;
  481 
  482         } else {                /* MAP_ANON case */
  483                 /*
  484                  * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
  485                  */
  486                 if (fd != -1)
  487                         return (EINVAL);
  488 
  489  is_anon:               /* label for SunOS style /dev/zero */
  490                 handle = NULL;
  491                 maxprot = VM_PROT_ALL;
  492                 pos = 0;
  493         }
  494 
  495         /*
  496          * XXX (in)sanity check.  We don't do proper datasize checking
  497          * XXX for anonymous (or private writable) mmap().  However,
   498          * XXX we know that if we're trying to allocate more than the amount
  499          * XXX remaining under our current data size limit, _that_ should
  500          * XXX be disallowed.
  501          */
  502         if ((flags & MAP_ANON) != 0 ||
  503             ((flags & MAP_PRIVATE) != 0 && (prot & PROT_WRITE) != 0)) {
  504                 if (size >
  505                     (p->p_rlimit[RLIMIT_DATA].rlim_cur -
  506                      ctob(p->p_vmspace->vm_dsize))) {
  507                         return (ENOMEM);
  508                 }
  509         }
  510 
  511         /*
  512          * now let kernel internal function uvm_mmap do the work.
  513          */
  514 
  515         error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
  516             flags, handle, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
  517 
  518         if (error == 0)
  519                 /* remember to add offset */
  520                 *retval = (register_t)(addr + pageoff);
  521 
  522         return (error);
  523 }
  524 
  525 /*
  526  * sys___msync13: the msync system call (a front-end for flush)
  527  */
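
/*
 * Userland usage sketch (not part of this source file): msync(2) with
 * MS_SYNC flushes modified pages of a shared file mapping back to the
 * underlying vnode before munmap().  The file name is an illustrative
 * assumption and the file is expected to already be at least one page
 * long.
 *
 *	#include <sys/mman.h>
 *	#include <err.h>
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		size_t len = 4096;
 *		char *p;
 *		int fd;
 *
 *		if ((fd = open("example.dat", O_RDWR)) == -1)
 *			err(1, "open");
 *		p = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED,
 *		    fd, 0);
 *		if (p == MAP_FAILED)
 *			err(1, "mmap");
 *		memcpy(p, "hello", 5);			// dirty the first page
 *		if (msync(p, len, MS_SYNC) == -1)	// synchronous writeback
 *			err(1, "msync");
 *		munmap(p, len);
 *		close(fd);
 *		return 0;
 *	}
 */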
  528 
  529 int
  530 sys___msync13(l, v, retval)
  531         struct lwp *l;
  532         void *v;
  533         register_t *retval;
  534 {
  535         struct sys___msync13_args /* {
  536                 syscallarg(caddr_t) addr;
  537                 syscallarg(size_t) len;
  538                 syscallarg(int) flags;
  539         } */ *uap = v;
  540         struct proc *p = l->l_proc;
  541         vaddr_t addr;
  542         vsize_t size, pageoff;
  543         struct vm_map *map;
  544         int error, rv, flags, uvmflags;
  545 
  546         /*
  547          * extract syscall args from the uap
  548          */
  549 
  550         addr = (vaddr_t)SCARG(uap, addr);
  551         size = (vsize_t)SCARG(uap, len);
  552         flags = SCARG(uap, flags);
  553 
  554         /* sanity check flags */
  555         if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
  556             (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
  557             (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
  558                 return (EINVAL);
  559         if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
  560                 flags |= MS_SYNC;
  561 
  562         /*
  563          * align the address to a page boundary and adjust the size accordingly.
  564          */
  565 
  566         pageoff = (addr & PAGE_MASK);
  567         addr -= pageoff;
  568         size += pageoff;
  569         size = (vsize_t)round_page(size);
  570 
  571         /* disallow wrap-around. */
  572         if (addr + size < addr)
  573                 return (EINVAL);
  574 
  575         /*
  576          * get map
  577          */
  578 
  579         map = &p->p_vmspace->vm_map;
  580 
  581         /*
  582          * XXXCDC: do we really need this semantic?
  583          *
  584          * XXX Gak!  If size is zero we are supposed to sync "all modified
   585          * pages within the region containing addr".  Unfortunately, we
  586          * don't really keep track of individual mmaps so we approximate
  587          * by flushing the range of the map entry containing addr.
  588          * This can be incorrect if the region splits or is coalesced
  589          * with a neighbor.
  590          */
  591 
  592         if (size == 0) {
  593                 struct vm_map_entry *entry;
  594 
  595                 vm_map_lock_read(map);
  596                 rv = uvm_map_lookup_entry(map, addr, &entry);
  597                 if (rv == TRUE) {
  598                         addr = entry->start;
  599                         size = entry->end - entry->start;
  600                 }
  601                 vm_map_unlock_read(map);
  602                 if (rv == FALSE)
  603                         return (EINVAL);
  604         }
  605 
  606         /*
  607          * translate MS_ flags into PGO_ flags
  608          */
  609 
  610         uvmflags = PGO_CLEANIT;
  611         if (flags & MS_INVALIDATE)
  612                 uvmflags |= PGO_FREE;
  613         if (flags & MS_SYNC)
  614                 uvmflags |= PGO_SYNCIO;
  615         else
  616                 uvmflags |= PGO_SYNCIO;  /* XXXCDC: force sync for now! */
  617 
  618         error = uvm_map_clean(map, addr, addr+size, uvmflags);
  619         return error;
  620 }
  621 
  622 /*
   623  * sys_munmap: unmap a user's memory
  624  */
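
/*
 * Userland usage sketch (not part of this source file): munmap(2) may
 * be applied to any page-aligned subrange of an existing mapping, not
 * only to the full region returned by mmap(2).  Here the second page
 * of a two-page anonymous mapping is released while the first page
 * stays valid.
 *
 *	#include <sys/mman.h>
 *	#include <err.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		long pgsz = sysconf(_SC_PAGESIZE);
 *		char *p;
 *
 *		p = mmap(NULL, 2 * pgsz, PROT_READ | PROT_WRITE,
 *		    MAP_ANON | MAP_PRIVATE, -1, 0);
 *		if (p == MAP_FAILED)
 *			err(1, "mmap");
 *		if (munmap(p + pgsz, pgsz) == -1)	// drop the second page
 *			err(1, "munmap");
 *		p[0] = 1;				// first page still mapped
 *		munmap(p, pgsz);
 *		return 0;
 *	}
 */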
  625 
  626 int
  627 sys_munmap(l, v, retval)
  628         struct lwp *l;
  629         void *v;
  630         register_t *retval;
  631 {
  632         struct sys_munmap_args /* {
  633                 syscallarg(caddr_t) addr;
  634                 syscallarg(size_t) len;
  635         } */ *uap = v;
  636         struct proc *p = l->l_proc;
  637         vaddr_t addr;
  638         vsize_t size, pageoff;
  639         struct vm_map *map;
  640         vaddr_t vm_min_address = VM_MIN_ADDRESS;
  641         struct vm_map_entry *dead_entries;
  642 
  643         /*
  644          * get syscall args.
  645          */
  646 
  647         addr = (vaddr_t)SCARG(uap, addr);
  648         size = (vsize_t)SCARG(uap, len);
  649 
  650         /*
  651          * align the address to a page boundary and adjust the size accordingly.
  652          */
  653 
  654         pageoff = (addr & PAGE_MASK);
  655         addr -= pageoff;
  656         size += pageoff;
  657         size = (vsize_t)round_page(size);
  658 
  659         if ((int)size < 0)
  660                 return (EINVAL);
  661         if (size == 0)
  662                 return (0);
  663 
  664         /*
  665          * Check for illegal addresses.  Watch out for address wrap...
  666          * Note that VM_*_ADDRESS are not constants due to casts (argh).
  667          */
  668         if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
  669                 return (EINVAL);
  670         if (vm_min_address > 0 && addr < vm_min_address)
  671                 return (EINVAL);
  672         if (addr > addr + size)
  673                 return (EINVAL);
  674         map = &p->p_vmspace->vm_map;
  675 
  676         /*
  677          * interesting system call semantic: make sure entire range is
  678          * allocated before allowing an unmap.
  679          */
  680 
  681         vm_map_lock(map);
  682 #if 0
  683         if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
  684                 vm_map_unlock(map);
  685                 return (EINVAL);
  686         }
  687 #endif
  688         uvm_unmap_remove(map, addr, addr + size, &dead_entries);
  689         vm_map_unlock(map);
  690         if (dead_entries != NULL)
  691                 uvm_unmap_detach(dead_entries, 0);
  692         return (0);
  693 }
  694 
  695 /*
  696  * sys_mprotect: the mprotect system call
  697  */
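
/*
 * Userland usage sketch (not part of this source file): mprotect(2)
 * changes the protection of whole pages; here a page is filled while
 * writable and then made read-only.  Any later store to it would
 * deliver SIGSEGV.
 *
 *	#include <sys/mman.h>
 *	#include <err.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		long pgsz = sysconf(_SC_PAGESIZE);
 *		char *p;
 *
 *		p = mmap(NULL, pgsz, PROT_READ | PROT_WRITE,
 *		    MAP_ANON | MAP_PRIVATE, -1, 0);
 *		if (p == MAP_FAILED)
 *			err(1, "mmap");
 *		memset(p, 0xa5, pgsz);
 *		if (mprotect(p, pgsz, PROT_READ) == -1)	// now read-only
 *			err(1, "mprotect");
 *		munmap(p, pgsz);
 *		return 0;
 *	}
 */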
  698 
  699 int
  700 sys_mprotect(l, v, retval)
  701         struct lwp *l;
  702         void *v;
  703         register_t *retval;
  704 {
  705         struct sys_mprotect_args /* {
  706                 syscallarg(caddr_t) addr;
  707                 syscallarg(size_t) len;
  708                 syscallarg(int) prot;
  709         } */ *uap = v;
  710         struct proc *p = l->l_proc;
  711         vaddr_t addr;
  712         vsize_t size, pageoff;
  713         vm_prot_t prot;
  714         int error;
  715 
  716         /*
  717          * extract syscall args from uap
  718          */
  719 
  720         addr = (vaddr_t)SCARG(uap, addr);
  721         size = (vsize_t)SCARG(uap, len);
  722         prot = SCARG(uap, prot) & VM_PROT_ALL;
  723 
  724         /*
  725          * align the address to a page boundary and adjust the size accordingly.
  726          */
  727 
  728         pageoff = (addr & PAGE_MASK);
  729         addr -= pageoff;
  730         size += pageoff;
  731         size = round_page(size);
  732 
  733         error = uvm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
  734                                 FALSE);
  735         return error;
  736 }
  737 
  738 /*
  739  * sys_minherit: the minherit system call
  740  */
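
/*
 * Userland usage sketch (not part of this source file): minherit(2)
 * controls what a child gets across fork(2).  A minimal sketch,
 * assuming the NetBSD MAP_INHERIT_* constants from <sys/mman.h>:
 * marking an anonymous region MAP_INHERIT_SHARE makes parent and
 * child see each other's stores, instead of the default copy-on-write
 * behaviour.
 *
 *	#include <sys/mman.h>
 *	#include <sys/wait.h>
 *	#include <err.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		long pgsz = sysconf(_SC_PAGESIZE);
 *		volatile char *p;
 *
 *		p = mmap(NULL, pgsz, PROT_READ | PROT_WRITE,
 *		    MAP_ANON | MAP_PRIVATE, -1, 0);
 *		if (p == MAP_FAILED)
 *			err(1, "mmap");
 *		if (minherit((void *)p, pgsz, MAP_INHERIT_SHARE) == -1)
 *			err(1, "minherit");
 *		if (fork() == 0) {
 *			p[0] = 1;		// the child's store ...
 *			_exit(0);
 *		}
 *		wait(NULL);
 *		return p[0];			// ... is visible to the parent
 *	}
 */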
  741 
  742 int
  743 sys_minherit(l, v, retval)
  744         struct lwp *l;
  745         void *v;
  746         register_t *retval;
  747 {
  748         struct sys_minherit_args /* {
  749                 syscallarg(caddr_t) addr;
  750                 syscallarg(int) len;
  751                 syscallarg(int) inherit;
  752         } */ *uap = v;
  753         struct proc *p = l->l_proc;
  754         vaddr_t addr;
  755         vsize_t size, pageoff;
  756         vm_inherit_t inherit;
  757         int error;
  758 
  759         addr = (vaddr_t)SCARG(uap, addr);
  760         size = (vsize_t)SCARG(uap, len);
  761         inherit = SCARG(uap, inherit);
  762 
  763         /*
  764          * align the address to a page boundary and adjust the size accordingly.
  765          */
  766 
  767         pageoff = (addr & PAGE_MASK);
  768         addr -= pageoff;
  769         size += pageoff;
  770         size = (vsize_t)round_page(size);
  771 
  772         if ((int)size < 0)
  773                 return (EINVAL);
  774         error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
  775                                 inherit);
  776         return error;
  777 }
  778 
  779 /*
  780  * sys_madvise: give advice about memory usage.
  781  */
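
/*
 * Userland usage sketch (not part of this source file): madvise(2)
 * passes usage hints to the handlers below.  Here MADV_SEQUENTIAL
 * hints that a file mapping will be read front to back, and
 * MADV_DONTNEED later tells the kernel the pages may be deactivated
 * (the data itself is not discarded by this implementation).  The
 * file name and its size are illustrative assumptions.
 *
 *	#include <sys/mman.h>
 *	#include <err.h>
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	int
 *	main(void)
 *	{
 *		size_t len = 1 << 20;		// assume a 1 MB file
 *		size_t i;
 *		char *p;
 *		int fd, sum = 0;
 *
 *		if ((fd = open("example.dat", O_RDONLY)) == -1)
 *			err(1, "open");
 *		p = mmap(NULL, len, PROT_READ, MAP_PRIVATE, fd, 0);
 *		if (p == MAP_FAILED)
 *			err(1, "mmap");
 *		madvise(p, len, MADV_SEQUENTIAL);	// read-ahead friendly
 *		for (i = 0; i < len; i++)
 *			sum += p[i];
 *		madvise(p, len, MADV_DONTNEED);		// done with the pages
 *		munmap(p, len);
 *		close(fd);
 *		return sum & 0xff;
 *	}
 */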
  782 
  783 /* ARGSUSED */
  784 int
  785 sys_madvise(l, v, retval)
  786         struct lwp *l;
  787         void *v;
  788         register_t *retval;
  789 {
  790         struct sys_madvise_args /* {
  791                 syscallarg(caddr_t) addr;
  792                 syscallarg(size_t) len;
  793                 syscallarg(int) behav;
  794         } */ *uap = v;
  795         struct proc *p = l->l_proc;
  796         vaddr_t addr;
  797         vsize_t size, pageoff;
  798         int advice, error;
  799 
  800         addr = (vaddr_t)SCARG(uap, addr);
  801         size = (vsize_t)SCARG(uap, len);
  802         advice = SCARG(uap, behav);
  803 
  804         /*
  805          * align the address to a page boundary, and adjust the size accordingly
  806          */
  807 
  808         pageoff = (addr & PAGE_MASK);
  809         addr -= pageoff;
  810         size += pageoff;
  811         size = (vsize_t)round_page(size);
  812 
  813         if ((ssize_t)size <= 0)
  814                 return (EINVAL);
  815 
  816         switch (advice) {
  817         case MADV_NORMAL:
  818         case MADV_RANDOM:
  819         case MADV_SEQUENTIAL:
  820                 error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
  821                     advice);
  822                 break;
  823 
  824         case MADV_WILLNEED:
  825 
  826                 /*
  827                  * Activate all these pages, pre-faulting them in if
  828                  * necessary.
  829                  */
  830                 /*
  831                  * XXX IMPLEMENT ME.
  832                  * Should invent a "weak" mode for uvm_fault()
  833                  * which would only do the PGO_LOCKED pgo_get().
  834                  */
  835 
  836                 return (0);
  837 
  838         case MADV_DONTNEED:
  839 
  840                 /*
  841                  * Deactivate all these pages.  We don't need them
  842                  * any more.  We don't, however, toss the data in
  843                  * the pages.
  844                  */
  845 
  846                 error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
  847                     PGO_DEACTIVATE);
  848                 break;
  849 
  850         case MADV_FREE:
  851 
  852                 /*
  853                  * These pages contain no valid data, and may be
  854                  * garbage-collected.  Toss all resources, including
  855                  * any swap space in use.
  856                  */
  857 
  858                 error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
  859                     PGO_FREE);
  860                 break;
  861 
  862         case MADV_SPACEAVAIL:
  863 
  864                 /*
  865                  * XXXMRG What is this?  I think it's:
  866                  *
  867                  *      Ensure that we have allocated backing-store
  868                  *      for these pages.
  869                  *
  870                  * This is going to require changes to the page daemon,
  871                  * as it will free swap space allocated to pages in core.
  872                  * There's also what to do for device/file/anonymous memory.
  873                  */
  874 
  875                 return (EINVAL);
  876 
  877         default:
  878                 return (EINVAL);
  879         }
  880 
  881         return error;
  882 }
  883 
  884 /*
  885  * sys_mlock: memory lock
  886  */
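
/*
 * Userland usage sketch (not part of this source file): mlock(2) wires
 * the pages backing a buffer so they cannot be paged out, subject to
 * the RLIMIT_MEMLOCK checks below; munlock(2) undoes it.  A typical
 * use is keeping key material resident.
 *
 *	#include <sys/mman.h>
 *	#include <err.h>
 *	#include <stdlib.h>
 *	#include <string.h>
 *
 *	int
 *	main(void)
 *	{
 *		size_t len = 4096;
 *		char *secret = malloc(len);
 *
 *		if (secret == NULL)
 *			err(1, "malloc");
 *		if (mlock(secret, len) == -1)		// wire the pages
 *			err(1, "mlock");
 *		memset(secret, 0x42, len);		// use the buffer ...
 *		memset(secret, 0, len);			// ... then scrub it
 *		munlock(secret, len);
 *		free(secret);
 *		return 0;
 *	}
 */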
  887 
  888 int
  889 sys_mlock(l, v, retval)
  890         struct lwp *l;
  891         void *v;
  892         register_t *retval;
  893 {
  894         struct sys_mlock_args /* {
  895                 syscallarg(const void *) addr;
  896                 syscallarg(size_t) len;
  897         } */ *uap = v;
  898         struct proc *p = l->l_proc;
  899         vaddr_t addr;
  900         vsize_t size, pageoff;
  901         int error;
  902 
  903         /*
  904          * extract syscall args from uap
  905          */
  906 
  907         addr = (vaddr_t)SCARG(uap, addr);
  908         size = (vsize_t)SCARG(uap, len);
  909 
  910         /*
  911          * align the address to a page boundary and adjust the size accordingly
  912          */
  913 
  914         pageoff = (addr & PAGE_MASK);
  915         addr -= pageoff;
  916         size += pageoff;
  917         size = (vsize_t)round_page(size);
  918 
  919         /* disallow wrap-around. */
  920         if (addr + size < addr)
  921                 return (EINVAL);
  922 
  923         if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
  924                 return (EAGAIN);
  925 
  926 #ifdef pmap_wired_count
  927         if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
  928                         p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
  929                 return (EAGAIN);
  930 #else
  931         if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
  932                 return (error);
  933 #endif
  934 
  935         error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE,
  936             0);
  937         return error;
  938 }
  939 
  940 /*
  941  * sys_munlock: unlock wired pages
  942  */
  943 
  944 int
  945 sys_munlock(l, v, retval)
  946         struct lwp *l;
  947         void *v;
  948         register_t *retval;
  949 {
  950         struct sys_munlock_args /* {
  951                 syscallarg(const void *) addr;
  952                 syscallarg(size_t) len;
  953         } */ *uap = v;
  954         struct proc *p = l->l_proc;
  955         vaddr_t addr;
  956         vsize_t size, pageoff;
  957         int error;
  958 
  959         /*
  960          * extract syscall args from uap
  961          */
  962 
  963         addr = (vaddr_t)SCARG(uap, addr);
  964         size = (vsize_t)SCARG(uap, len);
  965 
  966         /*
  967          * align the address to a page boundary, and adjust the size accordingly
  968          */
  969 
  970         pageoff = (addr & PAGE_MASK);
  971         addr -= pageoff;
  972         size += pageoff;
  973         size = (vsize_t)round_page(size);
  974 
  975         /* disallow wrap-around. */
  976         if (addr + size < addr)
  977                 return (EINVAL);
  978 
  979 #ifndef pmap_wired_count
  980         if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
  981                 return (error);
  982 #endif
  983 
  984         error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE,
  985             0);
  986         return error;
  987 }
  988 
  989 /*
  990  * sys_mlockall: lock all pages mapped into an address space.
  991  */
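
/*
 * Userland usage sketch (not part of this source file): mlockall(2)
 * wires every current mapping (MCL_CURRENT) and/or every future one
 * (MCL_FUTURE) in the address space, which is the usual setup for
 * latency-sensitive processes; munlockall(2) releases the wiring.
 *
 *	#include <sys/mman.h>
 *	#include <err.h>
 *
 *	int
 *	main(void)
 *	{
 *		if (mlockall(MCL_CURRENT | MCL_FUTURE) == -1)
 *			err(1, "mlockall");
 *		// ... latency-critical work; mappings created from here
 *		// on are wired as well because of MCL_FUTURE ...
 *		munlockall();
 *		return 0;
 *	}
 */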
  992 
  993 int
  994 sys_mlockall(l, v, retval)
  995         struct lwp *l;
  996         void *v;
  997         register_t *retval;
  998 {
  999         struct sys_mlockall_args /* {
 1000                 syscallarg(int) flags;
 1001         } */ *uap = v;
 1002         struct proc *p = l->l_proc;
 1003         int error, flags;
 1004 
 1005         flags = SCARG(uap, flags);
 1006 
 1007         if (flags == 0 ||
 1008             (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
 1009                 return (EINVAL);
 1010 
 1011 #ifndef pmap_wired_count
 1012         if ((error = suser(p->p_ucred, &p->p_acflag)) != 0)
 1013                 return (error);
 1014 #endif
 1015 
 1016         error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
 1017             p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
 1018         return (error);
 1019 }
 1020 
 1021 /*
 1022  * sys_munlockall: unlock all pages mapped into an address space.
 1023  */
 1024 
 1025 int
 1026 sys_munlockall(l, v, retval)
 1027         struct lwp *l;
 1028         void *v;
 1029         register_t *retval;
 1030 {
 1031         struct proc *p = l->l_proc;
 1032 
 1033         (void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
 1034         return (0);
 1035 }
 1036 
 1037 /*
 1038  * uvm_mmap: internal version of mmap
 1039  *
 1040  * - used by sys_mmap and various framebuffers
 1041  * - handle is a vnode pointer or NULL for MAP_ANON
 1042  * - caller must page-align the file offset
 1043  */
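
/*
 * Kernel-side usage sketch (not part of this source file): a minimal,
 * hypothetical illustration of the calling contract stated above.  An
 * in-kernel caller that wants a pageable anonymous mapping in a
 * process "p" (a struct proc * assumed to be in scope) would pass a
 * NULL handle, matching prot/maxprot and a page-aligned offset, much
 * as sys_mmap does above:
 *
 *	vaddr_t va = 0;				// let uvm_mmap pick the address
 *	vsize_t len = round_page(wanted);	// "wanted" is an assumed size
 *	int error;
 *
 *	error = uvm_mmap(&p->p_vmspace->vm_map, &va, len,
 *	    VM_PROT_READ | VM_PROT_WRITE,	// prot
 *	    VM_PROT_READ | VM_PROT_WRITE,	// maxprot
 *	    MAP_ANON | MAP_PRIVATE,		// flags: no backing object
 *	    NULL,				// handle: anonymous memory
 *	    0,					// foff (must be page aligned)
 *	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
 *	// on success, va holds the start of the new mapping
 */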
 1044 
 1045 int
 1046 uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff, locklimit)
 1047         struct vm_map *map;
 1048         vaddr_t *addr;
 1049         vsize_t size;
 1050         vm_prot_t prot, maxprot;
 1051         int flags;
 1052         void *handle;
 1053         voff_t foff;
 1054         vsize_t locklimit;
 1055 {
 1056         struct uvm_object *uobj;
 1057         struct vnode *vp;
 1058         vaddr_t align = 0;
 1059         int error;
 1060         int advice = UVM_ADV_NORMAL;
 1061         uvm_flag_t uvmflag = 0;
 1062 
 1063         /*
 1064          * check params
 1065          */
 1066 
 1067         if (size == 0)
 1068                 return(0);
 1069         if (foff & PAGE_MASK)
 1070                 return(EINVAL);
 1071         if ((prot & maxprot) != prot)
 1072                 return(EINVAL);
 1073 
 1074         /*
 1075          * for non-fixed mappings, round off the suggested address.
 1076          * for fixed mappings, check alignment and zap old mappings.
 1077          */
 1078 
 1079         if ((flags & MAP_FIXED) == 0) {
 1080                 *addr = round_page(*addr);
 1081         } else {
 1082                 if (*addr & PAGE_MASK)
 1083                         return(EINVAL);
 1084                 uvmflag |= UVM_FLAG_FIXED;
 1085                 (void) uvm_unmap(map, *addr, *addr + size);
 1086         }
 1087 
 1088         /*
  1089          * Try to see if any requested alignment can even be attempted.
  1090          * Make sure we can express the alignment (asking for a >= 4GB
  1091          * alignment on an ILP32 architecture makes no sense) and that the
  1092          * alignment is at least a page sized quantity.  If the
  1093          * request was for a fixed mapping, make sure the supplied address
  1094          * adheres to the requested alignment.
 1095          */
 1096         align = (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT;
 1097         if (align) {
 1098                 if (align >= sizeof(vaddr_t) * NBBY)
 1099                         return(EINVAL);
 1100                 align = 1L << align;
 1101                 if (align < PAGE_SIZE)
 1102                         return(EINVAL);
 1103                 if (align >= map->max_offset)
 1104                         return(ENOMEM);
 1105                 if (flags & MAP_FIXED) {
 1106                         if ((*addr & (align-1)) != 0)
 1107                                 return(EINVAL);
 1108                         align = 0;
 1109                 }
 1110         }
 1111 
 1112         /*
 1113          * handle anon vs. non-anon mappings.   for non-anon mappings attach
 1114          * to underlying vm object.
 1115          */
 1116 
 1117         if (flags & MAP_ANON) {
 1118                 foff = UVM_UNKNOWN_OFFSET;
 1119                 uobj = NULL;
 1120                 if ((flags & MAP_SHARED) == 0)
 1121                         /* XXX: defer amap create */
 1122                         uvmflag |= UVM_FLAG_COPYONW;
 1123                 else
 1124                         /* shared: create amap now */
 1125                         uvmflag |= UVM_FLAG_OVERLAY;
 1126 
 1127         } else {
 1128                 vp = (struct vnode *)handle;
 1129 
 1130                 /*
 1131                  * Don't allow mmap for EXEC if the file system
 1132                  * is mounted NOEXEC.
 1133                  */
 1134                 if ((prot & PROT_EXEC) != 0 &&
 1135                     (vp->v_mount->mnt_flag & MNT_NOEXEC) != 0)
 1136                         return (EACCES);
 1137 
 1138                 if (vp->v_type != VCHR) {
 1139                         error = VOP_MMAP(vp, 0, curproc->p_ucred, curproc);
 1140                         if (error) {
 1141                                 return error;
 1142                         }
 1143 
 1144                         uobj = uvn_attach((void *)vp, (flags & MAP_SHARED) ?
 1145                            maxprot : (maxprot & ~VM_PROT_WRITE));
 1146 
 1147                         /* XXX for now, attach doesn't gain a ref */
 1148                         VREF(vp);
 1149 
 1150                         /*
 1151                          * If the vnode is being mapped with PROT_EXEC,
 1152                          * then mark it as text.
 1153                          */
 1154                         if (prot & PROT_EXEC)
 1155                                 vn_markexec(vp);
 1156                 } else {
 1157                         uobj = udv_attach((void *) &vp->v_rdev,
 1158                             (flags & MAP_SHARED) ? maxprot :
 1159                             (maxprot & ~VM_PROT_WRITE), foff, size);
 1160                         /*
 1161                          * XXX Some devices don't like to be mapped with
 1162                          * XXX PROT_EXEC, but we don't really have a
 1163                          * XXX better way of handling this, right now
 1164                          */
 1165                         if (uobj == NULL && (prot & PROT_EXEC) == 0) {
 1166                                 maxprot &= ~VM_PROT_EXECUTE;
 1167                                 uobj = udv_attach((void *)&vp->v_rdev,
 1168                                     (flags & MAP_SHARED) ? maxprot :
 1169                                     (maxprot & ~VM_PROT_WRITE), foff, size);
 1170                         }
 1171                         advice = UVM_ADV_RANDOM;
 1172                 }
 1173                 if (uobj == NULL)
 1174                         return((vp->v_type == VREG) ? ENOMEM : EINVAL);
 1175                 if ((flags & MAP_SHARED) == 0)
 1176                         uvmflag |= UVM_FLAG_COPYONW;
 1177         }
 1178 
 1179         uvmflag = UVM_MAPFLAG(prot, maxprot,
 1180                         (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
 1181                         advice, uvmflag);
 1182         error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
 1183         if (error) {
 1184                 if (uobj)
 1185                         uobj->pgops->pgo_detach(uobj);
 1186                 return error;
 1187         }
 1188 
 1189         /*
 1190          * POSIX 1003.1b -- if our address space was configured
 1191          * to lock all future mappings, wire the one we just made.
 1192          *
 1193          * Also handle the MAP_WIRED flag here.
 1194          */
 1195 
 1196         if (prot == VM_PROT_NONE) {
 1197 
 1198                 /*
 1199                  * No more work to do in this case.
 1200                  */
 1201 
 1202                 return (0);
 1203         }
 1204         vm_map_lock(map);
 1205         if ((flags & MAP_WIRED) != 0 || (map->flags & VM_MAP_WIREFUTURE) != 0) {
 1206                 if ((atop(size) + uvmexp.wired) > uvmexp.wiredmax
 1207 #ifdef pmap_wired_count
 1208                     || (locklimit != 0 && (size +
 1209                     ptoa(pmap_wired_count(vm_map_pmap(map)))) >
 1210                         locklimit)
 1211 #endif
 1212                 ) {
 1213                         vm_map_unlock(map);
 1214                         uvm_unmap(map, *addr, *addr + size);
 1215                         return ENOMEM;
 1216                 }
 1217 
 1218                 /*
 1219                  * uvm_map_pageable() always returns the map unlocked.
 1220                  */
 1221 
 1222                 error = uvm_map_pageable(map, *addr, *addr + size,
 1223                                          FALSE, UVM_LK_ENTER);
 1224                 if (error) {
 1225                         uvm_unmap(map, *addr, *addr + size);
 1226                         return error;
 1227                 }
 1228                 return (0);
 1229         }
 1230         vm_map_unlock(map);
 1231         return 0;
 1232 }
