FreeBSD/Linux Kernel Cross Reference
sys/uvm/uvm_mmap.c


    1 /*      $NetBSD: uvm_mmap.c,v 1.88.4.3 2005/10/15 15:34:08 riz Exp $    */
    2 
    3 /*
    4  * Copyright (c) 1997 Charles D. Cranor and Washington University.
    5  * Copyright (c) 1991, 1993 The Regents of the University of California.
    6  * Copyright (c) 1988 University of Utah.
    7  *
    8  * All rights reserved.
    9  *
   10  * This code is derived from software contributed to Berkeley by
   11  * the Systems Programming Group of the University of Utah Computer
   12  * Science Department.
   13  *
   14  * Redistribution and use in source and binary forms, with or without
   15  * modification, are permitted provided that the following conditions
   16  * are met:
   17  * 1. Redistributions of source code must retain the above copyright
   18  *    notice, this list of conditions and the following disclaimer.
   19  * 2. Redistributions in binary form must reproduce the above copyright
   20  *    notice, this list of conditions and the following disclaimer in the
   21  *    documentation and/or other materials provided with the distribution.
   22  * 3. All advertising materials mentioning features or use of this software
   23  *    must display the following acknowledgement:
   24  *      This product includes software developed by the Charles D. Cranor,
   25  *      Washington University, University of California, Berkeley and
   26  *      its contributors.
   27  * 4. Neither the name of the University nor the names of its contributors
   28  *    may be used to endorse or promote products derived from this software
   29  *    without specific prior written permission.
   30  *
   31  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   32  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   33  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   34  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   35  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   36  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   37  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   38  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   39  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   40  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   41  * SUCH DAMAGE.
   42  *
   43  * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
   44  *      @(#)vm_mmap.c   8.5 (Berkeley) 5/19/94
   45  * from: Id: uvm_mmap.c,v 1.1.2.14 1998/01/05 21:04:26 chuck Exp
   46  */
   47 
   48 /*
   49  * uvm_mmap.c: system call interface into VM system, plus kernel vm_mmap
   50  * function.
   51  */
   52 
   53 #include <sys/cdefs.h>
   54 __KERNEL_RCSID(0, "$NetBSD: uvm_mmap.c,v 1.88.4.3 2005/10/15 15:34:08 riz Exp $");
   55 
   56 #include "opt_compat_netbsd.h"
   57 
   58 #include <sys/param.h>
   59 #include <sys/systm.h>
   60 #include <sys/file.h>
   61 #include <sys/filedesc.h>
   62 #include <sys/resourcevar.h>
   63 #include <sys/mman.h>
   64 #include <sys/mount.h>
   65 #include <sys/proc.h>
   66 #include <sys/malloc.h>
   67 #include <sys/vnode.h>
   68 #include <sys/conf.h>
   69 #include <sys/stat.h>
   70 
   71 #include <miscfs/specfs/specdev.h>
   72 
   73 #include <sys/sa.h>
   74 #include <sys/syscallargs.h>
   75 
   76 #include <uvm/uvm.h>
   77 #include <uvm/uvm_device.h>
   78 
   79 #ifndef COMPAT_ZERODEV
   80 #define COMPAT_ZERODEV(dev)     (0)
   81 #endif
   82 
   83 /*
   84  * unimplemented VM system calls:
   85  */
   86 
   87 /*
   88  * sys_sbrk: sbrk system call.
   89  */
   90 
   91 /* ARGSUSED */
   92 int
   93 sys_sbrk(l, v, retval)
   94         struct lwp *l;
   95         void *v;
   96         register_t *retval;
   97 {
   98 #if 0
   99         struct sys_sbrk_args /* {
  100                 syscallarg(intptr_t) incr;
  101         } */ *uap = v;
  102 #endif
  103 
  104         return (ENOSYS);
  105 }
  106 
  107 /*
  108  * sys_sstk: sstk system call.
  109  */
  110 
  111 /* ARGSUSED */
  112 int
  113 sys_sstk(l, v, retval)
  114         struct lwp *l;
  115         void *v;
  116         register_t *retval;
  117 {
  118 #if 0
  119         struct sys_sstk_args /* {
  120                 syscallarg(int) incr;
  121         } */ *uap = v;
  122 #endif
  123 
  124         return (ENOSYS);
  125 }
  126 
  127 /*
  128  * sys_mincore: determine if pages are in core or not.
  129  */
  130 
  131 /* ARGSUSED */
  132 int
  133 sys_mincore(l, v, retval)
  134         struct lwp *l;
  135         void *v;
  136         register_t *retval;
  137 {
  138         struct sys_mincore_args /* {
  139                 syscallarg(void *) addr;
  140                 syscallarg(size_t) len;
  141                 syscallarg(char *) vec;
  142         } */ *uap = v;
  143         struct proc *p = l->l_proc;
  144         struct vm_page *pg;
  145         char *vec, pgi;
  146         struct uvm_object *uobj;
  147         struct vm_amap *amap;
  148         struct vm_anon *anon;
  149         struct vm_map_entry *entry;
  150         vaddr_t start, end, lim;
  151         struct vm_map *map;
  152         vsize_t len;
  153         int error = 0, npgs;
  154 
  155         map = &p->p_vmspace->vm_map;
  156 
  157         start = (vaddr_t)SCARG(uap, addr);
  158         len = SCARG(uap, len);
  159         vec = SCARG(uap, vec);
  160 
  161         if (start & PAGE_MASK)
  162                 return (EINVAL);
  163         len = round_page(len);
  164         end = start + len;
  165         if (end <= start)
  166                 return (EINVAL);
  167 
  168         /*
  169          * Lock down vec, so that storing the status byte for a page
  170          * cannot fault and leave our returned status outdated.
  171          */
  172 
  173         npgs = len >> PAGE_SHIFT;
  174         error = uvm_vslock(p, vec, npgs, VM_PROT_WRITE);
  175         if (error) {
  176                 return error;
  177         }
  178         vm_map_lock_read(map);
  179 
  180         if (uvm_map_lookup_entry(map, start, &entry) == FALSE) {
  181                 error = ENOMEM;
  182                 goto out;
  183         }
  184 
  185         for (/* nothing */;
  186              entry != &map->header && entry->start < end;
  187              entry = entry->next) {
  188                 KASSERT(!UVM_ET_ISSUBMAP(entry));
  189                 KASSERT(start >= entry->start);
  190 
  191                 /* Make sure there are no holes. */
  192                 if (entry->end < end &&
  193                      (entry->next == &map->header ||
  194                       entry->next->start > entry->end)) {
  195                         error = ENOMEM;
  196                         goto out;
  197                 }
  198 
  199                 lim = end < entry->end ? end : entry->end;
  200 
  201                 /*
  202                  * Special case for objects with no "real" pages.  Those
  203                  * are always considered resident (mapped devices).
  204                  */
  205 
  206                 if (UVM_ET_ISOBJ(entry)) {
  207                         KASSERT(!UVM_OBJ_IS_KERN_OBJECT(entry->object.uvm_obj));
  208                         if (UVM_OBJ_IS_DEVICE(entry->object.uvm_obj)) {
  209                                 for (/* nothing */; start < lim;
  210                                      start += PAGE_SIZE, vec++)
  211                                         subyte(vec, 1);
  212                                 continue;
  213                         }
  214                 }
  215 
  216                 amap = entry->aref.ar_amap;     /* top layer */
  217                 uobj = entry->object.uvm_obj;   /* bottom layer */
  218 
  219                 if (amap != NULL)
  220                         amap_lock(amap);
  221                 if (uobj != NULL)
  222                         simple_lock(&uobj->vmobjlock);
  223 
  224                 for (/* nothing */; start < lim; start += PAGE_SIZE, vec++) {
  225                         pgi = 0;
  226                         if (amap != NULL) {
  227                                 /* Check the top layer first. */
  228                                 anon = amap_lookup(&entry->aref,
  229                                     start - entry->start);
  230                                 /* Don't need to lock anon here. */
  231                                 if (anon != NULL && anon->u.an_page != NULL) {
  232 
  233                                         /*
  234                                          * Anon has the page for this entry
  235                                          * offset.
  236                                          */
  237 
  238                                         pgi = 1;
  239                                 }
  240                         }
  241                         if (uobj != NULL && pgi == 0) {
  242                                 /* Check the bottom layer. */
  243                                 pg = uvm_pagelookup(uobj,
  244                                     entry->offset + (start - entry->start));
  245                                 if (pg != NULL) {
  246 
  247                                         /*
  248                                          * Object has the page for this entry
  249                                          * offset.
  250                                          */
  251 
  252                                         pgi = 1;
  253                                 }
  254                         }
  255                         (void) subyte(vec, pgi);
  256                 }
  257                 if (uobj != NULL)
  258                         simple_unlock(&uobj->vmobjlock);
  259                 if (amap != NULL)
  260                         amap_unlock(amap);
  261         }
  262 
  263  out:
  264         vm_map_unlock_read(map);
  265         uvm_vsunlock(p, SCARG(uap, vec), npgs);
  266         return (error);
  267 }
  268 
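/*
 * A minimal userland sketch of the interface implemented above
 * (illustrative only; error handling omitted).  One status byte is
 * returned per page, and a non-zero byte means the page is resident:
 *
 *	size_t pgsz = sysconf(_SC_PAGESIZE);
 *	char *base = mmap(NULL, 4 * pgsz, PROT_READ | PROT_WRITE,
 *	    MAP_ANON | MAP_PRIVATE, -1, 0);
 *	char vec[4];
 *
 *	base[0] = 1;				 /* touch the first page */
 *	if (mincore(base, 4 * pgsz, vec) == 0)
 *		printf("page 0 %s resident\n", vec[0] ? "is" : "is not");
 */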
  269 /*
  270  * sys_mmap: mmap system call.
  271  *
  272  * => file offset and address may not be page aligned
  273  *    - if MAP_FIXED, offset and address must have the same remainder mod PAGE_SIZE
  274  *    - if address isn't page aligned the mapping starts at trunc_page(addr)
  275  *      and the return value is adjusted up by the page offset.
  276  */
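/*
 * For example (assuming a 4096-byte PAGE_SIZE), a call such as
 *
 *	p = mmap(NULL, 100, PROT_READ, MAP_PRIVATE, fd, 0x1234);
 *
 * is handled below by splitting the offset into pos = 0x1000 and
 * pageoff = 0x234, mapping one page of the file starting at offset
 * 0x1000, and returning addr + 0x234, so that p points at the byte
 * stored at file offset 0x1234.
 */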
  277 
  278 int
  279 sys_mmap(l, v, retval)
  280         struct lwp *l;
  281         void *v;
  282         register_t *retval;
  283 {
  284         struct sys_mmap_args /* {
  285                 syscallarg(caddr_t) addr;
  286                 syscallarg(size_t) len;
  287                 syscallarg(int) prot;
  288                 syscallarg(int) flags;
  289                 syscallarg(int) fd;
  290                 syscallarg(long) pad;
  291                 syscallarg(off_t) pos;
  292         } */ *uap = v;
  293         struct proc *p = l->l_proc;
  294         vaddr_t addr;
  295         struct vattr va;
  296         off_t pos;
  297         vsize_t size, pageoff;
  298         vm_prot_t prot, maxprot;
  299         int flags, fd;
  300         vaddr_t vm_min_address = VM_MIN_ADDRESS, defaddr;
  301         struct filedesc *fdp = p->p_fd;
  302         struct file *fp;
  303         struct vnode *vp;
  304         void *handle;
  305         int error;
  306 
  307         /*
  308          * first, extract syscall args from the uap.
  309          */
  310 
  311         addr = (vaddr_t)SCARG(uap, addr);
  312         size = (vsize_t)SCARG(uap, len);
  313         prot = SCARG(uap, prot) & VM_PROT_ALL;
  314         flags = SCARG(uap, flags);
  315         fd = SCARG(uap, fd);
  316         pos = SCARG(uap, pos);
  317 
  318         /*
  319          * Fixup the old deprecated MAP_COPY into MAP_PRIVATE, and
  320          * validate the flags.
  321          */
  322         if (flags & MAP_COPY)
  323                 flags = (flags & ~MAP_COPY) | MAP_PRIVATE;
  324         if ((flags & (MAP_SHARED|MAP_PRIVATE)) == (MAP_SHARED|MAP_PRIVATE))
  325                 return (EINVAL);
  326 
  327         /*
  328          * align file position and save offset.  adjust size.
  329          */
  330 
  331         pageoff = (pos & PAGE_MASK);
  332         pos  -= pageoff;
  333         size += pageoff;                        /* add offset */
  334         size = (vsize_t)round_page(size);       /* round up */
  335         if ((ssize_t) size < 0)
  336                 return (EINVAL);                        /* don't allow wrap */
  337 
  338         /*
  339          * now check (MAP_FIXED) or get (!MAP_FIXED) the "addr"
  340          */
  341 
  342         if (flags & MAP_FIXED) {
  343 
  344                 /* ensure address and file offset are aligned properly */
  345                 addr -= pageoff;
  346                 if (addr & PAGE_MASK)
  347                         return (EINVAL);
  348 
  349                 if (VM_MAXUSER_ADDRESS > 0 &&
  350                     (addr + size) > VM_MAXUSER_ADDRESS)
  351                         return (EFBIG);
  352                 if (vm_min_address > 0 && addr < vm_min_address)
  353                         return (EINVAL);
  354                 if (addr > addr + size)
  355                         return (EOVERFLOW);             /* no wrapping! */
  356 
  357         } else if (addr == 0 || !(flags & MAP_TRYFIXED)) {
  358 
  359                 /*
  360                  * not fixed: make sure we skip over the largest
  361                  * possible heap for non-topdown mapping arrangements.
  362                  * we will refine our guess later (e.g. to account for
  363                  * VAC, etc)
  364                  */
  365 
  366                 defaddr = p->p_emul->e_vm_default_addr(p,
  367                     (vaddr_t)p->p_vmspace->vm_daddr, size);
  368 
  369                 if (addr == 0 ||
  370                     !(p->p_vmspace->vm_map.flags & VM_MAP_TOPDOWN))
  371                         addr = MAX(addr, defaddr);
  372                 else
  373                         addr = MIN(addr, defaddr);
  374         }
  375 
  376         /*
  377          * check for file mappings (i.e. not anonymous) and verify file.
  378          */
  379 
  380         if ((flags & MAP_ANON) == 0) {
  381 
  382                 if ((fp = fd_getfile(fdp, fd)) == NULL)
  383                         return (EBADF);
  384 
  385                 simple_unlock(&fp->f_slock);
  386 
  387                 if (fp->f_type != DTYPE_VNODE)
  388                         return (ENODEV);                /* only mmap vnodes! */
  389                 vp = (struct vnode *)fp->f_data;        /* convert to vnode */
  390 
  391                 if (vp->v_type != VREG && vp->v_type != VCHR &&
  392                     vp->v_type != VBLK)
  393                         return (ENODEV);  /* only REG/CHR/BLK support mmap */
  394 
  395                 if (vp->v_type != VCHR && pos < 0)
  396                         return (EINVAL);
  397 
  398                 if (vp->v_type != VCHR && (pos + size) < pos)
  399                         return (EOVERFLOW);             /* no offset wrapping */
  400 
  401                 /* special case: catch SunOS style /dev/zero */
  402                 if (vp->v_type == VCHR
  403                     && (vp->v_rdev == zerodev || COMPAT_ZERODEV(vp->v_rdev))) {
  404                         flags |= MAP_ANON;
  405                         goto is_anon;
  406                 }
  407 
  408                 /*
  409                  * Old programs may not select a specific sharing type, so
  410                  * default to an appropriate one.
  411                  *
  412                  * XXX: how does MAP_ANON fit in the picture?
  413                  */
  414                 if ((flags & (MAP_SHARED|MAP_PRIVATE)) == 0) {
  415 #if defined(DEBUG)
  416                         printf("WARNING: defaulted mmap() share type to "
  417                            "%s (pid %d command %s)\n", vp->v_type == VCHR ?
  418                            "MAP_SHARED" : "MAP_PRIVATE", p->p_pid,
  419                             p->p_comm);
  420 #endif
  421                         if (vp->v_type == VCHR)
  422                                 flags |= MAP_SHARED;    /* for a device */
  423                         else
  424                                 flags |= MAP_PRIVATE;   /* for a file */
  425                 }
  426 
  427                 /*
  428                  * MAP_PRIVATE device mappings don't make sense (and aren't
  429                  * supported anyway).  However, some programs rely on this,
  430                  * so just change it to MAP_SHARED.
  431                  */
  432                 if (vp->v_type == VCHR && (flags & MAP_PRIVATE) != 0) {
  433                         flags = (flags & ~MAP_PRIVATE) | MAP_SHARED;
  434                 }
  435 
  436                 /*
  437                  * now check protection
  438                  */
  439 
  440                 maxprot = VM_PROT_EXECUTE;
  441 
  442                 /* check read access */
  443                 if (fp->f_flag & FREAD)
  444                         maxprot |= VM_PROT_READ;
  445                 else if (prot & PROT_READ)
  446                         return (EACCES);
  447 
  448                 /* check write access, shared case first */
  449                 if (flags & MAP_SHARED) {
  450                         /*
  451                          * if the file is writable, only add PROT_WRITE to
  452                          * maxprot if the file is not immutable or append-only.
  453                          * otherwise, if we have asked for PROT_WRITE, return
  454                          * EPERM.
  455                          */
  456                         if (fp->f_flag & FWRITE) {
  457                                 if ((error =
  458                                     VOP_GETATTR(vp, &va, p->p_ucred, p)))
  459                                         return (error);
  460                                 if ((va.va_flags &
  461                                     (SF_SNAPSHOT|IMMUTABLE|APPEND)) == 0)
  462                                         maxprot |= VM_PROT_WRITE;
  463                                 else if (prot & PROT_WRITE)
  464                                         return (EPERM);
  465                         }
  466                         else if (prot & PROT_WRITE)
  467                                 return (EACCES);
  468                 } else {
  469                         /* MAP_PRIVATE mappings can always be written to */
  470                         maxprot |= VM_PROT_WRITE;
  471                 }
  472                 handle = vp;
  473 
  474         } else {                /* MAP_ANON case */
  475                 /*
  476                  * XXX What do we do about (MAP_SHARED|MAP_PRIVATE) == 0?
  477                  */
  478                 if (fd != -1)
  479                         return (EINVAL);
  480 
  481  is_anon:               /* label for SunOS style /dev/zero */
  482                 handle = NULL;
  483                 maxprot = VM_PROT_ALL;
  484                 pos = 0;
  485         }
  486 
  487         /*
  488          * XXX (in)sanity check.  We don't do proper datasize checking
  489          * XXX for anonymous (or private writable) mmap().  However,
  490          * XXX know that if we're trying to allocate more than the amount
  491          * XXX remaining under our current data size limit, _that_ should
  492          * XXX be disallowed.
  493          */
  494         if ((flags & MAP_ANON) != 0 ||
  495             ((flags & MAP_PRIVATE) != 0 && (prot & PROT_WRITE) != 0)) {
  496                 if (size >
  497                     (p->p_rlimit[RLIMIT_DATA].rlim_cur -
  498                      ctob(p->p_vmspace->vm_dsize))) {
  499                         return (ENOMEM);
  500                 }
  501         }
  502 
  503         /*
  504          * now let kernel internal function uvm_mmap do the work.
  505          */
  506 
  507         error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
  508             flags, handle, pos, p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
  509 
  510         if (error == 0)
  511                 /* remember to add offset */
  512                 *retval = (register_t)(addr + pageoff);
  513 
  514         return (error);
  515 }
  516 
  517 /*
  518  * sys___msync13: the msync system call (a front-end for flush)
  519  */
  520 
  521 int
  522 sys___msync13(l, v, retval)
  523         struct lwp *l;
  524         void *v;
  525         register_t *retval;
  526 {
  527         struct sys___msync13_args /* {
  528                 syscallarg(caddr_t) addr;
  529                 syscallarg(size_t) len;
  530                 syscallarg(int) flags;
  531         } */ *uap = v;
  532         struct proc *p = l->l_proc;
  533         vaddr_t addr;
  534         vsize_t size, pageoff;
  535         struct vm_map *map;
  536         int error, rv, flags, uvmflags;
  537 
  538         /*
  539          * extract syscall args from the uap
  540          */
  541 
  542         addr = (vaddr_t)SCARG(uap, addr);
  543         size = (vsize_t)SCARG(uap, len);
  544         flags = SCARG(uap, flags);
  545 
  546         /* sanity check flags */
  547         if ((flags & ~(MS_ASYNC | MS_SYNC | MS_INVALIDATE)) != 0 ||
  548             (flags & (MS_ASYNC | MS_SYNC | MS_INVALIDATE)) == 0 ||
  549             (flags & (MS_ASYNC | MS_SYNC)) == (MS_ASYNC | MS_SYNC))
  550                 return (EINVAL);
  551         if ((flags & (MS_ASYNC | MS_SYNC)) == 0)
  552                 flags |= MS_SYNC;
  553 
  554         /*
  555          * align the address to a page boundary and adjust the size accordingly.
  556          */
  557 
  558         pageoff = (addr & PAGE_MASK);
  559         addr -= pageoff;
  560         size += pageoff;
  561         size = (vsize_t)round_page(size);
  562 
  563         /* disallow wrap-around. */
  564         if (addr + size < addr)
  565                 return (EINVAL);
  566 
  567         /*
  568          * get map
  569          */
  570 
  571         map = &p->p_vmspace->vm_map;
  572 
  573         /*
  574          * XXXCDC: do we really need this semantic?
  575          *
  576          * XXX Gak!  If size is zero we are supposed to sync "all modified
  577          * pages within the region containing addr".  Unfortunately, we
  578          * don't really keep track of individual mmaps so we approximate
  579          * by flushing the range of the map entry containing addr.
  580          * This can be incorrect if the region splits or is coalesced
  581          * with a neighbor.
  582          */
  583 
  584         if (size == 0) {
  585                 struct vm_map_entry *entry;
  586 
  587                 vm_map_lock_read(map);
  588                 rv = uvm_map_lookup_entry(map, addr, &entry);
  589                 if (rv == TRUE) {
  590                         addr = entry->start;
  591                         size = entry->end - entry->start;
  592                 }
  593                 vm_map_unlock_read(map);
  594                 if (rv == FALSE)
  595                         return (EINVAL);
  596         }
  597 
  598         /*
  599          * translate MS_ flags into PGO_ flags
  600          */
  601 
  602         uvmflags = PGO_CLEANIT;
  603         if (flags & MS_INVALIDATE)
  604                 uvmflags |= PGO_FREE;
  605         if (flags & MS_SYNC)
  606                 uvmflags |= PGO_SYNCIO;
  607 
  608         error = uvm_map_clean(map, addr, addr+size, uvmflags);
  609         return error;
  610 }
  611 
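/*
 * A small userland sketch of the call handled above (illustrative
 * only): flushing modified pages of a shared file mapping back to the
 * underlying file and waiting for the write-back to finish:
 *
 *	char *base = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *	    MAP_SHARED, fd, 0);
 *
 *	base[0] = 'x';				 /* dirty the first page */
 *	if (msync(base, len, MS_SYNC) == -1)
 *		err(1, "msync");
 *
 * Passing a length of zero instead requests a sync of the whole
 * region containing base, which (as noted above) is approximated by
 * the map entry that contains that address.
 */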
  612 /*
  613  * sys_munmap: unmap a user's memory
  614  */
  615 
  616 int
  617 sys_munmap(l, v, retval)
  618         struct lwp *l;
  619         void *v;
  620         register_t *retval;
  621 {
  622         struct sys_munmap_args /* {
  623                 syscallarg(caddr_t) addr;
  624                 syscallarg(size_t) len;
  625         } */ *uap = v;
  626         struct proc *p = l->l_proc;
  627         vaddr_t addr;
  628         vsize_t size, pageoff;
  629         struct vm_map *map;
  630         vaddr_t vm_min_address = VM_MIN_ADDRESS;
  631         struct vm_map_entry *dead_entries;
  632 
  633         /*
  634          * get syscall args.
  635          */
  636 
  637         addr = (vaddr_t)SCARG(uap, addr);
  638         size = (vsize_t)SCARG(uap, len);
  639 
  640         /*
  641          * align the address to a page boundary and adjust the size accordingly.
  642          */
  643 
  644         pageoff = (addr & PAGE_MASK);
  645         addr -= pageoff;
  646         size += pageoff;
  647         size = (vsize_t)round_page(size);
  648 
  649         if ((int)size < 0)
  650                 return (EINVAL);
  651         if (size == 0)
  652                 return (0);
  653 
  654         /*
  655          * Check for illegal addresses.  Watch out for address wrap...
  656          * Note that VM_*_ADDRESS are not constants due to casts (argh).
  657          */
  658         if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
  659                 return (EINVAL);
  660         if (vm_min_address > 0 && addr < vm_min_address)
  661                 return (EINVAL);
  662         if (addr > addr + size)
  663                 return (EINVAL);
  664         map = &p->p_vmspace->vm_map;
  665 
  666         /*
  667          * interesting system call semantic: make sure entire range is
  668          * allocated before allowing an unmap.
  669          */
  670 
  671         vm_map_lock(map);
  672 #if 0
  673         if (!uvm_map_checkprot(map, addr, addr + size, VM_PROT_NONE)) {
  674                 vm_map_unlock(map);
  675                 return (EINVAL);
  676         }
  677 #endif
  678         uvm_unmap_remove(map, addr, addr + size, &dead_entries, NULL);
  679         vm_map_unlock(map);
  680         if (dead_entries != NULL)
  681                 uvm_unmap_detach(dead_entries, 0);
  682         return (0);
  683 }
  684 
  685 /*
  686  * sys_mprotect: the mprotect system call
  687  */
  688 
  689 int
  690 sys_mprotect(l, v, retval)
  691         struct lwp *l;
  692         void *v;
  693         register_t *retval;
  694 {
  695         struct sys_mprotect_args /* {
  696                 syscallarg(caddr_t) addr;
  697                 syscallarg(size_t) len;
  698                 syscallarg(int) prot;
  699         } */ *uap = v;
  700         struct proc *p = l->l_proc;
  701         vaddr_t addr;
  702         vsize_t size, pageoff;
  703         vm_prot_t prot;
  704         int error;
  705 
  706         /*
  707          * extract syscall args from uap
  708          */
  709 
  710         addr = (vaddr_t)SCARG(uap, addr);
  711         size = (vsize_t)SCARG(uap, len);
  712         prot = SCARG(uap, prot) & VM_PROT_ALL;
  713 
  714         /*
  715          * align the address to a page boundary and adjust the size accordingly.
  716          */
  717 
  718         pageoff = (addr & PAGE_MASK);
  719         addr -= pageoff;
  720         size += pageoff;
  721         size = round_page(size);
  722 
  723         error = uvm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
  724                                 FALSE);
  725         return error;
  726 }
  727 
  728 /*
  729  * sys_minherit: the minherit system call
  730  */
  731 
  732 int
  733 sys_minherit(l, v, retval)
  734         struct lwp *l;
  735         void *v;
  736         register_t *retval;
  737 {
  738         struct sys_minherit_args /* {
  739                 syscallarg(caddr_t) addr;
  740                 syscallarg(int) len;
  741                 syscallarg(int) inherit;
  742         } */ *uap = v;
  743         struct proc *p = l->l_proc;
  744         vaddr_t addr;
  745         vsize_t size, pageoff;
  746         vm_inherit_t inherit;
  747         int error;
  748 
  749         addr = (vaddr_t)SCARG(uap, addr);
  750         size = (vsize_t)SCARG(uap, len);
  751         inherit = SCARG(uap, inherit);
  752 
  753         /*
  754          * align the address to a page boundary and adjust the size accordingly.
  755          */
  756 
  757         pageoff = (addr & PAGE_MASK);
  758         addr -= pageoff;
  759         size += pageoff;
  760         size = (vsize_t)round_page(size);
  761 
  762         if ((int)size < 0)
  763                 return (EINVAL);
  764         error = uvm_map_inherit(&p->p_vmspace->vm_map, addr, addr + size,
  765                                 inherit);
  766         return error;
  767 }
  768 
  769 /*
  770  * sys_madvise: give advice about memory usage.
  771  */
  772 
  773 /* ARGSUSED */
  774 int
  775 sys_madvise(l, v, retval)
  776         struct lwp *l;
  777         void *v;
  778         register_t *retval;
  779 {
  780         struct sys_madvise_args /* {
  781                 syscallarg(caddr_t) addr;
  782                 syscallarg(size_t) len;
  783                 syscallarg(int) behav;
  784         } */ *uap = v;
  785         struct proc *p = l->l_proc;
  786         vaddr_t addr;
  787         vsize_t size, pageoff;
  788         int advice, error;
  789 
  790         addr = (vaddr_t)SCARG(uap, addr);
  791         size = (vsize_t)SCARG(uap, len);
  792         advice = SCARG(uap, behav);
  793 
  794         /*
  795          * align the address to a page boundary, and adjust the size accordingly
  796          */
  797 
  798         pageoff = (addr & PAGE_MASK);
  799         addr -= pageoff;
  800         size += pageoff;
  801         size = (vsize_t)round_page(size);
  802 
  803         if ((ssize_t)size <= 0)
  804                 return (EINVAL);
  805 
  806         switch (advice) {
  807         case MADV_NORMAL:
  808         case MADV_RANDOM:
  809         case MADV_SEQUENTIAL:
  810                 error = uvm_map_advice(&p->p_vmspace->vm_map, addr, addr + size,
  811                     advice);
  812                 break;
  813 
  814         case MADV_WILLNEED:
  815 
  816                 /*
  817                  * Activate all these pages, pre-faulting them in if
  818                  * necessary.
  819                  */
  820                 /*
  821                  * XXX IMPLEMENT ME.
  822                  * Should invent a "weak" mode for uvm_fault()
  823                  * which would only do the PGO_LOCKED pgo_get().
  824                  */
  825 
  826                 return (0);
  827 
  828         case MADV_DONTNEED:
  829 
  830                 /*
  831                  * Deactivate all these pages.  We don't need them
  832                  * any more.  We don't, however, toss the data in
  833                  * the pages.
  834                  */
  835 
  836                 error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
  837                     PGO_DEACTIVATE);
  838                 break;
  839 
  840         case MADV_FREE:
  841 
  842                 /*
  843                  * These pages contain no valid data, and may be
  844                  * garbage-collected.  Toss all resources, including
  845                  * any swap space in use.
  846                  */
  847 
  848                 error = uvm_map_clean(&p->p_vmspace->vm_map, addr, addr + size,
  849                     PGO_FREE);
  850                 break;
  851 
  852         case MADV_SPACEAVAIL:
  853 
  854                 /*
  855                  * XXXMRG What is this?  I think it's:
  856                  *
  857                  *      Ensure that we have allocated backing-store
  858                  *      for these pages.
  859                  *
  860                  * This is going to require changes to the page daemon,
  861                  * as it will free swap space allocated to pages in core.
  862                  * There's also what to do for device/file/anonymous memory.
  863                  */
  864 
  865                 return (EINVAL);
  866 
  867         default:
  868                 return (EINVAL);
  869         }
  870 
  871         return error;
  872 }
  873 
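/*
 * A minimal userland sketch of the interface implemented above
 * (illustrative only): hinting sequential access before a linear scan
 * of a mapping, then discarding the pages once the scan is done:
 *
 *	madvise(base, len, MADV_SEQUENTIAL);
 *	...scan the mapping from start to end...
 *	madvise(base, len, MADV_DONTNEED);
 *
 * As the switch above shows, MADV_DONTNEED only deactivates the pages
 * and keeps their contents, while MADV_FREE also releases the pages
 * and any swap space backing them.
 */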
  874 /*
  875  * sys_mlock: memory lock
  876  */
  877 
  878 int
  879 sys_mlock(l, v, retval)
  880         struct lwp *l;
  881         void *v;
  882         register_t *retval;
  883 {
  884         struct sys_mlock_args /* {
  885                 syscallarg(const void *) addr;
  886                 syscallarg(size_t) len;
  887         } */ *uap = v;
  888         struct proc *p = l->l_proc;
  889         vaddr_t addr;
  890         vsize_t size, pageoff;
  891         int error;
  892 
  893         /*
  894          * extract syscall args from uap
  895          */
  896 
  897         addr = (vaddr_t)SCARG(uap, addr);
  898         size = (vsize_t)SCARG(uap, len);
  899 
  900         /*
  901          * align the address to a page boundary and adjust the size accordingly
  902          */
  903 
  904         pageoff = (addr & PAGE_MASK);
  905         addr -= pageoff;
  906         size += pageoff;
  907         size = (vsize_t)round_page(size);
  908 
  909         /* disallow wrap-around. */
  910         if (addr + size < addr)
  911                 return (EINVAL);
  912 
  913         if (atop(size) + uvmexp.wired > uvmexp.wiredmax)
  914                 return (EAGAIN);
  915 
  916         if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
  917                         p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
  918                 return (EAGAIN);
  919 
  920         error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, FALSE,
  921             0);
  922         if (error == EFAULT)
  923                 error = ENOMEM;
  924         return error;
  925 }
  926 
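/*
 * A small userland sketch of the interface implemented above
 * (illustrative only): wiring a buffer so it cannot be paged out,
 * which the checks above permit only within the system-wide wired
 * page limit and the process's RLIMIT_MEMLOCK resource limit:
 *
 *	struct rlimit rl;
 *
 *	getrlimit(RLIMIT_MEMLOCK, &rl);
 *	if (len <= rl.rlim_cur && mlock(buf, len) == 0) {
 *		...use the wired buffer...
 *		munlock(buf, len);
 *	}
 */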
  927 /*
  928  * sys_munlock: unlock wired pages
  929  */
  930 
  931 int
  932 sys_munlock(l, v, retval)
  933         struct lwp *l;
  934         void *v;
  935         register_t *retval;
  936 {
  937         struct sys_munlock_args /* {
  938                 syscallarg(const void *) addr;
  939                 syscallarg(size_t) len;
  940         } */ *uap = v;
  941         struct proc *p = l->l_proc;
  942         vaddr_t addr;
  943         vsize_t size, pageoff;
  944         int error;
  945 
  946         /*
  947          * extract syscall args from uap
  948          */
  949 
  950         addr = (vaddr_t)SCARG(uap, addr);
  951         size = (vsize_t)SCARG(uap, len);
  952 
  953         /*
  954          * align the address to a page boundary, and adjust the size accordingly
  955          */
  956 
  957         pageoff = (addr & PAGE_MASK);
  958         addr -= pageoff;
  959         size += pageoff;
  960         size = (vsize_t)round_page(size);
  961 
  962         /* disallow wrap-around. */
  963         if (addr + size < addr)
  964                 return (EINVAL);
  965 
  966         error = uvm_map_pageable(&p->p_vmspace->vm_map, addr, addr+size, TRUE,
  967             0);
  968         if (error == EFAULT)
  969                 error = ENOMEM;
  970         return error;
  971 }
  972 
  973 /*
  974  * sys_mlockall: lock all pages mapped into an address space.
  975  */
  976 
  977 int
  978 sys_mlockall(l, v, retval)
  979         struct lwp *l;
  980         void *v;
  981         register_t *retval;
  982 {
  983         struct sys_mlockall_args /* {
  984                 syscallarg(int) flags;
  985         } */ *uap = v;
  986         struct proc *p = l->l_proc;
  987         int error, flags;
  988 
  989         flags = SCARG(uap, flags);
  990 
  991         if (flags == 0 ||
  992             (flags & ~(MCL_CURRENT|MCL_FUTURE)) != 0)
  993                 return (EINVAL);
  994 
  995         error = uvm_map_pageable_all(&p->p_vmspace->vm_map, flags,
  996             p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
  997         return (error);
  998 }
  999 
 1000 /*
 1001  * sys_munlockall: unlock all pages mapped into an address space.
 1002  */
 1003 
 1004 int
 1005 sys_munlockall(l, v, retval)
 1006         struct lwp *l;
 1007         void *v;
 1008         register_t *retval;
 1009 {
 1010         struct proc *p = l->l_proc;
 1011 
 1012         (void) uvm_map_pageable_all(&p->p_vmspace->vm_map, 0, 0);
 1013         return (0);
 1014 }
 1015 
 1016 /*
 1017  * uvm_mmap: internal version of mmap
 1018  *
 1019  * - used by sys_mmap and various framebuffers
 1020  * - handle is a vnode pointer or NULL for MAP_ANON
 1021  * - caller must page-align the file offset
 1022  */
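/*
 * For reference, sys_mmap() above invokes it as
 *
 *	error = uvm_mmap(&p->p_vmspace->vm_map, &addr, size, prot,
 *	    maxprot, flags, handle, pos,
 *	    p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur);
 *
 * where handle is the backing vnode (or NULL for MAP_ANON) and pos
 * has already been truncated to a page boundary.
 */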
 1023 
 1024 int
 1025 uvm_mmap(map, addr, size, prot, maxprot, flags, handle, foff, locklimit)
 1026         struct vm_map *map;
 1027         vaddr_t *addr;
 1028         vsize_t size;
 1029         vm_prot_t prot, maxprot;
 1030         int flags;
 1031         void *handle;
 1032         voff_t foff;
 1033         vsize_t locklimit;
 1034 {
 1035         struct uvm_object *uobj;
 1036         struct vnode *vp;
 1037         vaddr_t align = 0;
 1038         int error;
 1039         int advice = UVM_ADV_NORMAL;
 1040         uvm_flag_t uvmflag = 0;
 1041 
 1042         /*
 1043          * check params
 1044          */
 1045 
 1046         if (size == 0)
 1047                 return(0);
 1048         if (foff & PAGE_MASK)
 1049                 return(EINVAL);
 1050         if ((prot & maxprot) != prot)
 1051                 return(EINVAL);
 1052 
 1053         /*
 1054          * for non-fixed mappings, round off the suggested address.
 1055          * for fixed mappings, check alignment and zap old mappings.
 1056          */
 1057 
 1058         if ((flags & MAP_FIXED) == 0) {
 1059                 *addr = round_page(*addr);
 1060         } else {
 1061                 if (*addr & PAGE_MASK)
 1062                         return(EINVAL);
 1063                 uvmflag |= UVM_FLAG_FIXED;
 1064                 (void) uvm_unmap(map, *addr, *addr + size);
 1065         }
 1066 
 1067         /*
 1068          * Try to see if any requested alignment can even be attempted.
 1069          * Make sure we can express the alignment (asking for a >= 4GB
 1070          * alignment on an ILP32 architecture makes no sense) and that the
 1071          * alignment is at least a page-sized quantity.  If the
 1072          * request was for a fixed mapping, make sure the supplied address
 1073          * adheres to the requested alignment.
 1074          */
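        /*
         * For example, an alignment request encoded in the flags (e.g.
         * with NetBSD's MAP_ALIGNED(16) macro) arrives here as the value
         * 16 and is turned into align = 1 << 16, i.e. a 64KB alignment.
         * Values of 32 or more on an ILP32 machine, or values whose
         * decoded alignment is smaller than a page, are rejected below.
         */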
 1075         align = (flags & MAP_ALIGNMENT_MASK) >> MAP_ALIGNMENT_SHIFT;
 1076         if (align) {
 1077                 if (align >= sizeof(vaddr_t) * NBBY)
 1078                         return(EINVAL);
 1079                 align = 1L << align;
 1080                 if (align < PAGE_SIZE)
 1081                         return(EINVAL);
 1082                 if (align >= vm_map_max(map))
 1083                         return(ENOMEM);
 1084                 if (flags & MAP_FIXED) {
 1085                         if ((*addr & (align-1)) != 0)
 1086                                 return(EINVAL);
 1087                         align = 0;
 1088                 }
 1089         }
 1090 
 1091         /*
 1092          * handle anon vs. non-anon mappings.   for non-anon mappings attach
 1093          * to underlying vm object.
 1094          */
 1095 
 1096         if (flags & MAP_ANON) {
 1097                 foff = UVM_UNKNOWN_OFFSET;
 1098                 uobj = NULL;
 1099                 if ((flags & MAP_SHARED) == 0)
 1100                         /* XXX: defer amap create */
 1101                         uvmflag |= UVM_FLAG_COPYONW;
 1102                 else
 1103                         /* shared: create amap now */
 1104                         uvmflag |= UVM_FLAG_OVERLAY;
 1105 
 1106         } else {
 1107                 vp = (struct vnode *)handle;
 1108 
 1109                 /*
 1110                  * Don't allow mmap for EXEC if the file system
 1111                  * is mounted NOEXEC.
 1112                  */
 1113                 if ((prot & PROT_EXEC) != 0 &&
 1114                     (vp->v_mount->mnt_flag & MNT_NOEXEC) != 0)
 1115                         return (EACCES);
 1116 
 1117                 if (vp->v_type != VCHR) {
 1118                         error = VOP_MMAP(vp, 0, curproc->p_ucred, curproc);
 1119                         if (error) {
 1120                                 return error;
 1121                         }
 1122 
 1123                         uobj = uvn_attach((void *)vp, (flags & MAP_SHARED) ?
 1124                            maxprot : (maxprot & ~VM_PROT_WRITE));
 1125 
 1126                         /* XXX for now, attach doesn't gain a ref */
 1127                         VREF(vp);
 1128 
 1129                         /*
 1130                          * If the vnode is being mapped with PROT_EXEC,
 1131                          * then mark it as text.
 1132                          */
 1133                         if (prot & PROT_EXEC)
 1134                                 vn_markexec(vp);
 1135                 } else {
 1136                         int i = maxprot;
 1137 
 1138                         /*
 1139                          * XXX Some devices don't like to be mapped with
 1140                          * XXX PROT_EXEC or PROT_WRITE, but we don't really
 1141                          * XXX have a better way of handling this, right now
 1142                          */
 1143                         do {
 1144                                 uobj = udv_attach((void *) &vp->v_rdev,
 1145                                     (flags & MAP_SHARED) ? i :
 1146                                     (i & ~VM_PROT_WRITE), foff, size);
 1147                                 i--;
 1148                         } while ((uobj == NULL) && (i > 0));
 1149                         advice = UVM_ADV_RANDOM;
 1150                 }
 1151                 if (uobj == NULL)
 1152                         return((vp->v_type == VREG) ? ENOMEM : EINVAL);
 1153                 if ((flags & MAP_SHARED) == 0) {
 1154                         uvmflag |= UVM_FLAG_COPYONW;
 1155                 } else if ((maxprot & VM_PROT_WRITE) != 0) {
 1156                         simple_lock(&vp->v_interlock);
 1157                         vp->v_flag |= VWRITEMAP;
 1158                         simple_unlock(&vp->v_interlock);
 1159                 }
 1160         }
 1161 
 1162         uvmflag = UVM_MAPFLAG(prot, maxprot,
 1163                         (flags & MAP_SHARED) ? UVM_INH_SHARE : UVM_INH_COPY,
 1164                         advice, uvmflag);
 1165         error = uvm_map(map, addr, size, uobj, foff, align, uvmflag);
 1166         if (error) {
 1167                 if (uobj)
 1168                         uobj->pgops->pgo_detach(uobj);
 1169                 return error;
 1170         }
 1171 
 1172         /*
 1173          * POSIX 1003.1b -- if our address space was configured
 1174          * to lock all future mappings, wire the one we just made.
 1175          *
 1176          * Also handle the MAP_WIRED flag here.
 1177          */
 1178 
 1179         if (prot == VM_PROT_NONE) {
 1180 
 1181                 /*
 1182                  * No more work to do in this case.
 1183                  */
 1184 
 1185                 return (0);
 1186         }
 1187         vm_map_lock(map);
 1188         if ((flags & MAP_WIRED) != 0 || (map->flags & VM_MAP_WIREFUTURE) != 0) {
 1189                 if (atop(size) + uvmexp.wired > uvmexp.wiredmax ||
 1190                     (locklimit != 0 &&
 1191                      size + ptoa(pmap_wired_count(vm_map_pmap(map))) >
 1192                      locklimit)) {
 1193                         vm_map_unlock(map);
 1194                         uvm_unmap(map, *addr, *addr + size);
 1195                         return ENOMEM;
 1196                 }
 1197 
 1198                 /*
 1199                  * uvm_map_pageable() always returns the map unlocked.
 1200                  */
 1201 
 1202                 error = uvm_map_pageable(map, *addr, *addr + size,
 1203                                          FALSE, UVM_LK_ENTER);
 1204                 if (error) {
 1205                         uvm_unmap(map, *addr, *addr + size);
 1206                         return error;
 1207                 }
 1208                 return (0);
 1209         }
 1210         vm_map_unlock(map);
 1211         return 0;
 1212 }
 1213 
 1214 vaddr_t
 1215 uvm_default_mapaddr(struct proc *p, vaddr_t base, vsize_t sz)
 1216 {
 1217         return VM_DEFAULT_ADDRESS(base, sz);
 1218 }

This page is part of the FreeBSD/Linux Kernel Cross-Reference and was automatically generated using a modified version of the LXR engine.