FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_mmap.c


    1 /*
    2  * Copyright (c) 1988 University of Utah.
    3  * Copyright (c) 1991, 1993
    4  *      The Regents of the University of California.  All rights reserved.
    5  *
    6  * This code is derived from software contributed to Berkeley by
    7  * the Systems Programming Group of the University of Utah Computer
    8  * Science Department.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 3. All advertising materials mentioning features or use of this software
   19  *    must display the following acknowledgement:
   20  *      This product includes software developed by the University of
   21  *      California, Berkeley and its contributors.
   22  * 4. Neither the name of the University nor the names of its contributors
   23  *    may be used to endorse or promote products derived from this software
   24  *    without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36  * SUCH DAMAGE.
   37  *
   38  * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
   39  *
   40  *      @(#)vm_mmap.c   8.4 (Berkeley) 1/12/94
   41  * $FreeBSD$
   42  */
   43 
   44 /*
   45  * Mapped file (mmap) interface to VM
   46  */
   47 
   48 #include "opt_compat.h"
   49 
   50 #include <sys/param.h>
   51 #include <sys/kernel.h>
   52 #include <sys/systm.h>
   53 #include <sys/sysproto.h>
   54 #include <sys/filedesc.h>
   55 #include <sys/proc.h>
   56 #include <sys/resource.h>
   57 #include <sys/resourcevar.h>
   58 #include <sys/vnode.h>
   59 #include <sys/fcntl.h>
   60 #include <sys/file.h>
   61 #include <sys/mman.h>
   62 #include <sys/conf.h>
   63 #include <sys/stat.h>
   64 #include <sys/vmmeter.h>
   65 #include <sys/sysctl.h>
   66 
   67 #include <vm/vm.h>
   68 #include <vm/vm_param.h>
   69 #include <sys/lock.h>
   70 #include <vm/pmap.h>
   71 #include <vm/vm_map.h>
   72 #include <vm/vm_object.h>
   73 #include <vm/vm_page.h>
   74 #include <vm/vm_pager.h>
   75 #include <vm/vm_pageout.h>
   76 #include <vm/vm_extern.h>
   77 #include <vm/vm_page.h>
   78 #include <vm/vm_kern.h>
   79 
   80 #ifndef _SYS_SYSPROTO_H_
   81 struct sbrk_args {
   82         int incr;
   83 };
   84 #endif
   85 
   86 static int max_proc_mmap;
   87 SYSCTL_INT(_vm, OID_AUTO, max_proc_mmap, CTLFLAG_RW, &max_proc_mmap, 0, "");
   88 
   89 /*
   90  * Set the maximum number of vm_map_entry structures per process.  Roughly
    91  * speaking, vm_map_entry structures are tiny, so allowing them to eat 1/100
    92  * of our KVM malloc space still results in generous limits.  We want a
    93  * default that is good enough to prevent the kernel from running out of
    94  * resources if attacked from a compromised user account, but generous enough that
   95  * multi-threaded processes are not unduly inconvenienced.
   96  */
   97 
   98 static void vmmapentry_rsrc_init __P((void *));
   99 SYSINIT(vmmersrc, SI_SUB_KVM_RSRC, SI_ORDER_FIRST, vmmapentry_rsrc_init, NULL)
  100 
  101 static void
  102 vmmapentry_rsrc_init(dummy)
  103         void *dummy;
  104 {
  105     max_proc_mmap = vm_kmem_size / sizeof(struct vm_map_entry);
  106     max_proc_mmap /= 100;
  107 }
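       /*
        * A rough worked example, using assumed figures rather than anything
        * from this file: with vm_kmem_size near 40 MB and a vm_map_entry of
        * roughly 100 bytes, the computation above yields a default
        * max_proc_mmap on the order of 4000 entries, before the per-process
        * scaling by vm_refcnt that mmap() applies below.
        */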
  108 
  109 /* ARGSUSED */
  110 int
  111 sbrk(p, uap)
  112         struct proc *p;
  113         struct sbrk_args *uap;
  114 {
  115 
  116         /* Not yet implemented */
  117         return (EOPNOTSUPP);
  118 }
  119 
  120 #ifndef _SYS_SYSPROTO_H_
  121 struct sstk_args {
  122         int incr;
  123 };
  124 #endif
  125 
  126 /* ARGSUSED */
  127 int
  128 sstk(p, uap)
  129         struct proc *p;
  130         struct sstk_args *uap;
  131 {
  132 
  133         /* Not yet implemented */
  134         return (EOPNOTSUPP);
  135 }
  136 
  137 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
  138 #ifndef _SYS_SYSPROTO_H_
  139 struct getpagesize_args {
  140         int dummy;
  141 };
  142 #endif
  143 
  144 /* ARGSUSED */
  145 int
  146 ogetpagesize(p, uap)
  147         struct proc *p;
  148         struct getpagesize_args *uap;
  149 {
  150 
  151         p->p_retval[0] = PAGE_SIZE;
  152         return (0);
  153 }
  154 #endif                          /* COMPAT_43 || COMPAT_SUNOS */
  155 
  156 
  157 /* 
  158  * Memory Map (mmap) system call.  Note that the file offset
  159  * and address are allowed to be NOT page aligned, though if
   160  * the MAP_FIXED flag is set, both must have the same remainder
  161  * modulo the PAGE_SIZE (POSIX 1003.1b).  If the address is not
  162  * page-aligned, the actual mapping starts at trunc_page(addr)
  163  * and the return value is adjusted up by the page offset.
  164  *
  165  * Generally speaking, only character devices which are themselves
  166  * memory-based, such as a video framebuffer, can be mmap'd.  Otherwise
  167  * there would be no cache coherency between a descriptor and a VM mapping
  168  * both to the same character device.
  169  *
  170  * Block devices can be mmap'd no matter what they represent.  Cache coherency
  171  * is maintained as long as you do not write directly to the underlying
  172  * character device.
  173  */
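       /*
        * A minimal usage sketch, assuming 4 KB pages and an already open
        * descriptor fd:
        *
        *      p = mmap(NULL, 100, PROT_READ, MAP_SHARED, fd, 0x1234);
        *
        * maps from trunc_page(0x1234) == 0x1000 and returns the mapped
        * address plus the 0x234 page offset.  With MAP_FIXED the requested
        * address would itself have to carry that 0x234 remainder modulo
        * PAGE_SIZE, otherwise the call fails with EINVAL.
        */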
  174 #ifndef _SYS_SYSPROTO_H_
  175 struct mmap_args {
  176         void *addr;
  177         size_t len;
  178         int prot;
  179         int flags;
  180         int fd;
  181         long pad;
  182         off_t pos;
  183 };
  184 #endif
  185 
  186 int
  187 mmap(p, uap)
  188         struct proc *p;
  189         register struct mmap_args *uap;
  190 {
  191         register struct filedesc *fdp = p->p_fd;
  192         register struct file *fp = NULL;
  193         struct vnode *vp;
  194         vm_offset_t addr;
  195         vm_size_t size, pageoff;
  196         vm_prot_t prot, maxprot;
  197         void *handle;
  198         int flags, error;
  199         int disablexworkaround;
  200         off_t pos;
  201         struct vmspace *vms = p->p_vmspace;
  202         vm_object_t obj;
  203 
  204         addr = (vm_offset_t) uap->addr;
  205         size = uap->len;
  206         prot = uap->prot & VM_PROT_ALL;
  207         flags = uap->flags;
  208         pos = uap->pos;
  209 
  210         /* make sure mapping fits into numeric range etc */
  211         if ((ssize_t) uap->len < 0 ||
  212             ((flags & MAP_ANON) && uap->fd != -1))
  213                 return (EINVAL);
  214 
  215         if (flags & MAP_STACK) {
  216                 if ((uap->fd != -1) ||
  217                     ((prot & (PROT_READ | PROT_WRITE)) != (PROT_READ | PROT_WRITE)))
  218                         return (EINVAL);
  219                 flags |= MAP_ANON;
  220                 pos = 0;
  221         }
  222 
  223         /*
  224          * Align the file position to a page boundary,
  225          * and save its page offset component.
  226          */
  227         pageoff = (pos & PAGE_MASK);
  228         pos -= pageoff;
  229 
  230         /* Adjust size for rounding (on both ends). */
  231         size += pageoff;                        /* low end... */
  232         size = (vm_size_t) round_page(size);    /* hi end */
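               /*
                * For illustration, assuming a 4 KB page size: with pos ==
                * 0x12345 and len == 100, pageoff becomes 0x345, pos is backed
                * up to 0x12000, and size grows from 100 to 0x3a9 before being
                * rounded up to 0x1000, one page covering the requested bytes.
                */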
  233 
  234         /*
  235          * Check for illegal addresses.  Watch out for address wrap... Note
  236          * that VM_*_ADDRESS are not constants due to casts (argh).
  237          */
  238         if (flags & MAP_FIXED) {
  239                 /*
  240                  * The specified address must have the same remainder
  241                  * as the file offset taken modulo PAGE_SIZE, so it
  242                  * should be aligned after adjustment by pageoff.
  243                  */
  244                 addr -= pageoff;
  245                 if (addr & PAGE_MASK)
  246                         return (EINVAL);
  247                 /* Address range must be all in user VM space. */
  248                 if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
  249                         return (EINVAL);
  250 #ifndef i386
  251                 if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
  252                         return (EINVAL);
  253 #endif
  254                 if (addr + size < addr)
  255                         return (EINVAL);
  256         }
  257         /*
  258          * XXX for non-fixed mappings where no hint is provided or
  259          * the hint would fall in the potential heap space,
  260          * place it after the end of the largest possible heap.
  261          *
  262          * There should really be a pmap call to determine a reasonable
  263          * location.
  264          */
  265         else if (addr == 0 ||
  266             (addr >= round_page((vm_offset_t)vms->vm_taddr) &&
  267              addr < round_page((vm_offset_t)vms->vm_daddr + maxdsiz)))
  268                 addr = round_page((vm_offset_t)vms->vm_daddr + maxdsiz);
  269 
  270         if (flags & MAP_ANON) {
  271                 /*
  272                  * Mapping blank space is trivial.
  273                  */
  274                 handle = NULL;
  275                 maxprot = VM_PROT_ALL;
  276                 pos = 0;
  277         } else {
  278                 /*
  279                  * Mapping file, get fp for validation. Obtain vnode and make
  280                  * sure it is of appropriate type.
  281                  */
  282                 if (((unsigned) uap->fd) >= fdp->fd_nfiles ||
  283                     (fp = fdp->fd_ofiles[uap->fd]) == NULL)
  284                         return (EBADF);
  285                 if (fp->f_type != DTYPE_VNODE)
  286                         return (EINVAL);
  287                 /*
  288                  * POSIX shared-memory objects are defined to have
  289                  * kernel persistence, and are not defined to support
  290                  * read(2)/write(2) -- or even open(2).  Thus, we can
   291                  * use MAP_NOSYNC to trade on-disk coherence for speed.
  292                  * The shm_open(3) library routine turns on the FPOSIXSHM
  293                  * flag to request this behavior.
  294                  */
  295                 if (fp->f_flag & FPOSIXSHM)
  296                         flags |= MAP_NOSYNC;
  297                 vp = (struct vnode *) fp->f_data;
  298                 if (vp->v_type != VREG && vp->v_type != VCHR)
  299                         return (EINVAL);
  300                 if (vp->v_type == VREG) {
  301                         /*
  302                          * Get the proper underlying object
  303                          */
  304                         if (VOP_GETVOBJECT(vp, &obj) != 0)
  305                                 return (EINVAL);
  306                         vp = (struct vnode*)obj->handle;
  307                 }
  308 
  309                 /*
  310                  * don't let the descriptor disappear on us if we block
  311                  */
  312                 fhold(fp);
  313 
  314                 /*
  315                  * XXX hack to handle use of /dev/zero to map anon memory (ala
  316                  * SunOS).
  317                  */
  318                 if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
  319                         handle = NULL;
  320                         maxprot = VM_PROT_ALL;
  321                         flags |= MAP_ANON;
  322                         pos = 0;
  323                 } else {
  324                         /*
   325                          * cdevs do not provide private mappings of any kind.
  326                          */
  327                         /*
  328                          * However, for XIG X server to continue to work,
  329                          * we should allow the superuser to do it anyway.
  330                          * We only allow it at securelevel < 1.
  331                          * (Because the XIG X server writes directly to video
  332                          * memory via /dev/mem, it should never work at any
   333                          * other securelevel.)
  334                          * XXX this will have to go
  335                          */
  336                         if (securelevel >= 1)
  337                                 disablexworkaround = 1;
  338                         else
  339                                 disablexworkaround = suser(p);
  340                         if (vp->v_type == VCHR && disablexworkaround &&
  341                             (flags & (MAP_PRIVATE|MAP_COPY))) {
  342                                 error = EINVAL;
  343                                 goto done;
  344                         }
  345                         /*
  346                          * Ensure that file and memory protections are
  347                          * compatible.  Note that we only worry about
  348                          * writability if mapping is shared; in this case,
  349                          * current and max prot are dictated by the open file.
  350                          * XXX use the vnode instead?  Problem is: what
  351                          * credentials do we use for determination? What if
  352                          * proc does a setuid?
  353                          */
  354                         maxprot = VM_PROT_EXECUTE;      /* ??? */
  355                         if (fp->f_flag & FREAD) {
  356                                 maxprot |= VM_PROT_READ;
  357                         } else if (prot & PROT_READ) {
  358                                 error = EACCES;
  359                                 goto done;
  360                         }
  361                         /*
  362                          * If we are sharing potential changes (either via
  363                          * MAP_SHARED or via the implicit sharing of character
  364                          * device mappings), and we are trying to get write
  365                          * permission although we opened it without asking
  366                          * for it, bail out.  Check for superuser, only if
  367                          * we're at securelevel < 1, to allow the XIG X server
  368                          * to continue to work.
  369                          */
  370 
  371                         if ((flags & MAP_SHARED) != 0 ||
  372                             (vp->v_type == VCHR && disablexworkaround)) {
  373                                 if ((fp->f_flag & FWRITE) != 0) {
  374                                         struct vattr va;
  375                                         if ((error =
  376                                             VOP_GETATTR(vp, &va,
  377                                                         p->p_ucred, p))) {
  378                                                 goto done;
  379                                         }
  380                                         if ((va.va_flags &
  381                                             (IMMUTABLE|APPEND)) == 0) {
  382                                                 maxprot |= VM_PROT_WRITE;
  383                                         } else if (prot & PROT_WRITE) {
  384                                                 error = EPERM;
  385                                                 goto done;
  386                                         }
  387                                 } else if ((prot & PROT_WRITE) != 0) {
  388                                         error = EACCES;
  389                                         goto done;
  390                                 }
  391                         } else {
  392                                 maxprot |= VM_PROT_WRITE;
  393                         }
  394                         handle = (void *)vp;
  395                 }
  396         }
  397 
  398         /*
   399          * Do not allow more than a certain number of vm_map_entry structures
  400          * per process.  Scale with the number of rforks sharing the map
  401          * to make the limit reasonable for threads.
  402          */
  403         if (max_proc_mmap && 
  404             vms->vm_map.nentries >= max_proc_mmap * vms->vm_refcnt) {
  405                 error = ENOMEM;
  406                 goto done;
  407         }
  408 
  409         error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot,
  410             flags, handle, pos);
  411         if (error == 0)
  412                 p->p_retval[0] = (register_t) (addr + pageoff);
  413 done:
  414         if (fp)
  415                 fdrop(fp, p);
  416         return (error);
  417 }
  418 
  419 #ifdef COMPAT_43
  420 #ifndef _SYS_SYSPROTO_H_
  421 struct ommap_args {
  422         caddr_t addr;
  423         int len;
  424         int prot;
  425         int flags;
  426         int fd;
  427         long pos;
  428 };
  429 #endif
  430 int
  431 ommap(p, uap)
  432         struct proc *p;
  433         register struct ommap_args *uap;
  434 {
  435         struct mmap_args nargs;
  436         static const char cvtbsdprot[8] = {
  437                 0,
  438                 PROT_EXEC,
  439                 PROT_WRITE,
  440                 PROT_EXEC | PROT_WRITE,
  441                 PROT_READ,
  442                 PROT_EXEC | PROT_READ,
  443                 PROT_WRITE | PROT_READ,
  444                 PROT_EXEC | PROT_WRITE | PROT_READ,
  445         };
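               /*
                * The table index is the old 4.3BSD protection value read bit
                * by bit: bit 0 requests execute, bit 1 write and bit 2 read,
                * so e.g. an old prot of 6 converts to PROT_WRITE | PROT_READ.
                */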
  446 
  447 #define OMAP_ANON       0x0002
  448 #define OMAP_COPY       0x0020
  449 #define OMAP_SHARED     0x0010
  450 #define OMAP_FIXED      0x0100
  451 #define OMAP_INHERIT    0x0800
  452 
  453         nargs.addr = uap->addr;
  454         nargs.len = uap->len;
  455         nargs.prot = cvtbsdprot[uap->prot & 0x7];
  456         nargs.flags = 0;
  457         if (uap->flags & OMAP_ANON)
  458                 nargs.flags |= MAP_ANON;
  459         if (uap->flags & OMAP_COPY)
  460                 nargs.flags |= MAP_COPY;
  461         if (uap->flags & OMAP_SHARED)
  462                 nargs.flags |= MAP_SHARED;
  463         else
  464                 nargs.flags |= MAP_PRIVATE;
  465         if (uap->flags & OMAP_FIXED)
  466                 nargs.flags |= MAP_FIXED;
  467         if (uap->flags & OMAP_INHERIT)
  468                 nargs.flags |= MAP_INHERIT;
  469         nargs.fd = uap->fd;
  470         nargs.pos = uap->pos;
  471         return (mmap(p, &nargs));
  472 }
  473 #endif                          /* COMPAT_43 */
  474 
  475 
  476 #ifndef _SYS_SYSPROTO_H_
  477 struct msync_args {
  478         void *addr;
  479         int len;
  480         int flags;
  481 };
  482 #endif
  483 int
  484 msync(p, uap)
  485         struct proc *p;
  486         struct msync_args *uap;
  487 {
  488         vm_offset_t addr;
  489         vm_size_t size, pageoff;
  490         int flags;
  491         vm_map_t map;
  492         int rv;
  493 
  494         addr = (vm_offset_t) uap->addr;
  495         size = uap->len;
  496         flags = uap->flags;
  497 
  498         pageoff = (addr & PAGE_MASK);
  499         addr -= pageoff;
  500         size += pageoff;
  501         size = (vm_size_t) round_page(size);
  502         if (addr + size < addr)
  503                 return(EINVAL);
  504 
  505         if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
  506                 return (EINVAL);
  507 
  508         map = &p->p_vmspace->vm_map;
  509 
  510         /*
  511          * XXX Gak!  If size is zero we are supposed to sync "all modified
   512          * pages within the region containing addr".  Unfortunately, we don't
  513          * really keep track of individual mmaps so we approximate by flushing
  514          * the range of the map entry containing addr. This can be incorrect
  515          * if the region splits or is coalesced with a neighbor.
  516          */
  517         if (size == 0) {
  518                 vm_map_entry_t entry;
  519 
  520                 vm_map_lock_read(map);
  521                 rv = vm_map_lookup_entry(map, addr, &entry);
  522                 vm_map_unlock_read(map);
  523                 if (rv == FALSE)
  524                         return (EINVAL);
  525                 addr = entry->start;
  526                 size = entry->end - entry->start;
  527         }
  528 
  529         /*
  530          * Clean the pages and interpret the return value.
  531          */
  532         rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0,
  533             (flags & MS_INVALIDATE) != 0);
  534 
  535         switch (rv) {
  536         case KERN_SUCCESS:
  537                 break;
  538         case KERN_INVALID_ADDRESS:
  539                 return (EINVAL);        /* Sun returns ENOMEM? */
  540         case KERN_FAILURE:
  541                 return (EIO);
  542         default:
  543                 return (EINVAL);
  544         }
  545 
  546         return (0);
  547 }
  548 
  549 #ifndef _SYS_SYSPROTO_H_
  550 struct munmap_args {
  551         void *addr;
  552         size_t len;
  553 };
  554 #endif
  555 int
  556 munmap(p, uap)
  557         register struct proc *p;
  558         register struct munmap_args *uap;
  559 {
  560         vm_offset_t addr;
  561         vm_size_t size, pageoff;
  562         vm_map_t map;
  563 
  564         addr = (vm_offset_t) uap->addr;
  565         size = uap->len;
  566 
  567         pageoff = (addr & PAGE_MASK);
  568         addr -= pageoff;
  569         size += pageoff;
  570         size = (vm_size_t) round_page(size);
  571         if (addr + size < addr)
  572                 return(EINVAL);
  573 
  574         if (size == 0)
  575                 return (0);
  576 
  577         /*
  578          * Check for illegal addresses.  Watch out for address wrap... Note
  579          * that VM_*_ADDRESS are not constants due to casts (argh).
  580          */
  581         if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
  582                 return (EINVAL);
  583 #ifndef i386
  584         if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
  585                 return (EINVAL);
  586 #endif
  587         map = &p->p_vmspace->vm_map;
  588         /*
  589          * Make sure entire range is allocated.
  590          */
  591         if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
  592                 return (EINVAL);
  593         /* returns nothing but KERN_SUCCESS anyway */
  594         (void) vm_map_remove(map, addr, addr + size);
  595         return (0);
  596 }
  597 
  598 #if 0
  599 void
  600 munmapfd(p, fd)
  601         struct proc *p;
  602         int fd;
  603 {
  604         /*
  605          * XXX should unmap any regions mapped to this file
  606          */
  607         p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
  608 }
  609 #endif
  610 
  611 #ifndef _SYS_SYSPROTO_H_
  612 struct mprotect_args {
  613         const void *addr;
  614         size_t len;
  615         int prot;
  616 };
  617 #endif
  618 int
  619 mprotect(p, uap)
  620         struct proc *p;
  621         struct mprotect_args *uap;
  622 {
  623         vm_offset_t addr;
  624         vm_size_t size, pageoff;
  625         register vm_prot_t prot;
  626 
  627         addr = (vm_offset_t) uap->addr;
  628         size = uap->len;
  629         prot = uap->prot & VM_PROT_ALL;
  630 #if defined(VM_PROT_READ_IS_EXEC)
  631         if (prot & VM_PROT_READ)
  632                 prot |= VM_PROT_EXECUTE;
  633 #endif
  634 
  635         pageoff = (addr & PAGE_MASK);
  636         addr -= pageoff;
  637         size += pageoff;
  638         size = (vm_size_t) round_page(size);
  639         if (addr + size < addr)
  640                 return(EINVAL);
  641 
  642         switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
  643                 FALSE)) {
  644         case KERN_SUCCESS:
  645                 return (0);
  646         case KERN_PROTECTION_FAILURE:
  647                 return (EACCES);
  648         }
  649         return (EINVAL);
  650 }
  651 
  652 #ifndef _SYS_SYSPROTO_H_
  653 struct minherit_args {
  654         void *addr;
  655         size_t len;
  656         int inherit;
  657 };
  658 #endif
  659 int
  660 minherit(p, uap)
  661         struct proc *p;
  662         struct minherit_args *uap;
  663 {
  664         vm_offset_t addr;
  665         vm_size_t size, pageoff;
  666         register vm_inherit_t inherit;
  667 
  668         addr = (vm_offset_t)uap->addr;
  669         size = uap->len;
  670         inherit = uap->inherit;
  671 
  672         pageoff = (addr & PAGE_MASK);
  673         addr -= pageoff;
  674         size += pageoff;
  675         size = (vm_size_t) round_page(size);
  676         if (addr + size < addr)
  677                 return(EINVAL);
  678 
  679         switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
  680             inherit)) {
  681         case KERN_SUCCESS:
  682                 return (0);
  683         case KERN_PROTECTION_FAILURE:
  684                 return (EACCES);
  685         }
  686         return (EINVAL);
  687 }
  688 
  689 #ifndef _SYS_SYSPROTO_H_
  690 struct madvise_args {
  691         void *addr;
  692         size_t len;
  693         int behav;
  694 };
  695 #endif
  696 
  697 /* ARGSUSED */
  698 int
  699 madvise(p, uap)
  700         struct proc *p;
  701         struct madvise_args *uap;
  702 {
  703         vm_offset_t start, end;
  704 
  705         /*
  706          * Check for illegal behavior
  707          */
  708         if (uap->behav < 0 || uap->behav > MADV_CORE)
  709                 return (EINVAL);
  710         /*
  711          * Check for illegal addresses.  Watch out for address wrap... Note
  712          * that VM_*_ADDRESS are not constants due to casts (argh).
  713          */
  714         if (VM_MAXUSER_ADDRESS > 0 &&
  715                 ((vm_offset_t) uap->addr + uap->len) > VM_MAXUSER_ADDRESS)
  716                 return (EINVAL);
  717 #ifndef i386
  718         if (VM_MIN_ADDRESS > 0 && uap->addr < VM_MIN_ADDRESS)
  719                 return (EINVAL);
  720 #endif
  721         if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
  722                 return (EINVAL);
  723 
  724         /*
  725          * Since this routine is only advisory, we default to conservative
  726          * behavior.
  727          */
  728         start = trunc_page((vm_offset_t) uap->addr);
  729         end = round_page((vm_offset_t) uap->addr + uap->len);
  730         
  731         if (vm_map_madvise(&p->p_vmspace->vm_map, start, end, uap->behav))
  732                 return (EINVAL);
  733         return (0);
  734 }
  735 
  736 #ifndef _SYS_SYSPROTO_H_
  737 struct mincore_args {
  738         const void *addr;
  739         size_t len;
  740         char *vec;
  741 };
  742 #endif
  743 
  744 /* ARGSUSED */
  745 int
  746 mincore(p, uap)
  747         struct proc *p;
  748         struct mincore_args *uap;
  749 {
  750         vm_offset_t addr, first_addr;
  751         vm_offset_t end, cend;
  752         pmap_t pmap;
  753         vm_map_t map;
  754         char *vec;
  755         int error;
  756         int vecindex, lastvecindex;
  757         register vm_map_entry_t current;
  758         vm_map_entry_t entry;
  759         int mincoreinfo;
  760         unsigned int timestamp;
  761 
  762         /*
  763          * Make sure that the addresses presented are valid for user
  764          * mode.
  765          */
  766         first_addr = addr = trunc_page((vm_offset_t) uap->addr);
  767         end = addr + (vm_size_t)round_page(uap->len);
  768         if (VM_MAXUSER_ADDRESS > 0 && end > VM_MAXUSER_ADDRESS)
  769                 return (EINVAL);
  770         if (end < addr)
  771                 return (EINVAL);
  772 
  773         /*
  774          * Address of byte vector
  775          */
  776         vec = uap->vec;
  777 
  778         map = &p->p_vmspace->vm_map;
  779         pmap = vmspace_pmap(p->p_vmspace);
  780 
  781         vm_map_lock_read(map);
  782 RestartScan:
  783         timestamp = map->timestamp;
  784 
  785         if (!vm_map_lookup_entry(map, addr, &entry))
  786                 entry = entry->next;
  787 
  788         /*
  789          * Do this on a map entry basis so that if the pages are not
   790          * in the current process's address space, we can easily look
  791          * up the pages elsewhere.
  792          */
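               /*
                * Each byte stored into the user's vector describes one page
                * of the request: MINCORE_INCORE when the page is resident,
                * possibly or'd with the MODIFIED/REFERENCED flags gathered
                * from the pmap or the vm_page below, and zero for pages that
                * fall in unmapped gaps.
                */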
  793         lastvecindex = -1;
  794         for(current = entry;
  795                 (current != &map->header) && (current->start < end);
  796                 current = current->next) {
  797 
  798                 /*
  799                  * ignore submaps (for now) or null objects
  800                  */
  801                 if ((current->eflags & MAP_ENTRY_IS_SUB_MAP) ||
  802                         current->object.vm_object == NULL)
  803                         continue;
  804                 
  805                 /*
  806                  * limit this scan to the current map entry and the
  807                  * limits for the mincore call
  808                  */
  809                 if (addr < current->start)
  810                         addr = current->start;
  811                 cend = current->end;
  812                 if (cend > end)
  813                         cend = end;
  814 
  815                 /*
  816                  * scan this entry one page at a time
  817                  */
  818                 while(addr < cend) {
  819                         /*
  820                          * Check pmap first, it is likely faster, also
  821                          * it can provide info as to whether we are the
  822                          * one referencing or modifying the page.
  823                          */
  824                         mincoreinfo = pmap_mincore(pmap, addr);
  825                         if (!mincoreinfo) {
  826                                 vm_pindex_t pindex;
  827                                 vm_ooffset_t offset;
  828                                 vm_page_t m;
  829                                 /*
  830                                  * calculate the page index into the object
  831                                  */
  832                                 offset = current->offset + (addr - current->start);
  833                                 pindex = OFF_TO_IDX(offset);
  834                                 m = vm_page_lookup(current->object.vm_object,
  835                                         pindex);
  836                                 /*
  837                                  * if the page is resident, then gather information about
  838                                  * it.
  839                                  */
  840                                 if (m) {
  841                                         mincoreinfo = MINCORE_INCORE;
  842                                         if (m->dirty ||
  843                                                 pmap_is_modified(m))
  844                                                 mincoreinfo |= MINCORE_MODIFIED_OTHER;
  845                                         if ((m->flags & PG_REFERENCED) ||
  846                                                 pmap_ts_referenced(m)) {
  847                                                 vm_page_flag_set(m, PG_REFERENCED);
  848                                                 mincoreinfo |= MINCORE_REFERENCED_OTHER;
  849                                         }
  850                                 }
  851                         }
  852 
  853                         /*
  854                          * subyte may page fault.  In case it needs to modify
  855                          * the map, we release the lock.
  856                          */
  857                         vm_map_unlock_read(map);
  858 
  859                         /*
  860                          * calculate index into user supplied byte vector
  861                          */
  862                         vecindex = OFF_TO_IDX(addr - first_addr);
  863 
  864                         /*
  865                          * If we have skipped map entries, we need to make sure that
  866                          * the byte vector is zeroed for those skipped entries.
  867                          */
  868                         while((lastvecindex + 1) < vecindex) {
  869                                 error = subyte( vec + lastvecindex, 0);
  870                                 if (error) {
  871                                         return (EFAULT);
  872                                 }
  873                                 ++lastvecindex;
  874                         }
  875 
  876                         /*
  877                          * Pass the page information to the user
  878                          */
  879                         error = subyte( vec + vecindex, mincoreinfo);
  880                         if (error) {
  881                                 return (EFAULT);
  882                         }
  883 
  884                         /*
  885                          * If the map has changed, due to the subyte, the previous
  886                          * output may be invalid.
  887                          */
  888                         vm_map_lock_read(map);
  889                         if (timestamp != map->timestamp)
  890                                 goto RestartScan;
  891 
  892                         lastvecindex = vecindex;
  893                         addr += PAGE_SIZE;
  894                 }
  895         }
  896 
  897         /*
  898          * subyte may page fault.  In case it needs to modify
  899          * the map, we release the lock.
  900          */
  901         vm_map_unlock_read(map);
  902 
  903         /*
  904          * Zero the last entries in the byte vector.
  905          */
  906         vecindex = OFF_TO_IDX(end - first_addr);
  907         while((lastvecindex + 1) < vecindex) {
  908                 error = subyte( vec + lastvecindex, 0);
  909                 if (error) {
  910                         return (EFAULT);
  911                 }
  912                 ++lastvecindex;
  913         }
  914         
  915         /*
  916          * If the map has changed, due to the subyte, the previous
  917          * output may be invalid.
  918          */
  919         vm_map_lock_read(map);
  920         if (timestamp != map->timestamp)
  921                 goto RestartScan;
  922         vm_map_unlock_read(map);
  923 
  924         return (0);
  925 }
  926 
  927 #ifndef _SYS_SYSPROTO_H_
  928 struct mlock_args {
  929         const void *addr;
  930         size_t len;
  931 };
  932 #endif
  933 int
  934 mlock(p, uap)
  935         struct proc *p;
  936         struct mlock_args *uap;
  937 {
  938         vm_offset_t addr;
  939         vm_size_t size, pageoff;
  940         int error;
  941 
  942         addr = (vm_offset_t) uap->addr;
  943         size = uap->len;
  944 
  945         pageoff = (addr & PAGE_MASK);
  946         addr -= pageoff;
  947         size += pageoff;
  948         size = (vm_size_t) round_page(size);
  949 
  950         /* disable wrap around */
  951         if (addr + size < addr)
  952                 return (EINVAL);
  953 
  954         if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
  955                 return (EAGAIN);
  956 
  957 #ifdef pmap_wired_count
  958         if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
  959             p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
  960                 return (ENOMEM);
  961 #else
  962         error = suser(p);
  963         if (error)
  964                 return (error);
  965 #endif
  966 
  967         error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, FALSE);
  968         return (error == KERN_SUCCESS ? 0 : ENOMEM);
  969 }
  970 
  971 #ifndef _SYS_SYSPROTO_H_
  972 struct mlockall_args {
  973         int     how;
  974 };
  975 #endif
  976 
  977 int
  978 mlockall(p, uap)
  979         struct proc *p;
  980         struct mlockall_args *uap;
  981 {
  982         return 0;
  983 }
  984 
  985 #ifndef _SYS_SYSPROTO_H_
   986 struct munlockall_args {
  987         int     how;
  988 };
  989 #endif
  990 
  991 int
  992 munlockall(p, uap)
  993         struct proc *p;
  994         struct munlockall_args *uap;
  995 {
  996         return 0;
  997 }
  998 
  999 #ifndef _SYS_SYSPROTO_H_
 1000 struct munlock_args {
 1001         const void *addr;
 1002         size_t len;
 1003 };
 1004 #endif
 1005 int
 1006 munlock(p, uap)
 1007         struct proc *p;
 1008         struct munlock_args *uap;
 1009 {
 1010         vm_offset_t addr;
 1011         vm_size_t size, pageoff;
 1012         int error;
 1013 
 1014         addr = (vm_offset_t) uap->addr;
 1015         size = uap->len;
 1016 
 1017         pageoff = (addr & PAGE_MASK);
 1018         addr -= pageoff;
 1019         size += pageoff;
 1020         size = (vm_size_t) round_page(size);
 1021 
 1022         /* disable wrap around */
 1023         if (addr + size < addr)
 1024                 return (EINVAL);
 1025 
 1026 #ifndef pmap_wired_count
 1027         error = suser(p);
 1028         if (error)
 1029                 return (error);
 1030 #endif
 1031 
 1032         error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, TRUE);
 1033         return (error == KERN_SUCCESS ? 0 : ENOMEM);
 1034 }
 1035 
 1036 /*
 1037  * Internal version of mmap.
 1038  * Currently used by mmap, exec, and sys5 shared memory.
 1039  * Handle is either a vnode pointer or NULL for MAP_ANON.
 1040  */
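       /*
        * On entry *addr is the placement hint (or the required address when
        * MAP_FIXED is set); on successful return it holds the address that
        * was actually chosen.  foff is the offset into the backing object
        * and must be page aligned; prot, maxprot and flags mirror the mmap()
        * arguments above.
        */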
 1041 int
 1042 vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
 1043         vm_prot_t maxprot, int flags,
 1044         void *handle,
 1045         vm_ooffset_t foff)
 1046 {
 1047         boolean_t fitit;
 1048         vm_object_t object;
 1049         struct vnode *vp = NULL;
 1050         objtype_t type;
 1051         int rv = KERN_SUCCESS;
 1052         vm_ooffset_t objsize;
 1053         int docow;
 1054         struct proc *p = curproc;
 1055 
 1056         if (size == 0)
 1057                 return (0);
 1058 
 1059         objsize = size = round_page(size);
 1060 
 1061         if (p->p_vmspace->vm_map.size + size >
 1062             p->p_rlimit[RLIMIT_VMEM].rlim_cur) {
 1063                 return(ENOMEM);
 1064         }
 1065 
 1066         /*
 1067          * We currently can only deal with page aligned file offsets.
 1068          * The check is here rather than in the syscall because the
  1069          * kernel calls this function internally for other mmapping
 1070          * operations (such as in exec) and non-aligned offsets will
 1071          * cause pmap inconsistencies...so we want to be sure to
 1072          * disallow this in all cases.
 1073          */
 1074         if (foff & PAGE_MASK)
 1075                 return (EINVAL);
 1076 
 1077         if ((flags & MAP_FIXED) == 0) {
 1078                 fitit = TRUE;
 1079                 *addr = round_page(*addr);
 1080         } else {
 1081                 if (*addr != trunc_page(*addr))
 1082                         return (EINVAL);
 1083                 fitit = FALSE;
 1084                 (void) vm_map_remove(map, *addr, *addr + size);
 1085         }
 1086 
 1087         /*
 1088          * Lookup/allocate object.
 1089          */
 1090         if (flags & MAP_ANON) {
 1091                 type = OBJT_DEFAULT;
 1092                 /*
 1093                  * Unnamed anonymous regions always start at 0.
 1094                  */
 1095                 if (handle == 0)
 1096                         foff = 0;
 1097         } else {
 1098                 vp = (struct vnode *) handle;
 1099                 if (vp->v_type == VCHR) {
 1100                         type = OBJT_DEVICE;
 1101                         handle = (void *)(intptr_t)vp->v_rdev;
 1102                 } else {
 1103                         struct vattr vat;
 1104                         int error;
 1105 
 1106                         error = VOP_GETATTR(vp, &vat, p->p_ucred, p);
 1107                         if (error)
 1108                                 return (error);
 1109                         objsize = round_page(vat.va_size);
 1110                         type = OBJT_VNODE;
 1111                         /*
 1112                          * if it is a regular file without any references
 1113                          * we do not need to sync it.
 1114                          */
 1115                         if (vp->v_type == VREG && vat.va_nlink == 0) {
 1116                                 flags |= MAP_NOSYNC;
 1117                         }
 1118                 }
 1119         }
 1120 
 1121         if (handle == NULL) {
 1122                 object = NULL;
 1123                 docow = 0;
 1124         } else {
 1125                 object = vm_pager_allocate(type,
 1126                         handle, objsize, prot, foff);
 1127                 if (object == NULL)
 1128                         return (type == OBJT_DEVICE ? EINVAL : ENOMEM);
 1129                 docow = MAP_PREFAULT_PARTIAL;
 1130         }
 1131 
 1132         /*
 1133          * Force device mappings to be shared.
 1134          */
 1135         if (type == OBJT_DEVICE || type == OBJT_PHYS) {
 1136                 flags &= ~(MAP_PRIVATE|MAP_COPY);
 1137                 flags |= MAP_SHARED;
 1138         }
 1139 
 1140         if ((flags & (MAP_ANON|MAP_SHARED)) == 0)
 1141                 docow |= MAP_COPY_ON_WRITE;
 1142         if (flags & MAP_NOSYNC)
 1143                 docow |= MAP_DISABLE_SYNCER;
 1144         if (flags & MAP_NOCORE)
 1145                 docow |= MAP_DISABLE_COREDUMP;
 1146 
 1147 #if defined(VM_PROT_READ_IS_EXEC)
 1148         if (prot & VM_PROT_READ)
 1149                 prot |= VM_PROT_EXECUTE;
 1150 
 1151         if (maxprot & VM_PROT_READ)
 1152                 maxprot |= VM_PROT_EXECUTE;
 1153 #endif
 1154 
 1155         if (fitit) {
 1156                 *addr = pmap_addr_hint(object, *addr, size);
 1157         }
 1158 
 1159         if (flags & MAP_STACK)
 1160                 rv = vm_map_stack (map, *addr, size, prot,
 1161                                    maxprot, docow);
 1162         else
 1163                 rv = vm_map_find(map, object, foff, addr, size, fitit,
 1164                                  prot, maxprot, docow);
 1165 
 1166         if (rv != KERN_SUCCESS) {
 1167                 /*
 1168                  * Lose the object reference. Will destroy the
 1169                  * object if it's an unnamed anonymous mapping
 1170                  * or named anonymous without other references.
 1171                  */
 1172                 vm_object_deallocate(object);
 1173                 goto out;
 1174         }
 1175 
 1176         /*
 1177          * Shared memory is also shared with children.
 1178          */
 1179         if (flags & (MAP_SHARED|MAP_INHERIT)) {
 1180                 rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE);
 1181                 if (rv != KERN_SUCCESS) {
 1182                         (void) vm_map_remove(map, *addr, *addr + size);
 1183                         goto out;
 1184                 }
 1185         }
 1186 out:
 1187         switch (rv) {
 1188         case KERN_SUCCESS:
 1189                 return (0);
 1190         case KERN_INVALID_ADDRESS:
 1191         case KERN_NO_SPACE:
 1192                 return (ENOMEM);
 1193         case KERN_PROTECTION_FAILURE:
 1194                 return (EACCES);
 1195         default:
 1196                 return (EINVAL);
 1197         }
 1198 }

This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.