FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_mmap.c


    1 /*
    2  * Copyright (c) 1988 University of Utah.
    3  * Copyright (c) 1991, 1993
    4  *      The Regents of the University of California.  All rights reserved.
    5  *
    6  * This code is derived from software contributed to Berkeley by
    7  * the Systems Programming Group of the University of Utah Computer
    8  * Science Department.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 3. All advertising materials mentioning features or use of this software
   19  *    must display the following acknowledgement:
   20  *      This product includes software developed by the University of
   21  *      California, Berkeley and its contributors.
   22  * 4. Neither the name of the University nor the names of its contributors
   23  *    may be used to endorse or promote products derived from this software
   24  *    without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36  * SUCH DAMAGE.
   37  *
   38  * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
   39  *
   40  *      @(#)vm_mmap.c   8.4 (Berkeley) 1/12/94
   41  * $FreeBSD: src/sys/vm/vm_mmap.c,v 1.53.2.5 1999/09/05 08:24:31 peter Exp $
   42  */
   43 
   44 /*
   45  * Mapped file (mmap) interface to VM
   46  */
   47 
   48 #include "opt_rlimit.h"
   49 
   50 #include <sys/param.h>
   51 #include <sys/systm.h>
   52 #include <sys/sysproto.h>
   53 #include <sys/filedesc.h>
   54 #include <sys/resourcevar.h>
   55 #include <sys/proc.h>
   56 #include <sys/vnode.h>
   57 #include <sys/file.h>
   58 #include <sys/mman.h>
   59 #include <sys/conf.h>
   60 #include <sys/stat.h>
   61 #include <sys/vmmeter.h>
   62 
   63 #include <miscfs/specfs/specdev.h>
   64 
   65 #include <vm/vm.h>
   66 #include <vm/vm_param.h>
   67 #include <vm/vm_prot.h>
   68 #include <vm/vm_inherit.h>
   69 #include <vm/lock.h>
   70 #include <vm/pmap.h>
   71 #include <vm/vm_map.h>
   72 #include <vm/vm_object.h>
   73 #include <vm/vm_pager.h>
   74 #include <vm/vm_pageout.h>
   75 #include <vm/vm_extern.h>
   76 #include <vm/vm_kern.h>
   77 #include <vm/vm_page.h>
   78 
   79 #ifndef _SYS_SYSPROTO_H_
   80 struct sbrk_args {
   81         int incr;
   82 };
   83 #endif
   84 
   85 /* ARGSUSED */
   86 int
   87 sbrk(p, uap, retval)
   88         struct proc *p;
   89         struct sbrk_args *uap;
   90         int *retval;
   91 {
   92 
   93         /* Not yet implemented */
   94         return (EOPNOTSUPP);
   95 }
   96 
   97 #ifndef _SYS_SYSPROTO_H_
   98 struct sstk_args {
   99         int incr;
  100 };
  101 #endif
  102 
  103 /* ARGSUSED */
  104 int
  105 sstk(p, uap, retval)
  106         struct proc *p;
  107         struct sstk_args *uap;
  108         int *retval;
  109 {
  110 
  111         /* Not yet implemented */
  112         return (EOPNOTSUPP);
  113 }
  114 
  115 #if defined(COMPAT_43) || defined(COMPAT_SUNOS)
  116 #ifndef _SYS_SYSPROTO_H_
  117 struct getpagesize_args {
  118         int dummy;
  119 };
  120 #endif
  121 
  122 /* ARGSUSED */
  123 int
  124 ogetpagesize(p, uap, retval)
  125         struct proc *p;
  126         struct getpagesize_args *uap;
  127         int *retval;
  128 {
  129 
  130         *retval = PAGE_SIZE;
  131         return (0);
  132 }
  133 #endif                          /* COMPAT_43 || COMPAT_SUNOS */
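
      /*
       * Example (editorial sketch, not part of the original file): the same
       * page rounding that the syscalls below apply, done in userland with
       * the page size from sysconf(3).  round_to_page() is a hypothetical
       * helper; it assumes the page size is a power of two.
       *
       *	#include <unistd.h>
       *
       *	size_t
       *	round_to_page(size_t len)
       *	{
       *		size_t pgsz = (size_t)sysconf(_SC_PAGESIZE);
       *
       *		return ((len + pgsz - 1) & ~(pgsz - 1));
       *	}
       */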
  134 
  135 #ifndef _SYS_SYSPROTO_H_
  136 struct mmap_args {
  137         caddr_t addr;
  138         size_t len;
  139         int prot;
  140         int flags;
  141         int fd;
  142         long pad;
  143         off_t pos;
  144 };
  145 #endif
  146 
  147 int
  148 mmap(p, uap, retval)
  149         struct proc *p;
  150         register struct mmap_args *uap;
  151         int *retval;
  152 {
  153         register struct filedesc *fdp = p->p_fd;
  154         register struct file *fp;
  155         struct vnode *vp;
  156         vm_offset_t addr;
  157         vm_size_t size, pageoff;
  158         vm_prot_t prot, maxprot;
  159         caddr_t handle;
  160         int flags, error;
  161         int disablexworkaround;
  162   
  163         addr = (vm_offset_t) uap->addr;
  164 
  165         prot = uap->prot & VM_PROT_ALL;
  166         flags = uap->flags;
  167         /*
  168          * Address (if FIXED) must be page aligned. Size is implicitly rounded
  169          * to a page boundary.
  170          */
   171
  172         if (((flags & MAP_FIXED) && (addr & PAGE_MASK)) ||
  173             (ssize_t) uap->len < 0 || ((flags & MAP_ANON) && uap->fd != -1))
  174                 return (EINVAL);
  175 
  176         /*
  177          * Round page if not already disallowed by above test
  178          * XXX: Is there any point in the MAP_FIXED align requirement above?
  179          */
  180         size = uap->len;
  181         pageoff = (addr & PAGE_MASK);
  182         addr -= pageoff;
  183         size += pageoff;
  184         size = (vm_size_t) round_page(size);
  185 
  186         /*
  187          * Check for illegal addresses.  Watch out for address wrap... Note
  188          * that VM_*_ADDRESS are not constants due to casts (argh).
  189          */
  190         if (flags & MAP_FIXED) {
  191                 if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
  192                         return (EINVAL);
  193 #ifndef i386
  194                 if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
  195                         return (EINVAL);
  196 #endif
  197                 if (addr + size < addr)
  198                         return (EINVAL);
  199         }
  200         /*
   201          * XXX if no hint is provided for a non-fixed mapping, place it after
   202          * the end of the largest possible heap.
  203          *
  204          * There should really be a pmap call to determine a reasonable location.
  205          */
  206         if (addr == 0 && (flags & MAP_FIXED) == 0)
  207                 addr = round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
  208         if (flags & MAP_ANON) {
  209                 /*
  210                  * Mapping blank space is trivial.
  211                  */
  212                 handle = NULL;
  213                 maxprot = VM_PROT_ALL;
  214         } else {
  215                 /*
  216                  * Mapping file, get fp for validation. Obtain vnode and make
  217                  * sure it is of appropriate type.
  218                  */
  219                 if (((unsigned) uap->fd) >= fdp->fd_nfiles ||
  220                     (fp = fdp->fd_ofiles[uap->fd]) == NULL)
  221                         return (EBADF);
  222                 if (fp->f_type != DTYPE_VNODE)
  223                         return (EINVAL);
  224                 vp = (struct vnode *) fp->f_data;
  225                 if (vp->v_type != VREG && vp->v_type != VCHR)
  226                         return (EINVAL);
  227                 /*
  228                  * XXX hack to handle use of /dev/zero to map anon memory (ala
  229                  * SunOS).
  230                  */
  231                 if (vp->v_type == VCHR && iszerodev(vp->v_rdev)) {
  232                         handle = NULL;
  233                         maxprot = VM_PROT_ALL;
  234                         flags |= MAP_ANON;
  235                 } else {
  236                         /*
   237                          * cdevs do not provide private mappings of any kind.
  238                          */
  239                         /*
  240                          * However, for XIG X server to continue to work,
  241                          * we should allow the superuser to do it anyway.
  242                          * We only allow it at securelevel < 1.
  243                          * (Because the XIG X server writes directly to video
  244                          * memory via /dev/mem, it should never work at any
   245                          * other securelevel.)
  246                          * XXX this will have to go
  247                          */
  248                         if (securelevel >= 1)
  249                                 disablexworkaround = 1;
  250                         else
  251                                 disablexworkaround = suser(p->p_ucred,
  252                                                            &p->p_acflag);
  253                         if (vp->v_type == VCHR && disablexworkaround &&
  254                                 (flags & (MAP_PRIVATE|MAP_COPY)))
  255                                  return (EINVAL);
  256                         /*
  257                          * Ensure that file and memory protections are
  258                          * compatible.  Note that we only worry about
  259                          * writability if mapping is shared; in this case,
  260                          * current and max prot are dictated by the open file.
  261                          * XXX use the vnode instead?  Problem is: what
  262                          * credentials do we use for determination? What if
  263                          * proc does a setuid?
  264                          */
  265                         maxprot = VM_PROT_EXECUTE;      /* ??? */
  266                         if (fp->f_flag & FREAD)
  267                                 maxprot |= VM_PROT_READ;
  268                         else if (prot & PROT_READ)
  269                                 return (EACCES);
  270                         /*
  271                          * If we are sharing potential changes (either via
  272                          * MAP_SHARED or via the implicit sharing of character
  273                          * device mappings), and we are trying to get write
  274                          * permission although we opened it without asking
  275                          * for it, bail out.  Check for superuser, only if
  276                          * we're at securelevel < 1, to allow the XIG X server
  277                          * to continue to work.
  278                          */
  279 
  280                         if ((flags & MAP_SHARED) != 0 ||
  281                             (vp->v_type == VCHR && disablexworkaround)) {
  282                                 if ((fp->f_flag & FWRITE) != 0) {
  283                                         struct vattr va;
  284 
  285                                         if ((error =
  286                                             VOP_GETATTR(vp, &va,
  287                                                         p->p_ucred, p)))
  288                                                 return (error);
  289                                         if ((va.va_flags &
  290                                             (IMMUTABLE|APPEND)) == 0)
  291                                                 maxprot |= VM_PROT_WRITE;
  292                                         else if (prot & PROT_WRITE)
  293                                                 return (EPERM);
  294                                 } else if ((prot & PROT_WRITE) != 0)
  295                                         return (EACCES);
  296                         } else
  297                                 maxprot |= VM_PROT_WRITE;
  298 
  299                         handle = (caddr_t) vp;
  300                 }
  301         }
  302         error = vm_mmap(&p->p_vmspace->vm_map, &addr, size, prot, maxprot,
  303             flags, handle, uap->pos);
  304         if (error == 0)
  305                 *retval = (int) addr;
  306         return (error);
  307 }
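
      /*
       * Example (editorial sketch, not part of the original file): the
       * argument rules enforced above, seen from userland.  MAP_ANON
       * requires fd == -1, a MAP_FIXED address must be page aligned, and
       * len is rounded up to a page boundary by the kernel.  /etc/motd is
       * just a stand-in for any regular file.
       *
       *	#include <sys/types.h>
       *	#include <sys/mman.h>
       *	#include <sys/stat.h>
       *	#include <err.h>
       *	#include <fcntl.h>
       *	#include <stdio.h>
       *	#include <unistd.h>
       *
       *	int
       *	main(void)
       *	{
       *		struct stat st;
       *		char *p;
       *		int fd;
       *
       *		if ((fd = open("/etc/motd", O_RDONLY)) == -1)
       *			err(1, "open");
       *		if (fstat(fd, &st) == -1)
       *			err(1, "fstat");
       *		p = mmap(NULL, (size_t)st.st_size, PROT_READ,
       *		    MAP_PRIVATE, fd, 0);
       *		if (p == MAP_FAILED)
       *			err(1, "mmap");
       *		fwrite(p, 1, (size_t)st.st_size, stdout);
       *		munmap(p, (size_t)st.st_size);
       *		close(fd);
       *		return (0);
       *	}
       */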
  308 
  309 #ifdef COMPAT_43
  310 #ifndef _SYS_SYSPROTO_H_
  311 struct ommap_args {
  312         caddr_t addr;
  313         int len;
  314         int prot;
  315         int flags;
  316         int fd;
  317         long pos;
  318 };
  319 #endif
  320 int
  321 ommap(p, uap, retval)
  322         struct proc *p;
  323         register struct ommap_args *uap;
  324         int *retval;
  325 {
  326         struct mmap_args nargs;
  327         static const char cvtbsdprot[8] = {
  328                 0,
  329                 PROT_EXEC,
  330                 PROT_WRITE,
  331                 PROT_EXEC | PROT_WRITE,
  332                 PROT_READ,
  333                 PROT_EXEC | PROT_READ,
  334                 PROT_WRITE | PROT_READ,
  335                 PROT_EXEC | PROT_WRITE | PROT_READ,
  336         };
  337 
  338 #define OMAP_ANON       0x0002
  339 #define OMAP_COPY       0x0020
  340 #define OMAP_SHARED     0x0010
  341 #define OMAP_FIXED      0x0100
  342 #define OMAP_INHERIT    0x0800
  343 
  344         nargs.addr = uap->addr;
  345         nargs.len = uap->len;
  346         nargs.prot = cvtbsdprot[uap->prot & 0x7];
  347         nargs.flags = 0;
  348         if (uap->flags & OMAP_ANON)
  349                 nargs.flags |= MAP_ANON;
  350         if (uap->flags & OMAP_COPY)
  351                 nargs.flags |= MAP_COPY;
  352         if (uap->flags & OMAP_SHARED)
  353                 nargs.flags |= MAP_SHARED;
  354         else
  355                 nargs.flags |= MAP_PRIVATE;
  356         if (uap->flags & OMAP_FIXED)
  357                 nargs.flags |= MAP_FIXED;
  358         if (uap->flags & OMAP_INHERIT)
  359                 nargs.flags |= MAP_INHERIT;
  360         nargs.fd = uap->fd;
  361         nargs.pos = uap->pos;
  362         return (mmap(p, &nargs, retval));
  363 }
  364 #endif                          /* COMPAT_43 */
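
      /*
       * The cvtbsdprot[] table above decodes the old 4.3BSD protection
       * encoding, in which bit 0 requested execute, bit 1 write, and bit 2
       * read; the table index is the old prot value masked to three bits.
       * The same translation written out as an expression (editorial
       * sketch; cvt_oprot and the OPROT_* names are hypothetical):
       *
       *	#define OPROT_EXEC	0x1
       *	#define OPROT_WRITE	0x2
       *	#define OPROT_READ	0x4
       *
       *	int
       *	cvt_oprot(int oprot)
       *	{
       *		return (((oprot & OPROT_EXEC)  ? PROT_EXEC  : 0) |
       *		        ((oprot & OPROT_WRITE) ? PROT_WRITE : 0) |
       *		        ((oprot & OPROT_READ)  ? PROT_READ  : 0));
       *	}
       */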
  365 
  366 
  367 #ifndef _SYS_SYSPROTO_H_
  368 struct msync_args {
  369         caddr_t addr;
  370         int len;
  371         int flags;
  372 };
  373 #endif
  374 int
  375 msync(p, uap, retval)
  376         struct proc *p;
  377         struct msync_args *uap;
  378         int *retval;
  379 {
  380         vm_offset_t addr;
  381         vm_size_t size, pageoff;
  382         int flags;
  383         vm_map_t map;
  384         int rv;
  385 
  386         addr = (vm_offset_t) uap->addr;
  387         size = uap->len;
  388         flags = uap->flags;
  389 
  390         pageoff = (addr & PAGE_MASK);
  391         addr -= pageoff;
  392         size += pageoff;
  393         size = (vm_size_t) round_page(size);
  394         if (addr + size < addr)
  395                 return(EINVAL);
  396 
  397         if ((flags & (MS_ASYNC|MS_INVALIDATE)) == (MS_ASYNC|MS_INVALIDATE))
  398                 return (EINVAL);
  399 
  400         map = &p->p_vmspace->vm_map;
  401 
  402         /*
  403          * XXX Gak!  If size is zero we are supposed to sync "all modified
   404          * pages within the region containing addr".  Unfortunately, we don't
  405          * really keep track of individual mmaps so we approximate by flushing
  406          * the range of the map entry containing addr. This can be incorrect
  407          * if the region splits or is coalesced with a neighbor.
  408          */
  409         if (size == 0) {
  410                 vm_map_entry_t entry;
  411 
  412                 vm_map_lock_read(map);
  413                 rv = vm_map_lookup_entry(map, addr, &entry);
  414                 vm_map_unlock_read(map);
  415                 if (rv == FALSE)
  416                         return (EINVAL);
  417                 addr = entry->start;
  418                 size = entry->end - entry->start;
  419         }
  420 
  421         /*
  422          * Clean the pages and interpret the return value.
  423          */
  424         rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0,
  425             (flags & MS_INVALIDATE) != 0);
  426 
  427         switch (rv) {
  428         case KERN_SUCCESS:
  429                 break;
  430         case KERN_INVALID_ADDRESS:
  431                 return (EINVAL);        /* Sun returns ENOMEM? */
  432         case KERN_FAILURE:
  433                 return (EIO);
  434         default:
  435                 return (EINVAL);
  436         }
  437 
  438         return (0);
  439 }
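
      /*
       * Example (editorial sketch, not part of the original file): flushing
       * modified pages of a shared file mapping back to disk.  fd and len
       * are assumed to come from a prior open(2)/fstat(2).  Passing flags
       * of 0 (MS_ASYNC not set) requests a synchronous clean, per the
       * vm_map_clean() call above; MS_ASYNC | MS_INVALIDATE is rejected
       * with EINVAL.
       *
       *	#include <sys/mman.h>
       *	#include <err.h>
       *
       *	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
       *	    MAP_SHARED, fd, 0);
       *	if (p == MAP_FAILED)
       *		err(1, "mmap");
       *	p[0] ^= 1;
       *	if (msync(p, len, 0) == -1)
       *		err(1, "msync");
       */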
  440 
  441 #ifndef _SYS_SYSPROTO_H_
  442 struct munmap_args {
  443         caddr_t addr;
  444         size_t len;
  445 };
  446 #endif
  447 int
  448 munmap(p, uap, retval)
  449         register struct proc *p;
  450         register struct munmap_args *uap;
  451         int *retval;
  452 {
  453         vm_offset_t addr;
  454         vm_size_t size, pageoff;
  455         vm_map_t map;
  456 
  457         addr = (vm_offset_t) uap->addr;
  458         size = uap->len;
  459 
  460         pageoff = (addr & PAGE_MASK);
  461         addr -= pageoff;
  462         size += pageoff;
  463         size = (vm_size_t) round_page(size);
  464         if (addr + size < addr)
  465                 return(EINVAL);
  466 
  467         if (size == 0)
  468                 return (0);
  469 
  470         /*
  471          * Check for illegal addresses.  Watch out for address wrap... Note
  472          * that VM_*_ADDRESS are not constants due to casts (argh).
  473          */
  474         if (VM_MAXUSER_ADDRESS > 0 && addr + size > VM_MAXUSER_ADDRESS)
  475                 return (EINVAL);
  476 #ifndef i386
  477         if (VM_MIN_ADDRESS > 0 && addr < VM_MIN_ADDRESS)
  478                 return (EINVAL);
  479 #endif
   480
   481
  482         map = &p->p_vmspace->vm_map;
  483         /*
  484          * Make sure entire range is allocated.
  485          */
  486         if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
  487                 return (EINVAL);
  488         /* returns nothing but KERN_SUCCESS anyway */
  489         (void) vm_map_remove(map, addr, addr + size);
  490         return (0);
  491 }
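
      /*
       * Example (editorial sketch, not part of the original file): punching
       * a one-page hole in a larger anonymous mapping.  The check above
       * only requires that the whole range be mapped, so partial unmaps of
       * a region are fine.
       *
       *	#include <sys/mman.h>
       *	#include <err.h>
       *	#include <unistd.h>
       *
       *	size_t pg = (size_t)sysconf(_SC_PAGESIZE);
       *	char *p = mmap(NULL, 4 * pg, PROT_READ | PROT_WRITE,
       *	    MAP_ANON | MAP_PRIVATE, -1, 0);
       *	if (p == MAP_FAILED || munmap(p + pg, pg) == -1)
       *		err(1, "mmap/munmap");
       */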
  492 
  493 void
  494 munmapfd(p, fd)
  495         struct proc *p;
  496         int fd;
  497 {
  498         /*
  499          * XXX should unmap any regions mapped to this file
  500          */
  501         p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED;
  502 }
  503 
  504 #ifndef _SYS_SYSPROTO_H_
  505 struct mprotect_args {
  506         caddr_t addr;
  507         size_t len;
  508         int prot;
  509 };
  510 #endif
  511 int
  512 mprotect(p, uap, retval)
  513         struct proc *p;
  514         struct mprotect_args *uap;
  515         int *retval;
  516 {
  517         vm_offset_t addr;
  518         vm_size_t size, pageoff;
  519         register vm_prot_t prot;
  520 
  521         addr = (vm_offset_t) uap->addr;
  522         size = uap->len;
  523         prot = uap->prot & VM_PROT_ALL;
  524 #if defined(VM_PROT_READ_IS_EXEC)
  525         if (prot & VM_PROT_READ)
  526                 prot |= VM_PROT_EXECUTE;
  527 #endif
  528 
  529         pageoff = (addr & PAGE_MASK);
  530         addr -= pageoff;
  531         size += pageoff;
  532         size = (vm_size_t) round_page(size);
  533         if (addr + size < addr)
  534                 return(EINVAL);
  535 
  536         switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
  537                 FALSE)) {
  538         case KERN_SUCCESS:
  539                 return (0);
  540         case KERN_PROTECTION_FAILURE:
  541                 return (EACCES);
  542         }
  543         return (EINVAL);
  544 }
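
      /*
       * Example (editorial sketch, not part of the original file): revoking
       * write access to a page; pg is assumed from sysconf(3) as before.
       * Asking for more than the mapping's maximum protection (e.g.
       * PROT_WRITE on a MAP_SHARED mapping of a file opened O_RDONLY)
       * comes back as EACCES via the KERN_PROTECTION_FAILURE case above.
       *
       *	#include <sys/mman.h>
       *	#include <err.h>
       *
       *	char *p = mmap(NULL, pg, PROT_READ | PROT_WRITE,
       *	    MAP_ANON | MAP_PRIVATE, -1, 0);
       *	p[0] = 1;
       *	if (mprotect(p, pg, PROT_READ) == -1)
       *		err(1, "mprotect");
       *	(a store through p would now deliver SIGSEGV)
       */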
  545 
  546 #ifndef _SYS_SYSPROTO_H_
  547 struct minherit_args {
  548         caddr_t addr;
  549         size_t len;
  550         int inherit;
  551 };
  552 #endif
  553 int
  554 minherit(p, uap, retval)
  555         struct proc *p;
  556         struct minherit_args *uap;
  557         int *retval;
  558 {
  559         vm_offset_t addr;
  560         vm_size_t size, pageoff;
  561         register vm_inherit_t inherit;
  562 
  563         addr = (vm_offset_t)uap->addr;
  564         size = uap->len;
  565         inherit = uap->inherit;
  566 
  567         pageoff = (addr & PAGE_MASK);
  568         addr -= pageoff;
  569         size += pageoff;
  570         size = (vm_size_t) round_page(size);
  571         if (addr + size < addr)
  572                 return(EINVAL);
  573 
  574         switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
  575             inherit)) {
  576         case KERN_SUCCESS:
  577                 return (0);
  578         case KERN_PROTECTION_FAILURE:
  579                 return (EACCES);
  580         }
  581         return (EINVAL);
  582 }
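
      /*
       * Example (editorial sketch, not part of the original file): sharing
       * an anonymous page with a child across fork(2).  MAP_PRIVATE memory
       * is normally copied into the child; setting INHERIT_SHARE (the
       * <sys/mman.h> name for VM_INHERIT_SHARE) makes the same pages
       * visible in both processes.  pg is assumed from sysconf(3).
       *
       *	#include <sys/mman.h>
       *	#include <err.h>
       *	#include <unistd.h>
       *
       *	int *counter = mmap(NULL, pg, PROT_READ | PROT_WRITE,
       *	    MAP_ANON | MAP_PRIVATE, -1, 0);
       *	if (counter == MAP_FAILED ||
       *	    minherit(counter, pg, INHERIT_SHARE) == -1)
       *		err(1, "mmap/minherit");
       *	if (fork() == 0) {
       *		(*counter)++;			(visible to the parent)
       *		_exit(0);
       *	}
       */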
  583 
  584 #ifndef _SYS_SYSPROTO_H_
  585 struct madvise_args {
  586         caddr_t addr;
  587         size_t len;
  588         int behav;
  589 };
  590 #endif
  591 
  592 /* ARGSUSED */
  593 int
  594 madvise(p, uap, retval)
  595         struct proc *p;
  596         struct madvise_args *uap;
  597         int *retval;
  598 {
  599         vm_map_t map;
  600         pmap_t pmap;
  601         vm_offset_t start, end;
  602         /*
  603          * Check for illegal addresses.  Watch out for address wrap... Note
  604          * that VM_*_ADDRESS are not constants due to casts (argh).
  605          */
  606         if (VM_MAXUSER_ADDRESS > 0 &&
  607                 ((vm_offset_t) uap->addr + uap->len) > VM_MAXUSER_ADDRESS)
  608                 return (EINVAL);
  609 #ifndef i386
  610         if (VM_MIN_ADDRESS > 0 && uap->addr < VM_MIN_ADDRESS)
  611                 return (EINVAL);
  612 #endif
  613         if (((vm_offset_t) uap->addr + uap->len) < (vm_offset_t) uap->addr)
  614                 return (EINVAL);
  615 
  616         /*
  617          * Since this routine is only advisory, we default to conservative
  618          * behavior.
  619          */
  620         start = trunc_page((vm_offset_t) uap->addr);
  621         end = round_page((vm_offset_t) uap->addr + uap->len);
  622         
  623         map = &p->p_vmspace->vm_map;
  624         pmap = &p->p_vmspace->vm_pmap;
  625 
  626         vm_map_madvise(map, pmap, start, end, uap->behav);
  627 
  628         return (0);
  629 }
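
      /*
       * Example (editorial sketch, not part of the original file):
       * access-pattern hints on an existing mapping p of length len.  As
       * the comment above says, the call is purely advisory; success
       * promises nothing about paging behavior.
       *
       *	#include <sys/mman.h>
       *
       *	madvise(p, len, MADV_SEQUENTIAL);	(ahead of a linear scan)
       *	... scan through p ...
       *	madvise(p, len, MADV_DONTNEED);		(done with the data)
       */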
  630 
  631 #ifndef _SYS_SYSPROTO_H_
  632 struct mincore_args {
  633         caddr_t addr;
  634         size_t len;
  635         char *vec;
  636 };
  637 #endif
  638 
  639 /* ARGSUSED */
  640 int
  641 mincore(p, uap, retval)
  642         struct proc *p;
  643         struct mincore_args *uap;
  644         int *retval;
  645 {
  646         vm_offset_t addr, first_addr;
  647         vm_offset_t end, cend;
  648         pmap_t pmap;
  649         vm_map_t map;
  650         char *vec;
  651         int error;
  652         int vecindex, lastvecindex;
  653         register vm_map_entry_t current;
  654         vm_map_entry_t entry;
  655         int mincoreinfo;
  656 
  657         /*
  658          * Make sure that the addresses presented are valid for user
  659          * mode.
  660          */
  661         first_addr = addr = trunc_page((vm_offset_t) uap->addr);
  662         end = addr + (vm_size_t)round_page(uap->len);
  663         if (VM_MAXUSER_ADDRESS > 0 && end > VM_MAXUSER_ADDRESS)
  664                 return (EINVAL);
  665         if (end < addr)
  666                 return (EINVAL);
  667 
  668         /*
  669          * Address of byte vector
  670          */
  671         vec = uap->vec;
  672 
  673         map = &p->p_vmspace->vm_map;
  674         pmap = &p->p_vmspace->vm_pmap;
  675 
  676         vm_map_lock(map);
  677 
  678         /*
  679          * Not needed here
  680          */
  681 #if 0
  682         VM_MAP_RANGE_CHECK(map, addr, end);
  683 #endif
  684 
  685         if (!vm_map_lookup_entry(map, addr, &entry))
  686                 entry = entry->next;
  687 
  688         /*
  689          * Do this on a map entry basis so that if the pages are not
   690          * in the current process's address space, we can easily look
  691          * up the pages elsewhere.
  692          */
  693         lastvecindex = -1;
  694         for(current = entry;
  695                 (current != &map->header) && (current->start < end);
  696                 current = current->next) {
  697 
  698                 /*
  699                  * ignore submaps (for now) or null objects
  700                  */
  701                 if ((current->eflags & (MAP_ENTRY_IS_A_MAP|MAP_ENTRY_IS_SUB_MAP)) ||
  702                         current->object.vm_object == NULL)
  703                         continue;
  704                 
  705                 /*
  706                  * limit this scan to the current map entry and the
  707                  * limits for the mincore call
  708                  */
  709                 if (addr < current->start)
  710                         addr = current->start;
  711                 cend = current->end;
  712                 if (cend > end)
  713                         cend = end;
  714 
  715                 /*
  716                  * scan this entry one page at a time
  717                  */
  718                 while(addr < cend) {
  719                         /*
   720                          * Check the pmap first; it is likely faster and
   721                          * can also tell whether we are the one
   722                          * referencing or modifying the page.
  723                          */
  724                         mincoreinfo = pmap_mincore(pmap, addr);
  725                         if (!mincoreinfo) {
  726                                 vm_pindex_t pindex;
  727                                 vm_ooffset_t offset;
  728                                 vm_page_t m;
  729                                 /*
  730                                  * calculate the page index into the object
  731                                  */
  732                                 offset = current->offset + (addr - current->start);
  733                                 pindex = OFF_TO_IDX(offset);
  734                                 m = vm_page_lookup(current->object.vm_object,
  735                                         pindex);
  736                                 /*
  737                                  * if the page is resident, then gather information about
  738                                  * it.
  739                                  */
  740                                 if (m) {
  741                                         mincoreinfo = MINCORE_INCORE;
  742                                         if (m->dirty ||
  743                                                 pmap_is_modified(VM_PAGE_TO_PHYS(m)))
  744                                                 mincoreinfo |= MINCORE_MODIFIED_OTHER;
  745                                         if ((m->flags & PG_REFERENCED) ||
  746                                                 pmap_is_referenced(VM_PAGE_TO_PHYS(m)))
  747                                                 mincoreinfo |= MINCORE_REFERENCED_OTHER;
  748                                 }
  749                         }
  750 
  751                         /*
  752                          * calculate index into user supplied byte vector
  753                          */
  754                         vecindex = OFF_TO_IDX(addr - first_addr);
  755 
  756                         /*
  757                          * If we have skipped map entries, we need to make sure that
  758                          * the byte vector is zeroed for those skipped entries.
  759                          */
   760                         while((lastvecindex + 1) < vecindex) {
   761                                 ++lastvecindex;
   762                                 error = subyte( vec + lastvecindex, 0);
   763                                 if (error) {
   764                                         vm_map_unlock(map);
   765                                         return (EFAULT);
   766                                 }
   767                         }
  768 
  769                         /*
  770                          * Pass the page information to the user
  771                          */
  772                         error = subyte( vec + vecindex, mincoreinfo);
  773                         if (error) {
  774                                 vm_map_unlock(map);
  775                                 return (EFAULT);
  776                         }
  777                         lastvecindex = vecindex;
  778                         addr += PAGE_SIZE;
  779                 }
  780         }
  781 
  782         /*
  783          * Zero the last entries in the byte vector.
  784          */
  785         vecindex = OFF_TO_IDX(end - first_addr);
   786         while((lastvecindex + 1) < vecindex) {
   787                 ++lastvecindex;
   788                 error = subyte( vec + lastvecindex, 0);
   789                 if (error) {
   790                         vm_map_unlock(map);
   791                         return (EFAULT);
   792                 }
   793         }
  794         
  795         vm_map_unlock(map);
  796         return (0);
  797 }
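
      /*
       * Example (editorial sketch, not part of the original file): counting
       * resident pages.  mincore(2) fills one byte per page, with
       * MINCORE_INCORE set exactly as assembled in the loop above.
       * resident_pages() is a hypothetical helper.
       *
       *	#include <sys/types.h>
       *	#include <sys/mman.h>
       *	#include <stdlib.h>
       *
       *	size_t
       *	resident_pages(void *base, size_t len, size_t pgsz)
       *	{
       *		size_t i, n = 0, npages = (len + pgsz - 1) / pgsz;
       *		char *vec = malloc(npages);
       *
       *		if (vec == NULL)
       *			return (0);
       *		if (mincore(base, len, vec) == 0)
       *			for (i = 0; i < npages; i++)
       *				if (vec[i] & MINCORE_INCORE)
       *					n++;
       *		free(vec);
       *		return (n);
       *	}
       */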
  798 
  799 #ifndef _SYS_SYSPROTO_H_
  800 struct mlock_args {
  801         caddr_t addr;
  802         size_t len;
  803 };
  804 #endif
  805 int
  806 mlock(p, uap, retval)
  807         struct proc *p;
  808         struct mlock_args *uap;
  809         int *retval;
  810 {
  811         vm_offset_t addr;
  812         vm_size_t size, pageoff;
  813         int error;
  814 
  815         addr = (vm_offset_t) uap->addr;
  816         size = uap->len;
  817 
  818         pageoff = (addr & PAGE_MASK);
  819         addr -= pageoff;
  820         size += pageoff;
  821         size = (vm_size_t) round_page(size);
  822 
  823         /* disable wrap around */
  824         if (addr + size < addr)
  825                 return (EINVAL);
  826 
  827         if (atop(size) + cnt.v_wire_count > vm_page_max_wired)
  828                 return (EAGAIN);
  829 
  830 #ifdef pmap_wired_count
  831         if (size + ptoa(pmap_wired_count(vm_map_pmap(&p->p_vmspace->vm_map))) >
  832             p->p_rlimit[RLIMIT_MEMLOCK].rlim_cur)
  833                 return (EAGAIN);
  834 #else
  835         error = suser(p->p_ucred, &p->p_acflag);
  836         if (error)
  837                 return (error);
  838 #endif
  839 
  840         error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, FALSE);
  841         return (error == KERN_SUCCESS ? 0 : ENOMEM);
  842 }
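
      /*
       * Example (editorial sketch, not part of the original file): wiring a
       * buffer that must never be paged out (key material, say).  Note the
       * policy above: without pmap_wired_count the caller must be
       * superuser; otherwise the request is charged against
       * RLIMIT_MEMLOCK and fails with EAGAIN past the limit.
       *
       *	#include <sys/mman.h>
       *	#include <err.h>
       *
       *	static char key[4096];
       *
       *	if (mlock(key, sizeof(key)) == -1)
       *		err(1, "mlock");
       *	... use key ...
       *	(void)munlock(key, sizeof(key));
       */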
  843 
  844 #ifndef _SYS_SYSPROTO_H_
  845 struct munlock_args {
  846         caddr_t addr;
  847         size_t len;
  848 };
  849 #endif
  850 int
  851 munlock(p, uap, retval)
  852         struct proc *p;
  853         struct munlock_args *uap;
  854         int *retval;
  855 {
  856         vm_offset_t addr;
  857         vm_size_t size, pageoff;
  858         int error;
  859 
  860         addr = (vm_offset_t) uap->addr;
  861         size = uap->len;
  862 
  863         pageoff = (addr & PAGE_MASK);
  864         addr -= pageoff;
  865         size += pageoff;
  866         size = (vm_size_t) round_page(size);
  867 
  868         /* disable wrap around */
  869         if (addr + size < addr)
  870                 return (EINVAL);
  871 
  872 #ifndef pmap_wired_count
  873         error = suser(p->p_ucred, &p->p_acflag);
  874         if (error)
  875                 return (error);
  876 #endif
  877 
  878         error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, TRUE);
  879         return (error == KERN_SUCCESS ? 0 : ENOMEM);
  880 }
  881 
  882 /*
  883  * Internal version of mmap.
  884  * Currently used by mmap, exec, and sys5 shared memory.
  885  * Handle is either a vnode pointer or NULL for MAP_ANON.
  886  */
  887 int
  888 vm_mmap(map, addr, size, prot, maxprot, flags, handle, foff)
  889         register vm_map_t map;
  890         register vm_offset_t *addr;
  891         register vm_size_t size;
  892         vm_prot_t prot, maxprot;
  893         register int flags;
  894         caddr_t handle;         /* XXX should be vp */
  895         vm_ooffset_t foff;
  896 {
  897         boolean_t fitit;
  898         vm_object_t object;
  899         struct vnode *vp = NULL;
  900         objtype_t type;
  901         int rv = KERN_SUCCESS;
  902         vm_ooffset_t objsize;
  903         int docow;
  904         struct proc *p = curproc;
  905 
  906         if (size == 0)
  907                 return (0);
  908 
  909         objsize = size = round_page(size);
  910 
  911         /*
  912          * We currently can only deal with page aligned file offsets.
  913          * The check is here rather than in the syscall because the
   914          * kernel calls this function internally for other mmapping
  915          * operations (such as in exec) and non-aligned offsets will
  916          * cause pmap inconsistencies...so we want to be sure to
  917          * disallow this in all cases.
  918          */
  919         if (foff & PAGE_MASK)
  920                 return (EINVAL);
  921 
  922         if ((flags & MAP_FIXED) == 0) {
  923                 fitit = TRUE;
  924                 *addr = round_page(*addr);
  925         } else {
  926                 if (*addr != trunc_page(*addr))
  927                         return (EINVAL);
  928                 fitit = FALSE;
  929                 (void) vm_map_remove(map, *addr, *addr + size);
  930         }
  931 
  932         /*
  933          * Lookup/allocate object.
  934          */
  935         if (flags & MAP_ANON) {
  936                 type = OBJT_DEFAULT;
  937                 /*
  938                  * Unnamed anonymous regions always start at 0.
  939                  */
  940                 if (handle == 0)
  941                         foff = 0;
  942         } else {
  943                 vp = (struct vnode *) handle;
  944                 if (vp->v_type == VCHR) {
  945                         type = OBJT_DEVICE;
  946                         handle = (caddr_t) vp->v_rdev;
  947                 } else {
  948                         struct vattr vat;
  949                         int error;
  950 
  951                         error = VOP_GETATTR(vp, &vat, p->p_ucred, p);
  952                         if (error)
  953                                 return (error);
  954                         objsize = round_page(vat.va_size);
  955                         type = OBJT_VNODE;
  956                 }
  957         }
  958 
  959         if (handle == NULL) {
  960                 object = NULL;
  961         } else {
  962                 object = vm_pager_allocate(type, handle, OFF_TO_IDX(objsize), prot, foff);
  963                 if (object == NULL)
  964                         return (type == OBJT_DEVICE ? EINVAL : ENOMEM);
  965         }
  966 
  967         /*
  968          * Force device mappings to be shared.
  969          */
  970         if (type == OBJT_DEVICE) {
  971                 flags &= ~(MAP_PRIVATE|MAP_COPY);
  972                 flags |= MAP_SHARED;
  973         }
  974 
  975         docow = 0;
  976         if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
  977                 docow = MAP_COPY_ON_WRITE | MAP_COPY_NEEDED;
  978         }
  979 
  980 #if defined(VM_PROT_READ_IS_EXEC)
  981         if (prot & VM_PROT_READ)
  982                 prot |= VM_PROT_EXECUTE;
  983 
  984         if (maxprot & VM_PROT_READ)
  985                 maxprot |= VM_PROT_EXECUTE;
  986 #endif
  987 
  988         rv = vm_map_find(map, object, foff, addr, size, fitit,
  989                         prot, maxprot, docow);
  990 
  991 
  992         if (rv != KERN_SUCCESS) {
  993                 /*
  994                  * Lose the object reference. Will destroy the
  995                  * object if it's an unnamed anonymous mapping
  996                  * or named anonymous without other references.
  997                  */
  998                 vm_object_deallocate(object);
  999                 goto out;
 1000         }
 1001 
 1002         /*
 1003          * "Pre-fault" resident pages.
 1004          */
 1005         if ((type == OBJT_VNODE) && (map->pmap != NULL) && (object != NULL)) {
 1006                 pmap_object_init_pt(map->pmap, *addr,
 1007                         object, (vm_pindex_t) OFF_TO_IDX(foff), size, 1);
 1008         }
 1009 
 1010         /*
 1011          * Shared memory is also shared with children.
 1012          */
 1013         if (flags & (MAP_SHARED|MAP_INHERIT)) {
 1014                 rv = vm_map_inherit(map, *addr, *addr + size, VM_INHERIT_SHARE);
 1015                 if (rv != KERN_SUCCESS) {
 1016                         (void) vm_map_remove(map, *addr, *addr + size);
 1017                         goto out;
 1018                 }
 1019         }
 1020 out:
 1021         switch (rv) {
 1022         case KERN_SUCCESS:
 1023                 return (0);
 1024         case KERN_INVALID_ADDRESS:
 1025         case KERN_NO_SPACE:
 1026                 return (ENOMEM);
 1027         case KERN_PROTECTION_FAILURE:
 1028                 return (EACCES);
 1029         default:
 1030                 return (EINVAL);
 1031         }
 1032 }
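
      /*
       * Example (editorial sketch, not part of the original file): the
       * /dev/zero path.  mmap() above converts a mapping of the zero
       * device into MAP_ANON (the SunOS-style idiom), so on systems that
       * lack MAP_ANON the following is the portable way to get anonymous
       * memory; len is assumed page-rounded.
       *
       *	#include <sys/mman.h>
       *	#include <fcntl.h>
       *	#include <unistd.h>
       *
       *	int fd = open("/dev/zero", O_RDWR);
       *	char *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
       *	    MAP_PRIVATE, fd, 0);
       *	close(fd);			(the mapping survives the close)
       */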



This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.