[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/bsd/kern/kern_mman.c

Version: -  FREEBSD  -  FREEBSD8  -  FREEBSD7  -  FREEBSD72  -  FREEBSD71  -  FREEBSD70  -  FREEBSD6  -  FREEBSD64  -  FREEBSD63  -  FREEBSD62  -  FREEBSD61  -  FREEBSD60  -  FREEBSD5  -  FREEBSD55  -  FREEBSD54  -  FREEBSD53  -  FREEBSD52  -  FREEBSD51  -  FREEBSD50  -  FREEBSD4  -  FREEBSD3  -  FREEBSD22  -  linux-2.6  -  linux-2.4.22  -  MK83  -  MK84  -  PLAN9  -  DFBSD  -  NETBSD  -  NETBSD5  -  NETBSD4  -  NETBSD3  -  NETBSD20  -  OPENBSD  -  xnu-517  -  xnu-792  -  xnu-792.6.70  -  xnu-1228  -  xnu-1456.1.26  -  OPENSOLARIS  -  minix-3-1-1  -  FREEBSD-LIBC  -  FREEBSD7-LIBC  -  FREEBSD6-LIBC  -  GLIBC27 
SearchContext: -  none  -  excerpts  -  bigexcerpts 

    1 /*
    2  * Copyright (c) 2007 Apple Inc. All Rights Reserved.
    3  * 
    4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
    5  * 
    6  * This file contains Original Code and/or Modifications of Original Code
    7  * as defined in and that are subject to the Apple Public Source License
    8  * Version 2.0 (the 'License'). You may not use this file except in
    9  * compliance with the License. The rights granted to you under the License
   10  * may not be used to create, or enable the creation or redistribution of,
   11  * unlawful or unlicensed copies of an Apple operating system, or to
   12  * circumvent, violate, or enable the circumvention or violation of, any
   13  * terms of an Apple operating system software license agreement.
   14  * 
   15  * Please obtain a copy of the License at
   16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
   17  * 
   18  * The Original Code and all software distributed under the License are
   19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
   20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
   21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
   22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
   23  * Please see the License for the specific language governing rights and
   24  * limitations under the License.
   25  * 
   26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
   27  */
   28 /*
   29  * Copyright (c) 1988 University of Utah.
   30  * Copyright (c) 1991, 1993
   31  *      The Regents of the University of California.  All rights reserved.
   32  *
   33  * This code is derived from software contributed to Berkeley by
   34  * the Systems Programming Group of the University of Utah Computer
   35  * Science Department.
   36  *
   37  * Redistribution and use in source and binary forms, with or without
   38  * modification, are permitted provided that the following conditions
   39  * are met:
   40  * 1. Redistributions of source code must retain the above copyright
   41  *    notice, this list of conditions and the following disclaimer.
   42  * 2. Redistributions in binary form must reproduce the above copyright
   43  *    notice, this list of conditions and the following disclaimer in the
   44  *    documentation and/or other materials provided with the distribution.
   45  * 3. All advertising materials mentioning features or use of this software
   46  *    must display the following acknowledgement:
   47  *      This product includes software developed by the University of
   48  *      California, Berkeley and its contributors.
   49  * 4. Neither the name of the University nor the names of its contributors
   50  *    may be used to endorse or promote products derived from this software
   51  *    without specific prior written permission.
   52  *
   53  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   54  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   55  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   56  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   57  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   58  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   59  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   60  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   61  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   62  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   63  * SUCH DAMAGE.
   64  *
   65  * from: Utah $Hdr: vm_mmap.c 1.6 91/10/21$
   66  *
   67  *      @(#)vm_mmap.c   8.10 (Berkeley) 2/19/95
   68  */
   69 /*
   70  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
   71  * support for mandatory and extensible security protections.  This notice
   72  * is included in support of clause 2.2 (b) of the Apple Public License,
   73  * Version 2.0.
   74  */
   75 
   76 /*
   77  * Mapped file (mmap) interface to VM
   78  */
   79 
   80 #include <sys/param.h>
   81 #include <sys/systm.h>
   82 #include <sys/filedesc.h>
   83 #include <sys/proc_internal.h>
   84 #include <sys/kauth.h>
   85 #include <sys/resourcevar.h>
   86 #include <sys/vnode_internal.h>
   87 #include <sys/acct.h>
   88 #include <sys/wait.h>
   89 #include <sys/file_internal.h>
   90 #include <sys/vadvise.h>
   91 #include <sys/trace.h>
   92 #include <sys/mman.h>
   93 #include <sys/conf.h>
   94 #include <sys/stat.h>
   95 #include <sys/ubc.h>
   96 #include <sys/ubc_internal.h>
   97 #include <sys/sysproto.h>
   98 
   99 #include <sys/syscall.h>
  100 #include <sys/kdebug.h>
  101 
  102 #include <bsm/audit_kernel.h>
  103 #include <bsm/audit_kevents.h>
  104 
  105 #include <mach/mach_types.h>
  106 #include <mach/mach_traps.h>
  107 #include <mach/vm_sync.h>
  108 #include <mach/vm_behavior.h>
  109 #include <mach/vm_inherit.h>
  110 #include <mach/vm_statistics.h>
  111 #include <mach/mach_vm.h>
  112 #include <mach/vm_map.h>
  113 #include <mach/host_priv.h>
  114 
  115 #include <kern/cpu_number.h>
  116 #include <kern/host.h>
  117 
  118 #include <vm/vm_map.h>
  119 #include <vm/vm_kern.h>
  120 #include <vm/vm_pager.h>
  121 
  122 struct osmmap_args {
  123                 caddr_t addr;
  124                 int     len;
  125                 int     prot;
  126                 int     share;
  127                 int     fd;
  128                 long    pos;
  129 };
  130 
  131 /* XXX the following function should probably be static */
  132 kern_return_t map_fd_funneled(int, vm_object_offset_t, vm_offset_t *,
  133                                 boolean_t, vm_size_t);
  134 
  135 /* XXX the following two functions aren't used anywhere */
  136 int osmmap(proc_t , struct osmmap_args *, register_t *);
  137 int mremap(void);
  138 
  139 int
  140 sbrk(__unused proc_t p, __unused struct sbrk_args *uap, __unused register_t *retval)
  141 {
  142         /* Not yet implemented */
  143         return (ENOTSUP);
  144 }
  145 
  146 int
  147 sstk(__unused proc_t p, __unused struct sstk_args *uap, __unused register_t *retval)
  148 {
  149         /* Not yet implemented */
  150         return (ENOTSUP);
  151 }
  152 
  153 
  154 int
  155 osmmap(
  156         proc_t curp,
  157         struct osmmap_args *uap,
  158         register_t *retval)
  159 {
  160         struct mmap_args newargs;
  161         user_addr_t addr;
  162         int ret;
  163 
  164         if ((uap->share ==  MAP_SHARED )|| (uap->share ==  MAP_PRIVATE )) {
  165                 newargs.addr = CAST_USER_ADDR_T(uap->addr);
  166                 newargs.len = CAST_USER_ADDR_T(uap->len);
  167                 newargs.prot = uap->prot;
  168                 newargs.flags = uap->share;
  169                 newargs.fd = uap->fd;
  170                 newargs.pos = (off_t)uap->pos;
  171                 ret = mmap(curp, &newargs, &addr);
  172                 if (ret == 0)
  173                         *retval = CAST_DOWN(register_t, addr);
  174         } else
  175                 ret = EINVAL;
  176         return ret;
  177 }
  178 
  179 
  180 /*
  181  * XXX Internally, we use VM_PROT_* somewhat interchangeably, but the correct
  182  * XXX usage is PROT_* from an interface perspective.  Thus the values of
  183  * XXX VM_PROT_* and PROT_* need to correspond.
  184  */
  185 int
  186 mmap(proc_t p, struct mmap_args *uap, user_addr_t *retval)
  187 {
  188         /*
  189          *      Map in special device (must be SHARED) or file
  190          */
  191         struct fileproc *fp;
  192         register struct         vnode *vp;
  193         int                     flags;
  194         int                     prot, file_prot;
  195         int                     err=0;
  196         vm_map_t                user_map;
  197         kern_return_t           result;
  198         mach_vm_offset_t        user_addr;
  199         mach_vm_size_t          user_size;
  200         vm_object_offset_t      pageoff;
  201         vm_object_offset_t      file_pos;
  202         int                     alloc_flags=0;
  203         boolean_t               docow;
  204         vm_prot_t               maxprot;
  205         void                    *handle;
  206         vm_pager_t              pager;
  207         int                     mapanon=0;
  208         int                     fpref=0;
  209         int error =0;
  210         int fd = uap->fd;
  211 
  212         user_addr = (mach_vm_offset_t)uap->addr;
  213         user_size = (mach_vm_size_t) uap->len;
  214 
  215         AUDIT_ARG(addr, user_addr);
  216         AUDIT_ARG(len, user_size);
  217         AUDIT_ARG(fd, uap->fd);
  218 
  219         prot = (uap->prot & VM_PROT_ALL);
  220 #if 3777787
  221         /*
  222          * Since the hardware currently does not support writing without
  223          * read-before-write, or execution-without-read, if the request is
  224          * for write or execute access, we must imply read access as well;
  225          * otherwise programs expecting this to work will fail to operate.
  226          */
  227         if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
  228                 prot |= VM_PROT_READ;
  229 #endif  /* radar 3777787 */
  230 
  231         flags = uap->flags;
  232         vp = NULLVP;
  233 
  234         /*
  235          * The vm code does not have prototypes & compiler doesn't do the'
  236          * the right thing when you cast 64bit value and pass it in function 
  237          * call. So here it is.
  238          */
  239         file_pos = (vm_object_offset_t)uap->pos;
  240 
  241 
  242         /* make sure mapping fits into numeric range etc */
  243         if (file_pos + user_size > (vm_object_offset_t)-PAGE_SIZE_64)
  244                 return (EINVAL);
  245 
  246         /*
  247          * Align the file position to a page boundary,
  248          * and save its page offset component.
  249          */
  250         pageoff = (file_pos & PAGE_MASK);
  251         file_pos -= (vm_object_offset_t)pageoff;
  252 
  253 
  254         /* Adjust size for rounding (on both ends). */
  255         user_size += pageoff;                   /* low end... */
  256         user_size = mach_vm_round_page(user_size);      /* hi end */
  257 
  258 
  259         /*
  260          * Check for illegal addresses.  Watch out for address wrap... Note
  261          * that VM_*_ADDRESS are not constants due to casts (argh).
  262          */
  263         if (flags & MAP_FIXED) {
  264                 /*
  265                  * The specified address must have the same remainder
  266                  * as the file offset taken modulo PAGE_SIZE, so it
  267                  * should be aligned after adjustment by pageoff.
  268                  */
  269                 user_addr -= pageoff;
  270                 if (user_addr & PAGE_MASK)
  271                 return (EINVAL);
  272         }
  273 #ifdef notyet
  274         /* DO not have apis to get this info, need to wait till then*/
  275         /*
  276          * XXX for non-fixed mappings where no hint is provided or
  277          * the hint would fall in the potential heap space,
  278          * place it after the end of the largest possible heap.
  279          *
  280          * There should really be a pmap call to determine a reasonable
  281          * location.
  282          */
  283         else if (addr < mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ))
  284                 addr = mach_vm_round_page(p->p_vmspace->vm_daddr + MAXDSIZ);
  285 
  286 #endif
  287 
  288         alloc_flags = 0;
  289 
  290         if (flags & MAP_ANON) {
  291                 /*
  292                  * Mapping blank space is trivial.  Use positive fds as the alias
  293                  * value for memory tracking. 
  294                  */
  295                 if (fd != -1) {
  296                         /*
  297                          * Use "fd" to pass (some) Mach VM allocation flags,
  298                          * (see the VM_FLAGS_* definitions).
  299                          */
  300                         alloc_flags = fd & (VM_FLAGS_ALIAS_MASK |
  301                                             VM_FLAGS_PURGABLE);
  302                         if (alloc_flags != fd) {
  303                                 /* reject if there are any extra flags */
  304                                 return EINVAL;
  305                         }
  306                 }
  307                         
  308                 handle = NULL;
  309                 maxprot = VM_PROT_ALL;
  310                 file_pos = 0;
  311                 mapanon = 1;
  312         } else {
  313                 struct vnode_attr va;
  314                 vfs_context_t ctx = vfs_context_current();
  315 
  316                 /*
  317                  * Mapping file, get fp for validation. Obtain vnode and make
  318                  * sure it is of appropriate type.
  319                  */
  320                 err = fp_lookup(p, fd, &fp, 0);
  321                 if (err)
  322                         return(err);
  323                 fpref = 1;
  324                 if(fp->f_fglob->fg_type == DTYPE_PSXSHM) {
  325                         uap->addr = (user_addr_t)user_addr;
  326                         uap->len = (user_size_t)user_size;
  327                         uap->prot = prot;
  328                         uap->flags = flags;
  329                         uap->pos = file_pos;
  330                         error = pshm_mmap(p, uap, retval, fp, (off_t)pageoff);
  331                         goto bad;
  332                 }
  333 
  334                 if (fp->f_fglob->fg_type != DTYPE_VNODE) {
  335                         error = EINVAL;
  336                         goto bad;
  337                 }
  338                 vp = (struct vnode *)fp->f_fglob->fg_data;
  339                 error = vnode_getwithref(vp);
  340                 if(error != 0)
  341                         goto bad;
  342 
  343                 if (vp->v_type != VREG && vp->v_type != VCHR) {
  344                         (void)vnode_put(vp);
  345                         error = EINVAL;
  346                         goto bad;
  347                 }
  348 
  349                 AUDIT_ARG(vnpath, vp, ARG_VNODE1);
  350                 
  351                 /*
  352                  * POSIX: mmap needs to update access time for mapped files
  353                  */
  354                 if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
  355                         VATTR_INIT(&va);
  356                         nanotime(&va.va_access_time);
  357                         VATTR_SET_ACTIVE(&va, va_access_time);
  358                         vnode_setattr(vp, &va, ctx);
  359                 }
  360                 
  361                 /*
  362                  * XXX hack to handle use of /dev/zero to map anon memory (ala
  363                  * SunOS).
  364                  */
  365                 if (vp->v_type == VCHR || vp->v_type == VSTR) {
  366                         (void)vnode_put(vp);
  367                         error = ENODEV;
  368                         goto bad;
  369                 } else {
  370                         /*
  371                          * Ensure that file and memory protections are
  372                          * compatible.  Note that we only worry about
  373                          * writability if mapping is shared; in this case,
  374                          * current and max prot are dictated by the open file.
  375                          * XXX use the vnode instead?  Problem is: what
  376                          * credentials do we use for determination? What if
  377                          * proc does a setuid?
  378                          */
  379                         maxprot = VM_PROT_EXECUTE;      /* ??? */
  380                         if (fp->f_fglob->fg_flag & FREAD)
  381                                 maxprot |= VM_PROT_READ;
  382                         else if (prot & PROT_READ) {
  383                                 (void)vnode_put(vp);
  384                                 error = EACCES;
  385                                 goto bad;
  386                         }
  387                         /*
  388                          * If we are sharing potential changes (either via
  389                          * MAP_SHARED or via the implicit sharing of character
  390                          * device mappings), and we are trying to get write
  391                          * permission although we opened it without asking
  392                          * for it, bail out. 
  393                          */
  394 
  395                         if ((flags & MAP_SHARED) != 0) {
  396                                 if ((fp->f_fglob->fg_flag & FWRITE) != 0) {
  397                                         /*
  398                                          * check for write access
  399                                          *
  400                                          * Note that we already made this check when granting FWRITE
  401                                          * against the file, so it seems redundant here.
  402                                          */
  403                                         error = vnode_authorize(vp, NULL, KAUTH_VNODE_CHECKIMMUTABLE, ctx);
  404  
  405                                         /* if not granted for any reason, but we wanted it, bad */
  406                                         if ((prot & PROT_WRITE) && (error != 0)) {
  407                                                 vnode_put(vp);
  408                                                 goto bad;
  409                                         }
  410  
  411                                         /* if writable, remember */
  412                                         if (error == 0)
  413                                                 maxprot |= VM_PROT_WRITE;
  414 
  415                                 } else if ((prot & PROT_WRITE) != 0) {
  416                                         (void)vnode_put(vp);
  417                                         error = EACCES;
  418                                         goto bad;
  419                                 }
  420                         } else
  421                                 maxprot |= VM_PROT_WRITE;
  422 
  423                         handle = (void *)vp;
  424 #if CONFIG_MACF
  425                         error = mac_file_check_mmap(vfs_context_ucred(ctx),
  426                             fp->f_fglob, prot, flags, &maxprot);
  427                         if (error) {
  428                                 (void)vnode_put(vp);
  429                                 goto bad;
  430                         }
  431 #endif /* MAC */
  432                 }
  433         }
  434 
  435         if (user_size == 0)  {
  436                 if (!mapanon)
  437                         (void)vnode_put(vp);
  438                 error = 0;
  439                 goto bad;
  440         }
  441 
  442         /*
  443          *      We bend a little - round the start and end addresses
  444          *      to the nearest page boundary.
  445          */
  446         user_size = mach_vm_round_page(user_size);
  447 
  448         if (file_pos & PAGE_MASK_64) {
  449                 if (!mapanon)
  450                         (void)vnode_put(vp);
  451                 error = EINVAL;
  452                 goto bad;
  453         }
  454 
  455         user_map = current_map();
  456 
  457         if ((flags & MAP_FIXED) == 0) {
  458                 alloc_flags |= VM_FLAGS_ANYWHERE;
  459                 user_addr = mach_vm_round_page(user_addr);
  460         } else {
  461                 if (user_addr != mach_vm_trunc_page(user_addr)) {
  462                         if (!mapanon)
  463                                 (void)vnode_put(vp);
  464                         error = EINVAL;
  465                         goto bad;
  466                 }
  467                 /*
  468                  * mmap(MAP_FIXED) will replace any existing mappings in the
  469                  * specified range, if the new mapping is successful.
  470                  * If we just deallocate the specified address range here,
  471                  * another thread might jump in and allocate memory in that
  472                  * range before we get a chance to establish the new mapping,
  473                  * and we won't have a chance to restore the old mappings.
  474                  * So we use VM_FLAGS_OVERWRITE to let Mach VM know that it
  475                  * has to deallocate the existing mappings and establish the
  476                  * new ones atomically.
  477                  */
  478                 alloc_flags |= VM_FLAGS_FIXED | VM_FLAGS_OVERWRITE;
  479         }
  480 
  481         if (flags & MAP_NOCACHE)
  482                 alloc_flags |= VM_FLAGS_NO_CACHE;
  483 
  484         /*
  485          * Lookup/allocate object.
  486          */
  487         if (handle == NULL) {
  488                 pager = NULL;
  489 #ifdef notyet
  490 /* Hmm .. */
  491 #if defined(VM_PROT_READ_IS_EXEC)
  492                 if (prot & VM_PROT_READ)
  493                         prot |= VM_PROT_EXECUTE;
  494                 if (maxprot & VM_PROT_READ)
  495                         maxprot |= VM_PROT_EXECUTE;
  496 #endif
  497 #endif
  498 
  499 #if 3777787
  500                 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
  501                         prot |= VM_PROT_READ;
  502                 if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
  503                         maxprot |= VM_PROT_READ;
  504 #endif  /* radar 3777787 */
  505 
  506                 result = vm_map_enter_mem_object(user_map,
  507                                                  &user_addr, user_size,
  508                                                  0, alloc_flags,
  509                                                  IPC_PORT_NULL, 0, FALSE,
  510                                                  prot, maxprot,
  511                                                  (flags & MAP_SHARED) ?
  512                                                  VM_INHERIT_SHARE : 
  513                                                  VM_INHERIT_DEFAULT);
  514                 if (result != KERN_SUCCESS) 
  515                                 goto out;
  516         } else {
  517                 pager = (vm_pager_t)ubc_getpager(vp);
  518                 
  519                 if (pager == NULL) {
  520                         (void)vnode_put(vp);
  521                         error = ENOMEM;
  522                         goto bad;
  523                 }
  524 
  525                 /*
  526                  *  Set credentials:
  527                  *      FIXME: if we're writing the file we need a way to
  528                  *      ensure that someone doesn't replace our R/W creds
  529                  *      with ones that only work for read.
  530                  */
  531 
  532                 ubc_setthreadcred(vp, p, current_thread());
  533                 docow = FALSE;
  534                 if ((flags & (MAP_ANON|MAP_SHARED)) == 0) {
  535                         docow = TRUE;
  536                 }
  537 
  538 #ifdef notyet
  539 /* Hmm .. */
  540 #if defined(VM_PROT_READ_IS_EXEC)
  541                 if (prot & VM_PROT_READ)
  542                         prot |= VM_PROT_EXECUTE;
  543                 if (maxprot & VM_PROT_READ)
  544                         maxprot |= VM_PROT_EXECUTE;
  545 #endif
  546 #endif /* notyet */
  547 
  548 #if 3777787
  549                 if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
  550                         prot |= VM_PROT_READ;
  551                 if (maxprot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
  552                         maxprot |= VM_PROT_READ;
  553 #endif  /* radar 3777787 */
  554 
  555                 result = vm_map_enter_mem_object(user_map,
  556                                                  &user_addr, user_size,
  557                                                  0, alloc_flags,
  558                                                  (ipc_port_t)pager, file_pos,
  559                                                  docow, prot, maxprot, 
  560                                                  (flags & MAP_SHARED) ?
  561                                                  VM_INHERIT_SHARE : 
  562                                                  VM_INHERIT_DEFAULT);
  563 
  564                 if (result != KERN_SUCCESS)  {
  565                                 (void)vnode_put(vp);
  566                                 goto out;
  567                 }
  568 
  569                 file_prot = prot & (PROT_READ | PROT_WRITE | PROT_EXEC);
  570                 if (docow) {
  571                         /* private mapping: won't write to the file */
  572                         file_prot &= ~PROT_WRITE;
  573                 }
  574                 (void) ubc_map(vp, file_prot);
  575         }
  576 
  577         if (!mapanon)
  578                 (void)vnode_put(vp);
  579 
  580 out:
  581         switch (result) {
  582         case KERN_SUCCESS:
  583                 *retval = user_addr + pageoff;
  584                 error = 0;
  585                 break;
  586         case KERN_INVALID_ADDRESS:
  587         case KERN_NO_SPACE:
  588                 error =  ENOMEM;
  589                 break;
  590         case KERN_PROTECTION_FAILURE:
  591                 error =  EACCES;
  592                 break;
  593         default:
  594                 error =  EINVAL;
  595                 break;
  596         }
  597 bad:
  598         if (fpref)
  599                 fp_drop(p, fd, fp, 0);
  600 
  601         KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_mmap) | DBG_FUNC_NONE), fd, (uint32_t)(*retval), (uint32_t)user_size, error, 0);
  602         KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO2, SYS_mmap) | DBG_FUNC_NONE), (uint32_t)(*retval >> 32), (uint32_t)(user_size >> 32),
  603                               (uint32_t)(file_pos >> 32), (uint32_t)file_pos, 0);
  604 
  605         return(error);
  606 }
  607 
  608 int
  609 msync(__unused proc_t p, struct msync_args *uap, register_t *retval)
  610 {
  611         __pthread_testcancel(1);
  612         return(msync_nocancel(p, (struct msync_nocancel_args *)uap, retval));
  613 }
  614 
  615 int
  616 msync_nocancel(__unused proc_t p, struct msync_nocancel_args *uap, __unused register_t *retval)
  617 {
  618         mach_vm_offset_t addr;
  619         mach_vm_size_t size;
  620         int flags;
  621         vm_map_t user_map;
  622         int rv;
  623         vm_sync_t sync_flags=0;
  624 
  625         addr = (mach_vm_offset_t) uap->addr;
  626         size = (mach_vm_size_t)uap->len;
  627 
  628         KERNEL_DEBUG_CONSTANT((BSDDBG_CODE(DBG_BSD_SC_EXTENDED_INFO, SYS_msync) | DBG_FUNC_NONE), (uint32_t)(addr >> 32), (uint32_t)(size >> 32), 0, 0, 0);
  629 
  630         if (addr & PAGE_MASK_64) {
  631                 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
  632                 return EINVAL;
  633         }
  634         if (size == 0) {
  635                 /*
  636                  * We cannot support this properly without maintaining
  637                  * list all mmaps done. Cannot use vm_map_entry as they could be
  638                  * split or coalesced by indepenedant actions. So instead of 
  639                  * inaccurate results, lets just return error as invalid size
  640                  * specified
  641                  */
  642                 return (EINVAL); /* XXX breaks posix apps */
  643         }
  644 
  645         flags = uap->flags;
  646         /* disallow contradictory flags */
  647         if ((flags & (MS_SYNC|MS_ASYNC)) == (MS_SYNC|MS_ASYNC))
  648                 return (EINVAL);
  649 
  650         if (flags & MS_KILLPAGES)
  651                 sync_flags |= VM_SYNC_KILLPAGES;
  652         if (flags & MS_DEACTIVATE)
  653                 sync_flags |= VM_SYNC_DEACTIVATE;
  654         if (flags & MS_INVALIDATE)
  655                 sync_flags |= VM_SYNC_INVALIDATE;
  656 
  657         if ( !(flags & (MS_KILLPAGES | MS_DEACTIVATE))) {
  658                 if (flags & MS_ASYNC) 
  659                         sync_flags |= VM_SYNC_ASYNCHRONOUS;
  660                 else 
  661                         sync_flags |= VM_SYNC_SYNCHRONOUS;
  662         }
  663 
  664         sync_flags |= VM_SYNC_CONTIGUOUS;       /* complain if holes */
  665 
  666         user_map = current_map();
  667         rv = mach_vm_msync(user_map, addr, size, sync_flags);
  668 
  669         switch (rv) {
  670         case KERN_SUCCESS:
  671                 break;
  672         case KERN_INVALID_ADDRESS:      /* hole in region being sync'ed */
  673                 return (ENOMEM);
  674         case KERN_FAILURE:
  675                 return (EIO);
  676         default:
  677                 return (EINVAL);
  678         }
  679         return (0);
  680 }
  681 
  682 
  683 int
  684 mremap(void)
  685 {
  686         /* Not yet implemented */
  687         return (ENOTSUP);
  688 }
  689 
  690 int
  691 munmap(__unused proc_t p, struct munmap_args *uap, __unused register_t *retval)
  692 {
  693         mach_vm_offset_t        user_addr;
  694         mach_vm_size_t  user_size;
  695         kern_return_t   result;
  696 
  697         user_addr = (mach_vm_offset_t) uap->addr;
  698         user_size = (mach_vm_size_t) uap->len;
  699 
  700         AUDIT_ARG(addr, user_addr);
  701         AUDIT_ARG(len, user_size);
  702 
  703         if (user_addr & PAGE_MASK_64) {
  704                 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
  705                 return EINVAL;
  706         }
  707 
  708         if (user_addr + user_size < user_addr)
  709                 return(EINVAL);
  710 
  711         if (user_size == 0) {
  712                 /* UNIX SPEC: size is 0, return EINVAL */
  713                 return EINVAL;
  714         }
  715 
  716         result = mach_vm_deallocate(current_map(), user_addr, user_size);
  717         if (result != KERN_SUCCESS) {
  718                 return(EINVAL);
  719         }
  720         return(0);
  721 }
  722 
  723 int
  724 mprotect(__unused proc_t p, struct mprotect_args *uap, __unused register_t *retval)
  725 {
  726         register vm_prot_t prot;
  727         mach_vm_offset_t        user_addr;
  728         mach_vm_size_t  user_size;
  729         kern_return_t   result;
  730         vm_map_t        user_map;
  731 #if CONFIG_MACF
  732         int error;
  733 #endif
  734 
  735         AUDIT_ARG(addr, uap->addr);
  736         AUDIT_ARG(len, uap->len);
  737         AUDIT_ARG(value, uap->prot);
  738 
  739         user_addr = (mach_vm_offset_t) uap->addr;
  740         user_size = (mach_vm_size_t) uap->len;
  741         prot = (vm_prot_t)(uap->prot & VM_PROT_ALL);
  742 
  743         if (user_addr & PAGE_MASK_64) {
  744                 /* UNIX SPEC: user address is not page-aligned, return EINVAL */
  745                 return EINVAL;
  746         }
  747                 
  748 #ifdef notyet
  749 /* Hmm .. */
  750 #if defined(VM_PROT_READ_IS_EXEC)
  751         if (prot & VM_PROT_READ)
  752                 prot |= VM_PROT_EXECUTE;
  753 #endif
  754 #endif /* notyet */
  755 
  756 #if 3936456
  757         if (prot & (VM_PROT_EXECUTE | VM_PROT_WRITE))
  758                 prot |= VM_PROT_READ;
  759 #endif  /* 3936456 */
  760 
  761         user_map = current_map();
  762 
  763 #if CONFIG_MACF
  764         /*
  765          * The MAC check for mprotect is of limited use for 2 reasons:
  766          * Without mmap revocation, the caller could have asked for the max
  767          * protections initially instead of a reduced set, so a mprotect
  768          * check would offer no new security.
  769          * It is not possible to extract the vnode from the pager object(s)
  770          * of the target memory range.
  771          * However, the MAC check may be used to prevent a process from,
  772          * e.g., making the stack executable.
  773          */
  774         error = mac_proc_check_mprotect(p, user_addr,
  775                         user_size, prot);
  776         if (error)
  777                 return (error);
  778 #endif
  779         result = mach_vm_protect(user_map, user_addr, user_size,
  780                                  FALSE, prot);
  781         switch (result) {
  782         case KERN_SUCCESS:
  783                 return (0);
  784         case KERN_PROTECTION_FAILURE:
  785                 return (EACCES);
  786         case KERN_INVALID_ADDRESS:
  787                 /* UNIX SPEC: for an invalid address range, return ENOMEM */
  788                 return ENOMEM;
  789         }
  790         return (EINVAL);
  791 }
  792 
  793 
  794 int
  795 minherit(__unused proc_t p, struct minherit_args *uap, __unused register_t *retval)
  796 {
  797         mach_vm_offset_t addr;
  798         mach_vm_size_t size;
  799         register vm_inherit_t inherit;
  800         vm_map_t        user_map;
  801         kern_return_t   result;
  802 
  803         AUDIT_ARG(addr, uap->addr);
  804         AUDIT_ARG(len, uap->len);
  805         AUDIT_ARG(value, uap->inherit);
  806 
  807         addr = (mach_vm_offset_t)uap->addr;
  808         size = (mach_vm_size_t)uap->len;
  809         inherit = uap->inherit;
  810 
  811         user_map = current_map();
  812         result = mach_vm_inherit(user_map, addr, size,
  813                                 inherit);
  814         switch (result) {
  815         case KERN_SUCCESS:
  816                 return (0);
  817         case KERN_PROTECTION_FAILURE:
  818                 return (EACCES);
  819         }
  820         return (EINVAL);
  821 }
  822 
  823 int
  824 madvise(__unused proc_t p, struct madvise_args *uap, __unused register_t *retval)
  825 {
  826         vm_map_t user_map;
  827         mach_vm_offset_t start;
  828         mach_vm_size_t size;
  829         vm_behavior_t new_behavior;
  830         kern_return_t   result;
  831 
  832         /*
  833          * Since this routine is only advisory, we default to conservative
  834          * behavior.
  835          */
  836         switch (uap->behav) {
  837                 case MADV_RANDOM:
  838                         new_behavior = VM_BEHAVIOR_RANDOM;
  839                         break;
  840                 case MADV_SEQUENTIAL: 
  841                         new_behavior = VM_BEHAVIOR_SEQUENTIAL;
  842                         break;
  843                 case MADV_NORMAL:
  844                         new_behavior = VM_BEHAVIOR_DEFAULT;
  845                         break;
  846                 case MADV_WILLNEED:
  847                         new_behavior = VM_BEHAVIOR_WILLNEED;
  848                         break;
  849                 case MADV_DONTNEED:
  850                         new_behavior = VM_BEHAVIOR_DONTNEED;
  851                         break;
  852                 default:
  853                         return(EINVAL);
  854         }
  855 
  856         start = (mach_vm_offset_t) uap->addr;
  857         size = (mach_vm_size_t) uap->len;
  858         
  859         user_map = current_map();
  860 
  861         result = mach_vm_behavior_set(user_map, start, size, new_behavior);
  862         switch (result) {
  863                 case KERN_SUCCESS:
  864                         return (0);
  865                 case KERN_INVALID_ADDRESS:
  866                         return (ENOMEM);
  867         }
  868 
  869         return (EINVAL);
  870 }
  871 
  872 int
  873 mincore(__unused proc_t p, struct mincore_args *uap, __unused register_t *retval)
  874 {
  875         mach_vm_offset_t addr, first_addr, end;
  876         vm_map_t map;
  877         user_addr_t vec;
  878         int error;
  879         int vecindex, lastvecindex;
  880         int mincoreinfo=0;
  881         int pqueryinfo;
  882         kern_return_t   ret;
  883         int numref;
  884 
  885         char c;
  886 
  887         map = current_map();
  888 
  889         /*
  890          * Make sure that the addresses presented are valid for user
  891          * mode.
  892          */
  893         first_addr = addr = mach_vm_trunc_page(uap->addr);
  894         end = addr + mach_vm_round_page(uap->len);
  895 
  896         if (end < addr)
  897                 return (EINVAL);
  898 
  899         /*
  900          * Address of byte vector
  901          */
  902         vec = uap->vec;
  903 
  904         map = current_map();
  905 
  906         /*
  907          * Do this on a map entry basis so that if the pages are not
  908          * in the current processes address space, we can easily look
  909          * up the pages elsewhere.
  910          */
  911         lastvecindex = -1;
  912         for( ; addr < end; addr += PAGE_SIZE ) {
  913                 pqueryinfo = 0;
  914                 ret = mach_vm_page_query(map, addr, &pqueryinfo, &numref);
  915                 if (ret != KERN_SUCCESS) 
  916                         pqueryinfo = 0;
  917                 mincoreinfo = 0;
  918                 if (pqueryinfo & VM_PAGE_QUERY_PAGE_PRESENT)
  919                         mincoreinfo |= MINCORE_INCORE;
  920                 if (pqueryinfo & VM_PAGE_QUERY_PAGE_REF)
  921                         mincoreinfo |= MINCORE_REFERENCED;
  922                 if (pqueryinfo & VM_PAGE_QUERY_PAGE_DIRTY)
  923                         mincoreinfo |= MINCORE_MODIFIED;
  924                 
  925                 
  926                 /*
  927                  * calculate index into user supplied byte vector
  928                  */
  929                 vecindex = (addr - first_addr)>> PAGE_SHIFT;
  930 
  931                 /*
  932                  * If we have skipped map entries, we need to make sure that
  933                  * the byte vector is zeroed for those skipped entries.
  934                  */
  935                 while((lastvecindex + 1) < vecindex) {
  936                         c = 0;
  937                         error = copyout(&c, vec + lastvecindex, 1);
  938                         if (error) {
  939                                 return (EFAULT);
  940                         }
  941                         ++lastvecindex;
  942                 }
  943 
  944                 /*
  945                  * Pass the page information to the user
  946                  */
  947                 c = (char)mincoreinfo;
  948                 error = copyout(&c, vec + vecindex, 1);
  949                 if (error) {
  950                         return (EFAULT);
  951                 }
  952                 lastvecindex = vecindex;
  953         }
  954 
  955 
  956         /*
  957          * Zero the last entries in the byte vector.
  958          */
  959         vecindex = (end - first_addr) >> PAGE_SHIFT;
  960         while((lastvecindex + 1) < vecindex) {
  961                 c = 0;
  962                 error = copyout(&c, vec + lastvecindex, 1);
  963                 if (error) {
  964                         return (EFAULT);
  965                 }
  966                 ++lastvecindex;
  967         }
  968         
  969         return (0);
  970 }
  971 
  972 int
  973 mlock(__unused proc_t p, struct mlock_args *uap, __unused register_t *retvalval)
  974 {
  975         vm_map_t user_map;
  976         vm_map_offset_t addr;
  977         vm_map_size_t size, pageoff;
  978         kern_return_t   result;
  979 
  980         AUDIT_ARG(addr, uap->addr);
  981         AUDIT_ARG(len, uap->len);
  982 
  983         addr = (vm_map_offset_t) uap->addr;
  984         size = (vm_map_size_t)uap->len;
  985 
  986         /* disable wrap around */
  987         if (addr + size < addr)
  988                 return (EINVAL);
  989 
  990         if (size == 0)
  991                 return (0);
  992 
  993         pageoff = (addr & PAGE_MASK);
  994         addr -= pageoff;
  995         size = vm_map_round_page(size+pageoff);
  996         user_map = current_map();
  997 
  998         /* have to call vm_map_wire directly to pass "I don't know" protections */
  999         result = vm_map_wire(user_map, addr, addr+size, VM_PROT_NONE, TRUE);
 1000 
 1001         if (result == KERN_RESOURCE_SHORTAGE)
 1002                 return EAGAIN;
 1003         else if (result != KERN_SUCCESS)
 1004                 return ENOMEM;
 1005 
 1006         return 0;       /* KERN_SUCCESS */
 1007 }
 1008 
 1009 int
 1010 munlock(__unused proc_t p, struct munlock_args *uap, __unused register_t *retval)
 1011 {
 1012         mach_vm_offset_t addr;
 1013         mach_vm_size_t size;
 1014         vm_map_t user_map;
 1015         kern_return_t   result;
 1016 
 1017         AUDIT_ARG(addr, uap->addr);
 1018         AUDIT_ARG(addr, uap->len);
 1019 
 1020         addr = (mach_vm_offset_t) uap->addr;
 1021         size = (mach_vm_size_t)uap->len;
 1022         user_map = current_map();
 1023 
 1024         /* JMM - need to remove all wirings by spec - this just removes one */
 1025         result = mach_vm_wire(host_priv_self(), user_map, addr, size, VM_PROT_NONE);
 1026         return (result == KERN_SUCCESS ? 0 : ENOMEM);
 1027 }
 1028 
 1029 
 1030 int
 1031 mlockall(__unused proc_t p, __unused struct mlockall_args *uap, __unused register_t *retval)
 1032 {
 1033         return (ENOSYS);
 1034 }
 1035 
 1036 int
 1037 munlockall(__unused proc_t p, __unused struct munlockall_args *uap, __unused register_t *retval)
 1038 {
 1039         return(ENOSYS);
 1040 }
 1041 
 1042 
 1043 /* BEGIN DEFUNCT */
 1044 int
 1045 obreak(__unused proc_t p, __unused struct obreak_args *uap, __unused register_t *retval)
 1046 {
 1047         /* Not implemented, obsolete */
 1048         return (ENOMEM);
 1049 }
 1050 
 1051 int     both;
 1052 
 1053 int
 1054 ovadvise(__unused proc_t p, __unused struct ovadvise_args *uap, __unused register_t *retval)
 1055 {
 1056 
 1057 #ifdef lint
 1058         both = 0;
 1059 #endif
 1060         return( 0 );
 1061 }
 1062 /* END DEFUNCT */
 1063 
 1064 /* USV: No! need to obsolete map_fd()! mmap() already supports 64 bits */
 1065 kern_return_t
 1066 map_fd(struct map_fd_args *args)
 1067 {
 1068         int             fd = args->fd;
 1069         vm_offset_t     offset = args->offset;
 1070         vm_offset_t     *va = args->va;
 1071         boolean_t       findspace = args->findspace;
 1072         vm_size_t       size = args->size;
 1073         kern_return_t ret;
 1074 
 1075         AUDIT_MACH_SYSCALL_ENTER(AUE_MAPFD);
 1076         AUDIT_ARG(addr, CAST_DOWN(user_addr_t, va));
 1077         AUDIT_ARG(fd, fd);
 1078 
 1079         ret = map_fd_funneled( fd, (vm_object_offset_t)offset, va, findspace, size);
 1080 
 1081         AUDIT_MACH_SYSCALL_EXIT(ret);
 1082         return ret;
 1083 }
 1084 
 1085 kern_return_t
 1086 map_fd_funneled(
 1087         int                     fd,
 1088         vm_object_offset_t      offset,
 1089         vm_offset_t             *va,
 1090         boolean_t               findspace,
 1091         vm_size_t               size)
 1092 {
 1093         kern_return_t   result;
 1094         struct fileproc *fp;
 1095         struct vnode    *vp;
 1096         void *  pager;
 1097         vm_offset_t     map_addr=0;
 1098         vm_size_t       map_size;
 1099         int             err=0;
 1100         vm_map_t        my_map;
 1101         proc_t          p = current_proc();
 1102         struct vnode_attr vattr;
 1103 
 1104         /*
 1105          *      Find the inode; verify that it's a regular file.
 1106          */
 1107 
 1108         err = fp_lookup(p, fd, &fp, 0);
 1109         if (err)
 1110                 return(err);
 1111         
 1112         if (fp->f_fglob->fg_type != DTYPE_VNODE){
 1113                 err = KERN_INVALID_ARGUMENT;
 1114                 goto bad;
 1115         }
 1116 
 1117         if (!(fp->f_fglob->fg_flag & FREAD)) {
 1118                 err = KERN_PROTECTION_FAILURE;
 1119                 goto bad;
 1120         }
 1121 
 1122         vp = (struct vnode *)fp->f_fglob->fg_data;
 1123         err = vnode_getwithref(vp);
 1124         if(err != 0) 
 1125                 goto bad;
 1126 
 1127         if (vp->v_type != VREG) {
 1128                 (void)vnode_put(vp);
 1129                 err = KERN_INVALID_ARGUMENT;
 1130                 goto bad;
 1131         }
 1132 
 1133         AUDIT_ARG(vnpath, vp, ARG_VNODE1);
 1134 
 1135         /*
 1136          * POSIX: mmap needs to update access time for mapped files
 1137          */
 1138         if ((vnode_vfsvisflags(vp) & MNT_NOATIME) == 0) {
 1139                 VATTR_INIT(&vattr);
 1140                 nanotime(&vattr.va_access_time);
 1141                 VATTR_SET_ACTIVE(&vattr, va_access_time);
 1142                 vnode_setattr(vp, &vattr, vfs_context_current());
 1143         }
 1144         
 1145         if (offset & PAGE_MASK_64) {
 1146                 printf("map_fd: file offset not page aligned(%d : %s)\n",p->p_pid, p->p_comm);
 1147                 (void)vnode_put(vp);
 1148                 err = KERN_INVALID_ARGUMENT;
 1149                 goto bad;
 1150         }
 1151         map_size = round_page(size);
 1152 
 1153         /*
 1154          * Allow user to map in a zero length file.
 1155          */
 1156         if (size == 0) {
 1157                 (void)vnode_put(vp);
 1158                 err = KERN_SUCCESS;
 1159                 goto bad;
 1160         }
 1161         /*
 1162          *      Map in the file.
 1163          */
 1164         pager = (void *)ubc_getpager(vp);
 1165         if (pager == NULL) {
 1166                 (void)vnode_put(vp);
 1167                 err = KERN_FAILURE;
 1168                 goto bad;
 1169         }
 1170 
 1171 
 1172         my_map = current_map();
 1173 
 1174         result = vm_map_64(
 1175                         my_map,
 1176                         &map_addr, map_size, (vm_offset_t)0, 
 1177                         VM_FLAGS_ANYWHERE, pager, offset, TRUE,
 1178                         VM_PROT_DEFAULT, VM_PROT_ALL,
 1179                         VM_INHERIT_DEFAULT);
 1180         if (result != KERN_SUCCESS) {
 1181                 (void)vnode_put(vp);
 1182                 err = result;
 1183                 goto bad;
 1184         }
 1185 
 1186 
 1187         if (!findspace) {
 1188                 vm_offset_t     dst_addr;
 1189                 vm_map_copy_t   tmp;
 1190 
 1191                 if (copyin(CAST_USER_ADDR_T(va), &dst_addr, sizeof (dst_addr))  ||
 1192                                         trunc_page_32(dst_addr) != dst_addr) {
 1193                         (void) vm_map_remove(
 1194                                         my_map,
 1195                                         map_addr, map_addr + map_size,
 1196                                         VM_MAP_NO_FLAGS);
 1197                         (void)vnode_put(vp);
 1198                         err = KERN_INVALID_ADDRESS;
 1199                         goto bad;
 1200                 }
 1201 
 1202                 result = vm_map_copyin(my_map, (vm_map_address_t)map_addr,
 1203                                        (vm_map_size_t)map_size, TRUE, &tmp);
 1204                 if (result != KERN_SUCCESS) {
 1205                         
 1206                         (void) vm_map_remove(my_map, vm_map_trunc_page(map_addr),
 1207                                         vm_map_round_page(map_addr + map_size),
 1208                                         VM_MAP_NO_FLAGS);
 1209                         (void)vnode_put(vp);
 1210                         err = result;
 1211                         goto bad;
 1212                 }
 1213 
 1214                 result = vm_map_copy_overwrite(my_map,
 1215                                         (vm_map_address_t)dst_addr, tmp, FALSE);
 1216                 if (result != KERN_SUCCESS) {
 1217                         vm_map_copy_discard(tmp);
 1218                         (void)vnode_put(vp);
 1219                         err = result;
 1220                         goto bad;
 1221                 }
 1222         } else {
 1223                 if (copyout(&map_addr, CAST_USER_ADDR_T(va), sizeof (map_addr))) {
 1224                         (void) vm_map_remove(my_map, vm_map_trunc_page(map_addr),
 1225                                         vm_map_round_page(map_addr + map_size),
 1226                                         VM_MAP_NO_FLAGS);
 1227                         (void)vnode_put(vp);
 1228                         err = KERN_INVALID_ADDRESS;
 1229                         goto bad;
 1230                 }
 1231         }
 1232 
 1233         ubc_setthreadcred(vp, current_proc(), current_thread());
 1234         (void)ubc_map(vp, (PROT_READ | PROT_EXEC));
 1235         (void)vnode_put(vp);
 1236         err = 0;
 1237 bad:
 1238         fp_drop(p, fd, fp, 0);
 1239         return (err);
 1240 }
 1241 

Cache object: b08c61c80f1a41f3436dc91a68da4927


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.