The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/bsd/vm/vm_unix.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
    3  *
    4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
    5  * 
    6  * This file contains Original Code and/or Modifications of Original Code
    7  * as defined in and that are subject to the Apple Public Source License
    8  * Version 2.0 (the 'License'). You may not use this file except in
    9  * compliance with the License. The rights granted to you under the License
   10  * may not be used to create, or enable the creation or redistribution of,
   11  * unlawful or unlicensed copies of an Apple operating system, or to
   12  * circumvent, violate, or enable the circumvention or violation of, any
   13  * terms of an Apple operating system software license agreement.
   14  * 
   15  * Please obtain a copy of the License at
   16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
   17  * 
   18  * The Original Code and all software distributed under the License are
   19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
   20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
   21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
   22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
   23  * Please see the License for the specific language governing rights and
   24  * limitations under the License.
   25  * 
   26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
   27  */
   28 /* 
   29  * Mach Operating System
   30  * Copyright (c) 1987 Carnegie-Mellon University
   31  * All rights reserved.  The CMU software License Agreement specifies
   32  * the terms and conditions for use and redistribution.
   33  */
   34 /*
   35  * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
   36  * support for mandatory and extensible security protections.  This notice
   37  * is included in support of clause 2.2 (b) of the Apple Public License,
   38  * Version 2.0.
   39  */
   40 
   41 #include <meta_features.h>
   42 
   43 #include <kern/task.h>
   44 #include <kern/thread.h>
   45 #include <kern/debug.h>
   46 #include <kern/lock.h>
   47 #include <mach/mach_traps.h>
   48 #include <mach/port.h>
   49 #include <mach/task.h>
   50 #include <mach/task_access.h>
   51 #include <mach/task_special_ports.h>
   52 #include <mach/time_value.h>
   53 #include <mach/vm_map.h>
   54 #include <mach/vm_param.h>
   55 #include <mach/vm_prot.h>
   56 
   57 #include <sys/file_internal.h>
   58 #include <sys/param.h>
   59 #include <sys/systm.h>
   60 #include <sys/dir.h>
   61 #include <sys/namei.h>
   62 #include <sys/proc_internal.h>
   63 #include <sys/kauth.h>
   64 #include <sys/vm.h>
   65 #include <sys/file.h>
   66 #include <sys/vnode_internal.h>
   67 #include <sys/mount.h>
   68 #include <sys/trace.h>
   69 #include <sys/kernel.h>
   70 #include <sys/ubc_internal.h>
   71 #include <sys/user.h>
   72 #include <sys/syslog.h>
   73 #include <sys/stat.h>
   74 #include <sys/sysproto.h>
   75 #include <sys/mman.h>
   76 #include <sys/sysctl.h>
   77 
   78 #include <bsm/audit_kernel.h>
   79 #include <bsm/audit_kevents.h>
   80 
   81 #include <kern/kalloc.h>
   82 #include <vm/vm_map.h>
   83 #include <vm/vm_kern.h>
   84 
   85 #include <machine/spl.h>
   86 
   87 #include <mach/shared_region.h>
   88 #include <vm/vm_shared_region.h>
   89 
   90 #include <vm/vm_protos.h>
   91 
   92 /*
   93  * Sysctl's related to data/stack execution.  See osfmk/vm/vm_map.c
   94  */
   95 
   96 extern int allow_stack_exec, allow_data_exec;
   97 
   98 SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW, &allow_stack_exec, 0, "");
   99 SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW, &allow_data_exec, 0, "");
  100 
  101 #if CONFIG_NO_PRINTF_STRINGS
  102 void
  103 log_stack_execution_failure(__unused addr64_t a, __unused vm_prot_t b)
  104 {
  105 }
  106 #else
  107 static const char *prot_values[] = {
  108         "none",
  109         "read-only",
  110         "write-only",
  111         "read-write",
  112         "execute-only",
  113         "read-execute",
  114         "write-execute",
  115         "read-write-execute"
  116 };
  117 
  118 void
  119 log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
  120 {
  121         printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n", 
  122                 current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
  123 }
  124 #endif
  125 
  126 
  127 int
  128 useracc(
  129         user_addr_t     addr,
  130         user_size_t     len,
  131         int     prot)
  132 {
  133         return (vm_map_check_protection(
  134                         current_map(),
  135                         vm_map_trunc_page(addr), vm_map_round_page(addr+len),
  136                         prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
  137 }
  138 
  139 int
  140 vslock(
  141         user_addr_t     addr,
  142         user_size_t     len)
  143 {
  144         kern_return_t kret;
  145         kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
  146                         vm_map_round_page(addr+len), 
  147                         VM_PROT_READ | VM_PROT_WRITE ,FALSE);
  148 
  149         switch (kret) {
  150         case KERN_SUCCESS:
  151                 return (0);
  152         case KERN_INVALID_ADDRESS:
  153         case KERN_NO_SPACE:
  154                 return (ENOMEM);
  155         case KERN_PROTECTION_FAILURE:
  156                 return (EACCES);
  157         default:
  158                 return (EINVAL);
  159         }
  160 }
  161 
  162 int
  163 vsunlock(
  164         user_addr_t addr,
  165         user_size_t len,
  166         __unused int dirtied)
  167 {
  168 #if FIXME  /* [ */
  169         pmap_t          pmap;
  170         vm_page_t       pg;
  171         vm_map_offset_t vaddr;
  172         ppnum_t         paddr;
  173 #endif  /* FIXME ] */
  174         kern_return_t kret;
  175 
  176 #if FIXME  /* [ */
  177         if (dirtied) {
  178                 pmap = get_task_pmap(current_task());
  179                 for (vaddr = vm_map_trunc_page(addr);
  180                      vaddr < vm_map_round_page(addr+len);
  181                                 vaddr += PAGE_SIZE) {
  182                         paddr = pmap_extract(pmap, vaddr);
  183                         pg = PHYS_TO_VM_PAGE(paddr);
  184                         vm_page_set_modified(pg);
  185                 }
  186         }
  187 #endif  /* FIXME ] */
  188 #ifdef  lint
  189         dirtied++;
  190 #endif  /* lint */
  191         kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
  192                                 vm_map_round_page(addr+len), FALSE);
  193         switch (kret) {
  194         case KERN_SUCCESS:
  195                 return (0);
  196         case KERN_INVALID_ADDRESS:
  197         case KERN_NO_SPACE:
  198                 return (ENOMEM);
  199         case KERN_PROTECTION_FAILURE:
  200                 return (EACCES);
  201         default:
  202                 return (EINVAL);
  203         }
  204 }
  205 
  206 int
  207 subyte(
  208         user_addr_t addr,
  209         int byte)
  210 {
  211         char character;
  212         
  213         character = (char)byte;
  214         return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
  215 }
  216 
  217 int
  218 suibyte(
  219         user_addr_t addr,
  220         int byte)
  221 {
  222         char character;
  223         
  224         character = (char)byte;
  225         return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
  226 }
  227 
  228 int fubyte(user_addr_t addr)
  229 {
  230         unsigned char byte;
  231 
  232         if (copyin(addr, (void *) &byte, sizeof(char)))
  233                 return(-1);
  234         return(byte);
  235 }
  236 
  237 int fuibyte(user_addr_t addr)
  238 {
  239         unsigned char byte;
  240 
  241         if (copyin(addr, (void *) &(byte), sizeof(char)))
  242                 return(-1);
  243         return(byte);
  244 }
  245 
  246 int
  247 suword(
  248         user_addr_t addr,
  249         long word)
  250 {
  251         return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
  252 }
  253 
  254 long fuword(user_addr_t addr)
  255 {
  256         long word;
  257 
  258         if (copyin(addr, (void *) &word, sizeof(int)))
  259                 return(-1);
  260         return(word);
  261 }
  262 
  263 /* suiword and fuiword are the same as suword and fuword, respectively */
  264 
  265 int
  266 suiword(
  267         user_addr_t addr,
  268         long word)
  269 {
  270         return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
  271 }
  272 
  273 long fuiword(user_addr_t addr)
  274 {
  275         long word;
  276 
  277         if (copyin(addr, (void *) &word, sizeof(int)))
  278                 return(-1);
  279         return(word);
  280 }
  281 
  282 /*
  283  * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
  284  * fetching and setting of process-sized size_t and pointer values.
  285  */
  286 int
  287 sulong(user_addr_t addr, int64_t word)
  288 {
  289 
  290         if (IS_64BIT_PROCESS(current_proc())) {
  291                 return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
  292         } else {
  293                 return(suiword(addr, (long)word));
  294         }
  295 }
  296 
  297 int64_t
  298 fulong(user_addr_t addr)
  299 {
  300         int64_t longword;
  301 
  302         if (IS_64BIT_PROCESS(current_proc())) {
  303                 if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
  304                         return(-1);
  305                 return(longword);
  306         } else {
  307                 return((int64_t)fuiword(addr));
  308         }
  309 }
  310 
  311 int
  312 suulong(user_addr_t addr, uint64_t uword)
  313 {
  314 
  315         if (IS_64BIT_PROCESS(current_proc())) {
  316                 return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
  317         } else {
  318                 return(suiword(addr, (u_long)uword));
  319         }
  320 }
  321 
  322 uint64_t
  323 fuulong(user_addr_t addr)
  324 {
  325         uint64_t ulongword;
  326 
  327         if (IS_64BIT_PROCESS(current_proc())) {
  328                 if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
  329                         return(-1ULL);
  330                 return(ulongword);
  331         } else {
  332                 return((uint64_t)fuiword(addr));
  333         }
  334 }
  335 
  336 int
  337 swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
  338 {
  339         return(ENOTSUP);
  340 }
  341 
  342 
  343 kern_return_t
  344 pid_for_task(
  345         struct pid_for_task_args *args)
  346 {
  347         mach_port_name_t        t = args->t;
  348         user_addr_t             pid_addr  = args->pid;  
  349         proc_t p;
  350         task_t          t1;
  351         int     pid = -1;
  352         kern_return_t   err = KERN_SUCCESS;
  353 
  354         AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
  355         AUDIT_ARG(mach_port1, t);
  356 
  357         t1 = port_name_to_task(t);
  358 
  359         if (t1 == TASK_NULL) {
  360                 err = KERN_FAILURE;
  361                 goto pftout;
  362         } else {
  363                 p = get_bsdtask_info(t1);
  364                 if (p) {
  365                         pid  = proc_pid(p);
  366                         err = KERN_SUCCESS;
  367                 } else {
  368                         err = KERN_FAILURE;
  369                 }
  370         }
  371         task_deallocate(t1);
  372 pftout:
  373         AUDIT_ARG(pid, pid);
  374         (void) copyout((char *) &pid, pid_addr, sizeof(int));
  375         AUDIT_MACH_SYSCALL_EXIT(err);
  376         return(err);
  377 }
  378 
  379 /* 
  380  *
  381  * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
  382  * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
  383  *
  384  */
  385 static  int tfp_policy = KERN_TFP_POLICY_DEFAULT;
  386 
  387 /*
  388  *      Routine:        task_for_pid_posix_check
  389  *      Purpose:
  390  *                      Verify that the current process should be allowed to
  391  *                      get the target process's task port. This is only 
  392  *                      permitted if:
  393  *                      - The current process is root
  394  *                      OR all of the following are true:
  395  *                      - The target process's real, effective, and saved uids
  396  *                        are the same as the current proc's euid,
  397  *                      - The target process's group set is a subset of the
  398  *                        calling process's group set, and
  399  *                      - The target process hasn't switched credentials.
  400  *
  401  *      Returns:        TRUE: permitted
  402  *                      FALSE: denied
  403  */
  404 static int
  405 task_for_pid_posix_check(proc_t target)
  406 {
  407         kauth_cred_t targetcred, mycred;
  408         uid_t myuid;
  409         int allowed; 
  410 
  411         /* No task_for_pid on bad targets */
  412         if (target == PROC_NULL || target->p_stat == SZOMB) {
  413                 return FALSE;
  414         }
  415 
  416         mycred = kauth_cred_get();
  417         myuid = kauth_cred_getuid(mycred);
  418 
  419         /* If we're running as root, the check passes */
  420         if (kauth_cred_issuser(mycred))
  421                 return TRUE;
  422 
  423         /* We're allowed to get our own task port */
  424         if (target == current_proc())
  425                 return TRUE;
  426 
  427         /* 
  428          * Under DENY, only root can get another proc's task port,
  429          * so no more checks are needed.
  430          */
  431         if (tfp_policy == KERN_TFP_POLICY_DENY) { 
  432                 return FALSE;
  433         }
  434 
  435         targetcred = kauth_cred_proc_ref(target);
  436         allowed = TRUE;
  437 
  438         /* Do target's ruid, euid, and saved uid match my euid? */
  439         if ((kauth_cred_getuid(targetcred) != myuid) || 
  440                         (targetcred->cr_ruid != myuid) ||
  441                         (targetcred->cr_svuid != myuid)) {
  442                 allowed = FALSE;
  443                 goto out;
  444         }
  445 
  446         /* Are target's groups a subset of my groups? */
  447         if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
  448                         allowed == 0) {
  449                 allowed = FALSE;
  450                 goto out;
  451         }
  452 
  453         /* Has target switched credentials? */
  454         if (target->p_flag & P_SUGID) {
  455                 allowed = FALSE;
  456                 goto out;
  457         }
  458         
  459 out:
  460         kauth_cred_unref(&targetcred);
  461         return allowed;
  462 }
  463 
  464 /*
  465  *      Routine:        task_for_pid
  466  *      Purpose:
  467  *              Get the task port for another "process", named by its
  468  *              process ID on the same host as "target_task".
  469  *
  470  *              Only permitted to privileged processes, or processes
  471  *              with the same user ID.
  472  *
  473  * XXX This should be a BSD system call, not a Mach trap!!!
  474  */
  475 kern_return_t
  476 task_for_pid(
  477         struct task_for_pid_args *args)
  478 {
  479         mach_port_name_t        target_tport = args->target_tport;
  480         int                     pid = args->pid;
  481         user_addr_t             task_addr = args->t;
  482         struct uthread          *uthread;
  483         proc_t                  p = PROC_NULL;
  484         task_t                  t1 = TASK_NULL;
  485         mach_port_name_t        tret = MACH_PORT_NULL;
  486         ipc_port_t              tfpport;
  487         void * sright;
  488         int error = 0;
  489 
  490         AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
  491         AUDIT_ARG(pid, pid);
  492         AUDIT_ARG(mach_port1, target_tport);
  493 
  494 #if defined(SECURE_KERNEL)
  495         if (0 == pid) {
  496                 (void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
  497                 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
  498                 return(KERN_FAILURE);
  499         }
  500 #endif
  501 
  502         t1 = port_name_to_task(target_tport);
  503         if (t1 == TASK_NULL) {
  504                 (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
  505                 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
  506                 return(KERN_FAILURE);
  507         } 
  508 
  509 
  510         /*
  511          * Delayed binding of thread credential to process credential, if we
  512          * are not running with an explicitly set thread credential.
  513          */
  514         uthread = get_bsdthread_info(current_thread());
  515         kauth_cred_uthread_update(uthread, current_proc());
  516 
  517         p = proc_find(pid);
  518         AUDIT_ARG(process, p);
  519 
  520         if (!(task_for_pid_posix_check(p))) {
  521                 error = KERN_FAILURE;
  522                 goto tfpout;
  523         }
  524 
  525         if (p->task != TASK_NULL) {
  526                 /* If we aren't root and target's task access port is set... */
  527                 if (!kauth_cred_issuser(kauth_cred_get()) &&
  528                         (task_get_task_access_port(p->task, &tfpport) == 0) &&
  529                         (tfpport != IPC_PORT_NULL)) {
  530 
  531                         if (tfpport == IPC_PORT_DEAD) {
  532                                 error = KERN_PROTECTION_FAILURE;
  533                                 goto tfpout;
  534                         }
  535 
  536                         /* Call up to the task access server */
  537                         error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);
  538 
  539                         if (error != MACH_MSG_SUCCESS) {
  540                                 if (error == MACH_RCV_INTERRUPTED)
  541                                         error = KERN_ABORTED;
  542                                 else
  543                                         error = KERN_FAILURE;
  544                                 goto tfpout;
  545                         }
  546                 }
  547 #if CONFIG_MACF
  548                 error = mac_proc_check_get_task(kauth_cred_get(), p);
  549                 if (error) {
  550                         error = KERN_FAILURE;
  551                         goto tfpout;
  552                 }
  553 #endif
  554 
  555                 /* Grant task port access */
  556                 task_reference(p->task);
  557                 sright = (void *) convert_task_to_port(p->task);
  558                 tret = ipc_port_copyout_send(
  559                                 sright, 
  560                                 get_task_ipcspace(current_task()));
  561         } 
  562         error = KERN_SUCCESS;
  563 
  564 tfpout:
  565         task_deallocate(t1);
  566         AUDIT_ARG(mach_port2, tret);
  567         (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
  568         if (p != PROC_NULL)
  569                 proc_rele(p);
  570         AUDIT_MACH_SYSCALL_EXIT(error);
  571         return(error);
  572 }
  573 
  574 /*
  575  *      Routine:        task_name_for_pid
  576  *      Purpose:
  577  *              Get the task name port for another "process", named by its
  578  *              process ID on the same host as "target_task".
  579  *
  580  *              Only permitted to privileged processes, or processes
  581  *              with the same user ID.
  582  *
  583  * XXX This should be a BSD system call, not a Mach trap!!!
  584  */
  585 
  586 kern_return_t
  587 task_name_for_pid(
  588         struct task_name_for_pid_args *args)
  589 {
  590         mach_port_name_t        target_tport = args->target_tport;
  591         int                     pid = args->pid;
  592         user_addr_t             task_addr = args->t;
  593         struct uthread          *uthread;
  594         proc_t          p = PROC_NULL;
  595         task_t          t1;
  596         mach_port_name_t        tret;
  597         void * sright;
  598         int error = 0, refheld = 0;
  599         kauth_cred_t target_cred;
  600 
  601         AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
  602         AUDIT_ARG(pid, pid);
  603         AUDIT_ARG(mach_port1, target_tport);
  604 
  605         t1 = port_name_to_task(target_tport);
  606         if (t1 == TASK_NULL) {
  607                 (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
  608                 AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
  609                 return(KERN_FAILURE);
  610         } 
  611 
  612 
  613         /*
  614          * Delayed binding of thread credential to process credential, if we
  615          * are not running with an explicitly set thread credential.
  616          */
  617         uthread = get_bsdthread_info(current_thread());
  618         kauth_cred_uthread_update(uthread, current_proc());
  619 
  620         p = proc_find(pid);
  621         AUDIT_ARG(process, p);
  622         if (p != PROC_NULL) {
  623                 target_cred = kauth_cred_proc_ref(p);
  624                 refheld = 1;
  625 
  626                 if ((p->p_stat != SZOMB)
  627                     && ((current_proc() == p)
  628                         || kauth_cred_issuser(kauth_cred_get()) 
  629                         || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) && 
  630                             ((target_cred->cr_ruid == kauth_cred_get()->cr_ruid))))) {
  631 
  632                         if (p->task != TASK_NULL) {
  633                                 task_reference(p->task);
  634 #if CONFIG_MACF
  635                                 error = mac_proc_check_get_task_name(kauth_cred_get(),  p);
  636                                 if (error) {
  637                                         task_deallocate(p->task);
  638                                         goto noperm;
  639                                 }
  640 #endif
  641                                 sright = (void *)convert_task_name_to_port(p->task);
  642                                 tret = ipc_port_copyout_send(sright, 
  643                                                 get_task_ipcspace(current_task()));
  644                         } else
  645                                 tret  = MACH_PORT_NULL;
  646 
  647                         AUDIT_ARG(mach_port2, tret);
  648                         (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
  649                         task_deallocate(t1);
  650                         error = KERN_SUCCESS;
  651                         goto tnfpout;
  652                 }
  653         }
  654 
  655 #if CONFIG_MACF
  656 noperm:
  657 #endif
  658     task_deallocate(t1);
  659         tret = MACH_PORT_NULL;
  660         (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
  661         error = KERN_FAILURE;
  662 tnfpout:
  663         if (refheld != 0)
  664                 kauth_cred_unref(&target_cred);
  665         if (p != PROC_NULL)
  666                 proc_rele(p);
  667         AUDIT_MACH_SYSCALL_EXIT(error);
  668         return(error);
  669 }
  670 
  671 static int
  672 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
  673     __unused int arg2, struct sysctl_req *req)
  674 {
  675     int error = 0;
  676         int new_value;
  677 
  678     error = SYSCTL_OUT(req, arg1, sizeof(int));
  679     if (error || req->newptr == USER_ADDR_NULL)
  680         return(error);
  681 
  682         if (!is_suser())
  683                 return(EPERM);
  684 
  685         if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
  686                 goto out;
  687         }
  688         if ((new_value == KERN_TFP_POLICY_DENY) 
  689                 || (new_value == KERN_TFP_POLICY_DEFAULT))
  690                         tfp_policy = new_value;
  691         else
  692                         error = EINVAL;         
  693 out:
  694     return(error);
  695 
  696 }
  697 
  698 #if defined(SECURE_KERNEL)
  699 static int kern_secure_kernel = 1;
  700 #else
  701 static int kern_secure_kernel = 0;
  702 #endif
  703 
  704 SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD, &kern_secure_kernel, 0, "");
  705 
  706 SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW|CTLFLAG_LOCKED, 0, "tfp");
  707 SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW,
  708     &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy");
  709 
  710 SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW,
  711            &shared_region_trace_level, 0, "");
  712 SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD,
  713            &shared_region_version, 0, "");
  714 SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW,
  715            &shared_region_persistence, 0, "");
  716 
  717 /*
  718  * shared_region_check_np:
  719  *
  720  * This system call is intended for dyld.
  721  *
  722  * dyld calls this when any process starts to see if the process's shared
  723  * region is already set up and ready to use.
  724  * This call returns the base address of the first mapping in the
  725  * process's shared region's first mapping.
  726  * dyld will then check what's mapped at that address.
  727  *
  728  * If the shared region is empty, dyld will then attempt to map the shared
  729  * cache file in the shared region via the shared_region_map_np() system call.
  730  *
  731  * If something's already mapped in the shared region, dyld will check if it
  732  * matches the shared cache it would like to use for that process.
  733  * If it matches, evrything's ready and the process can proceed and use the
  734  * shared region.
  735  * If it doesn't match, dyld will unmap the shared region and map the shared
  736  * cache into the process's address space via mmap().
  737  *
  738  * ERROR VALUES
  739  * EINVAL       no shared region
  740  * ENOMEM       shared region is empty
  741  * EFAULT       bad address for "start_address"
  742  */
  743 int
  744 shared_region_check_np(
  745         __unused struct proc                    *p,
  746         struct shared_region_check_np_args      *uap,
  747         __unused int                            *retvalp)
  748 {
  749         vm_shared_region_t      shared_region;
  750         mach_vm_offset_t        start_address;
  751         int                     error;
  752         kern_return_t           kr;
  753 
  754         SHARED_REGION_TRACE_DEBUG(
  755                 ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
  756                  current_thread(), p->p_pid, p->p_comm,
  757                  (uint64_t)uap->start_address));
  758 
  759         /* retrieve the current tasks's shared region */
  760         shared_region = vm_shared_region_get(current_task());
  761         if (shared_region != NULL) {
  762                 /* retrieve address of its first mapping... */
  763                 kr = vm_shared_region_start_address(shared_region,
  764                                                     &start_address);
  765                 if (kr != KERN_SUCCESS) {
  766                         error = ENOMEM;
  767                 } else {
  768                         /* ... and give it to the caller */
  769                         error = copyout(&start_address,
  770                                         (user_addr_t) uap->start_address,
  771                                         sizeof (start_address));
  772                         if (error) {
  773                                 SHARED_REGION_TRACE_ERROR(
  774                                         ("shared_region: %p [%d(%s)] "
  775                                          "check_np(0x%llx) "
  776                                          "copyout(0x%llx) error %d\n",
  777                                          current_thread(), p->p_pid, p->p_comm,
  778                                          (uint64_t)uap->start_address, (uint64_t)start_address,
  779                                          error));
  780                         }
  781                 }
  782                 vm_shared_region_deallocate(shared_region);
  783         } else {
  784                 /* no shared region ! */
  785                 error = EINVAL;
  786         }
  787 
  788         SHARED_REGION_TRACE_DEBUG(
  789                 ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
  790                  current_thread(), p->p_pid, p->p_comm,
  791                  (uint64_t)uap->start_address, (uint64_t)start_address, error));
  792 
  793         return error;
  794 }
  795 
  796 /*
  797  * shared_region_map_np()
  798  *
  799  * This system call is intended for dyld.
  800  *
  801  * dyld uses this to map a shared cache file into a shared region.
  802  * This is usually done only the first time a shared cache is needed.
  803  * Subsequent processes will just use the populated shared region without
  804  * requiring any further setup.
  805  */
  806 int
  807 shared_region_map_np(
  808         struct proc                             *p,
  809         struct shared_region_map_np_args        *uap,
  810         __unused int                            *retvalp)
  811 {
  812         int                             error;
  813         kern_return_t                   kr;
  814         int                             fd;
  815         struct fileproc                 *fp;
  816         struct vnode                    *vp, *root_vp;
  817         struct vnode_attr               va;
  818         off_t                           fs;
  819         memory_object_size_t            file_size;
  820         user_addr_t                     user_mappings;
  821         struct shared_file_mapping_np   *mappings;
  822 #define SFM_MAX_STACK   4
  823         struct shared_file_mapping_np   stack_mappings[SFM_MAX_STACK];
  824         unsigned int                    mappings_count;
  825         vm_size_t                       mappings_size;
  826         memory_object_control_t         file_control;
  827         struct vm_shared_region         *shared_region;
  828 
  829         SHARED_REGION_TRACE_DEBUG(
  830                 ("shared_region: %p [%d(%s)] -> map\n",
  831                  current_thread(), p->p_pid, p->p_comm));
  832 
  833         shared_region = NULL;
  834         mappings_count = 0;
  835         mappings_size = 0;
  836         mappings = NULL;
  837         fp = NULL;
  838         vp = NULL;
  839 
  840         /* get file descriptor for shared region cache file */
  841         fd = uap->fd;
  842 
  843         /* get file structure from file descriptor */
  844         error = fp_lookup(p, fd, &fp, 0);
  845         if (error) {
  846                 SHARED_REGION_TRACE_ERROR(
  847                         ("shared_region: %p [%d(%s)] map: "
  848                          "fd=%d lookup failed (error=%d)\n",
  849                          current_thread(), p->p_pid, p->p_comm, fd, error));
  850                 goto done;
  851         }
  852 
  853         /* make sure we're attempting to map a vnode */
  854         if (fp->f_fglob->fg_type != DTYPE_VNODE) {
  855                 SHARED_REGION_TRACE_ERROR(
  856                         ("shared_region: %p [%d(%s)] map: "
  857                          "fd=%d not a vnode (type=%d)\n",
  858                          current_thread(), p->p_pid, p->p_comm,
  859                          fd, fp->f_fglob->fg_type));
  860                 error = EINVAL;
  861                 goto done;
  862         }
  863 
  864         /* we need at least read permission on the file */
  865         if (! (fp->f_fglob->fg_flag & FREAD)) {
  866                 SHARED_REGION_TRACE_ERROR(
  867                         ("shared_region: %p [%d(%s)] map: "
  868                          "fd=%d not readable\n",
  869                          current_thread(), p->p_pid, p->p_comm, fd));
  870                 error = EPERM;
  871                 goto done;
  872         }
  873 
  874         /* get vnode from file structure */
  875         error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
  876         if (error) {
  877                 SHARED_REGION_TRACE_ERROR(
  878                         ("shared_region: %p [%d(%s)] map: "
  879                          "fd=%d getwithref failed (error=%d)\n",
  880                          current_thread(), p->p_pid, p->p_comm, fd, error));
  881                 goto done;
  882         }
  883         vp = (struct vnode *) fp->f_fglob->fg_data;
  884 
  885         /* make sure the vnode is a regular file */
  886         if (vp->v_type != VREG) {
  887                 SHARED_REGION_TRACE_ERROR(
  888                         ("shared_region: %p [%d(%s)] map(%p:'%s'): "
  889                          "not a file (type=%d)\n",
  890                          current_thread(), p->p_pid, p->p_comm,
  891                          vp, vp->v_name, vp->v_type));
  892                 error = EINVAL;
  893                 goto done;
  894         }
  895 
  896         /* make sure vnode is on the process's root volume */
  897         root_vp = p->p_fd->fd_rdir;
  898         if (root_vp == NULL) {
  899                 root_vp = rootvnode;
  900         }
  901         if (vp->v_mount != root_vp->v_mount) {
  902                 SHARED_REGION_TRACE_ERROR(
  903                         ("shared_region: %p [%d(%s)] map(%p:'%s'): "
  904                          "not on process's root volume\n",
  905                          current_thread(), p->p_pid, p->p_comm,
  906                          vp, vp->v_name));
  907                 error = EPERM;
  908                 goto done;
  909         }
  910 
  911         /* make sure vnode is owned by "root" */
  912         VATTR_INIT(&va);
  913         VATTR_WANTED(&va, va_uid);
  914         error = vnode_getattr(vp, &va, vfs_context_current());
  915         if (error) {
  916                 SHARED_REGION_TRACE_ERROR(
  917                         ("shared_region: %p [%d(%s)] map(%p:'%s'): "
  918                          "vnode_getattr(%p) failed (error=%d)\n",
  919                          current_thread(), p->p_pid, p->p_comm,
  920                          vp, vp->v_name, vp, error));
  921                 goto done;
  922         }
  923         if (va.va_uid != 0) {
  924                 SHARED_REGION_TRACE_ERROR(
  925                         ("shared_region: %p [%d(%s)] map(%p:'%s'): "
  926                          "owned by uid=%d instead of 0\n",
  927                          current_thread(), p->p_pid, p->p_comm,
  928                          vp, vp->v_name, va.va_uid));
  929                 error = EPERM;
  930                 goto done;
  931         }
  932 
  933         /* get vnode size */
  934         error = vnode_size(vp, &fs, vfs_context_current());
  935         if (error) {
  936                 SHARED_REGION_TRACE_ERROR(
  937                         ("shared_region: %p [%d(%s)] map(%p:'%s'): "
  938                          "vnode_size(%p) failed (error=%d)\n",
  939                          current_thread(), p->p_pid, p->p_comm,
  940                          vp, vp->v_name, vp, error));
  941                 goto done;
  942         }
  943         file_size = fs;
  944 
  945         /* get the file's memory object handle */
  946         file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
  947         if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
  948                 SHARED_REGION_TRACE_ERROR(
  949                         ("shared_region: %p [%d(%s)] map(%p:'%s'): "
  950                          "no memory object\n",
  951                          current_thread(), p->p_pid, p->p_comm,
  952                          vp, vp->v_name));
  953                 error = EINVAL;
  954                 goto done;
  955         }
  956                          
  957         /* get the list of mappings the caller wants us to establish */
  958         mappings_count = uap->count;    /* number of mappings */
  959         mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
  960         if (mappings_count == 0) {
  961                 SHARED_REGION_TRACE_INFO(
  962                         ("shared_region: %p [%d(%s)] map(%p:'%s'): "
  963                          "no mappings\n",
  964                          current_thread(), p->p_pid, p->p_comm,
  965                          vp, vp->v_name));
  966                 error = 0;      /* no mappings: we're done ! */
  967                 goto done;
  968         } else if (mappings_count <= SFM_MAX_STACK) {
  969                 mappings = &stack_mappings[0];
  970         } else {
  971                 SHARED_REGION_TRACE_ERROR(
  972                         ("shared_region: %p [%d(%s)] map(%p:'%s'): "
  973                          "too many mappings (%d)\n",
  974                          current_thread(), p->p_pid, p->p_comm,
  975                          vp, vp->v_name, mappings_count));
  976                 error = EINVAL;
  977                 goto done;
  978         }
  979 
  980         user_mappings = uap->mappings;  /* the mappings, in user space */
  981         error = copyin(user_mappings,
  982                        mappings,
  983                        mappings_size);
  984         if (error) {
  985                 SHARED_REGION_TRACE_ERROR(
  986                         ("shared_region: %p [%d(%s)] map(%p:'%s'): "
  987                          "copyin(0x%llx, %d) failed (error=%d)\n",
  988                          current_thread(), p->p_pid, p->p_comm,
  989                          vp, vp->v_name, (uint64_t)user_mappings, mappings_count, error));
  990                 goto done;
  991         }
  992 
  993         /* get the process's shared region (setup in vm_map_exec()) */
  994         shared_region = vm_shared_region_get(current_task());
  995         if (shared_region == NULL) {
  996                 SHARED_REGION_TRACE_ERROR(
  997                         ("shared_region: %p [%d(%s)] map(%p:'%s'): "
  998                          "no shared region\n",
  999                          current_thread(), p->p_pid, p->p_comm,
 1000                          vp, vp->v_name));
 1001                 goto done;
 1002         }
 1003 
 1004         /* map the file into that shared region's submap */
 1005         kr = vm_shared_region_map_file(shared_region,
 1006                                        mappings_count,
 1007                                        mappings,
 1008                                        file_control,
 1009                                        file_size,
 1010                                        (void *) p->p_fd->fd_rdir);
 1011         if (kr != KERN_SUCCESS) {
 1012                 SHARED_REGION_TRACE_ERROR(
 1013                         ("shared_region: %p [%d(%s)] map(%p:'%s'): "
 1014                          "vm_shared_region_map_file() failed kr=0x%x\n",
 1015                          current_thread(), p->p_pid, p->p_comm,
 1016                          vp, vp->v_name, kr));
 1017                 switch (kr) {
 1018                 case KERN_INVALID_ADDRESS:
 1019                         error = EFAULT;
 1020                         break;
 1021                 case KERN_PROTECTION_FAILURE:
 1022                         error = EPERM;
 1023                         break;
 1024                 case KERN_NO_SPACE:
 1025                         error = ENOMEM;
 1026                         break;
 1027                 case KERN_FAILURE:
 1028                 case KERN_INVALID_ARGUMENT:
 1029                 default:
 1030                         error = EINVAL;
 1031                         break;
 1032                 }
 1033                 goto done;
 1034         }
 1035 
 1036         /*
 1037          * The mapping was successful.  Let the buffer cache know
 1038          * that we've mapped that file with these protections.  This
 1039          * prevents the vnode from getting recycled while it's mapped.
 1040          */
 1041         (void) ubc_map(vp, VM_PROT_READ);
 1042         error = 0;
 1043 
 1044         /* update the vnode's access time */
 1045         if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
 1046                 VATTR_INIT(&va);
 1047                 nanotime(&va.va_access_time);
 1048                 VATTR_SET_ACTIVE(&va, va_access_time);
 1049                 vnode_setattr(vp, &va, vfs_context_current());
 1050         }
 1051 
 1052         if (p->p_flag & P_NOSHLIB) {
 1053                 /* signal that this process is now using split libraries */
 1054                 OSBitAndAtomic(~((uint32_t)P_NOSHLIB), (UInt32 *)&p->p_flag);
 1055         }
 1056 
 1057 done:
 1058         if (vp != NULL) {
 1059                 /*
 1060                  * release the vnode...
 1061                  * ubc_map() still holds it for us in the non-error case
 1062                  */
 1063                 (void) vnode_put(vp);
 1064                 vp = NULL;
 1065         }
 1066         if (fp != NULL) {
 1067                 /* release the file descriptor */
 1068                 fp_drop(p, fd, fp, 0);
 1069                 fp = NULL;
 1070         }
 1071 
 1072         if (shared_region != NULL) {
 1073                 vm_shared_region_deallocate(shared_region);
 1074         }
 1075 
 1076         SHARED_REGION_TRACE_DEBUG(
 1077                 ("shared_region: %p [%d(%s)] <- map\n",
 1078                  current_thread(), p->p_pid, p->p_comm));
 1079 
 1080         return error;
 1081 }
 1082 
 1083 
/* sysctl overflow room */

/*
 * vm_page_free_target is provided as a makeshift solution for applications
 * that want to allocate buffer space, possibly purgeable memory, but not
 * cause inactive pages to be reclaimed.  It allows the app to calculate how
 * much memory is free outside the free target.
 *
 * The variable itself is defined elsewhere (hence the extern declaration);
 * here it is only published, read-only (CTLFLAG_RD), under the _vm sysctl
 * node with the description "Pageout daemon free target".
 */
extern unsigned int     vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD, 
                   &vm_page_free_target, 0, "Pageout daemon free target");
 1092 

Cache object: 8255934232f9a95b1c5058c22a9c95b5


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.