The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/bsd/vm/vm_unix.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 2000-2010 Apple Inc. All rights reserved.
    3  *
    4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
    5  * 
    6  * This file contains Original Code and/or Modifications of Original Code
    7  * as defined in and that are subject to the Apple Public Source License
    8  * Version 2.0 (the 'License'). You may not use this file except in
    9  * compliance with the License. The rights granted to you under the License
   10  * may not be used to create, or enable the creation or redistribution of,
   11  * unlawful or unlicensed copies of an Apple operating system, or to
   12  * circumvent, violate, or enable the circumvention or violation of, any
   13  * terms of an Apple operating system software license agreement.
   14  * 
   15  * Please obtain a copy of the License at
   16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
   17  * 
   18  * The Original Code and all software distributed under the License are
   19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
   20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
   21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
   22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
   23  * Please see the License for the specific language governing rights and
   24  * limitations under the License.
   25  * 
   26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
   27  */
   28 /* 
   29  * Mach Operating System
   30  * Copyright (c) 1987 Carnegie-Mellon University
   31  * All rights reserved.  The CMU software License Agreement specifies
   32  * the terms and conditions for use and redistribution.
   33  */
   34 /*
   35  * NOTICE: This file was modified by SPARTA, Inc. in 2006 to introduce
   36  * support for mandatory and extensible security protections.  This notice
   37  * is included in support of clause 2.2 (b) of the Apple Public License,
   38  * Version 2.0.
   39  */
   40 
   41 #include <meta_features.h>
   42 
   43 #include <kern/task.h>
   44 #include <kern/thread.h>
   45 #include <kern/debug.h>
   46 #include <kern/lock.h>
   47 #include <kern/extmod_statistics.h>
   48 #include <mach/mach_traps.h>
   49 #include <mach/port.h>
   50 #include <mach/task.h>
   51 #include <mach/task_access.h>
   52 #include <mach/task_special_ports.h>
   53 #include <mach/time_value.h>
   54 #include <mach/vm_map.h>
   55 #include <mach/vm_param.h>
   56 #include <mach/vm_prot.h>
   57 
   58 #include <sys/file_internal.h>
   59 #include <sys/param.h>
   60 #include <sys/systm.h>
   61 #include <sys/dir.h>
   62 #include <sys/namei.h>
   63 #include <sys/proc_internal.h>
   64 #include <sys/kauth.h>
   65 #include <sys/vm.h>
   66 #include <sys/file.h>
   67 #include <sys/vnode_internal.h>
   68 #include <sys/mount.h>
   69 #include <sys/trace.h>
   70 #include <sys/kernel.h>
   71 #include <sys/ubc_internal.h>
   72 #include <sys/user.h>
   73 #include <sys/syslog.h>
   74 #include <sys/stat.h>
   75 #include <sys/sysproto.h>
   76 #include <sys/mman.h>
   77 #include <sys/sysctl.h>
   78 #include <sys/cprotect.h>
   79 #include <sys/kpi_socket.h>
   80 
   81 #include <security/audit/audit.h>
   82 #include <security/mac.h>
   83 #include <bsm/audit_kevents.h>
   84 
   85 #include <kern/kalloc.h>
   86 #include <vm/vm_map.h>
   87 #include <vm/vm_kern.h>
   88 #include <vm/vm_pageout.h>
   89 
   90 #include <machine/spl.h>
   91 
   92 #include <mach/shared_region.h>
   93 #include <vm/vm_shared_region.h>
   94 
   95 #include <vm/vm_protos.h>
   96 
   97 #if CONFIG_FREEZE
   98 #include <sys/kern_memorystatus.h>
   99 #endif
  100 
  101 
/* Forward declarations for the shared-region mapping/sliding helpers defined below. */
int _shared_region_map( struct proc*, int, unsigned int, struct shared_file_mapping_np*, memory_object_control_t*, struct shared_file_mapping_np*); 
int _shared_region_slide(uint32_t, mach_vm_offset_t, mach_vm_size_t, mach_vm_offset_t, mach_vm_size_t, memory_object_control_t);
int shared_region_copyin_mappings(struct proc*, user_addr_t, unsigned int, struct shared_file_mapping_np *);

/* vm.vm_debug_events: read/write toggle for VM debug event tracing. */
SYSCTL_INT(_vm, OID_AUTO, vm_debug_events, CTLFLAG_RW | CTLFLAG_LOCKED, &vm_debug_events, 0, "");
  107 
  108 
  109 /*
  110  * Sysctl's related to data/stack execution.  See osfmk/vm/vm_map.c
  111  */
  112 
#ifndef SECURE_KERNEL
/* Tunables (defined in osfmk/vm/vm_map.c) controlling whether stack/data
 * pages may be executed; exposed as vm.allow_stack_exec / vm.allow_data_exec.
 * Compiled out entirely on SECURE_KERNEL builds. */
extern int allow_stack_exec, allow_data_exec;

SYSCTL_INT(_vm, OID_AUTO, allow_stack_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_stack_exec, 0, "");
SYSCTL_INT(_vm, OID_AUTO, allow_data_exec, CTLFLAG_RW | CTLFLAG_LOCKED, &allow_data_exec, 0, "");
#endif /* !SECURE_KERNEL */
  119 
/*
 * Human-readable names for the eight VM_PROT_READ/WRITE/EXECUTE
 * combinations, indexed by (prot & VM_PROT_ALL); used by
 * log_stack_execution_failure() below.
 */
static const char *prot_values[] = {
        "none",
        "read-only",
        "write-only",
        "read-write",
        "execute-only",
        "read-execute",
        "write-execute",
        "read-write-execute"
};
  130 
  131 void
  132 log_stack_execution_failure(addr64_t vaddr, vm_prot_t prot)
  133 {
  134         printf("Data/Stack execution not permitted: %s[pid %d] at virtual address 0x%qx, protections were %s\n", 
  135                 current_proc()->p_comm, current_proc()->p_pid, vaddr, prot_values[prot & VM_PROT_ALL]);
  136 }
  137 
/* 0 = off, 1 = rate-limited logging (default), >1 = always log. */
int shared_region_unnest_logging = 1;

SYSCTL_INT(_vm, OID_AUTO, shared_region_unnest_logging, CTLFLAG_RW | CTLFLAG_LOCKED,
           &shared_region_unnest_logging, 0, "");

/* Length of one rate-limiting window, in seconds. */
int vm_shared_region_unnest_log_interval = 10;
/* Maximum messages emitted within one window before throttling. */
int shared_region_unnest_log_count_threshold = 5;

/* These log rate throttling state variables aren't thread safe, but
 * are sufficient unto the task.
 */
static int64_t last_unnest_log_time = 0; 
static int shared_region_unnest_log_count = 0;
  151 
  152 void log_unnest_badness(vm_map_t m, vm_map_offset_t s, vm_map_offset_t e) {
  153         struct timeval tv;
  154         const char *pcommstr;
  155 
  156         if (shared_region_unnest_logging == 0)
  157                 return;
  158 
  159         if (shared_region_unnest_logging == 1) {
  160                 microtime(&tv);
  161                 if ((tv.tv_sec - last_unnest_log_time) < vm_shared_region_unnest_log_interval) {
  162                         if (shared_region_unnest_log_count++ > shared_region_unnest_log_count_threshold)
  163                                 return;
  164                 }
  165                 else {
  166                         last_unnest_log_time = tv.tv_sec;
  167                         shared_region_unnest_log_count = 0;
  168                 }
  169         }
  170 
  171         pcommstr = current_proc()->p_comm;
  172 
  173         printf("%s (map: %p) triggered DYLD shared region unnest for map: %p, region 0x%qx->0x%qx. While not abnormal for debuggers, this increases system memory footprint until the target exits.\n", current_proc()->p_comm, get_task_map(current_proc()->task), m, (uint64_t)s, (uint64_t)e);
  174 }
  175 
  176 int
  177 useracc(
  178         user_addr_t     addr,
  179         user_size_t     len,
  180         int     prot)
  181 {
  182         return (vm_map_check_protection(
  183                         current_map(),
  184                         vm_map_trunc_page(addr), vm_map_round_page(addr+len),
  185                         prot == B_READ ? VM_PROT_READ : VM_PROT_WRITE));
  186 }
  187 
  188 int
  189 vslock(
  190         user_addr_t     addr,
  191         user_size_t     len)
  192 {
  193         kern_return_t kret;
  194         kret = vm_map_wire(current_map(), vm_map_trunc_page(addr),
  195                         vm_map_round_page(addr+len), 
  196                         VM_PROT_READ | VM_PROT_WRITE ,FALSE);
  197 
  198         switch (kret) {
  199         case KERN_SUCCESS:
  200                 return (0);
  201         case KERN_INVALID_ADDRESS:
  202         case KERN_NO_SPACE:
  203                 return (ENOMEM);
  204         case KERN_PROTECTION_FAILURE:
  205                 return (EACCES);
  206         default:
  207                 return (EINVAL);
  208         }
  209 }
  210 
/*
 * Unwire (unlock) the user address range [addr, addr+len) previously
 * wired by vslock().
 *
 * Parameters:  addr    user-space start address
 *              len     length of the range in bytes
 *              dirtied currently unused; the FIXME'd code below would
 *                      mark each page modified when set
 *
 * Returns:     0       success
 *              ENOMEM  invalid address or no space
 *              EACCES  protection failure
 *              EINVAL  any other failure
 */
int
vsunlock(
        user_addr_t addr,
        user_size_t len,
        __unused int dirtied)
{
#if FIXME  /* [ */
        pmap_t          pmap;
        vm_page_t       pg;
        vm_map_offset_t vaddr;
        ppnum_t         paddr;
#endif  /* FIXME ] */
        kern_return_t kret;

#if FIXME  /* [ */
        /* Disabled: would mark every page in the range as modified
         * when the caller indicated the buffer was dirtied. */
        if (dirtied) {
                pmap = get_task_pmap(current_task());
                for (vaddr = vm_map_trunc_page(addr);
                     vaddr < vm_map_round_page(addr+len);
                                vaddr += PAGE_SIZE) {
                        paddr = pmap_extract(pmap, vaddr);
                        pg = PHYS_TO_VM_PAGE(paddr);
                        vm_page_set_modified(pg);
                }
        }
#endif  /* FIXME ] */
#ifdef  lint
        dirtied++;
#endif  /* lint */
        kret = vm_map_unwire(current_map(), vm_map_trunc_page(addr),
                                vm_map_round_page(addr+len), FALSE);
        /* Translate the Mach status into a BSD errno. */
        switch (kret) {
        case KERN_SUCCESS:
                return (0);
        case KERN_INVALID_ADDRESS:
        case KERN_NO_SPACE:
                return (ENOMEM);
        case KERN_PROTECTION_FAILURE:
                return (EACCES);
        default:
                return (EINVAL);
        }
}
  254 
  255 int
  256 subyte(
  257         user_addr_t addr,
  258         int byte)
  259 {
  260         char character;
  261         
  262         character = (char)byte;
  263         return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
  264 }
  265 
  266 int
  267 suibyte(
  268         user_addr_t addr,
  269         int byte)
  270 {
  271         char character;
  272         
  273         character = (char)byte;
  274         return (copyout((void *)&(character), addr, sizeof(char)) == 0 ? 0 : -1);
  275 }
  276 
  277 int fubyte(user_addr_t addr)
  278 {
  279         unsigned char byte;
  280 
  281         if (copyin(addr, (void *) &byte, sizeof(char)))
  282                 return(-1);
  283         return(byte);
  284 }
  285 
  286 int fuibyte(user_addr_t addr)
  287 {
  288         unsigned char byte;
  289 
  290         if (copyin(addr, (void *) &(byte), sizeof(char)))
  291                 return(-1);
  292         return(byte);
  293 }
  294 
  295 int
  296 suword(
  297         user_addr_t addr,
  298         long word)
  299 {
  300         return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
  301 }
  302 
  303 long fuword(user_addr_t addr)
  304 {
  305         long word = 0;
  306 
  307         if (copyin(addr, (void *) &word, sizeof(int)))
  308                 return(-1);
  309         return(word);
  310 }
  311 
  312 /* suiword and fuiword are the same as suword and fuword, respectively */
  313 
  314 int
  315 suiword(
  316         user_addr_t addr,
  317         long word)
  318 {
  319         return (copyout((void *) &word, addr, sizeof(int)) == 0 ? 0 : -1);
  320 }
  321 
  322 long fuiword(user_addr_t addr)
  323 {
  324         long word = 0;
  325 
  326         if (copyin(addr, (void *) &word, sizeof(int)))
  327                 return(-1);
  328         return(word);
  329 }
  330 
  331 /*
  332  * With a 32-bit kernel and mixed 32/64-bit user tasks, this interface allows the
  333  * fetching and setting of process-sized size_t and pointer values.
  334  */
  335 int
  336 sulong(user_addr_t addr, int64_t word)
  337 {
  338 
  339         if (IS_64BIT_PROCESS(current_proc())) {
  340                 return(copyout((void *)&word, addr, sizeof(word)) == 0 ? 0 : -1);
  341         } else {
  342                 return(suiword(addr, (long)word));
  343         }
  344 }
  345 
  346 int64_t
  347 fulong(user_addr_t addr)
  348 {
  349         int64_t longword;
  350 
  351         if (IS_64BIT_PROCESS(current_proc())) {
  352                 if (copyin(addr, (void *)&longword, sizeof(longword)) != 0)
  353                         return(-1);
  354                 return(longword);
  355         } else {
  356                 return((int64_t)fuiword(addr));
  357         }
  358 }
  359 
  360 int
  361 suulong(user_addr_t addr, uint64_t uword)
  362 {
  363 
  364         if (IS_64BIT_PROCESS(current_proc())) {
  365                 return(copyout((void *)&uword, addr, sizeof(uword)) == 0 ? 0 : -1);
  366         } else {
  367                 return(suiword(addr, (uint32_t)uword));
  368         }
  369 }
  370 
  371 uint64_t
  372 fuulong(user_addr_t addr)
  373 {
  374         uint64_t ulongword;
  375 
  376         if (IS_64BIT_PROCESS(current_proc())) {
  377                 if (copyin(addr, (void *)&ulongword, sizeof(ulongword)) != 0)
  378                         return(-1ULL);
  379                 return(ulongword);
  380         } else {
  381                 return((uint64_t)fuiword(addr));
  382         }
  383 }
  384 
  385 int
  386 swapon(__unused proc_t procp, __unused struct swapon_args *uap, __unused int *retval)
  387 {
  388         return(ENOTSUP);
  389 }
  390 
/*
 * pid_for_task
 *
 * Find the BSD process ID for the Mach task associated with the given Mach port 
 * name
 *
 * Parameters:  args            User argument descriptor (see below)
 *
 * Indirect parameters: args->t         Mach port name
 *                      args->pid       Process ID (returned value; see below)
 *
 * Returns:     KERN_SUCCESS    Success
 *              KERN_FAILURE    Not success           
 *
 * Implicit returns: args->pid          Process ID (-1 on failure)
 *
 */
kern_return_t
pid_for_task(
        struct pid_for_task_args *args)
{
        mach_port_name_t        t = args->t;
        user_addr_t             pid_addr  = args->pid;  
        proc_t p;
        task_t          t1;
        int     pid = -1;
        kern_return_t   err = KERN_SUCCESS;

        AUDIT_MACH_SYSCALL_ENTER(AUE_PIDFORTASK);
        AUDIT_ARG(mach_port1, t);

        /* Converts the port name to a task reference; consumed by
         * task_deallocate() below on success. */
        t1 = port_name_to_task(t);

        if (t1 == TASK_NULL) {
                err = KERN_FAILURE;
                goto pftout;
        } else {
                p = get_bsdtask_info(t1);
                if (p) {
                        pid  = proc_pid(p);
                        err = KERN_SUCCESS;
                } else {
                        /* Task has no associated BSD process. */
                        err = KERN_FAILURE;
                }
        }
        task_deallocate(t1);
pftout:
        /* pid (-1 on failure) is always copied out to the caller. */
        AUDIT_ARG(pid, pid);
        (void) copyout((char *) &pid, pid_addr, sizeof(int));
        AUDIT_MACH_SYSCALL_EXIT(err);
        return(err);
}
  443 
/* 
 *
 * tfp_policy = KERN_TFP_POLICY_DENY; Deny Mode: None allowed except for self
 * tfp_policy = KERN_TFP_POLICY_DEFAULT; default mode: all posix checks and upcall via task port for authentication
 *
 */
/* Current task_for_pid() policy; consulted by task_for_pid_posix_check(). */
static  int tfp_policy = KERN_TFP_POLICY_DEFAULT;
  451 
/*
 *      Routine:        task_for_pid_posix_check
 *      Purpose:
 *                      Verify that the current process should be allowed to
 *                      get the target process's task port. This is only 
 *                      permitted if:
 *                      - The current process is root
 *                      OR all of the following are true:
 *                      - The target process's real, effective, and saved uids
 *                        are the same as the current proc's euid,
 *                      - The target process's group set is a subset of the
 *                        calling process's group set, and
 *                      - The target process hasn't switched credentials.
 *
 *      Parameters:     target  candidate process (PROC_NULL is rejected)
 *
 *      Returns:        TRUE: permitted
 *                      FALSE: denied
 */
static int
task_for_pid_posix_check(proc_t target)
{
        kauth_cred_t targetcred, mycred;
        uid_t myuid;
        int allowed; 

        /* No task_for_pid on bad targets */
        if (target == PROC_NULL || target->p_stat == SZOMB) {
                return FALSE;
        }

        mycred = kauth_cred_get();
        myuid = kauth_cred_getuid(mycred);

        /* If we're running as root, the check passes */
        if (kauth_cred_issuser(mycred))
                return TRUE;

        /* We're allowed to get our own task port */
        if (target == current_proc())
                return TRUE;

        /* 
         * Under DENY, only root can get another proc's task port,
         * so no more checks are needed.
         */
        if (tfp_policy == KERN_TFP_POLICY_DENY) { 
                return FALSE;
        }

        /* Reference taken here is dropped at 'out' on every path below. */
        targetcred = kauth_cred_proc_ref(target);
        allowed = TRUE;

        /* Do target's ruid, euid, and saved uid match my euid? */
        if ((kauth_cred_getuid(targetcred) != myuid) || 
                        (kauth_cred_getruid(targetcred) != myuid) ||
                        (kauth_cred_getsvuid(targetcred) != myuid)) {
                allowed = FALSE;
                goto out;
        }

        /* Are target's groups a subset of my groups? */
        if (kauth_cred_gid_subset(targetcred, mycred, &allowed) ||
                        allowed == 0) {
                allowed = FALSE;
                goto out;
        }

        /* Has target switched credentials? */
        if (target->p_flag & P_SUGID) {
                allowed = FALSE;
                goto out;
        }
        
out:
        kauth_cred_unref(&targetcred);
        return allowed;
}
  528 
/*
 *      Routine:        task_for_pid
 *      Purpose:
 *              Get the task port for another "process", named by its
 *              process ID on the same host as "target_task".
 *
 *              Only permitted to privileged processes, or processes
 *              with the same user ID.
 *
 *              Note: if pid == 0, an error is returned no matter who is calling.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */
kern_return_t
task_for_pid(
        struct task_for_pid_args *args)
{
        mach_port_name_t        target_tport = args->target_tport;
        int                     pid = args->pid;
        user_addr_t             task_addr = args->t;
        proc_t                  p = PROC_NULL;
        task_t                  t1 = TASK_NULL;
        mach_port_name_t        tret = MACH_PORT_NULL;
        ipc_port_t              tfpport;
        void * sright;
        int error = 0;

        AUDIT_MACH_SYSCALL_ENTER(AUE_TASKFORPID);
        AUDIT_ARG(pid, pid);
        AUDIT_ARG(mach_port1, target_tport);

        /* Always check if pid == 0 */
        if (pid == 0) {
                (void ) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
                AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
                return(KERN_FAILURE);
        }

        /* Validate the caller's task port; reference dropped at tfpout. */
        t1 = port_name_to_task(target_tport);
        if (t1 == TASK_NULL) {
                (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
                AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
                return(KERN_FAILURE);
        } 


        p = proc_find(pid);
#if CONFIG_AUDIT
        if (p != PROC_NULL)
                AUDIT_ARG(process, p);
#endif

        /* POSIX uid/gid credential checks (handles PROC_NULL/zombies). */
        if (!(task_for_pid_posix_check(p))) {
                error = KERN_FAILURE;
                goto tfpout;
        }

        if (p->task != TASK_NULL) {
                /* If we aren't root and target's task access port is set... */
                if (!kauth_cred_issuser(kauth_cred_get()) &&
                        p != current_proc() &&
                        (task_get_task_access_port(p->task, &tfpport) == 0) &&
                        (tfpport != IPC_PORT_NULL)) {

                        if (tfpport == IPC_PORT_DEAD) {
                                error = KERN_PROTECTION_FAILURE;
                                goto tfpout;
                        }

                        /* Call up to the task access server */
                        error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

                        if (error != MACH_MSG_SUCCESS) {
                                if (error == MACH_RCV_INTERRUPTED)
                                        error = KERN_ABORTED;
                                else
                                        error = KERN_FAILURE;
                                goto tfpout;
                        }
                }
#if CONFIG_MACF
                error = mac_proc_check_get_task(kauth_cred_get(), p);
                if (error) {
                        error = KERN_FAILURE;
                        goto tfpout;
                }
#endif

                /* Grant task port access */
                task_reference(p->task);
                extmod_statistics_incr_task_for_pid(p->task);

                sright = (void *) convert_task_to_port(p->task);
                tret = ipc_port_copyout_send(
                                sright, 
                                get_task_ipcspace(current_task()));
        } 
        error = KERN_SUCCESS;

tfpout:
        /* Common exit: drop references, copy out port name (MACH_PORT_NULL
         * on failure), and record the audit result. */
        task_deallocate(t1);
        AUDIT_ARG(mach_port2, tret);
        (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
        if (p != PROC_NULL)
                proc_rele(p);
        AUDIT_MACH_SYSCALL_EXIT(error);
        return(error);
}
  637 
/*
 *      Routine:        task_name_for_pid
 *      Purpose:
 *              Get the task name port for another "process", named by its
 *              process ID on the same host as "target_task".
 *
 *              Only permitted to privileged processes, or processes
 *              with the same user ID.
 *
 * XXX This should be a BSD system call, not a Mach trap!!!
 */

kern_return_t
task_name_for_pid(
        struct task_name_for_pid_args *args)
{
        mach_port_name_t        target_tport = args->target_tport;
        int                     pid = args->pid;
        user_addr_t             task_addr = args->t;
        proc_t          p = PROC_NULL;
        task_t          t1;
        mach_port_name_t        tret;
        void * sright;
        int error = 0, refheld = 0;
        kauth_cred_t target_cred;

        AUDIT_MACH_SYSCALL_ENTER(AUE_TASKNAMEFORPID);
        AUDIT_ARG(pid, pid);
        AUDIT_ARG(mach_port1, target_tport);

        /* Validate the caller's task port; reference dropped on all exits. */
        t1 = port_name_to_task(target_tport);
        if (t1 == TASK_NULL) {
                (void) copyout((char *)&t1, task_addr, sizeof(mach_port_name_t));
                AUDIT_MACH_SYSCALL_EXIT(KERN_FAILURE);
                return(KERN_FAILURE);
        } 

        p = proc_find(pid);
        if (p != PROC_NULL) {
                AUDIT_ARG(process, p);
                target_cred = kauth_cred_proc_ref(p);
                refheld = 1;

                /* Allowed if the target is live and we are the target, are
                 * root, or share both euid and ruid with the target. */
                if ((p->p_stat != SZOMB)
                    && ((current_proc() == p)
                        || kauth_cred_issuser(kauth_cred_get()) 
                        || ((kauth_cred_getuid(target_cred) == kauth_cred_getuid(kauth_cred_get())) && 
                            ((kauth_cred_getruid(target_cred) == kauth_getruid()))))) {

                        if (p->task != TASK_NULL) {
                                task_reference(p->task);
#if CONFIG_MACF
                                error = mac_proc_check_get_task_name(kauth_cred_get(),  p);
                                if (error) {
                                        task_deallocate(p->task);
                                        goto noperm;
                                }
#endif
                                sright = (void *)convert_task_name_to_port(p->task);
                                tret = ipc_port_copyout_send(sright, 
                                                get_task_ipcspace(current_task()));
                        } else
                                tret  = MACH_PORT_NULL;

                        AUDIT_ARG(mach_port2, tret);
                        (void) copyout((char *)&tret, task_addr, sizeof(mach_port_name_t));
                        task_deallocate(t1);
                        error = KERN_SUCCESS;
                        goto tnfpout;
                }
        }

        /* Failure path: copy out MACH_PORT_NULL and report KERN_FAILURE. */
#if CONFIG_MACF
noperm:
#endif
    task_deallocate(t1);
        tret = MACH_PORT_NULL;
        (void) copyout((char *) &tret, task_addr, sizeof(mach_port_name_t));
        error = KERN_FAILURE;
tnfpout:
        if (refheld != 0)
                kauth_cred_unref(&target_cred);
        if (p != PROC_NULL)
                proc_rele(p);
        AUDIT_MACH_SYSCALL_EXIT(error);
        return(error);
}
  725 
/*
 * pid_suspend
 *
 * Suspend the task of the process named by args->pid.  The caller must
 * pass the POSIX task_for_pid checks and, when the target has a task
 * access port set, be approved by the task access server.
 *
 * Returns (via *ret and the return value):
 *      0       success
 *      EPERM   pid 0, posix/MAC check failure, or access denied
 *      EACCES  task access port is dead
 *      EINTR   task access server upcall interrupted
 *      EINVAL  task_suspend() rejected the argument
 */
kern_return_t
pid_suspend(struct proc *p __unused, struct pid_suspend_args *args, int *ret)
{
        task_t  target = NULL;
        proc_t  targetproc = PROC_NULL;
        int     pid = args->pid;
        int     error = 0;

#if CONFIG_MACF
        error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SUSPEND);
        if (error) {
                error = EPERM;
                goto out;
        }
#endif

        if (pid == 0) {
                error = EPERM;
                goto out;
        }

        targetproc = proc_find(pid);
        /* Rejects PROC_NULL and zombies as well as credential mismatches. */
        if (!task_for_pid_posix_check(targetproc)) {
                error = EPERM;
                goto out;
        }

        target = targetproc->task;
#ifndef CONFIG_EMBEDDED
        if (target != TASK_NULL) {
                mach_port_t tfpport;

                /* If we aren't root and target's task access port is set... */
                if (!kauth_cred_issuser(kauth_cred_get()) &&
                        targetproc != current_proc() &&
                        (task_get_task_access_port(target, &tfpport) == 0) &&
                        (tfpport != IPC_PORT_NULL)) {

                        if (tfpport == IPC_PORT_DEAD) {
                                error = EACCES;
                                goto out;
                        }

                        /* Call up to the task access server */
                        error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);

                        if (error != MACH_MSG_SUCCESS) {
                                if (error == MACH_RCV_INTERRUPTED)
                                        error = EINTR;
                                else
                                        error = EPERM;
                                goto out;
                        }
                }
        }
#endif

        task_reference(target);
        error = task_suspend(target);
        /* Map the Mach status to a BSD errno. */
        if (error) {
                if (error == KERN_INVALID_ARGUMENT) {
                        error = EINVAL;
                } else {
                        error = EPERM;
                }
        }
        task_deallocate(target);

#if CONFIG_FREEZE
        kern_hibernation_on_pid_suspend(pid);
#endif

out:
        if (targetproc != PROC_NULL)
                proc_rele(targetproc);
        *ret = error;
        return error;
}
  804 
  805 kern_return_t
  806 pid_resume(struct proc *p __unused, struct pid_resume_args *args, int *ret)
  807 {
  808         task_t  target = NULL;
  809         proc_t  targetproc = PROC_NULL;
  810         int     pid = args->pid;
  811         int     error = 0;
  812 
  813 #if CONFIG_MACF
  814         error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_RESUME);
  815         if (error) {
  816                 error = EPERM;
  817                 goto out;
  818         }
  819 #endif
  820 
  821         if (pid == 0) {
  822                 error = EPERM;
  823                 goto out;
  824         }
  825 
  826         targetproc = proc_find(pid);
  827         if (!task_for_pid_posix_check(targetproc)) {
  828                 error = EPERM;
  829                 goto out;
  830         }
  831 
  832         target = targetproc->task;
  833 #ifndef CONFIG_EMBEDDED
  834         if (target != TASK_NULL) {
  835                 mach_port_t tfpport;
  836 
  837                 /* If we aren't root and target's task access port is set... */
  838                 if (!kauth_cred_issuser(kauth_cred_get()) &&
  839                         targetproc != current_proc() &&
  840                         (task_get_task_access_port(target, &tfpport) == 0) &&
  841                         (tfpport != IPC_PORT_NULL)) {
  842 
  843                         if (tfpport == IPC_PORT_DEAD) {
  844                                 error = EACCES;
  845                                 goto out;
  846                         }
  847 
  848                         /* Call up to the task access server */
  849                         error = check_task_access(tfpport, proc_selfpid(), kauth_getgid(), pid);
  850 
  851                         if (error != MACH_MSG_SUCCESS) {
  852                                 if (error == MACH_RCV_INTERRUPTED)
  853                                         error = EINTR;
  854                                 else
  855                                         error = EPERM;
  856                                 goto out;
  857                         }
  858                 }
  859         }
  860 #endif
  861 
  862         task_reference(target);
  863 
  864 #if CONFIG_FREEZE
  865         kern_hibernation_on_pid_resume(pid, target);
  866 #endif
  867 
  868         error = task_resume(target);
  869         if (error) {
  870                 if (error == KERN_INVALID_ARGUMENT) {
  871                         error = EINVAL;
  872                 } else {
  873                         error = EPERM;
  874                 }
  875         }
  876         task_deallocate(target);
  877 
  878 out:
  879         if (targetproc != PROC_NULL)
  880                 proc_rele(targetproc);
  881         *ret = error;
  882         return error;
  883 
  884         return 0;
  885 }
  886 
  887 #if CONFIG_EMBEDDED
/*
 * pid_hibernate
 *
 * Kick off hibernation (freezing) of processes.  Only pid == -1 is
 * accepted today: it triggers the global hibernation thread rather than
 * freezing an individual process (see the comment below).  Without
 * CONFIG_FREEZE this is effectively a no-op that returns 0.
 *
 * Returns 0 on success, EPERM on policy/lookup failure; the errno value
 * is also stored through "ret" for the syscall return path.
 */
kern_return_t
pid_hibernate(struct proc *p __unused, struct pid_hibernate_args *args, int *ret)
{
	int	error = 0;
	proc_t	targetproc = PROC_NULL;
	int	pid = args->pid;

#ifndef CONFIG_FREEZE
	#pragma unused(pid)
#else

#if CONFIG_MACF
	/* MAC policy may veto hibernation; report any veto as EPERM */
	error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_HIBERNATE);
	if (error) {
		error = EPERM;
		goto out;
	}
#endif

	/*
	 * The only accepted pid value here is currently -1, since we just kick off the hibernation thread
	 * here - individual ids aren't required. However, it's intended that this call will change
	 * in the future to initiate hibernation of individual processes. In anticipation, we'll obtain the
	 * process handle for potentially valid values and call task_for_pid_posix_check(); this way, everything
	 * is validated correctly and set for further refactoring. See <rdar://problem/7839708> for more details.
	 */
	if (pid >= 0) {
		targetproc = proc_find(pid);
		if (!task_for_pid_posix_check(targetproc)) {
			error = EPERM;
			goto out;
		}
	}

	if (pid == -1) {
		kern_hibernation_on_pid_hibernate(pid);
	} else {
		/* per-pid hibernation not implemented yet — see comment above */
		error = EPERM;
	}

out:

#endif /* CONFIG_FREEZE */

	if (targetproc != PROC_NULL)
		proc_rele(targetproc);
	*ret = error;
	return error;
}
  937 
  938 int
  939 pid_shutdown_sockets(struct proc *p __unused, struct pid_shutdown_sockets_args *args, int *ret)
  940 {
  941         int                             error = 0;
  942         proc_t                          targetproc = PROC_NULL;
  943         struct filedesc         *fdp;
  944         struct fileproc         *fp;
  945         int                             pid = args->pid;
  946         int                                     level = args->level;
  947         int                                     i;
  948 
  949         if (level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_SVC &&
  950                 level != SHUTDOWN_SOCKET_LEVEL_DISCONNECT_ALL)
  951         {
  952                 error = EINVAL;
  953                 goto out;
  954         }
  955 
  956 #if CONFIG_MACF
  957         error = mac_proc_check_suspend_resume(p, MAC_PROC_CHECK_SHUTDOWN_SOCKETS);
  958         if (error) {
  959                 error = EPERM;
  960                 goto out;
  961         }
  962 #endif
  963 
  964         targetproc = proc_find(pid);
  965         if (!task_for_pid_posix_check(targetproc)) {
  966                 error = EPERM;
  967                 goto out;
  968         }
  969 
  970         proc_fdlock(targetproc);
  971         fdp = targetproc->p_fd;
  972 
  973         for (i = 0; i < fdp->fd_nfiles; i++) {
  974                 struct socket *sockp;
  975 
  976                 fp = fdp->fd_ofiles[i];
  977                 if (fp == NULL || (fdp->fd_ofileflags[i] & UF_RESERVED) != 0 ||
  978                         fp->f_fglob->fg_type != DTYPE_SOCKET)
  979                 {
  980                         continue;
  981                 }
  982 
  983                 sockp = (struct socket *)fp->f_fglob->fg_data;
  984 
  985                 /* Call networking stack with socket and level */
  986                 (void) socket_defunct(targetproc, sockp, level);
  987         }
  988 
  989         proc_fdunlock(targetproc);
  990 
  991 out:
  992         if (targetproc != PROC_NULL)
  993                 proc_rele(targetproc);
  994         *ret = error;
  995         return error;
  996 }
  997 #endif /* CONFIG_EMBEDDED */
  998 
  999 static int
 1000 sysctl_settfp_policy(__unused struct sysctl_oid *oidp, void *arg1,
 1001     __unused int arg2, struct sysctl_req *req)
 1002 {
 1003     int error = 0;
 1004         int new_value;
 1005 
 1006     error = SYSCTL_OUT(req, arg1, sizeof(int));
 1007     if (error || req->newptr == USER_ADDR_NULL)
 1008         return(error);
 1009 
 1010         if (!is_suser())
 1011                 return(EPERM);
 1012 
 1013         if ((error = SYSCTL_IN(req, &new_value, sizeof(int)))) {
 1014                 goto out;
 1015         }
 1016         if ((new_value == KERN_TFP_POLICY_DENY) 
 1017                 || (new_value == KERN_TFP_POLICY_DEFAULT))
 1018                         tfp_policy = new_value;
 1019         else
 1020                         error = EINVAL;         
 1021 out:
 1022     return(error);
 1023 
 1024 }
 1025 
/* Read-only flag exported as kern.secure_kernel: 1 on SECURE_KERNEL builds. */
#if defined(SECURE_KERNEL)
static int kern_secure_kernel = 1;
#else
static int kern_secure_kernel = 0;
#endif

SYSCTL_INT(_kern, OID_AUTO, secure_kernel, CTLFLAG_RD | CTLFLAG_LOCKED, &kern_secure_kernel, 0, "");

/* kern.tfp node and the task_for_pid policy knob (handled above). */
SYSCTL_NODE(_kern, KERN_TFP, tfp, CTLFLAG_RW | CTLFLAG_LOCKED, 0, "tfp");
SYSCTL_PROC(_kern_tfp, KERN_TFP_POLICY, policy, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_LOCKED,
    &tfp_policy, sizeof(uint32_t), &sysctl_settfp_policy ,"I","policy");

/* Shared-region tracing/version/persistence knobs under the vm node. */
SYSCTL_INT(_vm, OID_AUTO, shared_region_trace_level, CTLFLAG_RW | CTLFLAG_LOCKED,
           &shared_region_trace_level, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_version, CTLFLAG_RD | CTLFLAG_LOCKED,
           &shared_region_version, 0, "");
SYSCTL_INT(_vm, OID_AUTO, shared_region_persistence, CTLFLAG_RW | CTLFLAG_LOCKED,
           &shared_region_persistence, 0, "");
 1044 
 1045 /*
 1046  * shared_region_check_np:
 1047  *
 1048  * This system call is intended for dyld.
 1049  *
 1050  * dyld calls this when any process starts to see if the process's shared
 1051  * region is already set up and ready to use.
 1052  * This call returns the base address of the first mapping in the
 1053  * process's shared region's first mapping.
 1054  * dyld will then check what's mapped at that address.
 1055  *
 1056  * If the shared region is empty, dyld will then attempt to map the shared
 1057  * cache file in the shared region via the shared_region_map_np() system call.
 1058  *
 1059  * If something's already mapped in the shared region, dyld will check if it
 1060  * matches the shared cache it would like to use for that process.
 1061  * If it matches, evrything's ready and the process can proceed and use the
 1062  * shared region.
 1063  * If it doesn't match, dyld will unmap the shared region and map the shared
 1064  * cache into the process's address space via mmap().
 1065  *
 1066  * ERROR VALUES
 1067  * EINVAL       no shared region
 1068  * ENOMEM       shared region is empty
 1069  * EFAULT       bad address for "start_address"
 1070  */
 1071 int
 1072 shared_region_check_np(
 1073         __unused struct proc                    *p,
 1074         struct shared_region_check_np_args      *uap,
 1075         __unused int                            *retvalp)
 1076 {
 1077         vm_shared_region_t      shared_region;
 1078         mach_vm_offset_t        start_address;
 1079         int                     error;
 1080         kern_return_t           kr;
 1081 
 1082         SHARED_REGION_TRACE_DEBUG(
 1083                 ("shared_region: %p [%d(%s)] -> check_np(0x%llx)\n",
 1084                  current_thread(), p->p_pid, p->p_comm,
 1085                  (uint64_t)uap->start_address));
 1086 
 1087         /* retrieve the current tasks's shared region */
 1088         shared_region = vm_shared_region_get(current_task());
 1089         if (shared_region != NULL) {
 1090                 /* retrieve address of its first mapping... */
 1091                 kr = vm_shared_region_start_address(shared_region,
 1092                                                     &start_address);
 1093                 if (kr != KERN_SUCCESS) {
 1094                         error = ENOMEM;
 1095                 } else {
 1096                         /* ... and give it to the caller */
 1097                         error = copyout(&start_address,
 1098                                         (user_addr_t) uap->start_address,
 1099                                         sizeof (start_address));
 1100                         if (error) {
 1101                                 SHARED_REGION_TRACE_ERROR(
 1102                                         ("shared_region: %p [%d(%s)] "
 1103                                          "check_np(0x%llx) "
 1104                                          "copyout(0x%llx) error %d\n",
 1105                                          current_thread(), p->p_pid, p->p_comm,
 1106                                          (uint64_t)uap->start_address, (uint64_t)start_address,
 1107                                          error));
 1108                         }
 1109                 }
 1110                 vm_shared_region_deallocate(shared_region);
 1111         } else {
 1112                 /* no shared region ! */
 1113                 error = EINVAL;
 1114         }
 1115 
 1116         SHARED_REGION_TRACE_DEBUG(
 1117                 ("shared_region: %p [%d(%s)] check_np(0x%llx) <- 0x%llx %d\n",
 1118                  current_thread(), p->p_pid, p->p_comm,
 1119                  (uint64_t)uap->start_address, (uint64_t)start_address, error));
 1120 
 1121         return error;
 1122 }
 1123 
 1124 
 1125 int
 1126 shared_region_copyin_mappings(
 1127                 struct proc                     *p,
 1128                 user_addr_t                     user_mappings,
 1129                 unsigned int                    mappings_count,
 1130                 struct shared_file_mapping_np   *mappings)
 1131 {
 1132         int             error = 0;
 1133         vm_size_t       mappings_size = 0;
 1134 
 1135         /* get the list of mappings the caller wants us to establish */
 1136         mappings_size = (vm_size_t) (mappings_count * sizeof (mappings[0]));
 1137         error = copyin(user_mappings,
 1138                        mappings,
 1139                        mappings_size);
 1140         if (error) {
 1141                 SHARED_REGION_TRACE_ERROR(
 1142                         ("shared_region: %p [%d(%s)] map(): "
 1143                          "copyin(0x%llx, %d) failed (error=%d)\n",
 1144                          current_thread(), p->p_pid, p->p_comm,
 1145                          (uint64_t)user_mappings, mappings_count, error));
 1146         }
 1147         return error;
 1148 }
 1149 /*
 1150  * shared_region_map_np()
 1151  *
 1152  * This system call is intended for dyld.
 1153  *
 1154  * dyld uses this to map a shared cache file into a shared region.
 1155  * This is usually done only the first time a shared cache is needed.
 1156  * Subsequent processes will just use the populated shared region without
 1157  * requiring any further setup.
 1158  */
 1159 int
 1160 _shared_region_map(
 1161         struct proc                             *p,
 1162         int                                     fd,
 1163         uint32_t                                mappings_count,
 1164         struct shared_file_mapping_np           *mappings,
 1165         memory_object_control_t                 *sr_file_control,
 1166         struct shared_file_mapping_np           *mapping_to_slide)
 1167 {
 1168         int                             error;
 1169         kern_return_t                   kr;
 1170         struct fileproc                 *fp;
 1171         struct vnode                    *vp, *root_vp;
 1172         struct vnode_attr               va;
 1173         off_t                           fs;
 1174         memory_object_size_t            file_size;
 1175         vm_prot_t                       maxprot = VM_PROT_ALL;
 1176         memory_object_control_t         file_control;
 1177         struct vm_shared_region         *shared_region;
 1178 
 1179         SHARED_REGION_TRACE_DEBUG(
 1180                 ("shared_region: %p [%d(%s)] -> map\n",
 1181                  current_thread(), p->p_pid, p->p_comm));
 1182 
 1183         shared_region = NULL;
 1184         fp = NULL;
 1185         vp = NULL;
 1186 
 1187         /* get file structure from file descriptor */
 1188         error = fp_lookup(p, fd, &fp, 0);
 1189         if (error) {
 1190                 SHARED_REGION_TRACE_ERROR(
 1191                         ("shared_region: %p [%d(%s)] map: "
 1192                          "fd=%d lookup failed (error=%d)\n",
 1193                          current_thread(), p->p_pid, p->p_comm, fd, error));
 1194                 goto done;
 1195         }
 1196 
 1197         /* make sure we're attempting to map a vnode */
 1198         if (fp->f_fglob->fg_type != DTYPE_VNODE) {
 1199                 SHARED_REGION_TRACE_ERROR(
 1200                         ("shared_region: %p [%d(%s)] map: "
 1201                          "fd=%d not a vnode (type=%d)\n",
 1202                          current_thread(), p->p_pid, p->p_comm,
 1203                          fd, fp->f_fglob->fg_type));
 1204                 error = EINVAL;
 1205                 goto done;
 1206         }
 1207 
 1208         /* we need at least read permission on the file */
 1209         if (! (fp->f_fglob->fg_flag & FREAD)) {
 1210                 SHARED_REGION_TRACE_ERROR(
 1211                         ("shared_region: %p [%d(%s)] map: "
 1212                          "fd=%d not readable\n",
 1213                          current_thread(), p->p_pid, p->p_comm, fd));
 1214                 error = EPERM;
 1215                 goto done;
 1216         }
 1217 
 1218         /* get vnode from file structure */
 1219         error = vnode_getwithref((vnode_t) fp->f_fglob->fg_data);
 1220         if (error) {
 1221                 SHARED_REGION_TRACE_ERROR(
 1222                         ("shared_region: %p [%d(%s)] map: "
 1223                          "fd=%d getwithref failed (error=%d)\n",
 1224                          current_thread(), p->p_pid, p->p_comm, fd, error));
 1225                 goto done;
 1226         }
 1227         vp = (struct vnode *) fp->f_fglob->fg_data;
 1228 
 1229         /* make sure the vnode is a regular file */
 1230         if (vp->v_type != VREG) {
 1231                 SHARED_REGION_TRACE_ERROR(
 1232                         ("shared_region: %p [%d(%s)] map(%p:'%s'): "
 1233                          "not a file (type=%d)\n",
 1234                          current_thread(), p->p_pid, p->p_comm,
 1235                          vp, vp->v_name, vp->v_type));
 1236                 error = EINVAL;
 1237                 goto done;
 1238         }
 1239 
 1240 #if CONFIG_MACF
 1241         error = mac_file_check_mmap(vfs_context_ucred(vfs_context_current()),
 1242                         fp->f_fglob, VM_PROT_ALL, MAP_FILE, &maxprot);
 1243         if (error) {
 1244                 goto done;
 1245         }
 1246 #endif /* MAC */
 1247 
 1248 #if CONFIG_PROTECT
 1249         /* check for content protection access */
 1250         {
 1251         void *cnode;
 1252         if ((cnode = cp_get_protected_cnode(vp)) != NULL) {
 1253                 error = cp_handle_vnop(cnode, CP_READ_ACCESS | CP_WRITE_ACCESS);
 1254                 if (error) 
 1255                         goto done;
 1256         }
 1257         }
 1258 #endif /* CONFIG_PROTECT */
 1259 
 1260         /* make sure vnode is on the process's root volume */
 1261         root_vp = p->p_fd->fd_rdir;
 1262         if (root_vp == NULL) {
 1263                 root_vp = rootvnode;
 1264         } else {
 1265                 /*
 1266                  * Chroot-ed processes can't use the shared_region.
 1267                  */
 1268                 error = EINVAL;
 1269                 goto done;
 1270         }
 1271 
 1272         if (vp->v_mount != root_vp->v_mount) {
 1273                 SHARED_REGION_TRACE_ERROR(
 1274                         ("shared_region: %p [%d(%s)] map(%p:'%s'): "
 1275                          "not on process's root volume\n",
 1276                          current_thread(), p->p_pid, p->p_comm,
 1277                          vp, vp->v_name));
 1278                 error = EPERM;
 1279                 goto done;
 1280         }
 1281 
 1282         /* make sure vnode is owned by "root" */
 1283         VATTR_INIT(&va);
 1284         VATTR_WANTED(&va, va_uid);
 1285         error = vnode_getattr(vp, &va, vfs_context_current());
 1286         if (error) {
 1287                 SHARED_REGION_TRACE_ERROR(
 1288                         ("shared_region: %p [%d(%s)] map(%p:'%s'): "
 1289                          "vnode_getattr(%p) failed (error=%d)\n",
 1290                          current_thread(), p->p_pid, p->p_comm,
 1291                          vp, vp->v_name, vp, error));
 1292                 goto done;
 1293         }
 1294         if (va.va_uid != 0) {
 1295                 SHARED_REGION_TRACE_ERROR(
 1296                         ("shared_region: %p [%d(%s)] map(%p:'%s'): "
 1297                          "owned by uid=%d instead of 0\n",
 1298                          current_thread(), p->p_pid, p->p_comm,
 1299                          vp, vp->v_name, va.va_uid));
 1300                 error = EPERM;
 1301                 goto done;
 1302         }
 1303 
 1304         /* get vnode size */
 1305         error = vnode_size(vp, &fs, vfs_context_current());
 1306         if (error) {
 1307                 SHARED_REGION_TRACE_ERROR(
 1308                         ("shared_region: %p [%d(%s)] map(%p:'%s'): "
 1309                          "vnode_size(%p) failed (error=%d)\n",
 1310                          current_thread(), p->p_pid, p->p_comm,
 1311                          vp, vp->v_name, vp, error));
 1312                 goto done;
 1313         }
 1314         file_size = fs;
 1315 
 1316         /* get the file's memory object handle */
 1317         file_control = ubc_getobject(vp, UBC_HOLDOBJECT);
 1318         if (file_control == MEMORY_OBJECT_CONTROL_NULL) {
 1319                 SHARED_REGION_TRACE_ERROR(
 1320                         ("shared_region: %p [%d(%s)] map(%p:'%s'): "
 1321                          "no memory object\n",
 1322                          current_thread(), p->p_pid, p->p_comm,
 1323                          vp, vp->v_name));
 1324                 error = EINVAL;
 1325                 goto done;
 1326         }
 1327 
 1328         if (sr_file_control != NULL) {
 1329                 *sr_file_control = file_control;
 1330         }
 1331                          
 1332 
 1333 
 1334         /* get the process's shared region (setup in vm_map_exec()) */
 1335         shared_region = vm_shared_region_get(current_task());
 1336         if (shared_region == NULL) {
 1337                 SHARED_REGION_TRACE_ERROR(
 1338                         ("shared_region: %p [%d(%s)] map(%p:'%s'): "
 1339                          "no shared region\n",
 1340                          current_thread(), p->p_pid, p->p_comm,
 1341                          vp, vp->v_name));
 1342                 goto done;
 1343         }
 1344 
 1345         /* map the file into that shared region's submap */
 1346         kr = vm_shared_region_map_file(shared_region,
 1347                                        mappings_count,
 1348                                        mappings,
 1349                                        file_control,
 1350                                        file_size,
 1351                                        (void *) p->p_fd->fd_rdir,
 1352                                        mapping_to_slide);
 1353         if (kr != KERN_SUCCESS) {
 1354                 SHARED_REGION_TRACE_ERROR(
 1355                         ("shared_region: %p [%d(%s)] map(%p:'%s'): "
 1356                          "vm_shared_region_map_file() failed kr=0x%x\n",
 1357                          current_thread(), p->p_pid, p->p_comm,
 1358                          vp, vp->v_name, kr));
 1359                 switch (kr) {
 1360                 case KERN_INVALID_ADDRESS:
 1361                         error = EFAULT;
 1362                         break;
 1363                 case KERN_PROTECTION_FAILURE:
 1364                         error = EPERM;
 1365                         break;
 1366                 case KERN_NO_SPACE:
 1367                         error = ENOMEM;
 1368                         break;
 1369                 case KERN_FAILURE:
 1370                 case KERN_INVALID_ARGUMENT:
 1371                 default:
 1372                         error = EINVAL;
 1373                         break;
 1374                 }
 1375                 goto done;
 1376         }
 1377 
 1378         error = 0;
 1379 
 1380         vnode_lock_spin(vp);
 1381 
 1382         vp->v_flag |= VSHARED_DYLD;
 1383 
 1384         vnode_unlock(vp);
 1385 
 1386         /* update the vnode's access time */
 1387         if (! (vnode_vfsvisflags(vp) & MNT_NOATIME)) {
 1388                 VATTR_INIT(&va);
 1389                 nanotime(&va.va_access_time);
 1390                 VATTR_SET_ACTIVE(&va, va_access_time);
 1391                 vnode_setattr(vp, &va, vfs_context_current());
 1392         }
 1393 
 1394         if (p->p_flag & P_NOSHLIB) {
 1395                 /* signal that this process is now using split libraries */
 1396                 OSBitAndAtomic(~((uint32_t)P_NOSHLIB), &p->p_flag);
 1397         }
 1398 
 1399 done:
 1400         if (vp != NULL) {
 1401                 /*
 1402                  * release the vnode...
 1403                  * ubc_map() still holds it for us in the non-error case
 1404                  */
 1405                 (void) vnode_put(vp);
 1406                 vp = NULL;
 1407         }
 1408         if (fp != NULL) {
 1409                 /* release the file descriptor */
 1410                 fp_drop(p, fd, fp, 0);
 1411                 fp = NULL;
 1412         }
 1413 
 1414         if (shared_region != NULL) {
 1415                 vm_shared_region_deallocate(shared_region);
 1416         }
 1417 
 1418         SHARED_REGION_TRACE_DEBUG(
 1419                 ("shared_region: %p [%d(%s)] <- map\n",
 1420                  current_thread(), p->p_pid, p->p_comm));
 1421 
 1422         return error;
 1423 }
 1424 
 1425 int
 1426 _shared_region_slide(uint32_t slide,
 1427                         mach_vm_offset_t        entry_start_address,
 1428                         mach_vm_size_t          entry_size,
 1429                         mach_vm_offset_t        slide_start,
 1430                         mach_vm_size_t          slide_size,
 1431                         memory_object_control_t sr_file_control)
 1432 {
 1433         void *slide_info_entry = NULL;
 1434         int                     error;
 1435 
 1436         if((error = vm_shared_region_slide_init(slide_size, entry_start_address, entry_size, slide, sr_file_control))) {
 1437                 printf("slide_info initialization failed with kr=%d\n", error);
 1438                 goto done;
 1439         }
 1440 
 1441         slide_info_entry = vm_shared_region_get_slide_info_entry();
 1442         if (slide_info_entry == NULL){
 1443                 error = EFAULT;
 1444         } else {        
 1445                 error = copyin(slide_start,
 1446                                slide_info_entry,
 1447                                (vm_size_t)slide_size);
 1448         }
 1449         if (error) {
 1450                 goto done;
 1451         }
 1452  
 1453         if (vm_shared_region_slide_sanity_check() != KERN_SUCCESS) {
 1454                 error = EFAULT; 
 1455                 printf("Sanity Check failed for slide_info\n");
 1456         } else {
 1457 #if DEBUG
 1458                 printf("Succesfully init slide_info with start_address: %p region_size: %ld slide_header_size: %ld\n",
 1459                                 (void*)(uintptr_t)entry_start_address, 
 1460                                 (unsigned long)entry_size, 
 1461                                 (unsigned long)slide_size);
 1462 #endif
 1463         }
 1464 done:
 1465         return error;
 1466 }
 1467 
/*
 * shared_region_map_and_slide_np:
 *
 * Syscall used by dyld to map the shared cache file "uap->fd" into the
 * shared region (via _shared_region_map()) and, if "uap->slide" is
 * non-zero, apply ASLR sliding to it (via _shared_region_slide()).
 *
 * NOTE(review): "kr" carries a mix of kern_return_t values (from the
 * sliding checks) and errno values (from _shared_region_map() /
 * shared_region_copyin_mappings()); callers appear to treat non-zero as
 * failure either way — confirm before changing any error path.
 */
int
shared_region_map_and_slide_np(
	struct proc				*p,
	struct shared_region_map_and_slide_np_args	*uap,
	__unused int					*retvalp)
{
	struct shared_file_mapping_np	mapping_to_slide;
	struct shared_file_mapping_np	*mappings;
	unsigned int mappings_count = uap->count;

	memory_object_control_t		sr_file_control;
	kern_return_t			kr = KERN_SUCCESS;
	uint32_t			slide = uap->slide;
	
/* mappings beyond this count are rejected (stack buffer below) */
#define SFM_MAX_STACK	8
	struct shared_file_mapping_np	stack_mappings[SFM_MAX_STACK];

	if ((kr = vm_shared_region_sliding_valid(slide)) != KERN_SUCCESS) {
		if (kr == KERN_INVALID_ARGUMENT) {
			/*
			 * This will happen if we request sliding again 
			 * with the same slide value that was used earlier
			 * for the very first sliding. We continue through
			 * to the mapping layer. This is so that we can be
			 * absolutely certain that the same mappings have
			 * been requested.
			 */
			kr = KERN_SUCCESS;
		} else {
			goto done;
		}
	}

	if (mappings_count == 0) {
		SHARED_REGION_TRACE_INFO(
			("shared_region: %p [%d(%s)] map(): "
			 "no mappings\n",
			 current_thread(), p->p_pid, p->p_comm));
		kr = 0; /* no mappings: we're done ! */
		goto done;
	} else if (mappings_count <= SFM_MAX_STACK) {
		mappings = &stack_mappings[0];
	} else {
		SHARED_REGION_TRACE_ERROR(
			("shared_region: %p [%d(%s)] map(): "
			 "too many mappings (%d)\n",
			 current_thread(), p->p_pid, p->p_comm,
			 mappings_count));
		kr = KERN_FAILURE;
		goto done;
	}

	/* fetch the mapping descriptors from user space */
	if ( (kr = shared_region_copyin_mappings(p, uap->mappings, uap->count, mappings))) {
		goto done;
	}


	kr = _shared_region_map(p, uap->fd, mappings_count, mappings, &sr_file_control, &mapping_to_slide);
	if (kr != KERN_SUCCESS) {
		return kr;
	}

	/* apply ASLR sliding; on failure, undo the mappings just created */
	if (slide) {
		kr = _shared_region_slide(slide, 
				mapping_to_slide.sfm_file_offset, 
				mapping_to_slide.sfm_size, 
				uap->slide_start, 
				uap->slide_size, 
				sr_file_control);
		if (kr  != KERN_SUCCESS) {
			vm_shared_region_undo_mappings(NULL, 0, mappings, mappings_count);
			return kr;
		}
	}
done:
	return kr;
}
 1545 
 1546 /* sysctl overflow room */
 1547 
 1548 /* vm_page_free_target is provided as a makeshift solution for applications that want to
 1549         allocate buffer space, possibly purgeable memory, but not cause inactive pages to be
 1550         reclaimed. It allows the app to calculate how much memory is free outside the free target. */
/* Read-only pageout-daemon counters exported under the "vm" sysctl node. */
extern unsigned int	vm_page_free_target;
SYSCTL_INT(_vm, OID_AUTO, vm_page_free_target, CTLFLAG_RD | CTLFLAG_LOCKED, 
		   &vm_page_free_target, 0, "Pageout daemon free target");

extern unsigned int	vm_memory_pressure;
SYSCTL_INT(_vm, OID_AUTO, memory_pressure, CTLFLAG_RD | CTLFLAG_LOCKED,
           &vm_memory_pressure, 0, "Memory pressure indicator");
 1558 
/*
 * Sysctl handler for vm.page_free_wanted: reports the pageout daemon's
 * current page shortfall as a read-only integer.
 */
static int
vm_ctl_page_free_wanted SYSCTL_HANDLER_ARGS
{
#pragma unused(oidp, arg1, arg2)
	unsigned int page_free_wanted;

	page_free_wanted = mach_vm_ctl_page_free_wanted();
	return SYSCTL_OUT(req, &page_free_wanted, sizeof (page_free_wanted));
}
SYSCTL_PROC(_vm, OID_AUTO, page_free_wanted,
            CTLTYPE_INT | CTLFLAG_RD | CTLFLAG_LOCKED,
            0, 0, vm_ctl_page_free_wanted, "I", "");
 1571 
/* Purgeable-page counters and page-reuse statistics, read-only under "vm". */
extern unsigned int	vm_page_purgeable_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
           &vm_page_purgeable_count, 0, "Purgeable page count");

extern unsigned int	vm_page_purgeable_wired_count;
SYSCTL_INT(_vm, OID_AUTO, page_purgeable_wired_count, CTLFLAG_RD | CTLFLAG_LOCKED,
           &vm_page_purgeable_wired_count, 0, "Wired purgeable page count");

SYSCTL_INT(_vm, OID_AUTO, page_reusable_count, CTLFLAG_RD | CTLFLAG_LOCKED,
           &vm_page_stats_reusable.reusable_count, 0, "Reusable page count");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_success, CTLFLAG_RD | CTLFLAG_LOCKED,
           &vm_page_stats_reusable.reusable_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
           &vm_page_stats_reusable.reusable_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, reusable_shared, CTLFLAG_RD | CTLFLAG_LOCKED,
           &vm_page_stats_reusable.reusable_pages_shared, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
           &vm_page_stats_reusable.all_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reusable_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
           &vm_page_stats_reusable.partial_reusable_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
           &vm_page_stats_reusable.reuse_pages_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
           &vm_page_stats_reusable.reuse_pages_failure, "");
SYSCTL_QUAD(_vm, OID_AUTO, all_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
           &vm_page_stats_reusable.all_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, partial_reuse_calls, CTLFLAG_RD | CTLFLAG_LOCKED,
           &vm_page_stats_reusable.partial_reuse_calls, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_success, CTLFLAG_RD | CTLFLAG_LOCKED,
           &vm_page_stats_reusable.can_reuse_success, "");
SYSCTL_QUAD(_vm, OID_AUTO, can_reuse_failure, CTLFLAG_RD | CTLFLAG_LOCKED,
           &vm_page_stats_reusable.can_reuse_failure, "");
 1604 
 1605 
 1606 int
 1607 vm_pressure_monitor(
 1608         __unused struct proc *p,
 1609         struct vm_pressure_monitor_args *uap,
 1610         int *retval)
 1611 {
 1612         kern_return_t   kr;
 1613         uint32_t        pages_reclaimed;
 1614         uint32_t        pages_wanted;
 1615 
 1616         kr = mach_vm_pressure_monitor(
 1617                 (boolean_t) uap->wait_for_pressure,
 1618                 uap->nsecs_monitored,
 1619                 (uap->pages_reclaimed) ? &pages_reclaimed : NULL,
 1620                 &pages_wanted);
 1621 
 1622         switch (kr) {
 1623         case KERN_SUCCESS:
 1624                 break;
 1625         case KERN_ABORTED:
 1626                 return EINTR;
 1627         default:
 1628                 return EINVAL;
 1629         }
 1630 
 1631         if (uap->pages_reclaimed) {
 1632                 if (copyout((void *)&pages_reclaimed,
 1633                             uap->pages_reclaimed,
 1634                             sizeof (pages_reclaimed)) != 0) {
 1635                         return EFAULT;
 1636                 }
 1637         }
 1638 
 1639         *retval = (int) pages_wanted;
 1640         return 0;
 1641 }

Cache object: ec6259b5fbc5f5f53f1bf5948f262a8a


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.