The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/bsd/kern/kern_fork.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 2000-2007 Apple Inc. All rights reserved.
    3  *
    4  * @APPLE_OSREFERENCE_LICENSE_HEADER_START@
    5  * 
    6  * This file contains Original Code and/or Modifications of Original Code
    7  * as defined in and that are subject to the Apple Public Source License
    8  * Version 2.0 (the 'License'). You may not use this file except in
    9  * compliance with the License. The rights granted to you under the License
   10  * may not be used to create, or enable the creation or redistribution of,
   11  * unlawful or unlicensed copies of an Apple operating system, or to
   12  * circumvent, violate, or enable the circumvention or violation of, any
   13  * terms of an Apple operating system software license agreement.
   14  * 
   15  * Please obtain a copy of the License at
   16  * http://www.opensource.apple.com/apsl/ and read it before using this file.
   17  * 
   18  * The Original Code and all software distributed under the License are
   19  * distributed on an 'AS IS' basis, WITHOUT WARRANTY OF ANY KIND, EITHER
   20  * EXPRESS OR IMPLIED, AND APPLE HEREBY DISCLAIMS ALL SUCH WARRANTIES,
   21  * INCLUDING WITHOUT LIMITATION, ANY WARRANTIES OF MERCHANTABILITY,
   22  * FITNESS FOR A PARTICULAR PURPOSE, QUIET ENJOYMENT OR NON-INFRINGEMENT.
   23  * Please see the License for the specific language governing rights and
   24  * limitations under the License.
   25  * 
   26  * @APPLE_OSREFERENCE_LICENSE_HEADER_END@
   27  */
   28 /* Copyright (c) 1995, 1997 Apple Computer, Inc. All Rights Reserved */
   29 /*
   30  * Copyright (c) 1982, 1986, 1989, 1991, 1993
   31  *      The Regents of the University of California.  All rights reserved.
   32  * (c) UNIX System Laboratories, Inc.
   33  * All or some portions of this file are derived from material licensed
   34  * to the University of California by American Telephone and Telegraph
   35  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   36  * the permission of UNIX System Laboratories, Inc.
   37  *
   38  * Redistribution and use in source and binary forms, with or without
   39  * modification, are permitted provided that the following conditions
   40  * are met:
   41  * 1. Redistributions of source code must retain the above copyright
   42  *    notice, this list of conditions and the following disclaimer.
   43  * 2. Redistributions in binary form must reproduce the above copyright
   44  *    notice, this list of conditions and the following disclaimer in the
   45  *    documentation and/or other materials provided with the distribution.
   46  * 3. All advertising materials mentioning features or use of this software
   47  *    must display the following acknowledgement:
   48  *      This product includes software developed by the University of
   49  *      California, Berkeley and its contributors.
   50  * 4. Neither the name of the University nor the names of its contributors
   51  *    may be used to endorse or promote products derived from this software
   52  *    without specific prior written permission.
   53  *
   54  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   55  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   56  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   57  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   58  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   59  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   60  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   61  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   62  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   63  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   64  * SUCH DAMAGE.
   65  *
   66  *      @(#)kern_fork.c 8.8 (Berkeley) 2/14/95
   67  */
   68 /*
   69  * NOTICE: This file was modified by McAfee Research in 2004 to introduce
   70  * support for mandatory and extensible security protections.  This notice
   71  * is included in support of clause 2.2 (b) of the Apple Public License,
   72  * Version 2.0.
   73  */
   74 /*
   75  * NOTICE: This file was modified by SPARTA, Inc. in 2005 to introduce
   76  * support for mandatory and extensible security protections.  This notice
   77  * is included in support of clause 2.2 (b) of the Apple Public License,
   78  * Version 2.0.
   79  */
   80 
   81 #include <kern/assert.h>
   82 #include <sys/param.h>
   83 #include <sys/systm.h>
   84 #include <sys/filedesc.h>
   85 #include <sys/kernel.h>
   86 #include <sys/malloc.h>
   87 #include <sys/proc_internal.h>
   88 #include <sys/kauth.h>
   89 #include <sys/user.h>
   90 #include <sys/resourcevar.h>
   91 #include <sys/vnode_internal.h>
   92 #include <sys/file_internal.h>
   93 #include <sys/acct.h>
   94 #include <sys/codesign.h>
   95 #include <sys/sysproto.h>
   96 #if CONFIG_DTRACE
   97 /* Do not include dtrace.h, it redefines kmem_[alloc/free] */
   98 extern void dtrace_fasttrap_fork(proc_t, proc_t);
   99 extern void (*dtrace_helpers_fork)(proc_t, proc_t);
  100 extern void dtrace_lazy_dofs_duplicate(proc_t, proc_t);
  101 
  102 #include <sys/dtrace_ptss.h>
  103 #endif
  104 
  105 #include <security/audit/audit.h>
  106 
  107 #include <mach/mach_types.h>
  108 #include <kern/kern_types.h>
  109 #include <kern/kalloc.h>
  110 #include <kern/mach_param.h>
  111 #include <kern/task.h>
  112 #include <kern/thread_call.h>
  113 #include <kern/zalloc.h>
  114 
  115 #include <machine/spl.h>
  116 
  117 #if CONFIG_MACF
  118 #include <security/mac.h>
  119 #include <security/mac_mach_internal.h>
  120 #endif
  121 
  122 #include <vm/vm_map.h>
  123 #include <vm/vm_protos.h>
  124 #include <vm/vm_shared_region.h>
  125 
  126 #include <sys/shm_internal.h>   /* for shmfork() */
  127 #include <mach/task.h>          /* for thread_create() */
  128 #include <mach/thread_act.h>    /* for thread_resume() */
  129 
  130 #include <sys/sdt.h>
  131 
  132 
  133 /* XXX routines which should have Mach prototypes, but don't */
  134 void thread_set_parent(thread_t parent, int pid);
  135 extern void act_thread_catt(void *ctx);  /* vfork_return() passes the saved uu_userstate */
  136 void thread_set_child(thread_t child, int pid);  /* fork1() passes the child's pid */
  137 void *act_thread_csave(void);  /* fork1() stores the result in uu_userstate */
  138 
  139 
  140 thread_t cloneproc(task_t, proc_t, int);  /* fork1() passes: parent task, parent proc, inherit-memory flag; NULL on failure */
  141 proc_t forkproc(proc_t);  /* fork1() treats a NULL return as "failed to allocate new process" */
  142 void forkproc_free(proc_t);
  143 thread_t fork_create_child(task_t parent_task, proc_t child, int inherit_memory, int is64bit);
  144 void proc_vfork_begin(proc_t parent_proc);
  145 void proc_vfork_end(proc_t parent_proc);
  146 
  147 #define DOFORK  0x1     /* fork() system call */
  148 #define DOVFORK 0x2     /* vfork() system call */
  149 
  150 /*
  151  * proc_vfork_begin
  152  *
  153  * Description: start a vfork on a process
  154  *
  155  * Parameters:  parent_proc             process (re)entering vfork state
  156  *
  157  * Returns:     (void)
  158  *
  159  * Notes:       Although this function increments a count, a count in
  160  *              excess of 1 is not currently supported.  According to the
  161  *              POSIX standard, calling anything other than execve() or
  162  *              _exit() following a vfork(), including calling vfork()
  163  *              itself again, will result in undefined behaviour
  164  */
  165 void
  166 proc_vfork_begin(proc_t parent_proc)
  167 {
  168         proc_lock(parent_proc);  /* flag and count are changed together under the proc lock */
  169         parent_proc->p_lflag  |= P_LVFORK;  /* mark the process as being in vfork */
  170         parent_proc->p_vforkcnt++;  /* matched by the decrement in proc_vfork_end() */
  171         proc_unlock(parent_proc);
  172 }
  173 
  174 /*
  175  * proc_vfork_end
  176  *
  177  * Description: stop a vfork on a process
  178  *
  179  * Parameters:  parent_proc             process leaving vfork state
  180  *
  181  * Returns:     (void)
  182  *
  183  * Notes:       Decrements the count; currently, reentrancy of vfork()
  184  *              is unsupported on the current process
  185  */
  186 void
  187 proc_vfork_end(proc_t parent_proc)
  188 {
  189         proc_lock(parent_proc);  /* count and flag are changed together under the proc lock */
  190         parent_proc->p_vforkcnt--;
  191         if (parent_proc->p_vforkcnt < 0)
  192                 panic("vfork cnt is -ve");  /* more ends than begins */
  193         /* count was reduced above; clear the flag when it goes to 0 */
  194         if (parent_proc->p_vforkcnt == 0)
  195                 parent_proc->p_lflag  &= ~P_LVFORK;
  196         proc_unlock(parent_proc);
  197 }
  198 
  199 
  200 /*
  201  * vfork
  202  *
  203  * Description: vfork system call
  204  *
  205  * Parameters:  void                    [no arguments]
  206  *
  207  * Retval:      0                       (to child process)
  208  *              !0                      pid of child (to parent process)
  209  *              -1                      error (see "Returns:")
  210  *
  211  * Returns:     EAGAIN                  Administrative limit reached
  212  *              EINVAL                  vfork() called during vfork()
  213  *              ENOMEM                  Failed to allocate new process
  214  *
  215  * Note:        After a successful call to this function, the parent process
  216  *              has its task, thread, and uthread lent to the child process,
  217  *              and control is returned to the caller; if this function is
  218  *              invoked as a system call, the return is to user space, and
  219  *              is effectively running on the child process.
  220  *
  221  *              Subsequent calls that operate on process state are permitted,
  222  *              though discouraged, and will operate on the child process; any
  223  *              operations on the task, thread, or uthread will result in
  224  *              changes in the parent state, and, if inheritable, the child
  225  *              state, when a task, thread, and uthread are realized for the
  226  *              child process at execve() time, will also be affected.  Given
  227  *              this, it's recommended that people use the posix_spawn() call
  228  *              instead.
  229  *
  230  * BLOCK DIAGRAM OF VFORK
  231  *
  232  * Before:
  233  *
  234  *     ,----------------.         ,-------------.
  235  *     |                |   task  |             |
  236  *     | parent_thread  | ------> | parent_task |
  237  *     |                | <.list. |             |
  238  *     `----------------'         `-------------'
  239  *    uthread |  ^             bsd_info |  ^
  240  *            v  | vc_thread            v  | task
  241  *     ,----------------.         ,-------------.
  242  *     |                |         |             |
  243  *     | parent_uthread | <.list. | parent_proc | <-- current_proc()
  244  *     |                |         |             |
  245  *     `----------------'         `-------------'
  246  *    uu_proc |
  247  *            v
  248  *           NULL
  249  *
  250  * After:
  251  *
  252  *                 ,----------------.         ,-------------.
  253  *                 |                |   task  |             |
  254  *          ,----> | parent_thread  | ------> | parent_task |
  255  *          |      |                | <.list. |             |
  256  *          |      `----------------'         `-------------'
  257  *          |     uthread |  ^             bsd_info |  ^
  258  *          |             v  | vc_thread            v  | task
  259  *          |      ,----------------.         ,-------------.
  260  *          |      |                |         |             |
  261  *          |      | parent_uthread | <.list. | parent_proc |
  262  *          |      |                |         |             |
  263  *          |      `----------------'         `-------------'
  264  *          |     uu_proc |  . list
  265  *          |             v  v
  266  *          |      ,----------------.
  267  *          `----- |                |
  268  *      p_vforkact | child_proc     | <-- current_proc()
  269  *                 |                |
  270  *                 `----------------'
  271  */
  272 int
  273 vfork(proc_t parent_proc, __unused struct vfork_args *uap, int32_t *retval)
  274 {
  275         thread_t child_thread;  /* filled in by fork1(); not otherwise used here */
  276         int err;
  277 
  278         if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_VFORK)) != 0) {
  279                 retval[1] = 0;  /* creation failed: do not flag a child return */
  280         } else {
  281                 /*
  282                  * kludge: rely on uu_proc being set in the vfork case,
  283                  * rather than returning the actual thread.  We can remove
  284                  * this when we remove the uu_proc/current_proc() kludge.
  285                  */
  286                 proc_t child_proc = current_proc();
  287 
  288                 retval[0] = child_proc->p_pid;
  289                 retval[1] = 1;          /* flag child return for user space */
  290 
  291                 /*
  292                  * Drop the signal lock on the child which was taken on our
  293                  * behalf by forkproc()/cloneproc() to prevent signals being
  294                  * received by the child in a partially constructed state.
  295                  */
  296                 proc_signalend(child_proc, 0);
  297                 proc_transend(child_proc, 0);
  298 
  299                 /* flag the fork has occurred */
  300                 proc_knote(parent_proc, NOTE_FORK | child_proc->p_pid);
  301                 DTRACE_PROC1(create, proc_t, child_proc);
  302         }
  303 
  304         return(err);  /* 0 on success, otherwise the error from fork1() */
  305 }
  306 
  307 
  308 /*
  309  * fork1
  310  *
  311  * Description: common code used by all new process creation other than the
  312  *              bootstrap of the initial process on the system
  313  *
  314  * Parameters:  parent_proc             parent process of the process being created
  315  *              child_threadp           pointer to location to receive the
  316  *                                      Mach thread_t of the child process
  317  *                                      created
  318  *              kind                    kind of creation being requested
  319  *
  320  * Notes:       Permissible values for 'kind':
  321  *
  322  *              PROC_CREATE_FORK        Create a complete process which will
  323  *                                      return actively running in both the
  324  *                                      parent and the child; the child copies
  325  *                                      the parent address space.
  326  *              PROC_CREATE_SPAWN       Create a complete process which will
  327  *                                      return actively running in the parent
  328  *                                      only after returning actively running
  329  *                                      in the child; the child address space
  330  *                                      is newly created by an image activator,
  331  *                                      after which the child is run.
  332  *              PROC_CREATE_VFORK       Creates a partial process which will
  333  *                                      borrow the parent task, thread, and
  334  *                                      uthread to return running in the child;
  335  *                                      the child address space and other parts
  336  *                                      are lazily created at execve() time, or
  337  *                                      the child is terminated, and the parent
  338  *                                      does not actively run until that
  339  *                                      happens.
  340  *
  341  *              At first it may seem strange that we return the child thread
  342  *              address rather than process structure, since the process is
  343  *              the only part guaranteed to be "new"; however, since we do
  344  *              not actually adjust other references between Mach and BSD (see
  345  *              the block diagram above the implementation of vfork()), this
  346  *              is the only method which guarantees us the ability to get
  347  *              back to the other information.
  348  */
  349 int
  350 fork1(proc_t parent_proc, thread_t *child_threadp, int kind)
  351 {
  352         thread_t parent_thread = (thread_t)current_thread();
  353         uthread_t parent_uthread = (uthread_t)get_bsdthread_info(parent_thread);
  354         proc_t child_proc = NULL;       /* set in switch, but compiler... */
  355         thread_t child_thread = NULL;
  356         uid_t uid;
  357         int count;
  358         int err = 0;
  359         int spawn = 0;
  360 
  361         /*
  362          * Although process entries are dynamically created, we still keep
  363          * a global limit on the maximum number we will create.  Don't allow
  364          * a nonprivileged user to use the last process; don't let root
  365          * exceed the limit. The variable nprocs is the current number of
  366          * processes, maxproc is the limit.
  367          */
  368         uid = kauth_cred_get()->cr_ruid;
  369         proc_list_lock();
  370         if ((nprocs >= maxproc - 1 && uid != 0) || nprocs >= maxproc) {
  371                 proc_list_unlock();
  372                 tablefull("proc");
  373                 return (EAGAIN);  /* direct return: per-uid count not yet incremented */
  374         }
  375         proc_list_unlock();
  376 
  377         /*
  378          * Increment the count of procs running with this uid. Don't allow
  379          * a nonprivileged user to exceed their current limit, which is
  380          * always less than what an rlim_t can hold.
  381          * (locking protection is provided by list lock held in chgproccnt)
  382          */
  383         count = chgproccnt(uid, 1);  /* undone at 'bad' whenever err is set */
  384         if (uid != 0 &&
  385             (rlim_t)count > parent_proc->p_rlimit[RLIMIT_NPROC].rlim_cur) {
  386                 err = EAGAIN;
  387                 goto bad;
  388         }
  389 
  390 #if CONFIG_MACF
  391         /*
  392          * Determine if MAC policies applied to the process will allow
  393          * it to fork.  This is an advisory-only check.
  394          */
  395         err = mac_proc_check_fork(parent_proc);
  396         if (err  != 0) {
  397                 goto bad;
  398         }
  399 #endif
  400 
  401         switch(kind) {
  402         case PROC_CREATE_VFORK:
  403                 /*
  404                  * Prevent a vfork while we are in vfork(); we should
  405                  * also likely be preventing a fork here as well, and this
  406                  * check should then be outside the switch statement,
  407                  * since the proc struct contents will copy from the
  408                  * child and the task/thread/uthread from the parent in
  409                  * that case.  We do not support vfork() in vfork()
  410                  * because we don't have to; the same non-requirement
  411                  * is true of both fork() and posix_spawn() and any
  412                  * call other than execve() and _exit(), but we've
  413                  * been historically lenient, so we continue to be so
  414                  * (for now).
  415                  *
  416                  * <rdar://6640521> Probably a source of random panics
  417                  */
  418                 if (parent_uthread->uu_flag & UT_VFORK) {
  419                         printf("fork1 called within vfork by %s\n", parent_proc->p_comm);
  420                         err = EINVAL;
  421                         goto bad;
  422                 }
  423 
  424                 /*
  425                  * Flag us in progress; if we chose to support vfork() in
  426                  * vfork(), we would chain our parent at this point (in
  427                  * effect, a stack push).  We don't, since we actually want
  428                  * to disallow everything not specified in the standard
  429                  */
  430                 proc_vfork_begin(parent_proc);
  431 
  432                 /* The newly created process comes with signal lock held */
  433                 if ((child_proc = forkproc(parent_proc)) == NULL) {
  434                         /* Failed to allocate new process */
  435                         proc_vfork_end(parent_proc);  /* undo proc_vfork_begin() above */
  436                         err = ENOMEM;
  437                         goto bad;
  438                 }
  439 
  440 // XXX BEGIN: wants to move to be common code (and safe)
  441 #if CONFIG_MACF
  442                 /*
  443                  * allow policies to associate the credential/label that
  444                  * we referenced from the parent ... with the child
  445                  * JMM - this really isn't safe, as we can drop that
  446                  *       association without informing the policy in other
  447                  *       situations (keep long enough to get policies changed)
  448                  */
  449                 mac_cred_label_associate_fork(child_proc->p_ucred, child_proc);
  450 #endif
  451 
  452                 /*
  453                  * Propagate change of PID - may get new cred if auditing.
  454                  *
  455                  * NOTE: This has no effect in the vfork case, since
  456                  *      child_proc->task != current_task(), but we duplicate it
  457                  *      because this is probably, ultimately, wrong, since we
  458                  *      will be running in the "child" which is the parent task
  459                  *      with the wrong token until we get to the execve() or
  460                  *      _exit() call; a lot of "undefined" can happen before
  461                  *      that.
  462                  *
  463                  * <rdar://6640530> disallow everything but execve()/_exit()?
  464                  */
  465                 set_security_token(child_proc);
  466 
  467                 AUDIT_ARG(pid, child_proc->p_pid);
  468 
  469                 AUDIT_SESSION_PROCNEW(child_proc->p_ucred);
  470 // XXX END: wants to move to be common code (and safe)
  471 
  472                 /*
  473                  * BORROW PARENT TASK, THREAD, UTHREAD FOR CHILD
  474                  *
  475                  * Note: this is where we would "push" state instead of setting
  476                  * it for nested vfork() support (see proc_vfork_end() for
  477                  * description of issues here).
  478                  */
  479                 child_proc->task = parent_proc->task;
  480 
  481                 child_proc->p_lflag  |= P_LINVFORK;
  482                 child_proc->p_vforkact = parent_thread;
  483                 child_proc->p_stat = SRUN;
  484 
  485                 parent_uthread->uu_flag |= UT_VFORK;
  486                 parent_uthread->uu_proc = child_proc;
  487                 parent_uthread->uu_userstate = (void *)act_thread_csave();
  488                 parent_uthread->uu_vforkmask = parent_uthread->uu_sigmask;
  489 
  490                 /* temporarily drop thread-set-id state */
  491                 if (parent_uthread->uu_flag & UT_SETUID) {
  492                         parent_uthread->uu_flag |= UT_WASSETUID;  /* remember that UT_SETUID was set */
  493                         parent_uthread->uu_flag &= ~UT_SETUID;
  494                 }
  495 
  496                 /* blow thread state information */
  497                 /* XXX is this actually necessary, given syscall return? */
  498                 thread_set_child(parent_thread, child_proc->p_pid);
  499 
  500                 child_proc->p_acflag = AFORK;   /* forked but not exec'ed */
  501 
  502                 /*
  503                  * Preserve synchronization semantics of vfork.  If
  504                  * waiting for child to exec or exit, set P_PPWAIT
  505                  * on child, and sleep on our proc (in case of exit).
  506                  */
  507                 child_proc->p_lflag |= P_LPPWAIT;
  508                 pinsertchild(parent_proc, child_proc);  /* set visible */
  509 
  510                 break;
  511 
  512         case PROC_CREATE_SPAWN:
  513                 /*
  514                  * A spawned process differs from a forked process in that
  515                  * the spawned process does not carry around the parents
  516                  * baggage with regard to address space copying, dtrace,
  517                  * and so on.
  518                  */
  519                 spawn = 1;  /* selects no-inherit cloneproc() and skips thread_dup()/dtrace copying below */
  520 
  521                 /* FALLSTHROUGH */
  522 
  523         case PROC_CREATE_FORK:
  524                 /*
  525                  * When we clone the parent process, we are going to inherit
  526                  * its task attributes and memory, since when we fork, we
  527                  * will, in effect, create a duplicate of it, with only minor
  528                  * differences.  Contrarily, spawned processes do not inherit.
  529                  */
  530                 if ((child_thread = cloneproc(parent_proc->task, parent_proc, spawn ? FALSE : TRUE)) == NULL) {
  531                         /* Failed to create thread */
  532                         err = EAGAIN;
  533                         goto bad;
  534                 }
  535 
  536                 /* copy current thread state into the child thread (only for fork) */
  537                 if (!spawn) {
  538                         thread_dup(child_thread);
  539                 }
  540 
  541                 /* child_proc = child_thread->task->proc; */
  542                 child_proc = (proc_t)(get_bsdtask_info(get_threadtask(child_thread)));
  543 
  544 // XXX BEGIN: wants to move to be common code (and safe)
  545 #if CONFIG_MACF
  546                 /*
  547                  * allow policies to associate the credential/label that
  548                  * we referenced from the parent ... with the child
  549                  * JMM - this really isn't safe, as we can drop that
  550                  *       association without informing the policy in other
  551                  *       situations (keep long enough to get policies changed)
  552                  */
  553                 mac_cred_label_associate_fork(child_proc->p_ucred, child_proc);
  554 #endif
  555 
  556                 /*
  557                  * Propagate change of PID - may get new cred if auditing.
  558                  *
  559                  * NOTE: This has no effect in the vfork case, since
  560                  *      child_proc->task != current_task(), but we duplicate it
  561                  *      because this is probably, ultimately, wrong, since we
  562                  *      will be running in the "child" which is the parent task
  563                  *      with the wrong token until we get to the execve() or
  564                  *      _exit() call; a lot of "undefined" can happen before
  565                  *      that.
  566                  *
  567                  * <rdar://6640530> disallow everything but execve()/_exit()?
  568                  */
  569                 set_security_token(child_proc);
  570 
  571                 AUDIT_ARG(pid, child_proc->p_pid);
  572 
  573                 AUDIT_SESSION_PROCNEW(child_proc->p_ucred);
  574 // XXX END: wants to move to be common code (and safe)
  575 
  576                 /*
  577                  * Blow thread state information; this is what gives the child
  578                  * process its "return" value from a fork() call.
  579                  *
  580                  * Note: this should probably move to fork() proper, since it
  581                  * is not relevant to spawn, and the value won't matter
  582                  * until we resume the child there.  If you are in here
  583                  * refactoring code, consider doing this at the same time.
  584                  */
  585                 thread_set_child(child_thread, child_proc->p_pid);
  586 
  587                 child_proc->p_acflag = AFORK;   /* forked but not exec'ed */
  588 
  589 // <rdar://6598155> dtrace code cleanup needed
  590 #if CONFIG_DTRACE
  591                 /*
  592                  * This code applies to new processes who are copying the task
  593                  * and thread state and address spaces of their parent process.
  594                  */
  595                 if (!spawn) {
  596 // <rdar://6598155> call dtrace specific function here instead of all this...
  597                 /*
  598                  * APPLE NOTE: Solaris does a sprlock() and drops the
  599                  * proc_lock here. We're cheating a bit and only taking
  600                  * the p_dtrace_sprlock lock. A full sprlock would
  601                  * task_suspend the parent.
  602                  */
  603                 lck_mtx_lock(&parent_proc->p_dtrace_sprlock);
  604 
  605                 /*
  606                  * Remove all DTrace tracepoints from the child process. We
  607                  * need to do this _before_ duplicating USDT providers since
  608                  * any associated probes may be immediately enabled.
  609                  */
  610                 if (parent_proc->p_dtrace_count > 0) {
  611                         dtrace_fasttrap_fork(parent_proc, child_proc);
  612                 }
  613 
  614                 lck_mtx_unlock(&parent_proc->p_dtrace_sprlock);
  615 
  616                 /*
  617                  * Duplicate any lazy dof(s). This must be done while NOT
  618                  * holding the parent sprlock! Lock ordering is
  619                  * dtrace_dof_mode_lock, then sprlock.  It is imperative we
  620                  * always call dtrace_lazy_dofs_duplicate, rather than null
  621                  * check and call if !NULL. If we NULL test, during lazy dof
  622                  * faulting we can race with the faulting code and proceed
  623                  * from here to beyond the helpers copy. The lazy dof
  624                  * faulting will then fail to copy the helpers to the child
  625                  * process.
  626                  */
  627                 dtrace_lazy_dofs_duplicate(parent_proc, child_proc);
  628                 
  629                 /*
  630                  * Duplicate any helper actions and providers. The SFORKING
  631                  * we set above informs the code to enable USDT probes that
  632                  * sprlock() may fail because the child is being forked.
  633                  */
  634                 /*
  635                  * APPLE NOTE: As best I can tell, Apple's sprlock() equivalent
  636                  * never fails to find the child. We do not set SFORKING.
  637                  */
  638                 if (parent_proc->p_dtrace_helpers != NULL && dtrace_helpers_fork) {
  639                         (*dtrace_helpers_fork)(parent_proc, child_proc);
  640                 }
  641 
  642                 }  /* end if (!spawn) */
  643 #endif  /* CONFIG_DTRACE */
  644 
  645                 break;
  646 
  647         default:
  648                 panic("fork1 called with unknown kind %d", kind);
  649                 break;
  650         }
  651 
  652 
  653         /* return the thread pointer to the caller */
  654         *child_threadp = child_thread;  /* still NULL for PROC_CREATE_VFORK; not written on the error paths */
  655 
  656 bad:
  657         /*
  658          * In the error case, we return a 0 value for the returned pid (but
  659          * it is ignored in the trampoline due to the error return); this
  660          * is probably not necessary.
  661          */
  662         if (err) {
  663                 (void)chgproccnt(uid, -1);  /* give back the per-uid slot taken by chgproccnt(uid, 1) */
  664         }
  665 
  666         return (err);
  667 }
  668 
  669 
  670 /*
  671  * vfork_return
  672  *
  673  * Description: "Return" to parent vfork thread() following execve/_exit;
  674  *              this is done by reassociating the parent process structure
  675  *              with the task, thread, and uthread.
  676  *
  677  * Parameters:  child_proc              Child process
  678  *              retval                  System call return value array
  679  *              rval                    Return value to present to parent
  680  *
  681  * Returns:     void
  682  *
  683  * Note:        The caller resumes or exits the parent, as appropriate, after
  684  *              callling this function.
  685  */
  686 void
  687 vfork_return(proc_t child_proc, int32_t *retval, int rval)
  688 {
  689         proc_t parent_proc = child_proc->p_pptr;
  690         thread_t parent_thread = (thread_t)current_thread();
  691         uthread_t parent_uthread = (uthread_t)get_bsdthread_info(parent_thread);
  692         
  693 
  694         act_thread_catt(parent_uthread->uu_userstate);
  695 
  696         /* end vfork in parent */
  697         proc_vfork_end(parent_proc);
  698 
  699         /* REPATRIATE PARENT TASK, THREAD, UTHREAD */
  700         parent_uthread->uu_userstate = 0;
  701         parent_uthread->uu_flag &= ~UT_VFORK;
  702         /* restore thread-set-id state */
  703         if (parent_uthread->uu_flag & UT_WASSETUID) {
  704                 parent_uthread->uu_flag |= UT_SETUID;
  705                 parent_uthread->uu_flag &= UT_WASSETUID;
  706         }
  707         parent_uthread->uu_proc = 0;
  708         parent_uthread->uu_sigmask = parent_uthread->uu_vforkmask;
  709         child_proc->p_lflag  &= ~P_LINVFORK;
  710         child_proc->p_vforkact = (void *)0;
  711 
  712         thread_set_parent(parent_thread, rval);
  713 
  714         if (retval) {
  715                 retval[0] = rval;
  716                 retval[1] = 0;                  /* mark parent */
  717         }
  718 
  719         return;
  720 }
  721 
  722 
  723 /*
  724  * fork_create_child
  725  *
  726  * Description: Common operations associated with the creation of a child
  727  *              process
  728  *
  729  * Parameters:  parent_task             parent task
  730  *              child_proc              child process
  731  *              inherit_memory          TRUE, if the parents address space is
  732  *                                      to be inherited by the child
  733  *              is64bit                 TRUE, if the child being created will
  734  *                                      be associated with a 64 bit process
  735  *                                      rather than a 32 bit process
  736  *
  737  * Note:        This code is called in the fork() case, from the execve() call
  738  *              graph, if implementing an execve() following a vfork(), from
  739  *              the posix_spawn() call graph (which implicitly includes a
  740  *              vfork() equivalent call, and in the system bootstrap case.
  741  *
  742  *              It creates a new task and thread (and as a side effect of the
  743  *              thread creation, a uthread), which is then associated with the
  744  *              process 'child'.  If the parent process address space is to
  745  *              be inherited, then a flag indicates that the newly created
  746  *              task should inherit this from the child task.
  747  *
  748  *              As a special concession to bootstrapping the initial process
  749  *              in the system, it's possible for 'parent_task' to be TASK_NULL;
  750  *              in this case, 'inherit_memory' MUST be FALSE.
  751  */
  752 thread_t
  753 fork_create_child(task_t parent_task, proc_t child_proc, int inherit_memory, int is64bit)
  754 {
  755         thread_t        child_thread = NULL;
  756         task_t          child_task;
  757         kern_return_t   result;
  758 
  759         /* Create a new task for the child process */
  760         result = task_create_internal(parent_task,
  761                                         inherit_memory,
  762                                         is64bit,
  763                                         &child_task);
  764         if (result != KERN_SUCCESS) {
  765                 printf("execve: task_create_internal failed.  Code: %d\n", result);
  766                 goto bad;
  767         }
  768 
  769         /* Set the child process task to the new task */
  770         child_proc->task = child_task;
  771 
  772         /* Set child task process to child proc */
  773         set_bsdtask_info(child_task, child_proc);
  774 
  775         /* Propagate CPU limit timer from parent */
  776         if (timerisset(&child_proc->p_rlim_cpu))
  777                 task_vtimer_set(child_task, TASK_VTIMER_RLIM);
  778 
  779         /* Set/clear 64 bit vm_map flag */
  780         if (is64bit)
  781                 vm_map_set_64bit(get_task_map(child_task));
  782         else
  783                 vm_map_set_32bit(get_task_map(child_task));
  784 
  785 #if CONFIG_MACF
  786         /* Update task for MAC framework */
  787         /* valid to use p_ucred as child is still not running ... */
  788         mac_task_label_update_cred(child_proc->p_ucred, child_task);
  789 #endif
  790 
  791         /*
  792          * Set child process BSD visible scheduler priority if nice value
  793          * inherited from parent
  794          */
  795         if (child_proc->p_nice != 0)
  796                 resetpriority(child_proc);
  797 
  798         /* Create a new thread for the child process */
  799         result = thread_create(child_task, &child_thread);
  800         if (result != KERN_SUCCESS) {
  801                 printf("execve: thread_create failed. Code: %d\n", result);
  802                 task_deallocate(child_task);
  803                 child_task = NULL;
  804         }
  805 bad:
  806         thread_yield_internal(1);
  807 
  808         return(child_thread);
  809 }
  810 
  811 
  812 /*
  813  * fork
  814  *
  815  * Description: fork system call.
  816  *
  817  * Parameters:  parent                  Parent process to fork
  818  *              uap (void)              [unused]
  819  *              retval                  Return value
  820  *
  821  * Returns:     0                       Success
  822  *              EAGAIN                  Resource unavailable, try again
  823  *
  824  * Notes:       Attempts to create a new child process which inherits state
  825  *              from the parent process.  If successful, the call returns
  826  *              having created an initially suspended child process with an
  827  *              extra Mach task and thread reference, for which the thread
  828  *              is initially suspended.  Until we resume the child process,
  829  *              it is not yet running.
  830  *
  831  *              The return information to the child is contained in the
  832  *              thread state structure of the new child, and does not
  833  *              become visible to the child through a normal return process,
  834  *              since it never made the call into the kernel itself in the
  835  *              first place.
  836  *
  837  *              After resuming the thread, this function returns directly to
  838  *              the parent process which invoked the fork() system call.
  839  *
  840  * Important:   The child thread_resume occurs before the parent returns;
  841  *              depending on scheduling latency, this means that it is not
  842  *              deterministic as to whether the parent or child is scheduled
  843  *              to run first.  It is entirely possible that the child could
  844  *              run to completion prior to the parent running.
  845  */
  846 int
  847 fork(proc_t parent_proc, __unused struct fork_args *uap, int32_t *retval)
  848 {
  849         thread_t child_thread;
  850         int err;
  851 
  852         retval[1] = 0;          /* flag parent return for user space */
  853 
  854         if ((err = fork1(parent_proc, &child_thread, PROC_CREATE_FORK)) == 0) {
  855                 task_t child_task;
  856                 proc_t child_proc;
  857 
  858                 /* Return to the parent */
  859                 child_proc = (proc_t)get_bsdthreadtask_info(child_thread);
  860                 retval[0] = child_proc->p_pid;
  861 
  862                 /*
  863                  * Drop the signal lock on the child which was taken on our
  864                  * behalf by forkproc()/cloneproc() to prevent signals being
  865                  * received by the child in a partially constructed state.
  866                  */
  867                 proc_signalend(child_proc, 0);
  868                 proc_transend(child_proc, 0);
  869 
  870                 /* flag the fork has occurred */
  871                 proc_knote(parent_proc, NOTE_FORK | child_proc->p_pid);
  872                 DTRACE_PROC1(create, proc_t, child_proc);
  873 
  874                 /* "Return" to the child */
  875                 (void)thread_resume(child_thread);
  876 
  877                 /* drop the extra references we got during the creation */
  878                 if ((child_task = (task_t)get_threadtask(child_thread)) != NULL) {
  879                         task_deallocate(child_task);
  880                 }
  881                 thread_deallocate(child_thread);
  882         }
  883 
  884         return(err);
  885 }
  886 
  887 
  888 /*
  889  * cloneproc
  890  *
  891  * Description: Create a new process from a specified process.
  892  *
  893  * Parameters:  parent_task             The parent task to be cloned, or
  894  *                                      TASK_NULL is task characteristics
  895  *                                      are not to be inherited
  896  *                                      be cloned, or TASK_NULL if the new
  897  *                                      task is not to inherit the VM
  898  *                                      characteristics of the parent
  899  *              parent_proc             The parent process to be cloned
  900  *              inherit_memory          True if the child is to inherit
  901  *                                      memory from the parent; if this is
  902  *                                      non-NULL, then the parent_task must
  903  *                                      also be non-NULL
  904  *
  905  * Returns:     !NULL                   pointer to new child thread
  906  *              NULL                    Failure (unspecified)
  907  *
  908  * Note:        On return newly created child process has signal lock held
  909  *              to block delivery of signal to it if called with lock set.
  910  *              fork() code needs to explicity remove this lock before
  911  *              signals can be delivered
  912  *
  913  *              In the case of bootstrap, this function can be called from
  914  *              bsd_utaskbootstrap() in order to bootstrap the first process;
  915  *              the net effect is to provide a uthread structure for the
  916  *              kernel process associated with the kernel task.
  917  *
  918  * XXX:         Tristating using the value parent_task as the major key
  919  *              and inherit_memory as the minor key is something we should
  920  *              refactor later; we owe the current semantics, ultimately,
  921  *              to the semantics of task_create_internal.  For now, we will
  922  *              live with this being somewhat awkward.
  923  */
  924 thread_t
  925 cloneproc(task_t parent_task, proc_t parent_proc, int inherit_memory)
  926 {
  927         task_t child_task;
  928         proc_t child_proc;
  929         thread_t child_thread = NULL;
  930 
  931         if ((child_proc = forkproc(parent_proc)) == NULL) {
  932                 /* Failed to allocate new process */
  933                 goto bad;
  934         }
  935 
  936         child_thread = fork_create_child(parent_task, child_proc, inherit_memory, (parent_task == TASK_NULL) ? FALSE : (parent_proc->p_flag & P_LP64));
  937 
  938         if (child_thread == NULL) {
  939                 /*
  940                  * Failed to create thread; now we must deconstruct the new
  941                  * process previously obtained from forkproc().
  942                  */
  943                 forkproc_free(child_proc);
  944                 goto bad;
  945         }
  946 
  947         child_task = get_threadtask(child_thread);
  948         if (parent_proc->p_flag & P_LP64) {
  949                 task_set_64bit(child_task, TRUE);
  950                 OSBitOrAtomic(P_LP64, (UInt32 *)&child_proc->p_flag);
  951 #ifdef __ppc__
  952                 /*
  953                  * PPC51: ppc64 is limited to 51-bit addresses.
  954                  * Memory above that limit is handled specially at
  955                  * the pmap level.
  956                  */
  957                 pmap_map_sharedpage(child_task, get_map_pmap(get_task_map(child_task)));
  958 #endif /* __ppc__ */
  959         } else {
  960                 task_set_64bit(child_task, FALSE);
  961                 OSBitAndAtomic(~((uint32_t)P_LP64), (UInt32 *)&child_proc->p_flag);
  962         }
  963 
  964         /* make child visible */
  965         pinsertchild(parent_proc, child_proc);
  966 
  967         /*
  968          * Make child runnable, set start time.
  969          */
  970         child_proc->p_stat = SRUN;
  971 bad:
  972         return(child_thread);
  973 }
  974 
  975 
  976 /*
  977  * Destroy a process structure that resulted from a call to forkproc(), but
  978  * which must be returned to the system because of a subsequent failure
  979  * preventing it from becoming active.
  980  *
  981  * Parameters:  p                       The incomplete process from forkproc()
  982  *
  983  * Returns:     (void)
  984  *
  985  * Note:        This function should only be used in an error handler following
  986  *              a call to forkproc().
  987  *
  988  *              Operations occur in reverse order of those in forkproc().
  989  */
  990 void
  991 forkproc_free(proc_t p)
  992 {
  993 
  994         /* We held signal and a transition locks; drop them */
  995         proc_signalend(p, 0);
  996         proc_transend(p, 0);
  997 
  998         /*
  999          * If we have our own copy of the resource limits structure, we
 1000          * need to free it.  If it's a shared copy, we need to drop our
 1001          * reference on it.
 1002          */
 1003         proc_limitdrop(p, 0);
 1004         p->p_limit = NULL;
 1005 
 1006 #if SYSV_SHM
 1007         /* Need to drop references to the shared memory segment(s), if any */
 1008         if (p->vm_shm) {
 1009                 /*
 1010                  * Use shmexec(): we have no address space, so no mappings
 1011                  *
 1012                  * XXX Yes, the routine is badly named.
 1013                  */
 1014                 shmexec(p);
 1015         }
 1016 #endif
 1017 
 1018         /* Need to undo the effects of the fdcopy(), if any */
 1019         fdfree(p);
 1020 
 1021         /*
 1022          * Drop the reference on a text vnode pointer, if any
 1023          * XXX This code is broken in forkproc(); see <rdar://4256419>;
 1024          * XXX if anyone ever uses this field, we will be extremely unhappy.
 1025          */
 1026         if (p->p_textvp) {
 1027                 vnode_rele(p->p_textvp);
 1028                 p->p_textvp = NULL;
 1029         }
 1030 
 1031         /* Stop the profiling clock */
 1032         stopprofclock(p);
 1033 
 1034         /* Release the credential reference */
 1035         kauth_cred_unref(&p->p_ucred);
 1036 
 1037         proc_list_lock();
 1038         /* Decrement the count of processes in the system */
 1039         nprocs--;
 1040         proc_list_unlock();
 1041 
 1042         thread_call_free(p->p_rcall);
 1043 
 1044         /* Free allocated memory */
 1045         FREE_ZONE(p->p_sigacts, sizeof *p->p_sigacts, M_SIGACTS);
 1046         FREE_ZONE(p->p_stats, sizeof *p->p_stats, M_PSTATS);
 1047         proc_checkdeadrefs(p);
 1048         FREE_ZONE(p, sizeof *p, M_PROC);
 1049 }
 1050 
 1051 
 1052 /*
 1053  * forkproc
 1054  *
 1055  * Description: Create a new process structure, given a parent process
 1056  *              structure.
 1057  *
 1058  * Parameters:  parent_proc             The parent process
 1059  *
 1060  * Returns:     !NULL                   The new process structure
 1061  *              NULL                    Error (insufficient free memory)
 1062  *
 1063  * Note:        When successful, the newly created process structure is
 1064  *              partially initialized; if a caller needs to deconstruct the
 1065  *              returned structure, they must call forkproc_free() to do so.
 1066  */
 1067 proc_t
 1068 forkproc(proc_t parent_proc)
 1069 {
 1070         proc_t child_proc;      /* Our new process */
 1071         static int nextpid = 0, pidwrap = 0, nextpidversion = 0;
 1072         int error = 0;
 1073         struct session *sessp;
 1074         uthread_t parent_uthread = (uthread_t)get_bsdthread_info(current_thread());
 1075 
 1076         MALLOC_ZONE(child_proc, proc_t , sizeof *child_proc, M_PROC, M_WAITOK);
 1077         if (child_proc == NULL) {
 1078                 printf("forkproc: M_PROC zone exhausted\n");
 1079                 goto bad;
 1080         }
 1081         /* zero it out as we need to insert in hash */
 1082         bzero(child_proc, sizeof *child_proc);
 1083 
 1084         MALLOC_ZONE(child_proc->p_stats, struct pstats *,
 1085                         sizeof *child_proc->p_stats, M_PSTATS, M_WAITOK);
 1086         if (child_proc->p_stats == NULL) {
 1087                 printf("forkproc: M_SUBPROC zone exhausted (p_stats)\n");
 1088                 FREE_ZONE(child_proc, sizeof *child_proc, M_PROC);
 1089                 child_proc = NULL;
 1090                 goto bad;
 1091         }
 1092         MALLOC_ZONE(child_proc->p_sigacts, struct sigacts *,
 1093                         sizeof *child_proc->p_sigacts, M_SIGACTS, M_WAITOK);
 1094         if (child_proc->p_sigacts == NULL) {
 1095                 printf("forkproc: M_SUBPROC zone exhausted (p_sigacts)\n");
 1096                 FREE_ZONE(child_proc->p_stats, sizeof *child_proc->p_stats, M_PSTATS);
 1097                 FREE_ZONE(child_proc, sizeof *child_proc, M_PROC);
 1098                 child_proc = NULL;
 1099                 goto bad;
 1100         }
 1101 
 1102         /* allocate a callout for use by interval timers */
 1103         child_proc->p_rcall = thread_call_allocate((thread_call_func_t)realitexpire, child_proc);
 1104         if (child_proc->p_rcall == NULL) {
 1105                 FREE_ZONE(child_proc->p_sigacts, sizeof *child_proc->p_sigacts, M_SIGACTS);
 1106                 FREE_ZONE(child_proc->p_stats, sizeof *child_proc->p_stats, M_PSTATS);
 1107                 FREE_ZONE(child_proc, sizeof *child_proc, M_PROC);
 1108                 child_proc = NULL;
 1109                 goto bad;
 1110         }
 1111 
 1112 
 1113         /*
 1114          * Find an unused PID.  
 1115          */
 1116 
 1117         proc_list_lock();
 1118 
 1119         nextpid++;
 1120 retry:
 1121         /*
 1122          * If the process ID prototype has wrapped around,
 1123          * restart somewhat above 0, as the low-numbered procs
 1124          * tend to include daemons that don't exit.
 1125          */
 1126         if (nextpid >= PID_MAX) {
 1127                 nextpid = 100;
 1128                 pidwrap = 1;
 1129         }
 1130         if (pidwrap != 0) {
 1131 
 1132                 /* if the pid stays in hash both for zombie and runniing state */
 1133                 if  (pfind_locked(nextpid) != PROC_NULL) {
 1134                         nextpid++;
 1135                         goto retry;
 1136                 }
 1137 
 1138                 if (pgfind_internal(nextpid) != PGRP_NULL) {
 1139                         nextpid++;
 1140                         goto retry;
 1141                 }       
 1142                 if (session_find_internal(nextpid) != SESSION_NULL) {
 1143                         nextpid++;
 1144                         goto retry;
 1145                 }       
 1146         }
 1147         nprocs++;
 1148         child_proc->p_pid = nextpid;
 1149         child_proc->p_idversion = nextpidversion++;
 1150 #if 1
 1151         if (child_proc->p_pid != 0) {
 1152                 if (pfind_locked(child_proc->p_pid) != PROC_NULL)
 1153                         panic("proc in the list already\n");
 1154         }
 1155 #endif
 1156         /* Insert in the hash */
 1157         child_proc->p_listflag |= (P_LIST_INHASH | P_LIST_INCREATE);
 1158         LIST_INSERT_HEAD(PIDHASH(child_proc->p_pid), child_proc, p_hash);
 1159         proc_list_unlock();
 1160 
 1161 
 1162         /*
 1163          * We've identified the PID we are going to use; initialize the new
 1164          * process structure.
 1165          */
 1166         child_proc->p_stat = SIDL;
 1167         child_proc->p_pgrpid = PGRPID_DEAD;
 1168 
 1169         /*
 1170          * The zero'ing of the proc was at the allocation time due to need
 1171          * for insertion to hash.  Copy the section that is to be copied
 1172          * directly from the parent.
 1173          */
 1174         bcopy(&parent_proc->p_startcopy, &child_proc->p_startcopy,
 1175             (unsigned) ((caddr_t)&child_proc->p_endcopy - (caddr_t)&child_proc->p_startcopy));
 1176 
 1177         /*
 1178          * Some flags are inherited from the parent.
 1179          * Duplicate sub-structures as needed.
 1180          * Increase reference counts on shared objects.
 1181          * The p_stats and p_sigacts substructs are set in vm_fork.
 1182          */
 1183         child_proc->p_flag = (parent_proc->p_flag & (P_LP64 | P_TRANSLATED | P_AFFINITY));
 1184         if (parent_proc->p_flag & P_PROFIL)
 1185                 startprofclock(child_proc);
 1186         /*
 1187          * Note that if the current thread has an assumed identity, this
 1188          * credential will be granted to the new process.
 1189          */
 1190         child_proc->p_ucred = kauth_cred_get_with_ref();
 1191 
 1192 #ifdef CONFIG_EMBEDDED
 1193         lck_mtx_init(&child_proc->p_mlock, proc_lck_grp, proc_lck_attr);
 1194         lck_mtx_init(&child_proc->p_fdmlock, proc_lck_grp, proc_lck_attr);
 1195 #if CONFIG_DTRACE
 1196         lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr);
 1197 #endif
 1198         lck_spin_init(&child_proc->p_slock, proc_lck_grp, proc_lck_attr);
 1199 #else /* !CONFIG_EMBEDDED */
 1200         lck_mtx_init(&child_proc->p_mlock, proc_mlock_grp, proc_lck_attr);
 1201         lck_mtx_init(&child_proc->p_fdmlock, proc_fdmlock_grp, proc_lck_attr);
 1202 #if CONFIG_DTRACE
 1203         lck_mtx_init(&child_proc->p_dtrace_sprlock, proc_lck_grp, proc_lck_attr);
 1204 #endif
 1205         lck_spin_init(&child_proc->p_slock, proc_slock_grp, proc_lck_attr);
 1206 #endif /* !CONFIG_EMBEDDED */
 1207         klist_init(&child_proc->p_klist);
 1208 
 1209         if (child_proc->p_textvp != NULLVP) {
 1210                 /* bump references to the text vnode */
 1211                 /* Need to hold iocount across the ref call */
 1212                 if (vnode_getwithref(child_proc->p_textvp) == 0) {
 1213                         error = vnode_ref(child_proc->p_textvp);
 1214                         vnode_put(child_proc->p_textvp);
 1215                         if (error != 0)
 1216                                 child_proc->p_textvp = NULLVP;
 1217                 }
 1218         }
 1219 
 1220         /*
 1221          * Copy the parents per process open file table to the child; if
 1222          * there is a per-thread current working directory, set the childs
 1223          * per-process current working directory to that instead of the
 1224          * parents.
 1225          *
 1226          * XXX may fail to copy descriptors to child
 1227          */
 1228         child_proc->p_fd = fdcopy(parent_proc, parent_uthread->uu_cdir);
 1229 
 1230 #if SYSV_SHM
 1231         if (parent_proc->vm_shm) {
 1232                 /* XXX may fail to attach shm to child */
 1233                 (void)shmfork(parent_proc, child_proc);
 1234         }
 1235 #endif
 1236         /*
 1237          * inherit the limit structure to child
 1238          */
 1239         proc_limitfork(parent_proc, child_proc);
 1240 
 1241         if (child_proc->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur != RLIM_INFINITY) {
 1242                 uint64_t rlim_cur = child_proc->p_limit->pl_rlimit[RLIMIT_CPU].rlim_cur;
 1243                 child_proc->p_rlim_cpu.tv_sec = (rlim_cur > __INT_MAX__) ? __INT_MAX__ : rlim_cur;
 1244         }
 1245 
 1246         /* Intialize new process stats, including start time */
 1247         /* <rdar://6640543> non-zeroed portion contains garbage AFAICT */
 1248         bzero(&child_proc->p_stats->pstat_startzero,
 1249             (unsigned) ((caddr_t)&child_proc->p_stats->pstat_endzero -
 1250             (caddr_t)&child_proc->p_stats->pstat_startzero));
 1251         bzero(&child_proc->p_stats->user_p_prof, sizeof(struct user_uprof));
 1252         microtime(&child_proc->p_start);
 1253         child_proc->p_stats->p_start = child_proc->p_start;     /* for compat */
 1254 
 1255         if (parent_proc->p_sigacts != NULL)
 1256                 (void)memcpy(child_proc->p_sigacts,
 1257                                 parent_proc->p_sigacts, sizeof *child_proc->p_sigacts);
 1258         else
 1259                 (void)memset(child_proc->p_sigacts, 0, sizeof *child_proc->p_sigacts);
 1260 
 1261         sessp = proc_session(parent_proc);
 1262         if (sessp->s_ttyvp != NULL && parent_proc->p_flag & P_CONTROLT)
 1263                 OSBitOrAtomic(P_CONTROLT, &child_proc->p_flag);
 1264         session_rele(sessp);
 1265 
 1266         /*
 1267          * block all signals to reach the process.
 1268          * no transition race should be occuring with the child yet,
 1269          * but indicate that the process is in (the creation) transition.
 1270          */
 1271         proc_signalstart(child_proc, 0);
 1272         proc_transstart(child_proc, 0);
 1273 
 1274         child_proc->p_pcaction = (parent_proc->p_pcaction) & P_PCMAX;
 1275         TAILQ_INIT(&child_proc->p_uthlist);
 1276         TAILQ_INIT(&child_proc->p_aio_activeq);
 1277         TAILQ_INIT(&child_proc->p_aio_doneq);
 1278 
 1279         /* Inherit the parent flags for code sign */
 1280         child_proc->p_csflags = parent_proc->p_csflags;
 1281 
 1282         /*
 1283          * All processes have work queue locks; cleaned up by
 1284          * reap_child_locked()
 1285          */
 1286         workqueue_init_lock(child_proc);
 1287 
 1288         /*
 1289          * Copy work queue information
 1290          *
 1291          * Note: This should probably only happen in the case where we are
 1292          *      creating a child that is a copy of the parent; since this
 1293          *      routine is called in the non-duplication case of vfork()
 1294          *      or posix_spawn(), then this information should likely not
 1295          *      be duplicated.
 1296          *
 1297          * <rdar://6640553> Work queue pointers that no longer point to code
 1298          */
 1299         child_proc->p_wqthread = parent_proc->p_wqthread;
 1300         child_proc->p_threadstart = parent_proc->p_threadstart;
 1301         child_proc->p_pthsize = parent_proc->p_pthsize;
 1302         child_proc->p_targconc = parent_proc->p_targconc;
 1303         if ((parent_proc->p_lflag & P_LREGISTER) != 0) {
 1304                 child_proc->p_lflag |= P_LREGISTER;
 1305         }
 1306         child_proc->p_dispatchqueue_offset = parent_proc->p_dispatchqueue_offset;
 1307 #if PSYNCH
 1308         pth_proc_hashinit(child_proc);
 1309 #endif /* PSYNCH */
 1310 
 1311 #if CONFIG_LCTX
 1312         child_proc->p_lctx = NULL;
 1313         /* Add new process to login context (if any). */
 1314         if (parent_proc->p_lctx != NULL) {
 1315                 /*
 1316                  * <rdar://6640564> This should probably be delayed in the
 1317                  * vfork() or posix_spawn() cases.
 1318                  */
 1319                 LCTX_LOCK(parent_proc->p_lctx);
 1320                 enterlctx(child_proc, parent_proc->p_lctx, 0);
 1321         }
 1322 #endif
 1323 
 1324 bad:
 1325         return(child_proc);
 1326 }
 1327 
 1328 void
 1329 proc_lock(proc_t p)
 1330 {
 1331         lck_mtx_lock(&p->p_mlock);
 1332 }
 1333 
 1334 void
 1335 proc_unlock(proc_t p)
 1336 {
 1337         lck_mtx_unlock(&p->p_mlock);
 1338 }
 1339 
 1340 void
 1341 proc_spinlock(proc_t p)
 1342 {
 1343         lck_spin_lock(&p->p_slock);
 1344 }
 1345 
 1346 void
 1347 proc_spinunlock(proc_t p)
 1348 {
 1349         lck_spin_unlock(&p->p_slock);
 1350 }
 1351 
 1352 void 
 1353 proc_list_lock(void)
 1354 {
 1355         lck_mtx_lock(proc_list_mlock);
 1356 }
 1357 
 1358 void 
 1359 proc_list_unlock(void)
 1360 {
 1361         lck_mtx_unlock(proc_list_mlock);
 1362 }
 1363 
 1364 #include <kern/zalloc.h>
 1365 
 1366 struct zone     *uthread_zone;
 1367 static int uthread_zone_inited = 0;
 1368 
 1369 static void
 1370 uthread_zone_init(void)
 1371 {
 1372         if (!uthread_zone_inited) {
 1373                 uthread_zone = zinit(sizeof(struct uthread),
 1374                                         thread_max * sizeof(struct uthread),
 1375                                         THREAD_CHUNK * sizeof(struct uthread),
 1376                                         "uthreads");
 1377                 uthread_zone_inited = 1;
 1378         }
 1379 }
 1380 
 1381 void *
 1382 uthread_alloc(task_t task, thread_t thread, int noinherit)
 1383 {
 1384         proc_t p;
 1385         uthread_t uth;
 1386         uthread_t uth_parent;
 1387         void *ut;
 1388 
 1389         if (!uthread_zone_inited)
 1390                 uthread_zone_init();
 1391 
 1392         ut = (void *)zalloc(uthread_zone);
 1393         bzero(ut, sizeof(struct uthread));
 1394 
 1395         p = (proc_t) get_bsdtask_info(task);
 1396         uth = (uthread_t)ut;
 1397 
 1398         /*
 1399          * Thread inherits credential from the creating thread, if both
 1400          * are in the same task.
 1401          *
 1402          * If the creating thread has no credential or is from another
 1403          * task we can leave the new thread credential NULL.  If it needs
 1404          * one later, it will be lazily assigned from the task's process.
 1405          */
 1406         uth_parent = (uthread_t)get_bsdthread_info(current_thread());
 1407         if ((noinherit == 0) && task == current_task() && 
 1408             uth_parent != NULL &&
 1409             IS_VALID_CRED(uth_parent->uu_ucred)) {
 1410                 /*
 1411                  * XXX The new thread is, in theory, being created in context
 1412                  * XXX of parent thread, so a direct reference to the parent
 1413                  * XXX is OK.
 1414                  */
 1415                 kauth_cred_ref(uth_parent->uu_ucred);
 1416                 uth->uu_ucred = uth_parent->uu_ucred;
 1417                 /* the credential we just inherited is an assumed credential */
 1418                 if (uth_parent->uu_flag & UT_SETUID)
 1419                         uth->uu_flag |= UT_SETUID;
 1420         } else {
 1421                 /* sometimes workqueue threads are created out task context */
 1422                 if ((task != kernel_task) && (p != PROC_NULL))
 1423                         uth->uu_ucred = kauth_cred_proc_ref(p);
 1424                 else
 1425                         uth->uu_ucred = NOCRED;
 1426         }
 1427 
 1428         
 1429         if ((task != kernel_task) && p) {
 1430                 
 1431                 proc_lock(p);
 1432                 if (noinherit != 0) {
 1433                         /* workq threads will not inherit masks */
 1434                         uth->uu_sigmask = ~workq_threadmask;
 1435                 } else if (uth_parent) {
 1436                         if (uth_parent->uu_flag & UT_SAS_OLDMASK)
 1437                                 uth->uu_sigmask = uth_parent->uu_oldmask;
 1438                         else
 1439                                 uth->uu_sigmask = uth_parent->uu_sigmask;
 1440                 }
 1441                 uth->uu_context.vc_thread = thread;
 1442                 TAILQ_INSERT_TAIL(&p->p_uthlist, uth, uu_list);
 1443                 proc_unlock(p);
 1444 
 1445 #if CONFIG_DTRACE
 1446                 if (p->p_dtrace_ptss_pages != NULL) {
 1447                         uth->t_dtrace_scratch = dtrace_ptss_claim_entry(p);
 1448                 }
 1449 #endif
 1450         }
 1451 
 1452         return (ut);
 1453 }
 1454 
 1455 
 1456 /* 
 1457  * This routine frees all the BSD context in uthread except the credential.
 1458  * It does not free the uthread structure as well
 1459  */
 1460 void
 1461 uthread_cleanup(task_t task, void *uthread, void * bsd_info)
 1462 {
 1463         struct _select *sel;
 1464         uthread_t uth = (uthread_t)uthread;
 1465         proc_t p = (proc_t)bsd_info;
 1466 
 1467 
 1468         if (uth->uu_lowpri_window || uth->uu_throttle_info) {
 1469                 /*
 1470                  * task is marked as a low priority I/O type
 1471                  * and we've somehow managed to not dismiss the throttle
 1472                  * through the normal exit paths back to user space...
 1473                  * no need to throttle this thread since its going away
 1474                  * but we do need to update our bookeeping w/r to throttled threads
 1475                  *
 1476                  * Calling this routine will clean up any throttle info reference
 1477                  * still inuse by the thread.
 1478                  */
 1479                 throttle_lowpri_io(FALSE);
 1480         }
 1481         /*
 1482          * Per-thread audit state should never last beyond system
 1483          * call return.  Since we don't audit the thread creation/
 1484          * removal, the thread state pointer should never be
 1485          * non-NULL when we get here.
 1486          */
 1487         assert(uth->uu_ar == NULL);
 1488 
 1489         sel = &uth->uu_select;
 1490         /* cleanup the select bit space */
 1491         if (sel->nbytes) {
 1492                 FREE(sel->ibits, M_TEMP);
 1493                 FREE(sel->obits, M_TEMP);
 1494                 sel->nbytes = 0;
 1495         }
 1496 
 1497         if (uth->uu_cdir) {
 1498                 vnode_rele(uth->uu_cdir);
 1499                 uth->uu_cdir = NULLVP;
 1500         }
 1501 
 1502         if (uth->uu_allocsize && uth->uu_wqset){
 1503                 kfree(uth->uu_wqset, uth->uu_allocsize);
 1504                 sel->count = 0;
 1505                 uth->uu_allocsize = 0;
 1506                 uth->uu_wqset = 0;
 1507                 sel->wql = 0;
 1508         }
 1509 
 1510         if(uth->pth_name != NULL)
 1511         {
 1512                 kfree(uth->pth_name, MAXTHREADNAMESIZE);
 1513                 uth->pth_name = 0;
 1514         }
 1515         if ((task != kernel_task) && p) {
 1516 
 1517                 if (((uth->uu_flag & UT_VFORK) == UT_VFORK) && (uth->uu_proc != PROC_NULL))  {
 1518                         vfork_exit_internal(uth->uu_proc, 0, 1);
 1519                 }
 1520                 /*
 1521                  * Remove the thread from the process list and
 1522                  * transfer [appropriate] pending signals to the process.
 1523                  */
 1524                 if (get_bsdtask_info(task) == p) { 
 1525                         proc_lock(p);
 1526                         TAILQ_REMOVE(&p->p_uthlist, uth, uu_list);
 1527                         p->p_siglist |= (uth->uu_siglist & execmask & (~p->p_sigignore | sigcantmask));
 1528                         proc_unlock(p);
 1529                 }
 1530 #if CONFIG_DTRACE
 1531                 struct dtrace_ptss_page_entry *tmpptr = uth->t_dtrace_scratch;
 1532                 uth->t_dtrace_scratch = NULL;
 1533                 if (tmpptr != NULL) {
 1534                         dtrace_ptss_release_entry(p, tmpptr);
 1535                 }
 1536 #endif
 1537         }
 1538 }
 1539 
 1540 /* This routine releases the credential stored in uthread */
 1541 void
 1542 uthread_cred_free(void *uthread)
 1543 {
 1544         uthread_t uth = (uthread_t)uthread;
 1545 
 1546         /* and free the uthread itself */
 1547         if (IS_VALID_CRED(uth->uu_ucred)) {
 1548                 kauth_cred_t oldcred = uth->uu_ucred;
 1549                 uth->uu_ucred = NOCRED;
 1550                 kauth_cred_unref(&oldcred);
 1551         }
 1552 }
 1553 
 1554 /* This routine frees the uthread structure held in thread structure */
 1555 void
 1556 uthread_zone_free(void *uthread)
 1557 {
 1558         /* and free the uthread itself */
 1559         zfree(uthread_zone, uthread);
 1560 }

Cache object: 9036562c3fb11ea034cf228c97be1f77


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.