kern_fork.c

Version: - FREEBSD - FREEBSD-13-STABLE - FREEBSD-13-0 - FREEBSD-12-STABLE - FREEBSD-12-0 - FREEBSD-11-STABLE - FREEBSD-11-0 - FREEBSD-10-STABLE - FREEBSD-10-0 - FREEBSD-9-STABLE - FREEBSD-9-0 - FREEBSD-8-STABLE - FREEBSD-8-0 - FREEBSD-7-STABLE - FREEBSD-7-0 - FREEBSD-6-STABLE - FREEBSD-6-0 - FREEBSD-5-STABLE - FREEBSD-5-0 - FREEBSD-4-STABLE - FREEBSD-3-STABLE - FREEBSD22 - l41 - OPENBSD - linux-2.6 - MK84 - PLAN9 - xnu-8792
SearchContext: - none - 3 - 10
    1 /*
    2  * Copyright (c) 1982, 1986, 1989, 1991, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 3. Neither the name of the University nor the names of its contributors
   19  *    may be used to endorse or promote products derived from this software
   20  *    without specific prior written permission.
   21  *
   22  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   25  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   32  * SUCH DAMAGE.
   33  *
   34  *      @(#)kern_fork.c 8.6 (Berkeley) 4/8/94
   35  * $FreeBSD: src/sys/kern/kern_fork.c,v 1.72.2.14 2003/06/26 04:15:10 silby Exp $
   36  */
   37 
   38 #include "opt_ktrace.h"
   39 
   40 #include <sys/param.h>
   41 #include <sys/systm.h>
   42 #include <sys/sysproto.h>
   43 #include <sys/filedesc.h>
   44 #include <sys/kernel.h>
   45 #include <sys/sysctl.h>
   46 #include <sys/malloc.h>
   47 #include <sys/proc.h>
   48 #include <sys/resourcevar.h>
   49 #include <sys/vnode.h>
   50 #include <sys/acct.h>
   51 #include <sys/ktrace.h>
   52 #include <sys/unistd.h>
   53 #include <sys/jail.h>
   54 
   55 #include <vm/vm.h>
   56 #include <sys/lock.h>
   57 #include <vm/pmap.h>
   58 #include <vm/vm_map.h>
   59 #include <vm/vm_extern.h>
   60 
   61 #include <sys/vmmeter.h>
   62 #include <sys/refcount.h>
   63 #include <sys/thread2.h>
   64 #include <sys/signal2.h>
   65 #include <sys/spinlock2.h>
   66 
   67 #include <sys/dsched.h>
   68 
   69 static MALLOC_DEFINE(M_ATFORK, "atfork", "atfork callback");
   70 /* M_ATFORK tags the forklist entries allocated in at_fork(). */
   71 /*
   72  * These are the structures used to create a callout list for things to do
   73  * when forking a process.  fork1() walks fork_list and calls each entry.
   74  */
   75 struct forklist {
   76         forklist_fn function;           /* callback run by fork1() */
   77         TAILQ_ENTRY(forklist) next;     /* linkage on fork_list */
   78 };
   79 
   80 TAILQ_HEAD(forklist_head, forklist);
   81 static struct forklist_head fork_list = TAILQ_HEAD_INITIALIZER(fork_list);
   82 /* Forward declaration: lwp_fork() is defined after its callers. */
   83 static struct lwp *lwp_fork(struct lwp *, struct proc *, int flags);
   84 
   85 int forksleep; /* Address fork1() tsleep()s on when a process limit is hit. */
   86 
   87 /*
   88  * Red-Black tree support for LWPs
   89  */
   90 
   91 static int
   92 rb_lwp_compare(struct lwp *lp1, struct lwp *lp2)
   93 {
   94         if (lp1->lwp_tid < lp2->lwp_tid)
   95                 return(-1);
   96         if (lp1->lwp_tid > lp2->lwp_tid)
   97                 return(1);
   98         return(0);
   99 }
  100 
  101 RB_GENERATE2(lwp_rb_tree, lwp, u.lwp_rbnode, rb_lwp_compare, lwpid_t, lwp_tid);
  102 
  103 /*
  104  * fork() system call
  105  */
  106 int
  107 sys_fork(struct fork_args *uap)
  108 {
  109         struct lwp *lp = curthread->td_lwp;
  110         struct proc *p2;
  111         int error;
  112 
  113         error = fork1(lp, RFFDG | RFPROC | RFPGLOCK, &p2);
  114         if (error == 0) {
  115                 PHOLD(p2);
  116                 start_forked_proc(lp, p2);
  117                 uap->sysmsg_fds[0] = p2->p_pid;
  118                 uap->sysmsg_fds[1] = 0;
  119                 PRELE(p2);
  120         }
  121         return error;
  122 }
  123 
  124 /*
  125  * vfork() system call
  126  */
  127 int
  128 sys_vfork(struct vfork_args *uap)
  129 {
  130         struct lwp *lp = curthread->td_lwp;
  131         struct proc *p2;
  132         int error;
  133 
  134         error = fork1(lp, RFFDG | RFPROC | RFPPWAIT | RFMEM | RFPGLOCK, &p2);
  135         if (error == 0) {
  136                 PHOLD(p2);
  137                 start_forked_proc(lp, p2);
  138                 uap->sysmsg_fds[0] = p2->p_pid;
  139                 uap->sysmsg_fds[1] = 0;
  140                 PRELE(p2);
  141         }
  142         return error;
  143 }
  144 
  145 /*
  146  * Handle rforks.  An rfork may (1) operate on the current process without
  147  * creating a new, (2) create a new process that shared the current process's
  148  * vmspace, signals, and/or descriptors, or (3) create a new process that does
  149  * not share these things (normal fork).
  150  *
  151  * Note that we only call start_forked_proc() if a new process is actually
  152  * created.
  153  *
  154  * rfork { int flags }
  155  */
  156 int
  157 sys_rfork(struct rfork_args *uap)
  158 {
  159         struct lwp *lp = curthread->td_lwp;
  160         struct proc *p2;
  161         int error;
  162 
  163         if ((uap->flags & RFKERNELONLY) != 0)
  164                 return (EINVAL);
  165 
  166         error = fork1(lp, uap->flags | RFPGLOCK, &p2);
  167         if (error == 0) {
  168                 if (p2) {
  169                         PHOLD(p2);
  170                         start_forked_proc(lp, p2);
  171                         uap->sysmsg_fds[0] = p2->p_pid;
  172                         uap->sysmsg_fds[1] = 0;
  173                         PRELE(p2);
  174                 } else {
  175                         uap->sysmsg_fds[0] = 0;
  176                         uap->sysmsg_fds[1] = 0;
  177                 }
  178         }
  179         return error;
  180 }
  181 
  182 /*
  183  * Low level thread create used by pthreads.
  184  */
  185 int
  186 sys_lwp_create(struct lwp_create_args *uap)
  187 {
  188         struct proc *p = curproc;
  189         struct lwp *lp;
  190         struct lwp_params params;
  191         int error;
  192 
  193         error = copyin(uap->params, &params, sizeof(params));
  194         if (error)
  195                 goto fail2;
  196 
  197         lwkt_gettoken(&p->p_token);
  198         plimit_lwp_fork(p);     /* force exclusive access */
  199         lp = lwp_fork(curthread->td_lwp, p, RFPROC);
  200         error = cpu_prepare_lwp(lp, &params);
  201         if (error)
  202                 goto fail;
  203         if (params.tid1 != NULL &&
  204             (error = copyout(&lp->lwp_tid, params.tid1, sizeof(lp->lwp_tid))))
  205                 goto fail;
  206         if (params.tid2 != NULL &&
  207             (error = copyout(&lp->lwp_tid, params.tid2, sizeof(lp->lwp_tid))))
  208                 goto fail;
  209 
  210         /*
  211          * Now schedule the new lwp. 
  212          */
  213         p->p_usched->resetpriority(lp);
  214         crit_enter();
  215         lp->lwp_stat = LSRUN;
  216         p->p_usched->setrunqueue(lp);
  217         crit_exit();
  218         lwkt_reltoken(&p->p_token);
  219 
  220         return (0);
  221 
  222 fail:
  223         lwp_rb_tree_RB_REMOVE(&p->p_lwp_tree, lp);
  224         --p->p_nthreads;
  225         /* lwp_dispose expects an exited lwp, and a held proc */
  226         atomic_set_int(&lp->lwp_mpflags, LWP_MP_WEXIT);
  227         lp->lwp_thread->td_flags |= TDF_EXITING;
  228         lwkt_remove_tdallq(lp->lwp_thread);
  229         PHOLD(p);
  230         biosched_done(lp->lwp_thread);
  231         dsched_exit_thread(lp->lwp_thread);
  232         lwp_dispose(lp);
  233         lwkt_reltoken(&p->p_token);
  234 fail2:
  235         return (error);
  236 }
  237 
  238 int     nprocs = 1;             /* process 0 */
  239 
  240 int
  241 fork1(struct lwp *lp1, int flags, struct proc **procp)
  242 {
  243         struct proc *p1 = lp1->lwp_proc;
  244         struct proc *p2;
  245         struct proc *pptr;
  246         struct pgrp *p1grp;
  247         struct pgrp *plkgrp;
  248         uid_t uid;
  249         int ok, error;
  250         static int curfail = 0;
  251         static struct timeval lastfail;
  252         struct forklist *ep;
  253         struct filedesc_to_leader *fdtol;
  254 
  255         if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
  256                 return (EINVAL);
  257 
  258         lwkt_gettoken(&p1->p_token);
  259         plkgrp = NULL;
  260         p2 = NULL;
  261 
  262         /*
  263          * Here we don't create a new process, but we divorce
  264          * certain parts of a process from itself.
  265          */
  266         if ((flags & RFPROC) == 0) {
  267                 /*
  268                  * This kind of stunt does not work anymore if
  269                  * there are native threads (lwps) running
  270                  */
  271                 if (p1->p_nthreads != 1) {
  272                         error = EINVAL;
  273                         goto done;
  274                 }
  275 
  276                 vm_fork(p1, 0, flags);
  277 
  278                 /*
  279                  * Close all file descriptors.
  280                  */
  281                 if (flags & RFCFDG) {
  282                         struct filedesc *fdtmp;
  283                         fdtmp = fdinit(p1);
  284                         fdfree(p1, fdtmp);
  285                 }
  286 
  287                 /*
  288                  * Unshare file descriptors (from parent.)
  289                  */
  290                 if (flags & RFFDG) {
  291                         if (p1->p_fd->fd_refcnt > 1) {
  292                                 struct filedesc *newfd;
  293                                 error = fdcopy(p1, &newfd);
  294                                 if (error != 0) {
  295                                         error = ENOMEM;
  296                                         goto done;
  297                                 }
  298                                 fdfree(p1, newfd);
  299                         }
  300                 }
  301                 *procp = NULL;
  302                 error = 0;
  303                 goto done;
  304         }
  305 
  306         /*
  307          * Interlock against process group signal delivery.  If signals
  308          * are pending after the interlock is obtained we have to restart
  309          * the system call to process the signals.  If we don't the child
  310          * can miss a pgsignal (such as ^C) sent during the fork.
  311          *
  312          * We can't use CURSIG() here because it will process any STOPs
  313          * and cause the process group lock to be held indefinitely.  If
  314          * a STOP occurs, the fork will be restarted after the CONT.
  315          */
  316         p1grp = p1->p_pgrp;
  317         if ((flags & RFPGLOCK) && (plkgrp = p1->p_pgrp) != NULL) {
  318                 pgref(plkgrp);
  319                 lockmgr(&plkgrp->pg_lock, LK_SHARED);
  320                 if (CURSIG_NOBLOCK(lp1)) {
  321                         error = ERESTART;
  322                         goto done;
  323                 }
  324         }
  325 
  326         /*
  327          * Although process entries are dynamically created, we still keep
  328          * a global limit on the maximum number we will create.  Don't allow
  329          * a nonprivileged user to use the last ten processes; don't let root
  330          * exceed the limit. The variable nprocs is the current number of
  331          * processes, maxproc is the limit.
  332          */
  333         uid = lp1->lwp_thread->td_ucred->cr_ruid;
  334         if ((nprocs >= maxproc - 10 && uid != 0) || nprocs >= maxproc) {
  335                 if (ppsratecheck(&lastfail, &curfail, 1))
  336                         kprintf("maxproc limit exceeded by uid %d, please "
  337                                "see tuning(7) and login.conf(5).\n", uid);
  338                 tsleep(&forksleep, 0, "fork", hz / 2);
  339                 error = EAGAIN;
  340                 goto done;
  341         }
  342 
  343         /*
  344          * Increment the nprocs resource before blocking can occur.  There
  345          * are hard-limits as to the number of processes that can run.
  346          */
  347         atomic_add_int(&nprocs, 1);
  348 
  349         /*
  350          * Increment the count of procs running with this uid. Don't allow
  351          * a nonprivileged user to exceed their current limit.
  352          */
  353         ok = chgproccnt(lp1->lwp_thread->td_ucred->cr_ruidinfo, 1,
  354                 (uid != 0) ? p1->p_rlimit[RLIMIT_NPROC].rlim_cur : 0);
  355         if (!ok) {
  356                 /*
  357                  * Back out the process count
  358                  */
  359                 atomic_add_int(&nprocs, -1);
  360                 if (ppsratecheck(&lastfail, &curfail, 1))
  361                         kprintf("maxproc limit exceeded by uid %d, please "
  362                                "see tuning(7) and login.conf(5).\n", uid);
  363                 tsleep(&forksleep, 0, "fork", hz / 2);
  364                 error = EAGAIN;
  365                 goto done;
  366         }
  367 
  368         /*
  369          * Allocate a new process, don't get fancy: zero the structure.
  370          */
  371         p2 = kmalloc(sizeof(struct proc), M_PROC, M_WAITOK|M_ZERO);
  372 
  373         /*
  374          * Core initialization.  SIDL is a safety state that protects the
  375          * partially initialized process once it starts getting hooked
  376          * into system structures and becomes addressable.
  377          *
  378          * We must be sure to acquire p2->p_token as well, we must hold it
  379          * once the process is on the allproc list to avoid things such
  380          * as competing modifications to p_flags.
  381          */
  382         p2->p_lasttid = -1;     /* first tid will be 0 */
  383         p2->p_stat = SIDL;
  384 
  385         RB_INIT(&p2->p_lwp_tree);
  386         spin_init(&p2->p_spin);
  387         lwkt_token_init(&p2->p_token, "proc");
  388         lwkt_gettoken(&p2->p_token);
  389 
  390         /*
  391          * Setup linkage for kernel based threading XXX lwp.  Also add the
  392          * process to the allproclist.
  393          *
  394          * The process structure is addressable after this point.
  395          */
  396         if (flags & RFTHREAD) {
  397                 p2->p_peers = p1->p_peers;
  398                 p1->p_peers = p2;
  399                 p2->p_leader = p1->p_leader;
  400         } else {
  401                 p2->p_leader = p2;
  402         }
  403         proc_add_allproc(p2);
  404 
  405         /*
  406          * Initialize the section which is copied verbatim from the parent.
  407          */
  408         bcopy(&p1->p_startcopy, &p2->p_startcopy,
  409               ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy));
  410 
  411         /*
  412          * Duplicate sub-structures as needed.  Increase reference counts
  413          * on shared objects.
  414          *
  415          * NOTE: because we are now on the allproc list it is possible for
  416          *       other consumers to gain temporary references to p2
  417          *       (p2->p_lock can change).
  418          */
  419         if (p1->p_flags & P_PROFIL)
  420                 startprofclock(p2);
  421         p2->p_ucred = crhold(lp1->lwp_thread->td_ucred);
  422 
  423         if (jailed(p2->p_ucred))
  424                 p2->p_flags |= P_JAILED;
  425 
  426         if (p2->p_args)
  427                 refcount_acquire(&p2->p_args->ar_ref);
  428 
  429         p2->p_usched = p1->p_usched;
  430         /* XXX: verify copy of the secondary iosched stuff */
  431         dsched_new_proc(p2);
  432 
  433         if (flags & RFSIGSHARE) {
  434                 p2->p_sigacts = p1->p_sigacts;
  435                 refcount_acquire(&p2->p_sigacts->ps_refcnt);
  436         } else {
  437                 p2->p_sigacts = kmalloc(sizeof(*p2->p_sigacts),
  438                                         M_SUBPROC, M_WAITOK);
  439                 bcopy(p1->p_sigacts, p2->p_sigacts, sizeof(*p2->p_sigacts));
  440                 refcount_init(&p2->p_sigacts->ps_refcnt, 1);
  441         }
  442         if (flags & RFLINUXTHPN) 
  443                 p2->p_sigparent = SIGUSR1;
  444         else
  445                 p2->p_sigparent = SIGCHLD;
  446 
  447         /* bump references to the text vnode (for procfs) */
  448         p2->p_textvp = p1->p_textvp;
  449         if (p2->p_textvp)
  450                 vref(p2->p_textvp);
  451 
  452         /* copy namecache handle to the text file */
  453         if (p1->p_textnch.mount)
  454                 cache_copy(&p1->p_textnch, &p2->p_textnch);
  455 
  456         /*
  457          * Handle file descriptors
  458          */
  459         if (flags & RFCFDG) {
  460                 p2->p_fd = fdinit(p1);
  461                 fdtol = NULL;
  462         } else if (flags & RFFDG) {
  463                 error = fdcopy(p1, &p2->p_fd);
  464                 if (error != 0) {
  465                         error = ENOMEM;
  466                         goto done;
  467                 }
  468                 fdtol = NULL;
  469         } else {
  470                 p2->p_fd = fdshare(p1);
  471                 if (p1->p_fdtol == NULL) {
  472                         p1->p_fdtol = filedesc_to_leader_alloc(NULL,
  473                                                                p1->p_leader);
  474                 }
  475                 if ((flags & RFTHREAD) != 0) {
  476                         /*
  477                          * Shared file descriptor table and
  478                          * shared process leaders.
  479                          */
  480                         fdtol = p1->p_fdtol;
  481                         fdtol->fdl_refcount++;
  482                 } else {
  483                         /* 
  484                          * Shared file descriptor table, and
  485                          * different process leaders 
  486                          */
  487                         fdtol = filedesc_to_leader_alloc(p1->p_fdtol, p2);
  488                 }
  489         }
  490         p2->p_fdtol = fdtol;
  491         p2->p_limit = plimit_fork(p1);
  492 
  493         /*
  494          * Preserve some more flags in subprocess.  P_PROFIL has already
  495          * been preserved.
  496          */
  497         p2->p_flags |= p1->p_flags & P_SUGID;
  498         if (p1->p_session->s_ttyvp != NULL && (p1->p_flags & P_CONTROLT))
  499                 p2->p_flags |= P_CONTROLT;
  500         if (flags & RFPPWAIT)
  501                 p2->p_flags |= P_PPWAIT;
  502 
  503         /*
  504          * Inherit the virtual kernel structure (allows a virtual kernel
  505          * to fork to simulate multiple cpus).
  506          */
  507         if (p1->p_vkernel)
  508                 vkernel_inherit(p1, p2);
  509 
  510         /*
  511          * Once we are on a pglist we may receive signals.  XXX we might
  512          * race a ^C being sent to the process group by not receiving it
  513          * at all prior to this line.
  514          */
  515         pgref(p1grp);
  516         lwkt_gettoken(&p1grp->pg_token);
  517         LIST_INSERT_AFTER(p1, p2, p_pglist);
  518         lwkt_reltoken(&p1grp->pg_token);
  519 
  520         /*
  521          * Attach the new process to its parent.
  522          *
  523          * If RFNOWAIT is set, the newly created process becomes a child
  524          * of init.  This effectively disassociates the child from the
  525          * parent.
  526          */
  527         if (flags & RFNOWAIT)
  528                 pptr = initproc;
  529         else
  530                 pptr = p1;
  531         p2->p_pptr = pptr;
  532         LIST_INIT(&p2->p_children);
  533 
  534         lwkt_gettoken(&pptr->p_token);
  535         LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
  536         lwkt_reltoken(&pptr->p_token);
  537 
  538         varsymset_init(&p2->p_varsymset, &p1->p_varsymset);
  539         callout_init_mp(&p2->p_ithandle);
  540 
  541 #ifdef KTRACE
  542         /*
  543          * Copy traceflag and tracefile if enabled.  If not inherited,
  544          * these were zeroed above but we still could have a trace race
  545          * so make sure p2's p_tracenode is NULL.
  546          */
  547         if ((p1->p_traceflag & KTRFAC_INHERIT) && p2->p_tracenode == NULL) {
  548                 p2->p_traceflag = p1->p_traceflag;
  549                 p2->p_tracenode = ktrinherit(p1->p_tracenode);
  550         }
  551 #endif
  552 
  553         /*
  554          * This begins the section where we must prevent the parent
  555          * from being swapped.
  556          *
  557          * Gets PRELE'd in the caller in start_forked_proc().
  558          */
  559         PHOLD(p1);
  560 
  561         vm_fork(p1, p2, flags);
  562 
  563         /*
  564          * Create the first lwp associated with the new proc.
  565          * It will return via a different execution path later, directly
  566          * into userland, after it was put on the runq by
  567          * start_forked_proc().
  568          */
  569         lwp_fork(lp1, p2, flags);
  570 
  571         if (flags == (RFFDG | RFPROC | RFPGLOCK)) {
  572                 mycpu->gd_cnt.v_forks++;
  573                 mycpu->gd_cnt.v_forkpages += p2->p_vmspace->vm_dsize +
  574                                              p2->p_vmspace->vm_ssize;
  575         } else if (flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM | RFPGLOCK)) {
  576                 mycpu->gd_cnt.v_vforks++;
  577                 mycpu->gd_cnt.v_vforkpages += p2->p_vmspace->vm_dsize +
  578                                               p2->p_vmspace->vm_ssize;
  579         } else if (p1 == &proc0) {
  580                 mycpu->gd_cnt.v_kthreads++;
  581                 mycpu->gd_cnt.v_kthreadpages += p2->p_vmspace->vm_dsize +
  582                                                 p2->p_vmspace->vm_ssize;
  583         } else {
  584                 mycpu->gd_cnt.v_rforks++;
  585                 mycpu->gd_cnt.v_rforkpages += p2->p_vmspace->vm_dsize +
  586                                               p2->p_vmspace->vm_ssize;
  587         }
  588 
  589         /*
  590          * Both processes are set up, now check if any loadable modules want
  591          * to adjust anything.
  592          *   What if they have an error? XXX
  593          */
  594         TAILQ_FOREACH(ep, &fork_list, next) {
  595                 (*ep->function)(p1, p2, flags);
  596         }
  597 
  598         /*
  599          * Set the start time.  Note that the process is not runnable.  The
  600          * caller is responsible for making it runnable.
  601          */
  602         microtime(&p2->p_start);
  603         p2->p_acflag = AFORK;
  604 
  605         /*
  606          * tell any interested parties about the new process
  607          */
  608         KNOTE(&p1->p_klist, NOTE_FORK | p2->p_pid);
  609 
  610         /*
  611          * Return child proc pointer to parent.
  612          */
  613         *procp = p2;
  614         error = 0;
  615 done:
  616         if (p2)
  617                 lwkt_reltoken(&p2->p_token);
  618         lwkt_reltoken(&p1->p_token);
  619         if (plkgrp) {
  620                 lockmgr(&plkgrp->pg_lock, LK_RELEASE);
  621                 pgrel(plkgrp);
  622         }
  623         return (error);
  624 }
  625 
  626 static struct lwp *
  627 lwp_fork(struct lwp *origlp, struct proc *destproc, int flags)
  628 {
  629         globaldata_t gd = mycpu;
  630         struct lwp *lp;
  631         struct thread *td;
  632 
  633         lp = kmalloc(sizeof(struct lwp), M_LWP, M_WAITOK|M_ZERO);
  634 
  635         lp->lwp_proc = destproc;
  636         lp->lwp_vmspace = destproc->p_vmspace;
  637         lp->lwp_stat = LSRUN;
  638         bcopy(&origlp->lwp_startcopy, &lp->lwp_startcopy,
  639             (unsigned) ((caddr_t)&lp->lwp_endcopy -
  640                         (caddr_t)&lp->lwp_startcopy));
  641         lp->lwp_flags |= origlp->lwp_flags & LWP_ALTSTACK;
  642         /*
  643          * Set cpbase to the last timeout that occured (not the upcoming
  644          * timeout).
  645          *
  646          * A critical section is required since a timer IPI can update
  647          * scheduler specific data.
  648          */
  649         crit_enter();
  650         lp->lwp_cpbase = gd->gd_schedclock.time - gd->gd_schedclock.periodic;
  651         destproc->p_usched->heuristic_forking(origlp, lp);
  652         crit_exit();
  653         lp->lwp_cpumask &= usched_mastermask;
  654         lwkt_token_init(&lp->lwp_token, "lwp_token");
  655         spin_init(&lp->lwp_spin);
  656 
  657         /*
  658          * Assign the thread to the current cpu to begin with so we
  659          * can manipulate it.
  660          */
  661         td = lwkt_alloc_thread(NULL, LWKT_THREAD_STACK, gd->gd_cpuid, 0);
  662         lp->lwp_thread = td;
  663         td->td_ucred = crhold(destproc->p_ucred);
  664         td->td_proc = destproc;
  665         td->td_lwp = lp;
  666         td->td_switch = cpu_heavy_switch;
  667 #ifdef NO_LWKT_SPLIT_USERPRI
  668         lwkt_setpri(td, TDPRI_USER_NORM);
  669 #else
  670         lwkt_setpri(td, TDPRI_KERN_USER);
  671 #endif
  672         lwkt_set_comm(td, "%s", destproc->p_comm);
  673 
  674         /*
  675          * cpu_fork will copy and update the pcb, set up the kernel stack,
  676          * and make the child ready to run.
  677          */
  678         cpu_fork(origlp, lp, flags);
  679         kqueue_init(&lp->lwp_kqueue, destproc->p_fd);
  680 
  681         /*
  682          * Assign a TID to the lp.  Loop until the insert succeeds (returns
  683          * NULL).
  684          */
  685         lp->lwp_tid = destproc->p_lasttid;
  686         do {
  687                 if (++lp->lwp_tid < 0)
  688                         lp->lwp_tid = 1;
  689         } while (lwp_rb_tree_RB_INSERT(&destproc->p_lwp_tree, lp) != NULL);
  690         destproc->p_lasttid = lp->lwp_tid;
  691         destproc->p_nthreads++;
  692 
  693         /*
  694          * This flag is set and never cleared.  It means that the process
  695          * was threaded at some point.  Used to improve exit performance.
  696          */
  697         destproc->p_flags |= P_MAYBETHREADED;
  698 
  699         return (lp);
  700 }
  701 
  702 /*
  703  * The next two functionms are general routines to handle adding/deleting
  704  * items on the fork callout list.
  705  *
  706  * at_fork():
  707  * Take the arguments given and put them onto the fork callout list,
  708  * However first make sure that it's not already there.
  709  * Returns 0 on success or a standard error number.
  710  */
  711 int
  712 at_fork(forklist_fn function)
  713 {
  714         struct forklist *ep;
  715 
  716 #ifdef INVARIANTS
  717         /* let the programmer know if he's been stupid */
  718         if (rm_at_fork(function)) {
  719                 kprintf("WARNING: fork callout entry (%p) already present\n",
  720                     function);
  721         }
  722 #endif
  723         ep = kmalloc(sizeof(*ep), M_ATFORK, M_WAITOK|M_ZERO);
  724         ep->function = function;
  725         TAILQ_INSERT_TAIL(&fork_list, ep, next);
  726         return (0);
  727 }
  728 
  729 /*
  730  * Scan the exit callout list for the given item and remove it..
  731  * Returns the number of items removed (0 or 1)
  732  */
  733 int
  734 rm_at_fork(forklist_fn function)
  735 {
  736         struct forklist *ep;
  737 
  738         TAILQ_FOREACH(ep, &fork_list, next) {
  739                 if (ep->function == function) {
  740                         TAILQ_REMOVE(&fork_list, ep, next);
  741                         kfree(ep, M_ATFORK);
  742                         return(1);
  743                 }
  744         }       
  745         return (0);
  746 }
  747 
  748 /*
  749  * Add a forked process to the run queue after any remaining setup, such
  750  * as setting the fork handler, has been completed.
  751  *
  752  * p2 is held by the caller.
  753  */
  754 void
  755 start_forked_proc(struct lwp *lp1, struct proc *p2)
  756 {
  757         struct lwp *lp2 = ONLY_LWP_IN_PROC(p2);
  758         int pflags;
  759 
  760         /*
  761          * Move from SIDL to RUN queue, and activate the process's thread.
  762          * Activation of the thread effectively makes the process "a"
  763          * current process, so we do not setrunqueue().
  764          *
  765          * YYY setrunqueue works here but we should clean up the trampoline
  766          * code so we just schedule the LWKT thread and let the trampoline
  767          * deal with the userland scheduler on return to userland.
  768          */
  769         KASSERT(p2->p_stat == SIDL,
  770             ("cannot start forked process, bad status: %p", p2));
  771         p2->p_usched->resetpriority(lp2);
  772         crit_enter();
  773         p2->p_stat = SACTIVE;
  774         lp2->lwp_stat = LSRUN;
  775         p2->p_usched->setrunqueue(lp2);
  776         crit_exit();
  777 
  778         /*
  779          * Now can be swapped.
  780          */
  781         PRELE(lp1->lwp_proc);
  782 
  783         /*
  784          * Preserve synchronization semantics of vfork.  P_PPWAIT is set in
  785          * the child until it has retired the parent's resources.  The parent
  786          * must wait for the flag to be cleared by the child.
  787          *
  788          * Interlock the flag/tsleep with atomic ops to avoid unnecessary
  789          * p_token conflicts.
  790          *
  791          * XXX Is this use of an atomic op on a field that is not normally
  792          *     manipulated with atomic ops ok?
  793          */
  794         while ((pflags = p2->p_flags) & P_PPWAIT) {
  795                 cpu_ccfence();
  796                 tsleep_interlock(lp1->lwp_proc, 0);
  797                 if (atomic_cmpset_int(&p2->p_flags, pflags, pflags))
  798                         tsleep(lp1->lwp_proc, PINTERLOCKED, "ppwait", 0);
  799         }
  800 }
Cache object: 11e4c21fe8a614e32e1c99f78ef30c59
[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]
This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.
FreeBSD/Linux Kernel Cross Reference sys/kern/kern_fork.c

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_fork.c