FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_fork.c

/*
 * Copyright (c) 1982, 1986, 1989, 1991, 1993
 *      The Regents of the University of California.  All rights reserved.
 * (c) UNIX System Laboratories, Inc.
 * All or some portions of this file are derived from material licensed
 * to the University of California by American Telephone and Telegraph
 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
 * the permission of UNIX System Laboratories, Inc.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. All advertising materials mentioning features or use of this software
 *    must display the following acknowledgement:
 *      This product includes software developed by the University of
 *      California, Berkeley and its contributors.
 * 4. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *      @(#)kern_fork.c 8.6 (Berkeley) 4/8/94
 * $FreeBSD: releng/5.0/sys/kern/kern_fork.c 107719 2002-12-10 02:33:45Z julian $
 */

#include "opt_ktrace.h"
#include "opt_mac.h"

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/sysctl.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/pioctl.h>
#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/syscall.h>
#include <sys/vnode.h>
#include <sys/acct.h>
#include <sys/mac.h>
#include <sys/ktr.h>
#include <sys/ktrace.h>
#include <sys/kthread.h>
#include <sys/unistd.h>
#include <sys/jail.h>
#include <sys/sx.h>

#include <vm/vm.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>

#include <sys/vmmeter.h>
#include <sys/user.h>
#include <machine/critical.h>

static MALLOC_DEFINE(M_ATFORK, "atfork", "atfork callback");

/*
 * These are the structures used to create a callout list for things to do
 * when forking a process.
 */
struct forklist {
        forklist_fn function;
        TAILQ_ENTRY(forklist) next;
};

static struct sx fork_list_lock;

TAILQ_HEAD(forklist_head, forklist);
static struct forklist_head fork_list = TAILQ_HEAD_INITIALIZER(fork_list);

#ifndef _SYS_SYSPROTO_H_
struct fork_args {
        int     dummy;
};
#endif

int forksleep; /* Place for fork1() to sleep on. */

static void
init_fork_list(void *data __unused)
{

        sx_init(&fork_list_lock, "fork list");
}
SYSINIT(fork_list, SI_SUB_INTRINSIC, SI_ORDER_ANY, init_fork_list, NULL);

/*
 * MPSAFE
 */
/* ARGSUSED */
int
fork(td, uap)
        struct thread *td;
        struct fork_args *uap;
{
        int error;
        struct proc *p2;

        mtx_lock(&Giant);
        error = fork1(td, RFFDG | RFPROC, 0, &p2);
        if (error == 0) {
                td->td_retval[0] = p2->p_pid;
                td->td_retval[1] = 0;
        }
        mtx_unlock(&Giant);
        return error;
}

/*
 * MPSAFE
 */
/* ARGSUSED */
int
vfork(td, uap)
        struct thread *td;
        struct vfork_args *uap;
{
        int error;
        struct proc *p2;

        mtx_lock(&Giant);
        error = fork1(td, RFFDG | RFPROC | RFPPWAIT | RFMEM, 0, &p2);
        if (error == 0) {
                td->td_retval[0] = p2->p_pid;
                td->td_retval[1] = 0;
        }
        mtx_unlock(&Giant);
        return error;
}

/*
 * MPSAFE
 */
int
rfork(td, uap)
        struct thread *td;
        struct rfork_args *uap;
{
        int error;
        struct proc *p2;

        /* Don't allow kernel-only flags. */
        if ((uap->flags & RFKERNELONLY) != 0)
                return (EINVAL);
        mtx_lock(&Giant);
        error = fork1(td, uap->flags, 0, &p2);
        if (error == 0) {
                td->td_retval[0] = p2 ? p2->p_pid : 0;
                td->td_retval[1] = 0;
        }
        mtx_unlock(&Giant);
        return error;
}
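
/*
 * Example (illustrative userland sketch, not part of this file): the
 * three wrappers above differ only in the flags they hand to fork1(),
 * and rfork(2) exposes those flags directly:
 *
 *      #include <unistd.h>
 *
 *      pid_t pid;
 *
 *      pid = fork();                   fork1(td, RFFDG | RFPROC, ...)
 *      pid = vfork();                  fork1(td, RFFDG | RFPROC |
 *                                          RFPPWAIT | RFMEM, ...)
 *      pid = rfork(RFFDG | RFPROC);    equivalent to fork()
 *
 * Kernel-only flags (RFKERNELONLY) are rejected with EINVAL before
 * fork1() is ever reached.
 */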

int     nprocs = 1;                             /* process 0 */
int     lastpid = 0;
SYSCTL_INT(_kern, OID_AUTO, lastpid, CTLFLAG_RD, &lastpid, 0,
    "Last used PID");

/*
 * Random component to lastpid generation.  We mix in a random factor to
 * make it a little harder to predict.  We sanity-check the modulus value
 * here to avoid doing it in critical paths.  Don't let it be too small or
 * we pointlessly waste entropy, and don't let it be impossibly large: a
 * modulus that is too big causes many more process table scans and slows
 * down fork processing as the pidchecked caching is defeated.
 */
static int randompid = 0;

static int
sysctl_kern_randompid(SYSCTL_HANDLER_ARGS)
{
        int error, pid;

        sysctl_wire_old_buffer(req, sizeof(int));
        sx_xlock(&allproc_lock);
        pid = randompid;
        error = sysctl_handle_int(oidp, &pid, 0, req);
        if (error == 0 && req->newptr != NULL) {
                if (pid < 0 || pid > PID_MAX - 100)     /* out of range */
                        pid = PID_MAX - 100;
                else if (pid < 2)                       /* NOP */
                        pid = 0;
                else if (pid < 100)                     /* Make it reasonable */
                        pid = 100;
                randompid = pid;
        }
        sx_xunlock(&allproc_lock);
        return (error);
}

SYSCTL_PROC(_kern, OID_AUTO, randompid, CTLTYPE_INT|CTLFLAG_RW,
    0, 0, sysctl_kern_randompid, "I", "Random PID modulus");
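
/*
 * Worked example (illustrative): with kern.randompid set to 500 and
 * lastpid at 1000, the next fork starts its PID search at
 * 1000 + 1 + (arc4random() % 500), i.e. somewhere in [1001, 1500],
 * rather than always at 1001.  From userland:
 *
 *      sysctl kern.randompid=500
 */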

int
fork1(td, flags, pages, procp)
        struct thread *td;                      /* parent proc */
        int flags;
        int pages;
        struct proc **procp;                    /* child proc */
{
        struct proc *p2, *pptr;
        uid_t uid;
        struct proc *newproc;
        int trypid;
        int ok;
        static int pidchecked = 0;
        struct forklist *ep;
        struct filedesc *fd;
        struct proc *p1 = td->td_proc;
        struct thread *td2;
        struct kse *ke2;
        struct ksegrp *kg2;
        struct sigacts *newsigacts;
        struct procsig *newprocsig;
        int error;

        GIANT_REQUIRED;

        /* Can't copy and clear. */
        if ((flags & (RFFDG|RFCFDG)) == (RFFDG|RFCFDG))
                return (EINVAL);

        /*
         * Here we don't create a new process, but we divorce
         * certain parts of a process from itself.
         */
        if ((flags & RFPROC) == 0) {
                vm_forkproc(td, NULL, NULL, flags);

                /*
                 * Close all file descriptors.
                 */
                if (flags & RFCFDG) {
                        struct filedesc *fdtmp;
                        fdtmp = fdinit(td);     /* XXXKSE */
                        fdfree(td);             /* XXXKSE */
                        p1->p_fd = fdtmp;
                }

                /*
                 * Unshare file descriptors (from parent).
                 */
                if (flags & RFFDG) {
                        FILEDESC_LOCK(p1->p_fd);
                        if (p1->p_fd->fd_refcnt > 1) {
                                struct filedesc *newfd;

                                newfd = fdcopy(td);
                                FILEDESC_UNLOCK(p1->p_fd);
                                fdfree(td);
                                p1->p_fd = newfd;
                        } else
                                FILEDESC_UNLOCK(p1->p_fd);
                }
                *procp = NULL;
                return (0);
        }
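
        /*
         * Example (illustrative): a process that wants its own copy of
         * the descriptor table without creating a child can call
         *
         *      rfork(RFFDG);           unshare descriptors, no new process
         *      rfork(RFCFDG);          start over with a fresh, empty table
         *
         * Both take the (flags & RFPROC) == 0 path above and return with
         * *procp set to NULL.
         */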

        if (p1->p_flag & P_KSES) {
                /*
                 * Idle the other threads for a moment.
                 * Since the user space is copied, it must remain stable.
                 * In addition, all threads (from the user perspective)
                 * need to either be suspended or in the kernel,
                 * where they will try to restart in the parent and will
                 * be aborted in the child.
                 */
                PROC_LOCK(p1);
                if (thread_single(SINGLE_NO_EXIT)) {
                        /* Abort: someone else is single-threading before us. */
                        PROC_UNLOCK(p1);
                        return (ERESTART);
                }
                PROC_UNLOCK(p1);
                /*
                 * All other activity in this process
                 * is now suspended at the user boundary
                 * (or other safe places if we think of any).
                 */
        }

        /* Allocate new proc. */
        newproc = uma_zalloc(proc_zone, M_WAITOK);
#ifdef MAC
        mac_init_proc(newproc);
#endif

        /*
         * Although process entries are dynamically created, we still keep
         * a global limit on the maximum number we will create.  Don't allow
         * a nonprivileged user to use the last ten processes; don't let root
         * exceed the limit.  The variable nprocs is the current number of
         * processes, maxproc is the limit.
         */
        sx_xlock(&allproc_lock);
        uid = td->td_ucred->cr_ruid;
        if ((nprocs >= maxproc - 10 && uid != 0) || nprocs >= maxproc) {
                error = EAGAIN;
                goto fail;
        }

        /*
         * Increment the count of procs running with this uid.  Don't allow
         * a nonprivileged user to exceed their current limit.
         */
        PROC_LOCK(p1);
        ok = chgproccnt(td->td_ucred->cr_ruidinfo, 1,
                (uid != 0) ? p1->p_rlimit[RLIMIT_NPROC].rlim_cur : 0);
        PROC_UNLOCK(p1);
        if (!ok) {
                error = EAGAIN;
                goto fail;
        }

        /*
         * Increment the nprocs resource before blocking can occur.  There
         * are hard limits on the number of processes that can run.
         */
        nprocs++;

        /*
         * Find an unused process ID.  We remember a range of unused IDs
         * ready to use (from lastpid+1 through pidchecked-1).
         *
         * If RFHIGHPID is set (used during system boot), do not allocate
         * low-numbered pids.
         */
        trypid = lastpid + 1;
        if (flags & RFHIGHPID) {
                if (trypid < 10) {
                        trypid = 10;
                }
        } else {
                if (randompid)
                        trypid += arc4random() % randompid;
        }
retry:
        /*
         * If the process ID prototype has wrapped around,
         * restart somewhat above 0, as the low-numbered procs
         * tend to include daemons that don't exit.
         */
        if (trypid >= PID_MAX) {
                trypid = trypid % PID_MAX;
                if (trypid < 100)
                        trypid += 100;
                pidchecked = 0;
        }
        if (trypid >= pidchecked) {
                int doingzomb = 0;

                pidchecked = PID_MAX;
                /*
                 * Scan the active and zombie procs to check whether this pid
                 * is in use.  Remember the lowest pid that's greater
                 * than trypid, so we can avoid checking for a while.
                 */
                p2 = LIST_FIRST(&allproc);
again:
                for (; p2 != NULL; p2 = LIST_NEXT(p2, p_list)) {
                        PROC_LOCK(p2);
                        while (p2->p_pid == trypid ||
                            p2->p_pgrp->pg_id == trypid ||
                            p2->p_session->s_sid == trypid) {
                                trypid++;
                                if (trypid >= pidchecked) {
                                        PROC_UNLOCK(p2);
                                        goto retry;
                                }
                        }
                        if (p2->p_pid > trypid && pidchecked > p2->p_pid)
                                pidchecked = p2->p_pid;
                        if (p2->p_pgrp->pg_id > trypid &&
                            pidchecked > p2->p_pgrp->pg_id)
                                pidchecked = p2->p_pgrp->pg_id;
                        if (p2->p_session->s_sid > trypid &&
                            pidchecked > p2->p_session->s_sid)
                                pidchecked = p2->p_session->s_sid;
                        PROC_UNLOCK(p2);
                }
                if (!doingzomb) {
                        doingzomb = 1;
                        p2 = LIST_FIRST(&zombproc);
                        goto again;
                }
        }
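
        /*
         * Worked example (illustrative): if trypid is 100 and ids 100 and
         * 101 are in use (as a pid, process group id, or session id), the
         * scan above (restarting as needed) settles on trypid 102 and
         * leaves pidchecked at the smallest in-use id greater than trypid.
         * Later forks can then hand out 102, 103, ... without rescanning
         * the proc lists until trypid reaches pidchecked again.
         */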

        /*
         * RFHIGHPID does not mess with the lastpid counter during boot.
         */
        if (flags & RFHIGHPID)
                pidchecked = 0;
        else
                lastpid = trypid;

        p2 = newproc;
        p2->p_state = PRS_NEW;          /* protect against others */
        p2->p_pid = trypid;
        LIST_INSERT_HEAD(&allproc, p2, p_list);
        LIST_INSERT_HEAD(PIDHASH(p2->p_pid), p2, p_hash);
        sx_xunlock(&allproc_lock);

        /*
         * Malloc things while we don't hold any locks.
         */
        if (flags & RFSIGSHARE) {
                MALLOC(newsigacts, struct sigacts *,
                    sizeof(struct sigacts), M_SUBPROC, M_WAITOK);
                newprocsig = NULL;
        } else {
                newsigacts = NULL;
                MALLOC(newprocsig, struct procsig *, sizeof(struct procsig),
                    M_SUBPROC, M_WAITOK);
        }

        /*
         * Copy filedesc.
         * XXX: This is busted.  fd*() need to not take proc
         * arguments or something.
         */
        if (flags & RFCFDG)
                fd = fdinit(td);
        else if (flags & RFFDG) {
                FILEDESC_LOCK(p1->p_fd);
                fd = fdcopy(td);
                FILEDESC_UNLOCK(p1->p_fd);
        } else
                fd = fdshare(p1);

        /*
         * Make a proc table entry for the new process.
         * Start by zeroing the section of proc that is zero-initialized,
         * then copy the section that is copied directly from the parent.
         */
        td2 = FIRST_THREAD_IN_PROC(p2);
        kg2 = FIRST_KSEGRP_IN_PROC(p2);
        ke2 = FIRST_KSE_IN_KSEGRP(kg2);

        /* Allocate and switch to an alternate kstack if specified */
        if (pages != 0)
                pmap_new_altkstack(td2, pages);

#define RANGEOF(type, start, end) (offsetof(type, end) - offsetof(type, start))
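
/*
 * RANGEOF(type, start, end) is the number of bytes from member 'start'
 * up to (but not including) member 'end', e.g.
 * RANGEOF(struct proc, p_startzero, p_endzero) is the size of the region
 * of struct proc that fork zeroes.  The bzero()/bcopy() calls below rely
 * on this to treat each marker-delimited region as one contiguous block.
 */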

        bzero(&p2->p_startzero,
            (unsigned) RANGEOF(struct proc, p_startzero, p_endzero));
        bzero(&ke2->ke_startzero,
            (unsigned) RANGEOF(struct kse, ke_startzero, ke_endzero));
        bzero(&td2->td_startzero,
            (unsigned) RANGEOF(struct thread, td_startzero, td_endzero));
        bzero(&kg2->kg_startzero,
            (unsigned) RANGEOF(struct ksegrp, kg_startzero, kg_endzero));

        mtx_init(&p2->p_mtx, "process lock", NULL, MTX_DEF | MTX_DUPOK);
        PROC_LOCK(p2);
        PROC_LOCK(p1);

        bcopy(&p1->p_startcopy, &p2->p_startcopy,
            (unsigned) RANGEOF(struct proc, p_startcopy, p_endcopy));
        bcopy(&td->td_startcopy, &td2->td_startcopy,
            (unsigned) RANGEOF(struct thread, td_startcopy, td_endcopy));
        bcopy(&td->td_ksegrp->kg_startcopy, &kg2->kg_startcopy,
            (unsigned) RANGEOF(struct ksegrp, kg_startcopy, kg_endcopy));
#undef RANGEOF

        /* Set up the thread as an active thread (as if runnable). */
        ke2->ke_state = KES_THREAD;
        ke2->ke_thread = td2;
        td2->td_kse = ke2;
        td2->td_flags &= ~TDF_UNBOUND; /* For the rest of this syscall. */

        /*
         * Duplicate sub-structures as needed.
         * Increase reference counts on shared objects.
         * The p_stats and p_sigacts substructs are set in vm_forkproc.
         */
        p2->p_flag = 0;
        mtx_lock_spin(&sched_lock);
        p2->p_sflag = PS_INMEM;
        if (p1->p_sflag & PS_PROFIL)
                startprofclock(p2);
        /*
         * Allow the scheduler to adjust the priority of the child and
         * parent while we hold the sched_lock.
         */
        sched_fork(td->td_ksegrp, kg2);

        mtx_unlock_spin(&sched_lock);
        p2->p_ucred = crhold(td->td_ucred);
        td2->td_ucred = crhold(p2->p_ucred);    /* XXXKSE */

        pargs_hold(p2->p_args);

        if (flags & RFSIGSHARE) {
                p2->p_procsig = p1->p_procsig;
                p2->p_procsig->ps_refcnt++;
                if (p1->p_sigacts == &p1->p_uarea->u_sigacts) {
                        /*
                         * Set p_sigacts to the new shared structure.
                         * Note that this is updating p1->p_sigacts at the
                         * same time, since p_sigacts is just a pointer to
                         * the shared p_procsig->ps_sigacts.
                         */
                        p2->p_sigacts = newsigacts;
                        newsigacts = NULL;
                        *p2->p_sigacts = p1->p_uarea->u_sigacts;
                }
        } else {
                p2->p_procsig = newprocsig;
                newprocsig = NULL;
                bcopy(p1->p_procsig, p2->p_procsig, sizeof(*p2->p_procsig));
                p2->p_procsig->ps_refcnt = 1;
                p2->p_sigacts = NULL;   /* finished in vm_forkproc() */
        }
        if (flags & RFLINUXTHPN)
                p2->p_sigparent = SIGUSR1;
        else
                p2->p_sigparent = SIGCHLD;

        /* Bump references to the text vnode (for procfs). */
        p2->p_textvp = p1->p_textvp;
        if (p2->p_textvp)
                VREF(p2->p_textvp);
        p2->p_fd = fd;
        PROC_UNLOCK(p1);
        PROC_UNLOCK(p2);

        /*
         * If p_limit is still copy-on-write, bump refcnt;
         * otherwise get a copy that won't be modified.
         * (If PL_SHAREMOD is clear, the structure is shared
         * copy-on-write.)
         */
        if (p1->p_limit->p_lflags & PL_SHAREMOD)
                p2->p_limit = limcopy(p1->p_limit);
        else {
                p2->p_limit = p1->p_limit;
                p2->p_limit->p_refcnt++;
        }

        /*
         * Set up linkage for kernel-based threading.
         */
        if ((flags & RFTHREAD) != 0) {
                mtx_lock(&ppeers_lock);
                p2->p_peers = p1->p_peers;
                p1->p_peers = p2;
                p2->p_leader = p1->p_leader;
                mtx_unlock(&ppeers_lock);
                PROC_LOCK(p1->p_leader);
                if ((p1->p_leader->p_flag & P_WEXIT) != 0) {
                        PROC_UNLOCK(p1->p_leader);
                        /*
                         * The task leader is exiting, so process p1 is
                         * going to be killed shortly.  Since p1 obviously
                         * isn't dead yet, we know that the leader is either
                         * sending SIGKILL's to all the processes in this
                         * task or is sleeping waiting for all the peers to
                         * exit.  We let p1 complete the fork, but we need
                         * to go ahead and kill the new process p2 since
                         * the task leader may not get a chance to send
                         * SIGKILL to it.  We leave it on the list so that
                         * the task leader will wait for this new process
                         * to commit suicide.
                         */
                        PROC_LOCK(p2);
                        psignal(p2, SIGKILL);
                        PROC_UNLOCK(p2);
                } else
                        PROC_UNLOCK(p1->p_leader);
        } else {
                p2->p_peers = NULL;
                p2->p_leader = p2;
        }

        sx_xlock(&proctree_lock);
        PGRP_LOCK(p1->p_pgrp);
        PROC_LOCK(p2);
        PROC_LOCK(p1);

        /*
         * Preserve some more flags in the subprocess.  PS_PROFIL has
         * already been preserved.
         */
        p2->p_flag |= p1->p_flag & (P_SUGID | P_ALTSTACK);
        SESS_LOCK(p1->p_session);
        if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
                p2->p_flag |= P_CONTROLT;
        SESS_UNLOCK(p1->p_session);
        if (flags & RFPPWAIT)
                p2->p_flag |= P_PPWAIT;

        LIST_INSERT_AFTER(p1, p2, p_pglist);
        PGRP_UNLOCK(p1->p_pgrp);
        LIST_INIT(&p2->p_children);

        callout_init(&p2->p_itcallout, 0);

#ifdef KTRACE
        /*
         * Copy traceflag and tracefile if enabled.
         */
        mtx_lock(&ktrace_mtx);
        KASSERT(p2->p_tracep == NULL, ("new process has a ktrace vnode"));
        if (p1->p_traceflag & KTRFAC_INHERIT) {
                p2->p_traceflag = p1->p_traceflag;
                if ((p2->p_tracep = p1->p_tracep) != NULL)
                        VREF(p2->p_tracep);
        }
        mtx_unlock(&ktrace_mtx);
#endif

        /*
         * If PF_FORK is set, the child process inherits the
         * procfs ioctl flags from its parent.
         */
        if (p1->p_pfsflags & PF_FORK) {
                p2->p_stops = p1->p_stops;
                p2->p_pfsflags = p1->p_pfsflags;
        }

        /*
         * This begins the section where we must prevent the parent
         * from being swapped.
         */
        _PHOLD(p1);
        PROC_UNLOCK(p1);

        /*
         * Attach the new process to its parent.
         *
         * If RFNOWAIT is set, the newly created process becomes a child
         * of init.  This effectively disassociates the child from the
         * parent.
         */
        if (flags & RFNOWAIT)
                pptr = initproc;
        else
                pptr = p1;
        p2->p_pptr = pptr;
        LIST_INSERT_HEAD(&pptr->p_children, p2, p_sibling);
        PROC_UNLOCK(p2);
        sx_xunlock(&proctree_lock);

        KASSERT(newprocsig == NULL, ("unused newprocsig"));
        if (newsigacts != NULL)
                FREE(newsigacts, M_SUBPROC);
        /*
         * Finish creating the child process.  It will return via a
         * different execution path later (i.e., directly into user mode).
         */
        vm_forkproc(td, p2, td2, flags);

        if (flags == (RFFDG | RFPROC)) {
                cnt.v_forks++;
                cnt.v_forkpages += p2->p_vmspace->vm_dsize +
                    p2->p_vmspace->vm_ssize;
        } else if (flags == (RFFDG | RFPROC | RFPPWAIT | RFMEM)) {
                cnt.v_vforks++;
                cnt.v_vforkpages += p2->p_vmspace->vm_dsize +
                    p2->p_vmspace->vm_ssize;
        } else if (p1 == &proc0) {
                cnt.v_kthreads++;
                cnt.v_kthreadpages += p2->p_vmspace->vm_dsize +
                    p2->p_vmspace->vm_ssize;
        } else {
                cnt.v_rforks++;
                cnt.v_rforkpages += p2->p_vmspace->vm_dsize +
                    p2->p_vmspace->vm_ssize;
        }

        /*
         * Both processes are set up; now check if any loadable modules
         * want to adjust anything.
         *   What if they have an error? XXX
         */
        sx_slock(&fork_list_lock);
        TAILQ_FOREACH(ep, &fork_list, next) {
                (*ep->function)(p1, p2, flags);
        }
        sx_sunlock(&fork_list_lock);

        /*
         * If RFSTOPPED was not requested, make the child runnable and
         * add it to the run queue.
         */
        microtime(&(p2->p_stats->p_start));
        p2->p_acflag = AFORK;
        if ((flags & RFSTOPPED) == 0) {
                mtx_lock_spin(&sched_lock);
                p2->p_state = PRS_NORMAL;
                TD_SET_CAN_RUN(td2);
                setrunqueue(td2);
                mtx_unlock_spin(&sched_lock);
        }

        /*
         * Now the parent can be swapped again.
         */
        PROC_LOCK(p1);
        _PRELE(p1);

        /*
         * Tell any interested parties about the new process.
         */
        KNOTE(&p1->p_klist, NOTE_FORK | p2->p_pid);
        PROC_UNLOCK(p1);

        /*
         * Preserve synchronization semantics of vfork.  If waiting for
         * the child to exec or exit, set P_PPWAIT on the child and sleep
         * on our proc (in case of exit).
         */
        PROC_LOCK(p2);
        while (p2->p_flag & P_PPWAIT)
                msleep(p1, &p2->p_mtx, PWAIT, "ppwait", 0);
        PROC_UNLOCK(p2);

        /*
         * If other threads are waiting, let them continue now.
         */
        if (p1->p_flag & P_KSES) {
                PROC_LOCK(p1);
                thread_single_end();
                PROC_UNLOCK(p1);
        }

        /*
         * Return child proc pointer to parent.
         */
        *procp = p2;
        return (0);
fail:
        sx_xunlock(&allproc_lock);
        uma_zfree(proc_zone, newproc);
        if (p1->p_flag & P_KSES) {
                PROC_LOCK(p1);
                thread_single_end();
                PROC_UNLOCK(p1);
        }
        tsleep(&forksleep, PUSER, "fork", hz / 2);
        return (error);
}
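
/*
 * Example (illustrative): kthread_create() is one caller that passes
 * RFSTOPPED, so that it can install the new thread's entry point via
 * cpu_set_fork_handler() before making the thread runnable itself.
 */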

/*
 * The next two functions are general routines for adding/removing items
 * on the fork callout list.
 *
 * at_fork():
 * Take the arguments given and put them onto the fork callout list;
 * first, make sure the entry is not already there.
 * Returns 0 on success or a standard error number.
 */

int
at_fork(function)
        forklist_fn function;
{
        struct forklist *ep;

#ifdef INVARIANTS
        /* let the programmer know if he's been stupid */
        if (rm_at_fork(function))
                printf("WARNING: fork callout entry (%p) already present\n",
                    function);
#endif
        ep = malloc(sizeof(*ep), M_ATFORK, M_NOWAIT);
        if (ep == NULL)
                return (ENOMEM);
        ep->function = function;
        sx_xlock(&fork_list_lock);
        TAILQ_INSERT_TAIL(&fork_list, ep, next);
        sx_xunlock(&fork_list_lock);
        return (0);
}

/*
 * Scan the fork callout list for the given item and remove it.
 * Returns the number of items removed (0 or 1).
 */

int
rm_at_fork(function)
        forklist_fn function;
{
        struct forklist *ep;

        sx_xlock(&fork_list_lock);
        TAILQ_FOREACH(ep, &fork_list, next) {
                if (ep->function == function) {
                        TAILQ_REMOVE(&fork_list, ep, next);
                        sx_xunlock(&fork_list_lock);
                        free(ep, M_ATFORK);
                        return (1);
                }
        }
        sx_xunlock(&fork_list_lock);
        return (0);
}
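
/*
 * Usage sketch (illustrative; 'watch_fork' is a hypothetical module
 * callback, not part of this file):
 *
 *      static void
 *      watch_fork(struct proc *parent, struct proc *child, int flags)
 *      {
 *              printf("pid %d forked pid %d\n", parent->p_pid,
 *                  child->p_pid);
 *      }
 *
 *      error = at_fork(watch_fork);    register; runs after every fork1()
 *      ...
 *      rm_at_fork(watch_fork);         deregister, e.g. at module unload
 */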

/*
 * Handle the return of a child process from fork1().  This function
 * is called from the MD fork_trampoline() entry point.
 */
void
fork_exit(callout, arg, frame)
        void (*callout)(void *, struct trapframe *);
        void *arg;
        struct trapframe *frame;
{
        struct thread *td;
        struct proc *p;

        if ((td = PCPU_GET(deadthread))) {
                PCPU_SET(deadthread, NULL);
                thread_stash(td);
        }
        td = curthread;
        p = td->td_proc;
        td->td_kse->ke_oncpu = PCPU_GET(cpuid);
        p->p_state = PRS_NORMAL;
        /*
         * Finish setting up thread glue.  We need to initialize
         * the thread into a td_critnest=1 state.  Some platforms
         * may have already partially or fully initialized td_critnest
         * and/or td_md.md_savecrit (when applicable).
         *
         * See <arch>/<arch>/critical.c.
         */
        sched_lock.mtx_lock = (uintptr_t)td;
        sched_lock.mtx_recurse = 0;
        cpu_critical_fork_exit();
        CTR3(KTR_PROC, "fork_exit: new thread %p (pid %d, %s)", td, p->p_pid,
            p->p_comm);
        if (PCPU_GET(switchtime.sec) == 0)
                binuptime(PCPU_PTR(switchtime));
        PCPU_SET(switchticks, ticks);
        mtx_unlock_spin(&sched_lock);

        /*
         * cpu_set_fork_handler intercepts this function call to
         * have this call a non-return function to stay in kernel mode.
         * initproc has its own fork handler, but it does return.
         */
        KASSERT(callout != NULL, ("NULL callout in fork_exit"));
        callout(arg, frame);

        /*
         * Check if a kernel thread misbehaved and returned from its main
         * function.
         */
        PROC_LOCK(p);
        if (p->p_flag & P_KTHREAD) {
                PROC_UNLOCK(p);
                mtx_lock(&Giant);
                printf("Kernel thread \"%s\" (pid %d) exited prematurely.\n",
                    p->p_comm, p->p_pid);
                kthread_exit(0);
        }
        PROC_UNLOCK(p);
#ifdef DIAGNOSTIC
        cred_free_thread(td);
#endif
        mtx_assert(&Giant, MA_NOTOWNED);
}

/*
 * Simplified back end of syscall(), used when returning from fork()
 * directly into user mode.  Giant is not held on entry, and must not
 * be held on return.  This function is passed in to fork_exit() as the
 * first parameter and is called when returning to a new userland process.
 */
void
fork_return(td, frame)
        struct thread *td;
        struct trapframe *frame;
{

        userret(td, frame, 0);
#ifdef KTRACE
        if (KTRPOINT(td, KTR_SYSRET))
                ktrsysret(SYS_fork, 0, 0);
#endif
        mtx_assert(&Giant, MA_NOTOWNED);
}
