The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kern/kern_fork.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*      $NetBSD: kern_fork.c,v 1.128 2006/11/01 10:17:58 yamt Exp $     */
    2 
    3 /*-
    4  * Copyright (c) 1999, 2001, 2004 The NetBSD Foundation, Inc.
    5  * All rights reserved.
    6  *
    7  * This code is derived from software contributed to The NetBSD Foundation
    8  * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
    9  * NASA Ames Research Center.
   10  * This code is derived from software contributed to The NetBSD Foundation
   11  * by Charles M. Hannum.
   12  *
   13  * Redistribution and use in source and binary forms, with or without
   14  * modification, are permitted provided that the following conditions
   15  * are met:
   16  * 1. Redistributions of source code must retain the above copyright
   17  *    notice, this list of conditions and the following disclaimer.
   18  * 2. Redistributions in binary form must reproduce the above copyright
   19  *    notice, this list of conditions and the following disclaimer in the
   20  *    documentation and/or other materials provided with the distribution.
   21  * 3. All advertising materials mentioning features or use of this software
   22  *    must display the following acknowledgement:
   23  *      This product includes software developed by the NetBSD
   24  *      Foundation, Inc. and its contributors.
   25  * 4. Neither the name of The NetBSD Foundation nor the names of its
   26  *    contributors may be used to endorse or promote products derived
   27  *    from this software without specific prior written permission.
   28  *
   29  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
   30  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
   31  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
   32  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
   33  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
   34  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
   35  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
   36  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
   37  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
   38  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
   39  * POSSIBILITY OF SUCH DAMAGE.
   40  */
   41 
   42 /*
   43  * Copyright (c) 1982, 1986, 1989, 1991, 1993
   44  *      The Regents of the University of California.  All rights reserved.
   45  * (c) UNIX System Laboratories, Inc.
   46  * All or some portions of this file are derived from material licensed
   47  * to the University of California by American Telephone and Telegraph
   48  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
   49  * the permission of UNIX System Laboratories, Inc.
   50  *
   51  * Redistribution and use in source and binary forms, with or without
   52  * modification, are permitted provided that the following conditions
   53  * are met:
   54  * 1. Redistributions of source code must retain the above copyright
   55  *    notice, this list of conditions and the following disclaimer.
   56  * 2. Redistributions in binary form must reproduce the above copyright
   57  *    notice, this list of conditions and the following disclaimer in the
   58  *    documentation and/or other materials provided with the distribution.
   59  * 3. Neither the name of the University nor the names of its contributors
   60  *    may be used to endorse or promote products derived from this software
   61  *    without specific prior written permission.
   62  *
   63  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   64  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   65  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   66  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   67  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   68  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   69  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   70  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   71  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   72  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   73  * SUCH DAMAGE.
   74  *
   75  *      @(#)kern_fork.c 8.8 (Berkeley) 2/14/95
   76  */
   77 
   78 #include <sys/cdefs.h>
   79 __KERNEL_RCSID(0, "$NetBSD: kern_fork.c,v 1.128 2006/11/01 10:17:58 yamt Exp $");
   80 
   81 #include "opt_ktrace.h"
   82 #include "opt_systrace.h"
   83 #include "opt_multiprocessor.h"
   84 
   85 #include <sys/param.h>
   86 #include <sys/systm.h>
   87 #include <sys/filedesc.h>
   88 #include <sys/kernel.h>
   89 #include <sys/malloc.h>
   90 #include <sys/pool.h>
   91 #include <sys/mount.h>
   92 #include <sys/proc.h>
   93 #include <sys/ras.h>
   94 #include <sys/resourcevar.h>
   95 #include <sys/vnode.h>
   96 #include <sys/file.h>
   97 #include <sys/acct.h>
   98 #include <sys/ktrace.h>
   99 #include <sys/vmmeter.h>
  100 #include <sys/sched.h>
  101 #include <sys/signalvar.h>
  102 #include <sys/systrace.h>
  103 #include <sys/kauth.h>
  104 
  105 #include <sys/sa.h>
  106 #include <sys/syscallargs.h>
  107 
  108 #include <uvm/uvm_extern.h>
  109 
  110 
/*
 * Current number of processes.  Starts at 1 to account for process 0;
 * fork1() increments it for every process it creates and compares it
 * against maxproc.
 */
int     nprocs = 1;             /* process 0 */

/*
 * Number of ticks to sleep if fork() would fail due to process hitting
 * limits. Exported in milliseconds to userland via sysctl.
 * A value of 0 means fork1() fails immediately without sleeping.
 */
int     forkfsleep = 0;
  118 
  119 /*ARGSUSED*/
  120 int
  121 sys_fork(struct lwp *l, void *v, register_t *retval)
  122 {
  123 
  124         return (fork1(l, 0, SIGCHLD, NULL, 0, NULL, NULL, retval, NULL));
  125 }
  126 
  127 /*
  128  * vfork(2) system call compatible with 4.4BSD (i.e. BSD with Mach VM).
  129  * Address space is not shared, but parent is blocked until child exit.
  130  */
  131 /*ARGSUSED*/
  132 int
  133 sys_vfork(struct lwp *l, void *v, register_t *retval)
  134 {
  135 
  136         return (fork1(l, FORK_PPWAIT, SIGCHLD, NULL, 0, NULL, NULL,
  137             retval, NULL));
  138 }
  139 
  140 /*
  141  * New vfork(2) system call for NetBSD, which implements original 3BSD vfork(2)
  142  * semantics.  Address space is shared, and parent is blocked until child exit.
  143  */
  144 /*ARGSUSED*/
  145 int
  146 sys___vfork14(struct lwp *l, void *v, register_t *retval)
  147 {
  148 
  149         return (fork1(l, FORK_PPWAIT|FORK_SHAREVM, SIGCHLD, NULL, 0,
  150             NULL, NULL, retval, NULL));
  151 }
  152 
  153 /*
  154  * Linux-compatible __clone(2) system call.
  155  */
  156 int
  157 sys___clone(struct lwp *l, void *v, register_t *retval)
  158 {
  159         struct sys___clone_args /* {
  160                 syscallarg(int) flags;
  161                 syscallarg(void *) stack;
  162         } */ *uap = v;
  163         int flags, sig;
  164 
  165         /*
  166          * We don't support the CLONE_PID or CLONE_PTRACE flags.
  167          */
  168         if (SCARG(uap, flags) & (CLONE_PID|CLONE_PTRACE))
  169                 return (EINVAL);
  170 
  171         /*
  172          * Linux enforces CLONE_VM with CLONE_SIGHAND, do same.
  173          */
  174         if (SCARG(uap, flags) & CLONE_SIGHAND
  175             && (SCARG(uap, flags) & CLONE_VM) == 0)
  176                 return (EINVAL);
  177 
  178         flags = 0;
  179 
  180         if (SCARG(uap, flags) & CLONE_VM)
  181                 flags |= FORK_SHAREVM;
  182         if (SCARG(uap, flags) & CLONE_FS)
  183                 flags |= FORK_SHARECWD;
  184         if (SCARG(uap, flags) & CLONE_FILES)
  185                 flags |= FORK_SHAREFILES;
  186         if (SCARG(uap, flags) & CLONE_SIGHAND)
  187                 flags |= FORK_SHARESIGS;
  188         if (SCARG(uap, flags) & CLONE_VFORK)
  189                 flags |= FORK_PPWAIT;
  190 
  191         sig = SCARG(uap, flags) & CLONE_CSIGNAL;
  192         if (sig < 0 || sig >= _NSIG)
  193                 return (EINVAL);
  194 
  195         /*
  196          * Note that the Linux API does not provide a portable way of
  197          * specifying the stack area; the caller must know if the stack
  198          * grows up or down.  So, we pass a stack size of 0, so that the
  199          * code that makes this adjustment is a noop.
  200          */
  201         return (fork1(l, flags, sig, SCARG(uap, stack), 0,
  202             NULL, NULL, retval, NULL));
  203 }
  204 
/*
 * Print the 'table full' message at most once per 10 seconds.
 * fork1() passes this interval to ratecheck() before calling tablefull().
 */
struct timeval fork_tfmrate = { 10, 0 };
  207 
/*
 * General fork routine: create a new process modelled on the process
 * that owns l1.
 *
 *   l1         LWP requesting the fork; l1->l_proc is the process that
 *              is duplicated.
 *   flags      FORK_* flags.  Those examined here are FORK_SHAREFILES,
 *              FORK_CLEANFILES, FORK_SHARECWD, FORK_SHARESIGS,
 *              FORK_SHAREVM, FORK_PPWAIT and FORK_NOWAIT.
 *   exitsig    Stored in the child's p_exitsig (signal for the parent
 *              on exit).
 *   stack      Passed through to newlwp() together with stacksize.
 *   stacksize  See stack.
 *   func, arg  Function and argument handed to newlwp(); when func is
 *              NULL, child_return is used instead.
 *   retval     If non-NULL, retval[0] receives the child's pid and
 *              retval[1] is set to 0.
 *   rnewprocp  If non-NULL, receives a pointer to the new process.
 *
 * Returns 0 on success, EAGAIN when the system-wide or the per-uid
 * process limit would be exceeded, and ENOMEM when uvm_uarea_alloc()
 * does not produce a U-area address.
 */
int
fork1(struct lwp *l1, int flags, int exitsig, void *stack, size_t stacksize,
    void (*func)(void *), void *arg, register_t *retval,
    struct proc **rnewprocp)
{
        struct proc     *p1, *p2, *parent;
        uid_t           uid;
        struct lwp      *l2;
        int             count, s;
        vaddr_t         uaddr;
        boolean_t       inmem;

        /*
         * Although process entries are dynamically created, we still keep
         * a global limit on the maximum number we will create.  Don't allow
         * a nonprivileged user to use the last few processes; don't let root
         * exceed the limit. The variable nprocs is the current number of
         * processes, maxproc is the limit.
         */
        p1 = l1->l_proc;
        uid = kauth_cred_getuid(p1->p_cred);
        if (__predict_false((nprocs >= maxproc - 5 && uid != 0) ||
                            nprocs >= maxproc)) {
                /* Time of the last 'table full' message, for ratecheck(). */
                static struct timeval lasttfm;

                if (ratecheck(&lasttfm, &fork_tfmrate))
                        tablefull("proc", "increase kern.maxproc or NPROC");
                /* Optionally delay the caller before reporting failure. */
                if (forkfsleep)
                        (void)tsleep(&nprocs, PUSER, "forkmx", forkfsleep);
                return (EAGAIN);
        }
        nprocs++;

        /*
         * Increment the count of procs running with this uid. Don't allow
         * a nonprivileged user to exceed their current limit.
         */
        count = chgproccnt(uid, 1);
        if (__predict_false(uid != 0 && count >
                            p1->p_rlimit[RLIMIT_NPROC].rlim_cur)) {
                /* Undo both counters bumped above before failing. */
                (void)chgproccnt(uid, -1);
                nprocs--;
                if (forkfsleep)
                        (void)tsleep(&nprocs, PUSER, "forkulim", forkfsleep);
                return (EAGAIN);
        }

        /*
         * Allocate virtual address space for the U-area now, while it
         * is still easy to abort the fork operation if we're out of
         * kernel virtual address space.  The actual U-area pages will
         * be allocated and wired in uvm_fork() if needed.
         */

        inmem = uvm_uarea_alloc(&uaddr);
        if (__predict_false(uaddr == 0)) {
                /* Undo both counters bumped above before failing. */
                (void)chgproccnt(uid, -1);
                nprocs--;
                return (ENOMEM);
        }

        /*
         * We are now committed to the fork.  From here on, we may
         * block on resources, but resource allocation may NOT fail.
         */

        /* Allocate new proc. */
        p2 = proc_alloc();

        /*
         * Make a proc table entry for the new process.
         * Start by zeroing the section of proc that is zero-initialized,
         * then copy the section that is copied directly from the parent.
         */
        memset(&p2->p_startzero, 0,
            (unsigned) ((caddr_t)&p2->p_endzero - (caddr_t)&p2->p_startzero));
        memcpy(&p2->p_startcopy, &p1->p_startcopy,
            (unsigned) ((caddr_t)&p2->p_endcopy - (caddr_t)&p2->p_startcopy));

        /* Give the child its own freshly initialized locks and lists. */
        simple_lock_init(&p2->p_sigctx.ps_silock);
        CIRCLEQ_INIT(&p2->p_sigctx.ps_siginfo);
        simple_lock_init(&p2->p_lock);
        LIST_INIT(&p2->p_lwps);

        /*
         * Duplicate sub-structures as needed.
         * Increase reference counts on shared objects.
         * The p_stats and p_sigacts substructs are set in uvm_fork().
         * Inherit flags we want to keep.  The flags related to SIGCHLD
         * handling are important in order to keep a consistent behaviour
         * for the child after the fork.
         *
         * NOTE(review): in this function p_stats is assigned from
         * pstatscopy() and the signal actions are set up by sigactsinit()
         * further down; the sentence about uvm_fork() looks stale --
         * confirm and update.
         */
        p2->p_flag = p1->p_flag & (P_SUGID | P_STOPFORK | P_STOPEXEC |
            P_NOCLDSTOP | P_NOCLDWAIT | P_CLDSIGIGN);
        p2->p_emul = p1->p_emul;
        p2->p_execsw = p1->p_execsw;

        if (p1->p_flag & P_PROFIL)
                startprofclock(p2);

        /* The child shares the parent's credentials; take a reference. */
        kauth_cred_hold(p1->p_cred);
        p2->p_cred = p1->p_cred;

        LIST_INIT(&p2->p_raslist);
#if defined(__HAVE_RAS)
        ras_fork(p1, p2);
#endif

        /* bump references to the text vnode (for procfs) */
        p2->p_textvp = p1->p_textvp;
        if (p2->p_textvp)
                VREF(p2->p_textvp);

        /* File descriptors: shared, freshly initialized, or copied. */
        if (flags & FORK_SHAREFILES)
                fdshare(p1, p2);
        else if (flags & FORK_CLEANFILES)
                p2->p_fd = fdinit(p1);
        else
                p2->p_fd = fdcopy(p1);

        /* Current-directory information: shared or newly initialized. */
        if (flags & FORK_SHARECWD)
                cwdshare(p1, p2);
        else
                p2->p_cwdi = cwdinit(p1);

        /*
         * If p_limit is still copy-on-write, bump refcnt,
         * otherwise get a copy that won't be modified.
         * (If PL_SHAREMOD is clear, the structure is shared
         * copy-on-write.)
         */
        if (p1->p_limit->p_lflags & PL_SHAREMOD)
                p2->p_limit = limcopy(p1->p_limit);
        else {
                simple_lock(&p1->p_limit->p_slock);
                p1->p_limit->p_refcnt++;
                simple_unlock(&p1->p_limit->p_slock);
                p2->p_limit = p1->p_limit;
        }

        /* Inherit P_CONTROLT only if the session still has a tty vnode. */
        if (p1->p_session->s_ttyvp != NULL && p1->p_flag & P_CONTROLT)
                p2->p_flag |= P_CONTROLT;
        /* Mark the child so the parent can wait for it below. */
        if (flags & FORK_PPWAIT)
                p2->p_flag |= P_PPWAIT;
        /* With FORK_NOWAIT the child is made a child of initproc. */
        parent = (flags & FORK_NOWAIT) ? initproc : p1;
        p2->p_pptr = parent;
        LIST_INIT(&p2->p_children);

        /*
         * Link the child after p1 on the p_pglist list and onto the
         * chosen parent's list of children.
         */
        s = proclist_lock_write();
        LIST_INSERT_AFTER(p1, p2, p_pglist);
        LIST_INSERT_HEAD(&parent->p_children, p2, p_sibling);
        proclist_unlock_write(s);

#ifdef KTRACE
        /*
         * Copy traceflag and tracefile if enabled.
         * If not inherited, these were zeroed above.
         */
        if (p1->p_traceflag & KTRFAC_INHERIT) {
                p2->p_traceflag = p1->p_traceflag;
                if ((p2->p_tracep = p1->p_tracep) != NULL)
                        ktradref(p2);
        }
#endif

        scheduler_fork_hook(p1, p2);

        /*
         * Create signal actions for the child process.
         */
        sigactsinit(p2, p1, flags & FORK_SHARESIGS);

        /*
         * p_stats.
         * Copy parts of p_stats, and zero out the rest.
         */
        p2->p_stats = pstatscopy(p1->p_stats);

        /*
         * If emulation has process fork hook, call it now.
         */
        if (p2->p_emul->e_proc_fork)
                (*p2->p_emul->e_proc_fork)(p2, p1, flags);

        /*
         * ...and finally, any other random fork hooks that subsystems
         * might have registered.
         */
        doforkhooks(p2, p1);

        /*
         * This begins the section where we must prevent the parent
         * from being swapped.
         */
        PHOLD(l1);

        uvm_proc_fork(p1, p2, (flags & FORK_SHAREVM) ? TRUE : FALSE);

        /*
         * Finish creating the child process.
         * It will return through a different path later.
         * The new LWP is handed back in l2.
         */
        newlwp(l1, p2, uaddr, inmem, 0, stack, stacksize,
            (func != NULL) ? func : child_return,
            arg, &l2);

        /* Now safe for scheduler to see child process */
        s = proclist_lock_write();
        p2->p_exitsig = exitsig;                /* signal for parent on exit */
        LIST_INSERT_HEAD(&allproc, p2, p_list);
        proclist_unlock_write(s);

#ifdef SYSTRACE
        /* Tell systrace what's happening. */
        if (ISSET(p1->p_flag, P_SYSTRACE))
                systrace_sys_fork(p1, p2);
#endif

#ifdef __HAVE_SYSCALL_INTERN
        (*p2->p_emul->e_syscall_intern)(p2);
#endif

        /*
         * Make child runnable, set start time, and add to run queue
         * except if the parent requested the child to start in SSTOP state.
         */
        SCHED_LOCK(s);
        getmicrotime(&p2->p_stats->p_start);
        p2->p_acflag = AFORK;
        if (p1->p_flag & P_STOPFORK) {
                /* Child starts stopped: no running LWPs, not on a run queue. */
                p2->p_nrlwps = 0;
                p1->p_nstopchild++;
                p2->p_stat = SSTOP;
                l2->l_stat = LSSTOP;
        } else {
                p2->p_nrlwps = 1;
                p2->p_stat = SACTIVE;
                l2->l_stat = LSRUN;
                setrunqueue(l2);
        }
        SCHED_UNLOCK(s);

        /*
         * Now can be swapped.
         */
        PRELE(l1);

        /*
         * Notify any interested parties about the new process.
         */
        KNOTE(&p1->p_klist, NOTE_FORK | p2->p_pid);

        /*
         * Update stats now that we know the fork was successful.
         */
        uvmexp.forks++;
        if (flags & FORK_PPWAIT)
                uvmexp.forks_ppwait++;
        if (flags & FORK_SHAREVM)
                uvmexp.forks_sharevm++;

        /*
         * Pass a pointer to the new process to the caller.
         */
        if (rnewprocp != NULL)
                *rnewprocp = p2;

#ifdef KTRACE
        if (KTRPOINT(p2, KTR_EMUL))
                p2->p_traceflag |= KTRFAC_TRC_EMUL;
#endif

        /*
         * Preserve synchronization semantics of vfork.  P_PPWAIT was set
         * on the child above; if waiting for the child to exec or exit,
         * sleep on our proc (in case of exit) until the flag is cleared.
         *
         * NOTE(review): p2 is dereferenced here and again for p_pid below,
         * after the parent may have slept.  Confirm the child cannot be
         * reaped and its proc freed in that window.
         */
        if (flags & FORK_PPWAIT)
                while (p2->p_flag & P_PPWAIT)
                        tsleep(p1, PWAIT, "ppwait", 0);

        /*
         * Return child pid to parent process,
         * marking us as parent via retval[1].
         */
        if (retval != NULL) {
                retval[0] = p2->p_pid;
                retval[1] = 0;
        }

        return (0);
}
  500 
  501 #if defined(MULTIPROCESSOR)
  502 /*
  503  * XXX This is a slight hack to get newly-formed processes to
  504  * XXX acquire the kernel lock as soon as they run.
  505  */
  506 void
  507 proc_trampoline_mp(void)
  508 {
  509         struct lwp *l;
  510 
  511         l = curlwp;
  512 
  513         SCHED_ASSERT_UNLOCKED();
  514         KERNEL_PROC_LOCK(l);
  515 }
  516 #endif

Cache object: 6a0be433dd8f31663f78ff7cad6db0c7


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.