FreeBSD/Linux Kernel Cross Reference
sys/kernel/fork.c


    1 /*
    2  *  linux/kernel/fork.c
    3  *
    4  *  Copyright (C) 1991, 1992  Linus Torvalds
    5  */
    6 
    7 /*
    8  *  'fork.c' contains the help-routines for the 'fork' system call
    9  * (see also entry.S and others).
   10  * Fork is rather simple, once you get the hang of it, but the memory
   11  * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
   12  */
   13 
   14 #include <linux/config.h>
   15 #include <linux/slab.h>
   16 #include <linux/init.h>
   17 #include <linux/unistd.h>
   18 #include <linux/smp_lock.h>
   19 #include <linux/module.h>
   20 #include <linux/vmalloc.h>
   21 #include <linux/completion.h>
   22 #include <linux/namespace.h>
   23 #include <linux/personality.h>
   24 #include <linux/compiler.h>
   25 
   26 #include <asm/pgtable.h>
   27 #include <asm/pgalloc.h>
   28 #include <asm/uaccess.h>
   29 #include <asm/mmu_context.h>
   30 #include <asm/processor.h>
   31 
   32 /* The idle threads do not count.. */
   33 int nr_threads;
   34 int nr_running;
   35 
   36 int max_threads;
   37 unsigned long total_forks;      /* Handle normal Linux uptimes. */
   38 int last_pid;
   39 
   40 struct task_struct *pidhash[PIDHASH_SZ];
   41 
   42 void add_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
   43 {
   44         unsigned long flags;
   45 
   46         wait->flags &= ~WQ_FLAG_EXCLUSIVE;
   47         wq_write_lock_irqsave(&q->lock, flags);
   48         __add_wait_queue(q, wait);
   49         wq_write_unlock_irqrestore(&q->lock, flags);
   50 }
   51 
   52 void add_wait_queue_exclusive(wait_queue_head_t *q, wait_queue_t * wait)
   53 {
   54         unsigned long flags;
   55 
   56         wait->flags |= WQ_FLAG_EXCLUSIVE;
   57         wq_write_lock_irqsave(&q->lock, flags);
   58         __add_wait_queue_tail(q, wait);
   59         wq_write_unlock_irqrestore(&q->lock, flags);
   60 }
   61 
   62 void remove_wait_queue(wait_queue_head_t *q, wait_queue_t * wait)
   63 {
   64         unsigned long flags;
   65 
   66         wq_write_lock_irqsave(&q->lock, flags);
   67         __remove_wait_queue(q, wait);
   68         wq_write_unlock_irqrestore(&q->lock, flags);
   69 }
   70 
   71 void __init fork_init(unsigned long mempages)
   72 {
   73         /*
   74          * The default maximum number of threads is set to a safe
   75          * value: the thread structures can take up at most half
   76          * of memory.
   77          */
   78         max_threads = mempages / (THREAD_SIZE/PAGE_SIZE) / 8;
   79 
   80         init_task.rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
   81         init_task.rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
   82 }
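
For illustration, the limit computed in fork_init() above can be reproduced by a small user-space sketch. The page and thread-structure sizes below (4 KiB and 8 KiB) and the 128 MiB of RAM are assumed values, not taken from any particular configuration; the point is only the shape of the calculation and the RLIMIT_NPROC default derived from it.

/* Sketch of the fork_init() arithmetic with assumed sizes; illustrative only. */
#include <stdio.h>

int main(void)
{
        unsigned long page_size   = 4096;                        /* assumed PAGE_SIZE */
        unsigned long thread_size = 2 * page_size;               /* assumed THREAD_SIZE */
        unsigned long mempages    = (128UL << 20) / page_size;   /* assumed 128 MiB of RAM */

        unsigned long max_threads = mempages / (thread_size / page_size) / 8;

        printf("max_threads = %lu, default RLIMIT_NPROC = %lu\n",
               max_threads, max_threads / 2);
        return 0;
}
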
   83 
   84 /* Protects next_safe and last_pid. */
   85 spinlock_t lastpid_lock = SPIN_LOCK_UNLOCKED;
   86 
   87 static int get_pid(unsigned long flags)
   88 {
   89         static int next_safe = PID_MAX;
   90         struct task_struct *p;
   91         int pid, beginpid;
   92 
   93         if (flags & CLONE_PID)
   94                 return current->pid;
   95 
   96         spin_lock(&lastpid_lock);
   97         beginpid = last_pid;
   98         if((++last_pid) & 0xffff8000) {
   99                 last_pid = 300;         /* Skip daemons etc. */
  100                 goto inside;
  101         }
  102         if(last_pid >= next_safe) {
  103 inside:
  104                 next_safe = PID_MAX;
  105                 read_lock(&tasklist_lock);
  106         repeat:
  107                 for_each_task(p) {
  108                         if(p->pid == last_pid   ||
  109                            p->pgrp == last_pid  ||
  110                            p->tgid == last_pid  ||
  111                            p->session == last_pid) {
  112                                 if(++last_pid >= next_safe) {
  113                                         if(last_pid & 0xffff8000)
  114                                                 last_pid = 300;
  115                                         next_safe = PID_MAX;
  116                                 }
  117                                 if(unlikely(last_pid == beginpid))
  118                                         goto nomorepids;
  119                                 goto repeat;
  120                         }
  121                         if(p->pid > last_pid && next_safe > p->pid)
  122                                 next_safe = p->pid;
  123                         if(p->pgrp > last_pid && next_safe > p->pgrp)
  124                                 next_safe = p->pgrp;
  125                         if(p->tgid > last_pid && next_safe > p->tgid)
  126                                 next_safe = p->tgid;
  127                         if(p->session > last_pid && next_safe > p->session)
  128                                 next_safe = p->session;
  129                 }
  130                 read_unlock(&tasklist_lock);
  131         }
  132         pid = last_pid;
  133         spin_unlock(&lastpid_lock);
  134 
  135         return pid;
  136 
  137 nomorepids:
  138         read_unlock(&tasklist_lock);
  139         spin_unlock(&lastpid_lock);
  140         return 0;
  141 }
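
The wrap-around policy in get_pid() above is easiest to see in isolation: the test (++last_pid) & 0xffff8000 becomes true once last_pid reaches 0x8000 (32768), at which point allocation restarts at 300 so that the low PID range traditionally used by system daemons is skipped. The user-space model below keeps only that wrap logic; the collision scan over the task list, the locking, and the starting value chosen here are omitted or invented for the sake of the example.

/* Simplified model of the wrap-around in get_pid(); no collision scan,
 * no locking. Purely illustrative. */
#include <stdio.h>

static int last_pid = 32765;    /* pretend we are near the top of the range */

static int next_pid(void)
{
        if ((++last_pid) & 0xffff8000)  /* true once last_pid reaches 32768 */
                last_pid = 300;         /* skip the low "daemon" range */
        return last_pid;
}

int main(void)
{
        for (int i = 0; i < 6; i++)
                printf("%d\n", next_pid());   /* 32766 32767 300 301 302 303 */
        return 0;
}
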
  142 
  143 static inline int dup_mmap(struct mm_struct * mm)
  144 {
  145         struct vm_area_struct * mpnt, *tmp, **pprev;
  146         int retval;
  147 
  148         flush_cache_mm(current->mm);
  149         mm->locked_vm = 0;
  150         mm->mmap = NULL;
  151         mm->mmap_cache = NULL;
  152         mm->map_count = 0;
  153         mm->rss = 0;
  154         mm->cpu_vm_mask = 0;
  155         mm->swap_address = 0;
  156         pprev = &mm->mmap;
  157 
  158         /*
  159          * Add it to the mmlist after the parent.
  160          * Doing it this way means that we can order the list,
  161          * and fork() won't mess up the ordering significantly.
  162          * Add it first so that swapoff can see any swap entries.
  163          */
  164         spin_lock(&mmlist_lock);
  165         list_add(&mm->mmlist, &current->mm->mmlist);
  166         mmlist_nr++;
  167         spin_unlock(&mmlist_lock);
  168 
  169         for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
  170                 struct file *file;
  171 
  172                 retval = -ENOMEM;
  173                 if(mpnt->vm_flags & VM_DONTCOPY)
  174                         continue;
  175                 tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
  176                 if (!tmp)
  177                         goto fail_nomem;
  178                 *tmp = *mpnt;
  179                 tmp->vm_flags &= ~VM_LOCKED;
  180                 tmp->vm_mm = mm;
  181                 tmp->vm_next = NULL;
  182                 file = tmp->vm_file;
  183                 if (file) {
  184                         struct inode *inode = file->f_dentry->d_inode;
  185                         get_file(file);
  186                         if (tmp->vm_flags & VM_DENYWRITE)
  187                                 atomic_dec(&inode->i_writecount);
  188       
  189                         /* insert tmp into the share list, just after mpnt */
  190                         spin_lock(&inode->i_mapping->i_shared_lock);
  191                         if((tmp->vm_next_share = mpnt->vm_next_share) != NULL)
  192                                 mpnt->vm_next_share->vm_pprev_share =
  193                                         &tmp->vm_next_share;
  194                         mpnt->vm_next_share = tmp;
  195                         tmp->vm_pprev_share = &mpnt->vm_next_share;
  196                         spin_unlock(&inode->i_mapping->i_shared_lock);
  197                 }
  198 
  199                 /*
  200                  * Link in the new vma and copy the page table entries:
  201                  * link in first so that swapoff can see swap entries.
  202                  */
  203                 spin_lock(&mm->page_table_lock);
  204                 *pprev = tmp;
  205                 pprev = &tmp->vm_next;
  206                 mm->map_count++;
  207                 retval = copy_page_range(mm, current->mm, tmp);
  208                 spin_unlock(&mm->page_table_lock);
  209 
  210                 if (tmp->vm_ops && tmp->vm_ops->open)
  211                         tmp->vm_ops->open(tmp);
  212 
  213                 if (retval)
  214                         goto fail_nomem;
  215         }
  216         retval = 0;
  217         build_mmap_rb(mm);
  218 
  219 fail_nomem:
  220         flush_tlb_mm(current->mm);
  221         return retval;
  222 }
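
dup_mmap() above duplicates each VMA and lets copy_page_range() map the underlying pages into the child, which on most architectures means copy-on-write sharing; the visible result is that the two processes subsequently see private copies of writable data. The effect can be observed from user space with an ordinary fork():

/* Demonstrates the effect of copy-on-write address space duplication:
 * after fork(), a write in the child is not visible in the parent. */
#include <stdio.h>
#include <unistd.h>
#include <sys/wait.h>

int main(void)
{
        int value = 1;
        pid_t pid = fork();

        if (pid < 0) {
                perror("fork");
                return 1;
        }
        if (pid == 0) {                 /* child: private copy of 'value' */
                value = 2;
                printf("child sees value=%d\n", value);
                _exit(0);
        }
        waitpid(pid, NULL, 0);
        printf("parent sees value=%d\n", value);   /* still 1 */
        return 0;
}
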
  223 
  224 spinlock_t mmlist_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED;
  225 int mmlist_nr;
  226 
  227 #define allocate_mm()   (kmem_cache_alloc(mm_cachep, SLAB_KERNEL))
  228 #define free_mm(mm)     (kmem_cache_free(mm_cachep, (mm)))
  229 
  230 static struct mm_struct * mm_init(struct mm_struct * mm)
  231 {
  232         atomic_set(&mm->mm_users, 1);
  233         atomic_set(&mm->mm_count, 1);
  234         init_rwsem(&mm->mmap_sem);
  235         mm->page_table_lock = SPIN_LOCK_UNLOCKED;
  236         mm->pgd = pgd_alloc(mm);
  237         mm->def_flags = 0;
  238         if (mm->pgd)
  239                 return mm;
  240         free_mm(mm);
  241         return NULL;
  242 }
  243         
  244 
  245 /*
  246  * Allocate and initialize an mm_struct.
  247  */
  248 struct mm_struct * mm_alloc(void)
  249 {
  250         struct mm_struct * mm;
  251 
  252         mm = allocate_mm();
  253         if (mm) {
  254                 memset(mm, 0, sizeof(*mm));
  255                 return mm_init(mm);
  256         }
  257         return NULL;
  258 }
  259 
  260 /*
  261  * Called when the last reference to the mm
  262  * is dropped: either by a lazy thread or by
  263  * mmput. Free the page directory and the mm.
  264  */
  265 inline void __mmdrop(struct mm_struct *mm)
  266 {
  267         BUG_ON(mm == &init_mm);
  268         pgd_free(mm->pgd);
  269         destroy_context(mm);
  270         free_mm(mm);
  271 }
  272 
  273 /*
  274  * Decrement the use count and release all resources for an mm.
  275  */
  276 void mmput(struct mm_struct *mm)
  277 {
  278         if (atomic_dec_and_lock(&mm->mm_users, &mmlist_lock)) {
  279                 extern struct mm_struct *swap_mm;
  280                 if (swap_mm == mm)
  281                         swap_mm = list_entry(mm->mmlist.next, struct mm_struct, mmlist);
  282                 list_del(&mm->mmlist);
  283                 mmlist_nr--;
  284                 spin_unlock(&mmlist_lock);
  285                 exit_mmap(mm);
  286                 mmdrop(mm);
  287         }
  288 }
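
mmput() above uses atomic_dec_and_lock() so that only the caller dropping the final reference pays for taking mmlist_lock; every other caller is a plain atomic decrement. Below is a rough user-space analogue of that pattern, written with C11 atomics and a pthread mutex rather than the kernel primitives. It is simplified: the kernel helper acquires the lock before the final decrement so the unlink and the count reaching zero appear atomic to other lock holders.

/* Rough user-space analogue of the atomic_dec_and_lock() pattern in mmput():
 * only the thread dropping the last reference takes the list lock and frees
 * the object. Illustrative sketch, not the kernel implementation. */
#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>
#include <stdlib.h>

struct object {
        atomic_int refs;
};

static pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER;

static void put_object(struct object *obj)
{
        /* fetch_sub returns the old value; 1 means this was the last reference */
        if (atomic_fetch_sub(&obj->refs, 1) == 1) {
                pthread_mutex_lock(&list_lock);
                /* ...unlink from a global list here, as mmput() unlinks the mm... */
                pthread_mutex_unlock(&list_lock);
                free(obj);
                puts("last reference dropped, object freed");
        }
}

int main(void)
{
        struct object *obj = malloc(sizeof(*obj));

        if (!obj)
                return 1;
        atomic_init(&obj->refs, 2);
        put_object(obj);        /* still referenced, nothing happens */
        put_object(obj);        /* last reference: lock, unlink, free */
        return 0;
}
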
  289 
  290 /* Please note the differences between mmput and mm_release.
  291  * mmput is called whenever we stop holding onto a mm_struct,
   292  * whether on error or success.
  293  *
  294  * mm_release is called after a mm_struct has been removed
  295  * from the current process.
  296  *
  297  * This difference is important for error handling, when we
  298  * only half set up a mm_struct for a new process and need to restore
  299  * the old one.  Because we mmput the new mm_struct before
  300  * restoring the old one. . .
  301  * Eric Biederman 10 January 1998
  302  */
  303 void mm_release(void)
  304 {
  305         struct task_struct *tsk = current;
  306         struct completion *vfork_done = tsk->vfork_done;
  307 
  308         /* notify parent sleeping on vfork() */
  309         if (vfork_done) {
  310                 tsk->vfork_done = NULL;
  311                 complete(vfork_done);
  312         }
  313 }
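
The completion signalled by mm_release() above is what wait_for_completion(&vfork) in do_fork() (further down) blocks on when CLONE_VFORK is set. The observable consequence from user space is that vfork() keeps the parent suspended until the child exits or execs:

/* Observable effect of the vfork_done completion: the parent does not run
 * again until the child calls _exit() or exec. Illustration only. */
#include <stdio.h>
#include <unistd.h>
#include <sys/wait.h>

int main(void)
{
        pid_t pid = vfork();

        if (pid < 0) {
                perror("vfork");
                return 1;
        }
        if (pid == 0) {
                /* The child borrows the parent's address space until it exits;
                 * only _exit() or exec*() are safe here. */
                write(1, "child runs first\n", 17);
                _exit(0);
        }
        /* Reached only after the child's _exit() completes vfork_done. */
        printf("parent resumes after child exit (child pid %d)\n", (int)pid);
        waitpid(pid, NULL, 0);
        return 0;
}
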
  314 
  315 static int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
  316 {
  317         struct mm_struct * mm, *oldmm;
  318         int retval;
  319 
  320         tsk->min_flt = tsk->maj_flt = 0;
  321         tsk->cmin_flt = tsk->cmaj_flt = 0;
  322         tsk->nswap = tsk->cnswap = 0;
  323 
  324         tsk->mm = NULL;
  325         tsk->active_mm = NULL;
  326 
  327         /*
  328          * Are we cloning a kernel thread?
  329          *
   330  * We need to steal an active VM for that.
  331          */
  332         oldmm = current->mm;
  333         if (!oldmm)
  334                 return 0;
  335 
  336         if (clone_flags & CLONE_VM) {
  337                 atomic_inc(&oldmm->mm_users);
  338                 mm = oldmm;
  339                 goto good_mm;
  340         }
  341 
  342         retval = -ENOMEM;
  343         mm = allocate_mm();
  344         if (!mm)
  345                 goto fail_nomem;
  346 
  347         /* Copy the current MM stuff.. */
  348         memcpy(mm, oldmm, sizeof(*mm));
  349         if (!mm_init(mm))
  350                 goto fail_nomem;
  351 
  352         if (init_new_context(tsk,mm))
  353                 goto free_pt;
  354 
  355         down_write(&oldmm->mmap_sem);
  356         retval = dup_mmap(mm);
  357         up_write(&oldmm->mmap_sem);
  358 
  359         if (retval)
  360                 goto free_pt;
  361 
  362         /*
  363          * child gets a private LDT (if there was an LDT in the parent)
  364          */
  365         copy_segments(tsk, mm);
  366 
  367 good_mm:
  368         tsk->mm = mm;
  369         tsk->active_mm = mm;
  370         return 0;
  371 
  372 free_pt:
  373         mmput(mm);
  374 fail_nomem:
  375         return retval;
  376 }
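
copy_mm() above has two paths: with CLONE_VM the child simply takes another reference on the parent's mm_struct, so stores made by either process are visible to the other; without it, a new mm is built via mm_init() and dup_mmap(). The shared case can be reproduced from user space with the clone(2) wrapper; the flag set, stack size, and SIGCHLD termination signal used below are illustrative choices, not requirements.

/* CLONE_VM demonstration: the child and parent share one address space,
 * so the child's store to 'shared' is visible to the parent.
 * Illustrative sketch; flags and stack size are arbitrary choices. */
#define _GNU_SOURCE
#include <sched.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <sys/wait.h>
#include <unistd.h>

static int shared = 1;

static int child_fn(void *arg)
{
        (void)arg;
        shared = 2;             /* visible to the parent because of CLONE_VM */
        return 0;
}

int main(void)
{
        const size_t stack_size = 64 * 1024;
        char *stack = malloc(stack_size);

        if (!stack)
                return 1;
        /* The stack grows down on most architectures, so pass its top. */
        pid_t pid = clone(child_fn, stack + stack_size,
                          CLONE_VM | SIGCHLD, NULL);
        if (pid < 0) {
                perror("clone");
                return 1;
        }
        waitpid(pid, NULL, 0);
        printf("parent sees shared=%d\n", shared);   /* prints 2 */
        free(stack);
        return 0;
}
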
  377 
  378 static inline struct fs_struct *__copy_fs_struct(struct fs_struct *old)
  379 {
  380         struct fs_struct *fs = kmem_cache_alloc(fs_cachep, GFP_KERNEL);
  381         /* We don't need to lock fs - think why ;-) */
  382         if (fs) {
  383                 atomic_set(&fs->count, 1);
  384                 fs->lock = RW_LOCK_UNLOCKED;
  385                 fs->umask = old->umask;
  386                 read_lock(&old->lock);
  387                 fs->rootmnt = mntget(old->rootmnt);
  388                 fs->root = dget(old->root);
  389                 fs->pwdmnt = mntget(old->pwdmnt);
  390                 fs->pwd = dget(old->pwd);
  391                 if (old->altroot) {
  392                         fs->altrootmnt = mntget(old->altrootmnt);
  393                         fs->altroot = dget(old->altroot);
  394                 } else {
  395                         fs->altrootmnt = NULL;
  396                         fs->altroot = NULL;
  397                 }       
  398                 read_unlock(&old->lock);
  399         }
  400         return fs;
  401 }
  402 
  403 struct fs_struct *copy_fs_struct(struct fs_struct *old)
  404 {
  405         return __copy_fs_struct(old);
  406 }
  407 
  408 static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
  409 {
  410         if (clone_flags & CLONE_FS) {
  411                 atomic_inc(&current->fs->count);
  412                 return 0;
  413         }
  414         tsk->fs = __copy_fs_struct(current->fs);
  415         if (!tsk->fs)
  416                 return -1;
  417         return 0;
  418 }
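
copy_fs() above copies the fs_struct unless CLONE_FS is set, so after an ordinary fork() the working directory, root, and umask evolve independently in parent and child. A quick demonstration of the copied case:

/* After a plain fork() (no CLONE_FS) the fs_struct is copied, so the child's
 * chdir() does not move the parent's working directory. Illustration only. */
#include <stdio.h>
#include <unistd.h>
#include <sys/wait.h>

int main(void)
{
        char buf[4096];
        pid_t pid = fork();

        if (pid == 0) {
                if (chdir("/") != 0)
                        _exit(1);
                if (getcwd(buf, sizeof(buf)))
                        printf("child cwd:  %s\n", buf);
                _exit(0);
        }
        waitpid(pid, NULL, 0);
        if (getcwd(buf, sizeof(buf)))
                printf("parent cwd: %s\n", buf);   /* unchanged */
        return 0;
}
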
  419 
  420 static int count_open_files(struct files_struct *files, int size)
  421 {
  422         int i;
  423         
  424         /* Find the last open fd */
  425         for (i = size/(8*sizeof(long)); i > 0; ) {
  426                 if (files->open_fds->fds_bits[--i])
  427                         break;
  428         }
  429         i = (i+1) * 8 * sizeof(long);
  430         return i;
  431 }
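
count_open_files() above walks the open-fd bitmap from the top down looking for the last non-zero word and then rounds up to a whole word of bits, so it yields a word-granular upper bound on the highest open descriptor rather than an exact count of open files. The stand-alone model below mirrors that loop on an invented bitmap; the expected output assumes a 64-bit unsigned long.

/* Stand-alone model of the count_open_files() scan; the bitmap contents are
 * invented and the expected result assumes a 64-bit unsigned long. */
#include <stdio.h>

static int last_fd_bound(const unsigned long *bits, int size_in_bits)
{
        int i;

        /* Find the last non-zero word, exactly as the kernel loop does. */
        for (i = size_in_bits / (8 * (int)sizeof(long)); i > 0; ) {
                if (bits[--i])
                        break;
        }
        return (i + 1) * 8 * (int)sizeof(long);
}

int main(void)
{
        /* Pretend fds 0, 1, 2 and 70 are open. */
        unsigned long bits[4] = { 0x7UL, 1UL << 6, 0, 0 };

        printf("word-rounded bound: %d\n",
               last_fd_bound(bits, (int)(sizeof(bits) * 8)));   /* 128 on 64-bit */
        return 0;
}
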
  432 
  433 static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
  434 {
  435         struct files_struct *oldf, *newf;
  436         struct file **old_fds, **new_fds;
  437         int open_files, nfds, size, i, error = 0;
  438 
  439         /*
  440          * A background process may not have any files ...
  441          */
  442         oldf = current->files;
  443         if (!oldf)
  444                 goto out;
  445 
  446         if (clone_flags & CLONE_FILES) {
  447                 atomic_inc(&oldf->count);
  448                 goto out;
  449         }
  450 
  451         /*
  452          * Note: we may be using current for both targets (See exec.c)
  453          * This works because we cache current->files (old) as oldf. Don't
  454          * break this.
  455          */
  456         tsk->files = NULL;
  457         error = -ENOMEM;
  458         newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
  459         if (!newf) 
  460                 goto out;
  461 
  462         atomic_set(&newf->count, 1);
  463 
  464         newf->file_lock     = RW_LOCK_UNLOCKED;
  465         newf->next_fd       = 0;
  466         newf->max_fds       = NR_OPEN_DEFAULT;
  467         newf->max_fdset     = __FD_SETSIZE;
  468         newf->close_on_exec = &newf->close_on_exec_init;
  469         newf->open_fds      = &newf->open_fds_init;
  470         newf->fd            = &newf->fd_array[0];
  471 
  472         /* We don't yet have the oldf readlock, but even if the old
  473            fdset gets grown now, we'll only copy up to "size" fds */
  474         size = oldf->max_fdset;
  475         if (size > __FD_SETSIZE) {
  476                 newf->max_fdset = 0;
  477                 write_lock(&newf->file_lock);
  478                 error = expand_fdset(newf, size-1);
  479                 write_unlock(&newf->file_lock);
  480                 if (error)
  481                         goto out_release;
  482         }
  483         read_lock(&oldf->file_lock);
  484 
  485         open_files = count_open_files(oldf, size);
  486 
  487         /*
  488          * Check whether we need to allocate a larger fd array.
  489          * Note: we're not a clone task, so the open count won't
  490          * change.
  491          */
  492         nfds = NR_OPEN_DEFAULT;
  493         if (open_files > nfds) {
  494                 read_unlock(&oldf->file_lock);
  495                 newf->max_fds = 0;
  496                 write_lock(&newf->file_lock);
  497                 error = expand_fd_array(newf, open_files-1);
  498                 write_unlock(&newf->file_lock);
  499                 if (error) 
  500                         goto out_release;
  501                 nfds = newf->max_fds;
  502                 read_lock(&oldf->file_lock);
  503         }
  504 
  505         old_fds = oldf->fd;
  506         new_fds = newf->fd;
  507 
  508         memcpy(newf->open_fds->fds_bits, oldf->open_fds->fds_bits, open_files/8);
  509         memcpy(newf->close_on_exec->fds_bits, oldf->close_on_exec->fds_bits, open_files/8);
  510 
  511         for (i = open_files; i != 0; i--) {
  512                 struct file *f = *old_fds++;
  513                 if (f)
  514                         get_file(f);
  515                 *new_fds++ = f;
  516         }
  517         read_unlock(&oldf->file_lock);
  518 
  519         /* compute the remainder to be cleared */
  520         size = (newf->max_fds - open_files) * sizeof(struct file *);
  521 
   522         /* This is long-word aligned, so it could use an optimized version */ 
  523         memset(new_fds, 0, size); 
  524 
  525         if (newf->max_fdset > open_files) {
  526                 int left = (newf->max_fdset-open_files)/8;
  527                 int start = open_files / (8 * sizeof(unsigned long));
  528                 
  529                 memset(&newf->open_fds->fds_bits[start], 0, left);
  530                 memset(&newf->close_on_exec->fds_bits[start], 0, left);
  531         }
  532 
  533         tsk->files = newf;
  534         error = 0;
  535 out:
  536         return error;
  537 
  538 out_release:
  539         free_fdset (newf->close_on_exec, newf->max_fdset);
  540         free_fdset (newf->open_fds, newf->max_fdset);
  541         kmem_cache_free(files_cachep, newf);
  542         goto out;
  543 }
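
copy_files() above duplicates the table of struct file pointers and bumps each file's reference count with get_file(), but the struct file objects themselves, and hence their offsets and flags, remain shared between parent and child. That sharing is easy to observe: a read in the child advances the offset the parent sees. The path opened below is only an example; any readable file works.

/* The fd table is copied but the underlying open file (and its offset) stays
 * shared, exactly as get_file() on each slot implies. Illustration only. */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/wait.h>

int main(void)
{
        char buf[16];
        int fd = open("/etc/hostname", O_RDONLY);   /* example path; any readable file works */

        if (fd < 0) {
                perror("open");
                return 1;
        }
        pid_t pid = fork();
        if (pid == 0) {
                if (read(fd, buf, 4) < 0)           /* advances the shared offset */
                        _exit(1);
                _exit(0);
        }
        waitpid(pid, NULL, 0);
        printf("offset seen by parent: %ld\n",      /* typically 4 */
               (long)lseek(fd, 0, SEEK_CUR));
        return 0;
}
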
  544 
  545 /*
  546  *      Helper to unshare the files of the current task. 
  547  *      We don't want to expose copy_files internals to 
  548  *      the exec layer of the kernel.
  549  */
  550 
  551 int unshare_files(void)
  552 {
  553         struct files_struct *files  = current->files;
  554         int rc;
  555         
  556         if(!files)
  557                 BUG();
  558                 
   559         /* This can race, but the race only makes us create a copy we
   560            didn't need and then drop it. */
  561         if(atomic_read(&files->count) == 1)
  562         {
  563                 atomic_inc(&files->count);
  564                 return 0;
  565         }
  566         rc = copy_files(0, current);
  567         if(rc)
  568                 current->files = files;
  569         return rc;
  570 }               
  571 
  572 static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
  573 {
  574         struct signal_struct *sig;
  575 
  576         if (clone_flags & CLONE_SIGHAND) {
  577                 atomic_inc(&current->sig->count);
  578                 return 0;
  579         }
  580         sig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL);
  581         tsk->sig = sig;
  582         if (!sig)
  583                 return -1;
  584         spin_lock_init(&sig->siglock);
  585         atomic_set(&sig->count, 1);
  586         memcpy(tsk->sig->action, current->sig->action, sizeof(tsk->sig->action));
  587         return 0;
  588 }
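
copy_sighand() above memcpy()s the parent's action table when CLONE_SIGHAND is not set, so the child starts with identical signal dispositions but later changes in either process stay private to it. The copied semantics are visible from user space:

/* Signal dispositions are copied at fork: the child inherits the parent's
 * SIGUSR1 handler, and changing it in the child does not touch the parent. */
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/wait.h>

static void handler(int sig) { (void)sig; }

int main(void)
{
        struct sigaction sa, cur;

        memset(&sa, 0, sizeof(sa));
        sa.sa_handler = handler;
        sigaction(SIGUSR1, &sa, NULL);

        pid_t pid = fork();
        if (pid == 0) {
                sigaction(SIGUSR1, NULL, &cur);
                printf("child inherited handler: %s\n",
                       cur.sa_handler == handler ? "yes" : "no");
                signal(SIGUSR1, SIG_IGN);       /* private to the child's copy */
                _exit(0);
        }
        waitpid(pid, NULL, 0);
        sigaction(SIGUSR1, NULL, &cur);
        printf("parent handler unchanged: %s\n",
               cur.sa_handler == handler ? "yes" : "no");
        return 0;
}
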
  589 
  590 static inline void copy_flags(unsigned long clone_flags, struct task_struct *p)
  591 {
  592         unsigned long new_flags = p->flags;
  593 
  594         new_flags &= ~(PF_SUPERPRIV | PF_USEDFPU);
  595         new_flags |= PF_FORKNOEXEC;
  596         if (!(clone_flags & CLONE_PTRACE))
  597                 p->ptrace = 0;
  598         p->flags = new_flags;
  599 }
  600 
  601 long kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)
  602 {
  603         struct task_struct *task = current;
  604         unsigned old_task_dumpable;
  605         long ret;
  606 
  607         /* lock out any potential ptracer */
  608         task_lock(task);
  609         if (task->ptrace) {
  610                 task_unlock(task);
  611                 return -EPERM;
  612         }
  613 
  614         old_task_dumpable = task->task_dumpable;
  615         task->task_dumpable = 0;
  616         task_unlock(task);
  617 
  618         ret = arch_kernel_thread(fn, arg, flags);
  619 
  620         /* never reached in child process, only in parent */
  621         current->task_dumpable = old_task_dumpable;
  622 
  623         return ret;
  624 }
  625 
  626 /*
  627  *  Ok, this is the main fork-routine. It copies the system process
  628  * information (task[nr]) and sets up the necessary registers. It also
  629  * copies the data segment in its entirety.  The "stack_start" and
  630  * "stack_top" arguments are simply passed along to the platform
  631  * specific copy_thread() routine.  Most platforms ignore stack_top.
  632  * For an example that's using stack_top, see
  633  * arch/ia64/kernel/process.c.
  634  */
  635 int do_fork(unsigned long clone_flags, unsigned long stack_start,
  636             struct pt_regs *regs, unsigned long stack_size)
  637 {
  638         int retval;
  639         struct task_struct *p;
  640         struct completion vfork;
  641 
  642         if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
  643                 return -EINVAL;
  644 
  645         retval = -EPERM;
  646 
  647         /* 
  648          * CLONE_PID is only allowed for the initial SMP swapper
  649          * calls
  650          */
  651         if (clone_flags & CLONE_PID) {
  652                 if (current->pid)
  653                         goto fork_out;
  654         }
  655 
  656         retval = -ENOMEM;
  657         p = alloc_task_struct();
  658         if (!p)
  659                 goto fork_out;
  660 
  661         *p = *current;
  662 
  663         retval = -EAGAIN;
  664         /*
  665          * Check if we are over our maximum process limit, but be sure to
  666          * exclude root. This is needed to make it possible for login and
  667          * friends to set the per-user process limit to something lower
  668          * than the amount of processes root is running. -- Rik
  669          */
  670         if (atomic_read(&p->user->processes) >= p->rlim[RLIMIT_NPROC].rlim_cur
  671                       && !capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE))
  672                 goto bad_fork_free;
  673 
  674         atomic_inc(&p->user->__count);
  675         atomic_inc(&p->user->processes);
  676 
  677         /*
  678          * Counter increases are protected by
  679          * the kernel lock so nr_threads can't
  680          * increase under us (but it may decrease).
  681          */
  682         if (nr_threads >= max_threads)
  683                 goto bad_fork_cleanup_count;
  684         
  685         get_exec_domain(p->exec_domain);
  686 
  687         if (p->binfmt && p->binfmt->module)
  688                 __MOD_INC_USE_COUNT(p->binfmt->module);
  689 
  690         p->did_exec = 0;
  691         p->swappable = 0;
  692         p->state = TASK_UNINTERRUPTIBLE;
  693 
  694         copy_flags(clone_flags, p);
  695         p->pid = get_pid(clone_flags);
  696         if (p->pid == 0 && current->pid != 0)
  697                 goto bad_fork_cleanup;
  698 
  699         p->run_list.next = NULL;
  700         p->run_list.prev = NULL;
  701 
  702         p->p_cptr = NULL;
  703         init_waitqueue_head(&p->wait_chldexit);
  704         p->vfork_done = NULL;
  705         if (clone_flags & CLONE_VFORK) {
  706                 p->vfork_done = &vfork;
  707                 init_completion(&vfork);
  708         }
  709         spin_lock_init(&p->alloc_lock);
  710 
  711         p->sigpending = 0;
  712         init_sigpending(&p->pending);
  713 
  714         p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
  715         p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
  716         init_timer(&p->real_timer);
  717         p->real_timer.data = (unsigned long) p;
  718 
  719         p->leader = 0;          /* session leadership doesn't inherit */
  720         p->tty_old_pgrp = 0;
  721         p->times.tms_utime = p->times.tms_stime = 0;
  722         p->times.tms_cutime = p->times.tms_cstime = 0;
  723 #ifdef CONFIG_SMP
  724         {
  725                 int i;
  726                 p->cpus_runnable = ~0UL;
  727                 p->processor = current->processor;
  728                 /* ?? should we just memset this ?? */
  729                 for(i = 0; i < smp_num_cpus; i++)
  730                         p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0;
  731                 spin_lock_init(&p->sigmask_lock);
  732         }
  733 #endif
  734         p->lock_depth = -1;             /* -1 = no lock */
  735         p->start_time = jiffies;
  736 
  737         INIT_LIST_HEAD(&p->local_pages);
  738 
  739         retval = -ENOMEM;
  740         /* copy all the process information */
  741         if (copy_files(clone_flags, p))
  742                 goto bad_fork_cleanup;
  743         if (copy_fs(clone_flags, p))
  744                 goto bad_fork_cleanup_files;
  745         if (copy_sighand(clone_flags, p))
  746                 goto bad_fork_cleanup_fs;
  747         if (copy_mm(clone_flags, p))
  748                 goto bad_fork_cleanup_sighand;
  749         if (copy_namespace(clone_flags, p))
  750                 goto bad_fork_cleanup_mm;
  751         retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);
  752         if (retval)
  753                 goto bad_fork_cleanup_namespace;
  754         p->semundo = NULL;
  755         
   756         /* Our parent execution domain becomes the current domain.
   757            These must match for thread signalling to apply. */
  758            
  759         p->parent_exec_id = p->self_exec_id;
  760 
  761         /* ok, now we should be set up.. */
  762         p->swappable = 1;
  763         p->exit_signal = clone_flags & CSIGNAL;
  764         p->pdeath_signal = 0;
  765 
  766         /*
   767          * "Share" the dynamic priority between parent and child, so the
   768          * total amount of dynamic priority in the system doesn't change;
   769          * this gives more scheduling fairness. It only matters in the first
   770          * timeslice; in the long run the scheduling behaviour is unchanged.
  771          */
  772         p->counter = (current->counter + 1) >> 1;
  773         current->counter >>= 1;
  774         if (!current->counter)
  775                 current->need_resched = 1;
  776 
  777         /*
  778          * Ok, add it to the run-queues and make it
  779          * visible to the rest of the system.
  780          *
  781          * Let it rip!
  782          */
  783         retval = p->pid;
  784         p->tgid = retval;
  785         INIT_LIST_HEAD(&p->thread_group);
  786 
  787         /* Need tasklist lock for parent etc handling! */
  788         write_lock_irq(&tasklist_lock);
  789 
  790         /* CLONE_PARENT re-uses the old parent */
  791         p->p_opptr = current->p_opptr;
  792         p->p_pptr = current->p_pptr;
  793         if (!(clone_flags & CLONE_PARENT)) {
  794                 p->p_opptr = current;
  795                 if (!(p->ptrace & PT_PTRACED))
  796                         p->p_pptr = current;
  797         }
  798 
  799         if (clone_flags & CLONE_THREAD) {
  800                 p->tgid = current->tgid;
  801                 list_add(&p->thread_group, &current->thread_group);
  802         }
  803 
  804         SET_LINKS(p);
  805         hash_pid(p);
  806         nr_threads++;
  807         write_unlock_irq(&tasklist_lock);
  808 
  809         if (p->ptrace & PT_PTRACED)
  810                 send_sig(SIGSTOP, p, 1);
  811 
  812         wake_up_process(p);             /* do this last */
  813         ++total_forks;
  814         if (clone_flags & CLONE_VFORK)
  815                 wait_for_completion(&vfork);
  816 
  817 fork_out:
  818         return retval;
  819 
  820 bad_fork_cleanup_namespace:
  821         exit_namespace(p);
  822 bad_fork_cleanup_mm:
  823         exit_mm(p);
  824 bad_fork_cleanup_sighand:
  825         exit_sighand(p);
  826 bad_fork_cleanup_fs:
  827         exit_fs(p); /* blocking */
  828 bad_fork_cleanup_files:
  829         exit_files(p); /* blocking */
  830 bad_fork_cleanup:
  831         put_exec_domain(p->exec_domain);
  832         if (p->binfmt && p->binfmt->module)
  833                 __MOD_DEC_USE_COUNT(p->binfmt->module);
  834 bad_fork_cleanup_count:
  835         atomic_dec(&p->user->processes);
  836         free_uid(p->user);
  837 bad_fork_free:
  838         free_task_struct(p);
  839         goto fork_out;
  840 }
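
Taken together, do_fork() above returns the new child's PID to the caller, while the architecture's copy_thread() arranges for the child to resume in the same place with a return value of 0; -EAGAIN and the other error paths surface as a negative return with no child created. That convention is exactly what the familiar user-space pattern relies on:

/* The classic pattern built on the return-value convention that do_fork()
 * and copy_thread() implement: 0 in the child, the child's PID in the parent. */
#include <stdio.h>
#include <unistd.h>
#include <sys/wait.h>

int main(void)
{
        pid_t pid = fork();

        if (pid < 0) {
                perror("fork");                     /* e.g. the -EAGAIN case surfaces here */
                return 1;
        }
        if (pid == 0) {
                printf("child: pid=%d ppid=%d\n", (int)getpid(), (int)getppid());
                _exit(0);
        }
        int status;
        waitpid(pid, &status, 0);
        printf("parent: forked child %d, exit status %d\n",
               (int)pid, WEXITSTATUS(status));
        return 0;
}
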
  841 
  842 /* SLAB cache for signal_struct structures (tsk->sig) */
  843 kmem_cache_t *sigact_cachep;
  844 
  845 /* SLAB cache for files_struct structures (tsk->files) */
  846 kmem_cache_t *files_cachep;
  847 
  848 /* SLAB cache for fs_struct structures (tsk->fs) */
  849 kmem_cache_t *fs_cachep;
  850 
  851 /* SLAB cache for vm_area_struct structures */
  852 kmem_cache_t *vm_area_cachep;
  853 
  854 /* SLAB cache for mm_struct structures (tsk->mm) */
  855 kmem_cache_t *mm_cachep;
  856 
  857 void __init proc_caches_init(void)
  858 {
  859         sigact_cachep = kmem_cache_create("signal_act",
  860                         sizeof(struct signal_struct), 0,
  861                         SLAB_HWCACHE_ALIGN, NULL, NULL);
  862         if (!sigact_cachep)
  863                 panic("Cannot create signal action SLAB cache");
  864 
  865         files_cachep = kmem_cache_create("files_cache", 
  866                          sizeof(struct files_struct), 0, 
  867                          SLAB_HWCACHE_ALIGN, NULL, NULL);
  868         if (!files_cachep) 
  869                 panic("Cannot create files SLAB cache");
  870 
  871         fs_cachep = kmem_cache_create("fs_cache", 
  872                          sizeof(struct fs_struct), 0, 
  873                          SLAB_HWCACHE_ALIGN, NULL, NULL);
  874         if (!fs_cachep) 
  875                 panic("Cannot create fs_struct SLAB cache");
  876  
  877         vm_area_cachep = kmem_cache_create("vm_area_struct",
  878                         sizeof(struct vm_area_struct), 0,
  879                         SLAB_HWCACHE_ALIGN, NULL, NULL);
  880         if(!vm_area_cachep)
  881                 panic("vma_init: Cannot alloc vm_area_struct SLAB cache");
  882 
  883         mm_cachep = kmem_cache_create("mm_struct",
  884                         sizeof(struct mm_struct), 0,
  885                         SLAB_HWCACHE_ALIGN, NULL, NULL);
  886         if(!mm_cachep)
  887                 panic("vma_init: Cannot alloc mm_struct SLAB cache");
  888 }
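
proc_caches_init() above creates one slab cache per fixed-size structure so that allocating and freeing these task-related objects reuses size-matched, cache-warm memory instead of round-tripping through a general-purpose allocator. The user-space sketch below captures only the core idea, a free list of fixed-size objects; it is nothing like the real slab allocator's per-CPU, coloring, and constructor machinery.

/* Very simplified user-space analogue of a slab cache: fixed-size objects
 * recycled through a free list. Purely illustrative, not the kernel design. */
#include <stdio.h>
#include <stdlib.h>

struct cache {
        size_t obj_size;
        void  *free_list;       /* first word of a free object links to the next */
};

static struct cache *cache_create(size_t obj_size)
{
        struct cache *c = malloc(sizeof(*c));

        if (c) {
                c->obj_size = obj_size < sizeof(void *) ? sizeof(void *) : obj_size;
                c->free_list = NULL;
        }
        return c;
}

static void *cache_alloc(struct cache *c)
{
        if (c->free_list) {                     /* reuse a previously freed object */
                void *obj = c->free_list;
                c->free_list = *(void **)obj;
                return obj;
        }
        return malloc(c->obj_size);
}

static void cache_free(struct cache *c, void *obj)
{
        *(void **)obj = c->free_list;           /* push onto the free list */
        c->free_list = obj;
}

int main(void)
{
        struct cache *c = cache_create(128);
        void *a = cache_alloc(c);

        cache_free(c, a);
        void *b = cache_alloc(c);               /* same object comes straight back */
        printf("recycled: %s\n", a == b ? "yes" : "no");
        free(b);
        free(c);
        return 0;
}
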

This page is part of the FreeBSD/Linux Kernel Cross-Reference and was automatically generated using a modified version of the LXR engine.