The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/kernel/fork.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  *  linux/kernel/fork.c
    3  *
    4  *  Copyright (C) 1991, 1992  Linus Torvalds
    5  */
    6 
    7 /*
    8  *  'fork.c' contains the help-routines for the 'fork' system call
    9  * (see also entry.S and others).
   10  * Fork is rather simple, once you get the hang of it, but the memory
   11  * management can be a bitch. See 'mm/memory.c': 'copy_page_range()'
   12  */
   13 
   14 #include <linux/slab.h>
   15 #include <linux/init.h>
   16 #include <linux/unistd.h>
   17 #include <linux/module.h>
   18 #include <linux/vmalloc.h>
   19 #include <linux/completion.h>
   20 #include <linux/personality.h>
   21 #include <linux/mempolicy.h>
   22 #include <linux/sem.h>
   23 #include <linux/file.h>
   24 #include <linux/fdtable.h>
   25 #include <linux/iocontext.h>
   26 #include <linux/key.h>
   27 #include <linux/binfmts.h>
   28 #include <linux/mman.h>
   29 #include <linux/mmu_notifier.h>
   30 #include <linux/fs.h>
   31 #include <linux/nsproxy.h>
   32 #include <linux/capability.h>
   33 #include <linux/cpu.h>
   34 #include <linux/cgroup.h>
   35 #include <linux/security.h>
   36 #include <linux/hugetlb.h>
   37 #include <linux/seccomp.h>
   38 #include <linux/swap.h>
   39 #include <linux/syscalls.h>
   40 #include <linux/jiffies.h>
   41 #include <linux/futex.h>
   42 #include <linux/compat.h>
   43 #include <linux/kthread.h>
   44 #include <linux/task_io_accounting_ops.h>
   45 #include <linux/rcupdate.h>
   46 #include <linux/ptrace.h>
   47 #include <linux/mount.h>
   48 #include <linux/audit.h>
   49 #include <linux/memcontrol.h>
   50 #include <linux/ftrace.h>
   51 #include <linux/proc_fs.h>
   52 #include <linux/profile.h>
   53 #include <linux/rmap.h>
   54 #include <linux/ksm.h>
   55 #include <linux/acct.h>
   56 #include <linux/tsacct_kern.h>
   57 #include <linux/cn_proc.h>
   58 #include <linux/freezer.h>
   59 #include <linux/delayacct.h>
   60 #include <linux/taskstats_kern.h>
   61 #include <linux/random.h>
   62 #include <linux/tty.h>
   63 #include <linux/blkdev.h>
   64 #include <linux/fs_struct.h>
   65 #include <linux/magic.h>
   66 #include <linux/perf_event.h>
   67 #include <linux/posix-timers.h>
   68 #include <linux/user-return-notifier.h>
   69 #include <linux/oom.h>
   70 #include <linux/khugepaged.h>
   71 #include <linux/signalfd.h>
   72 #include <linux/uprobes.h>
   73 
   74 #include <asm/pgtable.h>
   75 #include <asm/pgalloc.h>
   76 #include <asm/uaccess.h>
   77 #include <asm/mmu_context.h>
   78 #include <asm/cacheflush.h>
   79 #include <asm/tlbflush.h>
   80 
   81 #include <trace/events/sched.h>
   82 
   83 #define CREATE_TRACE_POINTS
   84 #include <trace/events/task.h>
   85 
   86 /*
   87  * Protected counters by write_lock_irq(&tasklist_lock)
   88  */
   89 unsigned long total_forks;      /* Handle normal Linux uptimes. */
   90 int nr_threads;                 /* The idle threads do not count.. */
   91 
   92 int max_threads;                /* tunable limit on nr_threads */
   93 
   94 DEFINE_PER_CPU(unsigned long, process_counts) = 0;
   95 
   96 __cacheline_aligned DEFINE_RWLOCK(tasklist_lock);  /* outer */
   97 
   98 #ifdef CONFIG_PROVE_RCU
   99 int lockdep_tasklist_lock_is_held(void)
  100 {
  101         return lockdep_is_held(&tasklist_lock);
  102 }
  103 EXPORT_SYMBOL_GPL(lockdep_tasklist_lock_is_held);
  104 #endif /* #ifdef CONFIG_PROVE_RCU */
  105 
  106 int nr_processes(void)
  107 {
  108         int cpu;
  109         int total = 0;
  110 
  111         for_each_possible_cpu(cpu)
  112                 total += per_cpu(process_counts, cpu);
  113 
  114         return total;
  115 }
  116 
  117 void __weak arch_release_task_struct(struct task_struct *tsk)
  118 {
  119 }
  120 
  121 #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
  122 static struct kmem_cache *task_struct_cachep;
  123 
  124 static inline struct task_struct *alloc_task_struct_node(int node)
  125 {
  126         return kmem_cache_alloc_node(task_struct_cachep, GFP_KERNEL, node);
  127 }
  128 
  129 static inline void free_task_struct(struct task_struct *tsk)
  130 {
  131         kmem_cache_free(task_struct_cachep, tsk);
  132 }
  133 #endif
  134 
  135 void __weak arch_release_thread_info(struct thread_info *ti)
  136 {
  137 }
  138 
  139 #ifndef CONFIG_ARCH_THREAD_INFO_ALLOCATOR
  140 
  141 /*
  142  * Allocate pages if THREAD_SIZE is >= PAGE_SIZE, otherwise use a
  143  * kmemcache based allocator.
  144  */
  145 # if THREAD_SIZE >= PAGE_SIZE
  146 static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
  147                                                   int node)
  148 {
  149         struct page *page = alloc_pages_node(node, THREADINFO_GFP_ACCOUNTED,
  150                                              THREAD_SIZE_ORDER);
  151 
  152         return page ? page_address(page) : NULL;
  153 }
  154 
  155 static inline void free_thread_info(struct thread_info *ti)
  156 {
  157         free_memcg_kmem_pages((unsigned long)ti, THREAD_SIZE_ORDER);
  158 }
  159 # else
  160 static struct kmem_cache *thread_info_cache;
  161 
  162 static struct thread_info *alloc_thread_info_node(struct task_struct *tsk,
  163                                                   int node)
  164 {
  165         return kmem_cache_alloc_node(thread_info_cache, THREADINFO_GFP, node);
  166 }
  167 
  168 static void free_thread_info(struct thread_info *ti)
  169 {
  170         kmem_cache_free(thread_info_cache, ti);
  171 }
  172 
  173 void thread_info_cache_init(void)
  174 {
  175         thread_info_cache = kmem_cache_create("thread_info", THREAD_SIZE,
  176                                               THREAD_SIZE, 0, NULL);
  177         BUG_ON(thread_info_cache == NULL);
  178 }
  179 # endif
  180 #endif
  181 
  182 /* SLAB cache for signal_struct structures (tsk->signal) */
  183 static struct kmem_cache *signal_cachep;
  184 
  185 /* SLAB cache for sighand_struct structures (tsk->sighand) */
  186 struct kmem_cache *sighand_cachep;
  187 
  188 /* SLAB cache for files_struct structures (tsk->files) */
  189 struct kmem_cache *files_cachep;
  190 
  191 /* SLAB cache for fs_struct structures (tsk->fs) */
  192 struct kmem_cache *fs_cachep;
  193 
  194 /* SLAB cache for vm_area_struct structures */
  195 struct kmem_cache *vm_area_cachep;
  196 
  197 /* SLAB cache for mm_struct structures (tsk->mm) */
  198 static struct kmem_cache *mm_cachep;
  199 
  200 static void account_kernel_stack(struct thread_info *ti, int account)
  201 {
  202         struct zone *zone = page_zone(virt_to_page(ti));
  203 
  204         mod_zone_page_state(zone, NR_KERNEL_STACK, account);
  205 }
  206 
  207 void free_task(struct task_struct *tsk)
  208 {
  209         account_kernel_stack(tsk->stack, -1);
  210         arch_release_thread_info(tsk->stack);
  211         free_thread_info(tsk->stack);
  212         rt_mutex_debug_task_free(tsk);
  213         ftrace_graph_exit_task(tsk);
  214         put_seccomp_filter(tsk);
  215         arch_release_task_struct(tsk);
  216         free_task_struct(tsk);
  217 }
  218 EXPORT_SYMBOL(free_task);
  219 
  220 static inline void free_signal_struct(struct signal_struct *sig)
  221 {
  222         taskstats_tgid_free(sig);
  223         sched_autogroup_exit(sig);
  224         kmem_cache_free(signal_cachep, sig);
  225 }
  226 
  227 static inline void put_signal_struct(struct signal_struct *sig)
  228 {
  229         if (atomic_dec_and_test(&sig->sigcnt))
  230                 free_signal_struct(sig);
  231 }
  232 
  233 void __put_task_struct(struct task_struct *tsk)
  234 {
  235         WARN_ON(!tsk->exit_state);
  236         WARN_ON(atomic_read(&tsk->usage));
  237         WARN_ON(tsk == current);
  238 
  239         security_task_free(tsk);
  240         exit_creds(tsk);
  241         delayacct_tsk_free(tsk);
  242         put_signal_struct(tsk->signal);
  243 
  244         if (!profile_handoff_task(tsk))
  245                 free_task(tsk);
  246 }
  247 EXPORT_SYMBOL_GPL(__put_task_struct);
  248 
  249 void __init __weak arch_task_cache_init(void) { }
  250 
  251 void __init fork_init(unsigned long mempages)
  252 {
  253 #ifndef CONFIG_ARCH_TASK_STRUCT_ALLOCATOR
  254 #ifndef ARCH_MIN_TASKALIGN
  255 #define ARCH_MIN_TASKALIGN      L1_CACHE_BYTES
  256 #endif
  257         /* create a slab on which task_structs can be allocated */
  258         task_struct_cachep =
  259                 kmem_cache_create("task_struct", sizeof(struct task_struct),
  260                         ARCH_MIN_TASKALIGN, SLAB_PANIC | SLAB_NOTRACK, NULL);
  261 #endif
  262 
  263         /* do the arch specific task caches init */
  264         arch_task_cache_init();
  265 
  266         /*
  267          * The default maximum number of threads is set to a safe
  268          * value: the thread structures can take up at most half
  269          * of memory.
  270          */
  271         max_threads = mempages / (8 * THREAD_SIZE / PAGE_SIZE);
  272 
  273         /*
  274          * we need to allow at least 20 threads to boot a system
  275          */
  276         if (max_threads < 20)
  277                 max_threads = 20;
  278 
  279         init_task.signal->rlim[RLIMIT_NPROC].rlim_cur = max_threads/2;
  280         init_task.signal->rlim[RLIMIT_NPROC].rlim_max = max_threads/2;
  281         init_task.signal->rlim[RLIMIT_SIGPENDING] =
  282                 init_task.signal->rlim[RLIMIT_NPROC];
  283 }
  284 
  285 int __attribute__((weak)) arch_dup_task_struct(struct task_struct *dst,
  286                                                struct task_struct *src)
  287 {
  288         *dst = *src;
  289         return 0;
  290 }
  291 
  292 static struct task_struct *dup_task_struct(struct task_struct *orig)
  293 {
  294         struct task_struct *tsk;
  295         struct thread_info *ti;
  296         unsigned long *stackend;
  297         int node = tsk_fork_get_node(orig);
  298         int err;
  299 
  300         tsk = alloc_task_struct_node(node);
  301         if (!tsk)
  302                 return NULL;
  303 
  304         ti = alloc_thread_info_node(tsk, node);
  305         if (!ti)
  306                 goto free_tsk;
  307 
  308         err = arch_dup_task_struct(tsk, orig);
  309         if (err)
  310                 goto free_ti;
  311 
  312         tsk->stack = ti;
  313 
  314         setup_thread_stack(tsk, orig);
  315         clear_user_return_notifier(tsk);
  316         clear_tsk_need_resched(tsk);
  317         stackend = end_of_stack(tsk);
  318         *stackend = STACK_END_MAGIC;    /* for overflow detection */
  319 
  320 #ifdef CONFIG_CC_STACKPROTECTOR
  321         tsk->stack_canary = get_random_int();
  322 #endif
  323 
  324         /*
  325          * One for us, one for whoever does the "release_task()" (usually
  326          * parent)
  327          */
  328         atomic_set(&tsk->usage, 2);
  329 #ifdef CONFIG_BLK_DEV_IO_TRACE
  330         tsk->btrace_seq = 0;
  331 #endif
  332         tsk->splice_pipe = NULL;
  333         tsk->task_frag.page = NULL;
  334 
  335         account_kernel_stack(ti, 1);
  336 
  337         return tsk;
  338 
  339 free_ti:
  340         free_thread_info(ti);
  341 free_tsk:
  342         free_task_struct(tsk);
  343         return NULL;
  344 }
  345 
  346 #ifdef CONFIG_MMU
  347 static int dup_mmap(struct mm_struct *mm, struct mm_struct *oldmm)
  348 {
  349         struct vm_area_struct *mpnt, *tmp, *prev, **pprev;
  350         struct rb_node **rb_link, *rb_parent;
  351         int retval;
  352         unsigned long charge;
  353         struct mempolicy *pol;
  354 
  355         uprobe_start_dup_mmap();
  356         down_write(&oldmm->mmap_sem);
  357         flush_cache_dup_mm(oldmm);
  358         uprobe_dup_mmap(oldmm, mm);
  359         /*
  360          * Not linked in yet - no deadlock potential:
  361          */
  362         down_write_nested(&mm->mmap_sem, SINGLE_DEPTH_NESTING);
  363 
  364         mm->locked_vm = 0;
  365         mm->mmap = NULL;
  366         mm->mmap_cache = NULL;
  367         mm->free_area_cache = oldmm->mmap_base;
  368         mm->cached_hole_size = ~0UL;
  369         mm->map_count = 0;
  370         cpumask_clear(mm_cpumask(mm));
  371         mm->mm_rb = RB_ROOT;
  372         rb_link = &mm->mm_rb.rb_node;
  373         rb_parent = NULL;
  374         pprev = &mm->mmap;
  375         retval = ksm_fork(mm, oldmm);
  376         if (retval)
  377                 goto out;
  378         retval = khugepaged_fork(mm, oldmm);
  379         if (retval)
  380                 goto out;
  381 
  382         prev = NULL;
  383         for (mpnt = oldmm->mmap; mpnt; mpnt = mpnt->vm_next) {
  384                 struct file *file;
  385 
  386                 if (mpnt->vm_flags & VM_DONTCOPY) {
  387                         vm_stat_account(mm, mpnt->vm_flags, mpnt->vm_file,
  388                                                         -vma_pages(mpnt));
  389                         continue;
  390                 }
  391                 charge = 0;
  392                 if (mpnt->vm_flags & VM_ACCOUNT) {
  393                         unsigned long len = vma_pages(mpnt);
  394 
  395                         if (security_vm_enough_memory_mm(oldmm, len)) /* sic */
  396                                 goto fail_nomem;
  397                         charge = len;
  398                 }
  399                 tmp = kmem_cache_alloc(vm_area_cachep, GFP_KERNEL);
  400                 if (!tmp)
  401                         goto fail_nomem;
  402                 *tmp = *mpnt;
  403                 INIT_LIST_HEAD(&tmp->anon_vma_chain);
  404                 pol = mpol_dup(vma_policy(mpnt));
  405                 retval = PTR_ERR(pol);
  406                 if (IS_ERR(pol))
  407                         goto fail_nomem_policy;
  408                 vma_set_policy(tmp, pol);
  409                 tmp->vm_mm = mm;
  410                 if (anon_vma_fork(tmp, mpnt))
  411                         goto fail_nomem_anon_vma_fork;
  412                 tmp->vm_flags &= ~VM_LOCKED;
  413                 tmp->vm_next = tmp->vm_prev = NULL;
  414                 file = tmp->vm_file;
  415                 if (file) {
  416                         struct inode *inode = file->f_path.dentry->d_inode;
  417                         struct address_space *mapping = file->f_mapping;
  418 
  419                         get_file(file);
  420                         if (tmp->vm_flags & VM_DENYWRITE)
  421                                 atomic_dec(&inode->i_writecount);
  422                         mutex_lock(&mapping->i_mmap_mutex);
  423                         if (tmp->vm_flags & VM_SHARED)
  424                                 mapping->i_mmap_writable++;
  425                         flush_dcache_mmap_lock(mapping);
  426                         /* insert tmp into the share list, just after mpnt */
  427                         if (unlikely(tmp->vm_flags & VM_NONLINEAR))
  428                                 vma_nonlinear_insert(tmp,
  429                                                 &mapping->i_mmap_nonlinear);
  430                         else
  431                                 vma_interval_tree_insert_after(tmp, mpnt,
  432                                                         &mapping->i_mmap);
  433                         flush_dcache_mmap_unlock(mapping);
  434                         mutex_unlock(&mapping->i_mmap_mutex);
  435                 }
  436 
  437                 /*
  438                  * Clear hugetlb-related page reserves for children. This only
  439                  * affects MAP_PRIVATE mappings. Faults generated by the child
  440                  * are not guaranteed to succeed, even if read-only
  441                  */
  442                 if (is_vm_hugetlb_page(tmp))
  443                         reset_vma_resv_huge_pages(tmp);
  444 
  445                 /*
  446                  * Link in the new vma and copy the page table entries.
  447                  */
  448                 *pprev = tmp;
  449                 pprev = &tmp->vm_next;
  450                 tmp->vm_prev = prev;
  451                 prev = tmp;
  452 
  453                 __vma_link_rb(mm, tmp, rb_link, rb_parent);
  454                 rb_link = &tmp->vm_rb.rb_right;
  455                 rb_parent = &tmp->vm_rb;
  456 
  457                 mm->map_count++;
  458                 retval = copy_page_range(mm, oldmm, mpnt);
  459 
  460                 if (tmp->vm_ops && tmp->vm_ops->open)
  461                         tmp->vm_ops->open(tmp);
  462 
  463                 if (retval)
  464                         goto out;
  465         }
  466         /* a new mm has just been created */
  467         arch_dup_mmap(oldmm, mm);
  468         retval = 0;
  469 out:
  470         up_write(&mm->mmap_sem);
  471         flush_tlb_mm(oldmm);
  472         up_write(&oldmm->mmap_sem);
  473         uprobe_end_dup_mmap();
  474         return retval;
  475 fail_nomem_anon_vma_fork:
  476         mpol_put(pol);
  477 fail_nomem_policy:
  478         kmem_cache_free(vm_area_cachep, tmp);
  479 fail_nomem:
  480         retval = -ENOMEM;
  481         vm_unacct_memory(charge);
  482         goto out;
  483 }
  484 
  485 static inline int mm_alloc_pgd(struct mm_struct *mm)
  486 {
  487         mm->pgd = pgd_alloc(mm);
  488         if (unlikely(!mm->pgd))
  489                 return -ENOMEM;
  490         return 0;
  491 }
  492 
  493 static inline void mm_free_pgd(struct mm_struct *mm)
  494 {
  495         pgd_free(mm, mm->pgd);
  496 }
  497 #else
  498 #define dup_mmap(mm, oldmm)     (0)
  499 #define mm_alloc_pgd(mm)        (0)
  500 #define mm_free_pgd(mm)
  501 #endif /* CONFIG_MMU */
  502 
  503 __cacheline_aligned_in_smp DEFINE_SPINLOCK(mmlist_lock);
  504 
  505 #define allocate_mm()   (kmem_cache_alloc(mm_cachep, GFP_KERNEL))
  506 #define free_mm(mm)     (kmem_cache_free(mm_cachep, (mm)))
  507 
  508 static unsigned long default_dump_filter = MMF_DUMP_FILTER_DEFAULT;
  509 
  510 static int __init coredump_filter_setup(char *s)
  511 {
  512         default_dump_filter =
  513                 (simple_strtoul(s, NULL, 0) << MMF_DUMP_FILTER_SHIFT) &
  514                 MMF_DUMP_FILTER_MASK;
  515         return 1;
  516 }
  517 
  518 __setup("coredump_filter=", coredump_filter_setup);
  519 
  520 #include <linux/init_task.h>
  521 
  522 static void mm_init_aio(struct mm_struct *mm)
  523 {
  524 #ifdef CONFIG_AIO
  525         spin_lock_init(&mm->ioctx_lock);
  526         INIT_HLIST_HEAD(&mm->ioctx_list);
  527 #endif
  528 }
  529 
  530 static struct mm_struct *mm_init(struct mm_struct *mm, struct task_struct *p)
  531 {
  532         atomic_set(&mm->mm_users, 1);
  533         atomic_set(&mm->mm_count, 1);
  534         init_rwsem(&mm->mmap_sem);
  535         INIT_LIST_HEAD(&mm->mmlist);
  536         mm->flags = (current->mm) ?
  537                 (current->mm->flags & MMF_INIT_MASK) : default_dump_filter;
  538         mm->core_state = NULL;
  539         mm->nr_ptes = 0;
  540         memset(&mm->rss_stat, 0, sizeof(mm->rss_stat));
  541         spin_lock_init(&mm->page_table_lock);
  542         mm->free_area_cache = TASK_UNMAPPED_BASE;
  543         mm->cached_hole_size = ~0UL;
  544         mm_init_aio(mm);
  545         mm_init_owner(mm, p);
  546 
  547         if (likely(!mm_alloc_pgd(mm))) {
  548                 mm->def_flags = 0;
  549                 mmu_notifier_mm_init(mm);
  550                 return mm;
  551         }
  552 
  553         free_mm(mm);
  554         return NULL;
  555 }
  556 
  557 static void check_mm(struct mm_struct *mm)
  558 {
  559         int i;
  560 
  561         for (i = 0; i < NR_MM_COUNTERS; i++) {
  562                 long x = atomic_long_read(&mm->rss_stat.count[i]);
  563 
  564                 if (unlikely(x))
  565                         printk(KERN_ALERT "BUG: Bad rss-counter state "
  566                                           "mm:%p idx:%d val:%ld\n", mm, i, x);
  567         }
  568 
  569 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  570         VM_BUG_ON(mm->pmd_huge_pte);
  571 #endif
  572 }
  573 
  574 /*
  575  * Allocate and initialize an mm_struct.
  576  */
  577 struct mm_struct *mm_alloc(void)
  578 {
  579         struct mm_struct *mm;
  580 
  581         mm = allocate_mm();
  582         if (!mm)
  583                 return NULL;
  584 
  585         memset(mm, 0, sizeof(*mm));
  586         mm_init_cpumask(mm);
  587         return mm_init(mm, current);
  588 }
  589 
  590 /*
  591  * Called when the last reference to the mm
  592  * is dropped: either by a lazy thread or by
  593  * mmput. Free the page directory and the mm.
  594  */
  595 void __mmdrop(struct mm_struct *mm)
  596 {
  597         BUG_ON(mm == &init_mm);
  598         mm_free_pgd(mm);
  599         destroy_context(mm);
  600         mmu_notifier_mm_destroy(mm);
  601         check_mm(mm);
  602         free_mm(mm);
  603 }
  604 EXPORT_SYMBOL_GPL(__mmdrop);
  605 
  606 /*
  607  * Decrement the use count and release all resources for an mm.
  608  */
  609 void mmput(struct mm_struct *mm)
  610 {
  611         might_sleep();
  612 
  613         if (atomic_dec_and_test(&mm->mm_users)) {
  614                 uprobe_clear_state(mm);
  615                 exit_aio(mm);
  616                 ksm_exit(mm);
  617                 khugepaged_exit(mm); /* must run before exit_mmap */
  618                 exit_mmap(mm);
  619                 set_mm_exe_file(mm, NULL);
  620                 if (!list_empty(&mm->mmlist)) {
  621                         spin_lock(&mmlist_lock);
  622                         list_del(&mm->mmlist);
  623                         spin_unlock(&mmlist_lock);
  624                 }
  625                 if (mm->binfmt)
  626                         module_put(mm->binfmt->module);
  627                 mmdrop(mm);
  628         }
  629 }
  630 EXPORT_SYMBOL_GPL(mmput);
  631 
  632 void set_mm_exe_file(struct mm_struct *mm, struct file *new_exe_file)
  633 {
  634         if (new_exe_file)
  635                 get_file(new_exe_file);
  636         if (mm->exe_file)
  637                 fput(mm->exe_file);
  638         mm->exe_file = new_exe_file;
  639 }
  640 
  641 struct file *get_mm_exe_file(struct mm_struct *mm)
  642 {
  643         struct file *exe_file;
  644 
  645         /* We need mmap_sem to protect against races with removal of exe_file */
  646         down_read(&mm->mmap_sem);
  647         exe_file = mm->exe_file;
  648         if (exe_file)
  649                 get_file(exe_file);
  650         up_read(&mm->mmap_sem);
  651         return exe_file;
  652 }
  653 
  654 static void dup_mm_exe_file(struct mm_struct *oldmm, struct mm_struct *newmm)
  655 {
  656         /* It's safe to write the exe_file pointer without exe_file_lock because
  657          * this is called during fork when the task is not yet in /proc */
  658         newmm->exe_file = get_mm_exe_file(oldmm);
  659 }
  660 
  661 /**
  662  * get_task_mm - acquire a reference to the task's mm
  663  *
  664  * Returns %NULL if the task has no mm.  Checks PF_KTHREAD (meaning
  665  * this kernel workthread has transiently adopted a user mm with use_mm,
  666  * to do its AIO) is not set and if so returns a reference to it, after
  667  * bumping up the use count.  User must release the mm via mmput()
  668  * after use.  Typically used by /proc and ptrace.
  669  */
  670 struct mm_struct *get_task_mm(struct task_struct *task)
  671 {
  672         struct mm_struct *mm;
  673 
  674         task_lock(task);
  675         mm = task->mm;
  676         if (mm) {
  677                 if (task->flags & PF_KTHREAD)
  678                         mm = NULL;
  679                 else
  680                         atomic_inc(&mm->mm_users);
  681         }
  682         task_unlock(task);
  683         return mm;
  684 }
  685 EXPORT_SYMBOL_GPL(get_task_mm);
  686 
  687 struct mm_struct *mm_access(struct task_struct *task, unsigned int mode)
  688 {
  689         struct mm_struct *mm;
  690         int err;
  691 
  692         err =  mutex_lock_killable(&task->signal->cred_guard_mutex);
  693         if (err)
  694                 return ERR_PTR(err);
  695 
  696         mm = get_task_mm(task);
  697         if (mm && mm != current->mm &&
  698                         !ptrace_may_access(task, mode)) {
  699                 mmput(mm);
  700                 mm = ERR_PTR(-EACCES);
  701         }
  702         mutex_unlock(&task->signal->cred_guard_mutex);
  703 
  704         return mm;
  705 }
  706 
  707 static void complete_vfork_done(struct task_struct *tsk)
  708 {
  709         struct completion *vfork;
  710 
  711         task_lock(tsk);
  712         vfork = tsk->vfork_done;
  713         if (likely(vfork)) {
  714                 tsk->vfork_done = NULL;
  715                 complete(vfork);
  716         }
  717         task_unlock(tsk);
  718 }
  719 
  720 static int wait_for_vfork_done(struct task_struct *child,
  721                                 struct completion *vfork)
  722 {
  723         int killed;
  724 
  725         freezer_do_not_count();
  726         killed = wait_for_completion_killable(vfork);
  727         freezer_count();
  728 
  729         if (killed) {
  730                 task_lock(child);
  731                 child->vfork_done = NULL;
  732                 task_unlock(child);
  733         }
  734 
  735         put_task_struct(child);
  736         return killed;
  737 }
  738 
  739 /* Please note the differences between mmput and mm_release.
  740  * mmput is called whenever we stop holding onto a mm_struct,
  741  * error success whatever.
  742  *
  743  * mm_release is called after a mm_struct has been removed
  744  * from the current process.
  745  *
  746  * This difference is important for error handling, when we
  747  * only half set up a mm_struct for a new process and need to restore
  748  * the old one.  Because we mmput the new mm_struct before
  749  * restoring the old one. . .
  750  * Eric Biederman 10 January 1998
  751  */
  752 void mm_release(struct task_struct *tsk, struct mm_struct *mm)
  753 {
  754         /* Get rid of any futexes when releasing the mm */
  755 #ifdef CONFIG_FUTEX
  756         if (unlikely(tsk->robust_list)) {
  757                 exit_robust_list(tsk);
  758                 tsk->robust_list = NULL;
  759         }
  760 #ifdef CONFIG_COMPAT
  761         if (unlikely(tsk->compat_robust_list)) {
  762                 compat_exit_robust_list(tsk);
  763                 tsk->compat_robust_list = NULL;
  764         }
  765 #endif
  766         if (unlikely(!list_empty(&tsk->pi_state_list)))
  767                 exit_pi_state_list(tsk);
  768 #endif
  769 
  770         uprobe_free_utask(tsk);
  771 
  772         /* Get rid of any cached register state */
  773         deactivate_mm(tsk, mm);
  774 
  775         /*
  776          * If we're exiting normally, clear a user-space tid field if
  777          * requested.  We leave this alone when dying by signal, to leave
  778          * the value intact in a core dump, and to save the unnecessary
  779          * trouble, say, a killed vfork parent shouldn't touch this mm.
  780          * Userland only wants this done for a sys_exit.
  781          */
  782         if (tsk->clear_child_tid) {
  783                 if (!(tsk->flags & PF_SIGNALED) &&
  784                     atomic_read(&mm->mm_users) > 1) {
  785                         /*
  786                          * We don't check the error code - if userspace has
  787                          * not set up a proper pointer then tough luck.
  788                          */
  789                         put_user(0, tsk->clear_child_tid);
  790                         sys_futex(tsk->clear_child_tid, FUTEX_WAKE,
  791                                         1, NULL, NULL, 0);
  792                 }
  793                 tsk->clear_child_tid = NULL;
  794         }
  795 
  796         /*
  797          * All done, finally we can wake up parent and return this mm to him.
  798          * Also kthread_stop() uses this completion for synchronization.
  799          */
  800         if (tsk->vfork_done)
  801                 complete_vfork_done(tsk);
  802 }
  803 
  804 /*
  805  * Allocate a new mm structure and copy contents from the
  806  * mm structure of the passed in task structure.
  807  */
  808 struct mm_struct *dup_mm(struct task_struct *tsk)
  809 {
  810         struct mm_struct *mm, *oldmm = current->mm;
  811         int err;
  812 
  813         if (!oldmm)
  814                 return NULL;
  815 
  816         mm = allocate_mm();
  817         if (!mm)
  818                 goto fail_nomem;
  819 
  820         memcpy(mm, oldmm, sizeof(*mm));
  821         mm_init_cpumask(mm);
  822 
  823 #ifdef CONFIG_TRANSPARENT_HUGEPAGE
  824         mm->pmd_huge_pte = NULL;
  825 #endif
  826 #ifdef CONFIG_NUMA_BALANCING
  827         mm->first_nid = NUMA_PTE_SCAN_INIT;
  828 #endif
  829         if (!mm_init(mm, tsk))
  830                 goto fail_nomem;
  831 
  832         if (init_new_context(tsk, mm))
  833                 goto fail_nocontext;
  834 
  835         dup_mm_exe_file(oldmm, mm);
  836 
  837         err = dup_mmap(mm, oldmm);
  838         if (err)
  839                 goto free_pt;
  840 
  841         mm->hiwater_rss = get_mm_rss(mm);
  842         mm->hiwater_vm = mm->total_vm;
  843 
  844         if (mm->binfmt && !try_module_get(mm->binfmt->module))
  845                 goto free_pt;
  846 
  847         return mm;
  848 
  849 free_pt:
  850         /* don't put binfmt in mmput, we haven't got module yet */
  851         mm->binfmt = NULL;
  852         mmput(mm);
  853 
  854 fail_nomem:
  855         return NULL;
  856 
  857 fail_nocontext:
  858         /*
  859          * If init_new_context() failed, we cannot use mmput() to free the mm
  860          * because it calls destroy_context()
  861          */
  862         mm_free_pgd(mm);
  863         free_mm(mm);
  864         return NULL;
  865 }
  866 
  867 static int copy_mm(unsigned long clone_flags, struct task_struct *tsk)
  868 {
  869         struct mm_struct *mm, *oldmm;
  870         int retval;
  871 
  872         tsk->min_flt = tsk->maj_flt = 0;
  873         tsk->nvcsw = tsk->nivcsw = 0;
  874 #ifdef CONFIG_DETECT_HUNG_TASK
  875         tsk->last_switch_count = tsk->nvcsw + tsk->nivcsw;
  876 #endif
  877 
  878         tsk->mm = NULL;
  879         tsk->active_mm = NULL;
  880 
  881         /*
  882          * Are we cloning a kernel thread?
  883          *
  884          * We need to steal a active VM for that..
  885          */
  886         oldmm = current->mm;
  887         if (!oldmm)
  888                 return 0;
  889 
  890         if (clone_flags & CLONE_VM) {
  891                 atomic_inc(&oldmm->mm_users);
  892                 mm = oldmm;
  893                 goto good_mm;
  894         }
  895 
  896         retval = -ENOMEM;
  897         mm = dup_mm(tsk);
  898         if (!mm)
  899                 goto fail_nomem;
  900 
  901 good_mm:
  902         tsk->mm = mm;
  903         tsk->active_mm = mm;
  904         return 0;
  905 
  906 fail_nomem:
  907         return retval;
  908 }
  909 
  910 static int copy_fs(unsigned long clone_flags, struct task_struct *tsk)
  911 {
  912         struct fs_struct *fs = current->fs;
  913         if (clone_flags & CLONE_FS) {
  914                 /* tsk->fs is already what we want */
  915                 spin_lock(&fs->lock);
  916                 if (fs->in_exec) {
  917                         spin_unlock(&fs->lock);
  918                         return -EAGAIN;
  919                 }
  920                 fs->users++;
  921                 spin_unlock(&fs->lock);
  922                 return 0;
  923         }
  924         tsk->fs = copy_fs_struct(fs);
  925         if (!tsk->fs)
  926                 return -ENOMEM;
  927         return 0;
  928 }
  929 
  930 static int copy_files(unsigned long clone_flags, struct task_struct *tsk)
  931 {
  932         struct files_struct *oldf, *newf;
  933         int error = 0;
  934 
  935         /*
  936          * A background process may not have any files ...
  937          */
  938         oldf = current->files;
  939         if (!oldf)
  940                 goto out;
  941 
  942         if (clone_flags & CLONE_FILES) {
  943                 atomic_inc(&oldf->count);
  944                 goto out;
  945         }
  946 
  947         newf = dup_fd(oldf, &error);
  948         if (!newf)
  949                 goto out;
  950 
  951         tsk->files = newf;
  952         error = 0;
  953 out:
  954         return error;
  955 }
  956 
  957 static int copy_io(unsigned long clone_flags, struct task_struct *tsk)
  958 {
  959 #ifdef CONFIG_BLOCK
  960         struct io_context *ioc = current->io_context;
  961         struct io_context *new_ioc;
  962 
  963         if (!ioc)
  964                 return 0;
  965         /*
  966          * Share io context with parent, if CLONE_IO is set
  967          */
  968         if (clone_flags & CLONE_IO) {
  969                 ioc_task_link(ioc);
  970                 tsk->io_context = ioc;
  971         } else if (ioprio_valid(ioc->ioprio)) {
  972                 new_ioc = get_task_io_context(tsk, GFP_KERNEL, NUMA_NO_NODE);
  973                 if (unlikely(!new_ioc))
  974                         return -ENOMEM;
  975 
  976                 new_ioc->ioprio = ioc->ioprio;
  977                 put_io_context(new_ioc);
  978         }
  979 #endif
  980         return 0;
  981 }
  982 
  983 static int copy_sighand(unsigned long clone_flags, struct task_struct *tsk)
  984 {
  985         struct sighand_struct *sig;
  986 
  987         if (clone_flags & CLONE_SIGHAND) {
  988                 atomic_inc(&current->sighand->count);
  989                 return 0;
  990         }
  991         sig = kmem_cache_alloc(sighand_cachep, GFP_KERNEL);
  992         rcu_assign_pointer(tsk->sighand, sig);
  993         if (!sig)
  994                 return -ENOMEM;
  995         atomic_set(&sig->count, 1);
  996         memcpy(sig->action, current->sighand->action, sizeof(sig->action));
  997         return 0;
  998 }
  999 
 1000 void __cleanup_sighand(struct sighand_struct *sighand)
 1001 {
 1002         if (atomic_dec_and_test(&sighand->count)) {
 1003                 signalfd_cleanup(sighand);
 1004                 kmem_cache_free(sighand_cachep, sighand);
 1005         }
 1006 }
 1007 
 1008 
 1009 /*
 1010  * Initialize POSIX timer handling for a thread group.
 1011  */
 1012 static void posix_cpu_timers_init_group(struct signal_struct *sig)
 1013 {
 1014         unsigned long cpu_limit;
 1015 
 1016         /* Thread group counters. */
 1017         thread_group_cputime_init(sig);
 1018 
 1019         cpu_limit = ACCESS_ONCE(sig->rlim[RLIMIT_CPU].rlim_cur);
 1020         if (cpu_limit != RLIM_INFINITY) {
 1021                 sig->cputime_expires.prof_exp = secs_to_cputime(cpu_limit);
 1022                 sig->cputimer.running = 1;
 1023         }
 1024 
 1025         /* The timer lists. */
 1026         INIT_LIST_HEAD(&sig->cpu_timers[0]);
 1027         INIT_LIST_HEAD(&sig->cpu_timers[1]);
 1028         INIT_LIST_HEAD(&sig->cpu_timers[2]);
 1029 }
 1030 
 1031 static int copy_signal(unsigned long clone_flags, struct task_struct *tsk)
 1032 {
 1033         struct signal_struct *sig;
 1034 
 1035         if (clone_flags & CLONE_THREAD)
 1036                 return 0;
 1037 
 1038         sig = kmem_cache_zalloc(signal_cachep, GFP_KERNEL);
 1039         tsk->signal = sig;
 1040         if (!sig)
 1041                 return -ENOMEM;
 1042 
 1043         sig->nr_threads = 1;
 1044         atomic_set(&sig->live, 1);
 1045         atomic_set(&sig->sigcnt, 1);
 1046         init_waitqueue_head(&sig->wait_chldexit);
 1047         sig->curr_target = tsk;
 1048         init_sigpending(&sig->shared_pending);
 1049         INIT_LIST_HEAD(&sig->posix_timers);
 1050 
 1051         hrtimer_init(&sig->real_timer, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
 1052         sig->real_timer.function = it_real_fn;
 1053 
 1054         task_lock(current->group_leader);
 1055         memcpy(sig->rlim, current->signal->rlim, sizeof sig->rlim);
 1056         task_unlock(current->group_leader);
 1057 
 1058         posix_cpu_timers_init_group(sig);
 1059 
 1060         tty_audit_fork(sig);
 1061         sched_autogroup_fork(sig);
 1062 
 1063 #ifdef CONFIG_CGROUPS
 1064         init_rwsem(&sig->group_rwsem);
 1065 #endif
 1066 
 1067         sig->oom_score_adj = current->signal->oom_score_adj;
 1068         sig->oom_score_adj_min = current->signal->oom_score_adj_min;
 1069 
 1070         sig->has_child_subreaper = current->signal->has_child_subreaper ||
 1071                                    current->signal->is_child_subreaper;
 1072 
 1073         mutex_init(&sig->cred_guard_mutex);
 1074 
 1075         return 0;
 1076 }
 1077 
 1078 static void copy_flags(unsigned long clone_flags, struct task_struct *p)
 1079 {
 1080         unsigned long new_flags = p->flags;
 1081 
 1082         new_flags &= ~(PF_SUPERPRIV | PF_WQ_WORKER);
 1083         new_flags |= PF_FORKNOEXEC;
 1084         p->flags = new_flags;
 1085 }
 1086 
 1087 SYSCALL_DEFINE1(set_tid_address, int __user *, tidptr)
 1088 {
 1089         current->clear_child_tid = tidptr;
 1090 
 1091         return task_pid_vnr(current);
 1092 }
 1093 
 1094 static void rt_mutex_init_task(struct task_struct *p)
 1095 {
 1096         raw_spin_lock_init(&p->pi_lock);
 1097 #ifdef CONFIG_RT_MUTEXES
 1098         plist_head_init(&p->pi_waiters);
 1099         p->pi_blocked_on = NULL;
 1100 #endif
 1101 }
 1102 
 1103 #ifdef CONFIG_MM_OWNER
 1104 void mm_init_owner(struct mm_struct *mm, struct task_struct *p)
 1105 {
 1106         mm->owner = p;
 1107 }
 1108 #endif /* CONFIG_MM_OWNER */
 1109 
 1110 /*
 1111  * Initialize POSIX timer handling for a single task.
 1112  */
 1113 static void posix_cpu_timers_init(struct task_struct *tsk)
 1114 {
 1115         tsk->cputime_expires.prof_exp = 0;
 1116         tsk->cputime_expires.virt_exp = 0;
 1117         tsk->cputime_expires.sched_exp = 0;
 1118         INIT_LIST_HEAD(&tsk->cpu_timers[0]);
 1119         INIT_LIST_HEAD(&tsk->cpu_timers[1]);
 1120         INIT_LIST_HEAD(&tsk->cpu_timers[2]);
 1121 }
 1122 
 1123 /*
 1124  * This creates a new process as a copy of the old one,
 1125  * but does not actually start it yet.
 1126  *
 1127  * It copies the registers, and all the appropriate
 1128  * parts of the process environment (as per the clone
 1129  * flags). The actual kick-off is left to the caller.
 1130  */
 1131 static struct task_struct *copy_process(unsigned long clone_flags,
 1132                                         unsigned long stack_start,
 1133                                         unsigned long stack_size,
 1134                                         int __user *child_tidptr,
 1135                                         struct pid *pid,
 1136                                         int trace)
 1137 {
 1138         int retval;
 1139         struct task_struct *p;
 1140 
 1141         if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))
 1142                 return ERR_PTR(-EINVAL);
 1143 
 1144         /*
 1145          * Thread groups must share signals as well, and detached threads
 1146          * can only be started up within the thread group.
 1147          */
 1148         if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))
 1149                 return ERR_PTR(-EINVAL);
 1150 
 1151         /*
 1152          * Shared signal handlers imply shared VM. By way of the above,
 1153          * thread groups also imply shared VM. Blocking this case allows
 1154          * for various simplifications in other code.
 1155          */
 1156         if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))
 1157                 return ERR_PTR(-EINVAL);
 1158 
 1159         /*
 1160          * Siblings of global init remain as zombies on exit since they are
 1161          * not reaped by their parent (swapper). To solve this and to avoid
 1162          * multi-rooted process trees, prevent global and container-inits
 1163          * from creating siblings.
 1164          */
 1165         if ((clone_flags & CLONE_PARENT) &&
 1166                                 current->signal->flags & SIGNAL_UNKILLABLE)
 1167                 return ERR_PTR(-EINVAL);
 1168 
 1169         /*
 1170          * If the new process will be in a different pid namespace
 1171          * don't allow the creation of threads.
 1172          */
 1173         if ((clone_flags & (CLONE_VM|CLONE_NEWPID)) &&
 1174             (task_active_pid_ns(current) != current->nsproxy->pid_ns))
 1175                 return ERR_PTR(-EINVAL);
 1176 
 1177         retval = security_task_create(clone_flags);
 1178         if (retval)
 1179                 goto fork_out;
 1180 
 1181         retval = -ENOMEM;
 1182         p = dup_task_struct(current);
 1183         if (!p)
 1184                 goto fork_out;
 1185 
 1186         ftrace_graph_init_task(p);
 1187         get_seccomp_filter(p);
 1188 
 1189         rt_mutex_init_task(p);
 1190 
 1191 #ifdef CONFIG_PROVE_LOCKING
 1192         DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);
 1193         DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);
 1194 #endif
 1195         retval = -EAGAIN;
 1196         if (atomic_read(&p->real_cred->user->processes) >=
 1197                         task_rlimit(p, RLIMIT_NPROC)) {
 1198                 if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&
 1199                     p->real_cred->user != INIT_USER)
 1200                         goto bad_fork_free;
 1201         }
 1202         current->flags &= ~PF_NPROC_EXCEEDED;
 1203 
 1204         retval = copy_creds(p, clone_flags);
 1205         if (retval < 0)
 1206                 goto bad_fork_free;
 1207 
 1208         /*
 1209          * If multiple threads are within copy_process(), then this check
 1210          * triggers too late. This doesn't hurt, the check is only there
 1211          * to stop root fork bombs.
 1212          */
 1213         retval = -EAGAIN;
 1214         if (nr_threads >= max_threads)
 1215                 goto bad_fork_cleanup_count;
 1216 
 1217         if (!try_module_get(task_thread_info(p)->exec_domain->module))
 1218                 goto bad_fork_cleanup_count;
 1219 
 1220         p->did_exec = 0;
 1221         delayacct_tsk_init(p);  /* Must remain after dup_task_struct() */
 1222         copy_flags(clone_flags, p);
 1223         INIT_LIST_HEAD(&p->children);
 1224         INIT_LIST_HEAD(&p->sibling);
 1225         rcu_copy_process(p);
 1226         p->vfork_done = NULL;
 1227         spin_lock_init(&p->alloc_lock);
 1228 
 1229         init_sigpending(&p->pending);
 1230 
 1231         p->utime = p->stime = p->gtime = 0;
 1232         p->utimescaled = p->stimescaled = 0;
 1233 #ifndef CONFIG_VIRT_CPU_ACCOUNTING
 1234         p->prev_cputime.utime = p->prev_cputime.stime = 0;
 1235 #endif
 1236 #if defined(SPLIT_RSS_COUNTING)
 1237         memset(&p->rss_stat, 0, sizeof(p->rss_stat));
 1238 #endif
 1239 
 1240         p->default_timer_slack_ns = current->timer_slack_ns;
 1241 
 1242         task_io_accounting_init(&p->ioac);
 1243         acct_clear_integrals(p);
 1244 
 1245         posix_cpu_timers_init(p);
 1246 
 1247         do_posix_clock_monotonic_gettime(&p->start_time);
 1248         p->real_start_time = p->start_time;
 1249         monotonic_to_bootbased(&p->real_start_time);
 1250         p->io_context = NULL;
 1251         p->audit_context = NULL;
 1252         if (clone_flags & CLONE_THREAD)
 1253                 threadgroup_change_begin(current);
 1254         cgroup_fork(p);
 1255 #ifdef CONFIG_NUMA
 1256         p->mempolicy = mpol_dup(p->mempolicy);
 1257         if (IS_ERR(p->mempolicy)) {
 1258                 retval = PTR_ERR(p->mempolicy);
 1259                 p->mempolicy = NULL;
 1260                 goto bad_fork_cleanup_cgroup;
 1261         }
 1262         mpol_fix_fork_child_flag(p);
 1263 #endif
 1264 #ifdef CONFIG_CPUSETS
 1265         p->cpuset_mem_spread_rotor = NUMA_NO_NODE;
 1266         p->cpuset_slab_spread_rotor = NUMA_NO_NODE;
 1267         seqcount_init(&p->mems_allowed_seq);
 1268 #endif
 1269 #ifdef CONFIG_TRACE_IRQFLAGS
 1270         p->irq_events = 0;
 1271         p->hardirqs_enabled = 0;
 1272         p->hardirq_enable_ip = 0;
 1273         p->hardirq_enable_event = 0;
 1274         p->hardirq_disable_ip = _THIS_IP_;
 1275         p->hardirq_disable_event = 0;
 1276         p->softirqs_enabled = 1;
 1277         p->softirq_enable_ip = _THIS_IP_;
 1278         p->softirq_enable_event = 0;
 1279         p->softirq_disable_ip = 0;
 1280         p->softirq_disable_event = 0;
 1281         p->hardirq_context = 0;
 1282         p->softirq_context = 0;
 1283 #endif
 1284 #ifdef CONFIG_LOCKDEP
 1285         p->lockdep_depth = 0; /* no locks held yet */
 1286         p->curr_chain_key = 0;
 1287         p->lockdep_recursion = 0;
 1288 #endif
 1289 
 1290 #ifdef CONFIG_DEBUG_MUTEXES
 1291         p->blocked_on = NULL; /* not blocked yet */
 1292 #endif
 1293 #ifdef CONFIG_MEMCG
 1294         p->memcg_batch.do_batch = 0;
 1295         p->memcg_batch.memcg = NULL;
 1296 #endif
 1297 
 1298         /* Perform scheduler related setup. Assign this task to a CPU. */
 1299         sched_fork(p);
 1300 
 1301         retval = perf_event_init_task(p);
 1302         if (retval)
 1303                 goto bad_fork_cleanup_policy;
 1304         retval = audit_alloc(p);
 1305         if (retval)
 1306                 goto bad_fork_cleanup_policy;
 1307         /* copy all the process information */
 1308         retval = copy_semundo(clone_flags, p);
 1309         if (retval)
 1310                 goto bad_fork_cleanup_audit;
 1311         retval = copy_files(clone_flags, p);
 1312         if (retval)
 1313                 goto bad_fork_cleanup_semundo;
 1314         retval = copy_fs(clone_flags, p);
 1315         if (retval)
 1316                 goto bad_fork_cleanup_files;
 1317         retval = copy_sighand(clone_flags, p);
 1318         if (retval)
 1319                 goto bad_fork_cleanup_fs;
 1320         retval = copy_signal(clone_flags, p);
 1321         if (retval)
 1322                 goto bad_fork_cleanup_sighand;
 1323         retval = copy_mm(clone_flags, p);
 1324         if (retval)
 1325                 goto bad_fork_cleanup_signal;
 1326         retval = copy_namespaces(clone_flags, p);
 1327         if (retval)
 1328                 goto bad_fork_cleanup_mm;
 1329         retval = copy_io(clone_flags, p);
 1330         if (retval)
 1331                 goto bad_fork_cleanup_namespaces;
 1332         retval = copy_thread(clone_flags, stack_start, stack_size, p);
 1333         if (retval)
 1334                 goto bad_fork_cleanup_io;
 1335 
 1336         if (pid != &init_struct_pid) {
 1337                 retval = -ENOMEM;
 1338                 pid = alloc_pid(p->nsproxy->pid_ns);
 1339                 if (!pid)
 1340                         goto bad_fork_cleanup_io;
 1341         }
 1342 
 1343         p->pid = pid_nr(pid);
 1344         p->tgid = p->pid;
 1345         if (clone_flags & CLONE_THREAD)
 1346                 p->tgid = current->tgid;
 1347 
 1348         p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;
 1349         /*
 1350          * Clear TID on mm_release()?
 1351          */
 1352         p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr : NULL;
 1353 #ifdef CONFIG_BLOCK
 1354         p->plug = NULL;
 1355 #endif
 1356 #ifdef CONFIG_FUTEX
 1357         p->robust_list = NULL;
 1358 #ifdef CONFIG_COMPAT
 1359         p->compat_robust_list = NULL;
 1360 #endif
 1361         INIT_LIST_HEAD(&p->pi_state_list);
 1362         p->pi_state_cache = NULL;
 1363 #endif
 1364         uprobe_copy_process(p);
 1365         /*
 1366          * sigaltstack should be cleared when sharing the same VM
 1367          */
 1368         if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)
 1369                 p->sas_ss_sp = p->sas_ss_size = 0;
 1370 
 1371         /*
 1372          * Syscall tracing and stepping should be turned off in the
 1373          * child regardless of CLONE_PTRACE.
 1374          */
 1375         user_disable_single_step(p);
 1376         clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);
 1377 #ifdef TIF_SYSCALL_EMU
 1378         clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);
 1379 #endif
 1380         clear_all_latency_tracing(p);
 1381 
 1382         /* ok, now we should be set up.. */
 1383         if (clone_flags & CLONE_THREAD)
 1384                 p->exit_signal = -1;
 1385         else if (clone_flags & CLONE_PARENT)
 1386                 p->exit_signal = current->group_leader->exit_signal;
 1387         else
 1388                 p->exit_signal = (clone_flags & CSIGNAL);
 1389 
 1390         p->pdeath_signal = 0;
 1391         p->exit_state = 0;
 1392 
 1393         p->nr_dirtied = 0;
 1394         p->nr_dirtied_pause = 128 >> (PAGE_SHIFT - 10);
 1395         p->dirty_paused_when = 0;
 1396 
 1397         /*
 1398          * Ok, make it visible to the rest of the system.
 1399          * We dont wake it up yet.
 1400          */
 1401         p->group_leader = p;
 1402         INIT_LIST_HEAD(&p->thread_group);
 1403         p->task_works = NULL;
 1404 
 1405         /* Need tasklist lock for parent etc handling! */
 1406         write_lock_irq(&tasklist_lock);
 1407 
 1408         /* CLONE_PARENT re-uses the old parent */
 1409         if (clone_flags & (CLONE_PARENT|CLONE_THREAD)) {
 1410                 p->real_parent = current->real_parent;
 1411                 p->parent_exec_id = current->parent_exec_id;
 1412         } else {
 1413                 p->real_parent = current;
 1414                 p->parent_exec_id = current->self_exec_id;
 1415         }
 1416 
 1417         spin_lock(&current->sighand->siglock);
 1418 
 1419         /*
 1420          * Process group and session signals need to be delivered to just the
 1421          * parent before the fork or both the parent and the child after the
 1422          * fork. Restart if a signal comes in before we add the new process to
 1423          * it's process group.
 1424          * A fatal signal pending means that current will exit, so the new
 1425          * thread can't slip out of an OOM kill (or normal SIGKILL).
 1426         */
 1427         recalc_sigpending();
 1428         if (signal_pending(current)) {
 1429                 spin_unlock(&current->sighand->siglock);
 1430                 write_unlock_irq(&tasklist_lock);
 1431                 retval = -ERESTARTNOINTR;
 1432                 goto bad_fork_free_pid;
 1433         }
 1434 
 1435         if (clone_flags & CLONE_THREAD) {
 1436                 current->signal->nr_threads++;
 1437                 atomic_inc(&current->signal->live);
 1438                 atomic_inc(&current->signal->sigcnt);
 1439                 p->group_leader = current->group_leader;
 1440                 list_add_tail_rcu(&p->thread_group, &p->group_leader->thread_group);
 1441         }
 1442 
 1443         if (likely(p->pid)) {
 1444                 ptrace_init_task(p, (clone_flags & CLONE_PTRACE) || trace);
 1445 
 1446                 if (thread_group_leader(p)) {
 1447                         if (is_child_reaper(pid)) {
 1448                                 ns_of_pid(pid)->child_reaper = p;
 1449                                 p->signal->flags |= SIGNAL_UNKILLABLE;
 1450                         }
 1451 
 1452                         p->signal->leader_pid = pid;
 1453                         p->signal->tty = tty_kref_get(current->signal->tty);
 1454                         attach_pid(p, PIDTYPE_PGID, task_pgrp(current));
 1455                         attach_pid(p, PIDTYPE_SID, task_session(current));
 1456                         list_add_tail(&p->sibling, &p->real_parent->children);
 1457                         list_add_tail_rcu(&p->tasks, &init_task.tasks);
 1458                         __this_cpu_inc(process_counts);
 1459                 }
 1460                 attach_pid(p, PIDTYPE_PID, pid);
 1461                 nr_threads++;
 1462         }
 1463 
 1464         total_forks++;
 1465         spin_unlock(&current->sighand->siglock);
 1466         write_unlock_irq(&tasklist_lock);
 1467         proc_fork_connector(p);
 1468         cgroup_post_fork(p);
 1469         if (clone_flags & CLONE_THREAD)
 1470                 threadgroup_change_end(current);
 1471         perf_event_fork(p);
 1472 
 1473         trace_task_newtask(p, clone_flags);
 1474 
 1475         return p;
 1476 
 1477 bad_fork_free_pid:
 1478         if (pid != &init_struct_pid)
 1479                 free_pid(pid);
 1480 bad_fork_cleanup_io:
 1481         if (p->io_context)
 1482                 exit_io_context(p);
 1483 bad_fork_cleanup_namespaces:
 1484         exit_task_namespaces(p);
 1485 bad_fork_cleanup_mm:
 1486         if (p->mm)
 1487                 mmput(p->mm);
 1488 bad_fork_cleanup_signal:
 1489         if (!(clone_flags & CLONE_THREAD))
 1490                 free_signal_struct(p->signal);
 1491 bad_fork_cleanup_sighand:
 1492         __cleanup_sighand(p->sighand);
 1493 bad_fork_cleanup_fs:
 1494         exit_fs(p); /* blocking */
 1495 bad_fork_cleanup_files:
 1496         exit_files(p); /* blocking */
 1497 bad_fork_cleanup_semundo:
 1498         exit_sem(p);
 1499 bad_fork_cleanup_audit:
 1500         audit_free(p);
 1501 bad_fork_cleanup_policy:
 1502         perf_event_free_task(p);
 1503 #ifdef CONFIG_NUMA
 1504         mpol_put(p->mempolicy);
 1505 bad_fork_cleanup_cgroup:
 1506 #endif
 1507         if (clone_flags & CLONE_THREAD)
 1508                 threadgroup_change_end(current);
 1509         cgroup_exit(p, 0);
 1510         delayacct_tsk_free(p);
 1511         module_put(task_thread_info(p)->exec_domain->module);
 1512 bad_fork_cleanup_count:
 1513         atomic_dec(&p->cred->user->processes);
 1514         exit_creds(p);
 1515 bad_fork_free:
 1516         free_task(p);
 1517 fork_out:
 1518         return ERR_PTR(retval);
 1519 }
 1520 
 1521 static inline void init_idle_pids(struct pid_link *links)
 1522 {
 1523         enum pid_type type;
 1524 
 1525         for (type = PIDTYPE_PID; type < PIDTYPE_MAX; ++type) {
 1526                 INIT_HLIST_NODE(&links[type].node); /* not really needed */
 1527                 links[type].pid = &init_struct_pid;
 1528         }
 1529 }
 1530 
 1531 struct task_struct * __cpuinit fork_idle(int cpu)
 1532 {
 1533         struct task_struct *task;
 1534         task = copy_process(CLONE_VM, 0, 0, NULL, &init_struct_pid, 0);
 1535         if (!IS_ERR(task)) {
 1536                 init_idle_pids(task->pids);
 1537                 init_idle(task, cpu);
 1538         }
 1539 
 1540         return task;
 1541 }
 1542 
 1543 /*
 1544  *  Ok, this is the main fork-routine.
 1545  *
 1546  * It copies the process, and if successful kick-starts
 1547  * it and waits for it to finish using the VM if required.
 1548  */
 1549 long do_fork(unsigned long clone_flags,
 1550               unsigned long stack_start,
 1551               unsigned long stack_size,
 1552               int __user *parent_tidptr,
 1553               int __user *child_tidptr)
 1554 {
 1555         struct task_struct *p;
 1556         int trace = 0;
 1557         long nr;
 1558 
 1559         /*
 1560          * Do some preliminary argument and permissions checking before we
 1561          * actually start allocating stuff
 1562          */
 1563         if (clone_flags & (CLONE_NEWUSER | CLONE_NEWPID)) {
 1564                 if (clone_flags & (CLONE_THREAD|CLONE_PARENT))
 1565                         return -EINVAL;
 1566         }
 1567 
 1568         /*
 1569          * Determine whether and which event to report to ptracer.  When
 1570          * called from kernel_thread or CLONE_UNTRACED is explicitly
 1571          * requested, no event is reported; otherwise, report if the event
 1572          * for the type of forking is enabled.
 1573          */
 1574         if (!(clone_flags & CLONE_UNTRACED)) {
 1575                 if (clone_flags & CLONE_VFORK)
 1576                         trace = PTRACE_EVENT_VFORK;
 1577                 else if ((clone_flags & CSIGNAL) != SIGCHLD)
 1578                         trace = PTRACE_EVENT_CLONE;
 1579                 else
 1580                         trace = PTRACE_EVENT_FORK;
 1581 
 1582                 if (likely(!ptrace_event_enabled(current, trace)))
 1583                         trace = 0;
 1584         }
 1585 
 1586         p = copy_process(clone_flags, stack_start, stack_size,
 1587                          child_tidptr, NULL, trace);
 1588         /*
 1589          * Do this prior waking up the new thread - the thread pointer
 1590          * might get invalid after that point, if the thread exits quickly.
 1591          */
 1592         if (!IS_ERR(p)) {
 1593                 struct completion vfork;
 1594 
 1595                 trace_sched_process_fork(current, p);
 1596 
 1597                 nr = task_pid_vnr(p);
 1598 
 1599                 if (clone_flags & CLONE_PARENT_SETTID)
 1600                         put_user(nr, parent_tidptr);
 1601 
 1602                 if (clone_flags & CLONE_VFORK) {
 1603                         p->vfork_done = &vfork;
 1604                         init_completion(&vfork);
 1605                         get_task_struct(p);
 1606                 }
 1607 
 1608                 wake_up_new_task(p);
 1609 
 1610                 /* forking complete and child started to run, tell ptracer */
 1611                 if (unlikely(trace))
 1612                         ptrace_event(trace, nr);
 1613 
 1614                 if (clone_flags & CLONE_VFORK) {
 1615                         if (!wait_for_vfork_done(p, &vfork))
 1616                                 ptrace_event(PTRACE_EVENT_VFORK_DONE, nr);
 1617                 }
 1618         } else {
 1619                 nr = PTR_ERR(p);
 1620         }
 1621         return nr;
 1622 }
 1623 
 1624 /*
 1625  * Create a kernel thread.
 1626  */
 1627 pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags)
 1628 {
 1629         return do_fork(flags|CLONE_VM|CLONE_UNTRACED, (unsigned long)fn,
 1630                 (unsigned long)arg, NULL, NULL);
 1631 }
 1632 
 1633 #ifdef __ARCH_WANT_SYS_FORK
 1634 SYSCALL_DEFINE0(fork)
 1635 {
 1636 #ifdef CONFIG_MMU
 1637         return do_fork(SIGCHLD, 0, 0, NULL, NULL);
 1638 #else
 1639         /* can not support in nommu mode */
 1640         return(-EINVAL);
 1641 #endif
 1642 }
 1643 #endif
 1644 
 1645 #ifdef __ARCH_WANT_SYS_VFORK
 1646 SYSCALL_DEFINE0(vfork)
 1647 {
 1648         return do_fork(CLONE_VFORK | CLONE_VM | SIGCHLD, 0, 
 1649                         0, NULL, NULL);
 1650 }
 1651 #endif
 1652 
 1653 #ifdef __ARCH_WANT_SYS_CLONE
 1654 #ifdef CONFIG_CLONE_BACKWARDS
 1655 SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
 1656                  int __user *, parent_tidptr,
 1657                  int, tls_val,
 1658                  int __user *, child_tidptr)
 1659 #elif defined(CONFIG_CLONE_BACKWARDS2)
 1660 SYSCALL_DEFINE5(clone, unsigned long, newsp, unsigned long, clone_flags,
 1661                  int __user *, parent_tidptr,
 1662                  int __user *, child_tidptr,
 1663                  int, tls_val)
 1664 #else
 1665 SYSCALL_DEFINE5(clone, unsigned long, clone_flags, unsigned long, newsp,
 1666                  int __user *, parent_tidptr,
 1667                  int __user *, child_tidptr,
 1668                  int, tls_val)
 1669 #endif
 1670 {
 1671         long ret = do_fork(clone_flags, newsp, 0, parent_tidptr, child_tidptr);
 1672         asmlinkage_protect(5, ret, clone_flags, newsp,
 1673                         parent_tidptr, child_tidptr, tls_val);
 1674         return ret;
 1675 }
 1676 #endif
 1677 
 1678 #ifndef ARCH_MIN_MMSTRUCT_ALIGN
 1679 #define ARCH_MIN_MMSTRUCT_ALIGN 0
 1680 #endif
 1681 
 1682 static void sighand_ctor(void *data)
 1683 {
 1684         struct sighand_struct *sighand = data;
 1685 
 1686         spin_lock_init(&sighand->siglock);
 1687         init_waitqueue_head(&sighand->signalfd_wqh);
 1688 }
 1689 
 1690 void __init proc_caches_init(void)
 1691 {
 1692         sighand_cachep = kmem_cache_create("sighand_cache",
 1693                         sizeof(struct sighand_struct), 0,
 1694                         SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_DESTROY_BY_RCU|
 1695                         SLAB_NOTRACK, sighand_ctor);
 1696         signal_cachep = kmem_cache_create("signal_cache",
 1697                         sizeof(struct signal_struct), 0,
 1698                         SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
 1699         files_cachep = kmem_cache_create("files_cache",
 1700                         sizeof(struct files_struct), 0,
 1701                         SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
 1702         fs_cachep = kmem_cache_create("fs_cache",
 1703                         sizeof(struct fs_struct), 0,
 1704                         SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
 1705         /*
 1706          * FIXME! The "sizeof(struct mm_struct)" currently includes the
 1707          * whole struct cpumask for the OFFSTACK case. We could change
 1708          * this to *only* allocate as much of it as required by the
 1709          * maximum number of CPU's we can ever have.  The cpumask_allocation
 1710          * is at the end of the structure, exactly for that reason.
 1711          */
 1712         mm_cachep = kmem_cache_create("mm_struct",
 1713                         sizeof(struct mm_struct), ARCH_MIN_MMSTRUCT_ALIGN,
 1714                         SLAB_HWCACHE_ALIGN|SLAB_PANIC|SLAB_NOTRACK, NULL);
 1715         vm_area_cachep = KMEM_CACHE(vm_area_struct, SLAB_PANIC);
 1716         mmap_init();
 1717         nsproxy_cache_init();
 1718 }
 1719 
 1720 /*
 1721  * Check constraints on flags passed to the unshare system call.
 1722  */
 1723 static int check_unshare_flags(unsigned long unshare_flags)
 1724 {
 1725         if (unshare_flags & ~(CLONE_THREAD|CLONE_FS|CLONE_NEWNS|CLONE_SIGHAND|
 1726                                 CLONE_VM|CLONE_FILES|CLONE_SYSVSEM|
 1727                                 CLONE_NEWUTS|CLONE_NEWIPC|CLONE_NEWNET|
 1728                                 CLONE_NEWUSER|CLONE_NEWPID))
 1729                 return -EINVAL;
 1730         /*
 1731          * Not implemented, but pretend it works if there is nothing to
 1732          * unshare. Note that unsharing CLONE_THREAD or CLONE_SIGHAND
 1733          * needs to unshare vm.
 1734          */
 1735         if (unshare_flags & (CLONE_THREAD | CLONE_SIGHAND | CLONE_VM)) {
 1736                 /* FIXME: get_task_mm() increments ->mm_users */
 1737                 if (atomic_read(&current->mm->mm_users) > 1)
 1738                         return -EINVAL;
 1739         }
 1740 
 1741         return 0;
 1742 }
 1743 
 1744 /*
 1745  * Unshare the filesystem structure if it is being shared
 1746  */
 1747 static int unshare_fs(unsigned long unshare_flags, struct fs_struct **new_fsp)
 1748 {
 1749         struct fs_struct *fs = current->fs;
 1750 
 1751         if (!(unshare_flags & CLONE_FS) || !fs)
 1752                 return 0;
 1753 
 1754         /* don't need lock here; in the worst case we'll do useless copy */
 1755         if (fs->users == 1)
 1756                 return 0;
 1757 
 1758         *new_fsp = copy_fs_struct(fs);
 1759         if (!*new_fsp)
 1760                 return -ENOMEM;
 1761 
 1762         return 0;
 1763 }
 1764 
 1765 /*
 1766  * Unshare file descriptor table if it is being shared
 1767  */
 1768 static int unshare_fd(unsigned long unshare_flags, struct files_struct **new_fdp)
 1769 {
 1770         struct files_struct *fd = current->files;
 1771         int error = 0;
 1772 
 1773         if ((unshare_flags & CLONE_FILES) &&
 1774             (fd && atomic_read(&fd->count) > 1)) {
 1775                 *new_fdp = dup_fd(fd, &error);
 1776                 if (!*new_fdp)
 1777                         return error;
 1778         }
 1779 
 1780         return 0;
 1781 }
 1782 
 1783 /*
 1784  * unshare allows a process to 'unshare' part of the process
 1785  * context which was originally shared using clone.  copy_*
 1786  * functions used by do_fork() cannot be used here directly
 1787  * because they modify an inactive task_struct that is being
 1788  * constructed. Here we are modifying the current, active,
 1789  * task_struct.
 1790  */
 1791 SYSCALL_DEFINE1(unshare, unsigned long, unshare_flags)
 1792 {
 1793         struct fs_struct *fs, *new_fs = NULL;
 1794         struct files_struct *fd, *new_fd = NULL;
 1795         struct cred *new_cred = NULL;
 1796         struct nsproxy *new_nsproxy = NULL;
 1797         int do_sysvsem = 0;
 1798         int err;
 1799 
 1800         /*
 1801          * If unsharing a user namespace must also unshare the thread.
 1802          */
 1803         if (unshare_flags & CLONE_NEWUSER)
 1804                 unshare_flags |= CLONE_THREAD;
 1805         /*
 1806          * If unsharing a pid namespace must also unshare the thread.
 1807          */
 1808         if (unshare_flags & CLONE_NEWPID)
 1809                 unshare_flags |= CLONE_THREAD;
 1810         /*
 1811          * If unsharing a thread from a thread group, must also unshare vm.
 1812          */
 1813         if (unshare_flags & CLONE_THREAD)
 1814                 unshare_flags |= CLONE_VM;
 1815         /*
 1816          * If unsharing vm, must also unshare signal handlers.
 1817          */
 1818         if (unshare_flags & CLONE_VM)
 1819                 unshare_flags |= CLONE_SIGHAND;
 1820         /*
 1821          * If unsharing namespace, must also unshare filesystem information.
 1822          */
 1823         if (unshare_flags & CLONE_NEWNS)
 1824                 unshare_flags |= CLONE_FS;
 1825 
 1826         err = check_unshare_flags(unshare_flags);
 1827         if (err)
 1828                 goto bad_unshare_out;
 1829         /*
 1830          * CLONE_NEWIPC must also detach from the undolist: after switching
 1831          * to a new ipc namespace, the semaphore arrays from the old
 1832          * namespace are unreachable.
 1833          */
 1834         if (unshare_flags & (CLONE_NEWIPC|CLONE_SYSVSEM))
 1835                 do_sysvsem = 1;
 1836         err = unshare_fs(unshare_flags, &new_fs);
 1837         if (err)
 1838                 goto bad_unshare_out;
 1839         err = unshare_fd(unshare_flags, &new_fd);
 1840         if (err)
 1841                 goto bad_unshare_cleanup_fs;
 1842         err = unshare_userns(unshare_flags, &new_cred);
 1843         if (err)
 1844                 goto bad_unshare_cleanup_fd;
 1845         err = unshare_nsproxy_namespaces(unshare_flags, &new_nsproxy,
 1846                                          new_cred, new_fs);
 1847         if (err)
 1848                 goto bad_unshare_cleanup_cred;
 1849 
 1850         if (new_fs || new_fd || do_sysvsem || new_cred || new_nsproxy) {
 1851                 if (do_sysvsem) {
 1852                         /*
 1853                          * CLONE_SYSVSEM is equivalent to sys_exit().
 1854                          */
 1855                         exit_sem(current);
 1856                 }
 1857 
 1858                 if (new_nsproxy) {
 1859                         switch_task_namespaces(current, new_nsproxy);
 1860                         new_nsproxy = NULL;
 1861                 }
 1862 
 1863                 task_lock(current);
 1864 
 1865                 if (new_fs) {
 1866                         fs = current->fs;
 1867                         spin_lock(&fs->lock);
 1868                         current->fs = new_fs;
 1869                         if (--fs->users)
 1870                                 new_fs = NULL;
 1871                         else
 1872                                 new_fs = fs;
 1873                         spin_unlock(&fs->lock);
 1874                 }
 1875 
 1876                 if (new_fd) {
 1877                         fd = current->files;
 1878                         current->files = new_fd;
 1879                         new_fd = fd;
 1880                 }
 1881 
 1882                 task_unlock(current);
 1883 
 1884                 if (new_cred) {
 1885                         /* Install the new user namespace */
 1886                         commit_creds(new_cred);
 1887                         new_cred = NULL;
 1888                 }
 1889         }
 1890 
 1891         if (new_nsproxy)
 1892                 put_nsproxy(new_nsproxy);
 1893 
 1894 bad_unshare_cleanup_cred:
 1895         if (new_cred)
 1896                 put_cred(new_cred);
 1897 bad_unshare_cleanup_fd:
 1898         if (new_fd)
 1899                 put_files_struct(new_fd);
 1900 
 1901 bad_unshare_cleanup_fs:
 1902         if (new_fs)
 1903                 free_fs_struct(new_fs);
 1904 
 1905 bad_unshare_out:
 1906         return err;
 1907 }
 1908 
 1909 /*
 1910  *      Helper to unshare the files of the current task.
 1911  *      We don't want to expose copy_files internals to
 1912  *      the exec layer of the kernel.
 1913  */
 1914 
 1915 int unshare_files(struct files_struct **displaced)
 1916 {
 1917         struct task_struct *task = current;
 1918         struct files_struct *copy = NULL;
 1919         int error;
 1920 
 1921         error = unshare_fd(CLONE_FILES, &copy);
 1922         if (error || !copy) {
 1923                 *displaced = NULL;
 1924                 return error;
 1925         }
 1926         *displaced = task->files;
 1927         task_lock(task);
 1928         task->files = copy;
 1929         task_unlock(task);
 1930         return 0;
 1931 }

Cache object: 283cb13d38e247eca21f34e39c896d33


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.