The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/fs/exec.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  *  linux/fs/exec.c
    3  *
    4  *  Copyright (C) 1991, 1992  Linus Torvalds
    5  */
    6 
    7 /*
    8  * #!-checking implemented by tytso.
    9  */
   10 /*
   11  * Demand-loading implemented 01.12.91 - no need to read anything but
   12  * the header into memory. The inode of the executable is put into
   13  * "current->executable", and page faults do the actual loading. Clean.
   14  *
   15  * Once more I can proudly say that linux stood up to being changed: it
   16  * was less than 2 hours work to get demand-loading completely implemented.
   17  *
   18  * Demand loading changed July 1993 by Eric Youngdale.   Use mmap instead,
   19  * current->executable is only used by the procfs.  This allows a dispatch
   20  * table to check for several different types  of binary formats.  We keep
   21  * trying until we recognize the file or we run out of supported binary
   22  * formats. 
   23  */
   24 
   25 #include <linux/config.h>
   26 #include <linux/slab.h>
   27 #include <linux/file.h>
   28 #include <linux/mman.h>
   29 #include <linux/a.out.h>
   30 #include <linux/stat.h>
   31 #include <linux/fcntl.h>
   32 #include <linux/smp_lock.h>
   33 #include <linux/init.h>
   34 #include <linux/pagemap.h>
   35 #include <linux/highmem.h>
   36 #include <linux/spinlock.h>
   37 #include <linux/personality.h>
   38 #include <linux/swap.h>
   39 #include <linux/utsname.h>
   40 #define __NO_VERSION__
   41 #include <linux/module.h>
   42 
   43 #include <asm/uaccess.h>
   44 #include <asm/pgalloc.h>
   45 #include <asm/mmu_context.h>
   46 
   47 #ifdef CONFIG_KMOD
   48 #include <linux/kmod.h>
   49 #endif
   50 
   51 int core_uses_pid;
   52 char core_pattern[65] = "core";
   53 /* The maximal length of core_pattern is also specified in sysctl.c */ 
      /* NOTE(review): neither core_* variable is referenced in the visible part
       * of this file; presumably consumed by the core-dump code - confirm. */
   54 
      /*
       * Head of the singly linked list (chained through ->next) of registered
       * binary format handlers.  New handlers are pushed at the head.
       */
   55 static struct linux_binfmt *formats;
      /*
       * Protects 'formats': write-locked by register_binfmt() and
       * unregister_binfmt(), read-locked by the code that walks the list.
       */
   56 static rwlock_t binfmt_lock = RW_LOCK_UNLOCKED;
   57 
      /*
       * Add a binary format handler to the front of the global 'formats' list.
       *
       * Returns 0 on success, -EINVAL when fmt is NULL, and -EBUSY when fmt
       * already has a successor (fmt->next is set) or is found on the list.
       * The list is scanned and updated with binfmt_lock held for writing.
       */
   58 int register_binfmt(struct linux_binfmt * fmt)
   59 {
   60         struct linux_binfmt *cur;
   61         int status = 0;
   62 
   63         if (!fmt)
   64                 return -EINVAL;
   65         if (fmt->next)
   66                 return -EBUSY;
   67         write_lock(&binfmt_lock);
   68         for (cur = formats; cur; cur = cur->next)
   69                 if (cur == fmt) {
   70                         status = -EBUSY;        /* already registered */
   71                         goto unlock;
   72                 }
   73         fmt->next = formats;
   74         formats = fmt;
   75 unlock:
   76         write_unlock(&binfmt_lock);
   77         return status;
   78 }
   79 
      /*
       * Remove a binary format handler from the global 'formats' list.
       *
       * Returns 0 when fmt was found and unlinked, -EINVAL when it is not on
       * the list.  The list is walked with binfmt_lock held for writing.
       *
       * fmt->next is cleared on removal: register_binfmt() refuses any handler
       * whose ->next is non-NULL, so leaving the stale link behind would make
       * a later re-registration of the same handler fail with -EBUSY.
       */
   80 int unregister_binfmt(struct linux_binfmt * fmt)
   81 {
   82         struct linux_binfmt ** tmp = &formats;
   83 
   84         write_lock(&binfmt_lock);
   85         while (*tmp) {
   86                 if (fmt == *tmp) {
                              /* splice fmt out and drop its stale forward link */
   87                         *tmp = fmt->next;
                              fmt->next = NULL;
   88                         write_unlock(&binfmt_lock);
   89                         return 0;
   90                 }
   91                 tmp = &(*tmp)->next;
   92         }
   93         write_unlock(&binfmt_lock);
   94         return -EINVAL;
   95 }
   96 
      /*
       * Drop the module use count taken on a binary format handler.
       * Handlers without an owning module (fmt->module == NULL) need nothing.
       */
   97 static inline void put_binfmt(struct linux_binfmt * fmt)
   98 {
   99         if (!fmt->module)
  100                 return;
  101         __MOD_DEC_USE_COUNT(fmt->module);
      }
  102 
  103 /*
  104  * sys_uselib() - load a shared library through the registered binary
  105  * format handlers.
  106  *
  107  * Note that a shared library must be both readable and executable due to
  108  * security reasons.  The load address is taken from the file itself.
       *
       * 'library' is a user-space path.  Returns the result of the first
       * handler whose load_shlib() answers something other than -ENOEXEC;
       * -ENOEXEC if no handler accepted the file (or the file has no read
       * method); -EINVAL if the path is not a regular file; otherwise the
       * error from user_path_walk(), permission() or dentry_open().
       */
  109 asmlinkage long sys_uselib(const char * library)
  110 {
  111         struct file * file;
  112         struct nameidata nd;
  113         int error;
  114 
  115         error = user_path_walk(library, &nd);
  116         if (error)
  117                 goto out;
  118 
  119         error = -EINVAL;
  120         if (!S_ISREG(nd.dentry->d_inode->i_mode))
  121                 goto exit;
  122 
  123         error = permission(nd.dentry->d_inode, MAY_READ | MAY_EXEC);
  124         if (error)
  125                 goto exit;
  126 
              /*
               * NOTE(review): the failure path below skips path_release();
               * presumably dentry_open() consumes the dentry/mnt references
               * even when it fails - confirm against its implementation.
               */
  127         file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
  128         error = PTR_ERR(file);
  129         if (IS_ERR(file))
  130                 goto out;
  131 
  132         error = -ENOEXEC;
  133         if(file->f_op && file->f_op->read) {
  134                 struct linux_binfmt * fmt;
  135 
  136                 read_lock(&binfmt_lock);
  137                 for (fmt = formats ; fmt ; fmt = fmt->next) {
  138                         if (!fmt->load_shlib)
  139                                 continue;
                              /* skip handlers whose module count cannot be taken */
  140                         if (!try_inc_mod_count(fmt->module))
  141                                 continue;
                              /*
                               * The list lock is dropped around the handler
                               * call and re-taken before fmt->next is read.
                               */
  142                         read_unlock(&binfmt_lock);
  143                         error = fmt->load_shlib(file);
  144                         read_lock(&binfmt_lock);
  145                         put_binfmt(fmt);
                              /* anything but "not my format" ends the search */
  146                         if (error != -ENOEXEC)
  147                                 break;
  148                 }
  149                 read_unlock(&binfmt_lock);
  150         }
  151         fput(file);
  152 out:
  153         return error;
  154 exit:
              /* error before the file was opened: drop the path lookup result */
  155         path_release(&nd);
  156         goto out;
  157 }
  158 
  159 /*
  160  * count() walks a NULL-terminated array of user-space string pointers
  161  * (an argument or environment vector) and returns the number of entries
  162  * that precede the terminator.
  163  *
  164  * A NULL argv counts as empty (0).  Returns -EFAULT when an entry cannot
  165  * be fetched with get_user(), and -E2BIG as soon as more than 'max'
  166  * entries have been seen.
  167  */
  168 static int count(char ** argv, int max)
  169 {
  170         int n;
  171         char *entry;
  172 
  173         if (argv == NULL)
  174                 return 0;
  175 
  176         for (n = 0; ; argv++) {
  177                 if (get_user(entry, argv))
  178                         return -EFAULT;
  179                 if (!entry)
  180                         return n;
                      if (++n > max)
                              return -E2BIG;
              }
      }
  181 
  182 /*
  183  * 'copy_strings()' copies argument/environment strings from user
  184  * memory to free pages in kernel mem. These are in a format ready
  185  * to be put directly into the top of new user memory.
       *
       * The strings are taken from argv[argc-1] down to argv[0]; each one is
       * placed immediately below the previous one, moving bprm->p downwards.
       * Pages of bprm->page[] are allocated on demand and the parts of a new
       * page that are not covered by the string being copied are zeroed.
       *
       * Returns 0 on success, -EFAULT if a pointer or string cannot be read,
       * -E2BIG if a string does not fit below bprm->p, -ENOMEM if a page
       * cannot be allocated.
  186  */
  187 int copy_strings(int argc,char ** argv, struct linux_binprm *bprm) 
  188 {
              /* at most one page is kept kmap()ed across loop iterations */
  189         struct page *kmapped_page = NULL;
  190         char *kaddr = NULL;
  191         int ret;
  192 
  193         while (argc-- > 0) {
  194                 char *str;
  195                 int len;
  196                 unsigned long pos;
  197 
                      /*
                       * A zero return from strnlen_user() is treated as a fault.
                       * NOTE(review): len is presumably the string length
                       * including its terminating NUL - confirm.
                       */
  198                 if (get_user(str, argv+argc) ||
  199                                 !(len = strnlen_user(str, bprm->p))) {
  200                         ret = -EFAULT;
  201                         goto out;
  202                 }
  203 
  204                 if (bprm->p < len)  {
  205                         ret = -E2BIG;
  206                         goto out;
  207                 }
  208 
  209                 bprm->p -= len;
  210                 /* XXX: add architecture specific overflow check here. */ 
  211                 pos = bprm->p;
  212 
                      /* copy the string page by page, starting at offset pos */
  213                 while (len > 0) {
  214                         int i, new, err;
  215                         int offset, bytes_to_copy;
  216                         struct page *page;
  217 
  218                         offset = pos % PAGE_SIZE;
  219                         i = pos/PAGE_SIZE;
  220                         page = bprm->page[i];
  221                         new = 0;
  222                         if (!page) {
  223                                 page = alloc_page(GFP_HIGHUSER);
  224                                 bprm->page[i] = page;
  225                                 if (!page) {
  226                                         ret = -ENOMEM;
  227                                         goto out;
  228                                 }
  229                                 new = 1;
  230                         }
  231 
                              /* switch the kernel mapping only when the page changes */
  232                         if (page != kmapped_page) {
  233                                 if (kmapped_page)
  234                                         kunmap(kmapped_page);
  235                                 kmapped_page = page;
  236                                 kaddr = kmap(kmapped_page);
  237                         }
                              /* fresh page: clear the bytes in front of the string */
  238                         if (new && offset)
  239                                 memset(kaddr, 0, offset);
  240                         bytes_to_copy = PAGE_SIZE - offset;
  241                         if (bytes_to_copy > len) {
  242                                 bytes_to_copy = len;
                                      /* fresh page: clear the bytes after the string */
  243                                 if (new)
  244                                         memset(kaddr+offset+len, 0,
  245                                                 PAGE_SIZE-offset-len);
  246                         }
  247                         err = copy_from_user(kaddr+offset, str, bytes_to_copy);
  248                         if (err) {
  249                                 ret = -EFAULT;
  250                                 goto out;
  251                         }
  252 
  253                         pos += bytes_to_copy;
  254                         str += bytes_to_copy;
  255                         len -= bytes_to_copy;
  256                 }
  257         }
  258         ret = 0;
  259 out:
              /* common exit: release the last kernel mapping, if any */
  260         if (kmapped_page)
  261                 kunmap(kmapped_page);
  262         return ret;
  263 }
  264 
  265 /*
  266  * Variant of copy_strings() for an argv array (and strings) that live in
  267  * kernel memory: the address limit is switched to KERNEL_DS for the
  268  * duration of the copy and restored afterwards.  Returns whatever
  269  * copy_strings() returns.
  270  */
  271 int copy_strings_kernel(int argc,char ** argv, struct linux_binprm *bprm)
  272 {
  273         mm_segment_t saved_fs;
  274         int status;
  275 
  276         saved_fs = get_fs();
              set_fs(KERNEL_DS);
              status = copy_strings(argc, argv, bprm);
              set_fs(saved_fs);

              return status;
      }
  277 
  278 /*
  279  * This routine is used to map in a page into an address space: needed by
  280  * execve() for the initial stack and environment pages.
  281  *
  282  * The caller holds the mm's mmap_sem for writing (setup_arg_pages() does).
       *
       * 'page' is installed at 'address' in tsk->mm as a writable, dirty PTE
       * and the mm's rss is incremented.  If the page tables cannot be
       * allocated, or a PTE is already present at 'address', the page is
       * freed and tsk is sent SIGKILL.  Nothing is returned either way.
  283  */
  284 void put_dirty_page(struct task_struct * tsk, struct page *page, unsigned long address)
  285 {
  286         pgd_t * pgd;
  287         pmd_t * pmd;
  288         pte_t * pte;
  289         struct vm_area_struct *vma; 
              /* fallback protection, used when find_vma() returns nothing */
  290         pgprot_t prot = PAGE_COPY; 
  291 
              /* only diagnostic: execution continues after the message */
  292         if (page_count(page) != 1)
  293                 printk(KERN_ERR "mem_map disagrees with %p at %08lx\n", page, address);
  294         pgd = pgd_offset(tsk->mm, address);
  295 
  296         spin_lock(&tsk->mm->page_table_lock);
  297         pmd = pmd_alloc(tsk->mm, pgd, address);
  298         if (!pmd)
  299                 goto out;
  300         pte = pte_alloc(tsk->mm, pmd, address);
  301         if (!pte)
  302                 goto out;
              /* refuse to overwrite an existing mapping */
  303         if (!pte_none(*pte))
  304                 goto out;
  305         lru_cache_add(page);
  306         flush_dcache_page(page);
  307         flush_page_to_ram(page);
  308         /* lookup is cheap because there is only a single entry in the list */
  309         vma = find_vma(tsk->mm, address); 
  310         if (vma) 
  311                 prot = vma->vm_page_prot;
  312         set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, prot))));
  313         tsk->mm->rss++;
  314         spin_unlock(&tsk->mm->page_table_lock);
  315 
  316         /* no need for flush_tlb */
  317         return;
  318 out:
              /* failure: give the page back and kill the task */
  319         spin_unlock(&tsk->mm->page_table_lock);
  320         __free_page(page);
  321         force_sig(SIGKILL, tsk);
  322         return;
  323 }
  324 
      /*
       * setup_arg_pages() - turn the argument pages collected in bprm->page[]
       * into the initial stack of the current process.
       *
       * bprm->p, bprm->exec and (if set) bprm->loader are offsets within the
       * MAX_ARG_PAGES argument area; they are rebased to user addresses by
       * adding STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE.  A stack VMA spanning from
       * the page containing bprm->p up to STACK_TOP is inserted into
       * current->mm, and every allocated argument page is mapped at its final
       * address with put_dirty_page().
       *
       * Returns 0, or -ENOMEM if the VMA cannot be allocated.
       * NOTE(review): on -ENOMEM the bprm fields have already been rebased.
       */
  325 int setup_arg_pages(struct linux_binprm *bprm)
  326 {
  327         unsigned long stack_base;
  328         struct vm_area_struct *mpnt;
  329         int i;
  330 
  331         stack_base = STACK_TOP - MAX_ARG_PAGES*PAGE_SIZE;
  332 
  333         bprm->p += stack_base;
  334         if (bprm->loader)
  335                 bprm->loader += stack_base;
  336         bprm->exec += stack_base;
  337 
  338         mpnt = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
  339         if (!mpnt) 
  340                 return -ENOMEM; 
  341         
  342         down_write(&current->mm->mmap_sem);
  343         {
  344                 mpnt->vm_mm = current->mm;
                      /* the VMA starts at the page that holds the lowest string */
  345                 mpnt->vm_start = PAGE_MASK & (unsigned long) bprm->p;
  346                 mpnt->vm_end = STACK_TOP;
  347                 mpnt->vm_flags = VM_STACK_FLAGS;
  348                 mpnt->vm_page_prot = protection_map[VM_STACK_FLAGS & 0x7];
  349                 mpnt->vm_ops = NULL;
  350                 mpnt->vm_pgoff = 0;
  351                 mpnt->vm_file = NULL;
  352                 mpnt->vm_private_data = (void *) 0;
  353                 insert_vm_struct(current->mm, mpnt);
                      /* total_vm is assigned (not added to): the stack size in pages */
  354                 current->mm->total_vm = (mpnt->vm_end - mpnt->vm_start) >> PAGE_SHIFT;
  355         } 
  356 
              /* slot i of bprm->page[] belongs at stack_base + i*PAGE_SIZE */
  357         for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
  358                 struct page *page = bprm->page[i];
  359                 if (page) {
                              /* ownership of the page moves to the page tables */
  360                         bprm->page[i] = NULL;
  361                         put_dirty_page(current,page,stack_base);
  362                 }
  363                 stack_base += PAGE_SIZE;
  364         }
  365         up_write(&current->mm->mmap_sem);
  366         
  367         return 0;
  368 }
  369 
      /*
       * open_exec() - look up 'name' and open it read-only for execution.
       *
       * The file must be a regular file on a mount without MNT_NOEXEC, must
       * pass permission(MAY_EXEC) and must have at least one execute bit set;
       * otherwise ERR_PTR(-EACCES) (or the permission() error) is returned.
       * After a successful open, write access to the file is denied via
       * deny_write_access(); if that fails the file is released again.
       *
       * Returns the opened file, or an ERR_PTR()-encoded error.  The lookup
       * result is released here only when dentry_open() is never reached.
       */
  370 struct file *open_exec(const char *name)
  371 {
  372         struct nameidata nd;
  373         struct inode *inode;
  374         struct file *file;
  375         int err;
  376 
  377         err = path_lookup(name, LOOKUP_FOLLOW|LOOKUP_POSITIVE, &nd);
  378         if (err)
  379                 return ERR_PTR(err);
  380 
  381         inode = nd.dentry->d_inode;
  382         err = -EACCES;
  383         if ((nd.mnt->mnt_flags & MNT_NOEXEC) || !S_ISREG(inode->i_mode))
  384                 goto release;
  385 
  386         err = permission(inode, MAY_EXEC);
  387         if (!err && !(inode->i_mode & 0111))
  388                 err = -EACCES;
  389         if (err)
  390                 goto release;
  391 
  392         file = dentry_open(nd.dentry, nd.mnt, O_RDONLY);
  393         if (IS_ERR(file))
  394                 return file;
  395 
  396         err = deny_write_access(file);
  397         if (err) {
  398                 fput(file);
  399                 return ERR_PTR(err);
  400         }
  401         return file;
  402 
  403 release:
  404         path_release(&nd);
              return ERR_PTR(err);
      }
  405 
      /*
       * kernel_read() - read from a file into a kernel-space buffer.
       *
       * Reads up to 'count' bytes starting at byte 'offset' of 'file' into
       * 'addr' by calling the file's read method with the address limit
       * temporarily switched to get_ds(), then restores the previous limit.
       *
       * Returns whatever the read method returns, or -ENOSYS when the file
       * has no file operations or no read method.  Other code in this file
       * (sys_uselib(), prepare_binprm()) treats a NULL f_op as possible, so
       * it is checked here too rather than dereferenced blindly.
       */
  406 int kernel_read(struct file *file, unsigned long offset,
  407         char * addr, unsigned long count)
  408 {
  409         mm_segment_t old_fs;
  410         loff_t pos = offset;
  411         int result = -ENOSYS;
  412 
  413         if (!file->f_op || !file->f_op->read)
  414                 goto fail;
  415         old_fs = get_fs();
  416         set_fs(get_ds());
  417         result = file->f_op->read(file, addr, count, &pos);
  418         set_fs(old_fs);
  419 fail:
  420         return result;
  421 }
  422 
      /*
       * exec_mmap() - give the current task an empty address space for the
       * new program image.
       *
       * If the task already has an mm with exactly one user, that mm is kept
       * and merely emptied with exit_mmap().  Otherwise a new mm is allocated,
       * installed as both current->mm and current->active_mm, and the old
       * one is dropped.
       *
       * Returns 0 on success, -ENOMEM if the new mm or its context cannot be
       * set up (current->mm is left unchanged in that case).
       */
  423 static int exec_mmap(void)
  424 {
  425         struct mm_struct * mm, * old_mm;
  426 
  427         old_mm = current->mm;
              /* sole user: reuse the existing mm instead of allocating one */
  428         if (old_mm && atomic_read(&old_mm->mm_users) == 1) {
  429                 mm_release();
  430                 exit_mmap(old_mm);
  431                 return 0;
  432         }
  433 
  434         mm = mm_alloc();
  435         if (mm) {
  436                 struct mm_struct *active_mm;
  437 
  438                 if (init_new_context(current, mm)) {
  439                         mmdrop(mm);
  440                         return -ENOMEM;
  441                 }
  442 
  443                 /* Add it to the list of mm's */
  444                 spin_lock(&mmlist_lock);
  445                 list_add(&mm->mmlist, &init_mm.mmlist);
  446                 mmlist_nr++;
  447                 spin_unlock(&mmlist_lock);
  448 
                      /* swap in the new mm; remember the one that was active */
  449                 task_lock(current);
  450                 active_mm = current->active_mm;
  451                 current->mm = mm;
  452                 current->active_mm = mm;
  453                 task_unlock(current);
  454                 activate_mm(active_mm, mm);
  455                 mm_release();
                      /*
                       * A task that owned an mm drops its user reference with
                       * mmput(); a task without one (old_mm == NULL) only
                       * drops the previously active mm with mmdrop().
                       */
  456                 if (old_mm) {
  457                         if (active_mm != old_mm) BUG();
  458                         mmput(old_mm);
  459                         return 0;
  460                 }
  461                 mmdrop(active_mm);
  462                 return 0;
  463         }
  464         return -ENOMEM;
  465 }
  466 
  467 /*
  468  * This function makes sure the current process has its own signal table,
  469  * so that flush_signal_handlers can later reset the handlers without
  470  * disturbing other processes.  (Other processes might share the signal
  471  * table via the CLONE_SIGNAL option to clone().)
       *
       * Returns 0 if the table was already private or a private copy was
       * installed, -ENOMEM if the copy could not be allocated.  The old
       * table's reference count is NOT dropped here.
  472  */
  473  
  474 static inline int make_private_signals(void)
  475 {
  476         struct signal_struct * newsig;
  477 
              /* not shared: nothing to do */
  478         if (atomic_read(&current->sig->count) <= 1)
  479                 return 0;
  480         newsig = kmem_cache_alloc(sigact_cachep, GFP_KERNEL);
  481         if (newsig == NULL)
  482                 return -ENOMEM;
              /* the copy starts with a fresh lock, one reference, same actions */
  483         spin_lock_init(&newsig->siglock);
  484         atomic_set(&newsig->count, 1);
  485         memcpy(newsig->action, current->sig->action, sizeof(newsig->action));
              /* the pointer switch itself is done under sigmask_lock */
  486         spin_lock_irq(&current->sigmask_lock);
  487         current->sig = newsig;
  488         spin_unlock_irq(&current->sigmask_lock);
  489         return 0;
  490 }
  491         
  492 /*
  493  * Counterpart of make_private_signals(): once the exec can no longer
  494  * fail, give up our reference on the signal table we used to share.
  495  *
  496  * If current->sig is still 'oldsig' no copy was made and there is nothing
  497  * to release.  Otherwise the old table's count is decremented and the
  498  * table is freed when that was the last reference.  This is kept separate
  499  * from make_private_signals() so flush_old_exec() can still back out.
  500  */
  501 static inline void release_old_signals(struct signal_struct * oldsig)
  502 {
  503         if (current->sig != oldsig && atomic_dec_and_test(&oldsig->count))
  504                 kmem_cache_free(sigact_cachep, oldsig);
  505 }
  506 
  507 /*
  508  * These functions flush out all traces of the currently running executable
  509  * so that a new one can be started
  510  */
  511 
      /*
       * flush_old_files() closes every descriptor marked close-on-exec in
       * 'files'.  The close_on_exec bitmap is scanned one word at a time;
       * each non-zero word is cleared under file_lock, then the lock is
       * dropped while the descriptors recorded in that word are closed.
       */
  512 static inline void flush_old_files(struct files_struct * files)
  513 {
              /* j indexes bitmap words; starts at -1 because of the pre-increment */
  514         long j = -1;
  515 
  516         write_lock(&files->file_lock);
  517         for (;;) {
  518                 unsigned long set, i;
  519 
  520                 j++;
                      /* i = number of the first descriptor covered by word j */
  521                 i = j * __NFDBITS;
  522                 if (i >= files->max_fds || i >= files->max_fdset)
  523                         break;
  524                 set = files->close_on_exec->fds_bits[j];
  525                 if (!set)
  526                         continue;
  527                 files->close_on_exec->fds_bits[j] = 0;
                      /* sys_close() is called without file_lock held */
  528                 write_unlock(&files->file_lock);
  529                 for ( ; set ; i++,set >>= 1) {
  530                         if (set & 1) {
  531                                 sys_close(i);
  532                         }
  533                 }
  534                 write_lock(&files->file_lock);
  535 
  536         }
  537         write_unlock(&files->file_lock);
  538 }
  539 
  540 /*
  541  * An execve() will automatically "de-thread" the process.
  542  * Note: we don't have to hold the tasklist_lock to test
  543  * whether we might need to do this. If we're not part of
  544  * a thread group, there is no way we can become one
  545  * dynamically. And if we are, we only need to protect the
  546  * unlink - even if we race with the last other thread exit,
  547  * at worst the list_del_init() might end up being a no-op.
       *
       * Afterwards tsk is no longer linked on a thread_group list and its
       * tgid equals its own pid.
  548  */
  549 static inline void de_thread(struct task_struct *tsk)
  550 {
              /* unlocked test, locked unlink - see the note above */
  551         if (!list_empty(&tsk->thread_group)) {
  552                 write_lock_irq(&tasklist_lock);
  553                 list_del_init(&tsk->thread_group);
  554                 write_unlock_irq(&tasklist_lock);
  555         }
  556 
  557         /* Minor oddity: this might stay the same. */
  558         tsk->tgid = tsk->pid;
  559 }
  560 
      /*
       * flush_old_exec() - discard the state of the currently running program
       * so that the image described by 'bprm' can take over.
       *
       * Up to and including exec_mmap() the function can still fail; the
       * error paths at the bottom restore the original files_struct and
       * signal table.  After that ("point of no return") the old resources
       * are released and per-process state is reset: alternate signal stack,
       * dumpable flags, command name (basename of bprm->filename, at most 15
       * characters), thread state, thread group membership, self_exec_id,
       * signal handlers and close-on-exec descriptors.
       *
       * Returns 0 on success or the negative error from
       * make_private_signals(), unshare_files() or exec_mmap().
       */
  561 int flush_old_exec(struct linux_binprm * bprm)
  562 {
  563         char * name;
  564         int i, ch, retval;
  565         struct signal_struct * oldsig;
  566         struct files_struct * files;
  567 
  568         /*
  569          * Make sure we have a private signal table
  570          */
  571         oldsig = current->sig;
  572         retval = make_private_signals();
  573         if (retval) goto flush_failed;
  574 
  575         /*
  576          * Make sure we have private file handles. Ask the
  577          * fork helper to do the work for us and the exit
  578          * helper to do the cleanup of the old one.
  579          */
  580          
  581         files = current->files;         /* refcounted so safe to hold */
  582         retval = unshare_files();
  583         if(retval)
  584                 goto flush_failed;
  585         
  586         /* 
  587          * Release all of the old mmap stuff
  588          */
  589         retval = exec_mmap();
  590         if (retval) goto mmap_failed;
  591 
  592         /* This is the point of no return */
              /* NOTE(review): steal_locks() presumably moves file locks from the
               * old files_struct to the new one - confirm against its definition. */
  593         steal_locks(files);
  594         put_files_struct(files);
  595         release_old_signals(oldsig);
  596 
              /* the alternate signal stack is disabled */
  597         current->sas_ss_sp = current->sas_ss_size = 0;
  598 
  599         if (current->euid == current->uid && current->egid == current->gid) {
  600                 current->mm->dumpable = 1;
  601                 current->task_dumpable = 1;
  602         }
              /*
               * comm = text after the last '/' in the filename, truncated to 15
               * characters; the index restarts at every '/'.
               */
  603         name = bprm->filename;
  604         for (i=0; (ch = *(name++)) != '\0';) {
  605                 if (ch == '/')
  606                         i = 0;
  607                 else
  608                         if (i < 15)
  609                                 current->comm[i++] = ch;
  610         }
  611         current->comm[i] = '\0';
  612 
  613         flush_thread();
  614 
  615         de_thread(current);
  616 
              /*
               * Not dumpable if the exec changes the effective ids or the
               * binary is not readable by the caller.
               */
  617         if (bprm->e_uid != current->euid || bprm->e_gid != current->egid || 
  618             permission(bprm->file->f_dentry->d_inode,MAY_READ))
  619                 current->mm->dumpable = 0;
  620 
  621         /* An exec changes our domain. We are no longer part of the thread
  622            group */
  623            
  624         current->self_exec_id++;
  625                         
  626         flush_signal_handlers(current);
  627         flush_old_files(current->files);
  628 
  629         return 0;
  630 
  631 mmap_failed:
              /* drop the private copy and go back to the original files_struct */
  632         put_files_struct(current->files);
  633         current->files = files;
  634 flush_failed:
              /* if a private signal table was created, free it and restore the old one */
  635         spin_lock_irq(&current->sigmask_lock);
  636         if (current->sig != oldsig) {
  637                 kmem_cache_free(sigact_cachep, current->sig);
  638                 current->sig = oldsig;
  639         }
  640         spin_unlock_irq(&current->sigmask_lock);
  641         return retval;
  642 }
  643 
  644 /*
  645  * Tracing of suid binaries must not be allowed unless the tracer has
  646  * the capability to trace anything.
  647  *
  648  * Returns 1 when 'p' is being ptraced (PT_PTRACED) by a tracer that lacks
  649  * that capability (PT_PTRACE_CAP clear), 0 otherwise.
  650  */
  651 static inline int must_not_trace_exec(struct task_struct * p)
      {
              if (!(p->ptrace & PT_PTRACED))
                      return 0;
              return !(p->ptrace & PT_PTRACE_CAP);
      }
  652 
  653 /* 
  654  * Fill the binprm structure from the inode. 
  655  * Check permissions, then read the first 128 (BINPRM_BUF_SIZE) bytes
       *
       * Sets bprm->e_uid/e_gid (honouring set-uid/set-gid bits unless the
       * mount has MNT_NOSUID) and the three bprm capability sets, then reads
       * the start of the file into bprm->buf.
       *
       * Returns the result of kernel_read() on success, or -EACCES when the
       * file has no execute bit at all or no file operations.
  656  */
  657 int prepare_binprm(struct linux_binprm *bprm)
  658 {
  659         int mode;
  660         struct inode * inode = bprm->file->f_dentry->d_inode;
  661 
  662         mode = inode->i_mode;
  663         /*
  664          * Check execute perms again - if the caller has CAP_DAC_OVERRIDE,
  665          * vfs_permission lets a non-executable through
  666          */
  667         if (!(mode & 0111))     /* with at least _one_ execute bit set */
  668                 return -EACCES;
  669         if (bprm->file->f_op == NULL)
  670                 return -EACCES;
  671 
              /* default: the new image keeps the caller's effective ids */
  672         bprm->e_uid = current->euid;
  673         bprm->e_gid = current->egid;
  674 
  675         if(!(bprm->file->f_vfsmnt->mnt_flags & MNT_NOSUID)) {
  676                 /* Set-uid? */
  677                 if (mode & S_ISUID)
  678                         bprm->e_uid = inode->i_uid;
  679 
  680                 /* Set-gid? */
  681                 /*
  682                  * If setgid is set but no group execute bit then this
  683                  * is a candidate for mandatory locking, not a setgid
  684                  * executable.
  685                  */
  686                 if ((mode & (S_ISGID | S_IXGRP)) == (S_ISGID | S_IXGRP))
  687                         bprm->e_gid = inode->i_gid;
  688         }
  689 
  690         /* We don't have VFS support for capabilities yet */
  691         cap_clear(bprm->cap_inheritable);
  692         cap_clear(bprm->cap_permitted);
  693         cap_clear(bprm->cap_effective);
  694 
  695         /*  To support inheritance of root-permissions and suid-root
  696          *  executables under compatibility mode, we raise all three
  697          *  capability sets for the file.
  698          *
  699          *  If only the real uid is 0, we only raise the inheritable
  700          *  and permitted sets of the executable file.
  701          */
  702 
  703         if (!issecure(SECURE_NOROOT)) {
  704                 if (bprm->e_uid == 0 || current->uid == 0) {
  705                         cap_set_full(bprm->cap_inheritable);
  706                         cap_set_full(bprm->cap_permitted);
  707                 }
  708                 if (bprm->e_uid == 0) 
  709                         cap_set_full(bprm->cap_effective);
  710         }
  711 
              /* zero first so a short read leaves the rest of the buffer zeroed */
  712         memset(bprm->buf,0,BINPRM_BUF_SIZE);
  713         return kernel_read(bprm->file,0,bprm->buf,BINPRM_BUF_SIZE);
  714 }
  715 
  716 /*
  717  * This function is used to produce the new IDs and capabilities
  718  * from the old ones and the file's capabilities.
  719  *
  720  * The formula used for evolving capabilities is:
  721  *
  722  *       pI' = pI
  723  * (***) pP' = (fP & X) | (fI & pI)
  724  *       pE' = pP' & fE          [NB. fE is 0 or ~0]
  725  *
  726  * I=Inheritable, P=Permitted, E=Effective // p=process, f=file
  727  * ' indicates post-exec(), and X is the global 'cap_bset'.
  728  *
       * When the exec would change ids or add permitted capabilities, the
       * process is marked non-dumpable, and - if it is traced without
       * PT_PTRACE_CAP or shares fs, files or signal state - the gain is
       * withheld unless the caller has CAP_SETUID / CAP_SETPCAP.
       * bprm->e_uid/e_gid may be rewritten as a result.  Nothing is returned.
  729  */
  730 
  731 void compute_creds(struct linux_binprm *bprm) 
  732 {
  733         kernel_cap_t new_permitted, working;
  734         int do_unlock = 0;
  735 
              /* pP' = (fP & X) | (fI & pI) */
  736         new_permitted = cap_intersect(bprm->cap_permitted, cap_bset);
  737         working = cap_intersect(bprm->cap_inheritable,
  738                                 current->cap_inheritable);
  739         new_permitted = cap_combine(new_permitted, working);
  740 
  741         if (bprm->e_uid != current->uid || bprm->e_gid != current->gid ||
  742             !cap_issubset(new_permitted, current->cap_permitted)) {
  743                 current->mm->dumpable = 0;
  744                 
                      /* the big kernel lock stays held until the ids are set below */
  745                 lock_kernel();
  746                 if (must_not_trace_exec(current)
  747                     || atomic_read(&current->fs->count) > 1
  748                     || atomic_read(&current->files->count) > 1
  749                     || atomic_read(&current->sig->count) > 1) {
                              /* fall back to the real ids */
  750                         if(!capable(CAP_SETUID)) {
  751                                 bprm->e_uid = current->uid;
  752                                 bprm->e_gid = current->gid;
  753                         }
                              /* never exceed what is already permitted */
  754                         if(!capable(CAP_SETPCAP)) {
  755                                 new_permitted = cap_intersect(new_permitted,
  756                                                         current->cap_permitted);
  757                         }
  758                 }
  759                 do_unlock = 1;
  760         }
  761 
  762 
  763         /* For init, we want to retain the capabilities set
  764          * in the init_task struct. Thus we skip the usual
  765          * capability rules */
  766         if (current->pid != 1) {
  767                 current->cap_permitted = new_permitted;
                      /* pE' = pP' & fE */
  768                 current->cap_effective =
  769                         cap_intersect(new_permitted, bprm->cap_effective);
  770         }
  771         
  772         /* AUD: Audit candidate if current->cap_effective is set */
  773 
              /* saved, effective and filesystem ids all take the new value */
  774         current->suid = current->euid = current->fsuid = bprm->e_uid;
  775         current->sgid = current->egid = current->fsgid = bprm->e_gid;
  776 
  777         if(do_unlock)
  778                 unlock_kernel();
  779         current->keep_capabilities = 0;
  780 }
  781 
  782 
      /*
       * remove_arg_zero() - drop the first string from the argument area.
       *
       * When bprm->argc is non-zero, bprm->p is advanced past the string it
       * points at (including the terminating NUL) and bprm->argc is
       * decremented.  The string is scanned through the pages in bprm->page[],
       * mapping one page at a time with kmap().  Does nothing when argc is 0.
       */
  783 void remove_arg_zero(struct linux_binprm *bprm)
  784 {
  785         if (bprm->argc) {
  786                 unsigned long offset;
  787                 char * kaddr;
  788                 struct page *page;
  789 
  790                 offset = bprm->p % PAGE_SIZE;
                      /*
                       * Jump into the loop body so that the first page is
                       * mapped before the loop condition reads through kaddr.
                       */
  791                 goto inside;
  792 
                      /*
                       * Each test consumes one byte: bprm->p and offset both
                       * advance, and the loop ends once the NUL has been read.
                       */
  793                 while (bprm->p++, *(kaddr+offset++)) {
  794                         if (offset != PAGE_SIZE)
  795                                 continue;
                              /* ran off the end of this page: move to the next */
  796                         offset = 0;
  797                         kunmap(page);
  798 inside:
  799                         page = bprm->page[bprm->p/PAGE_SIZE];
  800                         kaddr = kmap(page);
  801                 }
  802                 kunmap(page);
  803                 bprm->argc--;
  804         }
  805 }
  806 
  807 /*
  808  * Cycle through the list of binary format handlers until one recognizes
  809  * the image.
       *
       * Every registered format that has a load_binary() hook is called with
       * bprm and regs. The search ends as soon as a handler returns a value
       * >= 0, returns an error other than -ENOEXEC, or returns -ENOEXEC with
       * bprm->file cleared. The list is walked at most twice; with
       * CONFIG_KMOD a "binfmt-%04x" module is requested between the two
       * passes unless the first four bytes of bprm->buf are all printable.
       *
       * When a handler returns >= 0, bprm->file is released and set to NULL,
       * current->did_exec is set and the handler's value is returned.
       * Otherwise the last handler error is returned.
       */
  810 int search_binary_handler(struct linux_binprm *bprm,struct pt_regs *regs)
  811 {
  812         int try,retval=0;
  813         struct linux_binfmt *fmt;
  814 #ifdef __alpha__
  815         /* handle /sbin/loader.. */
  816         {
  817             struct exec * eh = (struct exec *) bprm->buf;
  818 
                  /* Only when no loader has been set up yet and the header
                     has magic 0x183 with both 0x3000 flag bits set. */
  819             if (!bprm->loader && eh->fh.f_magic == 0x183 &&
  820                 (eh->fh.f_flags & 0x3000) == 0x3000)
  821             {
  822                 struct file * file;
  823                 unsigned long loader;
  824 
                      /* Release the original file; /sbin/loader replaces it. */
  825                 allow_write_access(bprm->file);
  826                 fput(bprm->file);
  827                 bprm->file = NULL;
  828 
  829                 loader = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
  830 
  831                 file = open_exec("/sbin/loader");
  832                 retval = PTR_ERR(file);
  833                 if (IS_ERR(file))
  834                         return retval;
  835 
  836                 /* Remember if the application is TASO.  */
  837                 bprm->sh_bang = eh->ah.entry < 0x100000000;
  838 
  839                 bprm->file = file;
  840                 bprm->loader = loader;
  841                 retval = prepare_binprm(bprm);
  842                 if (retval<0)
  843                         return retval;
  844                 /* should call search_binary_handler recursively here,
  845                    but it does not matter */
  846             }
  847         }
  848 #endif
  849         /* kernel module loader fixup */
  850         /* so we don't try to load run modprobe in kernel space. */
  851         set_fs(USER_DS);
  852         for (try=0; try<2; try++) {
  853                 read_lock(&binfmt_lock);
  854                 for (fmt = formats ; fmt ; fmt = fmt->next) {
  855                         int (*fn)(struct linux_binprm *, struct pt_regs *) = fmt->load_binary;
  856                         if (!fn)
  857                                 continue;
                              /* Skip formats whose module count cannot be
                                 raised. */
  858                         if (!try_inc_mod_count(fmt->module))
  859                                 continue;
                              /* binfmt_lock is not held while the handler
                                 runs; it is re-taken below on failure. */
  860                         read_unlock(&binfmt_lock);
  861                         retval = fn(bprm, regs);
  862                         if (retval >= 0) {
  863                                 put_binfmt(fmt);
  864                                 allow_write_access(bprm->file);
  865                                 if (bprm->file)
  866                                         fput(bprm->file);
  867                                 bprm->file = NULL;
  868                                 current->did_exec = 1;
  869                                 return retval;
  870                         }
  871                         read_lock(&binfmt_lock);
  872                         put_binfmt(fmt);
                              /* Any error other than "not my format" ends the
                                 search. */
  873                         if (retval != -ENOEXEC)
  874                                 break;
                              /* The handler cleared bprm->file: nothing is left
                                 to offer to the remaining formats. */
  875                         if (!bprm->file) {
  876                                 read_unlock(&binfmt_lock);
  877                                 return retval;
  878                         }
  879                 }
  880                 read_unlock(&binfmt_lock);
  881                 if (retval != -ENOEXEC) {
  882                         break;
  883 #ifdef CONFIG_KMOD
  884                 }else{
  885 #define printable(c) (((c)=='\t') || ((c)=='\n') || (0x20<=(c) && (c)<=0x7e))
  886                         char modname[20];
                              /* Four printable leading bytes: give up without
                                 requesting a module. */
  887                         if (printable(bprm->buf[0]) &&
  888                             printable(bprm->buf[1]) &&
  889                             printable(bprm->buf[2]) &&
  890                             printable(bprm->buf[3]))
  891                                 break; /* -ENOEXEC */
                              /* The module name is built from the 16-bit value
                                 read at bprm->buf[2]. */
  892                         sprintf(modname, "binfmt-%04x", *(unsigned short *)(&bprm->buf[2]));
  893                         request_module(modname);
  894 #endif
  895                 }
  896         }
  897         return retval;
  898 }
  899 
  900 
  901 /*
  902  * sys_execve() executes a new program.
       *
       * do_execve() opens filename, fills in a struct linux_binprm, copies
       * the filename, environment and argument strings into it and passes it
       * to search_binary_handler().
       *
       * Returns the value of search_binary_handler() when that is >= 0.
       * Otherwise returns the negative error from the step that failed; the
       * executable file is then released, and for failures from
       * prepare_binprm() onwards any allocated argument pages are freed.
  903  */
  904 int do_execve(char * filename, char ** argv, char ** envp, struct pt_regs * regs)
  905 {
  906         struct linux_binprm bprm;
  907         struct file *file;
  908         int retval;
  909         int i;
  910 
  911         file = open_exec(filename);
  912 
  913         retval = PTR_ERR(file);
  914         if (IS_ERR(file))
  915                 return retval;
  916 
              /* Start just below the top of the MAX_ARG_PAGES area, with no
                 argument pages allocated yet. */
  917         bprm.p = PAGE_SIZE*MAX_ARG_PAGES-sizeof(void *);
  918         memset(bprm.page, 0, MAX_ARG_PAGES*sizeof(bprm.page[0])); 
  919 
  920         bprm.file = file;
  921         bprm.filename = filename;
  922         bprm.sh_bang = 0;
  923         bprm.loader = 0;
  924         bprm.exec = 0;
              /* A negative count is returned as the error. These two paths
                 only release the file and do not go through "out". */
  925         if ((bprm.argc = count(argv, bprm.p / sizeof(void *))) < 0) {
  926                 allow_write_access(file);
  927                 fput(file);
  928                 return bprm.argc;
  929         }
  930 
  931         if ((bprm.envc = count(envp, bprm.p / sizeof(void *))) < 0) {
  932                 allow_write_access(file);
  933                 fput(file);
  934                 return bprm.envc;
  935         }
  936 
  937         retval = prepare_binprm(&bprm);
  938         if (retval < 0) 
  939                 goto out; 
  940 
              /* Strings are copied in this order: filename, environment,
                 arguments. */
  941         retval = copy_strings_kernel(1, &bprm.filename, &bprm);
  942         if (retval < 0) 
  943                 goto out; 
  944 
              /* Record the value of bprm.p right after the filename copy. */
  945         bprm.exec = bprm.p;
  946         retval = copy_strings(bprm.envc, envp, &bprm);
  947         if (retval < 0) 
  948                 goto out; 
  949 
  950         retval = copy_strings(bprm.argc, argv, &bprm);
  951         if (retval < 0) 
  952                 goto out; 
  953 
  954         retval = search_binary_handler(&bprm,regs);
  955         if (retval >= 0)
  956                 /* execve success */
  957                 return retval;
  958 
  959 out:
  960         /* Something went wrong, return the inode and free the argument pages */
  961         allow_write_access(bprm.file);
  962         if (bprm.file)
  963                 fput(bprm.file);
  964 
  965         for (i = 0 ; i < MAX_ARG_PAGES ; i++) {
  966                 struct page * page = bprm.page[i];
  967                 if (page)
  968                         __free_page(page);
  969         }
  970 
  971         return retval;
  972 }
  973 
      /*
       * set_binfmt() makes "new" the binary format of the current task.
       *
       * The use count of new's module (if any) is raised before
       * current->binfmt is switched, and the use count of the previous
       * format's module (if any) is dropped afterwards. Either "new" or the
       * previous format may be NULL or have no module.
       */
  974 void set_binfmt(struct linux_binfmt *new)
  975 {
  976         struct linux_binfmt *old = current->binfmt;
  977         if (new && new->module)
  978                 __MOD_INC_USE_COUNT(new->module);
  979         current->binfmt = new;
  980         if (old && old->module)
  981                 __MOD_DEC_USE_COUNT(old->module);
  982 }
  983 
  984 #define CORENAME_MAX_SIZE 64
  985 
  986 /* format_corename will inspect the pattern parameter, and output a
  987  * name into corename, which must have space for at least
  988  * CORENAME_MAX_SIZE bytes plus one byte for the zero terminator.
       *
       * Ordinary pattern characters are copied verbatim. Recognized
       * specifiers are %% (literal percent), %p (pid), %u (uid), %g (gid),
       * %s (signr), %t (seconds from do_gettimeofday()), %h (nodename) and
       * %e (current->comm). Any other specifier is dropped silently, and a
       * lone '%' at the end of the pattern ends processing.
       *
       * A field that does not fit entirely into the remaining space is
       * dropped and formatting stops there; the result is always zero
       * terminated. Each snprintf() is given the remaining space plus one,
       * because the byte at out_end is reserved for the terminator. This
       * lets a field that exactly fills the remaining space be stored
       * completely instead of losing its last character.
  989  */
  990 void format_corename(char *corename, const char *pattern, long signr)
  991 {
  992         const char *pat_ptr = pattern;
  993         char *out_ptr = corename;
  994         char *const out_end = corename + CORENAME_MAX_SIZE;
  995         int rc;
  996         int pid_in_pattern = 0;
  997 
  998         /* Repeat as long as we have more pattern to process and more output
  999            space */
 1000         while (*pat_ptr) {
 1001                 if (*pat_ptr != '%') {
 1002                         if (out_ptr == out_end)
 1003                                 goto out;
 1004                         *out_ptr++ = *pat_ptr++;
 1005                 } else {
 1006                         switch (*++pat_ptr) {
 1007                         case 0:
 1008                                 goto out;
 1009                         /* Double percent, output one percent */
 1010                         case '%':
 1011                                 if (out_ptr == out_end)
 1012                                         goto out;
 1013                                 *out_ptr++ = '%';
 1014                                 break;
 1015                         /* pid */
 1016                         case 'p':
 1017                                 pid_in_pattern = 1;
 1018                                 rc = snprintf(out_ptr, out_end - out_ptr + 1,
 1019                                               "%d", current->pid);
 1020                                 if (rc > out_end - out_ptr)
 1021                                         goto out;
 1022                                 out_ptr += rc;
 1023                                 break;
 1024                         /* uid */
 1025                         case 'u':
 1026                                 rc = snprintf(out_ptr, out_end - out_ptr + 1,
 1027                                               "%d", current->uid);
 1028                                 if (rc > out_end - out_ptr)
 1029                                         goto out;
 1030                                 out_ptr += rc;
 1031                                 break;
 1032                         /* gid */
 1033                         case 'g':
 1034                                 rc = snprintf(out_ptr, out_end - out_ptr + 1,
 1035                                               "%d", current->gid);
 1036                                 if (rc > out_end - out_ptr)
 1037                                         goto out;
 1038                                 out_ptr += rc;
 1039                                 break;
 1040                         /* signal that caused the coredump */
 1041                         case 's':
 1042                                 rc = snprintf(out_ptr, out_end - out_ptr + 1,
 1043                                               "%ld", signr);
 1044                                 if (rc > out_end - out_ptr)
 1045                                         goto out;
 1046                                 out_ptr += rc;
 1047                                 break;
 1048                         /* UNIX time of coredump */
 1049                         case 't': {
 1050                                 struct timeval tv;
 1051                                 do_gettimeofday(&tv);
 1052                                 rc = snprintf(out_ptr, out_end - out_ptr + 1,
 1053                                               "%ld", tv.tv_sec);
 1054                                 if (rc > out_end - out_ptr)
 1055                                         goto out;
 1056                                 out_ptr += rc;
 1057                                 break;
 1058                         }
 1059                         /* hostname */
 1060                         case 'h':
 1061                                 down_read(&uts_sem);
 1062                                 rc = snprintf(out_ptr, out_end - out_ptr + 1,
 1063                                               "%s", system_utsname.nodename);
 1064                                 up_read(&uts_sem);
 1065                                 if (rc > out_end - out_ptr)
 1066                                         goto out;
 1067                                 out_ptr += rc;
 1068                                 break;
 1069                         /* executable */
 1070                         case 'e':
 1071                                 rc = snprintf(out_ptr, out_end - out_ptr + 1,
 1072                                               "%s", current->comm);
 1073                                 if (rc > out_end - out_ptr)
 1074                                         goto out;
 1075                                 out_ptr += rc;
 1076                                 break;
                              /* Unknown specifier: emit nothing and skip it */
 1077                         default:
 1078                                 break;
 1079                         }
 1080                         ++pat_ptr;
 1081                 }
 1082         }
 1083         /* Backward compatibility with core_uses_pid:
 1084          *
 1085          * If core_pattern does not include a %p (as is the default)
 1086          * and core_uses_pid is set, then .%pid will be appended to
 1087          * the filename. The same is done when the mm has more than
               * one user. */
 1088         if (!pid_in_pattern
 1089             && (core_uses_pid || atomic_read(&current->mm->mm_users) != 1)) {
 1090                 rc = snprintf(out_ptr, out_end - out_ptr + 1,
 1091                               ".%d", current->pid);
 1092                 if (rc > out_end - out_ptr)
 1093                         goto out;
 1094                 out_ptr += rc;
 1095         }
 1096       out:
              /* out_ptr never exceeds out_end, so this stays inside the
                 CORENAME_MAX_SIZE + 1 byte buffer and also erases any
                 partial field left behind by a truncated snprintf(). */
 1097         *out_ptr = 0;
 1098 }
 1099 
      /*
       * do_coredump() writes a core file for the current task.
       *
       * The whole operation runs under the kernel lock. Nothing is dumped,
       * and 0 is returned, when the current binary format has no core_dump
       * hook, when the task is not dumpable, when the RLIMIT_CORE soft limit
       * is below the format's min_coredump, or when the core file cannot be
       * opened or fails one of the checks below. Otherwise the value of
       * binfmt->core_dump() is returned.
       *
       * The file name is produced by format_corename() from core_pattern.
       */
 1100 int do_coredump(long signr, struct pt_regs * regs)
 1101 {
 1102         struct linux_binfmt * binfmt;
 1103         char corename[CORENAME_MAX_SIZE + 1];
 1104         struct file * file;
 1105         struct inode * inode;
 1106         int retval = 0;
 1107 
 1108         lock_kernel();
 1109         binfmt = current->binfmt;
 1110         if (!binfmt || !binfmt->core_dump)
 1111                 goto fail;
 1112         if (!is_dumpable(current))
 1113                 goto fail;
              /* Cleared before the remaining checks and not restored on any
                 later failure path. */
 1114         current->mm->dumpable = 0;
 1115         if (current->rlim[RLIMIT_CORE].rlim_cur < binfmt->min_coredump)
 1116                 goto fail;
 1117 
 1118         format_corename(corename, core_pattern, signr);
              /* NOTE(review): the literal 2 is presumably O_RDWR - confirm. */
 1119         file = filp_open(corename, O_CREAT | 2 | O_NOFOLLOW, 0600);
 1120         if (IS_ERR(file))
 1121                 goto fail;
 1122         inode = file->f_dentry->d_inode;
 1123         if (inode->i_nlink > 1)
 1124                 goto close_fail;        /* multiple links - don't dump */
              /* Do not dump through an unhashed dentry. */
 1125         if (d_unhashed(file->f_dentry))
 1126                 goto close_fail;
 1127 
              /* Only dump into a regular file that has a write operation. */
 1128         if (!S_ISREG(inode->i_mode))
 1129                 goto close_fail;
 1130         if (!file->f_op)
 1131                 goto close_fail;
 1132         if (!file->f_op->write)
 1133                 goto close_fail;
              /* Truncate the file to zero length before dumping. */
 1134         if (do_truncate(file->f_dentry, 0) != 0)
 1135                 goto close_fail;
 1136 
 1137         retval = binfmt->core_dump(signr, regs, file);
 1138 
 1139 close_fail:
 1140         filp_close(file, NULL);
 1141 fail:
 1142         unlock_kernel();
 1143         return retval;
 1144 }

Cache object: 8952c06e103b4bc5f2e361e432cc36d7


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.