
FreeBSD/Linux Kernel Cross Reference
sys/fs/file.c


    1 /*
    2  *  linux/fs/file.c
    3  *
    4  *  Copyright (C) 1998-1999, Stephen Tweedie and Bill Hawes
    5  *
    6  *  Manage the dynamic fd arrays in the process files_struct.
    7  */
    8 
    9 #include <linux/syscalls.h>
   10 #include <linux/export.h>
   11 #include <linux/fs.h>
   12 #include <linux/mm.h>
   13 #include <linux/mmzone.h>
   14 #include <linux/time.h>
   15 #include <linux/sched.h>
   16 #include <linux/slab.h>
   17 #include <linux/vmalloc.h>
   18 #include <linux/file.h>
   19 #include <linux/fdtable.h>
   20 #include <linux/bitops.h>
   21 #include <linux/interrupt.h>
   22 #include <linux/spinlock.h>
   23 #include <linux/rcupdate.h>
   24 #include <linux/workqueue.h>
   25 
   26 struct fdtable_defer {
   27         spinlock_t lock;
   28         struct work_struct wq;
   29         struct fdtable *next;
   30 };
   31 
   32 int sysctl_nr_open __read_mostly = 1024*1024;
   33 int sysctl_nr_open_min = BITS_PER_LONG;
   34 int sysctl_nr_open_max = 1024 * 1024; /* raised later */
   35 
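These variables back the fs.nr_open sysctl, which caps how far a process's fd table may grow; sysctl_nr_open_max is recomputed in files_defer_init() below. The current ceiling can be inspected from userspace by reading the procfs file that exposes it (a minimal sketch, assuming the standard Linux /proc layout):

#include <stdio.h>

int main(void)
{
        /* fs.nr_open (sysctl_nr_open above) is exported here on Linux */
        FILE *f = fopen("/proc/sys/fs/nr_open", "r");
        long nr;

        if (f == NULL) {
                perror("fopen");
                return 1;
        }
        if (fscanf(f, "%ld", &nr) == 1)
                printf("fs.nr_open = %ld\n", nr);
        fclose(f);
        return 0;
}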
   36 /*
   37  * We use this list to defer freeing fdtables that have vmalloc'ed
   38  * sets/arrays.  Keeping a per-cpu list avoids embedding the
   39  * work_struct in the fdtable itself, which would add 64 bytes
   40  * (on i386) to that per-task structure.
   41  */
   42 static DEFINE_PER_CPU(struct fdtable_defer, fdtable_defer_list);
   43 
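The deferral exists because free_fdtable_rcu() below runs as an RCU callback in softirq context, where vfree() is not safe to call; vmalloc'ed tables are therefore parked on this per-cpu list and released later from process context. A minimal sketch of the same defer-to-workqueue pattern in isolation (struct my_obj and defer_free() are illustrative names; the list, spinlock and workqueue APIs are the standard kernel ones):

#include <linux/list.h>
#include <linux/slab.h>
#include <linux/spinlock.h>
#include <linux/vmalloc.h>
#include <linux/workqueue.h>

struct my_obj {
        struct list_head node;
        void *big_buffer;               /* allocated with vmalloc() */
};

static LIST_HEAD(pending);
static DEFINE_SPINLOCK(pending_lock);

static void drain_fn(struct work_struct *unused)
{
        struct my_obj *obj, *tmp;
        LIST_HEAD(local);

        spin_lock_bh(&pending_lock);
        list_splice_init(&pending, &local);     /* grab the whole batch */
        spin_unlock_bh(&pending_lock);

        list_for_each_entry_safe(obj, tmp, &local, node) {
                vfree(obj->big_buffer);         /* safe: process context */
                kfree(obj);
        }
}
static DECLARE_WORK(drain_work, drain_fn);

/* Callable from softirq context, where vfree() is not allowed. */
static void defer_free(struct my_obj *obj)
{
        spin_lock(&pending_lock);
        list_add(&obj->node, &pending);
        spin_unlock(&pending_lock);
        schedule_work(&drain_work);
}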
   44 static void *alloc_fdmem(size_t size)
   45 {
   46         /*
   47          * Very large allocations can stress page reclaim, so fall back to
   48          * vmalloc() if the allocation size will be considered "large" by the VM.
   49          */
   50         if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) {
   51                 void *data = kmalloc(size, GFP_KERNEL|__GFP_NOWARN);
   52                 if (data != NULL)
   53                         return data;
   54         }
   55         return vmalloc(size);
   56 }
   57 
   58 static void free_fdmem(void *ptr)
   59 {
   60         is_vmalloc_addr(ptr) ? vfree(ptr) : kfree(ptr);
   61 }
   62 
   63 static void __free_fdtable(struct fdtable *fdt)
   64 {
   65         free_fdmem(fdt->fd);
   66         free_fdmem(fdt->open_fds);
   67         kfree(fdt);
   68 }
   69 
   70 static void free_fdtable_work(struct work_struct *work)
   71 {
   72         struct fdtable_defer *f =
   73                 container_of(work, struct fdtable_defer, wq);
   74         struct fdtable *fdt;
   75 
   76         spin_lock_bh(&f->lock);
   77         fdt = f->next;
   78         f->next = NULL;
   79         spin_unlock_bh(&f->lock);
    80         while (fdt) {
   81                 struct fdtable *next = fdt->next;
   82 
   83                 __free_fdtable(fdt);
   84                 fdt = next;
   85         }
   86 }
   87 
   88 static void free_fdtable_rcu(struct rcu_head *rcu)
   89 {
   90         struct fdtable *fdt = container_of(rcu, struct fdtable, rcu);
   91         struct fdtable_defer *fddef;
   92 
   93         BUG_ON(!fdt);
   94         BUG_ON(fdt->max_fds <= NR_OPEN_DEFAULT);
   95 
   96         if (!is_vmalloc_addr(fdt->fd) && !is_vmalloc_addr(fdt->open_fds)) {
   97                 kfree(fdt->fd);
   98                 kfree(fdt->open_fds);
   99                 kfree(fdt);
  100         } else {
  101                 fddef = &get_cpu_var(fdtable_defer_list);
  102                 spin_lock(&fddef->lock);
  103                 fdt->next = fddef->next;
  104                 fddef->next = fdt;
  105                 /* vmallocs are handled from the workqueue context */
  106                 schedule_work(&fddef->wq);
  107                 spin_unlock(&fddef->lock);
  108                 put_cpu_var(fdtable_defer_list);
  109         }
  110 }
  111 
  112 /*
  113  * Copy the fd array and the fd sets into the new, larger fdtable,
  114  * zeroing the added tail.  Called with files->file_lock held.
  115  */
  116 static void copy_fdtable(struct fdtable *nfdt, struct fdtable *ofdt)
  117 {
  118         unsigned int cpy, set;
  119 
  120         BUG_ON(nfdt->max_fds < ofdt->max_fds);
  121 
  122         cpy = ofdt->max_fds * sizeof(struct file *);
  123         set = (nfdt->max_fds - ofdt->max_fds) * sizeof(struct file *);
  124         memcpy(nfdt->fd, ofdt->fd, cpy);
  125         memset((char *)(nfdt->fd) + cpy, 0, set);
  126 
  127         cpy = ofdt->max_fds / BITS_PER_BYTE;
  128         set = (nfdt->max_fds - ofdt->max_fds) / BITS_PER_BYTE;
  129         memcpy(nfdt->open_fds, ofdt->open_fds, cpy);
  130         memset((char *)(nfdt->open_fds) + cpy, 0, set);
  131         memcpy(nfdt->close_on_exec, ofdt->close_on_exec, cpy);
  132         memset((char *)(nfdt->close_on_exec) + cpy, 0, set);
  133 }
  134 
  135 static struct fdtable * alloc_fdtable(unsigned int nr)
  136 {
  137         struct fdtable *fdt;
  138         void *data;
  139 
  140         /*
  141          * Figure out how many fds we actually want to support in this fdtable.
  142          * Allocation steps are keyed to the size of the fdarray, since it
  143          * grows far faster than any of the other dynamic data. We try to fit
  144          * the fdarray into comfortable page-tuned chunks: starting at 1024B
  145          * and growing in powers of two from there on.
  146          */
  147         nr /= (1024 / sizeof(struct file *));
  148         nr = roundup_pow_of_two(nr + 1);
  149         nr *= (1024 / sizeof(struct file *));
  150         /*
  151          * Note that this can drive nr *below* what we had passed if sysctl_nr_open
  152          * had been set lower between the check in expand_files() and here.  Deal
  153          * with that in caller, it's cheaper that way.
  154          *
  155          * We make sure that nr remains a multiple of BITS_PER_LONG - otherwise
  156          * bitmaps handling below becomes unpleasant, to put it mildly...
  157          */
  158         if (unlikely(nr > sysctl_nr_open))
  159                 nr = ((sysctl_nr_open - 1) | (BITS_PER_LONG - 1)) + 1;
  160 
  161         fdt = kmalloc(sizeof(struct fdtable), GFP_KERNEL);
  162         if (!fdt)
  163                 goto out;
  164         fdt->max_fds = nr;
  165         data = alloc_fdmem(nr * sizeof(struct file *));
  166         if (!data)
  167                 goto out_fdt;
  168         fdt->fd = data;
  169 
  170         data = alloc_fdmem(max_t(size_t,
  171                                  2 * nr / BITS_PER_BYTE, L1_CACHE_BYTES));
  172         if (!data)
  173                 goto out_arr;
  174         fdt->open_fds = data;
  175         data += nr / BITS_PER_BYTE;
  176         fdt->close_on_exec = data;
  177         fdt->next = NULL;
  178 
  179         return fdt;
  180 
  181 out_arr:
  182         free_fdmem(fdt->fd);
  183 out_fdt:
  184         kfree(fdt);
  185 out:
  186         return NULL;
  187 }
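The sizing logic above is easy to trace by hand: with 8-byte file pointers, a request for 300 descriptors becomes 300/128 = 2, rounded up to the next power of two (4), times 128, i.e. 512 slots and a one-page fd array. A userspace replica of the arithmetic (a sketch that assumes sizeof(struct file *) == 8):

#include <stdio.h>

static unsigned long roundup_pow_of_two(unsigned long v)
{
        unsigned long r = 1;

        while (r < v)
                r <<= 1;
        return r;
}

int main(void)
{
        const unsigned long fpsize = 8; /* sizeof(struct file *) on 64-bit */
        unsigned long req[] = { 64, 300, 1024, 5000 };
        unsigned long i, nr;

        for (i = 0; i < sizeof(req) / sizeof(req[0]); i++) {
                nr = req[i] / (1024 / fpsize);
                nr = roundup_pow_of_two(nr + 1);
                nr *= 1024 / fpsize;
                printf("request %4lu -> max_fds %5lu (fd array %6lu bytes)\n",
                       req[i], nr, nr * fpsize);
        }
        return 0;
}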
  188 
  189 /*
  190  * Expand the file descriptor table.
  191  * This function allocates a new fdtable, with the fd array and fd sets
  192  * sized for at least the given number of descriptors.
  193  * Return <0 error code on error; 1 on successful completion.
  194  * The files->file_lock should be held on entry, and will be held on exit.
  195  */
  196 static int expand_fdtable(struct files_struct *files, int nr)
  197         __releases(files->file_lock)
  198         __acquires(files->file_lock)
  199 {
  200         struct fdtable *new_fdt, *cur_fdt;
  201 
  202         spin_unlock(&files->file_lock);
  203         new_fdt = alloc_fdtable(nr);
  204         spin_lock(&files->file_lock);
  205         if (!new_fdt)
  206                 return -ENOMEM;
  207         /*
  208          * extremely unlikely race - sysctl_nr_open decreased between the check in
  209          * caller and alloc_fdtable().  Cheaper to catch it here...
  210          */
  211         if (unlikely(new_fdt->max_fds <= nr)) {
  212                 __free_fdtable(new_fdt);
  213                 return -EMFILE;
  214         }
  215         /*
  216          * Check again since another task may have expanded the fd table while
  217          * we dropped the lock
  218          */
  219         cur_fdt = files_fdtable(files);
  220         if (nr >= cur_fdt->max_fds) {
  221                 /* Continue as planned */
  222                 copy_fdtable(new_fdt, cur_fdt);
  223                 rcu_assign_pointer(files->fdt, new_fdt);
  224                 if (cur_fdt->max_fds > NR_OPEN_DEFAULT)
  225                         call_rcu(&cur_fdt->rcu, free_fdtable_rcu);
  226         } else {
  227                 /* Somebody else expanded, so undo our attempt */
  228                 __free_fdtable(new_fdt);
  229         }
  230         return 1;
  231 }
  232 
  233 /*
  234  * Expand files.
  235  * This function will expand the file structures, if the requested size exceeds
  236  * the current capacity and there is room for expansion.
  237  * Return <0 error code on error; 0 when nothing done; 1 when files were
  238  * expanded and execution may have blocked.
  239  * The files->file_lock should be held on entry, and will be held on exit.
  240  */
  241 static int expand_files(struct files_struct *files, int nr)
  242 {
  243         struct fdtable *fdt;
  244 
  245         fdt = files_fdtable(files);
  246 
  247         /* Do we need to expand? */
  248         if (nr < fdt->max_fds)
  249                 return 0;
  250 
  251         /* Can we expand? */
  252         if (nr >= sysctl_nr_open)
  253                 return -EMFILE;
  254 
  255         /* All good, so we try */
  256         return expand_fdtable(files, nr);
  257 }
  258 
  259 static inline void __set_close_on_exec(int fd, struct fdtable *fdt)
  260 {
  261         __set_bit(fd, fdt->close_on_exec);
  262 }
  263 
  264 static inline void __clear_close_on_exec(int fd, struct fdtable *fdt)
  265 {
  266         __clear_bit(fd, fdt->close_on_exec);
  267 }
  268 
  269 static inline void __set_open_fd(int fd, struct fdtable *fdt)
  270 {
  271         __set_bit(fd, fdt->open_fds);
  272 }
  273 
  274 static inline void __clear_open_fd(int fd, struct fdtable *fdt)
  275 {
  276         __clear_bit(fd, fdt->open_fds);
  277 }
  278 
  279 static int count_open_files(struct fdtable *fdt)
  280 {
  281         int size = fdt->max_fds;
  282         int i;
  283 
  284         /* Find the last open fd */
  285         for (i = size / BITS_PER_LONG; i > 0; ) {
  286                 if (fdt->open_fds[--i])
  287                         break;
  288         }
  289         i = (i + 1) * BITS_PER_LONG;
  290         return i;
  291 }
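Note that the result is rounded up to a multiple of BITS_PER_LONG rather than being the exact highest open fd plus one: with descriptors 0, 1 and 70 open, the scan stops at the second bitmap word and returns 128 on a 64-bit machine. dup_fd() below only uses the value to size its copy, so the overshoot is harmless.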
  292 
  293 /*
  294  * Allocate a new files structure and copy contents from the
  295  * passed-in files structure.
  296  * errorp will be valid only when the returned files_struct is NULL.
  297  */
  298 struct files_struct *dup_fd(struct files_struct *oldf, int *errorp)
  299 {
  300         struct files_struct *newf;
  301         struct file **old_fds, **new_fds;
  302         int open_files, size, i;
  303         struct fdtable *old_fdt, *new_fdt;
  304 
  305         *errorp = -ENOMEM;
  306         newf = kmem_cache_alloc(files_cachep, GFP_KERNEL);
  307         if (!newf)
  308                 goto out;
  309 
  310         atomic_set(&newf->count, 1);
  311 
  312         spin_lock_init(&newf->file_lock);
  313         newf->next_fd = 0;
  314         new_fdt = &newf->fdtab;
  315         new_fdt->max_fds = NR_OPEN_DEFAULT;
  316         new_fdt->close_on_exec = newf->close_on_exec_init;
  317         new_fdt->open_fds = newf->open_fds_init;
  318         new_fdt->fd = &newf->fd_array[0];
  319         new_fdt->next = NULL;
  320 
  321         spin_lock(&oldf->file_lock);
  322         old_fdt = files_fdtable(oldf);
  323         open_files = count_open_files(old_fdt);
  324 
  325         /*
  326          * Check whether we need to allocate a larger fd array and fd set.
  327          */
  328         while (unlikely(open_files > new_fdt->max_fds)) {
  329                 spin_unlock(&oldf->file_lock);
  330 
  331                 if (new_fdt != &newf->fdtab)
  332                         __free_fdtable(new_fdt);
  333 
  334                 new_fdt = alloc_fdtable(open_files - 1);
  335                 if (!new_fdt) {
  336                         *errorp = -ENOMEM;
  337                         goto out_release;
  338                 }
  339 
  340                 /* would exceed sysctl_nr_open; fail rather than truncate */
  341                 if (unlikely(new_fdt->max_fds < open_files)) {
  342                         __free_fdtable(new_fdt);
  343                         *errorp = -EMFILE;
  344                         goto out_release;
  345                 }
  346 
  347                 /*
  348                  * Reacquire the oldf lock and refetch its fd table pointer:
  349                  * it may have grown a new, bigger fd table while the lock
  350                  * was dropped, so we need the latest pointer.
  351                  */
  352                 spin_lock(&oldf->file_lock);
  353                 old_fdt = files_fdtable(oldf);
  354                 open_files = count_open_files(old_fdt);
  355         }
  356 
  357         old_fds = old_fdt->fd;
  358         new_fds = new_fdt->fd;
  359 
  360         memcpy(new_fdt->open_fds, old_fdt->open_fds, open_files / 8);
  361         memcpy(new_fdt->close_on_exec, old_fdt->close_on_exec, open_files / 8);
  362 
  363         for (i = open_files; i != 0; i--) {
  364                 struct file *f = *old_fds++;
  365                 if (f) {
  366                         get_file(f);
  367                 } else {
  368                         /*
  369                          * The fd may be claimed in the fd bitmap but not yet
  370                          * instantiated in the files array if a sibling thread
  371                          * is partway through open().  So make sure that this
  372                          * fd is available to the new process.
  373                          */
  374                         __clear_open_fd(open_files - i, new_fdt);
  375                 }
  376                 rcu_assign_pointer(*new_fds++, f);
  377         }
  378         spin_unlock(&oldf->file_lock);
  379 
  380         /* compute the remainder to be cleared */
  381         size = (new_fdt->max_fds - open_files) * sizeof(struct file *);
  382 
  383         /* This is long-word aligned, so an optimized memset could be used */
  384         memset(new_fds, 0, size);
  385 
  386         if (new_fdt->max_fds > open_files) {
  387                 int left = (new_fdt->max_fds - open_files) / 8;
  388                 int start = open_files / BITS_PER_LONG;
  389 
  390                 memset(&new_fdt->open_fds[start], 0, left);
  391                 memset(&new_fdt->close_on_exec[start], 0, left);
  392         }
  393 
  394         rcu_assign_pointer(newf->fdt, new_fdt);
  395 
  396         return newf;
  397 
  398 out_release:
  399         kmem_cache_free(files_cachep, newf);
  400 out:
  401         return NULL;
  402 }
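dup_fd() is what gives fork() its descriptor semantics: the child gets an independent copy of the table, while CLONE_FILES threads share one and skip this copy entirely. The observable effect from userspace (a small demonstration, assuming /dev/null exists):

#include <fcntl.h>
#include <stdio.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/dev/null", O_WRONLY);
        pid_t pid;

        if (fd < 0) {
                perror("open");
                return 1;
        }
        pid = fork();
        if (pid == 0) {         /* child: works on its own copy of the table */
                close(fd);
                _exit(0);
        }
        waitpid(pid, NULL, 0);
        /* The parent's slot is untouched by the child's close(). */
        if (write(fd, "x", 1) == 1)
                printf("fd %d still open in the parent\n", fd);
        close(fd);
        return 0;
}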
  403 
  404 static void close_files(struct files_struct * files)
  405 {
  406         int i, j;
  407         struct fdtable *fdt;
  408 
  409         j = 0;
  410 
  411         /*
  412          * It is safe to dereference the fd table without RCU or
  413          * ->file_lock because this is the last reference to the
  414          * files structure.  But use RCU to shut RCU-lockdep up.
  415          */
  416         rcu_read_lock();
  417         fdt = files_fdtable(files);
  418         rcu_read_unlock();
  419         for (;;) {
  420                 unsigned long set;
  421                 i = j * BITS_PER_LONG;
  422                 if (i >= fdt->max_fds)
  423                         break;
  424                 set = fdt->open_fds[j++];
  425                 while (set) {
  426                         if (set & 1) {
  427                                 struct file * file = xchg(&fdt->fd[i], NULL);
  428                                 if (file) {
  429                                         filp_close(file, files);
  430                                         cond_resched();
  431                                 }
  432                         }
  433                         i++;
  434                         set >>= 1;
  435                 }
  436         }
  437 }
  438 
  439 struct files_struct *get_files_struct(struct task_struct *task)
  440 {
  441         struct files_struct *files;
  442 
  443         task_lock(task);
  444         files = task->files;
  445         if (files)
  446                 atomic_inc(&files->count);
  447         task_unlock(task);
  448 
  449         return files;
  450 }
  451 
  452 void put_files_struct(struct files_struct *files)
  453 {
  454         struct fdtable *fdt;
  455 
  456         if (atomic_dec_and_test(&files->count)) {
  457                 close_files(files);
  458                 /* not really needed, since nobody can see us */
  459                 rcu_read_lock();
  460                 fdt = files_fdtable(files);
  461                 rcu_read_unlock();
  462                 /* free the arrays if they are not embedded */
  463                 if (fdt != &files->fdtab)
  464                         __free_fdtable(fdt);
  465                 kmem_cache_free(files_cachep, files);
  466         }
  467 }
  468 
  469 void reset_files_struct(struct files_struct *files)
  470 {
  471         struct task_struct *tsk = current;
  472         struct files_struct *old;
  473 
  474         old = tsk->files;
  475         task_lock(tsk);
  476         tsk->files = files;
  477         task_unlock(tsk);
  478         put_files_struct(old);
  479 }
  480 
  481 void exit_files(struct task_struct *tsk)
  482 {
  483         struct files_struct * files = tsk->files;
  484 
  485         if (files) {
  486                 task_lock(tsk);
  487                 tsk->files = NULL;
  488                 task_unlock(tsk);
  489                 put_files_struct(files);
  490         }
  491 }
  492 
  493 static void fdtable_defer_list_init(int cpu)
  494 {
  495         struct fdtable_defer *fddef = &per_cpu(fdtable_defer_list, cpu);
  496         spin_lock_init(&fddef->lock);
  497         INIT_WORK(&fddef->wq, free_fdtable_work);
  498         fddef->next = NULL;
  499 }
  500 
  501 void __init files_defer_init(void)
  502 {
  503         int i;
  504         for_each_possible_cpu(i)
  505                 fdtable_defer_list_init(i);
  506         sysctl_nr_open_max = min((size_t)INT_MAX, ~(size_t)0/sizeof(void *)) &
  507                              -BITS_PER_LONG;
  508 }
  509 
  510 struct files_struct init_files = {
  511         .count          = ATOMIC_INIT(1),
  512         .fdt            = &init_files.fdtab,
  513         .fdtab          = {
  514                 .max_fds        = NR_OPEN_DEFAULT,
  515                 .fd             = &init_files.fd_array[0],
  516                 .close_on_exec  = init_files.close_on_exec_init,
  517                 .open_fds       = init_files.open_fds_init,
  518         },
  519         .file_lock      = __SPIN_LOCK_UNLOCKED(init_task.file_lock),
  520 };
  521 
  522 /*
  523  * allocate a file descriptor, mark it busy.
  524  */
  525 int __alloc_fd(struct files_struct *files,
  526                unsigned start, unsigned end, unsigned flags)
  527 {
  528         unsigned int fd;
  529         int error;
  530         struct fdtable *fdt;
  531 
  532         spin_lock(&files->file_lock);
  533 repeat:
  534         fdt = files_fdtable(files);
  535         fd = start;
  536         if (fd < files->next_fd)
  537                 fd = files->next_fd;
  538 
  539         if (fd < fdt->max_fds)
  540                 fd = find_next_zero_bit(fdt->open_fds, fdt->max_fds, fd);
  541 
  542         /*
  543          * N.B. For clone tasks sharing a files structure, this test
  544          * will limit the total number of files that can be opened.
  545          */
  546         error = -EMFILE;
  547         if (fd >= end)
  548                 goto out;
  549 
  550         error = expand_files(files, fd);
  551         if (error < 0)
  552                 goto out;
  553 
  554         /*
  555          * If we needed to expand the fd array we
  556          * might have blocked - try again.
  557          */
  558         if (error)
  559                 goto repeat;
  560 
  561         if (start <= files->next_fd)
  562                 files->next_fd = fd + 1;
  563 
  564         __set_open_fd(fd, fdt);
  565         if (flags & O_CLOEXEC)
  566                 __set_close_on_exec(fd, fdt);
  567         else
  568                 __clear_close_on_exec(fd, fdt);
  569         error = fd;
  570 #if 1
  571         /* Sanity check */
  572         if (rcu_dereference_raw(fdt->fd[fd]) != NULL) {
  573                 printk(KERN_WARNING "alloc_fd: slot %d not NULL!\n", fd);
  574                 rcu_assign_pointer(fdt->fd[fd], NULL);
  575         }
  576 #endif
  577 
  578 out:
  579         spin_unlock(&files->file_lock);
  580         return error;
  581 }
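The next_fd field is only a hint for where the bitmap scan may begin: __put_unused_fd() below drops it back whenever a lower descriptor is released, so it never exceeds the lowest free slot, and the POSIX "lowest available descriptor" rule falls out of find_next_zero_bit(). From userspace the contract looks like this (a sketch, assuming /dev/null exists):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int a = open("/dev/null", O_RDONLY);
        int b = open("/dev/null", O_RDONLY);
        int c;

        close(a);                       /* frees the lower-numbered slot */
        c = open("/dev/null", O_RDONLY);
        printf("a=%d b=%d reopened=%d\n", a, b, c);     /* c == a */
        close(b);
        close(c);
        return 0;
}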
  582 
  583 static int alloc_fd(unsigned start, unsigned flags)
  584 {
  585         return __alloc_fd(current->files, start, rlimit(RLIMIT_NOFILE), flags);
  586 }
  587 
  588 int get_unused_fd_flags(unsigned flags)
  589 {
  590         return __alloc_fd(current->files, 0, rlimit(RLIMIT_NOFILE), flags);
  591 }
  592 EXPORT_SYMBOL(get_unused_fd_flags);
  593 
  594 static void __put_unused_fd(struct files_struct *files, unsigned int fd)
  595 {
  596         struct fdtable *fdt = files_fdtable(files);
  597         __clear_open_fd(fd, fdt);
  598         if (fd < files->next_fd)
  599                 files->next_fd = fd;
  600 }
  601 
  602 void put_unused_fd(unsigned int fd)
  603 {
  604         struct files_struct *files = current->files;
  605         spin_lock(&files->file_lock);
  606         __put_unused_fd(files, fd);
  607         spin_unlock(&files->file_lock);
  608 }
  609 
  610 EXPORT_SYMBOL(put_unused_fd);
  611 
  612 /*
  613  * Install a file pointer in the fd array.
  614  *
  615  * The VFS is full of places where we drop the files lock between
  616  * setting the open_fds bitmap and installing the file in the file
  617  * array.  At any such point, we are vulnerable to a dup2() race
  618  * installing a file in the array before us.  We need to detect this and
  619  * fput() the struct file we are about to overwrite in this case.
  620  *
  621  * It should never happen - if we allow dup2() to do it, _really_ bad
  622  * things will follow.
  623  *
  624  * NOTE: __fd_install() variant is really, really low-level; don't
  625  * use it unless you are forced to by truly lousy API shoved down
  626  * your throat.  'files' *MUST* be either current->files or obtained
  627  * by get_files_struct(current) done by whoever had given it to you,
  628  * or really bad things will happen.  Normally you want to use
  629  * fd_install() instead.
  630  */
  631 
  632 void __fd_install(struct files_struct *files, unsigned int fd,
  633                 struct file *file)
  634 {
  635         struct fdtable *fdt;
  636         spin_lock(&files->file_lock);
  637         fdt = files_fdtable(files);
  638         BUG_ON(fdt->fd[fd] != NULL);
  639         rcu_assign_pointer(fdt->fd[fd], file);
  640         spin_unlock(&files->file_lock);
  641 }
  642 
  643 void fd_install(unsigned int fd, struct file *file)
  644 {
  645         __fd_install(current->files, fd, file);
  646 }
  647 
  648 EXPORT_SYMBOL(fd_install);
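The usual in-kernel pairing is: reserve a slot with get_unused_fd_flags(), construct the struct file, then publish it with fd_install(); on failure the reserved slot goes back via put_unused_fd(). A sketch of that shape (open_sketch is an illustrative name; fs/open.c follows this pattern with fuller error handling):

#include <linux/err.h>
#include <linux/file.h>
#include <linux/fs.h>

static long open_sketch(const char *name, int flags, umode_t mode)
{
        int fd = get_unused_fd_flags(flags);    /* bitmap bit set, slot NULL */
        struct file *f;

        if (fd < 0)
                return fd;
        f = filp_open(name, flags, mode);       /* may sleep or fail */
        if (IS_ERR(f)) {
                put_unused_fd(fd);              /* give the slot back */
                return PTR_ERR(f);
        }
        fd_install(fd, f);                      /* fd becomes visible here */
        return fd;
}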
  649 
  650 /*
  651  * The same warnings as for __alloc_fd()/__fd_install() apply here...
  652  */
  653 int __close_fd(struct files_struct *files, unsigned fd)
  654 {
  655         struct file *file;
  656         struct fdtable *fdt;
  657 
  658         spin_lock(&files->file_lock);
  659         fdt = files_fdtable(files);
  660         if (fd >= fdt->max_fds)
  661                 goto out_unlock;
  662         file = fdt->fd[fd];
  663         if (!file)
  664                 goto out_unlock;
  665         rcu_assign_pointer(fdt->fd[fd], NULL);
  666         __clear_close_on_exec(fd, fdt);
  667         __put_unused_fd(files, fd);
  668         spin_unlock(&files->file_lock);
  669         return filp_close(file, files);
  670 
  671 out_unlock:
  672         spin_unlock(&files->file_lock);
  673         return -EBADF;
  674 }
  675 
  676 void do_close_on_exec(struct files_struct *files)
  677 {
  678         unsigned i;
  679         struct fdtable *fdt;
  680 
  681         /* exec unshares first */
  682         spin_lock(&files->file_lock);
  683         for (i = 0; ; i++) {
  684                 unsigned long set;
  685                 unsigned fd = i * BITS_PER_LONG;
  686                 fdt = files_fdtable(files);
  687                 if (fd >= fdt->max_fds)
  688                         break;
  689                 set = fdt->close_on_exec[i];
  690                 if (!set)
  691                         continue;
  692                 fdt->close_on_exec[i] = 0;
  693                 for ( ; set ; fd++, set >>= 1) {
  694                         struct file *file;
  695                         if (!(set & 1))
  696                                 continue;
  697                         file = fdt->fd[fd];
  698                         if (!file)
  699                                 continue;
  700                         rcu_assign_pointer(fdt->fd[fd], NULL);
  701                         __put_unused_fd(files, fd);
  702                         spin_unlock(&files->file_lock);
  703                         filp_close(file, files);
  704                         cond_resched();
  705                         spin_lock(&files->file_lock);
  706                 }
  707 
  708         }
  709         spin_unlock(&files->file_lock);
  710 }
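The loop above is what makes O_CLOEXEC descriptors disappear across execve(). A userspace demonstration (assumes /bin/ls and procfs; the descriptor opened with O_CLOEXEC will be missing from the listing):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int keep = open("/dev/null", O_WRONLY);
        int drop = open("/dev/null", O_WRONLY | O_CLOEXEC);

        printf("before exec: keep=%d drop=%d\n", keep, drop);
        fflush(stdout);
        /* The new program inherits 'keep' but not 'drop'. */
        execl("/bin/ls", "ls", "-l", "/proc/self/fd", (char *)NULL);
        perror("execl");
        return 1;
}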
  711 
  712 struct file *fget(unsigned int fd)
  713 {
  714         struct file *file;
  715         struct files_struct *files = current->files;
  716 
  717         rcu_read_lock();
  718         file = fcheck_files(files, fd);
  719         if (file) {
  720                 /* O_PATH file, or the object's ref could not be taken */
  721                 if (file->f_mode & FMODE_PATH ||
  722                     !atomic_long_inc_not_zero(&file->f_count))
  723                         file = NULL;
  724         }
  725         rcu_read_unlock();
  726 
  727         return file;
  728 }
  729 
  730 EXPORT_SYMBOL(fget);
  731 
  732 struct file *fget_raw(unsigned int fd)
  733 {
  734         struct file *file;
  735         struct files_struct *files = current->files;
  736 
  737         rcu_read_lock();
  738         file = fcheck_files(files, fd);
  739         if (file) {
  740                 /* File object ref couldn't be taken */
  741                 if (!atomic_long_inc_not_zero(&file->f_count))
  742                         file = NULL;
  743         }
  744         rcu_read_unlock();
  745 
  746         return file;
  747 }
  748 
  749 EXPORT_SYMBOL(fget_raw);
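The FMODE_PATH test is the only difference between the two: fget(), used by the I/O paths, refuses O_PATH files, while fget_raw(), used by dup() and fcntl(), accepts them. The split is visible from userspace (a sketch; O_PATH requires _GNU_SOURCE and a kernel of at least 2.6.39):

#define _GNU_SOURCE
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/etc/hostname", O_PATH);
        char c;

        if (fd < 0) {
                perror("open");
                return 1;
        }
        if (read(fd, &c, 1) < 0)        /* read() goes through fget(): EBADF */
                printf("read: %s\n", strerror(errno));
        if (dup(fd) >= 0)               /* dup() goes through fget_raw(): ok */
                printf("dup succeeded\n");
        return 0;
}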
  750 
  751 /*
  752  * Lightweight file lookup - no refcnt increment if fd table isn't shared.
  753  *
  754  * You can use this instead of fget if you satisfy all of the following
  755  * conditions:
  756  * 1) You must call fput_light before exiting the syscall and returning control
  757  *    to userspace (i.e. you cannot remember the returned struct file * after
  758  *    returning to userspace).
  759  * 2) You must not call filp_close on the returned struct file * in between
  760  *    calls to fget_light and fput_light.
  761  * 3) You must not clone the current task in between the calls to fget_light
  762  *    and fput_light.
  763  *
  764  * The fput_needed flag returned by fget_light should be passed to the
  765  * corresponding fput_light.
  766  */
  767 struct file *fget_light(unsigned int fd, int *fput_needed)
  768 {
  769         struct file *file;
  770         struct files_struct *files = current->files;
  771 
  772         *fput_needed = 0;
  773         if (atomic_read(&files->count) == 1) {
  774                 file = fcheck_files(files, fd);
  775                 if (file && (file->f_mode & FMODE_PATH))
  776                         file = NULL;
  777         } else {
  778                 rcu_read_lock();
  779                 file = fcheck_files(files, fd);
  780                 if (file) {
  781                         if (!(file->f_mode & FMODE_PATH) &&
  782                             atomic_long_inc_not_zero(&file->f_count))
  783                                 *fput_needed = 1;
  784                         else
  785                                 /* Didn't get the reference, someone's freed */
  786                                 file = NULL;
  787                 }
  788                 rcu_read_unlock();
  789         }
  790 
  791         return file;
  792 }
  793 EXPORT_SYMBOL(fget_light);
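A sketch of the typical consumer, shaped like the read(2) path (read_sketch is an illustrative name; the real syscall in fs/read_write.c adds position and permission handling):

#include <linux/file.h>
#include <linux/fs.h>

static ssize_t read_sketch(unsigned int fd, char __user *buf, size_t count)
{
        int fput_needed;
        struct file *file = fget_light(fd, &fput_needed);
        ssize_t ret = -EBADF;
        loff_t pos;

        if (file) {
                pos = file->f_pos;
                ret = vfs_read(file, buf, count, &pos);
                file->f_pos = pos;
                fput_light(file, fput_needed);  /* drops the ref only if taken */
        }
        return ret;
}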
  794 
  795 struct file *fget_raw_light(unsigned int fd, int *fput_needed)
  796 {
  797         struct file *file;
  798         struct files_struct *files = current->files;
  799 
  800         *fput_needed = 0;
  801         if (atomic_read(&files->count) == 1) {
  802                 file = fcheck_files(files, fd);
  803         } else {
  804                 rcu_read_lock();
  805                 file = fcheck_files(files, fd);
  806                 if (file) {
  807                         if (atomic_long_inc_not_zero(&file->f_count))
  808                                 *fput_needed = 1;
  809                         else
  810                                 /* Didn't get the reference; the file is being freed */
  811                                 file = NULL;
  812                 }
  813                 rcu_read_unlock();
  814         }
  815 
  816         return file;
  817 }
  818 
  819 void set_close_on_exec(unsigned int fd, int flag)
  820 {
  821         struct files_struct *files = current->files;
  822         struct fdtable *fdt;
  823         spin_lock(&files->file_lock);
  824         fdt = files_fdtable(files);
  825         if (flag)
  826                 __set_close_on_exec(fd, fdt);
  827         else
  828                 __clear_close_on_exec(fd, fdt);
  829         spin_unlock(&files->file_lock);
  830 }
  831 
  832 bool get_close_on_exec(unsigned int fd)
  833 {
  834         struct files_struct *files = current->files;
  835         struct fdtable *fdt;
  836         bool res;
  837         rcu_read_lock();
  838         fdt = files_fdtable(files);
  839         res = close_on_exec(fd, fdt);
  840         rcu_read_unlock();
  841         return res;
  842 }
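These two are reached from userspace through fcntl(): F_SETFD lands in set_close_on_exec() and F_GETFD in get_close_on_exec(). A minimal round trip:

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/dev/null", O_RDONLY);

        fcntl(fd, F_SETFD, FD_CLOEXEC);         /* set_close_on_exec(fd, 1) */
        printf("cloexec=%d\n",
               (fcntl(fd, F_GETFD) & FD_CLOEXEC) != 0); /* get_close_on_exec(fd) */
        close(fd);
        return 0;
}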
  843 
  844 static int do_dup2(struct files_struct *files,
  845         struct file *file, unsigned fd, unsigned flags)
  846 {
  847         struct file *tofree;
  848         struct fdtable *fdt;
  849 
  850         /*
  851  * We need to detect attempts to do dup2() over an allocated but still
  852          * not finished descriptor.  NB: OpenBSD avoids that at the price of
  853          * extra work in their equivalent of fget() - they insert struct
  854          * file immediately after grabbing descriptor, mark it larval if
  855          * more work (e.g. actual opening) is needed and make sure that
  856          * fget() treats larval files as absent.  Potentially interesting,
  857          * but while extra work in fget() is trivial, locking implications
  858          * and amount of surgery on open()-related paths in VFS are not.
  859          * FreeBSD fails with -EBADF in the same situation, NetBSD "solution"
  860          * deadlocks in rather amusing ways, AFAICS.  All of that is out of
  861          * scope of POSIX or SUS, since neither considers shared descriptor
  862          * tables and this condition does not arise without those.
  863          */
  864         fdt = files_fdtable(files);
  865         tofree = fdt->fd[fd];
  866         if (!tofree && fd_is_open(fd, fdt))
  867                 goto Ebusy;
  868         get_file(file);
  869         rcu_assign_pointer(fdt->fd[fd], file);
  870         __set_open_fd(fd, fdt);
  871         if (flags & O_CLOEXEC)
  872                 __set_close_on_exec(fd, fdt);
  873         else
  874                 __clear_close_on_exec(fd, fdt);
  875         spin_unlock(&files->file_lock);
  876 
  877         if (tofree)
  878                 filp_close(tofree, files);
  879 
  880         return fd;
  881 
  882 Ebusy:
  883         spin_unlock(&files->file_lock);
  884         return -EBUSY;
  885 }
  886 
  887 int replace_fd(unsigned fd, struct file *file, unsigned flags)
  888 {
  889         int err;
  890         struct files_struct *files = current->files;
  891 
  892         if (!file)
  893                 return __close_fd(files, fd);
  894 
  895         if (fd >= rlimit(RLIMIT_NOFILE))
  896                 return -EBADF;
  897 
  898         spin_lock(&files->file_lock);
  899         err = expand_files(files, fd);
  900         if (unlikely(err < 0))
  901                 goto out_unlock;
  902         return do_dup2(files, file, fd, flags);
  903 
  904 out_unlock:
  905         spin_unlock(&files->file_lock);
  906         return err;
  907 }
  908 
  909 SYSCALL_DEFINE3(dup3, unsigned int, oldfd, unsigned int, newfd, int, flags)
  910 {
  911         int err = -EBADF;
  912         struct file *file;
  913         struct files_struct *files = current->files;
  914 
  915         if ((flags & ~O_CLOEXEC) != 0)
  916                 return -EINVAL;
  917 
  918         if (unlikely(oldfd == newfd))
  919                 return -EINVAL;
  920 
  921         if (newfd >= rlimit(RLIMIT_NOFILE))
  922                 return -EBADF;
  923 
  924         spin_lock(&files->file_lock);
  925         err = expand_files(files, newfd);
  926         file = fcheck(oldfd);
  927         if (unlikely(!file))
  928                 goto Ebadf;
  929         if (unlikely(err < 0)) {
  930                 if (err == -EMFILE)
  931                         goto Ebadf;
  932                 goto out_unlock;
  933         }
  934         return do_dup2(files, file, newfd, flags);
  935 
  936 Ebadf:
  937         err = -EBADF;
  938 out_unlock:
  939         spin_unlock(&files->file_lock);
  940         return err;
  941 }
  942 
  943 SYSCALL_DEFINE2(dup2, unsigned int, oldfd, unsigned int, newfd)
  944 {
  945         if (unlikely(newfd == oldfd)) { /* corner case */
  946                 struct files_struct *files = current->files;
  947                 int retval = oldfd;
  948 
  949                 rcu_read_lock();
  950                 if (!fcheck_files(files, oldfd))
  951                         retval = -EBADF;
  952                 rcu_read_unlock();
  953                 return retval;
  954         }
  955         return sys_dup3(oldfd, newfd, 0);
  956 }
  957 
  958 SYSCALL_DEFINE1(dup, unsigned int, fildes)
  959 {
  960         int ret = -EBADF;
  961         struct file *file = fget_raw(fildes);
  962 
  963         if (file) {
  964                 ret = get_unused_fd();
  965                 if (ret >= 0)
  966                         fd_install(ret, file);
  967                 else
  968                         fput(file);
  969         }
  970         return ret;
  971 }
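Taken together, the three syscalls above behave as follows from userspace; dup3() is the race-free way to get a close-on-exec duplicate, since dup2() plus a separate fcntl() leaves a window where the fd can leak across a concurrent exec. A short demonstration (dup3() needs _GNU_SOURCE on glibc):

#define _GNU_SOURCE
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
        int fd = open("/dev/null", O_WRONLY);
        int a = dup(fd);                 /* lowest free descriptor */
        int b = dup2(fd, 42);            /* exactly 42, closing it first */
        int c = dup3(fd, 43, O_CLOEXEC); /* dup2 + FD_CLOEXEC, atomically */

        printf("fd=%d dup=%d dup2=%d dup3=%d cloexec(43)=%d\n",
               fd, a, b, c, (fcntl(c, F_GETFD) & FD_CLOEXEC) != 0);
        return 0;
}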
  972 
  973 int f_dupfd(unsigned int from, struct file *file, unsigned flags)
  974 {
  975         int err;
  976         if (from >= rlimit(RLIMIT_NOFILE))
  977                 return -EINVAL;
  978         err = alloc_fd(from, flags);
  979         if (err >= 0) {
  980                 get_file(file);
  981                 fd_install(err, file);
  982         }
  983         return err;
  984 }
  985 
  986 int iterate_fd(struct files_struct *files, unsigned n,
  987                 int (*f)(const void *, struct file *, unsigned),
  988                 const void *p)
  989 {
  990         struct fdtable *fdt;
  991         int res = 0;
  992         if (!files)
  993                 return 0;
  994         spin_lock(&files->file_lock);
  995         for (fdt = files_fdtable(files); n < fdt->max_fds; n++) {
  996                 struct file *file;
  997                 file = rcu_dereference_check_fdtable(files, fdt->fd[n]);
  998                 if (!file)
  999                         continue;
 1000                 res = f(p, file, n);
 1001                 if (res)
 1002                         break;
 1003         }
 1004         spin_unlock(&files->file_lock);
 1005         return res;
 1006 }
 1007 EXPORT_SYMBOL(iterate_fd);
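A sketch of a caller (match_file and find_fd_of are illustrative names; the callback contract is the one declared above, with any non-zero return stopping the walk and becoming iterate_fd()'s result):

#include <linux/errno.h>
#include <linux/fdtable.h>
#include <linux/file.h>

static int match_file(const void *p, struct file *file, unsigned fd)
{
        /* Report fd + 1 so that descriptor 0 still reads as a match. */
        return file == p ? (int)fd + 1 : 0;
}

/* Find which descriptor (if any) in 'files' refers to 'target'. */
static int find_fd_of(struct files_struct *files, struct file *target)
{
        int res = iterate_fd(files, 0, match_file, target);

        return res ? res - 1 : -ENOENT;
}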
