The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/fs/open.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  *  linux/fs/open.c
    3  *
    4  *  Copyright (C) 1991, 1992  Linus Torvalds
    5  */
    6 
    7 #include <linux/string.h>
    8 #include <linux/mm.h>
    9 #include <linux/file.h>
   10 #include <linux/fdtable.h>
   11 #include <linux/fsnotify.h>
   12 #include <linux/module.h>
   13 #include <linux/tty.h>
   14 #include <linux/namei.h>
   15 #include <linux/backing-dev.h>
   16 #include <linux/capability.h>
   17 #include <linux/securebits.h>
   18 #include <linux/security.h>
   19 #include <linux/mount.h>
   20 #include <linux/fcntl.h>
   21 #include <linux/slab.h>
   22 #include <asm/uaccess.h>
   23 #include <linux/fs.h>
   24 #include <linux/personality.h>
   25 #include <linux/pagemap.h>
   26 #include <linux/syscalls.h>
   27 #include <linux/rcupdate.h>
   28 #include <linux/audit.h>
   29 #include <linux/falloc.h>
   30 #include <linux/fs_struct.h>
   31 #include <linux/ima.h>
   32 #include <linux/dnotify.h>
   33 
   34 #include "internal.h"
   35 
   36 int do_truncate(struct dentry *dentry, loff_t length, unsigned int time_attrs,
   37         struct file *filp)
   38 {
   39         int ret;
   40         struct iattr newattrs;
   41 
   42         /* Not pretty: "inode->i_size" shouldn't really be signed. But it is. */
   43         if (length < 0)
   44                 return -EINVAL;
   45 
   46         newattrs.ia_size = length;
   47         newattrs.ia_valid = ATTR_SIZE | time_attrs;
   48         if (filp) {
   49                 newattrs.ia_file = filp;
   50                 newattrs.ia_valid |= ATTR_FILE;
   51         }
   52 
   53         /* Remove suid/sgid on truncate too */
   54         ret = should_remove_suid(dentry);
   55         if (ret)
   56                 newattrs.ia_valid |= ret | ATTR_FORCE;
   57 
   58         mutex_lock(&dentry->d_inode->i_mutex);
   59         ret = notify_change(dentry, &newattrs);
   60         mutex_unlock(&dentry->d_inode->i_mutex);
   61         return ret;
   62 }
   63 
   64 long vfs_truncate(struct path *path, loff_t length)
   65 {
   66         struct inode *inode;
   67         long error;
   68 
   69         inode = path->dentry->d_inode;
   70 
   71         /* For directories it's -EISDIR, for other non-regulars - -EINVAL */
   72         if (S_ISDIR(inode->i_mode))
   73                 return -EISDIR;
   74         if (!S_ISREG(inode->i_mode))
   75                 return -EINVAL;
   76 
   77         error = mnt_want_write(path->mnt);
   78         if (error)
   79                 goto out;
   80 
   81         error = inode_permission(inode, MAY_WRITE);
   82         if (error)
   83                 goto mnt_drop_write_and_out;
   84 
   85         error = -EPERM;
   86         if (IS_APPEND(inode))
   87                 goto mnt_drop_write_and_out;
   88 
   89         error = get_write_access(inode);
   90         if (error)
   91                 goto mnt_drop_write_and_out;
   92 
   93         /*
   94          * Make sure that there are no leases.  get_write_access() protects
   95          * against the truncate racing with a lease-granting setlease().
   96          */
   97         error = break_lease(inode, O_WRONLY);
   98         if (error)
   99                 goto put_write_and_out;
  100 
  101         error = locks_verify_truncate(inode, NULL, length);
  102         if (!error)
  103                 error = security_path_truncate(path);
  104         if (!error)
  105                 error = do_truncate(path->dentry, length, 0, NULL);
  106 
  107 put_write_and_out:
  108         put_write_access(inode);
  109 mnt_drop_write_and_out:
  110         mnt_drop_write(path->mnt);
  111 out:
  112         return error;
  113 }
  114 EXPORT_SYMBOL_GPL(vfs_truncate);
  115 
  116 static long do_sys_truncate(const char __user *pathname, loff_t length)
  117 {
  118         unsigned int lookup_flags = LOOKUP_FOLLOW;
  119         struct path path;
  120         int error;
  121 
  122         if (length < 0) /* sorry, but loff_t says... */
  123                 return -EINVAL;
  124 
  125 retry:
  126         error = user_path_at(AT_FDCWD, pathname, lookup_flags, &path);
  127         if (!error) {
  128                 error = vfs_truncate(&path, length);
  129                 path_put(&path);
  130         }
  131         if (retry_estale(error, lookup_flags)) {
  132                 lookup_flags |= LOOKUP_REVAL;
  133                 goto retry;
  134         }
  135         return error;
  136 }
  137 
  138 SYSCALL_DEFINE2(truncate, const char __user *, path, long, length)
  139 {
  140         return do_sys_truncate(path, length);
  141 }
  142 
  143 static long do_sys_ftruncate(unsigned int fd, loff_t length, int small)
  144 {
  145         struct inode *inode;
  146         struct dentry *dentry;
  147         struct fd f;
  148         int error;
  149 
  150         error = -EINVAL;
  151         if (length < 0)
  152                 goto out;
  153         error = -EBADF;
  154         f = fdget(fd);
  155         if (!f.file)
  156                 goto out;
  157 
  158         /* explicitly opened as large or we are on 64-bit box */
  159         if (f.file->f_flags & O_LARGEFILE)
  160                 small = 0;
  161 
  162         dentry = f.file->f_path.dentry;
  163         inode = dentry->d_inode;
  164         error = -EINVAL;
  165         if (!S_ISREG(inode->i_mode) || !(f.file->f_mode & FMODE_WRITE))
  166                 goto out_putf;
  167 
  168         error = -EINVAL;
  169         /* Cannot ftruncate over 2^31 bytes without large file support */
  170         if (small && length > MAX_NON_LFS)
  171                 goto out_putf;
  172 
  173         error = -EPERM;
  174         if (IS_APPEND(inode))
  175                 goto out_putf;
  176 
  177         sb_start_write(inode->i_sb);
  178         error = locks_verify_truncate(inode, f.file, length);
  179         if (!error)
  180                 error = security_path_truncate(&f.file->f_path);
  181         if (!error)
  182                 error = do_truncate(dentry, length, ATTR_MTIME|ATTR_CTIME, f.file);
  183         sb_end_write(inode->i_sb);
  184 out_putf:
  185         fdput(f);
  186 out:
  187         return error;
  188 }
  189 
  190 SYSCALL_DEFINE2(ftruncate, unsigned int, fd, unsigned long, length)
  191 {
  192         long ret = do_sys_ftruncate(fd, length, 1);
  193         /* avoid REGPARM breakage on x86: */
  194         asmlinkage_protect(2, ret, fd, length);
  195         return ret;
  196 }
  197 
  198 /* LFS versions of truncate are only needed on 32 bit machines */
  199 #if BITS_PER_LONG == 32
  200 SYSCALL_DEFINE(truncate64)(const char __user * path, loff_t length)
  201 {
  202         return do_sys_truncate(path, length);
  203 }
  204 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
  205 asmlinkage long SyS_truncate64(long path, loff_t length)
  206 {
  207         return SYSC_truncate64((const char __user *) path, length);
  208 }
  209 SYSCALL_ALIAS(sys_truncate64, SyS_truncate64);
  210 #endif
  211 
  212 SYSCALL_DEFINE(ftruncate64)(unsigned int fd, loff_t length)
  213 {
  214         long ret = do_sys_ftruncate(fd, length, 0);
  215         /* avoid REGPARM breakage on x86: */
  216         asmlinkage_protect(2, ret, fd, length);
  217         return ret;
  218 }
  219 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
  220 asmlinkage long SyS_ftruncate64(long fd, loff_t length)
  221 {
  222         return SYSC_ftruncate64((unsigned int) fd, length);
  223 }
  224 SYSCALL_ALIAS(sys_ftruncate64, SyS_ftruncate64);
  225 #endif
  226 #endif /* BITS_PER_LONG == 32 */
  227 
  228 
  229 int do_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
  230 {
  231         struct inode *inode = file->f_path.dentry->d_inode;
  232         long ret;
  233 
  234         if (offset < 0 || len <= 0)
  235                 return -EINVAL;
  236 
  237         /* Return error if mode is not supported */
  238         if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE))
  239                 return -EOPNOTSUPP;
  240 
  241         /* Punch hole must have keep size set */
  242         if ((mode & FALLOC_FL_PUNCH_HOLE) &&
  243             !(mode & FALLOC_FL_KEEP_SIZE))
  244                 return -EOPNOTSUPP;
  245 
  246         if (!(file->f_mode & FMODE_WRITE))
  247                 return -EBADF;
  248 
  249         /* It's not possible punch hole on append only file */
  250         if (mode & FALLOC_FL_PUNCH_HOLE && IS_APPEND(inode))
  251                 return -EPERM;
  252 
  253         if (IS_IMMUTABLE(inode))
  254                 return -EPERM;
  255 
  256         /*
  257          * Revalidate the write permissions, in case security policy has
  258          * changed since the files were opened.
  259          */
  260         ret = security_file_permission(file, MAY_WRITE);
  261         if (ret)
  262                 return ret;
  263 
  264         if (S_ISFIFO(inode->i_mode))
  265                 return -ESPIPE;
  266 
  267         /*
  268          * Let individual file system decide if it supports preallocation
  269          * for directories or not.
  270          */
  271         if (!S_ISREG(inode->i_mode) && !S_ISDIR(inode->i_mode))
  272                 return -ENODEV;
  273 
  274         /* Check for wrap through zero too */
  275         if (((offset + len) > inode->i_sb->s_maxbytes) || ((offset + len) < 0))
  276                 return -EFBIG;
  277 
  278         if (!file->f_op->fallocate)
  279                 return -EOPNOTSUPP;
  280 
  281         sb_start_write(inode->i_sb);
  282         ret = file->f_op->fallocate(file, mode, offset, len);
  283         sb_end_write(inode->i_sb);
  284         return ret;
  285 }
  286 
  287 SYSCALL_DEFINE(fallocate)(int fd, int mode, loff_t offset, loff_t len)
  288 {
  289         struct fd f = fdget(fd);
  290         int error = -EBADF;
  291 
  292         if (f.file) {
  293                 error = do_fallocate(f.file, mode, offset, len);
  294                 fdput(f);
  295         }
  296         return error;
  297 }
  298 
  299 #ifdef CONFIG_HAVE_SYSCALL_WRAPPERS
  300 asmlinkage long SyS_fallocate(long fd, long mode, loff_t offset, loff_t len)
  301 {
  302         return SYSC_fallocate((int)fd, (int)mode, offset, len);
  303 }
  304 SYSCALL_ALIAS(sys_fallocate, SyS_fallocate);
  305 #endif
  306 
  307 /*
  308  * access() needs to use the real uid/gid, not the effective uid/gid.
  309  * We do this by temporarily clearing all FS-related capabilities and
  310  * switching the fsuid/fsgid around to the real ones.
  311  */
  312 SYSCALL_DEFINE3(faccessat, int, dfd, const char __user *, filename, int, mode)
  313 {
  314         const struct cred *old_cred;
  315         struct cred *override_cred;
  316         struct path path;
  317         struct inode *inode;
  318         int res;
  319         unsigned int lookup_flags = LOOKUP_FOLLOW;
  320 
  321         if (mode & ~S_IRWXO)    /* where's F_OK, X_OK, W_OK, R_OK? */
  322                 return -EINVAL;
  323 
  324         override_cred = prepare_creds();
  325         if (!override_cred)
  326                 return -ENOMEM;
  327 
  328         override_cred->fsuid = override_cred->uid;
  329         override_cred->fsgid = override_cred->gid;
  330 
  331         if (!issecure(SECURE_NO_SETUID_FIXUP)) {
  332                 /* Clear the capabilities if we switch to a non-root user */
  333                 kuid_t root_uid = make_kuid(override_cred->user_ns, 0);
  334                 if (!uid_eq(override_cred->uid, root_uid))
  335                         cap_clear(override_cred->cap_effective);
  336                 else
  337                         override_cred->cap_effective =
  338                                 override_cred->cap_permitted;
  339         }
  340 
  341         old_cred = override_creds(override_cred);
  342 retry:
  343         res = user_path_at(dfd, filename, lookup_flags, &path);
  344         if (res)
  345                 goto out;
  346 
  347         inode = path.dentry->d_inode;
  348 
  349         if ((mode & MAY_EXEC) && S_ISREG(inode->i_mode)) {
  350                 /*
  351                  * MAY_EXEC on regular files is denied if the fs is mounted
  352                  * with the "noexec" flag.
  353                  */
  354                 res = -EACCES;
  355                 if (path.mnt->mnt_flags & MNT_NOEXEC)
  356                         goto out_path_release;
  357         }
  358 
  359         res = inode_permission(inode, mode | MAY_ACCESS);
  360         /* SuS v2 requires we report a read only fs too */
  361         if (res || !(mode & S_IWOTH) || special_file(inode->i_mode))
  362                 goto out_path_release;
  363         /*
  364          * This is a rare case where using __mnt_is_readonly()
  365          * is OK without a mnt_want/drop_write() pair.  Since
  366          * no actual write to the fs is performed here, we do
  367          * not need to telegraph to that to anyone.
  368          *
  369          * By doing this, we accept that this access is
  370          * inherently racy and know that the fs may change
  371          * state before we even see this result.
  372          */
  373         if (__mnt_is_readonly(path.mnt))
  374                 res = -EROFS;
  375 
  376 out_path_release:
  377         path_put(&path);
  378         if (retry_estale(res, lookup_flags)) {
  379                 lookup_flags |= LOOKUP_REVAL;
  380                 goto retry;
  381         }
  382 out:
  383         revert_creds(old_cred);
  384         put_cred(override_cred);
  385         return res;
  386 }
  387 
  388 SYSCALL_DEFINE2(access, const char __user *, filename, int, mode)
  389 {
  390         return sys_faccessat(AT_FDCWD, filename, mode);
  391 }
  392 
  393 SYSCALL_DEFINE1(chdir, const char __user *, filename)
  394 {
  395         struct path path;
  396         int error;
  397         unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
  398 retry:
  399         error = user_path_at(AT_FDCWD, filename, lookup_flags, &path);
  400         if (error)
  401                 goto out;
  402 
  403         error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
  404         if (error)
  405                 goto dput_and_out;
  406 
  407         set_fs_pwd(current->fs, &path);
  408 
  409 dput_and_out:
  410         path_put(&path);
  411         if (retry_estale(error, lookup_flags)) {
  412                 lookup_flags |= LOOKUP_REVAL;
  413                 goto retry;
  414         }
  415 out:
  416         return error;
  417 }
  418 
  419 SYSCALL_DEFINE1(fchdir, unsigned int, fd)
  420 {
  421         struct fd f = fdget_raw(fd);
  422         struct inode *inode;
  423         int error = -EBADF;
  424 
  425         error = -EBADF;
  426         if (!f.file)
  427                 goto out;
  428 
  429         inode = f.file->f_path.dentry->d_inode;
  430 
  431         error = -ENOTDIR;
  432         if (!S_ISDIR(inode->i_mode))
  433                 goto out_putf;
  434 
  435         error = inode_permission(inode, MAY_EXEC | MAY_CHDIR);
  436         if (!error)
  437                 set_fs_pwd(current->fs, &f.file->f_path);
  438 out_putf:
  439         fdput(f);
  440 out:
  441         return error;
  442 }
  443 
  444 SYSCALL_DEFINE1(chroot, const char __user *, filename)
  445 {
  446         struct path path;
  447         int error;
  448         unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
  449 retry:
  450         error = user_path_at(AT_FDCWD, filename, lookup_flags, &path);
  451         if (error)
  452                 goto out;
  453 
  454         error = inode_permission(path.dentry->d_inode, MAY_EXEC | MAY_CHDIR);
  455         if (error)
  456                 goto dput_and_out;
  457 
  458         error = -EPERM;
  459         if (!nsown_capable(CAP_SYS_CHROOT))
  460                 goto dput_and_out;
  461         error = security_path_chroot(&path);
  462         if (error)
  463                 goto dput_and_out;
  464 
  465         set_fs_root(current->fs, &path);
  466         error = 0;
  467 dput_and_out:
  468         path_put(&path);
  469         if (retry_estale(error, lookup_flags)) {
  470                 lookup_flags |= LOOKUP_REVAL;
  471                 goto retry;
  472         }
  473 out:
  474         return error;
  475 }
  476 
  477 static int chmod_common(struct path *path, umode_t mode)
  478 {
  479         struct inode *inode = path->dentry->d_inode;
  480         struct iattr newattrs;
  481         int error;
  482 
  483         error = mnt_want_write(path->mnt);
  484         if (error)
  485                 return error;
  486         mutex_lock(&inode->i_mutex);
  487         error = security_path_chmod(path, mode);
  488         if (error)
  489                 goto out_unlock;
  490         newattrs.ia_mode = (mode & S_IALLUGO) | (inode->i_mode & ~S_IALLUGO);
  491         newattrs.ia_valid = ATTR_MODE | ATTR_CTIME;
  492         error = notify_change(path->dentry, &newattrs);
  493 out_unlock:
  494         mutex_unlock(&inode->i_mutex);
  495         mnt_drop_write(path->mnt);
  496         return error;
  497 }
  498 
  499 SYSCALL_DEFINE2(fchmod, unsigned int, fd, umode_t, mode)
  500 {
  501         struct file * file;
  502         int err = -EBADF;
  503 
  504         file = fget(fd);
  505         if (file) {
  506                 audit_inode(NULL, file->f_path.dentry, 0);
  507                 err = chmod_common(&file->f_path, mode);
  508                 fput(file);
  509         }
  510         return err;
  511 }
  512 
  513 SYSCALL_DEFINE3(fchmodat, int, dfd, const char __user *, filename, umode_t, mode)
  514 {
  515         struct path path;
  516         int error;
  517         unsigned int lookup_flags = LOOKUP_FOLLOW;
  518 retry:
  519         error = user_path_at(dfd, filename, lookup_flags, &path);
  520         if (!error) {
  521                 error = chmod_common(&path, mode);
  522                 path_put(&path);
  523                 if (retry_estale(error, lookup_flags)) {
  524                         lookup_flags |= LOOKUP_REVAL;
  525                         goto retry;
  526                 }
  527         }
  528         return error;
  529 }
  530 
  531 SYSCALL_DEFINE2(chmod, const char __user *, filename, umode_t, mode)
  532 {
  533         return sys_fchmodat(AT_FDCWD, filename, mode);
  534 }
  535 
  536 static int chown_common(struct path *path, uid_t user, gid_t group)
  537 {
  538         struct inode *inode = path->dentry->d_inode;
  539         int error;
  540         struct iattr newattrs;
  541         kuid_t uid;
  542         kgid_t gid;
  543 
  544         uid = make_kuid(current_user_ns(), user);
  545         gid = make_kgid(current_user_ns(), group);
  546 
  547         newattrs.ia_valid =  ATTR_CTIME;
  548         if (user != (uid_t) -1) {
  549                 if (!uid_valid(uid))
  550                         return -EINVAL;
  551                 newattrs.ia_valid |= ATTR_UID;
  552                 newattrs.ia_uid = uid;
  553         }
  554         if (group != (gid_t) -1) {
  555                 if (!gid_valid(gid))
  556                         return -EINVAL;
  557                 newattrs.ia_valid |= ATTR_GID;
  558                 newattrs.ia_gid = gid;
  559         }
  560         if (!S_ISDIR(inode->i_mode))
  561                 newattrs.ia_valid |=
  562                         ATTR_KILL_SUID | ATTR_KILL_SGID | ATTR_KILL_PRIV;
  563         mutex_lock(&inode->i_mutex);
  564         error = security_path_chown(path, uid, gid);
  565         if (!error)
  566                 error = notify_change(path->dentry, &newattrs);
  567         mutex_unlock(&inode->i_mutex);
  568 
  569         return error;
  570 }
  571 
  572 SYSCALL_DEFINE5(fchownat, int, dfd, const char __user *, filename, uid_t, user,
  573                 gid_t, group, int, flag)
  574 {
  575         struct path path;
  576         int error = -EINVAL;
  577         int lookup_flags;
  578 
  579         if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_EMPTY_PATH)) != 0)
  580                 goto out;
  581 
  582         lookup_flags = (flag & AT_SYMLINK_NOFOLLOW) ? 0 : LOOKUP_FOLLOW;
  583         if (flag & AT_EMPTY_PATH)
  584                 lookup_flags |= LOOKUP_EMPTY;
  585 retry:
  586         error = user_path_at(dfd, filename, lookup_flags, &path);
  587         if (error)
  588                 goto out;
  589         error = mnt_want_write(path.mnt);
  590         if (error)
  591                 goto out_release;
  592         error = chown_common(&path, user, group);
  593         mnt_drop_write(path.mnt);
  594 out_release:
  595         path_put(&path);
  596         if (retry_estale(error, lookup_flags)) {
  597                 lookup_flags |= LOOKUP_REVAL;
  598                 goto retry;
  599         }
  600 out:
  601         return error;
  602 }
  603 
  604 SYSCALL_DEFINE3(chown, const char __user *, filename, uid_t, user, gid_t, group)
  605 {
  606         return sys_fchownat(AT_FDCWD, filename, user, group, 0);
  607 }
  608 
  609 SYSCALL_DEFINE3(lchown, const char __user *, filename, uid_t, user, gid_t, group)
  610 {
  611         return sys_fchownat(AT_FDCWD, filename, user, group,
  612                             AT_SYMLINK_NOFOLLOW);
  613 }
  614 
  615 SYSCALL_DEFINE3(fchown, unsigned int, fd, uid_t, user, gid_t, group)
  616 {
  617         struct fd f = fdget(fd);
  618         int error = -EBADF;
  619 
  620         if (!f.file)
  621                 goto out;
  622 
  623         error = mnt_want_write_file(f.file);
  624         if (error)
  625                 goto out_fput;
  626         audit_inode(NULL, f.file->f_path.dentry, 0);
  627         error = chown_common(&f.file->f_path, user, group);
  628         mnt_drop_write_file(f.file);
  629 out_fput:
  630         fdput(f);
  631 out:
  632         return error;
  633 }
  634 
  635 /*
  636  * You have to be very careful that these write
  637  * counts get cleaned up in error cases and
  638  * upon __fput().  This should probably never
  639  * be called outside of __dentry_open().
  640  */
  641 static inline int __get_file_write_access(struct inode *inode,
  642                                           struct vfsmount *mnt)
  643 {
  644         int error;
  645         error = get_write_access(inode);
  646         if (error)
  647                 return error;
  648         /*
  649          * Do not take mount writer counts on
  650          * special files since no writes to
  651          * the mount itself will occur.
  652          */
  653         if (!special_file(inode->i_mode)) {
  654                 /*
  655                  * Balanced in __fput()
  656                  */
  657                 error = __mnt_want_write(mnt);
  658                 if (error)
  659                         put_write_access(inode);
  660         }
  661         return error;
  662 }
  663 
  664 int open_check_o_direct(struct file *f)
  665 {
  666         /* NB: we're sure to have correct a_ops only after f_op->open */
  667         if (f->f_flags & O_DIRECT) {
  668                 if (!f->f_mapping->a_ops ||
  669                     ((!f->f_mapping->a_ops->direct_IO) &&
  670                     (!f->f_mapping->a_ops->get_xip_mem))) {
  671                         return -EINVAL;
  672                 }
  673         }
  674         return 0;
  675 }
  676 
  677 static int do_dentry_open(struct file *f,
  678                           int (*open)(struct inode *, struct file *),
  679                           const struct cred *cred)
  680 {
  681         static const struct file_operations empty_fops = {};
  682         struct inode *inode;
  683         int error;
  684 
  685         f->f_mode = OPEN_FMODE(f->f_flags) | FMODE_LSEEK |
  686                                 FMODE_PREAD | FMODE_PWRITE;
  687 
  688         if (unlikely(f->f_flags & O_PATH))
  689                 f->f_mode = FMODE_PATH;
  690 
  691         path_get(&f->f_path);
  692         inode = f->f_path.dentry->d_inode;
  693         if (f->f_mode & FMODE_WRITE) {
  694                 error = __get_file_write_access(inode, f->f_path.mnt);
  695                 if (error)
  696                         goto cleanup_file;
  697                 if (!special_file(inode->i_mode))
  698                         file_take_write(f);
  699         }
  700 
  701         f->f_mapping = inode->i_mapping;
  702         f->f_pos = 0;
  703         file_sb_list_add(f, inode->i_sb);
  704 
  705         if (unlikely(f->f_mode & FMODE_PATH)) {
  706                 f->f_op = &empty_fops;
  707                 return 0;
  708         }
  709 
  710         f->f_op = fops_get(inode->i_fop);
  711 
  712         error = security_file_open(f, cred);
  713         if (error)
  714                 goto cleanup_all;
  715 
  716         error = break_lease(inode, f->f_flags);
  717         if (error)
  718                 goto cleanup_all;
  719 
  720         if (!open && f->f_op)
  721                 open = f->f_op->open;
  722         if (open) {
  723                 error = open(inode, f);
  724                 if (error)
  725                         goto cleanup_all;
  726         }
  727         if ((f->f_mode & (FMODE_READ | FMODE_WRITE)) == FMODE_READ)
  728                 i_readcount_inc(inode);
  729 
  730         f->f_flags &= ~(O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC);
  731 
  732         file_ra_state_init(&f->f_ra, f->f_mapping->host->i_mapping);
  733 
  734         return 0;
  735 
  736 cleanup_all:
  737         fops_put(f->f_op);
  738         file_sb_list_del(f);
  739         if (f->f_mode & FMODE_WRITE) {
  740                 put_write_access(inode);
  741                 if (!special_file(inode->i_mode)) {
  742                         /*
  743                          * We don't consider this a real
  744                          * mnt_want/drop_write() pair
  745                          * because it all happenend right
  746                          * here, so just reset the state.
  747                          */
  748                         file_reset_write(f);
  749                         __mnt_drop_write(f->f_path.mnt);
  750                 }
  751         }
  752 cleanup_file:
  753         path_put(&f->f_path);
  754         f->f_path.mnt = NULL;
  755         f->f_path.dentry = NULL;
  756         return error;
  757 }
  758 
  759 /**
  760  * finish_open - finish opening a file
  761  * @od: opaque open data
  762  * @dentry: pointer to dentry
  763  * @open: open callback
  764  *
  765  * This can be used to finish opening a file passed to i_op->atomic_open().
  766  *
  767  * If the open callback is set to NULL, then the standard f_op->open()
  768  * filesystem callback is substituted.
  769  */
  770 int finish_open(struct file *file, struct dentry *dentry,
  771                 int (*open)(struct inode *, struct file *),
  772                 int *opened)
  773 {
  774         int error;
  775         BUG_ON(*opened & FILE_OPENED); /* once it's opened, it's opened */
  776 
  777         file->f_path.dentry = dentry;
  778         error = do_dentry_open(file, open, current_cred());
  779         if (!error)
  780                 *opened |= FILE_OPENED;
  781 
  782         return error;
  783 }
  784 EXPORT_SYMBOL(finish_open);
  785 
  786 /**
  787  * finish_no_open - finish ->atomic_open() without opening the file
  788  *
  789  * @od: opaque open data
  790  * @dentry: dentry or NULL (as returned from ->lookup())
  791  *
  792  * This can be used to set the result of a successful lookup in ->atomic_open().
  793  * The filesystem's atomic_open() method shall return NULL after calling this.
  794  */
  795 int finish_no_open(struct file *file, struct dentry *dentry)
  796 {
  797         file->f_path.dentry = dentry;
  798         return 1;
  799 }
  800 EXPORT_SYMBOL(finish_no_open);
  801 
  802 struct file *dentry_open(const struct path *path, int flags,
  803                          const struct cred *cred)
  804 {
  805         int error;
  806         struct file *f;
  807 
  808         validate_creds(cred);
  809 
  810         /* We must always pass in a valid mount pointer. */
  811         BUG_ON(!path->mnt);
  812 
  813         error = -ENFILE;
  814         f = get_empty_filp();
  815         if (f == NULL)
  816                 return ERR_PTR(error);
  817 
  818         f->f_flags = flags;
  819         f->f_path = *path;
  820         error = do_dentry_open(f, NULL, cred);
  821         if (!error) {
  822                 error = open_check_o_direct(f);
  823                 if (error) {
  824                         fput(f);
  825                         f = ERR_PTR(error);
  826                 }
  827         } else { 
  828                 put_filp(f);
  829                 f = ERR_PTR(error);
  830         }
  831         return f;
  832 }
  833 EXPORT_SYMBOL(dentry_open);
  834 
  835 static inline int build_open_flags(int flags, umode_t mode, struct open_flags *op)
  836 {
  837         int lookup_flags = 0;
  838         int acc_mode;
  839 
  840         if (flags & O_CREAT)
  841                 op->mode = (mode & S_IALLUGO) | S_IFREG;
  842         else
  843                 op->mode = 0;
  844 
  845         /* Must never be set by userspace */
  846         flags &= ~FMODE_NONOTIFY & ~O_CLOEXEC;
  847 
  848         /*
  849          * O_SYNC is implemented as __O_SYNC|O_DSYNC.  As many places only
  850          * check for O_DSYNC if the need any syncing at all we enforce it's
  851          * always set instead of having to deal with possibly weird behaviour
  852          * for malicious applications setting only __O_SYNC.
  853          */
  854         if (flags & __O_SYNC)
  855                 flags |= O_DSYNC;
  856 
  857         /*
  858          * If we have O_PATH in the open flag. Then we
  859          * cannot have anything other than the below set of flags
  860          */
  861         if (flags & O_PATH) {
  862                 flags &= O_DIRECTORY | O_NOFOLLOW | O_PATH;
  863                 acc_mode = 0;
  864         } else {
  865                 acc_mode = MAY_OPEN | ACC_MODE(flags);
  866         }
  867 
  868         op->open_flag = flags;
  869 
  870         /* O_TRUNC implies we need access checks for write permissions */
  871         if (flags & O_TRUNC)
  872                 acc_mode |= MAY_WRITE;
  873 
  874         /* Allow the LSM permission hook to distinguish append
  875            access from general write access. */
  876         if (flags & O_APPEND)
  877                 acc_mode |= MAY_APPEND;
  878 
  879         op->acc_mode = acc_mode;
  880 
  881         op->intent = flags & O_PATH ? 0 : LOOKUP_OPEN;
  882 
  883         if (flags & O_CREAT) {
  884                 op->intent |= LOOKUP_CREATE;
  885                 if (flags & O_EXCL)
  886                         op->intent |= LOOKUP_EXCL;
  887         }
  888 
  889         if (flags & O_DIRECTORY)
  890                 lookup_flags |= LOOKUP_DIRECTORY;
  891         if (!(flags & O_NOFOLLOW))
  892                 lookup_flags |= LOOKUP_FOLLOW;
  893         return lookup_flags;
  894 }
  895 
  896 /**
  897  * file_open_name - open file and return file pointer
  898  *
  899  * @name:       struct filename containing path to open
  900  * @flags:      open flags as per the open(2) second argument
  901  * @mode:       mode for the new file if O_CREAT is set, else ignored
  902  *
  903  * This is the helper to open a file from kernelspace if you really
  904  * have to.  But in generally you should not do this, so please move
  905  * along, nothing to see here..
  906  */
  907 struct file *file_open_name(struct filename *name, int flags, umode_t mode)
  908 {
  909         struct open_flags op;
  910         int lookup = build_open_flags(flags, mode, &op);
  911         return do_filp_open(AT_FDCWD, name, &op, lookup);
  912 }
  913 
  914 /**
  915  * filp_open - open file and return file pointer
  916  *
  917  * @filename:   path to open
  918  * @flags:      open flags as per the open(2) second argument
  919  * @mode:       mode for the new file if O_CREAT is set, else ignored
  920  *
  921  * This is the helper to open a file from kernelspace if you really
  922  * have to.  But in generally you should not do this, so please move
  923  * along, nothing to see here..
  924  */
  925 struct file *filp_open(const char *filename, int flags, umode_t mode)
  926 {
  927         struct filename name = {.name = filename};
  928         return file_open_name(&name, flags, mode);
  929 }
  930 EXPORT_SYMBOL(filp_open);
  931 
  932 struct file *file_open_root(struct dentry *dentry, struct vfsmount *mnt,
  933                             const char *filename, int flags)
  934 {
  935         struct open_flags op;
  936         int lookup = build_open_flags(flags, 0, &op);
  937         if (flags & O_CREAT)
  938                 return ERR_PTR(-EINVAL);
  939         if (!filename && (flags & O_DIRECTORY))
  940                 if (!dentry->d_inode->i_op->lookup)
  941                         return ERR_PTR(-ENOTDIR);
  942         return do_file_open_root(dentry, mnt, filename, &op, lookup);
  943 }
  944 EXPORT_SYMBOL(file_open_root);
  945 
  946 long do_sys_open(int dfd, const char __user *filename, int flags, umode_t mode)
  947 {
  948         struct open_flags op;
  949         int lookup = build_open_flags(flags, mode, &op);
  950         struct filename *tmp = getname(filename);
  951         int fd = PTR_ERR(tmp);
  952 
  953         if (!IS_ERR(tmp)) {
  954                 fd = get_unused_fd_flags(flags);
  955                 if (fd >= 0) {
  956                         struct file *f = do_filp_open(dfd, tmp, &op, lookup);
  957                         if (IS_ERR(f)) {
  958                                 put_unused_fd(fd);
  959                                 fd = PTR_ERR(f);
  960                         } else {
  961                                 fsnotify_open(f);
  962                                 fd_install(fd, f);
  963                         }
  964                 }
  965                 putname(tmp);
  966         }
  967         return fd;
  968 }
  969 
  970 SYSCALL_DEFINE3(open, const char __user *, filename, int, flags, umode_t, mode)
  971 {
  972         long ret;
  973 
  974         if (force_o_largefile())
  975                 flags |= O_LARGEFILE;
  976 
  977         ret = do_sys_open(AT_FDCWD, filename, flags, mode);
  978         /* avoid REGPARM breakage on x86: */
  979         asmlinkage_protect(3, ret, filename, flags, mode);
  980         return ret;
  981 }
  982 
  983 SYSCALL_DEFINE4(openat, int, dfd, const char __user *, filename, int, flags,
  984                 umode_t, mode)
  985 {
  986         long ret;
  987 
  988         if (force_o_largefile())
  989                 flags |= O_LARGEFILE;
  990 
  991         ret = do_sys_open(dfd, filename, flags, mode);
  992         /* avoid REGPARM breakage on x86: */
  993         asmlinkage_protect(4, ret, dfd, filename, flags, mode);
  994         return ret;
  995 }
  996 
  997 #ifndef __alpha__
  998 
  999 /*
 1000  * For backward compatibility?  Maybe this should be moved
 1001  * into arch/i386 instead?
 1002  */
 1003 SYSCALL_DEFINE2(creat, const char __user *, pathname, umode_t, mode)
 1004 {
 1005         return sys_open(pathname, O_CREAT | O_WRONLY | O_TRUNC, mode);
 1006 }
 1007 
 1008 #endif
 1009 
 1010 /*
 1011  * "id" is the POSIX thread ID. We use the
 1012  * files pointer for this..
 1013  */
 1014 int filp_close(struct file *filp, fl_owner_t id)
 1015 {
 1016         int retval = 0;
 1017 
 1018         if (!file_count(filp)) {
 1019                 printk(KERN_ERR "VFS: Close: file count is 0\n");
 1020                 return 0;
 1021         }
 1022 
 1023         if (filp->f_op && filp->f_op->flush)
 1024                 retval = filp->f_op->flush(filp, id);
 1025 
 1026         if (likely(!(filp->f_mode & FMODE_PATH))) {
 1027                 dnotify_flush(filp, id);
 1028                 locks_remove_posix(filp, id);
 1029         }
 1030         fput(filp);
 1031         return retval;
 1032 }
 1033 
 1034 EXPORT_SYMBOL(filp_close);
 1035 
 1036 /*
 1037  * Careful here! We test whether the file pointer is NULL before
 1038  * releasing the fd. This ensures that one clone task can't release
 1039  * an fd while another clone is opening it.
 1040  */
 1041 SYSCALL_DEFINE1(close, unsigned int, fd)
 1042 {
 1043         int retval = __close_fd(current->files, fd);
 1044 
 1045         /* can't restart close syscall because file table entry was cleared */
 1046         if (unlikely(retval == -ERESTARTSYS ||
 1047                      retval == -ERESTARTNOINTR ||
 1048                      retval == -ERESTARTNOHAND ||
 1049                      retval == -ERESTART_RESTARTBLOCK))
 1050                 retval = -EINTR;
 1051 
 1052         return retval;
 1053 }
 1054 EXPORT_SYMBOL(sys_close);
 1055 
 1056 /*
 1057  * This routine simulates a hangup on the tty, to arrange that users
 1058  * are given clean terminals at login time.
 1059  */
 1060 SYSCALL_DEFINE0(vhangup)
 1061 {
 1062         if (capable(CAP_SYS_TTY_CONFIG)) {
 1063                 tty_vhangup_self();
 1064                 return 0;
 1065         }
 1066         return -EPERM;
 1067 }
 1068 
 1069 /*
 1070  * Called when an inode is about to be open.
 1071  * We use this to disallow opening large files on 32bit systems if
 1072  * the caller didn't specify O_LARGEFILE.  On 64bit systems we force
 1073  * on this flag in sys_open.
 1074  */
 1075 int generic_file_open(struct inode * inode, struct file * filp)
 1076 {
 1077         if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
 1078                 return -EOVERFLOW;
 1079         return 0;
 1080 }
 1081 
 1082 EXPORT_SYMBOL(generic_file_open);
 1083 
 1084 /*
 1085  * This is used by subsystems that don't want seekable
 1086  * file descriptors. The function is not supposed to ever fail, the only
 1087  * reason it returns an 'int' and not 'void' is so that it can be plugged
 1088  * directly into file_operations structure.
 1089  */
 1090 int nonseekable_open(struct inode *inode, struct file *filp)
 1091 {
 1092         filp->f_mode &= ~(FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE);
 1093         return 0;
 1094 }
 1095 
 1096 EXPORT_SYMBOL(nonseekable_open);

Cache object: d37f9ecebd0c6f6a20c751ddfd2b06bc


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.