FreeBSD/Linux Kernel Cross Reference
sys/fs/namei.c
1 /*
2 * linux/fs/namei.c
3 *
4 * Copyright (C) 1991, 1992 Linus Torvalds
5 */
6
7 /*
8 * Some corrections by tytso.
9 */
10
11 /* [Feb 1997 T. Schoebel-Theuer] Complete rewrite of the pathname
12 * lookup logic.
13 */
14 /* [Feb-Apr 2000, AV] Rewrite to the new namespace architecture.
15 */
16
17 #include <linux/init.h>
18 #include <linux/slab.h>
19 #include <linux/fs.h>
20 #include <linux/quotaops.h>
21 #include <linux/pagemap.h>
22 #include <linux/dnotify.h>
23 #include <linux/smp_lock.h>
24 #include <linux/personality.h>
25
26 #include <asm/namei.h>
27 #include <asm/uaccess.h>
28
/*
 * ACC_MODE(x): translate the two access-mode bits of x into a permission
 * mask by indexing a small byte table with (x) & O_ACCMODE:
 *	0 -> 0, 1 -> 04, 2 -> 02, 3 -> 06.
 */
#define ACC_MODE(x) ("\000\004\002\006"[(x)&O_ACCMODE])
30
31 /* [Feb-1997 T. Schoebel-Theuer]
32 * Fundamental changes in the pathname lookup mechanisms (namei)
33 * were necessary because of omirr. The reason is that omirr needs
34 * to know the _real_ pathname, not the user-supplied one, in case
35 * of symlinks (and also when transname replacements occur).
36 *
37 * The new code replaces the old recursive symlink resolution with
38 * an iterative one (in case of non-nested symlink chains). It does
39 * this with calls to <fs>_follow_link().
40 * As a side effect, dir_namei(), _namei() and follow_link() are now
41 * replaced with a single function lookup_dentry() that can handle all
42 * the special cases of the former code.
43 *
44 * With the new dcache, the pathname is stored at each inode, at least as
45 * long as the refcount of the inode is positive. As a side effect, the
46 * size of the dcache depends on the inode cache and thus is dynamic.
47 *
48 * [29-Apr-1998 C. Scott Ananian] Updated above description of symlink
49 * resolution to correspond with current state of the code.
50 *
51 * Note that the symlink resolution is not *completely* iterative.
52 * There is still a significant amount of tail- and mid- recursion in
53 * the algorithm. Also, note that <fs>_readlink() is not used in
54 * lookup_dentry(): lookup_dentry() on the result of <fs>_readlink()
55 * may return different results than <fs>_follow_link(). Many virtual
56 * filesystems (including /proc) exhibit this behavior.
57 */
58
59 /* [24-Feb-97 T. Schoebel-Theuer] Side effects caused by new implementation:
60 * New symlink semantics: when open() is called with flags O_CREAT | O_EXCL
61 * and the name already exists in form of a symlink, try to create the new
62 * name indicated by the symlink. The old code always complained that the
63 * name already exists, due to not following the symlink even if its target
64 * is nonexistent. The new semantics affects also mknod() and link() when
65 * the name is a symlink pointing to a non-existent name.
66 *
67 * I don't know which semantics is the right one, since I have no access
68 * to standards. But I found by trial that HP-UX 9.0 has the full "new"
69 * semantics implemented, while SunOS 4.1.1 and Solaris (SunOS 5.4) have the
70 * "old" one. Personally, I think the new semantics is much more logical.
71 * Note that "ln old new" where "new" is a symlink pointing to a non-existing
72 * file does succeed in both HP-UX and SunOs, but not in Solaris
73 * and in the old Linux semantics.
74 */
75
76 /* [16-Dec-97 Kevin Buhr] For security reasons, we change some symlink
77 * semantics. See the comments in "open_namei" and "do_link" below.
78 *
79 * [10-Sep-98 Alan Modra] Another symlink change.
80 */
81
82 /* [Feb-Apr 2000 AV] Complete rewrite. Rules for symlinks:
83 * inside the path - always follow.
84 * in the last component in creation/removal/renaming - never follow.
85 * if LOOKUP_FOLLOW passed - follow.
86 * if the pathname has trailing slashes - follow.
87 * otherwise - don't follow.
88 * (applied in that order).
89 *
90 * [Jun 2000 AV] Inconsistent behaviour of open() in case if flags==O_CREAT
91 * restored for 2.4. This is the last surviving part of old 4.2BSD bug.
92 * During the 2.4 we need to fix the userland stuff depending on it -
93 * hopefully we will be able to get rid of that wart in 2.5. So far only
94 * XEmacs seems to be relying on it...
95 */
96
97 /* In order to reduce some races, while at the same time doing additional
98 * checking and hopefully speeding things up, we copy filenames to the
99 * kernel data space before using them..
100 *
101 * POSIX.1 2.4: an empty pathname is invalid (ENOENT).
102 * PATH_MAX includes the nul terminator --RR.
103 */
104 static inline int do_getname(const char *filename, char *page)
105 {
106 int retval;
107 unsigned long len = PATH_MAX;
108
109 if ((unsigned long) filename >= TASK_SIZE) {
110 if (!segment_eq(get_fs(), KERNEL_DS))
111 return -EFAULT;
112 } else if (TASK_SIZE - (unsigned long) filename < PATH_MAX)
113 len = TASK_SIZE - (unsigned long) filename;
114
115 retval = strncpy_from_user((char *)page, filename, len);
116 if (retval > 0) {
117 if (retval < len)
118 return 0;
119 return -ENAMETOOLONG;
120 } else if (!retval)
121 retval = -ENOENT;
122 return retval;
123 }
124
125 char * getname(const char * filename)
126 {
127 char *tmp, *result;
128
129 result = ERR_PTR(-ENOMEM);
130 tmp = __getname();
131 if (tmp) {
132 int retval = do_getname(filename, tmp);
133
134 result = tmp;
135 if (retval < 0) {
136 putname(tmp);
137 result = ERR_PTR(retval);
138 }
139 }
140 return result;
141 }
142
143 /*
144 * vfs_permission()
145 *
146 * is used to check for read/write/execute permissions on a file.
147 * We use "fsuid" for this, letting us set arbitrary permissions
148 * for filesystem access without changing the "normal" uids which
149 * are used for other things..
150 */
151 int vfs_permission(struct inode * inode, int mask)
152 {
153 umode_t mode = inode->i_mode;
154
155 if (mask & MAY_WRITE) {
156 /*
157 * Nobody gets write access to a read-only fs.
158 */
159 if (IS_RDONLY(inode) &&
160 (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)))
161 return -EROFS;
162
163 /*
164 * Nobody gets write access to an immutable file.
165 */
166 if (IS_IMMUTABLE(inode))
167 return -EACCES;
168 }
169
170 if (current->fsuid == inode->i_uid)
171 mode >>= 6;
172 else if (in_group_p(inode->i_gid))
173 mode >>= 3;
174
175 /*
176 * If the DACs are ok we don't need any capability check.
177 */
178 if (((mode & mask & (MAY_READ|MAY_WRITE|MAY_EXEC)) == mask))
179 return 0;
180
181 /*
182 * Read/write DACs are always overridable.
183 * Executable DACs are overridable if at least one exec bit is set.
184 */
185 if ((mask & (MAY_READ|MAY_WRITE)) || (inode->i_mode & S_IXUGO))
186 if (capable(CAP_DAC_OVERRIDE))
187 return 0;
188
189 /*
190 * Searching includes executable on directories, else just read.
191 */
192 if (mask == MAY_READ || (S_ISDIR(inode->i_mode) && !(mask & MAY_WRITE)))
193 if (capable(CAP_DAC_READ_SEARCH))
194 return 0;
195
196 return -EACCES;
197 }
198
199 int permission(struct inode * inode,int mask)
200 {
201 if (inode->i_op && inode->i_op->permission) {
202 int retval;
203 lock_kernel();
204 retval = inode->i_op->permission(inode, mask);
205 unlock_kernel();
206 return retval;
207 }
208 return vfs_permission(inode, mask);
209 }
210
211 /*
212 * get_write_access() gets write permission for a file.
213 * put_write_access() releases this write permission.
214 * This is used for regular files.
215 * We cannot support write (and maybe mmap read-write shared) accesses and
216 * MAP_DENYWRITE mmappings simultaneously. The i_writecount field of an inode
217 * can have the following values:
218 * 0: no writers, no VM_DENYWRITE mappings
219 * < 0: (-i_writecount) vm_area_structs with VM_DENYWRITE set exist
220 * > 0: (i_writecount) users are writing to the file.
221 *
222 * Normally we operate on that counter with atomic_{inc,dec} and it's safe
223 * except for the cases where we don't hold i_writecount yet. Then we need to
224 * use {get,deny}_write_access() - these functions check the sign and refuse
225 * to do the change if sign is wrong. Exclusion between them is provided by
226 * spinlock (arbitration_lock) and I'll rip the second arsehole to the first
227 * who will try to move it in struct inode - just leave it here.
228 */
229 static spinlock_t arbitration_lock = SPIN_LOCK_UNLOCKED;
230 int get_write_access(struct inode * inode)
231 {
232 spin_lock(&arbitration_lock);
233 if (atomic_read(&inode->i_writecount) < 0) {
234 spin_unlock(&arbitration_lock);
235 return -ETXTBSY;
236 }
237 atomic_inc(&inode->i_writecount);
238 spin_unlock(&arbitration_lock);
239 return 0;
240 }
241 int deny_write_access(struct file * file)
242 {
243 spin_lock(&arbitration_lock);
244 if (atomic_read(&file->f_dentry->d_inode->i_writecount) > 0) {
245 spin_unlock(&arbitration_lock);
246 return -ETXTBSY;
247 }
248 atomic_dec(&file->f_dentry->d_inode->i_writecount);
249 spin_unlock(&arbitration_lock);
250 return 0;
251 }
252
253 void path_release(struct nameidata *nd)
254 {
255 dput(nd->dentry);
256 mntput(nd->mnt);
257 }
258
259 /*
260 * Internal lookup() using the new generic dcache.
261 * SMP-safe
262 */
263 static struct dentry * cached_lookup(struct dentry * parent, struct qstr * name, int flags)
264 {
265 struct dentry * dentry = d_lookup(parent, name);
266
267 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
268 if (!dentry->d_op->d_revalidate(dentry, flags) && !d_invalidate(dentry)) {
269 dput(dentry);
270 dentry = NULL;
271 }
272 }
273 return dentry;
274 }
275
276 /*
277 * This is called when everything else fails, and we actually have
278 * to go to the low-level filesystem to find out what we should do..
279 *
280 * We get the directory semaphore, and after getting that we also
281 * make sure that nobody added the entry to the dcache in the meantime..
282 * SMP-safe
283 */
284 static struct dentry * real_lookup(struct dentry * parent, struct qstr * name, int flags)
285 {
286 struct dentry * result;
287 struct inode *dir = parent->d_inode;
288
289 down(&dir->i_sem);
290 /*
291 * First re-do the cached lookup just in case it was created
292 * while we waited for the directory semaphore..
293 *
294 * FIXME! This could use version numbering or similar to
295 * avoid unnecessary cache lookups.
296 */
297 result = d_lookup(parent, name);
298 if (!result) {
299 struct dentry * dentry = d_alloc(parent, name);
300 result = ERR_PTR(-ENOMEM);
301 if (dentry) {
302 lock_kernel();
303 result = dir->i_op->lookup(dir, dentry);
304 unlock_kernel();
305 if (result)
306 dput(dentry);
307 else
308 result = dentry;
309 }
310 up(&dir->i_sem);
311 return result;
312 }
313
314 /*
315 * Uhhuh! Nasty case: the cache was re-populated while
316 * we waited on the semaphore. Need to revalidate.
317 */
318 up(&dir->i_sem);
319 if (result->d_op && result->d_op->d_revalidate) {
320 if (!result->d_op->d_revalidate(result, flags) && !d_invalidate(result)) {
321 dput(result);
322 result = ERR_PTR(-ENOENT);
323 }
324 }
325 return result;
326 }
327
328 /*
329 * This limits recursive symlink follows to 8, while
330 * limiting consecutive symlinks to 40.
331 *
332 * Without that kind of total limit, nasty chains of consecutive
333 * symlinks can cause almost arbitrarily long lookups.
334 */
335 static inline int do_follow_link(struct dentry *dentry, struct nameidata *nd)
336 {
337 int err;
338 if (current->link_count >= 5)
339 goto loop;
340 if (current->total_link_count >= 40)
341 goto loop;
342 if (current->need_resched) {
343 current->state = TASK_RUNNING;
344 schedule();
345 }
346 current->link_count++;
347 current->total_link_count++;
348 UPDATE_ATIME(dentry->d_inode);
349 err = dentry->d_inode->i_op->follow_link(dentry, nd);
350 current->link_count--;
351 return err;
352 loop:
353 path_release(nd);
354 return -ELOOP;
355 }
356
357 static inline int __follow_up(struct vfsmount **mnt, struct dentry **base)
358 {
359 struct vfsmount *parent;
360 struct dentry *dentry;
361 spin_lock(&dcache_lock);
362 parent=(*mnt)->mnt_parent;
363 if (parent == *mnt) {
364 spin_unlock(&dcache_lock);
365 return 0;
366 }
367 mntget(parent);
368 dentry=dget((*mnt)->mnt_mountpoint);
369 spin_unlock(&dcache_lock);
370 dput(*base);
371 *base = dentry;
372 mntput(*mnt);
373 *mnt = parent;
374 return 1;
375 }
376
/* Non-inline entry point for __follow_up(). */
int follow_up(struct vfsmount **mnt, struct dentry **dentry)
{
	return __follow_up(mnt, dentry);
}
381
382 static inline int __follow_down(struct vfsmount **mnt, struct dentry **dentry)
383 {
384 struct vfsmount *mounted;
385
386 spin_lock(&dcache_lock);
387 mounted = lookup_mnt(*mnt, *dentry);
388 if (mounted) {
389 *mnt = mntget(mounted);
390 spin_unlock(&dcache_lock);
391 dput(*dentry);
392 mntput(mounted->mnt_parent);
393 *dentry = dget(mounted->mnt_root);
394 return 1;
395 }
396 spin_unlock(&dcache_lock);
397 return 0;
398 }
399
/* Non-inline entry point for __follow_down(). */
int follow_down(struct vfsmount **mnt, struct dentry **dentry)
{
	return __follow_down(mnt, dentry);
}
404
405 static inline void follow_dotdot(struct nameidata *nd)
406 {
407 while(1) {
408 struct vfsmount *parent;
409 struct dentry *dentry;
410 read_lock(¤t->fs->lock);
411 if (nd->dentry == current->fs->root &&
412 nd->mnt == current->fs->rootmnt) {
413 read_unlock(¤t->fs->lock);
414 break;
415 }
416 read_unlock(¤t->fs->lock);
417 spin_lock(&dcache_lock);
418 if (nd->dentry != nd->mnt->mnt_root) {
419 dentry = dget(nd->dentry->d_parent);
420 spin_unlock(&dcache_lock);
421 dput(nd->dentry);
422 nd->dentry = dentry;
423 break;
424 }
425 parent=nd->mnt->mnt_parent;
426 if (parent == nd->mnt) {
427 spin_unlock(&dcache_lock);
428 break;
429 }
430 mntget(parent);
431 dentry=dget(nd->mnt->mnt_mountpoint);
432 spin_unlock(&dcache_lock);
433 dput(nd->dentry);
434 nd->dentry = dentry;
435 mntput(nd->mnt);
436 nd->mnt = parent;
437 }
438 while (d_mountpoint(nd->dentry) && __follow_down(&nd->mnt, &nd->dentry))
439 ;
440 }
441
442 /*
443 * Name resolution.
444 *
445 * This is the basic name resolution function, turning a pathname
446 * into the final dentry.
447 *
448 * We expect 'base' to be positive and a directory.
449 */
450 int link_path_walk(const char * name, struct nameidata *nd)
451 {
452 struct dentry *dentry;
453 struct inode *inode;
454 int err;
455 unsigned int lookup_flags = nd->flags;
456
457 while (*name=='/')
458 name++;
459 if (!*name)
460 goto return_reval;
461
462 inode = nd->dentry->d_inode;
463 if (current->link_count)
464 lookup_flags = LOOKUP_FOLLOW;
465
466 /* At this point we know we have a real path component. */
467 for(;;) {
468 unsigned long hash;
469 struct qstr this;
470 unsigned int c;
471
472 err = permission(inode, MAY_EXEC);
473 dentry = ERR_PTR(err);
474 if (err)
475 break;
476
477 this.name = name;
478 c = *(const unsigned char *)name;
479
480 hash = init_name_hash();
481 do {
482 name++;
483 hash = partial_name_hash(c, hash);
484 c = *(const unsigned char *)name;
485 } while (c && (c != '/'));
486 this.len = name - (const char *) this.name;
487 this.hash = end_name_hash(hash);
488
489 /* remove trailing slashes? */
490 if (!c)
491 goto last_component;
492 while (*++name == '/');
493 if (!*name)
494 goto last_with_slashes;
495
496 /*
497 * "." and ".." are special - ".." especially so because it has
498 * to be able to know about the current root directory and
499 * parent relationships.
500 */
501 if (this.name[0] == '.') switch (this.len) {
502 default:
503 break;
504 case 2:
505 if (this.name[1] != '.')
506 break;
507 follow_dotdot(nd);
508 inode = nd->dentry->d_inode;
509 /* fallthrough */
510 case 1:
511 continue;
512 }
513 /*
514 * See if the low-level filesystem might want
515 * to use its own hash..
516 */
517 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
518 err = nd->dentry->d_op->d_hash(nd->dentry, &this);
519 if (err < 0)
520 break;
521 }
522 /* This does the actual lookups.. */
523 dentry = cached_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
524 if (!dentry) {
525 dentry = real_lookup(nd->dentry, &this, LOOKUP_CONTINUE);
526 err = PTR_ERR(dentry);
527 if (IS_ERR(dentry))
528 break;
529 }
530 /* Check mountpoints.. */
531 while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
532 ;
533
534 err = -ENOENT;
535 inode = dentry->d_inode;
536 if (!inode)
537 goto out_dput;
538 err = -ENOTDIR;
539 if (!inode->i_op)
540 goto out_dput;
541
542 if (inode->i_op->follow_link) {
543 err = do_follow_link(dentry, nd);
544 dput(dentry);
545 if (err)
546 goto return_err;
547 err = -ENOENT;
548 inode = nd->dentry->d_inode;
549 if (!inode)
550 break;
551 err = -ENOTDIR;
552 if (!inode->i_op)
553 break;
554 } else {
555 dput(nd->dentry);
556 nd->dentry = dentry;
557 }
558 err = -ENOTDIR;
559 if (!inode->i_op->lookup)
560 break;
561 continue;
562 /* here ends the main loop */
563
564 last_with_slashes:
565 lookup_flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
566 last_component:
567 if (lookup_flags & LOOKUP_PARENT)
568 goto lookup_parent;
569 if (this.name[0] == '.') switch (this.len) {
570 default:
571 break;
572 case 2:
573 if (this.name[1] != '.')
574 break;
575 follow_dotdot(nd);
576 inode = nd->dentry->d_inode;
577 /* fallthrough */
578 case 1:
579 goto return_reval;
580 }
581 if (nd->dentry->d_op && nd->dentry->d_op->d_hash) {
582 err = nd->dentry->d_op->d_hash(nd->dentry, &this);
583 if (err < 0)
584 break;
585 }
586 dentry = cached_lookup(nd->dentry, &this, 0);
587 if (!dentry) {
588 dentry = real_lookup(nd->dentry, &this, 0);
589 err = PTR_ERR(dentry);
590 if (IS_ERR(dentry))
591 break;
592 }
593 while (d_mountpoint(dentry) && __follow_down(&nd->mnt, &dentry))
594 ;
595 inode = dentry->d_inode;
596 if ((lookup_flags & LOOKUP_FOLLOW)
597 && inode && inode->i_op && inode->i_op->follow_link) {
598 err = do_follow_link(dentry, nd);
599 dput(dentry);
600 if (err)
601 goto return_err;
602 inode = nd->dentry->d_inode;
603 } else {
604 dput(nd->dentry);
605 nd->dentry = dentry;
606 }
607 err = -ENOENT;
608 if (!inode)
609 goto no_inode;
610 if (lookup_flags & LOOKUP_DIRECTORY) {
611 err = -ENOTDIR;
612 if (!inode->i_op || !inode->i_op->lookup)
613 break;
614 }
615 goto return_base;
616 no_inode:
617 err = -ENOENT;
618 if (lookup_flags & (LOOKUP_POSITIVE|LOOKUP_DIRECTORY))
619 break;
620 goto return_base;
621 lookup_parent:
622 nd->last = this;
623 nd->last_type = LAST_NORM;
624 if (this.name[0] != '.')
625 goto return_base;
626 if (this.len == 1)
627 nd->last_type = LAST_DOT;
628 else if (this.len == 2 && this.name[1] == '.')
629 nd->last_type = LAST_DOTDOT;
630 else
631 goto return_base;
632 return_reval:
633 /*
634 * We bypassed the ordinary revalidation routines.
635 * Check the cached dentry for staleness.
636 */
637 dentry = nd->dentry;
638 if (dentry && dentry->d_op && dentry->d_op->d_revalidate) {
639 err = -ESTALE;
640 if (!dentry->d_op->d_revalidate(dentry, 0)) {
641 d_invalidate(dentry);
642 break;
643 }
644 }
645 return_base:
646 return 0;
647 out_dput:
648 dput(dentry);
649 break;
650 }
651 path_release(nd);
652 return_err:
653 return err;
654 }
655
656 int path_walk(const char * name, struct nameidata *nd)
657 {
658 current->total_link_count = 0;
659 return link_path_walk(name, nd);
660 }
661
662 /* SMP-safe */
663 /* returns 1 if everything is done */
664 static int __emul_lookup_dentry(const char *name, struct nameidata *nd)
665 {
666 if (path_walk(name, nd))
667 return 0; /* something went wrong... */
668
669 if (!nd->dentry->d_inode || S_ISDIR(nd->dentry->d_inode->i_mode)) {
670 struct nameidata nd_root;
671 /*
672 * NAME was not found in alternate root or it's a directory. Try to find
673 * it in the normal root:
674 */
675 nd_root.last_type = LAST_ROOT;
676 nd_root.flags = nd->flags;
677 read_lock(¤t->fs->lock);
678 nd_root.mnt = mntget(current->fs->rootmnt);
679 nd_root.dentry = dget(current->fs->root);
680 read_unlock(¤t->fs->lock);
681 if (path_walk(name, &nd_root))
682 return 1;
683 if (nd_root.dentry->d_inode) {
684 path_release(nd);
685 nd->dentry = nd_root.dentry;
686 nd->mnt = nd_root.mnt;
687 nd->last = nd_root.last;
688 return 1;
689 }
690 path_release(&nd_root);
691 }
692 return 1;
693 }
694
695 void set_fs_altroot(void)
696 {
697 char *emul = __emul_prefix();
698 struct nameidata nd;
699 struct vfsmount *mnt = NULL, *oldmnt;
700 struct dentry *dentry = NULL, *olddentry;
701 if (emul) {
702 read_lock(¤t->fs->lock);
703 nd.mnt = mntget(current->fs->rootmnt);
704 nd.dentry = dget(current->fs->root);
705 read_unlock(¤t->fs->lock);
706 nd.flags = LOOKUP_FOLLOW|LOOKUP_DIRECTORY|LOOKUP_POSITIVE;
707 if (path_walk(emul,&nd) == 0) {
708 mnt = nd.mnt;
709 dentry = nd.dentry;
710 }
711 }
712 write_lock(¤t->fs->lock);
713 oldmnt = current->fs->altrootmnt;
714 olddentry = current->fs->altroot;
715 current->fs->altrootmnt = mnt;
716 current->fs->altroot = dentry;
717 write_unlock(¤t->fs->lock);
718 if (olddentry) {
719 dput(olddentry);
720 mntput(oldmnt);
721 }
722 }
723
724 /* SMP-safe */
725 static inline int
726 walk_init_root(const char *name, struct nameidata *nd)
727 {
728 read_lock(¤t->fs->lock);
729 if (current->fs->altroot && !(nd->flags & LOOKUP_NOALT)) {
730 nd->mnt = mntget(current->fs->altrootmnt);
731 nd->dentry = dget(current->fs->altroot);
732 read_unlock(¤t->fs->lock);
733 if (__emul_lookup_dentry(name,nd))
734 return 0;
735 read_lock(¤t->fs->lock);
736 }
737 nd->mnt = mntget(current->fs->rootmnt);
738 nd->dentry = dget(current->fs->root);
739 read_unlock(¤t->fs->lock);
740 return 1;
741 }
742
/*
 * SMP-safe
 *
 * path_lookup() - path_init() followed, when it asks for it, by
 * path_walk().  Returns 0 when path_init() returns 0, otherwise the
 * result of path_walk().
 */
int path_lookup(const char *path, unsigned flags, struct nameidata *nd)
{
	if (!path_init(path, flags, nd))
		return 0;
	return path_walk(path, nd);
}
751
752
753 /* SMP-safe */
754 int path_init(const char *name, unsigned int flags, struct nameidata *nd)
755 {
756 nd->last_type = LAST_ROOT; /* if there are only slashes... */
757 nd->flags = flags;
758 if (*name=='/')
759 return walk_init_root(name,nd);
760 read_lock(¤t->fs->lock);
761 nd->mnt = mntget(current->fs->pwdmnt);
762 nd->dentry = dget(current->fs->pwd);
763 read_unlock(¤t->fs->lock);
764 return 1;
765 }
766
767 /*
768 * Restricted form of lookup. Doesn't follow links, single-component only,
769 * needs parent already locked. Doesn't follow mounts.
770 * SMP-safe.
771 */
772 struct dentry * lookup_hash(struct qstr *name, struct dentry * base)
773 {
774 struct dentry * dentry;
775 struct inode *inode;
776 int err;
777
778 inode = base->d_inode;
779 err = permission(inode, MAY_EXEC);
780 dentry = ERR_PTR(err);
781 if (err)
782 goto out;
783
784 /*
785 * See if the low-level filesystem might want
786 * to use its own hash..
787 */
788 if (base->d_op && base->d_op->d_hash) {
789 err = base->d_op->d_hash(base, name);
790 dentry = ERR_PTR(err);
791 if (err < 0)
792 goto out;
793 }
794
795 dentry = cached_lookup(base, name, 0);
796 if (!dentry) {
797 struct dentry *new = d_alloc(base, name);
798 dentry = ERR_PTR(-ENOMEM);
799 if (!new)
800 goto out;
801 lock_kernel();
802 dentry = inode->i_op->lookup(inode, new);
803 unlock_kernel();
804 if (!dentry)
805 dentry = new;
806 else
807 dput(new);
808 }
809 out:
810 return dentry;
811 }
812
813 /* SMP-safe */
814 struct dentry * lookup_one_len(const char * name, struct dentry * base, int len)
815 {
816 unsigned long hash;
817 struct qstr this;
818 unsigned int c;
819
820 this.name = name;
821 this.len = len;
822 if (!len)
823 goto access;
824
825 hash = init_name_hash();
826 while (len--) {
827 c = *(const unsigned char *)name++;
828 if (c == '/' || c == '\0')
829 goto access;
830 hash = partial_name_hash(c, hash);
831 }
832 this.hash = end_name_hash(hash);
833
834 return lookup_hash(&this, base);
835 access:
836 return ERR_PTR(-EACCES);
837 }
838
/*
 *	namei()
 *
 * is used by most simple commands to get the inode of a specified name.
 * Open, link etc use their own routines, but this is enough for things
 * like 'chmod' etc.
 *
 * namei exists in two versions: namei/lnamei. The only difference is
 * that namei follows links, while lnamei does not.
 * SMP-safe
 *
 * __user_walk() copies 'name' with getname(), runs path_lookup() on the
 * copy with the given flags, and releases the copy again.  Returns the
 * getname() error or the path_lookup() result.
 */
int __user_walk(const char *name, unsigned flags, struct nameidata *nd)
{
	char *kname = getname(name);
	int err;

	if (IS_ERR(kname))
		return PTR_ERR(kname);

	err = path_lookup(kname, flags, nd);
	putname(kname);
	return err;
}
864
865 /*
866 * It's inline, so penalty for filesystems that don't use sticky bit is
867 * minimal.
868 */
869 static inline int check_sticky(struct inode *dir, struct inode *inode)
870 {
871 if (!(dir->i_mode & S_ISVTX))
872 return 0;
873 if (inode->i_uid == current->fsuid)
874 return 0;
875 if (dir->i_uid == current->fsuid)
876 return 0;
877 return !capable(CAP_FOWNER);
878 }
879
880 /*
881 * Check whether we can remove a link victim from directory dir, check
882 * whether the type of victim is right.
883 * 1. We can't do it if dir is read-only (done in permission())
884 * 2. We should have write and exec permissions on dir
885 * 3. We can't remove anything from append-only dir
886 * 4. We can't do anything with immutable dir (done in permission())
887 * 5. If the sticky bit on dir is set we should either
888 * a. be owner of dir, or
889 * b. be owner of victim, or
890 * c. have CAP_FOWNER capability
891 * 6. If the victim is append-only or immutable we can't do antyhing with
892 * links pointing to it.
893 * 7. If we were asked to remove a directory and victim isn't one - ENOTDIR.
894 * 8. If we were asked to remove a non-directory and victim isn't one - EISDIR.
895 * 9. We can't remove a root or mountpoint.
896 */
897 static inline int may_delete(struct inode *dir,struct dentry *victim, int isdir)
898 {
899 int error;
900 if (!victim->d_inode || victim->d_parent->d_inode != dir)
901 return -ENOENT;
902 error = permission(dir,MAY_WRITE | MAY_EXEC);
903 if (error)
904 return error;
905 if (IS_APPEND(dir))
906 return -EPERM;
907 if (check_sticky(dir, victim->d_inode)||IS_APPEND(victim->d_inode)||
908 IS_IMMUTABLE(victim->d_inode))
909 return -EPERM;
910 if (isdir) {
911 if (!S_ISDIR(victim->d_inode->i_mode))
912 return -ENOTDIR;
913 if (IS_ROOT(victim))
914 return -EBUSY;
915 } else if (S_ISDIR(victim->d_inode->i_mode))
916 return -EISDIR;
917 if (IS_DEADDIR(dir))
918 return -ENOENT;
919 return 0;
920 }
921
922 /* Check whether we can create an object with dentry child in directory
923 * dir.
924 * 1. We can't do it if child already exists (open has special treatment for
925 * this case, but since we are inlined it's OK)
926 * 2. We can't do it if dir is read-only (done in permission())
927 * 3. We should have write and exec permissions on dir
928 * 4. We can't do it if dir is immutable (done in permission())
929 */
930 static inline int may_create(struct inode *dir, struct dentry *child) {
931 if (child->d_inode)
932 return -EEXIST;
933 if (IS_DEADDIR(dir))
934 return -ENOENT;
935 return permission(dir,MAY_WRITE | MAY_EXEC);
936 }
937
938 /*
939 * Special case: O_CREAT|O_EXCL implies O_NOFOLLOW for security
940 * reasons.
941 *
942 * O_DIRECTORY translates into forcing a directory lookup.
943 */
944 static inline int lookup_flags(unsigned int f)
945 {
946 unsigned long retval = LOOKUP_FOLLOW;
947
948 if (f & O_NOFOLLOW)
949 retval &= ~LOOKUP_FOLLOW;
950
951 if ((f & (O_CREAT|O_EXCL)) == (O_CREAT|O_EXCL))
952 retval &= ~LOOKUP_FOLLOW;
953
954 if (f & O_DIRECTORY)
955 retval |= LOOKUP_DIRECTORY;
956
957 return retval;
958 }
959
960 int vfs_create(struct inode *dir, struct dentry *dentry, int mode)
961 {
962 int error;
963
964 mode &= S_IALLUGO;
965 mode |= S_IFREG;
966
967 down(&dir->i_zombie);
968 error = may_create(dir, dentry);
969 if (error)
970 goto exit_lock;
971
972 error = -EACCES; /* shouldn't it be ENOSYS? */
973 if (!dir->i_op || !dir->i_op->create)
974 goto exit_lock;
975
976 DQUOT_INIT(dir);
977 lock_kernel();
978 error = dir->i_op->create(dir, dentry, mode);
979 unlock_kernel();
980 exit_lock:
981 up(&dir->i_zombie);
982 if (!error)
983 inode_dir_notify(dir, DN_CREATE);
984 return error;
985 }
986
987 /*
988 * open_namei()
989 *
990 * namei for open - this is in fact almost the whole open-routine.
991 *
992 * Note that the low bits of "flag" aren't the same as in the open
993 * system call - they are 00 - no permissions needed
994 * 01 - read permission needed
995 * 10 - write permission needed
996 * 11 - read/write permissions needed
997 * which is a lot more logical, and also allows the "no perm" needed
998 * for symlinks (where the permissions are checked later).
999 * SMP-safe
1000 */
1001 int open_namei(const char * pathname, int flag, int mode, struct nameidata *nd)
1002 {
1003 int acc_mode, error = 0;
1004 struct inode *inode;
1005 struct dentry *dentry;
1006 struct dentry *dir;
1007 int count = 0;
1008
1009 acc_mode = ACC_MODE(flag);
1010
1011 /*
1012 * The simplest case - just a plain lookup.
1013 */
1014 if (!(flag & O_CREAT)) {
1015 error = path_lookup(pathname, lookup_flags(flag), nd);
1016 if (error)
1017 return error;
1018 dentry = nd->dentry;
1019 goto ok;
1020 }
1021
1022 /*
1023 * Create - we need to know the parent.
1024 */
1025 error = path_lookup(pathname, LOOKUP_PARENT, nd);
1026 if (error)
1027 return error;
1028
1029 /*
1030 * We have the parent and last component. First of all, check
1031 * that we are not asked to creat(2) an obvious directory - that
1032 * will not do.
1033 */
1034 error = -EISDIR;
1035 if (nd->last_type != LAST_NORM || nd->last.name[nd->last.len])
1036 goto exit;
1037
1038 dir = nd->dentry;
1039 down(&dir->d_inode->i_sem);
1040 dentry = lookup_hash(&nd->last, nd->dentry);
1041
1042 do_last:
1043 error = PTR_ERR(dentry);
1044 if (IS_ERR(dentry)) {
1045 up(&dir->d_inode->i_sem);
1046 goto exit;
1047 }
1048
1049 /* Negative dentry, just create the file */
1050 if (!dentry->d_inode) {
1051 error = vfs_create(dir->d_inode, dentry,
1052 mode & ~current->fs->umask);
1053 up(&dir->d_inode->i_sem);
1054 dput(nd->dentry);
1055 nd->dentry = dentry;
1056 if (error)
1057 goto exit;
1058 /* Don't check for write permission, don't truncate */
1059 acc_mode = 0;
1060 flag &= ~O_TRUNC;
1061 goto ok;
1062 }
1063
1064 /*
1065 * It already exists.
1066 */
1067 up(&dir->d_inode->i_sem);
1068
1069 error = -EEXIST;
1070 if (flag & O_EXCL)
1071 goto exit_dput;
1072
1073 if (d_mountpoint(dentry)) {
1074 error = -ELOOP;
1075 if (flag & O_NOFOLLOW)
1076 goto exit_dput;
1077 while (__follow_down(&nd->mnt,&dentry) && d_mountpoint(dentry));
1078 }
1079 error = -ENOENT;
1080 if (!dentry->d_inode)
1081 goto exit_dput;
1082 if (dentry->d_inode->i_op && dentry->d_inode->i_op->follow_link)
1083 goto do_link;
1084
1085 dput(nd->dentry);
1086 nd->dentry = dentry;
1087 error = -EISDIR;
1088 if (dentry->d_inode && S_ISDIR(dentry->d_inode->i_mode))
1089 goto exit;
1090 ok:
1091 error = -ENOENT;
1092 inode = dentry->d_inode;
1093 if (!inode)
1094 goto exit;
1095
1096 error = -ELOOP;
1097 if (S_ISLNK(inode->i_mode))
1098 goto exit;
1099
1100 error = -EISDIR;
1101 if (S_ISDIR(inode->i_mode) && (flag & FMODE_WRITE))
1102 goto exit;
1103
1104 error = permission(inode,acc_mode);
1105 if (error)
1106 goto exit;
1107
1108 /*
1109 * FIFO's, sockets and device files are special: they don't
1110 * actually live on the filesystem itself, and as such you
1111 * can write to them even if the filesystem is read-only.
1112 */
1113 if (S_ISFIFO(inode->i_mode) || S_ISSOCK(inode->i_mode)) {
1114 flag &= ~O_TRUNC;
1115 } else if (S_ISBLK(inode->i_mode) || S_ISCHR(inode->i_mode)) {
1116 error = -EACCES;
1117 if (nd->mnt->mnt_flags & MNT_NODEV)
1118 goto exit;
1119
1120 flag &= ~O_TRUNC;
1121 } else {
1122 error = -EROFS;
1123 if (IS_RDONLY(inode) && (flag & 2))
1124 goto exit;
1125 }
1126 /*
1127 * An append-only file must be opened in append mode for writing.
1128 */
1129 error = -EPERM;
1130 if (IS_APPEND(inode)) {
1131 if ((flag & FMODE_WRITE) && !(flag & O_APPEND))
1132 goto exit;
1133 if (flag & O_TRUNC)
1134 goto exit;
1135 }
1136
1137 /*
1138 * Ensure there are no outstanding leases on the file.
1139 */
1140 error = get_lease(inode, flag);
1141 if (error)
1142 goto exit;
1143
1144 if (flag & O_TRUNC) {
1145 error = get_write_access(inode);
1146 if (error)
1147 goto exit;
1148
1149 /*
1150 * Refuse to truncate files with mandatory locks held on them.
1151 */
1152 error = locks_verify_locked(inode);
1153 if (!error) {
1154 DQUOT_INIT(inode);
1155
1156 error = do_truncate(dentry, 0);
1157 }
1158 put_write_access(inode);
1159 if (error)
1160 goto exit;
1161 } else
1162 if (flag & FMODE_WRITE)
1163 DQUOT_INIT(inode);
1164
1165 return 0;
1166
1167 exit_dput:
1168 dput(dentry);
1169 exit:
1170 path_release(nd);
1171 return error;
1172
1173 do_link:
1174 error = -ELOOP;
1175 if (flag & O_NOFOLLOW)
1176 goto exit_dput;
1177 /*
1178 * This is subtle. Instead of calling do_follow_link() we do the
1179 * thing by hands. The reason is that this way we have zero link_count
1180 * and path_walk() (called from ->follow_link) honoring LOOKUP_PARENT.
1181 * After that we have the parent and last component, i.e.
1182 * we are in the same situation as after the first path_walk().
1183 * Well, almost - if the last component is normal we get its copy
1184 * stored in nd->last.name and we will have to putname() it when we
1185 * are done. Procfs-like symlinks just set LAST_BIND.
1186 */
1187 UPDATE_ATIME(dentry->d_inode);
1188 error = dentry->d_inode->i_op->follow_link(dentry, nd);
1189 dput(dentry);
1190 if (error)
1191 return error;
1192 if (nd->last_type == LAST_BIND) {
1193 dentry = nd->dentry;
1194 goto ok;
1195 }
1196 error = -EISDIR;
1197 if (nd->last_type != LAST_NORM)
1198 goto exit;
1199 if (nd->last.name[nd->last.len]) {
1200 putname(nd->last.name);
1201 goto exit;
1202 }
1203 error = -ELOOP;
1204 if (count++==32) {
1205 putname(nd->last.name);
1206 goto exit;
1207 }
1208 dir = nd->dentry;
1209 down(&dir->d_inode->i_sem);
1210 dentry = lookup_hash(&nd->last, nd->dentry);
1211 putname(nd->last.name);
1212 goto do_last;
1213 }
1214
1215 /* SMP-safe */
1216 static struct dentry *lookup_create(struct nameidata *nd, int is_dir)
1217 {
1218 struct dentry *dentry;
1219
1220 down(&nd->dentry->d_inode->i_sem);
1221 dentry = ERR_PTR(-EEXIST);
1222 if (nd->last_type != LAST_NORM)
1223 goto fail;
1224 dentry = lookup_hash(&nd->last, nd->dentry);
1225 if (IS_ERR(dentry))
1226 goto fail;
1227 if (!is_dir && nd->last.name[nd->last.len] && !dentry->d_inode)
1228 goto enoent;
1229 return dentry;
1230 enoent:
1231 dput(dentry);
1232 dentry = ERR_PTR(-ENOENT);
1233 fail:
1234 return dentry;
1235 }
1236
1237 int vfs_mknod(struct inode *dir, struct dentry *dentry, int mode, dev_t dev)
1238 {
1239 int error = -EPERM;
1240
1241 down(&dir->i_zombie);
1242 if ((S_ISCHR(mode) || S_ISBLK(mode)) && !capable(CAP_MKNOD))
1243 goto exit_lock;
1244
1245 error = may_create(dir, dentry);
1246 if (error)
1247 goto exit_lock;
1248
1249 error = -EPERM;
1250 if (!dir->i_op || !dir->i_op->mknod)
1251 goto exit_lock;
1252
1253 DQUOT_INIT(dir);
1254 lock_kernel();
1255 error = dir->i_op->mknod(dir, dentry, mode, dev);
1256 unlock_kernel();
1257 exit_lock:
1258 up(&dir->i_zombie);
1259 if (!error)
1260 inode_dir_notify(dir, DN_CREATE);
1261 return error;
1262 }
1263
1264 asmlinkage long sys_mknod(const char * filename, int mode, dev_t dev)
1265 {
1266 int error = 0;
1267 char * tmp;
1268 struct dentry * dentry;
1269 struct nameidata nd;
1270
1271 if (S_ISDIR(mode))
1272 return -EPERM;
1273 tmp = getname(filename);
1274 if (IS_ERR(tmp))
1275 return PTR_ERR(tmp);
1276
1277 error = path_lookup(tmp, LOOKUP_PARENT, &nd);
1278 if (error)
1279 goto out;
1280 dentry = lookup_create(&nd, 0);
1281 error = PTR_ERR(dentry);
1282
1283 mode &= ~current->fs->umask;
1284 if (!IS_ERR(dentry)) {
1285 switch (mode & S_IFMT) {
1286 case 0: case S_IFREG:
1287 error = vfs_create(nd.dentry->d_inode,dentry,mode);
1288 break;
1289 case S_IFCHR: case S_IFBLK: case S_IFIFO: case S_IFSOCK:
1290 error = vfs_mknod(nd.dentry->d_inode,dentry,mode,dev);
1291 break;
1292 case S_IFDIR:
1293 error = -EPERM;
1294 break;
1295 default:
1296 error = -EINVAL;
1297 }
1298 dput(dentry);
1299 }
1300 up(&nd.dentry->d_inode->i_sem);
1301 path_release(&nd);
1302 out:
1303 putname(tmp);
1304
1305 return error;
1306 }
1307
1308 int vfs_mkdir(struct inode *dir, struct dentry *dentry, int mode)
1309 {
1310 int error;
1311
1312 down(&dir->i_zombie);
1313 error = may_create(dir, dentry);
1314 if (error)
1315 goto exit_lock;
1316
1317 error = -EPERM;
1318 if (!dir->i_op || !dir->i_op->mkdir)
1319 goto exit_lock;
1320
1321 DQUOT_INIT(dir);
1322 mode &= (S_IRWXUGO|S_ISVTX);
1323 lock_kernel();
1324 error = dir->i_op->mkdir(dir, dentry, mode);
1325 unlock_kernel();
1326
1327 exit_lock:
1328 up(&dir->i_zombie);
1329 if (!error)
1330 inode_dir_notify(dir, DN_CREATE);
1331 return error;
1332 }
1333
1334 asmlinkage long sys_mkdir(const char * pathname, int mode)
1335 {
1336 int error = 0;
1337 char * tmp;
1338
1339 tmp = getname(pathname);
1340 error = PTR_ERR(tmp);
1341 if (!IS_ERR(tmp)) {
1342 struct dentry *dentry;
1343 struct nameidata nd;
1344
1345 error = path_lookup(tmp, LOOKUP_PARENT, &nd);
1346 if (error)
1347 goto out;
1348 dentry = lookup_create(&nd, 1);
1349 error = PTR_ERR(dentry);
1350 if (!IS_ERR(dentry)) {
1351 error = vfs_mkdir(nd.dentry->d_inode, dentry,
1352 mode & ~current->fs->umask);
1353 dput(dentry);
1354 }
1355 up(&nd.dentry->d_inode->i_sem);
1356 path_release(&nd);
1357 out:
1358 putname(tmp);
1359 }
1360
1361 return error;
1362 }
1363
1364 /*
1365 * We try to drop the dentry early: we should have
1366 * a usage count of 2 if we're the only user of this
1367 * dentry, and if that is true (possibly after pruning
1368 * the dcache), then we drop the dentry now.
1369 *
1370 * A low-level filesystem can, if it choses, legally
1371 * do a
1372 *
1373 * if (!d_unhashed(dentry))
1374 * return -EBUSY;
1375 *
1376 * if it cannot handle the case of removing a directory
1377 * that is still in use by something else..
1378 */
1379 static void d_unhash(struct dentry *dentry)
1380 {
1381 dget(dentry);
1382 spin_lock(&dcache_lock);
1383 switch (atomic_read(&dentry->d_count)) {
1384 default:
1385 spin_unlock(&dcache_lock);
1386 shrink_dcache_parent(dentry);
1387 spin_lock(&dcache_lock);
1388 if (atomic_read(&dentry->d_count) != 2)
1389 break;
1390 case 2:
1391 list_del_init(&dentry->d_hash);
1392 }
1393 spin_unlock(&dcache_lock);
1394 }
1395
1396 int vfs_rmdir(struct inode *dir, struct dentry *dentry)
1397 {
1398 int error;
1399
1400 error = may_delete(dir, dentry, 1);
1401 if (error)
1402 return error;
1403
1404 if (!dir->i_op || !dir->i_op->rmdir)
1405 return -EPERM;
1406
1407 DQUOT_INIT(dir);
1408
1409 double_down(&dir->i_zombie, &dentry->d_inode->i_zombie);
1410 d_unhash(dentry);
1411 if (d_mountpoint(dentry))
1412 error = -EBUSY;
1413 else {
1414 lock_kernel();
1415 error = dir->i_op->rmdir(dir, dentry);
1416 unlock_kernel();
1417 if (!error)
1418 dentry->d_inode->i_flags |= S_DEAD;
1419 }
1420 double_up(&dir->i_zombie, &dentry->d_inode->i_zombie);
1421 if (!error) {
1422 inode_dir_notify(dir, DN_DELETE);
1423 d_delete(dentry);
1424 }
1425 dput(dentry);
1426
1427 return error;
1428 }
1429
1430 asmlinkage long sys_rmdir(const char * pathname)
1431 {
1432 int error = 0;
1433 char * name;
1434 struct dentry *dentry;
1435 struct nameidata nd;
1436
1437 name = getname(pathname);
1438 if(IS_ERR(name))
1439 return PTR_ERR(name);
1440
1441 error = path_lookup(name, LOOKUP_PARENT, &nd);
1442 if (error)
1443 goto exit;
1444
1445 switch(nd.last_type) {
1446 case LAST_DOTDOT:
1447 error = -ENOTEMPTY;
1448 goto exit1;
1449 case LAST_DOT:
1450 error = -EINVAL;
1451 goto exit1;
1452 case LAST_ROOT:
1453 error = -EBUSY;
1454 goto exit1;
1455 }
1456 down(&nd.dentry->d_inode->i_sem);
1457 dentry = lookup_hash(&nd.last, nd.dentry);
1458 error = PTR_ERR(dentry);
1459 if (!IS_ERR(dentry)) {
1460 error = vfs_rmdir(nd.dentry->d_inode, dentry);
1461 dput(dentry);
1462 }
1463 up(&nd.dentry->d_inode->i_sem);
1464 exit1:
1465 path_release(&nd);
1466 exit:
1467 putname(name);
1468 return error;
1469 }
1470
1471 int vfs_unlink(struct inode *dir, struct dentry *dentry)
1472 {
1473 int error;
1474
1475 down(&dir->i_zombie);
1476 error = may_delete(dir, dentry, 0);
1477 if (!error) {
1478 error = -EPERM;
1479 if (dir->i_op && dir->i_op->unlink) {
1480 DQUOT_INIT(dir);
1481 if (d_mountpoint(dentry))
1482 error = -EBUSY;
1483 else {
1484 lock_kernel();
1485 error = dir->i_op->unlink(dir, dentry);
1486 unlock_kernel();
1487 if (!error)
1488 d_delete(dentry);
1489 }
1490 }
1491 }
1492 up(&dir->i_zombie);
1493 if (!error)
1494 inode_dir_notify(dir, DN_DELETE);
1495 return error;
1496 }
1497
1498 asmlinkage long sys_unlink(const char * pathname)
1499 {
1500 int error = 0;
1501 char * name;
1502 struct dentry *dentry;
1503 struct nameidata nd;
1504
1505 name = getname(pathname);
1506 if(IS_ERR(name))
1507 return PTR_ERR(name);
1508
1509 error = path_lookup(name, LOOKUP_PARENT, &nd);
1510 if (error)
1511 goto exit;
1512 error = -EISDIR;
1513 if (nd.last_type != LAST_NORM)
1514 goto exit1;
1515 down(&nd.dentry->d_inode->i_sem);
1516 dentry = lookup_hash(&nd.last, nd.dentry);
1517 error = PTR_ERR(dentry);
1518 if (!IS_ERR(dentry)) {
1519 /* Why not before? Because we want correct error value */
1520 if (nd.last.name[nd.last.len])
1521 goto slashes;
1522 error = vfs_unlink(nd.dentry->d_inode, dentry);
1523 exit2:
1524 dput(dentry);
1525 }
1526 up(&nd.dentry->d_inode->i_sem);
1527 exit1:
1528 path_release(&nd);
1529 exit:
1530 putname(name);
1531
1532 return error;
1533
1534 slashes:
1535 error = !dentry->d_inode ? -ENOENT :
1536 S_ISDIR(dentry->d_inode->i_mode) ? -EISDIR : -ENOTDIR;
1537 goto exit2;
1538 }
1539
1540 int vfs_symlink(struct inode *dir, struct dentry *dentry, const char *oldname)
1541 {
1542 int error;
1543
1544 down(&dir->i_zombie);
1545 error = may_create(dir, dentry);
1546 if (error)
1547 goto exit_lock;
1548
1549 error = -EPERM;
1550 if (!dir->i_op || !dir->i_op->symlink)
1551 goto exit_lock;
1552
1553 DQUOT_INIT(dir);
1554 lock_kernel();
1555 error = dir->i_op->symlink(dir, dentry, oldname);
1556 unlock_kernel();
1557
1558 exit_lock:
1559 up(&dir->i_zombie);
1560 if (!error)
1561 inode_dir_notify(dir, DN_CREATE);
1562 return error;
1563 }
1564
1565 asmlinkage long sys_symlink(const char * oldname, const char * newname)
1566 {
1567 int error = 0;
1568 char * from;
1569 char * to;
1570
1571 from = getname(oldname);
1572 if(IS_ERR(from))
1573 return PTR_ERR(from);
1574 to = getname(newname);
1575 error = PTR_ERR(to);
1576 if (!IS_ERR(to)) {
1577 struct dentry *dentry;
1578 struct nameidata nd;
1579
1580 error = path_lookup(to, LOOKUP_PARENT, &nd);
1581 if (error)
1582 goto out;
1583 dentry = lookup_create(&nd, 0);
1584 error = PTR_ERR(dentry);
1585 if (!IS_ERR(dentry)) {
1586 error = vfs_symlink(nd.dentry->d_inode, dentry, from);
1587 dput(dentry);
1588 }
1589 up(&nd.dentry->d_inode->i_sem);
1590 path_release(&nd);
1591 out:
1592 putname(to);
1593 }
1594 putname(from);
1595 return error;
1596 }
1597
1598 int vfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *new_dentry)
1599 {
1600 struct inode *inode;
1601 int error;
1602
1603 down(&dir->i_zombie);
1604 error = -ENOENT;
1605 inode = old_dentry->d_inode;
1606 if (!inode)
1607 goto exit_lock;
1608
1609 error = may_create(dir, new_dentry);
1610 if (error)
1611 goto exit_lock;
1612
1613 error = -EXDEV;
1614 if (dir->i_dev != inode->i_dev)
1615 goto exit_lock;
1616
1617 /*
1618 * A link to an append-only or immutable file cannot be created.
1619 */
1620 error = -EPERM;
1621 if (IS_APPEND(inode) || IS_IMMUTABLE(inode))
1622 goto exit_lock;
1623 if (!dir->i_op || !dir->i_op->link)
1624 goto exit_lock;
1625
1626 DQUOT_INIT(dir);
1627 lock_kernel();
1628 error = dir->i_op->link(old_dentry, dir, new_dentry);
1629 unlock_kernel();
1630
1631 exit_lock:
1632 up(&dir->i_zombie);
1633 if (!error)
1634 inode_dir_notify(dir, DN_CREATE);
1635 return error;
1636 }
1637
1638 /*
1639 * Hardlinks are often used in delicate situations. We avoid
1640 * security-related surprises by not following symlinks on the
1641 * newname. --KAB
1642 *
1643 * We don't follow them on the oldname either to be compatible
1644 * with linux 2.0, and to avoid hard-linking to directories
1645 * and other special files. --ADM
1646 */
1647 asmlinkage long sys_link(const char * oldname, const char * newname)
1648 {
1649 int error;
1650 char * to;
1651
1652 to = getname(newname);
1653 error = PTR_ERR(to);
1654 if (!IS_ERR(to)) {
1655 struct dentry *new_dentry;
1656 struct nameidata nd, old_nd;
1657
1658 error = __user_walk(oldname, LOOKUP_POSITIVE, &old_nd);
1659 if (error)
1660 goto exit;
1661 error = path_lookup(to, LOOKUP_PARENT, &nd);
1662 if (error)
1663 goto out;
1664 error = -EXDEV;
1665 if (old_nd.mnt != nd.mnt)
1666 goto out_release;
1667 new_dentry = lookup_create(&nd, 0);
1668 error = PTR_ERR(new_dentry);
1669 if (!IS_ERR(new_dentry)) {
1670 error = vfs_link(old_nd.dentry, nd.dentry->d_inode, new_dentry);
1671 dput(new_dentry);
1672 }
1673 up(&nd.dentry->d_inode->i_sem);
1674 out_release:
1675 path_release(&nd);
1676 out:
1677 path_release(&old_nd);
1678 exit:
1679 putname(to);
1680 }
1681 return error;
1682 }
1683
1684 /*
1685 * The worst of all namespace operations - renaming directory. "Perverted"
1686 * doesn't even start to describe it. Somebody in UCB had a heck of a trip...
1687 * Problems:
1688 * a) we can get into loop creation. Check is done in is_subdir().
1689 * b) race potential - two innocent renames can create a loop together.
1690 * That's where 4.4 screws up. Current fix: serialization on
1691 * sb->s_vfs_rename_sem. We might be more accurate, but that's another
1692 * story.
1693 * c) we have to lock _three_ objects - parents and victim (if it exists).
1694 * And that - after we got ->i_sem on parents (until then we don't know
1695 * whether the target exists at all, let alone whether it is a directory
1696 * or not). Solution: ->i_zombie. Taken only after ->i_sem. Always taken
1697 * on link creation/removal of any kind. And taken (without ->i_sem) on
1698 * directory that will be removed (both in rmdir() and here).
1699 * d) some filesystems don't support opened-but-unlinked directories,
1700 * either because of layout or because they are not ready to deal with
1701 * all cases correctly. The latter will be fixed (taking this sort of
1702 * stuff into VFS), but the former is not going away. Solution: the same
1703 * trick as in rmdir().
1704 * e) conversion from fhandle to dentry may come in the wrong moment - when
1705 * we are removing the target. Solution: we will have to grab ->i_zombie
1706 * in the fhandle_to_dentry code. [FIXME - current nfsfh.c relies on
1707 * ->i_sem on parents, which works but leads to some truly excessive
1708 * locking].
1709 */
1710 int vfs_rename_dir(struct inode *old_dir, struct dentry *old_dentry,
1711 struct inode *new_dir, struct dentry *new_dentry)
1712 {
1713 int error;
1714 struct inode *target;
1715
1716 if (old_dentry->d_inode == new_dentry->d_inode)
1717 return 0;
1718
1719 error = may_delete(old_dir, old_dentry, 1);
1720 if (error)
1721 return error;
1722
1723 if (new_dir->i_dev != old_dir->i_dev)
1724 return -EXDEV;
1725
1726 if (!new_dentry->d_inode)
1727 error = may_create(new_dir, new_dentry);
1728 else
1729 error = may_delete(new_dir, new_dentry, 1);
1730 if (error)
1731 return error;
1732
1733 if (!old_dir->i_op || !old_dir->i_op->rename)
1734 return -EPERM;
1735
1736 /*
1737 * If we are going to change the parent - check write permissions,
1738 * we'll need to flip '..'.
1739 */
1740 if (new_dir != old_dir) {
1741 error = permission(old_dentry->d_inode, MAY_WRITE);
1742 }
1743 if (error)
1744 return error;
1745
1746 DQUOT_INIT(old_dir);
1747 DQUOT_INIT(new_dir);
1748 down(&old_dir->i_sb->s_vfs_rename_sem);
1749 error = -EINVAL;
1750 if (is_subdir(new_dentry, old_dentry))
1751 goto out_unlock;
1752 /* Don't eat your daddy, dear... */
1753 /* This also avoids locking issues */
1754 if (old_dentry->d_parent == new_dentry)
1755 goto out_unlock;
1756 target = new_dentry->d_inode;
1757 if (target) { /* Hastur! Hastur! Hastur! */
1758 triple_down(&old_dir->i_zombie,
1759 &new_dir->i_zombie,
1760 &target->i_zombie);
1761 d_unhash(new_dentry);
1762 } else
1763 double_down(&old_dir->i_zombie,
1764 &new_dir->i_zombie);
1765 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
1766 error = -EBUSY;
1767 else
1768 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1769 if (target) {
1770 if (!error)
1771 target->i_flags |= S_DEAD;
1772 triple_up(&old_dir->i_zombie,
1773 &new_dir->i_zombie,
1774 &target->i_zombie);
1775 if (d_unhashed(new_dentry))
1776 d_rehash(new_dentry);
1777 dput(new_dentry);
1778 } else
1779 double_up(&old_dir->i_zombie,
1780 &new_dir->i_zombie);
1781
1782 if (!error)
1783 d_move(old_dentry,new_dentry);
1784 out_unlock:
1785 up(&old_dir->i_sb->s_vfs_rename_sem);
1786 return error;
1787 }
1788
1789 int vfs_rename_other(struct inode *old_dir, struct dentry *old_dentry,
1790 struct inode *new_dir, struct dentry *new_dentry)
1791 {
1792 int error;
1793
1794 if (old_dentry->d_inode == new_dentry->d_inode)
1795 return 0;
1796
1797 error = may_delete(old_dir, old_dentry, 0);
1798 if (error)
1799 return error;
1800
1801 if (new_dir->i_dev != old_dir->i_dev)
1802 return -EXDEV;
1803
1804 if (!new_dentry->d_inode)
1805 error = may_create(new_dir, new_dentry);
1806 else
1807 error = may_delete(new_dir, new_dentry, 0);
1808 if (error)
1809 return error;
1810
1811 if (!old_dir->i_op || !old_dir->i_op->rename)
1812 return -EPERM;
1813
1814 DQUOT_INIT(old_dir);
1815 DQUOT_INIT(new_dir);
1816 double_down(&old_dir->i_zombie, &new_dir->i_zombie);
1817 if (d_mountpoint(old_dentry)||d_mountpoint(new_dentry))
1818 error = -EBUSY;
1819 else
1820 error = old_dir->i_op->rename(old_dir, old_dentry, new_dir, new_dentry);
1821 double_up(&old_dir->i_zombie, &new_dir->i_zombie);
1822 if (error)
1823 return error;
1824 /* The following d_move() should become unconditional */
1825 if (!(old_dir->i_sb->s_type->fs_flags & FS_ODD_RENAME)) {
1826 d_move(old_dentry, new_dentry);
1827 }
1828 return 0;
1829 }
1830
1831 int vfs_rename(struct inode *old_dir, struct dentry *old_dentry,
1832 struct inode *new_dir, struct dentry *new_dentry)
1833 {
1834 int error;
1835 if (S_ISDIR(old_dentry->d_inode->i_mode))
1836 error = vfs_rename_dir(old_dir,old_dentry,new_dir,new_dentry);
1837 else
1838 error = vfs_rename_other(old_dir,old_dentry,new_dir,new_dentry);
1839 if (!error) {
1840 if (old_dir == new_dir)
1841 inode_dir_notify(old_dir, DN_RENAME);
1842 else {
1843 inode_dir_notify(old_dir, DN_DELETE);
1844 inode_dir_notify(new_dir, DN_CREATE);
1845 }
1846 }
1847 return error;
1848 }
1849
1850 static inline int do_rename(const char * oldname, const char * newname)
1851 {
1852 int error = 0;
1853 struct dentry * old_dir, * new_dir;
1854 struct dentry * old_dentry, *new_dentry;
1855 struct nameidata oldnd, newnd;
1856
1857 error = path_lookup(oldname, LOOKUP_PARENT, &oldnd);
1858 if (error)
1859 goto exit;
1860
1861 error = path_lookup(newname, LOOKUP_PARENT, &newnd);
1862 if (error)
1863 goto exit1;
1864
1865 error = -EXDEV;
1866 if (oldnd.mnt != newnd.mnt)
1867 goto exit2;
1868
1869 old_dir = oldnd.dentry;
1870 error = -EBUSY;
1871 if (oldnd.last_type != LAST_NORM)
1872 goto exit2;
1873
1874 new_dir = newnd.dentry;
1875 if (newnd.last_type != LAST_NORM)
1876 goto exit2;
1877
1878 double_lock(new_dir, old_dir);
1879
1880 old_dentry = lookup_hash(&oldnd.last, old_dir);
1881 error = PTR_ERR(old_dentry);
1882 if (IS_ERR(old_dentry))
1883 goto exit3;
1884 /* source must exist */
1885 error = -ENOENT;
1886 if (!old_dentry->d_inode)
1887 goto exit4;
1888 /* unless the source is a directory trailing slashes give -ENOTDIR */
1889 if (!S_ISDIR(old_dentry->d_inode->i_mode)) {
1890 error = -ENOTDIR;
1891 if (oldnd.last.name[oldnd.last.len])
1892 goto exit4;
1893 if (newnd.last.name[newnd.last.len])
1894 goto exit4;
1895 }
1896 new_dentry = lookup_hash(&newnd.last, new_dir);
1897 error = PTR_ERR(new_dentry);
1898 if (IS_ERR(new_dentry))
1899 goto exit4;
1900
1901 lock_kernel();
1902 error = vfs_rename(old_dir->d_inode, old_dentry,
1903 new_dir->d_inode, new_dentry);
1904 unlock_kernel();
1905
1906 dput(new_dentry);
1907 exit4:
1908 dput(old_dentry);
1909 exit3:
1910 double_up(&new_dir->d_inode->i_sem, &old_dir->d_inode->i_sem);
1911 exit2:
1912 path_release(&newnd);
1913 exit1:
1914 path_release(&oldnd);
1915 exit:
1916 return error;
1917 }
1918
1919 asmlinkage long sys_rename(const char * oldname, const char * newname)
1920 {
1921 int error;
1922 char * from;
1923 char * to;
1924
1925 from = getname(oldname);
1926 if(IS_ERR(from))
1927 return PTR_ERR(from);
1928 to = getname(newname);
1929 error = PTR_ERR(to);
1930 if (!IS_ERR(to)) {
1931 error = do_rename(from,to);
1932 putname(to);
1933 }
1934 putname(from);
1935 return error;
1936 }
1937
1938 int vfs_readlink(struct dentry *dentry, char *buffer, int buflen, const char *link)
1939 {
1940 int len;
1941
1942 len = PTR_ERR(link);
1943 if (IS_ERR(link))
1944 goto out;
1945
1946 len = strlen(link);
1947 if (len > (unsigned) buflen)
1948 len = buflen;
1949 if (copy_to_user(buffer, link, len))
1950 len = -EFAULT;
1951 out:
1952 return len;
1953 }
1954
1955 static inline int
1956 __vfs_follow_link(struct nameidata *nd, const char *link)
1957 {
1958 int res = 0;
1959 char *name;
1960 if (IS_ERR(link))
1961 goto fail;
1962
1963 if (*link == '/') {
1964 path_release(nd);
1965 if (!walk_init_root(link, nd))
1966 /* weird __emul_prefix() stuff did it */
1967 goto out;
1968 }
1969 res = link_path_walk(link, nd);
1970 out:
1971 if (current->link_count || res || nd->last_type!=LAST_NORM)
1972 return res;
1973 /*
1974 * If it is an iterative symlinks resolution in open_namei() we
1975 * have to copy the last component. And all that crap because of
1976 * bloody create() on broken symlinks. Furrfu...
1977 */
1978 name = __getname();
1979 if (!name) {
1980 path_release(nd);
1981 return -ENOMEM;
1982 }
1983 strcpy(name, nd->last.name);
1984 nd->last.name = name;
1985 return 0;
1986 fail:
1987 path_release(nd);
1988 return PTR_ERR(link);
1989 }
1990
/*
 * vfs_follow_link - non-inline entry point for __vfs_follow_link().
 * @nd:   walk state to continue from
 * @link: link text, or an ERR_PTR
 *
 * Returns whatever __vfs_follow_link() returns.
 */
int vfs_follow_link(struct nameidata *nd, const char *link)
{
	int res = __vfs_follow_link(nd, link);

	return res;
}
1995
1996 /* get the link contents into pagecache */
1997 static char *page_getlink(struct dentry * dentry, struct page **ppage)
1998 {
1999 struct page * page;
2000 struct address_space *mapping = dentry->d_inode->i_mapping;
2001 page = read_cache_page(mapping, 0, (filler_t *)mapping->a_ops->readpage,
2002 NULL);
2003 if (IS_ERR(page))
2004 goto sync_fail;
2005 wait_on_page(page);
2006 if (!Page_Uptodate(page))
2007 goto async_fail;
2008 *ppage = page;
2009 return kmap(page);
2010
2011 async_fail:
2012 page_cache_release(page);
2013 return ERR_PTR(-EIO);
2014
2015 sync_fail:
2016 return (char*)page;
2017 }
2018
2019 int page_readlink(struct dentry *dentry, char *buffer, int buflen)
2020 {
2021 struct page *page = NULL;
2022 char *s = page_getlink(dentry, &page);
2023 int res = vfs_readlink(dentry,buffer,buflen,s);
2024 if (page) {
2025 kunmap(page);
2026 page_cache_release(page);
2027 }
2028 return res;
2029 }
2030
2031 int page_follow_link(struct dentry *dentry, struct nameidata *nd)
2032 {
2033 struct page *page = NULL;
2034 char *s = page_getlink(dentry, &page);
2035 int res = __vfs_follow_link(nd, s);
2036 if (page) {
2037 kunmap(page);
2038 page_cache_release(page);
2039 }
2040 return res;
2041 }
2042
2043 struct inode_operations page_symlink_inode_operations = {
2044 readlink: page_readlink,
2045 follow_link: page_follow_link,
2046 };
Cache object: 75461ca36c2b7bad16c7ae42df86c7e2
|