/*
 * Copyright (c) 2007-2011 The DragonFly Project.  All rights reserved.
 *
 * This code is derived from software contributed to The DragonFly Project
 * by Matthew Dillon <dillon@backplane.com>
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 * 3. Neither the name of The DragonFly Project nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific, prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
 * FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE
 * COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY OR CONSEQUENTIAL DAMAGES (INCLUDING,
 * BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED
 * AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
 * OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT
 * OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * HAMMER structural locking
 */

#include "hammer.h"
#include <sys/dirent.h>
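
/*
 * Obtain an exclusive lock, sleeping with the supplied wait/debug
 * identifier until the lock can be granted.  The lock may be acquired
 * recursively by the thread already holding it exclusively.  The caller
 * must already hold a structural reference on the lock.
 */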
void
hammer_lock_ex_ident(struct hammer_lock *lock, const char *ident)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;

	KKASSERT(lock->refs);
	for (;;) {
		lv = lock->lockval;

		if (lv == 0) {
			nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				lock->lowner = td;
				break;
			}
		} else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
			   lock->lowner == td) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				break;
		} else {
			if (hammer_debug_locks) {
				kprintf("hammer_lock_ex: held by %p\n",
					lock->lowner);
			}
			nlv = lv | HAMMER_LOCKF_WANTED;
			++hammer_contention_count;
			tsleep_interlock(&lock->lockval, 0);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
				if (hammer_debug_locks)
					kprintf("hammer_lock_ex: try again\n");
			}
		}
	}
}

/*
 * Try to obtain an exclusive lock without blocking.  Returns 0 on
 * success, EAGAIN if the lock cannot be acquired immediately.
 */
int
hammer_lock_ex_try(struct hammer_lock *lock)
{
	thread_t td = curthread;
	int error;
	u_int lv;
	u_int nlv;

	KKASSERT(lock->refs);
	for (;;) {
		lv = lock->lockval;

		if (lv == 0) {
			nlv = 1 | HAMMER_LOCKF_EXCLUSIVE;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				lock->lowner = td;
				error = 0;
				break;
			}
		} else if ((lv & HAMMER_LOCKF_EXCLUSIVE) &&
			   lock->lowner == td) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				error = 0;
				break;
			}
		} else {
			error = EAGAIN;
			break;
		}
	}
	return (error);
}
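
/*
 * Illustrative sketch (not part of the original source): the try variant
 * lets a caller that already holds one lock avoid a lock-order deadlock
 * by backing off instead of blocking.  The function and variable names
 * here are hypothetical.
 */
#if 0
static int
example_lock_pair(struct hammer_lock *a, struct hammer_lock *b)
{
	hammer_lock_ex(a);
	if (hammer_lock_ex_try(b) == EAGAIN) {
		/* could not get 'b' without blocking; back off 'a' */
		hammer_unlock(a);
		return (EAGAIN);
	}
	return (0);
}
#endif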

/*
 * Obtain a shared lock
 *
 * We do not give pending exclusive locks priority over shared locks as
 * doing so could lead to a deadlock.
 */
void
hammer_lock_sh(struct hammer_lock *lock)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;
	const char *ident = "hmrlck";

	KKASSERT(lock->refs);
	for (;;) {
		lv = lock->lockval;

		if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				break;
		} else if (lock->lowner == td) {
			/*
			 * Disallowed case: the caller already holds the
			 * lock exclusively.  Drop into the kernel debugger
			 * for now; a 'continue' from the debugger proceeds
			 * with the exclusive lock still held.
			 */
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				if (hammer_debug_critical)
					Debugger("hammer_lock_sh: holding ex");
				break;
			}
		} else {
			nlv = lv | HAMMER_LOCKF_WANTED;
			++hammer_contention_count;
			tsleep_interlock(&lock->lockval, 0);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				tsleep(&lock->lockval, PINTERLOCKED, ident, 0);
		}
	}
}
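
/*
 * Try to obtain a shared lock without blocking.  Returns 0 on success
 * or EAGAIN if the lock is held exclusively by another thread.
 */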
int
hammer_lock_sh_try(struct hammer_lock *lock)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;
	int error;

	KKASSERT(lock->refs);
	for (;;) {
		lv = lock->lockval;

		if ((lv & HAMMER_LOCKF_EXCLUSIVE) == 0) {
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				error = 0;
				break;
			}
		} else if (lock->lowner == td) {
			/*
			 * Disallowed case: the caller already holds the
			 * lock exclusively.  Drop into the kernel debugger
			 * for now; a 'continue' from the debugger proceeds
			 * with the exclusive lock still held.
			 */
			nlv = (lv + 1);
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				if (hammer_debug_critical)
					Debugger("hammer_lock_sh: holding ex");
				error = 0;
				break;
			}
		} else {
			error = EAGAIN;
			break;
		}
	}
	return (error);
}

/*
 * Upgrade a shared lock to an exclusively held lock.  This function will
 * return EDEADLK if there is more than one shared holder.
 *
 * No error occurs and no action is taken if the lock is already exclusively
 * held by the caller.  If the lock is not held at all or held exclusively
 * by someone else, this function will panic.
 */
int
hammer_lock_upgrade(struct hammer_lock *lock, int shcount)
{
	thread_t td = curthread;
	u_int lv;
	u_int nlv;
	int error;

	for (;;) {
		lv = lock->lockval;

		if ((lv & ~HAMMER_LOCKF_WANTED) == shcount) {
			nlv = lv | HAMMER_LOCKF_EXCLUSIVE;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				lock->lowner = td;
				error = 0;
				break;
			}
		} else if (lv & HAMMER_LOCKF_EXCLUSIVE) {
			if (lock->lowner != curthread)
				panic("hammer_lock_upgrade: illegal state");
			error = 0;
			break;
		} else if ((lv & ~HAMMER_LOCKF_WANTED) == 0) {
			panic("hammer_lock_upgrade: lock is not held");
			/* NOT REACHED */
			error = EDEADLK;
			break;
		} else {
			error = EDEADLK;
			break;
		}
	}
	return (error);
}
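
/*
 * Illustrative sketch (not part of the original source): a caller holding
 * a single shared count can attempt an upgrade and fall back to a full
 * release/reacquire cycle when other shared holders force EDEADLK.  The
 * surrounding function is hypothetical.
 */
#if 0
static void
example_upgrade(struct hammer_lock *lock)
{
	hammer_lock_sh(lock);
	if (hammer_lock_upgrade(lock, 1) == EDEADLK) {
		/*
		 * Other shared holders exist; reacquire exclusively.  Any
		 * state observed under the shared lock must be re-validated.
		 */
		hammer_unlock(lock);
		hammer_lock_ex(lock);
	}
	/* ... modify ... */
	hammer_unlock(lock);
}
#endif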

/*
 * Downgrade an exclusively held lock to a shared lock.
 */
void
hammer_lock_downgrade(struct hammer_lock *lock, int shcount)
{
	thread_t td __debugvar = curthread;
	u_int lv;
	u_int nlv;

	KKASSERT((lock->lockval & ~HAMMER_LOCKF_WANTED) ==
		 (HAMMER_LOCKF_EXCLUSIVE | shcount));
	KKASSERT(lock->lowner == td);

	/*
	 * NOTE: Must clear owner before releasing exclusivity
	 */
	lock->lowner = NULL;

	for (;;) {
		lv = lock->lockval;
		nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
		if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
			if (lv & HAMMER_LOCKF_WANTED)
				wakeup(&lock->lockval);
			break;
		}
	}
}

void
hammer_unlock(struct hammer_lock *lock)
{
	thread_t td __debugvar = curthread;
	u_int lv;
	u_int nlv;

	lv = lock->lockval;
	KKASSERT(lv != 0);
	if (lv & HAMMER_LOCKF_EXCLUSIVE)
		KKASSERT(lock->lowner == td);

	for (;;) {
		lv = lock->lockval;
		nlv = lv & ~(HAMMER_LOCKF_EXCLUSIVE | HAMMER_LOCKF_WANTED);
		if (nlv > 1) {
			nlv = lv - 1;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv))
				break;
		} else if (nlv == 1) {
			nlv = 0;
			if (lv & HAMMER_LOCKF_EXCLUSIVE)
				lock->lowner = NULL;
			if (atomic_cmpset_int(&lock->lockval, lv, nlv)) {
				if (lv & HAMMER_LOCKF_WANTED)
					wakeup(&lock->lockval);
				break;
			}
		} else {
			panic("hammer_unlock: lock %p is not held", lock);
		}
	}
}

/*
 * The calling thread must be holding a shared or exclusive lock.
 * Returns < 0 if the lock is held shared, and > 0 if it is held
 * exclusively.
 */
int
hammer_lock_status(struct hammer_lock *lock)
{
	u_int lv = lock->lockval;

	if (lv & HAMMER_LOCKF_EXCLUSIVE)
		return(1);
	else if (lv)
		return(-1);
	panic("hammer_lock_status: lock must be held: %p", lock);
}

/*
 * Bump the ref count for a lock (not the excl/share count, but a separate
 * structural reference count).  The CHECK flag will be set on a 0->1
 * transition.
 *
 * This function does nothing to serialize races between multiple threads.
 * The caller can interlock it later on to deal with serialization.
 *
 * MPSAFE
 */
void
hammer_ref(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		if ((lv & ~HAMMER_REFS_FLAGS) == 0) {
			nlv = (lv + 1) | HAMMER_REFS_CHECK;
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return;
		} else {
			nlv = (lv + 1);
			KKASSERT((int)nlv > 0);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return;
		}
	}
	/* not reached */
}

/*
 * Drop the ref count for a lock (not the excl/share count, but a separate
 * structural reference count).  The CHECK flag will be cleared on a 1->0
 * transition.
 *
 * This function does nothing to serialize races between multiple threads.
 *
 * MPSAFE
 */
void
hammer_rel(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
			nlv = (lv - 1) & ~HAMMER_REFS_CHECK;
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return;
		} else {
			KKASSERT((int)lv > 0);
			nlv = (lv - 1);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return;
		}
	}
	/* not reached */
}
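
/*
 * Illustrative sketch (not part of the original source): hammer_ref()
 * and hammer_rel() pair up around any span where a structure must not
 * be reclaimed.  The structure name 'node' is hypothetical.
 */
#if 0
	hammer_ref(&node->lock);	/* pin the structure */
	/* ... use the structure; it cannot be reclaimed here ... */
	hammer_rel(&node->lock);	/* unpin */
#endif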

/*
 * The hammer_*_interlock() and hammer_*_interlock_done() functions are
 * more sophisticated versions which handle MP transition races and block
 * when necessary.
 *
 * hammer_ref_interlock() bumps the ref-count and conditionally acquires
 * the interlock for 0->1 transitions or if the CHECK bit is found to be
 * set.
 *
 * This case will return TRUE, the interlock will be held, and the CHECK
 * bit also set.  Other threads attempting to ref will see the CHECK bit
 * and block until we clean up.
 *
 * FALSE is returned for transitions other than 0->1 when the CHECK bit
 * is not found to be set, or if the function loses the race with another
 * thread.
 *
 * TRUE is only returned to one thread and the others will block.
 * Effectively a TRUE indicator means 'someone transitioned 0->1
 * and you are the first thread to successfully lock it after that, so
 * you need to check'.  Due to races the ref-count may be greater than 1
 * upon return.
 *
 * MPSAFE
 */
int
hammer_ref_interlock(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	/*
	 * Integrated reference count bump, lock, and check, with hot-path.
	 *
	 * (a) Return 1	 (+LOCKED, +CHECK)	0->1 transition
	 * (b) Return 0	 (-LOCKED, -CHECK)	N->N+1 transition
	 * (c) Break out (+CHECK)		check condition and cannot lock
	 * (d) Return 1	 (+LOCKED, +CHECK)	successfully locked
	 */
	for (;;) {
		lv = lock->refs;
		if (lv == 0) {
			nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return(1);
			}
		} else {
			nlv = (lv + 1);
			if ((lv & ~HAMMER_REFS_FLAGS) == 0)
				nlv |= HAMMER_REFS_CHECK;
			if ((nlv & HAMMER_REFS_CHECK) == 0) {
				if (atomic_cmpset_int(&lock->refs, lv, nlv))
					return(0);
			} else if (lv & HAMMER_REFS_LOCKED) {
				/* CHECK also set here */
				if (atomic_cmpset_int(&lock->refs, lv, nlv))
					break;
			} else {
				/* CHECK also set here */
				nlv |= HAMMER_REFS_LOCKED;
				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
					lock->rowner = curthread;
					return(1);
				}
			}
		}
	}

	/*
	 * Deferred check condition because we were unable to acquire the
	 * lock.  We must block until the check condition is cleared due
	 * to a race with another thread, or we are able to acquire the
	 * lock.
	 *
	 * (a) Return 0	(-CHECK)		Another thread handled it
	 * (b) Return 1	(+LOCKED, +CHECK)	We handled it.
	 */
	for (;;) {
		lv = lock->refs;
		if ((lv & HAMMER_REFS_CHECK) == 0)
			return(0);
		if (lv & HAMMER_REFS_LOCKED) {
			tsleep_interlock(&lock->refs, 0);
			nlv = (lv | HAMMER_REFS_WANTED);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				tsleep(&lock->refs, PINTERLOCKED, "h1lk", 0);
		} else {
			/* CHECK also set here */
			nlv = lv | HAMMER_REFS_LOCKED;
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return(1);
			}
		}
	}
	/* not reached */
}
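
/*
 * Illustrative sketch (not part of the original source): the load-path
 * pattern the interlock comments above describe.  The structure and
 * load function names are hypothetical.
 */
#if 0
	if (hammer_ref_interlock(&ip->lock)) {
		/*
		 * We won the 0->1 (or CHECK) race and hold the interlock;
		 * other referencing threads block until we finish.
		 */
		example_load_inode(ip);
		hammer_ref_interlock_done(&ip->lock);
	}
	/* else the structure was already referenced and validated */
#endif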

/*
 * This is the same as hammer_ref_interlock() but asserts that the
 * 0->1 transition is always true, thus the lock must have no references
 * on entry or have CHECK set, and will have one reference with the
 * interlock held on return.  It must also not be interlocked on entry
 * by anyone.
 *
 * NOTE that CHECK will never be found set when the ref-count is 0.
 *
 * TRUE is always returned to match the API for hammer_ref_interlock().
 * This function returns with one ref, the lock held, and the CHECK bit set.
 */
int
hammer_ref_interlock_true(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;

		if (lv) {
			panic("hammer_ref_interlock_true: bad lock %p %08x",
			      lock, lock->refs);
		}
		nlv = 1 | HAMMER_REFS_LOCKED | HAMMER_REFS_CHECK;
		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
			lock->rowner = curthread;
			return (1);
		}
	}
}

/*
 * Unlock the interlock acquired by hammer_ref_interlock() and clear the
 * CHECK flag.  The ref-count remains unchanged.
 *
 * This routine is called in the load path when the load succeeds.
 */
void
hammer_ref_interlock_done(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		nlv = lv & ~HAMMER_REFS_FLAGS;
		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
			if (lv & HAMMER_REFS_WANTED)
				wakeup(&lock->refs);
			break;
		}
	}
}

/*
 * hammer_rel_interlock() works a bit differently in that it must
 * acquire the lock in tandem with a 1->0 transition.  CHECK is
 * not used.
 *
 * TRUE is returned on 1->0 transitions with the lock held on return
 * and FALSE is returned otherwise with the lock not held.
 *
 * It is important to note that the refs are not stable and may
 * increase while we hold the lock; the TRUE indication only means
 * that we transitioned 1->0, not necessarily that we stayed at 0.
 *
 * Another thread bumping refs while we hold the lock will set CHECK,
 * causing one of the competing hammer_ref_interlock() calls to
 * return TRUE after we release our lock.
 *
 * MPSAFE
 */
int
hammer_rel_interlock(struct hammer_lock *lock, int locked)
{
	u_int lv;
	u_int nlv;

	/*
	 * In locked mode (failure/unload path) we release the
	 * ref-count but leave it locked.
	 */
	if (locked) {
		hammer_rel(lock);
		return(1);
	}

	/*
	 * Integrated reference count drop with LOCKED, plus the hot-path
	 * returns.
	 */
	for (;;) {
		lv = lock->refs;

		if (lv == 1) {
			nlv = 0 | HAMMER_REFS_LOCKED;
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return(1);
			}
		} else if ((lv & ~HAMMER_REFS_FLAGS) == 1) {
			if ((lv & HAMMER_REFS_LOCKED) == 0) {
				nlv = (lv - 1) | HAMMER_REFS_LOCKED;
				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
					lock->rowner = curthread;
					return(1);
				}
			} else {
				nlv = lv | HAMMER_REFS_WANTED;
				tsleep_interlock(&lock->refs, 0);
				if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
					tsleep(&lock->refs, PINTERLOCKED,
					       "h0lk", 0);
				}
			}
		} else {
			nlv = (lv - 1);
			KKASSERT((int)nlv >= 0);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				return(0);
		}
	}
	/* not reached */
}
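
/*
 * Illustrative sketch (not part of the original source): the teardown-path
 * counterpart to the load-path sketch above.  The structure and teardown
 * function names are hypothetical.
 */
#if 0
	if (hammer_rel_interlock(&ip->lock, 0)) {
		/*
		 * We transitioned 1->0 and hold the interlock; check
		 * whether the structure can be torn down (refs may have
		 * been bumped again while we held the interlock).
		 */
		example_try_teardown(ip);
		hammer_rel_interlock_done(&ip->lock, 0);
	}
#endif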

/*
 * Unlock the interlock acquired by hammer_rel_interlock().
 *
 * If orig_locked is non-zero the interlock was originally held prior to
 * the hammer_rel_interlock() call and passed through to us.  In this
 * case we want to retain the CHECK error state if not transitioning
 * to 0.
 *
 * The code is the same either way so we do not have to conditionalize
 * on orig_locked.
 */
void
hammer_rel_interlock_done(struct hammer_lock *lock, int orig_locked __unused)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);
		if ((lv & ~HAMMER_REFS_FLAGS) == 0)
			nlv &= ~HAMMER_REFS_CHECK;
		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
			if (lv & HAMMER_REFS_WANTED)
				wakeup(&lock->refs);
			break;
		}
	}
}

/*
 * Acquire the interlock on lock->refs.
 *
 * Return TRUE if CHECK is currently set.  Note that CHECK will not
 * be set if the reference count is 0, but can get set if this function
 * is preceded by, say, hammer_ref(), or through races with other
 * threads.  The return value allows the caller to use the same logic
 * as hammer_ref_interlock().
 *
 * MPSAFE
 */
int
hammer_get_interlock(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		if (lv & HAMMER_REFS_LOCKED) {
			nlv = lv | HAMMER_REFS_WANTED;
			tsleep_interlock(&lock->refs, 0);
			if (atomic_cmpset_int(&lock->refs, lv, nlv))
				tsleep(&lock->refs, PINTERLOCKED, "hilk", 0);
		} else {
			nlv = (lv | HAMMER_REFS_LOCKED);
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return((lv & HAMMER_REFS_CHECK) ? 1 : 0);
			}
		}
	}
}

/*
 * Attempt to acquire the interlock and expect 0 refs.  Used by the buffer
 * cache callback code to disassociate or lock the bufs related to HAMMER
 * structures.
 *
 * During teardown the related bp will be acquired by hammer_io_release()
 * which interlocks our test.
 *
 * Returns non-zero on success, zero on failure.
 */
int
hammer_try_interlock_norefs(struct hammer_lock *lock)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		if (lv == 0) {
			nlv = lv | HAMMER_REFS_LOCKED;
			if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
				lock->rowner = curthread;
				return(1);
			}
		} else {
			return(0);
		}
	}
	/* not reached */
}

/*
 * Release the interlock on lock->refs.  This function will set
 * CHECK if the refs is non-zero and error is non-zero, and clear
 * CHECK otherwise.
 *
 * MPSAFE
 */
void
hammer_put_interlock(struct hammer_lock *lock, int error)
{
	u_int lv;
	u_int nlv;

	for (;;) {
		lv = lock->refs;
		KKASSERT(lv & HAMMER_REFS_LOCKED);
		nlv = lv & ~(HAMMER_REFS_LOCKED | HAMMER_REFS_WANTED);

		if ((nlv & ~HAMMER_REFS_FLAGS) == 0 || error == 0)
			nlv &= ~HAMMER_REFS_CHECK;
		else
			nlv |= HAMMER_REFS_CHECK;

		if (atomic_cmpset_int(&lock->refs, lv, nlv)) {
			if (lv & HAMMER_REFS_WANTED)
				wakeup(&lock->refs);
			return;
		}
	}
}

/*
 * The sync_lock must be held when doing any modifying operations on
 * meta-data.  It does not have to be held when modifying non-meta-data
 * buffers (backend or frontend).
 *
 * The flusher holds the lock exclusively while all other consumers hold it
 * shared.  All modifying operations made while holding the lock are atomic
 * in that they will be made part of the same flush group.
 *
 * Due to the atomicity requirement deadlock recovery code CANNOT release
 * the sync lock, nor can we give pending exclusive sync locks priority
 * over a shared sync lock as this could lead to a 3-way deadlock.
 */
void
hammer_sync_lock_ex(hammer_transaction_t trans)
{
	++trans->sync_lock_refs;
	hammer_lock_ex(&trans->hmp->sync_lock);
}

void
hammer_sync_lock_sh(hammer_transaction_t trans)
{
	++trans->sync_lock_refs;
	hammer_lock_sh(&trans->hmp->sync_lock);
}

int
hammer_sync_lock_sh_try(hammer_transaction_t trans)
{
	int error;

	++trans->sync_lock_refs;
	if ((error = hammer_lock_sh_try(&trans->hmp->sync_lock)) != 0)
		--trans->sync_lock_refs;
	return (error);
}

void
hammer_sync_unlock(hammer_transaction_t trans)
{
	--trans->sync_lock_refs;
	hammer_unlock(&trans->hmp->sync_lock);
}
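
/*
 * Illustrative sketch (not part of the original source): a frontend
 * consumer brackets a meta-data modification with the shared sync lock
 * so it lands in a single flush group; the flusher side would use
 * hammer_sync_lock_ex() instead.  The modify function is hypothetical.
 */
#if 0
	hammer_sync_lock_sh(trans);
	example_modify_metadata(trans);
	hammer_sync_unlock(trans);
#endif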

/*
 * Misc
 */
u_int32_t
hammer_to_unix_xid(uuid_t *uuid)
{
	return(*(u_int32_t *)&uuid->node[2]);
}

void
hammer_guid_to_uuid(uuid_t *uuid, u_int32_t guid)
{
	bzero(uuid, sizeof(*uuid));
	*(u_int32_t *)&uuid->node[2] = guid;
}

void
hammer_time_to_timespec(u_int64_t xtime, struct timespec *ts)
{
	ts->tv_sec = (unsigned long)(xtime / 1000000);
	ts->tv_nsec = (unsigned int)(xtime % 1000000) * 1000L;
}

u_int64_t
hammer_timespec_to_time(struct timespec *ts)
{
	u_int64_t xtime;

	xtime = (unsigned)(ts->tv_nsec / 1000) +
		(unsigned long)ts->tv_sec * 1000000ULL;
	return(xtime);
}

/*
 * Convert a HAMMER filesystem object type to a vnode type
 */
enum vtype
hammer_get_vnode_type(u_int8_t obj_type)
{
	switch(obj_type) {
	case HAMMER_OBJTYPE_DIRECTORY:
		return(VDIR);
	case HAMMER_OBJTYPE_REGFILE:
		return(VREG);
	case HAMMER_OBJTYPE_DBFILE:
		return(VDATABASE);
	case HAMMER_OBJTYPE_FIFO:
		return(VFIFO);
	case HAMMER_OBJTYPE_SOCKET:
		return(VSOCK);
	case HAMMER_OBJTYPE_CDEV:
		return(VCHR);
	case HAMMER_OBJTYPE_BDEV:
		return(VBLK);
	case HAMMER_OBJTYPE_SOFTLINK:
		return(VLNK);
	default:
		return(VBAD);
	}
	/* not reached */
}

int
hammer_get_dtype(u_int8_t obj_type)
{
	switch(obj_type) {
	case HAMMER_OBJTYPE_DIRECTORY:
		return(DT_DIR);
	case HAMMER_OBJTYPE_REGFILE:
		return(DT_REG);
	case HAMMER_OBJTYPE_DBFILE:
		return(DT_DBF);
	case HAMMER_OBJTYPE_FIFO:
		return(DT_FIFO);
	case HAMMER_OBJTYPE_SOCKET:
		return(DT_SOCK);
	case HAMMER_OBJTYPE_CDEV:
		return(DT_CHR);
	case HAMMER_OBJTYPE_BDEV:
		return(DT_BLK);
	case HAMMER_OBJTYPE_SOFTLINK:
		return(DT_LNK);
	default:
		return(DT_UNKNOWN);
	}
	/* not reached */
}

u_int8_t
hammer_get_obj_type(enum vtype vtype)
{
	switch(vtype) {
	case VDIR:
		return(HAMMER_OBJTYPE_DIRECTORY);
	case VREG:
		return(HAMMER_OBJTYPE_REGFILE);
	case VDATABASE:
		return(HAMMER_OBJTYPE_DBFILE);
	case VFIFO:
		return(HAMMER_OBJTYPE_FIFO);
	case VSOCK:
		return(HAMMER_OBJTYPE_SOCKET);
	case VCHR:
		return(HAMMER_OBJTYPE_CDEV);
	case VBLK:
		return(HAMMER_OBJTYPE_BDEV);
	case VLNK:
		return(HAMMER_OBJTYPE_SOFTLINK);
	default:
		return(HAMMER_OBJTYPE_UNKNOWN);
	}
	/* not reached */
}

/*
 * Return flags for hammer_delete_at_cursor()
 */
int
hammer_nohistory(hammer_inode_t ip)
{
	if (ip->hmp->hflags & HMNT_NOHISTORY)
		return(HAMMER_DELETE_DESTROY);
	if (ip->ino_data.uflags & (SF_NOHISTORY|UF_NOHISTORY))
		return(HAMMER_DELETE_DESTROY);
	return(0);
}

/*
 * ALGORITHM VERSION 0:
 * Return a namekey hash.  The 64 bit namekey hash consists of a 32 bit
 * crc in the MSB and 0 in the LSB.  The caller will use the low 32 bits
 * to generate a unique key and will scan all entries with the same upper
 * 32 bits when issuing a lookup.
 *
 * 0hhhhhhhhhhhhhhh hhhhhhhhhhhhhhhh 0000000000000000 0000000000000000
 *
 * ALGORITHM VERSION 1:
 *
 * This algorithm breaks the filename down into a separate 32-bit CRC
 * for each filename segment separated by a special character (dot,
 * dash, underscore, or tilde).  The CRCs are then added together, so
 * adding or removing a segment (such as a temporary extension) shifts
 * the key rather than randomizing it.  A full-filename 16 bit crc is
 * also generated to deal with degenerate conditions.
 *
 * The algorithm is designed to handle create/rename situations such
 * that a create with an extension followed by a rename without an
 * extension only shifts the key space rather than randomizes it.
 *
 * NOTE: The inode allocator cache can only match 10 bits so we do
 *	 not really have any room for a partial sorted name, and
 *	 numbers don't sort well in that situation anyway.
 *
 * 0mmmmmmmmmmmmmmm mmmmmmmmmmmmmmmm llllllllllllllll 0000000000000000
 *
 * We strip bit 63 in order to provide a positive key, this way a seek
 * offset of 0 will represent the base of the directory.
 *
 * We usually strip bit 0 (set it to 0) in order to provide a consistent
 * iteration space for collisions.
 *
 * This function can never return 0.  We use the MSB-0 space to synthesize
 * artificial directory entries such as "." and "..".
 */
int64_t
hammer_directory_namekey(hammer_inode_t dip, const void *name, int len,
			 u_int32_t *max_iterationsp)
{
	const char *aname = name;
	int32_t crcx;
	int64_t key;
	int i;
	int j;

	switch (dip->ino_data.cap_flags & HAMMER_INODE_CAP_DIRHASH_MASK) {
	case HAMMER_INODE_CAP_DIRHASH_ALG0:
		/*
		 * Original algorithm
		 */
		key = (int64_t)(crc32(aname, len) & 0x7FFFFFFF) << 32;
		if (key == 0)
			key |= 0x100000000LL;
		*max_iterationsp = 0xFFFFFFFFU;
		break;
	case HAMMER_INODE_CAP_DIRHASH_ALG1:
		/*
		 * Filesystem version 6 or better will create directories
		 * using the ALG1 dirhash.  This hash breaks the filename
		 * up into domains separated by special characters and
		 * hashes each domain independently.
		 *
		 * We also do a simple sub-sort using the first character
		 * of the filename in the top 5-bits.
		 */
		key = 0;

		/*
		 * m32
		 */
		crcx = 0;
		for (i = j = 0; i < len; ++i) {
			if (aname[i] == '.' ||
			    aname[i] == '-' ||
			    aname[i] == '_' ||
			    aname[i] == '~') {
				if (i != j)
					crcx += crc32(aname + j, i - j);
				j = i + 1;
			}
		}
		if (i != j)
			crcx += crc32(aname + j, i - j);

#if 0
		/*
		 * xor top 5 bits 0mmmm into low bits and steal the top 5
		 * bits as a semi sub sort using the first character of
		 * the filename.  bit 63 is always left as 0 so directory
		 * keys are positive numbers.
		 */
		crcx ^= (uint32_t)crcx >> (32 - 5);
		crcx = (crcx & 0x07FFFFFF) | ((aname[0] & 0x0F) << (32 - 5));
#endif
		crcx &= 0x7FFFFFFFU;

		key |= (uint64_t)crcx << 32;

		/*
		 * l16 - crc of entire filename
		 *
		 * This crc reduces degenerate hash collision conditions
		 */
		crcx = crc32(aname, len);
		crcx = crcx ^ (crcx << 16);
		key |= crcx & 0xFFFF0000U;

		/*
		 * Cleanup
		 */
		if ((key & 0xFFFFFFFF00000000LL) == 0)
			key |= 0x100000000LL;
		if (hammer_debug_general & 0x0400) {
			kprintf("namekey2: 0x%016llx %*.*s\n",
				(long long)key, len, len, aname);
		}
		*max_iterationsp = 0x00FFFFFF;
		break;
	case HAMMER_INODE_CAP_DIRHASH_ALG2:
	case HAMMER_INODE_CAP_DIRHASH_ALG3:
	default:
		key = 0;			/* compiler warning */
		*max_iterationsp = 1;		/* sanity */
		panic("hammer_directory_namekey: bad algorithm %p", dip);
		break;
	}
	return(key);
}
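
/*
 * Illustrative sketch (not part of the original source): how a lookup
 * consumer uses the namekey and iteration bound together, per the
 * algorithm comments above.  The scan helper is hypothetical.
 */
#if 0
	int64_t key;
	u_int32_t max_iterations;

	key = hammer_directory_namekey(dip, name, len, &max_iterations);
	/* scan directory keys in [key, key + max_iterations] for a match */
	example_scan_directory(dip, key, key + max_iterations);
#endif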

/*
 * Convert string after @@ (@@ not included) to TID.  Returns 0 on success,
 * EINVAL on failure.
 *
 * If this function fails *ispfsp, *tidp, and *localizationp will not
 * be modified.
 */
int
hammer_str_to_tid(const char *str, int *ispfsp,
		  hammer_tid_t *tidp, u_int32_t *localizationp)
{
	hammer_tid_t tid;
	u_int32_t localization;
	char *ptr;
	int ispfs;
	int n;

	/*
	 * Forms allowed for TID:  "0x%016llx"
	 *			   "-1"
	 */
	tid = strtouq(str, &ptr, 0);
	n = ptr - str;
	if (n == 2 && str[0] == '-' && str[1] == '1') {
		/* ok */
	} else if (n == 18 && str[0] == '0' && (str[1] | 0x20) == 'x') {
		/* ok */
	} else {
		return(EINVAL);
	}

	/*
	 * Forms allowed for PFS:  ":%05d"  (i.e. "...:0" would be illegal).
	 */
	str = ptr;
	if (*str == ':') {
		localization = strtoul(str + 1, &ptr, 10) << 16;
		if (ptr - str != 6)
			return(EINVAL);
		str = ptr;
		ispfs = 1;
	} else {
		localization = *localizationp;
		ispfs = 0;
	}

	/*
	 * Any trailing junk invalidates special extension handling.
	 */
	if (*str)
		return(EINVAL);
	*tidp = tid;
	*localizationp = localization;
	*ispfsp = ispfs;
	return(0);
}
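
/*
 * Illustrative sketch (not part of the original source): parsing the
 * "@@<tid>:<pfs>" suffix of a snapshot path.  The TID and PFS values
 * shown are arbitrary examples.
 */
#if 0
	hammer_tid_t tid;
	u_int32_t localization = 0;
	int ispfs;

	if (hammer_str_to_tid("0x0000000106a8ba01:00001", &ispfs,
			      &tid, &localization) == 0) {
		/* tid = 0x0000000106a8ba01, PFS #1 in the high 16 bits */
	}
#endif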

void
hammer_crc_set_blockmap(hammer_blockmap_t blockmap)
{
	blockmap->entry_crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
}

void
hammer_crc_set_volume(hammer_volume_ondisk_t ondisk)
{
	ondisk->vol_crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
			  crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
}

int
hammer_crc_test_blockmap(hammer_blockmap_t blockmap)
{
	hammer_crc_t crc;

	crc = crc32(blockmap, HAMMER_BLOCKMAP_CRCSIZE);
	return (blockmap->entry_crc == crc);
}

int
hammer_crc_test_volume(hammer_volume_ondisk_t ondisk)
{
	hammer_crc_t crc;

	crc = crc32(ondisk, HAMMER_VOL_CRCSIZE1) ^
	      crc32(&ondisk->vol_crc + 1, HAMMER_VOL_CRCSIZE2);
	return (ondisk->vol_crc == crc);
}

int
hammer_crc_test_btree(hammer_node_ondisk_t ondisk)
{
	hammer_crc_t crc;

	crc = crc32(&ondisk->crc + 1, HAMMER_BTREE_CRCSIZE);
	return (ondisk->crc == crc);
}

/*
 * Test or set the leaf->data_crc field.  Deal with any special cases given
 * a generic B-Tree leaf element and its data.
 *
 * NOTE: Inode-data: the atime and mtime fields are not CRCd, allowing them
 *       to be updated in-place.
 */
int
hammer_crc_test_leaf(void *data, hammer_btree_leaf_elm_t leaf)
{
	hammer_crc_t crc;

	if (leaf->data_len == 0) {
		crc = 0;
	} else {
		switch(leaf->base.rec_type) {
		case HAMMER_RECTYPE_INODE:
			if (leaf->data_len != sizeof(struct hammer_inode_data))
				return(0);
			crc = crc32(data, HAMMER_INODE_CRCSIZE);
			break;
		default:
			crc = crc32(data, leaf->data_len);
			break;
		}
	}
	return (leaf->data_crc == crc);
}

void
hammer_crc_set_leaf(void *data, hammer_btree_leaf_elm_t leaf)
{
	if (leaf->data_len == 0) {
		leaf->data_crc = 0;
	} else {
		switch(leaf->base.rec_type) {
		case HAMMER_RECTYPE_INODE:
			KKASSERT(leaf->data_len ==
				 sizeof(struct hammer_inode_data));
			leaf->data_crc = crc32(data, HAMMER_INODE_CRCSIZE);
			break;
		default:
			leaf->data_crc = crc32(data, leaf->data_len);
			break;
		}
	}
}

void
hkprintf(const char *ctl, ...)
{
	__va_list va;

	if (hammer_debug_debug) {
		__va_start(va, ctl);
		kvprintf(ctl, va);
		__va_end(va);
	}
}

/*
 * Return the block size at the specified file offset.
 */
int
hammer_blocksize(int64_t file_offset)
{
	if (file_offset < HAMMER_XDEMARC)
		return(HAMMER_BUFSIZE);
	else
		return(HAMMER_XBUFSIZE);
}

int
hammer_blockoff(int64_t file_offset)
{
	if (file_offset < HAMMER_XDEMARC)
		return((int)file_offset & HAMMER_BUFMASK);
	else
		return((int)file_offset & HAMMER_XBUFMASK);
}
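
/*
 * Illustrative sketch (not part of the original source): computing the
 * extent of the buffer containing a caller-provided file_offset with
 * the two helpers above.
 */
#if 0
	int64_t base_offset;
	int nblksize;

	nblksize = hammer_blocksize(file_offset);
	base_offset = file_offset - hammer_blockoff(file_offset);
	/* the buffer spans [base_offset, base_offset + nblksize) */
#endif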

/*
 * Return the demarcation point between the two offsets where
 * the block size changes.
 */
int64_t
hammer_blockdemarc(int64_t file_offset1, int64_t file_offset2)
{
	if (file_offset1 < HAMMER_XDEMARC) {
		if (file_offset2 <= HAMMER_XDEMARC)
			return(file_offset2);
		return(HAMMER_XDEMARC);
	}
	panic("hammer_blockdemarc: illegal range %lld %lld",
	      (long long)file_offset1, (long long)file_offset2);
}

udev_t
hammer_fsid_to_udev(uuid_t *uuid)
{
	u_int32_t crc;

	crc = crc32(uuid, sizeof(*uuid));
	return((udev_t)crc);
}