FreeBSD/Linux Kernel Cross Reference
sys/kern/subr_blist.c
1 /*-
2 * Copyright (c) 1998 Matthew Dillon. All Rights Reserved.
3 * Redistribution and use in source and binary forms, with or without
4 * modification, are permitted provided that the following conditions
5 * are met:
6 * 1. Redistributions of source code must retain the above copyright
7 * notice, this list of conditions and the following disclaimer.
8 * 2. Redistributions in binary form must reproduce the above copyright
9 * notice, this list of conditions and the following disclaimer in the
10 * documentation and/or other materials provided with the distribution.
11 * 4. Neither the name of the University nor the names of its contributors
12 * may be used to endorse or promote products derived from this software
13 * without specific prior written permission.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS
16 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 * WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
19 * DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
21 * GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26 */
27 /*
28 * BLIST.C - Bitmap allocator/deallocator, using a radix tree with hinting
29 *
30 * This module implements a general bitmap allocator/deallocator. The
31 * allocator eats around 2 bits per 'block'. The module does not
 *	try to interpret the meaning of a 'block' other than to return
33 * SWAPBLK_NONE on an allocation failure.
34 *
35 * A radix tree is used to maintain the bitmap. Two radix constants are
36 * involved: One for the bitmaps contained in the leaf nodes (typically
37 * 32), and one for the meta nodes (typically 16). Both meta and leaf
38 * nodes have a hint field. This field gives us a hint as to the largest
39 * free contiguous range of blocks under the node. It may contain a
40 * value that is too high, but will never contain a value that is too
41 * low. When the radix tree is searched, allocation failures in subtrees
42 * update the hint.
43 *
44 * The radix tree also implements two collapsed states for meta nodes:
45 * the ALL-ALLOCATED state and the ALL-FREE state. If a meta node is
46 * in either of these two states, all information contained underneath
47 * the node is considered stale. These states are used to optimize
48 * allocation and freeing operations.
49 *
50 * The hinting greatly increases code efficiency for allocations while
51 * the general radix structure optimizes both allocations and frees. The
52 * radix tree should be able to operate well no matter how much
53 * fragmentation there is and no matter how large a bitmap is used.
54 *
55 * Unlike the rlist code, the blist code wires all necessary memory at
56 * creation time. Neither allocations nor frees require interaction with
57 * the memory subsystem. In contrast, the rlist code may allocate memory
58 * on an rlist_free() call. The non-blocking features of the blist code
59 * are used to great advantage in the swap code (vm/nswap_pager.c). The
60 * rlist code uses a little less overall memory then the blist code (but
61 * due to swap interleaving not all that much less), but the blist code
62 * scales much, much better.
63 *
64 * LAYOUT: The radix tree is layed out recursively using a
65 * linear array. Each meta node is immediately followed (layed out
66 * sequentially in memory) by BLIST_META_RADIX lower level nodes. This
67 * is a recursive structure but one that can be easily scanned through
68 * a very simple 'skip' calculation. In order to support large radixes,
69 * portions of the tree may reside outside our memory allocation. We
70 * handle this with an early-termination optimization (when bighint is
71 * set to -1) on the scan. The memory allocation is only large enough
72 * to cover the number of blocks requested at creation time even if it
73 * must be encompassed in larger root-node radix.
74 *
 * NOTE: the allocator cannot currently allocate more than
76 * BLIST_BMAP_RADIX blocks per call. It will panic with 'allocation too
77 * large' if you try. This is an area that could use improvement. The
78 * radix is large enough that this restriction does not effect the swap
79 * system, though. Currently only the allocation code is effected by
80 * this algorithmic unfeature. The freeing code can handle arbitrary
81 * ranges.
82 *
83 * This code can be compiled stand-alone for debugging.
84 */
85
86 #include <sys/cdefs.h>
87 __FBSDID("$FreeBSD: releng/5.2/sys/kern/subr_blist.c 118848 2003-08-12 23:24:05Z imp $");
88
89 #ifdef _KERNEL
90
91 #include <sys/param.h>
92 #include <sys/systm.h>
93 #include <sys/lock.h>
94 #include <sys/kernel.h>
95 #include <sys/blist.h>
96 #include <sys/malloc.h>
97 #include <sys/proc.h>
98 #include <sys/mutex.h>
99 #include <vm/vm.h>
100 #include <vm/vm_object.h>
101 #include <vm/vm_kern.h>
102 #include <vm/vm_extern.h>
103 #include <vm/vm_page.h>
104
105 #else
106
107 #ifndef BLIST_NO_DEBUG
108 #define BLIST_DEBUG
109 #endif
110
111 #define SWAPBLK_NONE ((daddr_t)-1)
112
113 #include <sys/types.h>
114 #include <stdio.h>
115 #include <string.h>
116 #include <stdlib.h>
117 #include <stdarg.h>
118
119 #define malloc(a,b,c) calloc(a, 1)
120 #define free(a,b) free(a)
121
122 typedef unsigned int u_daddr_t;
123
124 #include <sys/blist.h>
125
126 void panic(const char *ctl, ...);
127
128 #endif
129
130 /*
131 * static support functions
132 */
133
134 static daddr_t blst_leaf_alloc(blmeta_t *scan, daddr_t blk, int count);
135 static daddr_t blst_meta_alloc(blmeta_t *scan, daddr_t blk,
136 daddr_t count, daddr_t radix, int skip);
137 static void blst_leaf_free(blmeta_t *scan, daddr_t relblk, int count);
138 static void blst_meta_free(blmeta_t *scan, daddr_t freeBlk, daddr_t count,
139 daddr_t radix, int skip, daddr_t blk);
140 static void blst_copy(blmeta_t *scan, daddr_t blk, daddr_t radix,
141 daddr_t skip, blist_t dest, daddr_t count);
142 static int blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count);
143 static int blst_meta_fill(blmeta_t *scan, daddr_t allocBlk, daddr_t count,
144 daddr_t radix, int skip, daddr_t blk);
145 static daddr_t blst_radix_init(blmeta_t *scan, daddr_t radix,
146 int skip, daddr_t count);
147 #ifndef _KERNEL
148 static void blst_radix_print(blmeta_t *scan, daddr_t blk,
149 daddr_t radix, int skip, int tab);
150 #endif
151
152 #ifdef _KERNEL
153 static MALLOC_DEFINE(M_SWAP, "SWAP", "Swap space");
154 #endif
155
156 /*
157 * blist_create() - create a blist capable of handling up to the specified
158 * number of blocks
159 *
160 * blocks must be greater then 0
161 *
162 * The smallest blist consists of a single leaf node capable of
163 * managing BLIST_BMAP_RADIX blocks.
164 */
165
166 blist_t
167 blist_create(daddr_t blocks)
168 {
169 blist_t bl;
170 int radix;
171 int skip = 0;
172
173 /*
174 * Calculate radix and skip field used for scanning.
175 */
176 radix = BLIST_BMAP_RADIX;
177
178 while (radix < blocks) {
179 radix *= BLIST_META_RADIX;
180 skip = (skip + 1) * BLIST_META_RADIX;
181 }
182
183 bl = malloc(sizeof(struct blist), M_SWAP, M_WAITOK | M_ZERO);
184
185 bl->bl_blocks = blocks;
186 bl->bl_radix = radix;
187 bl->bl_skip = skip;
188 bl->bl_rootblks = 1 +
189 blst_radix_init(NULL, bl->bl_radix, bl->bl_skip, blocks);
190 bl->bl_root = malloc(sizeof(blmeta_t) * bl->bl_rootblks, M_SWAP, M_WAITOK);
191
192 #if defined(BLIST_DEBUG)
193 printf(
194 "BLIST representing %lld blocks (%lld MB of swap)"
195 ", requiring %lldK of ram\n",
196 (long long)bl->bl_blocks,
197 (long long)bl->bl_blocks * 4 / 1024,
198 (long long)(bl->bl_rootblks * sizeof(blmeta_t) + 1023) / 1024
199 );
200 printf("BLIST raw radix tree contains %lld records\n",
201 (long long)bl->bl_rootblks);
202 #endif
203 blst_radix_init(bl->bl_root, bl->bl_radix, bl->bl_skip, blocks);
204
205 return(bl);
206 }
207
208 void
209 blist_destroy(blist_t bl)
210 {
211 free(bl->bl_root, M_SWAP);
212 free(bl, M_SWAP);
213 }
214
215 /*
216 * blist_alloc() - reserve space in the block bitmap. Return the base
217 * of a contiguous region or SWAPBLK_NONE if space could
218 * not be allocated.
219 */
220
221 daddr_t
222 blist_alloc(blist_t bl, daddr_t count)
223 {
224 daddr_t blk = SWAPBLK_NONE;
225
226 if (bl) {
227 if (bl->bl_radix == BLIST_BMAP_RADIX)
228 blk = blst_leaf_alloc(bl->bl_root, 0, count);
229 else
230 blk = blst_meta_alloc(bl->bl_root, 0, count, bl->bl_radix, bl->bl_skip);
231 if (blk != SWAPBLK_NONE)
232 bl->bl_free -= count;
233 }
234 return(blk);
235 }
236
237 /*
238 * blist_free() - free up space in the block bitmap. Return the base
 *		of a contiguous region.  Panic if an inconsistency is
240 * found.
241 */
242
243 void
244 blist_free(blist_t bl, daddr_t blkno, daddr_t count)
245 {
246 if (bl) {
247 if (bl->bl_radix == BLIST_BMAP_RADIX)
248 blst_leaf_free(bl->bl_root, blkno, count);
249 else
250 blst_meta_free(bl->bl_root, blkno, count, bl->bl_radix, bl->bl_skip, 0);
251 bl->bl_free += count;
252 }
253 }
254
255 /*
256 * blist_fill() - mark a region in the block bitmap as off-limits
257 * to the allocator (i.e. allocate it), ignoring any
258 * existing allocations. Return the number of blocks
259 * actually filled that were free before the call.
260 */
261
262 int
263 blist_fill(blist_t bl, daddr_t blkno, daddr_t count)
264 {
265 int filled;
266
267 if (bl) {
268 if (bl->bl_radix == BLIST_BMAP_RADIX)
269 filled = blst_leaf_fill(bl->bl_root, blkno, count);
270 else
271 filled = blst_meta_fill(bl->bl_root, blkno, count,
272 bl->bl_radix, bl->bl_skip, 0);
273 bl->bl_free -= filled;
274 return filled;
275 } else
276 return 0;
277 }
278
279 /*
280 * blist_resize() - resize an existing radix tree to handle the
281 * specified number of blocks. This will reallocate
282 * the tree and transfer the previous bitmap to the new
283 * one. When extending the tree you can specify whether
284 * the new blocks are to left allocated or freed.
285 */
286
287 void
288 blist_resize(blist_t *pbl, daddr_t count, int freenew)
289 {
290 blist_t newbl = blist_create(count);
291 blist_t save = *pbl;
292
293 *pbl = newbl;
294 if (count > save->bl_blocks)
295 count = save->bl_blocks;
296 blst_copy(save->bl_root, 0, save->bl_radix, save->bl_skip, newbl, count);
297
298 /*
299 * If resizing upwards, should we free the new space or not?
300 */
301 if (freenew && count < newbl->bl_blocks) {
302 blist_free(newbl, count, newbl->bl_blocks - count);
303 }
304 blist_destroy(save);
305 }
306
307 #ifdef BLIST_DEBUG
308
309 /*
310 * blist_print() - dump radix tree
311 */
312
void
blist_print(blist_t bl)
{
	/* Dump the whole radix tree recursively, indented 4 per level. */
	printf("BLIST {\n");
	blst_radix_print(bl->bl_root, 0, bl->bl_radix, bl->bl_skip, 4);
	printf("}\n");
}
320
321 #endif
322
323 /************************************************************************
324 * ALLOCATION SUPPORT FUNCTIONS *
325 ************************************************************************
326 *
327 * These support functions do all the actual work. They may seem
328 * rather longish, but that's because I've commented them up. The
329 * actual code is straight forward.
330 *
331 */
332
333 /*
334 * blist_leaf_alloc() - allocate at a leaf in the radix tree (a bitmap).
335 *
336 * This is the core of the allocator and is optimized for the 1 block
337 * and the BLIST_BMAP_RADIX block allocation cases. Other cases are
338 * somewhat slower. The 1 block allocation case is log2 and extremely
339 * quick.
340 */
341
342 static daddr_t
343 blst_leaf_alloc(
344 blmeta_t *scan,
345 daddr_t blk,
346 int count
347 ) {
348 u_daddr_t orig = scan->u.bmu_bitmap;
349
350 if (orig == 0) {
351 /*
352 * Optimize bitmap all-allocated case. Also, count = 1
353 * case assumes at least 1 bit is free in the bitmap, so
354 * we have to take care of this case here.
355 */
356 scan->bm_bighint = 0;
357 return(SWAPBLK_NONE);
358 }
359 if (count == 1) {
360 /*
361 * Optimized code to allocate one bit out of the bitmap
362 */
363 u_daddr_t mask;
364 int j = BLIST_BMAP_RADIX/2;
365 int r = 0;
366
367 mask = (u_daddr_t)-1 >> (BLIST_BMAP_RADIX/2);
368
369 while (j) {
370 if ((orig & mask) == 0) {
371 r += j;
372 orig >>= j;
373 }
374 j >>= 1;
375 mask >>= j;
376 }
377 scan->u.bmu_bitmap &= ~(1 << r);
378 return(blk + r);
379 }
380 if (count <= BLIST_BMAP_RADIX) {
381 /*
382 * non-optimized code to allocate N bits out of the bitmap.
383 * The more bits, the faster the code runs. It will run
384 * the slowest allocating 2 bits, but since there aren't any
385 * memory ops in the core loop (or shouldn't be, anyway),
386 * you probably won't notice the difference.
387 */
388 int j;
389 int n = BLIST_BMAP_RADIX - count;
390 u_daddr_t mask;
391
392 mask = (u_daddr_t)-1 >> n;
393
394 for (j = 0; j <= n; ++j) {
395 if ((orig & mask) == mask) {
396 scan->u.bmu_bitmap &= ~mask;
397 return(blk + j);
398 }
399 mask = (mask << 1);
400 }
401 }
402 /*
403 * We couldn't allocate count in this subtree, update bighint.
404 */
405 scan->bm_bighint = count - 1;
406 return(SWAPBLK_NONE);
407 }
408
409 /*
410 * blist_meta_alloc() - allocate at a meta in the radix tree.
411 *
412 * Attempt to allocate at a meta node. If we can't, we update
413 * bighint and return a failure. Updating bighint optimize future
414 * calls that hit this node. We have to check for our collapse cases
415 * and we have a few optimizations strewn in as well.
416 */
417
static daddr_t
blst_meta_alloc(
	blmeta_t *scan,		/* meta node to allocate from */
	daddr_t blk,		/* absolute first block covered by this node */
	daddr_t count,		/* number of contiguous blocks wanted */
	daddr_t radix,		/* number of blocks this node covers */
	int skip		/* array distance to this node's last child */
) {
	int i;
	/* Array stride between consecutive children of this node. */
	int next_skip = ((u_int)skip / BLIST_META_RADIX);

	if (scan->u.bmu_avail == 0) {
		/*
		 * ALL-ALLOCATED special case: nothing under this node is
		 * free.  Record the failed size as the hint and bail.
		 */
		scan->bm_bighint = count;
		return(SWAPBLK_NONE);
	}

	if (scan->u.bmu_avail == radix) {
		radix /= BLIST_META_RADIX;

		/*
		 * ALL-FREE special case: the children are stale, so
		 * initialize the uninitialized sublevel (everything free)
		 * before descending.  Stop at the (daddr_t)-1 terminator
		 * marking children beyond the allocated part of the tree.
		 */
		for (i = 1; i <= skip; i += next_skip) {
			if (scan[i].bm_bighint == (daddr_t)-1)
				break;
			if (next_skip == 1) {
				/* Child is a leaf: all bitmap bits free. */
				scan[i].u.bmu_bitmap = (u_daddr_t)-1;
				scan[i].bm_bighint = BLIST_BMAP_RADIX;
			} else {
				/* Child is a meta node: fully free. */
				scan[i].bm_bighint = radix;
				scan[i].u.bmu_avail = radix;
			}
		}
	} else {
		radix /= BLIST_META_RADIX;
	}

	for (i = 1; i <= skip; i += next_skip) {
		if (count <= scan[i].bm_bighint) {
			/*
			 * count fits in object.  The hint may be stale-high,
			 * so the recursive attempt can still fail.
			 */
			daddr_t r;
			if (next_skip == 1) {
				r = blst_leaf_alloc(&scan[i], blk, count);
			} else {
				r = blst_meta_alloc(&scan[i], blk, count, radix, next_skip - 1);
			}
			if (r != SWAPBLK_NONE) {
				/* Success: charge this node's free count. */
				scan->u.bmu_avail -= count;
				if (scan->bm_bighint > scan->u.bmu_avail)
					scan->bm_bighint = scan->u.bmu_avail;
				return(r);
			}
		} else if (scan[i].bm_bighint == (daddr_t)-1) {
			/*
			 * Terminator: no further children were allocated.
			 */
			break;
		} else if (count > radix) {
			/*
			 * count does not fit in object even if it were
			 * completely free.
			 */
			panic("blist_meta_alloc: allocation too large");
		}
		blk += radix;
	}

	/*
	 * We couldn't allocate count in this subtree, update bighint.
	 */
	if (scan->bm_bighint >= count)
		scan->bm_bighint = count - 1;
	return(SWAPBLK_NONE);
}
498
499 /*
500 * BLST_LEAF_FREE() - free allocated block from leaf bitmap
501 *
502 */
503
static void
blst_leaf_free(
	blmeta_t *scan,		/* leaf node (set bit == free block) */
	daddr_t blk,		/* absolute block number of first block to free */
	int count		/* number of blocks to free */
) {
	/*
	 * free some data in this bitmap
	 *
	 * e.g.
	 *	0000111111111110000
	 *	  \_________/\__/
	 *	v        n
	 */
	int n = blk & (BLIST_BMAP_RADIX - 1);
	u_daddr_t mask;

	/* Set bits [n, n+count) within this leaf's bitmap. */
	mask = ((u_daddr_t)-1 << n) &
	    ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));

	/* A bit already set in the range indicates a double free. */
	if (scan->u.bmu_bitmap & mask)
		panic("blst_radix_free: freeing free block");
	scan->u.bmu_bitmap |= mask;

	/*
	 * We could probably do a better job here.  We are required to make
	 * bighint at least as large as the biggest contiguous block of
	 * data.  If we just shoehorn it, a little extra overhead will
	 * be incured on the next allocation (but only that one typically).
	 */
	scan->bm_bighint = BLIST_BMAP_RADIX;
}
536
537 /*
538 * BLST_META_FREE() - free allocated blocks from radix tree meta info
539 *
540 * This support routine frees a range of blocks from the bitmap.
541 * The range must be entirely enclosed by this radix node. If a
542 * meta node, we break the range down recursively to free blocks
543 * in subnodes (which means that this code can free an arbitrary
544 * range whereas the allocation code cannot allocate an arbitrary
545 * range).
546 */
547
static void
blst_meta_free(
	blmeta_t *scan,		/* meta node covering the range */
	daddr_t freeBlk,	/* absolute first block to free */
	daddr_t count,		/* number of blocks to free */
	daddr_t radix,		/* number of blocks this node covers */
	int skip,		/* array distance to this node's last child */
	daddr_t blk		/* absolute first block covered by this node */
) {
	int i;
	/* Array stride between consecutive children of this node. */
	int next_skip = ((u_int)skip / BLIST_META_RADIX);

#if 0
	printf("FREE (%llx,%lld) FROM (%llx,%lld)\n",
	    (long long)freeBlk, (long long)count,
	    (long long)blk, (long long)radix
	);
#endif

	if (scan->u.bmu_avail == 0) {
		/*
		 * ALL-ALLOCATED special case, with possible
		 * shortcut to ALL-FREE special case.
		 */
		scan->u.bmu_avail = count;
		scan->bm_bighint = count;

		if (count != radix)  {
			/*
			 * Not freeing the whole node: the children are
			 * stale, so mark them all-allocated here before
			 * the recursion below frees the affected ones.
			 * Stop at the (daddr_t)-1 terminator.
			 */
			for (i = 1; i <= skip; i += next_skip) {
				if (scan[i].bm_bighint == (daddr_t)-1)
					break;
				scan[i].bm_bighint = 0;
				if (next_skip == 1) {
					scan[i].u.bmu_bitmap = 0;
				} else {
					scan[i].u.bmu_avail = 0;
				}
			}
			/* fall through */
		}
	} else {
		scan->u.bmu_avail += count;
		/* scan->bm_bighint = radix; */
	}

	/*
	 * ALL-FREE special case.
	 */

	if (scan->u.bmu_avail == radix)
		return;
	/* More free than the node covers means a double free below us. */
	if (scan->u.bmu_avail > radix)
		panic("blst_meta_free: freeing already free blocks (%lld) %lld/%lld",
		    (long long)count, (long long)scan->u.bmu_avail,
		    (long long)radix);

	/*
	 * Break the free down into its components
	 */

	radix /= BLIST_META_RADIX;

	/* Index (and block offset) of the first child overlapping the range. */
	i = (freeBlk - blk) / radix;
	blk += i * radix;
	i = i * next_skip + 1;

	while (i <= skip && blk < freeBlk + count) {
		daddr_t v;

		/* Portion of the remaining range inside this child. */
		v = blk + radix - freeBlk;
		if (v > count)
			v = count;

		if (scan->bm_bighint == (daddr_t)-1)
			panic("blst_meta_free: freeing unexpected range");

		if (next_skip == 1) {
			blst_leaf_free(&scan[i], freeBlk, v);
		} else {
			blst_meta_free(&scan[i], freeBlk, v, radix, next_skip - 1, blk);
		}
		/* Propagate a child's larger hint up to this node. */
		if (scan->bm_bighint < scan[i].bm_bighint)
			scan->bm_bighint = scan[i].bm_bighint;
		count -= v;
		freeBlk += v;
		blk += radix;
		i += next_skip;
	}
}
637
638 /*
639 * BLIST_RADIX_COPY() - copy one radix tree to another
640 *
641 * Locates free space in the source tree and frees it in the destination
642 * tree. The space may not already be free in the destination.
643 */
644
645 static void blst_copy(
646 blmeta_t *scan,
647 daddr_t blk,
648 daddr_t radix,
649 daddr_t skip,
650 blist_t dest,
651 daddr_t count
652 ) {
653 int next_skip;
654 int i;
655
656 /*
657 * Leaf node
658 */
659
660 if (radix == BLIST_BMAP_RADIX) {
661 u_daddr_t v = scan->u.bmu_bitmap;
662
663 if (v == (u_daddr_t)-1) {
664 blist_free(dest, blk, count);
665 } else if (v != 0) {
666 int i;
667
668 for (i = 0; i < BLIST_BMAP_RADIX && i < count; ++i) {
669 if (v & (1 << i))
670 blist_free(dest, blk + i, 1);
671 }
672 }
673 return;
674 }
675
676 /*
677 * Meta node
678 */
679
680 if (scan->u.bmu_avail == 0) {
681 /*
682 * Source all allocated, leave dest allocated
683 */
684 return;
685 }
686 if (scan->u.bmu_avail == radix) {
687 /*
688 * Source all free, free entire dest
689 */
690 if (count < radix)
691 blist_free(dest, blk, count);
692 else
693 blist_free(dest, blk, radix);
694 return;
695 }
696
697
698 radix /= BLIST_META_RADIX;
699 next_skip = ((u_int)skip / BLIST_META_RADIX);
700
701 for (i = 1; count && i <= skip; i += next_skip) {
702 if (scan[i].bm_bighint == (daddr_t)-1)
703 break;
704
705 if (count >= radix) {
706 blst_copy(
707 &scan[i],
708 blk,
709 radix,
710 next_skip - 1,
711 dest,
712 radix
713 );
714 count -= radix;
715 } else {
716 if (count) {
717 blst_copy(
718 &scan[i],
719 blk,
720 radix,
721 next_skip - 1,
722 dest,
723 count
724 );
725 }
726 count = 0;
727 }
728 blk += radix;
729 }
730 }
731
732 /*
733 * BLST_LEAF_FILL() - allocate specific blocks in leaf bitmap
734 *
735 * This routine allocates all blocks in the specified range
736 * regardless of any existing allocations in that range. Returns
737 * the number of blocks allocated by the call.
738 */
739
740 static int
741 blst_leaf_fill(blmeta_t *scan, daddr_t blk, int count)
742 {
743 int n = blk & (BLIST_BMAP_RADIX - 1);
744 int nblks;
745 u_daddr_t mask, bitmap;
746
747 mask = ((u_daddr_t)-1 << n) &
748 ((u_daddr_t)-1 >> (BLIST_BMAP_RADIX - count - n));
749
750 /* Count the number of blocks we're about to allocate */
751 bitmap = scan->u.bmu_bitmap & mask;
752 for (nblks = 0; bitmap != 0; nblks++)
753 bitmap &= bitmap - 1;
754
755 scan->u.bmu_bitmap &= ~mask;
756 return nblks;
757 }
758
759 /*
760 * BLIST_META_FILL() - allocate specific blocks at a meta node
761 *
762 * This routine allocates the specified range of blocks,
763 * regardless of any existing allocations in the range. The
764 * range must be within the extent of this node. Returns the
765 * number of blocks allocated by the call.
766 */
static int
blst_meta_fill(
	blmeta_t *scan,		/* meta node covering the range */
	daddr_t allocBlk,	/* absolute first block to mark allocated */
	daddr_t count,		/* number of blocks to mark allocated */
	daddr_t radix,		/* number of blocks this node covers */
	int skip,		/* array distance to this node's last child */
	daddr_t blk		/* absolute first block covered by this node */
) {
	int i;
	/* Array stride between consecutive children of this node. */
	int next_skip = ((u_int)skip / BLIST_META_RADIX);
	int nblks = 0;

	if (count == radix || scan->u.bmu_avail == 0)  {
		/*
		 * ALL-ALLOCATED special case: we are filling the whole
		 * node, or nothing beneath it is free.  Whatever was free
		 * is what gets "filled"; children become stale.
		 */
		nblks = scan->u.bmu_avail;
		scan->u.bmu_avail = 0;
		scan->bm_bighint = count;
		return nblks;
	}

	if (scan->u.bmu_avail == radix) {
		radix /= BLIST_META_RADIX;

		/*
		 * ALL-FREE special case, initialize sublevel: children
		 * are stale when the node is wholly free, so mark them
		 * all-free before the recursion below updates them.
		 * Stop at the (daddr_t)-1 terminator.
		 */
		for (i = 1; i <= skip; i += next_skip) {
			if (scan[i].bm_bighint == (daddr_t)-1)
				break;
			if (next_skip == 1) {
				scan[i].u.bmu_bitmap = (u_daddr_t)-1;
				scan[i].bm_bighint = BLIST_BMAP_RADIX;
			} else {
				scan[i].bm_bighint = radix;
				scan[i].u.bmu_avail = radix;
			}
		}
	} else {
		radix /= BLIST_META_RADIX;
	}

	if (count > radix)
		panic("blist_meta_fill: allocation too large");

	/* Index (and block offset) of the first child overlapping the range. */
	i = (allocBlk - blk) / radix;
	blk += i * radix;
	i = i * next_skip + 1;

	while (i <= skip && blk < allocBlk + count) {
		daddr_t v;

		/* Portion of the remaining range inside this child. */
		v = blk + radix - allocBlk;
		if (v > count)
			v = count;

		if (scan->bm_bighint == (daddr_t)-1)
			panic("blst_meta_fill: filling unexpected range");

		if (next_skip == 1) {
			nblks += blst_leaf_fill(&scan[i], allocBlk, v);
		} else {
			nblks += blst_meta_fill(&scan[i], allocBlk, v,
			    radix, next_skip - 1, blk);
		}
		count -= v;
		allocBlk += v;
		blk += radix;
		i += next_skip;
	}
	/* Only blocks that were actually free were consumed. */
	scan->u.bmu_avail -= nblks;
	return nblks;
}
842
843 /*
844 * BLST_RADIX_INIT() - initialize radix tree
845 *
846 * Initialize our meta structures and bitmaps and calculate the exact
847 * amount of space required to manage 'count' blocks - this space may
848 * be considerably less then the calculated radix due to the large
849 * RADIX values we use.
850 */
851
static daddr_t
blst_radix_init(blmeta_t *scan, daddr_t radix, int skip, daddr_t count)
{
	int i;
	int next_skip;
	/* Highest node index touched; doubles as the size estimate. */
	daddr_t memindex = 0;

	/*
	 * Leaf node
	 */

	if (radix == BLIST_BMAP_RADIX) {
		if (scan) {
			/* Leaves start all-allocated (caller frees later). */
			scan->bm_bighint = 0;
			scan->u.bmu_bitmap = 0;
		}
		return(memindex);
	}

	/*
	 * Meta node.  If allocating the entire object we can special
	 * case it.  However, we need to figure out how much memory
	 * is required to manage 'count' blocks, so we continue on anyway.
	 *
	 * Note: scan may be NULL, in which case this pass only computes
	 * the required node count without writing anything.
	 */

	if (scan) {
		scan->bm_bighint = 0;
		scan->u.bmu_avail = 0;
	}

	radix /= BLIST_META_RADIX;
	next_skip = ((u_int)skip / BLIST_META_RADIX);

	for (i = 1; i <= skip; i += next_skip) {
		if (count >= radix) {
			/*
			 * Allocate the entire object
			 */
			memindex = i + blst_radix_init(
			    ((scan) ? &scan[i] : NULL),
			    radix,
			    next_skip - 1,
			    radix
			);
			count -= radix;
		} else if (count > 0) {
			/*
			 * Allocate a partial object
			 */
			memindex = i + blst_radix_init(
			    ((scan) ? &scan[i] : NULL),
			    radix,
			    next_skip - 1,
			    count
			);
			count = 0;
		} else {
			/*
			 * Add terminator and break out.  The (daddr_t)-1
			 * hint tells scans that no further children exist.
			 */
			if (scan)
				scan[i].bm_bighint = (daddr_t)-1;
			break;
		}
	}
	/* Account for this node itself even when no children were needed. */
	if (memindex < i)
		memindex = i;
	return(memindex);
}
921
922 #ifdef BLIST_DEBUG
923
924 static void
925 blst_radix_print(blmeta_t *scan, daddr_t blk, daddr_t radix, int skip, int tab)
926 {
927 int i;
928 int next_skip;
929 int lastState = 0;
930
931 if (radix == BLIST_BMAP_RADIX) {
932 printf(
933 "%*.*s(%08llx,%lld): bitmap %08llx big=%lld\n",
934 tab, tab, "",
935 (long long)blk, (long long)radix,
936 (long long)scan->u.bmu_bitmap,
937 (long long)scan->bm_bighint
938 );
939 return;
940 }
941
942 if (scan->u.bmu_avail == 0) {
943 printf(
944 "%*.*s(%08llx,%lld) ALL ALLOCATED\n",
945 tab, tab, "",
946 (long long)blk,
947 (long long)radix
948 );
949 return;
950 }
951 if (scan->u.bmu_avail == radix) {
952 printf(
953 "%*.*s(%08llx,%lld) ALL FREE\n",
954 tab, tab, "",
955 (long long)blk,
956 (long long)radix
957 );
958 return;
959 }
960
961 printf(
962 "%*.*s(%08llx,%lld): subtree (%lld/%lld) big=%lld {\n",
963 tab, tab, "",
964 (long long)blk, (long long)radix,
965 (long long)scan->u.bmu_avail,
966 (long long)radix,
967 (long long)scan->bm_bighint
968 );
969
970 radix /= BLIST_META_RADIX;
971 next_skip = ((u_int)skip / BLIST_META_RADIX);
972 tab += 4;
973
974 for (i = 1; i <= skip; i += next_skip) {
975 if (scan[i].bm_bighint == (daddr_t)-1) {
976 printf(
977 "%*.*s(%08llx,%lld): Terminator\n",
978 tab, tab, "",
979 (long long)blk, (long long)radix
980 );
981 lastState = 0;
982 break;
983 }
984 blst_radix_print(
985 &scan[i],
986 blk,
987 radix,
988 next_skip - 1,
989 tab
990 );
991 blk += radix;
992 }
993 tab -= 4;
994
995 printf(
996 "%*.*s}\n",
997 tab, tab, ""
998 );
999 }
1000
1001 #endif
1002
1003 #ifdef BLIST_DEBUG
1004
int
main(int ac, char **av)
{
	int size = 1024;	/* default number of blocks to manage */
	int i;
	blist_t bl;

	/* A bare numeric argument sets the blist size; no options exist. */
	for (i = 1; i < ac; ++i) {
		const char *ptr = av[i];
		if (*ptr != '-') {
			size = strtol(ptr, NULL, 0);
			continue;
		}
		ptr += 2;
		fprintf(stderr, "Bad option: %s\n", ptr - 2);
		exit(1);
	}
	/* Trees start all-allocated; free everything to begin with. */
	bl = blist_create(size);
	blist_free(bl, 0, size);

	/* Interactive command loop for exercising the allocator. */
	for (;;) {
		char buf[1024];
		daddr_t da = 0;
		daddr_t count = 0;


		printf("%lld/%lld/%lld> ", (long long)bl->bl_free,
		    (long long)size, (long long)bl->bl_radix);
		fflush(stdout);
		if (fgets(buf, sizeof(buf), stdin) == NULL)
			break;
		switch(buf[0]) {
		case 'r':
			if (sscanf(buf + 1, "%lld", &count) == 1) {
				blist_resize(&bl, count, 1);
			} else {
				printf("?\n");
			}
			/*
			 * FALLTHROUGH into 'p' -- there is no break here,
			 * so the tree is printed after a resize.  NOTE
			 * (review): presumably intentional; confirm.
			 */
		case 'p':
			blist_print(bl);
			break;
		case 'a':
			if (sscanf(buf + 1, "%lld", &count) == 1) {
				daddr_t blk = blist_alloc(bl, count);
				printf(" R=%08llx\n", (long long)blk);
			} else {
				printf("?\n");
			}
			break;
		case 'f':
			if (sscanf(buf + 1, "%llx %lld",
			    (long long *)&da, (long long *)&count) == 2) {
				blist_free(bl, da, count);
			} else {
				printf("?\n");
			}
			break;
		case 'l':
			if (sscanf(buf + 1, "%llx %lld",
			    (long long *)&da, (long long *)&count) == 2) {
				printf(" n=%d\n",
				    blist_fill(bl, da, count));
			} else {
				printf("?\n");
			}
			break;
		case '?':
		case 'h':
			puts(
			    "p -print\n"
			    "a %d -allocate\n"
			    "f %x %d -free\n"
			    "l %x %d -fill\n"
			    "r %d -resize\n"
			    "h/? -help"
			);
			break;
		default:
			printf("?\n");
			break;
		}
	}
	return(0);
}
1089
/*
 * panic() -	stand-alone replacement for the kernel panic(9):
 *		print the formatted message to stderr and exit(1).
 */
void
panic(const char *ctl, ...)
{
	va_list ap;

	va_start(ap, ctl);
	vfprintf(stderr, ctl, ap);
	va_end(ap);
	fputc('\n', stderr);
	exit(1);
}
1101
1102 #endif
1103
Cache object: bf3c3067ba0d22a3f2bf78f3bc5d265a
|