FreeBSD/Linux Kernel Cross Reference
sys/vm/swap_pager.c
1 /*
2 * Copyright (c) 1994 John S. Dyson
3 * Copyright (c) 1990 University of Utah.
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * the Systems Programming Group of the University of Utah Computer
9 * Science Department.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
40 *
41 * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94
42 * $FreeBSD$
43 */
44
45 /*
46 * Quick hack to page to dedicated partition(s).
47 * TODO:
48 * Add multiprocessor locks
49 * Deal with async writes in a better fashion
50 */
51
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/kernel.h>
55 #include <sys/proc.h>
56 #include <sys/buf.h>
57 #include <sys/vnode.h>
58 #include <sys/malloc.h>
59 #include <sys/vmmeter.h>
60 #include <sys/rlist.h>
61
62 #ifndef MAX_PAGEOUT_CLUSTER
63 #define MAX_PAGEOUT_CLUSTER 16
64 #endif
65
66 #ifndef NPENDINGIO
67 #define NPENDINGIO 16
68 #endif
69
70 #define SWB_NPAGES MAX_PAGEOUT_CLUSTER
71
72 #include <vm/vm.h>
73 #include <vm/vm_prot.h>
74 #include <vm/vm_object.h>
75 #include <vm/vm_page.h>
76 #include <vm/vm_pager.h>
77 #include <vm/vm_pageout.h>
78 #include <vm/swap_pager.h>
79 #include <vm/vm_extern.h>
80
81 static int nswiodone;
82 int swap_pager_full;
83 extern int vm_swap_size;
84 static int no_swap_space = 1;
85 static int max_pageout_cluster;
86 struct rlisthdr swaplist;
87
88 TAILQ_HEAD(swpclean, swpagerclean);
89
90 typedef struct swpagerclean *swp_clean_t;
91
92 static struct swpagerclean {
93 TAILQ_ENTRY(swpagerclean) spc_list;
94 int spc_flags;
95 struct buf *spc_bp;
96 vm_object_t spc_object;
97 vm_offset_t spc_kva;
98 int spc_first;
99 int spc_count;
100 vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
101 } swcleanlist[NPENDINGIO];
102
103
104 /* spc_flags values */
105 #define SPC_ERROR 0x01
106
107 #define SWB_EMPTY (-1)
108
109 /* list of completed page cleans */
110 static struct swpclean swap_pager_done;
111
112 /* list of pending page cleans */
113 static struct swpclean swap_pager_inuse;
114
115 /* list of free pager clean structs */
116 static struct swpclean swap_pager_free;
117 static int swap_pager_free_count;
118 static int swap_pager_free_pending;
119
120 /* list of "named" anon region objects */
121 static struct pagerlst swap_pager_object_list;
122
123 /* list of "unnamed" anon region objects */
124 struct pagerlst swap_pager_un_object_list;
125
126 #define SWAP_FREE_NEEDED 0x1 /* need a swap block */
127 #define SWAP_FREE_NEEDED_BY_PAGEOUT 0x2
128 static int swap_pager_needflags;
129
130 static struct pagerlst *swp_qs[] = {
131 &swap_pager_object_list, &swap_pager_un_object_list, (struct pagerlst *) 0
132 };
133
134 /*
135 * pagerops for OBJT_SWAP - "swap pager".
136 */
137 static vm_object_t
138 swap_pager_alloc __P((void *handle, vm_ooffset_t size,
139 vm_prot_t prot, vm_ooffset_t offset));
140 static void swap_pager_dealloc __P((vm_object_t object));
141 static boolean_t
142 swap_pager_haspage __P((vm_object_t object, vm_pindex_t pindex,
143 int *before, int *after));
144 static int swap_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
145 static void swap_pager_init __P((void));
146 static void spc_free __P((swp_clean_t));
147
148 struct pagerops swappagerops = {
149 swap_pager_init,
150 swap_pager_alloc,
151 swap_pager_dealloc,
152 swap_pager_getpages,
153 swap_pager_putpages,
154 swap_pager_haspage,
155 swap_pager_sync
156 };
157
158 static int npendingio;
159 static int dmmin;
160 int dmmax;
161
162 static int swap_pager_block_index __P((vm_pindex_t pindex));
163 static int swap_pager_block_offset __P((vm_pindex_t pindex));
164 static daddr_t *swap_pager_diskaddr __P((vm_object_t object,
165 vm_pindex_t pindex, int *valid));
166 static void swap_pager_finish __P((swp_clean_t spc));
167 static void swap_pager_free_swap __P((vm_object_t object));
168 static void swap_pager_freeswapspace __P((vm_object_t object,
169 unsigned int from,
170 unsigned int to));
171 static int swap_pager_getswapspace __P((vm_object_t object,
172 unsigned int amount,
173 daddr_t *rtval));
174 static void swap_pager_iodone __P((struct buf *));
175 static void swap_pager_iodone1 __P((struct buf *bp));
176 static void swap_pager_reclaim __P((void));
177 static void swap_pager_ridpages __P((vm_page_t *m, int count,
178 int reqpage));
179 static void swap_pager_setvalid __P((vm_object_t object,
180 vm_offset_t offset, int valid));
181 static __inline void swapsizecheck __P((void));
182
183 #define SWAPLOW (vm_swap_size < (512 * btodb(PAGE_SIZE)))
184
185 static __inline void
186 swapsizecheck()
187 {
188 if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
189 if (swap_pager_full == 0)
190 printf("swap_pager: out of swap space\n");
191 swap_pager_full = 1;
192 } else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
193 swap_pager_full = 0;
194 }
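/*
 * A worked example of the hysteresis above, assuming PAGE_SIZE = 4096
 * and DEV_BSIZE = 512 (so btodb(PAGE_SIZE) == 8): swap_pager_full is
 * set once fewer than 128 * 8 == 1024 disk blocks (128 pages) of swap
 * remain, and is not cleared until more than 192 * 8 == 1536 blocks
 * are free again.  The 64-page gap keeps the flag from flapping while
 * free swap hovers near the low-water mark.
 */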
195
196 static void
197 swap_pager_init()
198 {
199 int maxsafepending;
200 TAILQ_INIT(&swap_pager_object_list);
201 TAILQ_INIT(&swap_pager_un_object_list);
202
203 /*
204 * Initialize clean lists
205 */
206 TAILQ_INIT(&swap_pager_inuse);
207 TAILQ_INIT(&swap_pager_done);
208 TAILQ_INIT(&swap_pager_free);
209 swap_pager_free_count = 0;
210
211 /*
212 * Calculate the swap allocation constants.
213 */
214 dmmin = PAGE_SIZE / DEV_BSIZE;
215 dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
216
217 maxsafepending = cnt.v_free_min - cnt.v_free_reserved;
218 npendingio = NPENDINGIO;
219 max_pageout_cluster = MAX_PAGEOUT_CLUSTER;
220
221 if ((2 * NPENDINGIO * MAX_PAGEOUT_CLUSTER) > maxsafepending) {
222 max_pageout_cluster = MAX_PAGEOUT_CLUSTER / 2;
223 npendingio = maxsafepending / (2 * max_pageout_cluster);
224 if (npendingio < 2)
225 npendingio = 2;
226 }
227 }
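/*
 * A worked example of the sizing above, assuming the defaults
 * PAGE_SIZE = 4096, DEV_BSIZE = 512, NPENDINGIO = 16 and
 * MAX_PAGEOUT_CLUSTER = 16:
 *
 *	dmmin = 4096 / 512 = 8 disk blocks (one page)
 *	dmmax = btodb(16 * 4096) * 2 = 128 * 2 = 256 disk blocks
 *
 * If maxsafepending were 64 pages, then 2 * 16 * 16 = 512 > 64, so the
 * cluster is halved to 8 pages and npendingio becomes 64 / (2 * 8) = 4,
 * keeping the worst-case number of pages tied up in pending pageouts
 * inside the free-page cushion.
 */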
228
229 void
230 swap_pager_swap_init()
231 {
232 swp_clean_t spc;
233 struct buf *bp;
234 int i;
235
236 /*
237 * KVAs are allocated here so that we don't need to keep doing
238 * pageable kmem_allocs at runtime
239 */
240 for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
241 spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * max_pageout_cluster);
242 if (!spc->spc_kva) {
243 break;
244 }
245 spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_KERNEL);
246 if (!spc->spc_bp) {
247 kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE * max_pageout_cluster);
248 break;
249 }
250 spc->spc_flags = 0;
251 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
252 swap_pager_free_count++;
253 }
254 }
255
256 int
257 swap_pager_swp_alloc(object, wait)
258 vm_object_t object;
259 int wait;
260 {
261 sw_blk_t swb;
262 int nblocks;
263 int i, j;
264
265 nblocks = (object->size + SWB_NPAGES - 1) / SWB_NPAGES;
266 swb = malloc(nblocks * sizeof(*swb), M_VMPGDATA, wait);
267 if (swb == NULL)
268 return 1;
269
270 for (i = 0; i < nblocks; i++) {
271 swb[i].swb_valid = 0;
272 swb[i].swb_locked = 0;
273 for (j = 0; j < SWB_NPAGES; j++)
274 swb[i].swb_block[j] = SWB_EMPTY;
275 }
276
277 object->un_pager.swp.swp_nblocks = nblocks;
278 object->un_pager.swp.swp_allocsize = 0;
279 object->un_pager.swp.swp_blocks = swb;
280 object->un_pager.swp.swp_poip = 0;
281
282 if (object->handle != NULL) {
283 TAILQ_INSERT_TAIL(&swap_pager_object_list, object, pager_object_list);
284 } else {
285 TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list);
286 }
287
288 return 0;
289 }
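/*
 * For example, a 100-page object with the default SWB_NPAGES = 16 gets
 * nblocks = (100 + 15) / 16 = 7 sw_blk structures (112 page slots),
 * each slot starting out as SWB_EMPTY with its swb_valid bit clear.
 */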
290
291 /*
292 * Allocate an object and associated resources.
293 * Note that if we are called from the pageout daemon (handle == NULL)
294 * we should not wait for memory as it could result in a deadlock.
295 */
296 static vm_object_t
297 swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
298 vm_ooffset_t offset)
299 {
300 vm_object_t object;
301
302 /*
303 * If this is a "named" anonymous region, look it up and use the
304 * object if it exists, otherwise allocate a new one.
305 */
306 if (handle) {
307 object = vm_pager_object_lookup(&swap_pager_object_list, handle);
308 if (object != NULL) {
309 vm_object_reference(object);
310 } else {
311 /*
312 * XXX - there is a race condition here. Two processes
313 * can request the same named object simultaneously,
314 * and if one blocks for memory, the result is a disaster.
315 * Probably quite rare, but is yet another reason to just
316 * rip support of "named anonymous regions" out altogether.
317 */
318 object = vm_object_allocate(OBJT_SWAP,
319 OFF_TO_IDX(offset + PAGE_MASK + size));
320 object->handle = handle;
321 (void) swap_pager_swp_alloc(object, M_WAITOK);
322 }
323 } else {
324 object = vm_object_allocate(OBJT_SWAP,
325 OFF_TO_IDX(offset + PAGE_MASK + size));
326 (void) swap_pager_swp_alloc(object, M_WAITOK);
327 }
328
329 return (object);
330 }
331
332 /*
333 * returns disk block associated with pager and offset
334 * additionally, as a side effect returns a flag indicating
335 * if the block has been written
336 */
337
338 static __inline daddr_t *
339 swap_pager_diskaddr(object, pindex, valid)
340 vm_object_t object;
341 vm_pindex_t pindex;
342 int *valid;
343 {
344 register sw_blk_t swb;
345 int ix;
346
347 if (valid)
348 *valid = 0;
349 ix = pindex / SWB_NPAGES;
350 if ((ix >= object->un_pager.swp.swp_nblocks) ||
351 (pindex >= object->size)) {
352 return (NULL);
353 }
354 swb = &object->un_pager.swp.swp_blocks[ix];
355 ix = pindex % SWB_NPAGES;
356 if (valid)
357 *valid = swb->swb_valid & (1 << ix);
358 return &swb->swb_block[ix];
359 }
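/*
 * Example of the mapping above, with the default SWB_NPAGES = 16:
 * pindex 37 lands in swp_blocks[37 / 16] == swp_blocks[2] at slot
 * 37 % 16 == 5, so the disk address is swb_block[5] and the "written"
 * flag is swb_valid & (1 << 5).
 */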
360
361 /*
362 * Utility routine to set the valid (written) bit for
363 * a block associated with a pager and offset
364 */
365 static void
366 swap_pager_setvalid(object, offset, valid)
367 vm_object_t object;
368 vm_offset_t offset;
369 int valid;
370 {
371 register sw_blk_t swb;
372 int ix;
373
374 ix = offset / SWB_NPAGES;
375 if (ix >= object->un_pager.swp.swp_nblocks)
376 return;
377
378 swb = &object->un_pager.swp.swp_blocks[ix];
379 ix = offset % SWB_NPAGES;
380 if (valid)
381 swb->swb_valid |= (1 << ix);
382 else
383 swb->swb_valid &= ~(1 << ix);
384 return;
385 }
386
387 /*
388 * this routine allocates swap space with a fragmentation
389 * minimization policy.
390 */
391 static int
392 swap_pager_getswapspace(object, amount, rtval)
393 vm_object_t object;
394 unsigned int amount;
395 daddr_t *rtval;
396 {
397 unsigned location;
398
399 vm_swap_size -= amount;
400
401 if (!rlist_alloc(&swaplist, amount, &location)) {
402 vm_swap_size += amount;
403 return 0;
404 } else {
405 swapsizecheck();
406 object->un_pager.swp.swp_allocsize += amount;
407 *rtval = location;
408 return 1;
409 }
410 }
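/*
 * Example, assuming PAGE_SIZE = 4096 and DEV_BSIZE = 512: a request
 * for a full 16-page cluster asks rlist_alloc for 16 * 8 == 128
 * contiguous disk blocks.  vm_swap_size is debited before the attempt
 * and credited back on failure, so swapsizecheck() always sees a
 * consistent figure.
 */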
411
412 /*
413 * this routine frees swap space with a fragmentation
414 * minimization policy.
415 */
416 static void
417 swap_pager_freeswapspace(object, from, to)
418 vm_object_t object;
419 unsigned int from;
420 unsigned int to;
421 {
422 rlist_free(&swaplist, from, to);
423 vm_swap_size += (to - from) + 1;
424 object->un_pager.swp.swp_allocsize -= (to - from) + 1;
425 swapsizecheck();
426 }
427 /*
428 * this routine frees swap blocks from a specified pager
429 */
430 void
431 swap_pager_freespace(object, start, size)
432 vm_object_t object;
433 vm_pindex_t start;
434 vm_size_t size;
435 {
436 vm_pindex_t i;
437 int s;
438
439 s = splvm();
440 for (i = start; i < start + size; i += 1) {
441 int valid;
442 daddr_t *addr = swap_pager_diskaddr(object, i, &valid);
443
444 if (addr && *addr != SWB_EMPTY) {
445 swap_pager_freeswapspace(object, *addr, *addr + btodb(PAGE_SIZE) - 1);
446 if (valid) {
447 swap_pager_setvalid(object, i, 0);
448 }
449 *addr = SWB_EMPTY;
450 }
451 }
452 splx(s);
453 }
454
455 /*
456 * same as freespace, but don't free, just force a DMZ next time
457 */
458 void
459 swap_pager_dmzspace(object, start, size)
460 vm_object_t object;
461 vm_pindex_t start;
462 vm_size_t size;
463 {
464 vm_pindex_t i;
465 int s;
466
467 s = splvm();
468 for (i = start; i < start + size; i += 1) {
469 int valid;
470 daddr_t *addr = swap_pager_diskaddr(object, i, &valid);
471
472 if (addr && *addr != SWB_EMPTY) {
473 if (valid) {
474 swap_pager_setvalid(object, i, 0);
475 }
476 }
477 }
478 splx(s);
479 }
480
481 static void
482 swap_pager_free_swap(object)
483 vm_object_t object;
484 {
485 register int i, j;
486 register sw_blk_t swb;
487 int first_block=0, block_count=0;
488 int s;
489 /*
490 * Free left over swap blocks
491 */
492 swb = object->un_pager.swp.swp_blocks;
493 if (swb == NULL) {
494 return;
495 }
496
497 s = splvm();
498 for (i = 0; i < object->un_pager.swp.swp_nblocks; i++, swb++) {
499 for (j = 0; j < SWB_NPAGES; j++) {
500 if (swb->swb_block[j] != SWB_EMPTY) {
501 /*
502 * initially the length of the run is zero
503 */
504 if (block_count == 0) {
505 first_block = swb->swb_block[j];
506 block_count = btodb(PAGE_SIZE);
507 swb->swb_block[j] = SWB_EMPTY;
508 /*
509 * if the new block can be included into the current run
510 */
511 } else if (swb->swb_block[j] == first_block + block_count) {
512 block_count += btodb(PAGE_SIZE);
513 swb->swb_block[j] = SWB_EMPTY;
514 /*
515 * terminate the previous run, and start a new one
516 */
517 } else {
518 swap_pager_freeswapspace(object, first_block,
519 (unsigned) first_block + block_count - 1);
520 first_block = swb->swb_block[j];
521 block_count = btodb(PAGE_SIZE);
522 swb->swb_block[j] = SWB_EMPTY;
523 }
524 }
525 }
526 }
527
528 if (block_count) {
529 swap_pager_freeswapspace(object, first_block,
530 (unsigned) first_block + block_count - 1);
531 }
532 splx(s);
533 }
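/*
 * Example of the run coalescing above, with btodb(PAGE_SIZE) == 8:
 * three slots holding disk blocks 96, 104 and 112 merge into one run
 * (first_block = 96, block_count = 24) and are returned with a single
 * swap_pager_freeswapspace(object, 96, 119) call instead of three.
 */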
534
535
536 /*
537 * swap_pager_reclaim frees up over-allocated space from all pagers
538 * this eliminates internal fragmentation due to allocation of space
539 * for segments that are never swapped to. It has been written so that
540 * it does not block until the rlist_free operation occurs; it keeps
541 * the queues consistent.
542 */
543
544 /*
545 * Maximum number of blocks (pages) to reclaim per pass
546 */
547 #define MAXRECLAIM 128
548
549 static void
550 swap_pager_reclaim()
551 {
552 vm_object_t object;
553 int i, j, k;
554 int s;
555 int reclaimcount;
556 static struct {
557 int address;
558 vm_object_t object;
559 } reclaims[MAXRECLAIM];
560 static int in_reclaim;
561
562 /*
563 * allow only one process to be in the swap_pager_reclaim subroutine
564 */
565 s = splvm();
566 if (in_reclaim) {
567 tsleep(&in_reclaim, PSWP, "swrclm", 0);
568 splx(s);
569 return;
570 }
571 in_reclaim = 1;
572 reclaimcount = 0;
573
574 /* for each pager queue */
575 for (k = 0; swp_qs[k]; k++) {
576
577 object = TAILQ_FIRST(swp_qs[k]);
578 while (object && (reclaimcount < MAXRECLAIM)) {
579
580 /*
581 * see if any blocks associated with a pager have been
582 * allocated but not used (written)
583 */
584 if ((object->flags & OBJ_DEAD) == 0 &&
585 (object->paging_in_progress == 0)) {
586 for (i = 0; i < object->un_pager.swp.swp_nblocks; i++) {
587 sw_blk_t swb = &object->un_pager.swp.swp_blocks[i];
588
589 if (swb->swb_locked)
590 continue;
591 for (j = 0; j < SWB_NPAGES; j++) {
592 if (swb->swb_block[j] != SWB_EMPTY &&
593 (swb->swb_valid & (1 << j)) == 0) {
594 reclaims[reclaimcount].address = swb->swb_block[j];
595 reclaims[reclaimcount++].object = object;
596 swb->swb_block[j] = SWB_EMPTY;
597 if (reclaimcount >= MAXRECLAIM)
598 goto rfinished;
599 }
600 }
601 }
602 }
603 object = TAILQ_NEXT(object, pager_object_list);
604 }
605 }
606
607 rfinished:
608
609 /*
610 * free the blocks that have been added to the reclaim list
611 */
612 for (i = 0; i < reclaimcount; i++) {
613 swap_pager_freeswapspace(reclaims[i].object,
614 reclaims[i].address, reclaims[i].address + btodb(PAGE_SIZE) - 1);
615 }
616 splx(s);
617 in_reclaim = 0;
618 wakeup(&in_reclaim);
619 }
620
621
622 /*
623 * swap_pager_copy copies blocks from one pager to another and
624 * destroys the source pager
625 */
626
627 void
628 swap_pager_copy(srcobject, srcoffset, dstobject, dstoffset,
629 offset, destroysource)
630 vm_object_t srcobject;
631 vm_pindex_t srcoffset;
632 vm_object_t dstobject;
633 vm_pindex_t dstoffset;
634 vm_pindex_t offset;
635 int destroysource;
636 {
637 vm_pindex_t i;
638 int origsize;
639 int s;
640
641 if (vm_swap_size)
642 no_swap_space = 0;
643
644 origsize = srcobject->un_pager.swp.swp_allocsize;
645
646 /*
647 * remove the source object from the swap_pager internal queue
648 */
649 if (destroysource) {
650 if (srcobject->handle == NULL) {
651 TAILQ_REMOVE(&swap_pager_un_object_list, srcobject, pager_object_list);
652 } else {
653 TAILQ_REMOVE(&swap_pager_object_list, srcobject, pager_object_list);
654 }
655 }
656
657 s = splvm();
658 while (srcobject->un_pager.swp.swp_poip) {
659 tsleep(srcobject, PVM, "spgout", 0);
660 }
661
662 /*
663 * clean all of the pages that are currently active and finished
664 */
665 if (swap_pager_free_pending)
666 swap_pager_sync();
667
668 /*
669 * transfer source to destination
670 */
671 for (i = 0; i < dstobject->size; i += 1) {
672 int srcvalid, dstvalid;
673 daddr_t *srcaddrp = swap_pager_diskaddr(srcobject,
674 i + offset + srcoffset, &srcvalid);
675 daddr_t *dstaddrp;
676
677 /*
678 * see if the source has space allocated
679 */
680 if (srcaddrp && *srcaddrp != SWB_EMPTY) {
681 /*
682 * if the source is valid and the dest has no space,
683 * then copy the allocation from the source to the
684 * dest.
685 */
686 if (srcvalid) {
687 dstaddrp = swap_pager_diskaddr(dstobject, i + dstoffset,
688 &dstvalid);
689 /*
690 * if the dest already has a valid block,
691 * deallocate the source block without
692 * copying.
693 */
694 if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
695 swap_pager_freeswapspace(dstobject, *dstaddrp,
696 *dstaddrp + btodb(PAGE_SIZE) - 1);
697 *dstaddrp = SWB_EMPTY;
698 }
699 if (dstaddrp && *dstaddrp == SWB_EMPTY) {
700 *dstaddrp = *srcaddrp;
701 *srcaddrp = SWB_EMPTY;
702 dstobject->un_pager.swp.swp_allocsize += btodb(PAGE_SIZE);
703 srcobject->un_pager.swp.swp_allocsize -= btodb(PAGE_SIZE);
704 swap_pager_setvalid(dstobject, i + dstoffset, 1);
705 }
706 }
707 /*
708 * if the source is not empty at this point, then
709 * deallocate the space.
710 */
711 if (*srcaddrp != SWB_EMPTY) {
712 swap_pager_freeswapspace(srcobject, *srcaddrp,
713 *srcaddrp + btodb(PAGE_SIZE) - 1);
714 *srcaddrp = SWB_EMPTY;
715 }
716 }
717 }
718 splx(s);
719
720 /*
721 * Free left over swap blocks
722 */
723 if (destroysource) {
724 swap_pager_free_swap(srcobject);
725
726 if (srcobject->un_pager.swp.swp_allocsize) {
727 printf("swap_pager_copy: *warning* pager with %d blocks (orig: %d)\n",
728 srcobject->un_pager.swp.swp_allocsize, origsize);
729 }
730
731 free(srcobject->un_pager.swp.swp_blocks, M_VMPGDATA);
732 srcobject->un_pager.swp.swp_blocks = NULL;
733 }
734 return;
735 }
736
737 static void
738 swap_pager_dealloc(object)
739 vm_object_t object;
740 {
741 int s;
742 sw_blk_t swb;
743
744 /*
745 * Remove from list right away so lookups will fail if we block for
746 * pageout completion.
747 */
748 if (object->handle == NULL) {
749 TAILQ_REMOVE(&swap_pager_un_object_list, object, pager_object_list);
750 } else {
751 TAILQ_REMOVE(&swap_pager_object_list, object, pager_object_list);
752 }
753
754 /*
755 * Wait for all pageouts to finish and remove all entries from
756 * cleaning list.
757 */
758
759 s = splvm();
760 while (object->un_pager.swp.swp_poip) {
761 tsleep(object, PVM, "swpout", 0);
762 }
763 splx(s);
764
765 if (swap_pager_free_pending)
766 swap_pager_sync();
767
768 /*
769 * Free left over swap blocks
770 */
771 swap_pager_free_swap(object);
772
773 if (object->un_pager.swp.swp_allocsize) {
774 printf("swap_pager_dealloc: *warning* freeing pager with %d blocks\n",
775 object->un_pager.swp.swp_allocsize);
776 }
777 swb = object->un_pager.swp.swp_blocks;
778 if (swb) {
779 /*
780 * Free swap management resources
781 */
782 free(swb, M_VMPGDATA);
783 object->un_pager.swp.swp_blocks = NULL;
784 }
785 }
786
787 static __inline int
788 swap_pager_block_index(pindex)
789 vm_pindex_t pindex;
790 {
791 return (pindex / SWB_NPAGES);
792 }
793
794 static __inline int
795 swap_pager_block_offset(pindex)
796 vm_pindex_t pindex;
797 {
798 return (pindex % SWB_NPAGES);
799 }
800
801 /*
802 * swap_pager_haspage returns TRUE if the pager has data that has
803 * been written out.
804 */
805 static boolean_t
806 swap_pager_haspage(object, pindex, before, after)
807 vm_object_t object;
808 vm_pindex_t pindex;
809 int *before;
810 int *after;
811 {
812 register sw_blk_t swb;
813 int ix;
814
815 if (before != NULL)
816 *before = 0;
817 if (after != NULL)
818 *after = 0;
819 ix = pindex / SWB_NPAGES;
820 if (ix >= object->un_pager.swp.swp_nblocks) {
821 return (FALSE);
822 }
823 swb = &object->un_pager.swp.swp_blocks[ix];
824 ix = pindex % SWB_NPAGES;
825
826 if (swb->swb_block[ix] != SWB_EMPTY) {
827
828 if (swb->swb_valid & (1 << ix)) {
829 int tix;
830 if (before) {
831 for(tix = ix - 1; tix >= 0; --tix) {
832 if ((swb->swb_valid & (1 << tix)) == 0)
833 break;
834 if ((swb->swb_block[tix] +
835 (ix - tix) * (PAGE_SIZE/DEV_BSIZE)) !=
836 swb->swb_block[ix])
837 break;
838 (*before)++;
839 }
840 }
841
842 if (after) {
843 for(tix = ix + 1; tix < SWB_NPAGES; tix++) {
844 if ((swb->swb_valid & (1 << tix)) == 0)
845 break;
846 if ((swb->swb_block[tix] -
847 (tix - ix) * (PAGE_SIZE/DEV_BSIZE)) !=
848 swb->swb_block[ix])
849 break;
850 (*after)++;
851 }
852 }
853
854 return TRUE;
855 }
856 }
857 return (FALSE);
858 }
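/*
 * Example of the contiguity scans above, with PAGE_SIZE/DEV_BSIZE == 8:
 * if slot ix holds disk block 200, slot ix-1 contributes to *before
 * only if it is valid and holds block 192, and slot ix+1 contributes
 * to *after only if it holds block 208; only runs that are physically
 * contiguous on the swap device are reported for clustered I/O.
 */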
859
860 /*
861 * Wakeup based upon spc state
862 */
863 static void
864 spc_wakeup(void)
865 {
866 if( swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) {
867 swap_pager_needflags &= ~SWAP_FREE_NEEDED_BY_PAGEOUT;
868 wakeup(&swap_pager_needflags);
869 } else if ((swap_pager_needflags & SWAP_FREE_NEEDED) &&
870 swap_pager_free_count >= ((2 * npendingio) / 3)) {
871 swap_pager_needflags &= ~SWAP_FREE_NEEDED;
872 wakeup(&swap_pager_free);
873 }
874 }
875
876 /*
877 * Free an spc structure
878 */
879 static void
880 spc_free(spc)
881 swp_clean_t spc;
882 {
883 spc->spc_flags = 0;
884 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
885 swap_pager_free_count++;
886 if (swap_pager_needflags) {
887 spc_wakeup();
888 }
889 }
890
891 /*
892 * swap_pager_ridpages is a convenience routine that deallocates all
893 * but the required page.  This is usually used in error returns that
894 * need to invalidate the "extra" readahead pages.
895 */
896 static void
897 swap_pager_ridpages(m, count, reqpage)
898 vm_page_t *m;
899 int count;
900 int reqpage;
901 {
902 int i;
903
904 for (i = 0; i < count; i++) {
905 if (i != reqpage) {
906 vm_page_free(m[i]);
907 }
908 }
909 }
910
911 /*
912 * swap_pager_iodone1 is the completion routine for both reads and sync writes
913 */
914 static void
915 swap_pager_iodone1(bp)
916 struct buf *bp;
917 {
918 bp->b_flags |= B_DONE;
919 bp->b_flags &= ~B_ASYNC;
920 wakeup(bp);
921 }
922
923 static int
924 swap_pager_getpages(object, m, count, reqpage)
925 vm_object_t object;
926 vm_page_t *m;
927 int count, reqpage;
928 {
929 register struct buf *bp;
930 sw_blk_t swb[count];
931 register int s;
932 int i;
933 boolean_t rv;
934 vm_offset_t kva, off[count];
935 vm_pindex_t paging_offset;
936 int reqaddr[count];
937 int sequential;
938
939 int first, last;
940 int failed;
941 int reqdskregion;
942
943 object = m[reqpage]->object;
944 paging_offset = OFF_TO_IDX(object->paging_offset);
945 sequential = (m[reqpage]->pindex == (object->last_read + 1));
946
947 for (i = 0; i < count; i++) {
948 vm_pindex_t fidx = m[i]->pindex + paging_offset;
949 int ix = swap_pager_block_index(fidx);
950
951 if (ix >= object->un_pager.swp.swp_nblocks) {
952 int j;
953
954 if (i <= reqpage) {
955 swap_pager_ridpages(m, count, reqpage);
956 return (VM_PAGER_FAIL);
957 }
958 for (j = i; j < count; j++) {
959 vm_page_free(m[j]);
960 }
961 count = i;
962 break;
963 }
964 swb[i] = &object->un_pager.swp.swp_blocks[ix];
965 off[i] = swap_pager_block_offset(fidx);
966 reqaddr[i] = swb[i]->swb_block[off[i]];
967 }
968
969 /* make sure that our required input request exists */
970
971 if (reqaddr[reqpage] == SWB_EMPTY ||
972 (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
973 swap_pager_ridpages(m, count, reqpage);
974 return (VM_PAGER_FAIL);
975 }
976 reqdskregion = reqaddr[reqpage] / dmmax;
977
978 /*
979 * search backwards for the first contiguous page to transfer
980 */
981 failed = 0;
982 first = 0;
983 for (i = reqpage - 1; i >= 0; --i) {
984 if (sequential || failed || (reqaddr[i] == SWB_EMPTY) ||
985 (swb[i]->swb_valid & (1 << off[i])) == 0 ||
986 (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
987 ((reqaddr[i] / dmmax) != reqdskregion)) {
988 failed = 1;
989 vm_page_free(m[i]);
990 if (first == 0)
991 first = i + 1;
992 }
993 }
994 /*
995 * search forwards for the last contiguous page to transfer
996 */
997 failed = 0;
998 last = count;
999 for (i = reqpage + 1; i < count; i++) {
1000 if (failed || (reqaddr[i] == SWB_EMPTY) ||
1001 (swb[i]->swb_valid & (1 << off[i])) == 0 ||
1002 (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
1003 ((reqaddr[i] / dmmax) != reqdskregion)) {
1004 failed = 1;
1005 vm_page_free(m[i]);
1006 if (last == count)
1007 last = i;
1008 }
1009 }
1010
1011 count = last;
1012 if (first != 0) {
1013 for (i = first; i < count; i++) {
1014 m[i - first] = m[i];
1015 reqaddr[i - first] = reqaddr[i];
1016 off[i - first] = off[i];
1017 }
1018 count -= first;
1019 reqpage -= first;
1020 }
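/*
 * For example, if the backward scan freed m[0] and m[1] (first == 2)
 * out of count == 5 with reqpage == 3, then m[2..4] slide down to
 * m[0..2], count becomes 3 and reqpage becomes 1, leaving the request
 * page and its surviving neighbors as one dense, disk-contiguous
 * cluster.
 */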
1021 ++swb[reqpage]->swb_locked;
1022
1023 /*
1024 * at this point: "m" is a pointer to the array of vm_page_t for
1025 * paging I/O, "count" is the number of vm_page_t entries represented
1026 * by "m", "object" is the vm_object_t for I/O, and "reqpage" is the
1027 * index into "m" for the page actually faulted
1028 */
1029
1030 /*
1031 * Get a swap buffer header to perform the IO
1032 */
1033 bp = getpbuf();
1034 kva = (vm_offset_t) bp->b_data;
1035
1036 /*
1037 * map our page(s) into kva for input
1038 */
1039 pmap_qenter(kva, m, count);
1040
1041 bp->b_flags = B_BUSY | B_READ | B_CALL | B_PAGING;
1042 bp->b_iodone = swap_pager_iodone1;
1043 bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */
1044 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
1045 crhold(bp->b_rcred);
1046 crhold(bp->b_wcred);
1047 bp->b_data = (caddr_t) kva;
1048 bp->b_blkno = reqaddr[0];
1049 bp->b_bcount = PAGE_SIZE * count;
1050 bp->b_bufsize = PAGE_SIZE * count;
1051
1052 pbgetvp(swapdev_vp, bp);
1053
1054 cnt.v_swapin++;
1055 cnt.v_swappgsin += count;
1056 /*
1057 * perform the I/O
1058 */
1059 VOP_STRATEGY(bp->b_vp, bp);
1060
1061 /*
1062 * wait for the sync I/O to complete
1063 */
1064 s = splvm();
1065 while ((bp->b_flags & B_DONE) == 0) {
1066 if (tsleep(bp, PVM, "swread", hz*20)) {
1067 printf(
1068 "swap_pager: indefinite wait buffer: device: %#lx, blkno: %ld, size: %ld\n",
1069 (u_long)bp->b_dev, (long)bp->b_blkno,
1070 (long)bp->b_bcount);
1071 }
1072 }
1073
1074 if (bp->b_flags & B_ERROR) {
1075 printf(
1076 "swap_pager: I/O error - pagein failed; blkno %ld, size %ld, error %d\n",
1077 (long)bp->b_blkno, (long)bp->b_bcount, bp->b_error);
1078 rv = VM_PAGER_ERROR;
1079 } else {
1080 rv = VM_PAGER_OK;
1081 }
1082
1083 splx(s);
1084 swb[reqpage]->swb_locked--;
1085
1086 /*
1087 * remove the mapping for kernel virtual
1088 */
1089 pmap_qremove(kva, count);
1090
1091 /*
1092 * release the physical I/O buffer
1093 */
1094 relpbuf(bp);
1095 /*
1096 * finish up input if everything is ok
1097 */
1098 if (rv == VM_PAGER_OK) {
1099 for (i = 0; i < count; i++) {
1100 m[i]->dirty = 0;
1101 vm_page_flag_clear(m[i], PG_ZERO);
1102 if (i != reqpage) {
1103 /*
1104 * whether or not to leave the page
1105 * activated is up in the air, but we
1106 * should put the page on a page queue
1107 * somewhere. (it already is in the
1108 * object). After some empirical
1109 * results, it is best to deactivate
1110 * the readahead pages.
1111 */
1112 vm_page_deactivate(m[i]);
1113
1114 /*
1115 * just in case someone was asking for
1116 * this page we now tell them that it
1117 * is ok to use
1118 */
1119 m[i]->valid = VM_PAGE_BITS_ALL;
1120 vm_page_wakeup(m[i]);
1121 }
1122 }
1123
1124 m[reqpage]->object->last_read = m[count-1]->pindex;
1125 } else {
1126 swap_pager_ridpages(m, count, reqpage);
1127 }
1128 return (rv);
1129 }
1130
1131 int
1132 swap_pager_putpages(object, m, count, sync, rtvals)
1133 vm_object_t object;
1134 vm_page_t *m;
1135 int count;
1136 boolean_t sync;
1137 int *rtvals;
1138 {
1139 register struct buf *bp;
1140 sw_blk_t swb[count];
1141 register int s;
1142 int i, j, ix, firstidx, lastidx;
1143 boolean_t rv;
1144 vm_offset_t kva, off, fidx;
1145 swp_clean_t spc;
1146 vm_pindex_t paging_pindex;
1147 int reqaddr[count];
1148 int failed;
1149
1150 if (vm_swap_size)
1151 no_swap_space = 0;
1152
1153 if (no_swap_space) {
1154 for (i = 0; i < count; i++)
1155 rtvals[i] = VM_PAGER_FAIL;
1156 return VM_PAGER_FAIL;
1157 }
1158
1159 if (curproc != pageproc)
1160 sync = TRUE;
1161
1162 object = m[0]->object;
1163 paging_pindex = OFF_TO_IDX(object->paging_offset);
1164
1165 failed = 0;
1166 for (j = 0; j < count; j++) {
1167 fidx = m[j]->pindex + paging_pindex;
1168 ix = swap_pager_block_index(fidx);
1169 swb[j] = 0;
1170 if (ix >= object->un_pager.swp.swp_nblocks) {
1171 rtvals[j] = VM_PAGER_FAIL;
1172 failed = 1;
1173 continue;
1174 } else {
1175 rtvals[j] = VM_PAGER_OK;
1176 }
1177 swb[j] = &object->un_pager.swp.swp_blocks[ix];
1178 swb[j]->swb_locked++;
1179 if (failed) {
1180 rtvals[j] = VM_PAGER_FAIL;
1181 continue;
1182 }
1183 off = swap_pager_block_offset(fidx);
1184 reqaddr[j] = swb[j]->swb_block[off];
1185 if (reqaddr[j] == SWB_EMPTY) {
1186 daddr_t blk;
1187 int tries;
1188 int ntoget;
1189
1190 tries = 0;
1191 s = splvm();
1192
1193 /*
1194 * if any other pages have been allocated in this
1195 * block, we only try to get one page.
1196 */
1197 for (i = 0; i < SWB_NPAGES; i++) {
1198 if (swb[j]->swb_block[i] != SWB_EMPTY)
1199 break;
1200 }
1201
1202 ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
1203 /*
1204 * this code is a little conservative, but works (the
1205 * intent of this code is to allocate small chunks for
1206 * small objects)
1207 */
1208 if ((off == 0) && ((fidx + ntoget) > object->size)) {
1209 ntoget = object->size - fidx;
1210 }
1211 retrygetspace:
1212 if (!swap_pager_full && ntoget > 1 &&
1213 swap_pager_getswapspace(object, ntoget * btodb(PAGE_SIZE),
1214 &blk)) {
1215
1216 for (i = 0; i < ntoget; i++) {
1217 swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
1218 swb[j]->swb_valid = 0;
1219 }
1220
1221 reqaddr[j] = swb[j]->swb_block[off];
1222 } else if (!swap_pager_getswapspace(object, btodb(PAGE_SIZE),
1223 &swb[j]->swb_block[off])) {
1224 /*
1225 * if the allocation has failed, we try to
1226 * reclaim space and retry.
1227 */
1228 if (++tries == 1) {
1229 swap_pager_reclaim();
1230 goto retrygetspace;
1231 }
1232 rtvals[j] = VM_PAGER_AGAIN;
1233 failed = 1;
1234 swap_pager_full = 1;
1235 } else {
1236 reqaddr[j] = swb[j]->swb_block[off];
1237 swb[j]->swb_valid &= ~(1 << off);
1238 }
1239 splx(s);
1240 }
1241 }
1242
1243 /*
1244 * find the last contiguous page to transfer; later pages are marked VM_PAGER_AGAIN
1245 */
1246 failed = 0;
1247 for (i = 0; i < count; i++) {
1248 if (failed ||
1249 (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
1250 ((reqaddr[i] / dmmax) != (reqaddr[0] / dmmax)) ||
1251 (rtvals[i] != VM_PAGER_OK)) {
1252 failed = 1;
1253 if (rtvals[i] == VM_PAGER_OK)
1254 rtvals[i] = VM_PAGER_AGAIN;
1255 }
1256 }
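/*
 * Example, with btodb(PAGE_SIZE) == 8 and the default dmmax == 256:
 * if reqaddr[0] is block 96, page i stays in the cluster only while
 * reqaddr[i] == 96 + i * 8 and reqaddr[i] / 256 == 96 / 256, i.e. the
 * run is contiguous on disk and never crosses a dmmax region; the
 * first page to break the run demotes itself and everything after it
 * to VM_PAGER_AGAIN.
 */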
1257
1258 ix = 0;
1259 firstidx = -1;
1260 for (i = 0; i < count; i++) {
1261 if (rtvals[i] == VM_PAGER_OK) {
1262 ix++;
1263 if (firstidx == -1) {
1264 firstidx = i;
1265 }
1266 } else if (firstidx >= 0) {
1267 break;
1268 }
1269 }
1270
1271 if (firstidx == -1) {
1272 for (i = 0; i < count; i++) {
1273 if (rtvals[i] == VM_PAGER_OK)
1274 rtvals[i] = VM_PAGER_AGAIN;
1275 }
1276 return VM_PAGER_AGAIN;
1277 }
1278
1279 lastidx = firstidx + ix;
1280
1281 if (ix > max_pageout_cluster) {
1282 for (i = firstidx + max_pageout_cluster; i < lastidx; i++) {
1283 if (rtvals[i] == VM_PAGER_OK)
1284 rtvals[i] = VM_PAGER_AGAIN;
1285 }
1286 ix = max_pageout_cluster;
1287 lastidx = firstidx + ix;
1288 }
1289
1290 for (i = 0; i < firstidx; i++) {
1291 if (swb[i])
1292 swb[i]->swb_locked--;
1293 }
1294
1295 for (i = lastidx; i < count; i++) {
1296 if (swb[i])
1297 swb[i]->swb_locked--;
1298 }
1299
1300 #ifdef INVARIANTS
1301 for (i = firstidx; i < lastidx; i++) {
1302 if (reqaddr[i] == SWB_EMPTY) {
1303 printf("I/O to empty block???? -- pindex: %d, i: %d\n",
1304 m[i]->pindex, i);
1305 }
1306 }
1307 #endif
1308
1309 /*
1310 * Clean up all completed async pageouts.
1311 */
1312 if (swap_pager_free_pending)
1313 swap_pager_sync();
1314
1315 /*
1316 * get a swap pager clean data structure, block until we get it
1317 */
1318 if (curproc == pageproc) {
1319 if (swap_pager_free_count == 0) {
1320 s = splvm();
1321 while (swap_pager_free_count == 0) {
1322 swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT;
1323 /*
1324 * if it does not get one within a short time, then
1325 * there is a potential deadlock, so we go-on trying
1326 * to free pages. It is important to block here as opposed
1327 * to returning, thereby allowing the pageout daemon to continue.
1328 * It is likely that pageout daemon will start suboptimally
1329 * reclaiming vnode backed pages if we don't block. Since the
1330 * I/O subsystem is probably already fully utilized, might as
1331 * well wait.
1332 */
1333 if (tsleep(&swap_pager_needflags, PVM-1, "swpfre", hz/2)) {
1334 if (swap_pager_free_pending)
1335 swap_pager_sync();
1336 if (swap_pager_free_count == 0) {
1337 for (i = firstidx; i < lastidx; i++) {
1338 rtvals[i] = VM_PAGER_AGAIN;
1339 }
1340 splx(s);
1341 return VM_PAGER_AGAIN;
1342 }
1343 } else {
1344 swap_pager_sync();
1345 }
1346 }
1347 splx(s);
1348 }
1349
1350 spc = TAILQ_FIRST(&swap_pager_free);
1351 KASSERT(spc != NULL,
1352 ("swap_pager_putpages: free queue is empty, %d expected\n",
1353 swap_pager_free_count));
1354 TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
1355 swap_pager_free_count--;
1356
1357 kva = spc->spc_kva;
1358 bp = spc->spc_bp;
1359 bzero(bp, sizeof *bp);
1360 bp->b_spc = spc;
1361 bp->b_xflags = 0;
1362 bp->b_data = (caddr_t) kva;
1363 } else {
1364 spc = NULL;
1365 bp = getpbuf();
1366 kva = (vm_offset_t) bp->b_data;
1367 bp->b_spc = NULL;
1368 }
1369
1370 /*
1371 * map our page(s) into kva for I/O
1372 */
1373 pmap_qenter(kva, &m[firstidx], ix);
1374
1375 /*
1376 * get the base I/O offset into the swap file
1377 */
1378 for (i = firstidx; i < lastidx ; i++) {
1379 fidx = m[i]->pindex + paging_pindex;
1380 off = swap_pager_block_offset(fidx);
1381 /*
1382 * set the valid bit
1383 */
1384 swb[i]->swb_valid |= (1 << off);
1385 /*
1386 * and unlock the data structure
1387 */
1388 swb[i]->swb_locked--;
1389 }
1390
1391 bp->b_flags = B_BUSY | B_PAGING;
1392 bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */
1393 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
1394 if (bp->b_rcred != NOCRED)
1395 crhold(bp->b_rcred);
1396 if (bp->b_wcred != NOCRED)
1397 crhold(bp->b_wcred);
1398 bp->b_blkno = reqaddr[firstidx];
1399 pbgetvp(swapdev_vp, bp);
1400
1401 bp->b_bcount = PAGE_SIZE * ix;
1402 bp->b_bufsize = PAGE_SIZE * ix;
1403
1404 s = splvm();
1405 swapdev_vp->v_numoutput++;
1406
1407 /*
1408 * If this is an async write we set up additional buffer fields and
1409 * place a "cleaning" entry on the inuse queue.
1410 */
1411 object->un_pager.swp.swp_poip++;
1412
1413 if (spc) {
1414 spc->spc_flags = 0;
1415 spc->spc_object = object;
1416 bp->b_npages = ix;
1417 for (i = firstidx; i < lastidx; i++) {
1418 spc->spc_m[i] = m[i];
1419 bp->b_pages[i - firstidx] = m[i];
1420 vm_page_protect(m[i], VM_PROT_READ);
1421 pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
1422 m[i]->dirty = 0;
1423 }
1424 spc->spc_first = firstidx;
1425 spc->spc_count = ix;
1426 /*
1427 * the completion routine for async writes
1428 */
1429 bp->b_flags |= B_CALL;
1430 bp->b_iodone = swap_pager_iodone;
1431 bp->b_dirtyoff = 0;
1432 bp->b_dirtyend = bp->b_bcount;
1433 TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
1434 } else {
1435 bp->b_flags |= B_CALL;
1436 bp->b_iodone = swap_pager_iodone1;
1437 bp->b_npages = ix;
1438 for (i = firstidx; i < lastidx; i++)
1439 bp->b_pages[i - firstidx] = m[i];
1440 }
1441
1442 cnt.v_swapout++;
1443 cnt.v_swappgsout += ix;
1444
1445 /*
1446 * perform the I/O
1447 */
1448 VOP_STRATEGY(bp->b_vp, bp);
1449 if (sync == FALSE) {
1450 if (swap_pager_free_pending) {
1451 swap_pager_sync();
1452 }
1453 for (i = firstidx; i < lastidx; i++) {
1454 rtvals[i] = VM_PAGER_PEND;
1455 }
1456 splx(s);
1457 return VM_PAGER_PEND;
1458 }
1459
1460 /*
1461 * wait for the sync I/O to complete
1462 */
1463 while ((bp->b_flags & B_DONE) == 0) {
1464 tsleep(bp, PVM, "swwrt", 0);
1465 }
1466
1467 if (bp->b_flags & B_ERROR) {
1468 printf(
1469 "swap_pager: I/O error - pageout failed; blkno %ld, size %ld, error %d\n",
1470 (long)bp->b_blkno, (long)bp->b_bcount, bp->b_error);
1471 rv = VM_PAGER_ERROR;
1472 } else {
1473 rv = VM_PAGER_OK;
1474 }
1475
1476 object->un_pager.swp.swp_poip--;
1477 if (object->un_pager.swp.swp_poip == 0)
1478 wakeup(object);
1479
1480 if (bp->b_vp)
1481 pbrelvp(bp);
1482
1483 splx(s);
1484
1485 /*
1486 * remove the mapping for kernel virtual
1487 */
1488 pmap_qremove(kva, ix);
1489
1490 /*
1491 * if we have written the page, then indicate that the page is clean.
1492 */
1493 if (rv == VM_PAGER_OK) {
1494 for (i = firstidx; i < lastidx; i++) {
1495 if (rtvals[i] == VM_PAGER_OK) {
1496 pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
1497 m[i]->dirty = 0;
1498 /*
1499 * optimization, if a page has been read
1500 * during the pageout process, we activate it.
1501 */
1502 if (((m[i]->flags & (PG_WANTED|PG_REFERENCED)) ||
1503 pmap_ts_referenced(VM_PAGE_TO_PHYS(m[i])))) {
1504 vm_page_activate(m[i]);
1505 }
1506 }
1507 }
1508 } else {
1509 for (i = firstidx; i < lastidx; i++) {
1510 rtvals[i] = rv;
1511 }
1512 }
1513
1514 if (spc != NULL) {
1515 if (bp->b_rcred != NOCRED)
1516 crfree(bp->b_rcred);
1517 if (bp->b_wcred != NOCRED)
1518 crfree(bp->b_wcred);
1519 spc_free(spc);
1520 } else
1521 relpbuf(bp);
1522 if (swap_pager_free_pending)
1523 swap_pager_sync();
1524
1525 return (rv);
1526 }
1527
1528 void
1529 swap_pager_sync()
1530 {
1531 swp_clean_t spc;
1532
1533 while ((spc = TAILQ_FIRST(&swap_pager_done)) != NULL) {
1534 swap_pager_finish(spc);
1535 }
1536 return;
1537 }
1538
1539 static void
1540 swap_pager_finish(spc)
1541 register swp_clean_t spc;
1542 {
1543 int i, s, lastidx;
1544 vm_object_t object;
1545 vm_page_t *ma;
1546
1547 ma = spc->spc_m;
1548 object = spc->spc_object;
1549 lastidx = spc->spc_first + spc->spc_count;
1550
1551 s = splvm();
1552 TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
1553 splx(s);
1554
1555 pmap_qremove(spc->spc_kva, spc->spc_count);
1556
1557 /*
1558 * If no error, mark as clean and inform the pmap system. If error,
1559 * mark as dirty so we will try again. (XXX could get stuck doing
1560 * this, should give up after awhile)
1561 */
1562 if (spc->spc_flags & SPC_ERROR) {
1563
1564 for (i = spc->spc_first; i < lastidx; i++) {
1565 printf("swap_pager_finish: I/O error, clean of page %lx failed\n",
1566 (u_long) VM_PAGE_TO_PHYS(ma[i]));
1567 ma[i]->dirty = VM_PAGE_BITS_ALL;
1568 vm_page_io_finish(ma[i]);
1569 }
1570
1571 vm_object_pip_subtract(object, spc->spc_count);
1572 if ((object->paging_in_progress == 0) &&
1573 (object->flags & OBJ_PIPWNT)) {
1574 vm_object_clear_flag(object, OBJ_PIPWNT);
1575 wakeup(object);
1576 }
1577
1578 } else {
1579 for (i = spc->spc_first; i < lastidx; i++) {
1580 if ((ma[i]->queue != PQ_ACTIVE) &&
1581 ((ma[i]->flags & PG_WANTED) ||
1582 pmap_ts_referenced(VM_PAGE_TO_PHYS(ma[i])))) {
1583 vm_page_activate(ma[i]);
1584 }
1585 }
1586 }
1587
1588 nswiodone -= spc->spc_count;
1589 swap_pager_free_pending--;
1590 spc_free(spc);
1591
1592 return;
1593 }
1594
1595 /*
1596 * swap_pager_iodone
1597 */
1598 static void
1599 swap_pager_iodone(bp)
1600 register struct buf *bp;
1601 {
1602 int i, s, lastidx;
1603 register swp_clean_t spc;
1604 vm_object_t object;
1605 vm_page_t *ma;
1606
1607
1608 s = splvm();
1609 spc = (swp_clean_t) bp->b_spc;
1610 TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
1611 TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
1612
1613 object = spc->spc_object;
1614
1615 #if defined(DIAGNOSTIC)
1616 if (object->paging_in_progress < spc->spc_count)
1617 printf("swap_pager_iodone: paging_in_progress(%d) < spc_count(%d)\n",
1618 object->paging_in_progress, spc->spc_count);
1619 #endif
1620
1621 if (bp->b_flags & B_ERROR) {
1622 spc->spc_flags |= SPC_ERROR;
1623 printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d\n",
1624 (bp->b_flags & B_READ) ? "pagein" : "pageout",
1625 (u_long) bp->b_blkno, bp->b_bcount, bp->b_error);
1626 } else {
1627 vm_object_pip_subtract(object, spc->spc_count);
1628 if ((object->paging_in_progress == 0) &&
1629 (object->flags & OBJ_PIPWNT)) {
1630 vm_object_clear_flag(object, OBJ_PIPWNT);
1631 wakeup(object);
1632 }
1633 ma = spc->spc_m;
1634 lastidx = spc->spc_first + spc->spc_count;
1635 for (i = spc->spc_first; i < lastidx; i++) {
1636 /*
1637 * we wakeup any processes that are waiting on these pages.
1638 */
1639 vm_page_io_finish(ma[i]);
1640 }
1641 }
1642
1643 if (bp->b_vp)
1644 pbrelvp(bp);
1645
1646 if (bp->b_rcred != NOCRED)
1647 crfree(bp->b_rcred);
1648 if (bp->b_wcred != NOCRED)
1649 crfree(bp->b_wcred);
1650
1651 nswiodone += spc->spc_count;
1652 swap_pager_free_pending++;
1653 if (--spc->spc_object->un_pager.swp.swp_poip == 0) {
1654 wakeup(spc->spc_object);
1655 }
1656
1657 if (swap_pager_needflags &&
1658 ((swap_pager_free_count + swap_pager_free_pending) > (npendingio / 2))) {
1659 spc_wakeup();
1660 }
1661
1662 if ((TAILQ_FIRST(&swap_pager_inuse) == NULL) &&
1663 vm_pageout_pages_needed) {
1664 wakeup(&vm_pageout_pages_needed);
1665 vm_pageout_pages_needed = 0;
1666 }
1667
1668 splx(s);
1669 }