FreeBSD/Linux Kernel Cross Reference
sys/vm/swap_pager.c
1 /*
2 * Copyright (c) 1994 John S. Dyson
3 * Copyright (c) 1990 University of Utah.
4 * Copyright (c) 1991, 1993
5 * The Regents of the University of California. All rights reserved.
6 *
7 * This code is derived from software contributed to Berkeley by
8 * the Systems Programming Group of the University of Utah Computer
9 * Science Department.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 * 3. All advertising materials mentioning features or use of this software
20 * must display the following acknowledgement:
21 * This product includes software developed by the University of
22 * California, Berkeley and its contributors.
23 * 4. Neither the name of the University nor the names of its contributors
24 * may be used to endorse or promote products derived from this software
25 * without specific prior written permission.
26 *
27 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
28 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
29 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
30 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
31 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
32 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
33 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
34 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
35 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
36 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
37 * SUCH DAMAGE.
38 *
39 * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
40 *
41 * @(#)swap_pager.c 8.9 (Berkeley) 3/21/94
42 * $FreeBSD: src/sys/vm/swap_pager.c,v 1.72.2.3 1999/09/05 08:24:19 peter Exp $
43 */
44
45 /*
46 * Quick hack to page to dedicated partition(s).
47 * TODO:
48 * Add multiprocessor locks
49 * Deal with async writes in a better fashion
50 */
51
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/kernel.h>
55 #include <sys/proc.h>
56 #include <sys/buf.h>
57 #include <sys/vnode.h>
58 #include <sys/malloc.h>
59 #include <sys/vmmeter.h>
60
61 #include <miscfs/specfs/specdev.h>
62 #include <sys/rlist.h>
63
64 #include <vm/vm.h>
65 #include <vm/vm_param.h>
66 #include <vm/vm_prot.h>
67 #include <vm/vm_object.h>
68 #include <vm/vm_page.h>
69 #include <vm/vm_pager.h>
70 #include <vm/vm_pageout.h>
71 #include <vm/swap_pager.h>
72 #include <vm/vm_kern.h>
73 #include <vm/vm_extern.h>
74
75 #ifndef NPENDINGIO
76 #define NPENDINGIO 10
77 #endif
78
79 static int nswiodone;
80 int swap_pager_full;
81 extern int vm_swap_size;
82 static int no_swap_space = 1;
83 struct rlisthdr swaplist;
84
85 #define MAX_PAGEOUT_CLUSTER 16
86
87 TAILQ_HEAD(swpclean, swpagerclean);
88
89 typedef struct swpagerclean *swp_clean_t;
90
91 static struct swpagerclean {
92 TAILQ_ENTRY(swpagerclean) spc_list;
93 int spc_flags;
94 struct buf *spc_bp;
95 vm_object_t spc_object;
96 vm_offset_t spc_kva;
97 int spc_first;
98 int spc_count;
99 vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
100 } swcleanlist[NPENDINGIO];
101
102
103 /* spc_flags values */
104 #define SPC_ERROR 0x01
105
106 #define SWB_EMPTY (-1)
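
/*
 * Illustrative sketch, not compiled: the sw_blk_t metadata manipulated
 * throughout this file is defined in <vm/swap_pager.h>. Judging from
 * how its fields are used below, the layout is roughly the following
 * (the exact field widths are an assumption here):
 */
#if 0
struct swblock {
 unsigned short swb_valid; /* per-slot bitmask: slot written to swap */
 unsigned short swb_locked; /* paging I/O in progress on this block */
 int swb_block[SWB_NPAGES]; /* disk addresses, SWB_EMPTY if unallocated */
};
typedef struct swblock *sw_blk_t;
#endif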
107
108 /* list of completed page cleans */
109 static struct swpclean swap_pager_done;
110
111 /* list of pending page cleans */
112 static struct swpclean swap_pager_inuse;
113
114 /* list of free pager clean structs */
115 static struct swpclean swap_pager_free;
116 int swap_pager_free_count;
117
118 /* list of "named" anon region objects */
119 static struct pagerlst swap_pager_object_list;
120
121 /* list of "unnamed" anon region objects */
122 struct pagerlst swap_pager_un_object_list;
123
124 #define SWAP_FREE_NEEDED 0x1 /* need a swap block */
125 #define SWAP_FREE_NEEDED_BY_PAGEOUT 0x2
126 static int swap_pager_needflags;
127
128 static struct pagerlst *swp_qs[] = {
129 &swap_pager_object_list, &swap_pager_un_object_list, (struct pagerlst *) 0
130 };
131
132 /*
133 * pagerops for OBJT_SWAP - "swap pager".
134 */
135 static vm_object_t
136 swap_pager_alloc __P((void *handle, vm_size_t size,
137 vm_prot_t prot, vm_ooffset_t offset));
138 static void swap_pager_dealloc __P((vm_object_t object));
139 static boolean_t
140 swap_pager_haspage __P((vm_object_t object, vm_pindex_t pindex,
141 int *before, int *after));
142 static int swap_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
143 static void swap_pager_init __P((void));
144 static void swap_pager_sync __P((void));
145
146 struct pagerops swappagerops = {
147 swap_pager_init,
148 swap_pager_alloc,
149 swap_pager_dealloc,
150 swap_pager_getpages,
151 swap_pager_putpages,
152 swap_pager_haspage,
153 swap_pager_sync
154 };
155
156 static int npendingio = NPENDINGIO;
157 static int dmmin;
158 int dmmax;
159
160 static int swap_pager_block_index __P((vm_pindex_t pindex));
161 static int swap_pager_block_offset __P((vm_pindex_t pindex));
162 static daddr_t *swap_pager_diskaddr __P((vm_object_t object,
163 vm_pindex_t pindex, int *valid));
164 static void swap_pager_finish __P((swp_clean_t spc));
165 static void swap_pager_freepage __P((vm_page_t m));
166 static void swap_pager_free_swap __P((vm_object_t object));
167 static void swap_pager_freeswapspace __P((vm_object_t object,
168 unsigned int from,
169 unsigned int to));
170 static int swap_pager_getswapspace __P((vm_object_t object,
171 unsigned int amount,
172 daddr_t *rtval));
173 static void swap_pager_iodone __P((struct buf *));
174 static void swap_pager_iodone1 __P((struct buf *bp));
175 static void swap_pager_reclaim __P((void));
176 static void swap_pager_ridpages __P((vm_page_t *m, int count,
177 int reqpage));
178 static void swap_pager_setvalid __P((vm_object_t object,
179 vm_offset_t offset, int valid));
180 static void swapsizecheck __P((void));
181
182 #define SWAPLOW (vm_swap_size < (512 * btodb(PAGE_SIZE)))
183
184 static __inline void
185 swapsizecheck()
186 {
187 if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
188 if (swap_pager_full == 0)
189 printf("swap_pager: out of swap space\n");
190 swap_pager_full = 1;
191 } else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
192 swap_pager_full = 0;
193 }
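
/*
 * Worked numbers for the thresholds above, assuming 4K pages and
 * DEV_BSIZE == 512 (so btodb(PAGE_SIZE) == 8): swap_pager_full is set
 * once fewer than 128 pages (512KB) of swap remain, and is cleared
 * only once more than 192 pages (768KB) are free again; the 64-page
 * gap provides hysteresis so the flag does not flap at the boundary.
 */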
194
195 static void
196 swap_pager_init()
197 {
198 TAILQ_INIT(&swap_pager_object_list);
199 TAILQ_INIT(&swap_pager_un_object_list);
200
201 /*
202 * Initialize clean lists
203 */
204 TAILQ_INIT(&swap_pager_inuse);
205 TAILQ_INIT(&swap_pager_done);
206 TAILQ_INIT(&swap_pager_free);
207 swap_pager_free_count = 0;
208
209 /*
210 * Calculate the swap allocation constants.
211 */
212 dmmin = PAGE_SIZE / DEV_BSIZE;
213 dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
214 }
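
/*
 * Example of the constants above, assuming SWB_NPAGES == 8, 4K pages
 * and DEV_BSIZE == 512: dmmin = 4096 / 512 = 8, and dmmax =
 * btodb(8 * 4096) * 2 = 128 DEV_BSIZE blocks. The clustering code in
 * getpages/putpages refuses to build a cluster whose blocks fall in
 * different dmmax-sized regions of the swap device (it compares
 * addr / dmmax across the run).
 */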
215
216 void
217 swap_pager_swap_init()
218 {
219 swp_clean_t spc;
220 struct buf *bp;
221 int i;
222
223 /*
224 * KVAs are allocated here so that we don't need to keep doing
225 * kmem_alloc_pageable calls at runtime
226 */
227 for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
228 spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
229 if (!spc->spc_kva) {
230 break;
231 }
232 spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_KERNEL);
233 if (!spc->spc_bp) {
234 kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
235 break;
236 }
237 spc->spc_flags = 0;
238 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
239 swap_pager_free_count++;
240 }
241 }
242
243 int
244 swap_pager_swp_alloc(object, wait)
245 vm_object_t object;
246 int wait;
247 {
248 sw_blk_t swb;
249 int nblocks;
250 int i, j;
251
252 nblocks = (object->size + SWB_NPAGES - 1) / SWB_NPAGES;
253 swb = malloc(nblocks * sizeof(*swb), M_VMPGDATA, wait);
254 if (swb == NULL)
255 return 1;
256
257 for (i = 0; i < nblocks; i++) {
258 swb[i].swb_valid = 0;
259 swb[i].swb_locked = 0;
260 for (j = 0; j < SWB_NPAGES; j++)
261 swb[i].swb_block[j] = SWB_EMPTY;
262 }
263
264 object->un_pager.swp.swp_nblocks = nblocks;
265 object->un_pager.swp.swp_allocsize = 0;
266 object->un_pager.swp.swp_blocks = swb;
267 object->un_pager.swp.swp_poip = 0;
268
269 if (object->handle != NULL) {
270 TAILQ_INSERT_TAIL(&swap_pager_object_list, object, pager_object_list);
271 } else {
272 TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list);
273 }
274
275 return 0;
276 }
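
/*
 * Sizing example for the allocation above, assuming SWB_NPAGES == 8:
 * a 21-page object gets nblocks = (21 + 7) / 8 = 3 swblock entries,
 * i.e. 24 page slots, with the three unused slots simply left at
 * SWB_EMPTY.
 */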
277
278 /*
279 * Allocate an object and associated resources.
280 * Note that if we are called from the pageout daemon (handle == NULL)
281 * we should not wait for memory as it could result in deadlock.
282 */
283 static vm_object_t
284 swap_pager_alloc(handle, size, prot, offset)
285 void *handle;
286 register vm_size_t size;
287 vm_prot_t prot;
288 vm_ooffset_t offset;
289 {
290 vm_object_t object;
291
292 /*
293 * If this is a "named" anonymous region, look it up and use the
294 * object if it exists, otherwise allocate a new one.
295 */
296 if (handle) {
297 object = vm_pager_object_lookup(&swap_pager_object_list, handle);
298 if (object != NULL) {
299 vm_object_reference(object);
300 } else {
301 /*
302 * XXX - there is a race condition here. Two processes
303 * can request the same named object simultaneously,
304 * and if one blocks for memory, the result is a disaster.
305 * Probably quite rare, but is yet another reason to just
306 * rip support of "named anonymous regions" out altogether.
307 */
308 object = vm_object_allocate(OBJT_SWAP,
309 OFF_TO_IDX(offset + PAGE_MASK) + size);
310 object->handle = handle;
311 (void) swap_pager_swp_alloc(object, M_WAITOK);
312 }
313 } else {
314 object = vm_object_allocate(OBJT_SWAP,
315 OFF_TO_IDX(offset + PAGE_MASK) + size);
316 (void) swap_pager_swp_alloc(object, M_WAITOK);
317 }
318
319 return (object);
320 }
321
322 /*
323 * returns disk block associated with pager and offset
324 * additionally, as a side effect returns a flag indicating
325 * if the block has been written
326 */
327
328 __inline static daddr_t *
329 swap_pager_diskaddr(object, pindex, valid)
330 vm_object_t object;
331 vm_pindex_t pindex;
332 int *valid;
333 {
334 register sw_blk_t swb;
335 int ix;
336
337 if (valid)
338 *valid = 0;
339 ix = pindex / SWB_NPAGES;
340 if ((ix >= object->un_pager.swp.swp_nblocks) ||
341 (pindex >= object->size)) {
342 return (FALSE);
343 }
344 swb = &object->un_pager.swp.swp_blocks[ix];
345 ix = pindex % SWB_NPAGES;
346 if (valid)
347 *valid = swb->swb_valid & (1 << ix);
348 return &swb->swb_block[ix];
349 }
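
/*
 * Lookup example for the mapping above, assuming SWB_NPAGES == 8:
 * pindex 19 selects swp_blocks[19 / 8] == swp_blocks[2], slot
 * 19 % 8 == 3, so the routine returns &swb->swb_block[3] and reports
 * the "written" state from swb_valid & (1 << 3).
 */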
350
351 /*
352 * Utility routine to set the valid (written) bit for
353 * a block associated with a pager and offset
354 */
355 static void
356 swap_pager_setvalid(object, offset, valid)
357 vm_object_t object;
358 vm_offset_t offset;
359 int valid;
360 {
361 register sw_blk_t swb;
362 int ix;
363
364 ix = offset / SWB_NPAGES;
365 if (ix >= object->un_pager.swp.swp_nblocks)
366 return;
367
368 swb = &object->un_pager.swp.swp_blocks[ix];
369 ix = offset % SWB_NPAGES;
370 if (valid)
371 swb->swb_valid |= (1 << ix);
372 else
373 swb->swb_valid &= ~(1 << ix);
374 return;
375 }
376
377 /*
378 * this routine allocates swap space with a fragmentation
379 * minimization policy.
380 */
381 static int
382 swap_pager_getswapspace(object, amount, rtval)
383 vm_object_t object;
384 unsigned int amount;
385 daddr_t *rtval;
386 {
387 unsigned location;
388 vm_swap_size -= amount;
389 if (!rlist_alloc(&swaplist, amount, &location)) {
390 vm_swap_size += amount;
391 return 0;
392 } else {
393 swapsizecheck();
394 object->un_pager.swp.swp_allocsize += amount;
395 *rtval = location;
396 return 1;
397 }
398 }
399
400 /*
401 * this routine frees swap space with a fragmentation
402 * minimization policy.
403 */
404 static void
405 swap_pager_freeswapspace(object, from, to)
406 vm_object_t object;
407 unsigned int from;
408 unsigned int to;
409 {
410 rlist_free(&swaplist, from, to);
411 vm_swap_size += (to - from) + 1;
412 object->un_pager.swp.swp_allocsize -= (to - from) + 1;
413 swapsizecheck();
414 }
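
/*
 * Units note: swap addresses here are in DEV_BSIZE blocks, so a single
 * page occupies btodb(PAGE_SIZE) consecutive blocks and callers free
 * the inclusive run [addr, addr + btodb(PAGE_SIZE) - 1] -- e.g. blocks
 * 96..103 for a page at block 96 with 4K pages and 512-byte sectors.
 */
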
415 /*
416 * this routine frees swap blocks from a specified pager
417 */
418 void
419 swap_pager_freespace(object, start, size)
420 vm_object_t object;
421 vm_pindex_t start;
422 vm_size_t size;
423 {
424 vm_pindex_t i;
425 int s;
426
427 s = splbio();
428 for (i = start; i < start + size; i += 1) {
429 int valid;
430 daddr_t *addr = swap_pager_diskaddr(object, i, &valid);
431
432 if (addr && *addr != SWB_EMPTY) {
433 swap_pager_freeswapspace(object, *addr, *addr + btodb(PAGE_SIZE) - 1);
434 if (valid) {
435 swap_pager_setvalid(object, i, 0);
436 }
437 *addr = SWB_EMPTY;
438 }
439 }
440 splx(s);
441 }
442
443 /*
444 * same as freespace, but don't free; just mark invalid so the pages are demand-zeroed (DMZ) next time
445 */
446 void
447 swap_pager_dmzspace(object, start, size)
448 vm_object_t object;
449 vm_pindex_t start;
450 vm_size_t size;
451 {
452 vm_pindex_t i;
453 int s;
454
455 s = splbio();
456 for (i = start; i < start + size; i += 1) {
457 int valid;
458 daddr_t *addr = swap_pager_diskaddr(object, i, &valid);
459
460 if (addr && *addr != SWB_EMPTY) {
461 if (valid) {
462 swap_pager_setvalid(object, i, 0);
463 }
464 }
465 }
466 splx(s);
467 }
468
469 static void
470 swap_pager_free_swap(object)
471 vm_object_t object;
472 {
473 register int i, j;
474 register sw_blk_t swb;
475 int first_block=0, block_count=0;
476 int s;
477 /*
478 * Free left over swap blocks
479 */
480 s = splbio();
481 for (i = 0, swb = object->un_pager.swp.swp_blocks;
482 i < object->un_pager.swp.swp_nblocks; i++, swb++) {
483 for (j = 0; j < SWB_NPAGES; j++) {
484 if (swb->swb_block[j] != SWB_EMPTY) {
485 /*
486 * initially the length of the run is zero
487 */
488 if (block_count == 0) {
489 first_block = swb->swb_block[j];
490 block_count = btodb(PAGE_SIZE);
491 swb->swb_block[j] = SWB_EMPTY;
492 /*
493 * if the new block can be included into the current run
494 */
495 } else if (swb->swb_block[j] == first_block + block_count) {
496 block_count += btodb(PAGE_SIZE);
497 swb->swb_block[j] = SWB_EMPTY;
498 /*
499 * terminate the previous run, and start a new one
500 */
501 } else {
502 swap_pager_freeswapspace(object, first_block,
503 (unsigned) first_block + block_count - 1);
504 first_block = swb->swb_block[j];
505 block_count = btodb(PAGE_SIZE);
506 swb->swb_block[j] = SWB_EMPTY;
507 }
508 }
509 }
510 }
511
512 if (block_count) {
513 swap_pager_freeswapspace(object, first_block,
514 (unsigned) first_block + block_count - 1);
515 }
516 splx(s);
517 }
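
/*
 * Coalescing example for the loop above (4K pages, btodb(PAGE_SIZE)
 * == 8): slots at disk blocks 40, 48, 56 and 80 produce a single
 * rlist_free of 40..63 (one three-page run) followed by one of 80..87,
 * rather than four separate one-page frees.
 */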
518
519
520 /*
521 * swap_pager_reclaim frees up over-allocated space from all pagers
522 * this eliminates internal fragmentation due to allocation of space
523 * for segments that are never swapped to. It has been written so that
524 * it does not block until the rlist_free operation occurs; it keeps
525 * the queues consistent.
526 */
527
528 /*
529 * Maximum number of blocks (pages) to reclaim per pass
530 */
531 #define MAXRECLAIM 128
532
533 static void
534 swap_pager_reclaim()
535 {
536 vm_object_t object;
537 int i, j, k;
538 int s;
539 int reclaimcount;
540 static struct {
541 int address;
542 vm_object_t object;
543 } reclaims[MAXRECLAIM];
544 static int in_reclaim;
545
546 /*
547 * allow only one process to be in the swap_pager_reclaim subroutine
548 */
549 s = splbio();
550 if (in_reclaim) {
551 tsleep(&in_reclaim, PSWP, "swrclm", 0);
552 splx(s);
553 return;
554 }
555 in_reclaim = 1;
556 reclaimcount = 0;
557
558 /* for each pager queue */
559 for (k = 0; swp_qs[k]; k++) {
560
561 object = TAILQ_FIRST(swp_qs[k]);
562 while (object && (reclaimcount < MAXRECLAIM)) {
563
564 /*
565 * see if any blocks associated with a pager have been
566 * allocated but not used (written)
567 */
568 if ((object->flags & OBJ_DEAD) == 0 &&
569 (object->paging_in_progress == 0)) {
570 for (i = 0; i < object->un_pager.swp.swp_nblocks; i++) {
571 sw_blk_t swb = &object->un_pager.swp.swp_blocks[i];
572
573 if (swb->swb_locked)
574 continue;
575 for (j = 0; j < SWB_NPAGES; j++) {
576 if (swb->swb_block[j] != SWB_EMPTY &&
577 (swb->swb_valid & (1 << j)) == 0) {
578 reclaims[reclaimcount].address = swb->swb_block[j];
579 reclaims[reclaimcount++].object = object;
580 swb->swb_block[j] = SWB_EMPTY;
581 if (reclaimcount >= MAXRECLAIM)
582 goto rfinished;
583 }
584 }
585 }
586 }
587 object = TAILQ_NEXT(object, pager_object_list);
588 }
589 }
590
591 rfinished:
592
593 /*
594 * free the blocks that have been added to the reclaim list
595 */
596 for (i = 0; i < reclaimcount; i++) {
597 swap_pager_freeswapspace(reclaims[i].object,
598 reclaims[i].address, reclaims[i].address + btodb(PAGE_SIZE) - 1);
599 }
600 splx(s);
601 in_reclaim = 0;
602 wakeup(&in_reclaim);
603 }
604
605
606 /*
607 * swap_pager_copy copies blocks from one pager to another and
608 * destroys the source pager
609 */
610
611 void
612 swap_pager_copy(srcobject, srcoffset, dstobject, dstoffset, offset)
613 vm_object_t srcobject;
614 vm_pindex_t srcoffset;
615 vm_object_t dstobject;
616 vm_pindex_t dstoffset;
617 vm_pindex_t offset;
618 {
619 vm_pindex_t i;
620 int origsize;
621 int s;
622
623 if (vm_swap_size)
624 no_swap_space = 0;
625
626 origsize = srcobject->un_pager.swp.swp_allocsize;
627
628 /*
629 * remove the source object from the swap_pager internal queue
630 */
631 if (srcobject->handle == NULL) {
632 TAILQ_REMOVE(&swap_pager_un_object_list, srcobject, pager_object_list);
633 } else {
634 TAILQ_REMOVE(&swap_pager_object_list, srcobject, pager_object_list);
635 }
636
637 s = splbio();
638 while (srcobject->un_pager.swp.swp_poip) {
639 tsleep(srcobject, PVM, "spgout", 0);
640 }
641 splx(s);
642
643 /*
644 * clean all of the pages that are currently active and finished
645 */
646 swap_pager_sync();
647
648 s = splbio();
649 /*
650 * transfer source to destination
651 */
652 for (i = 0; i < dstobject->size; i += 1) {
653 int srcvalid, dstvalid;
654 daddr_t *srcaddrp = swap_pager_diskaddr(srcobject, i + offset + srcoffset,
655 &srcvalid);
656 daddr_t *dstaddrp;
657
658 /*
659 * see if the source has space allocated
660 */
661 if (srcaddrp && *srcaddrp != SWB_EMPTY) {
662 /*
663 * if the source is valid and the dest has no space,
664 * then copy the allocation from the source to the
665 * dest.
666 */
667 if (srcvalid) {
668 dstaddrp = swap_pager_diskaddr(dstobject, i + dstoffset,
669 &dstvalid);
670 /*
671 * if the dest already has a valid block,
672 * deallocate the source block without
673 * copying.
674 */
675 if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
676 swap_pager_freeswapspace(dstobject, *dstaddrp,
677 *dstaddrp + btodb(PAGE_SIZE) - 1);
678 *dstaddrp = SWB_EMPTY;
679 }
680 if (dstaddrp && *dstaddrp == SWB_EMPTY) {
681 *dstaddrp = *srcaddrp;
682 *srcaddrp = SWB_EMPTY;
683 dstobject->un_pager.swp.swp_allocsize += btodb(PAGE_SIZE);
684 srcobject->un_pager.swp.swp_allocsize -= btodb(PAGE_SIZE);
685 swap_pager_setvalid(dstobject, i + dstoffset, 1);
686 }
687 }
688 /*
689 * if the source is not empty at this point, then
690 * deallocate the space.
691 */
692 if (*srcaddrp != SWB_EMPTY) {
693 swap_pager_freeswapspace(srcobject, *srcaddrp,
694 *srcaddrp + btodb(PAGE_SIZE) - 1);
695 *srcaddrp = SWB_EMPTY;
696 }
697 }
698 }
699 splx(s);
700
701 /*
702 * Free left over swap blocks
703 */
704 swap_pager_free_swap(srcobject);
705
706 if (srcobject->un_pager.swp.swp_allocsize) {
707 printf("swap_pager_copy: *warning* pager with %d blocks (orig: %d)\n",
708 srcobject->un_pager.swp.swp_allocsize, origsize);
709 }
710
711 free(srcobject->un_pager.swp.swp_blocks, M_VMPGDATA);
712 srcobject->un_pager.swp.swp_blocks = NULL;
713
714 return;
715 }
716
717 static void
718 swap_pager_dealloc(object)
719 vm_object_t object;
720 {
721 int s;
722
723 /*
724 * Remove from list right away so lookups will fail if we block for
725 * pageout completion.
726 */
727 if (object->handle == NULL) {
728 TAILQ_REMOVE(&swap_pager_un_object_list, object, pager_object_list);
729 } else {
730 TAILQ_REMOVE(&swap_pager_object_list, object, pager_object_list);
731 }
732
733 /*
734 * Wait for all pageouts to finish and remove all entries from
735 * cleaning list.
736 */
737
738 s = splbio();
739 while (object->un_pager.swp.swp_poip) {
740 tsleep(object, PVM, "swpout", 0);
741 }
742 splx(s);
743
744
745 swap_pager_sync();
746
747 /*
748 * Free left over swap blocks
749 */
750 swap_pager_free_swap(object);
751
752 if (object->un_pager.swp.swp_allocsize) {
753 printf("swap_pager_dealloc: *warning* freeing pager with %d blocks\n",
754 object->un_pager.swp.swp_allocsize);
755 }
756 /*
757 * Free swap management resources
758 */
759 free(object->un_pager.swp.swp_blocks, M_VMPGDATA);
760 object->un_pager.swp.swp_blocks = NULL;
761 }
762
763 static __inline int
764 swap_pager_block_index(pindex)
765 vm_pindex_t pindex;
766 {
767 return (pindex / SWB_NPAGES);
768 }
769
770 static __inline int
771 swap_pager_block_offset(pindex)
772 vm_pindex_t pindex;
773 {
774 return (pindex % SWB_NPAGES);
775 }
776
777 /*
778 * swap_pager_haspage returns TRUE if the pager has data that has
779 * been written out.
780 */
781 static boolean_t
782 swap_pager_haspage(object, pindex, before, after)
783 vm_object_t object;
784 vm_pindex_t pindex;
785 int *before;
786 int *after;
787 {
788 register sw_blk_t swb;
789 int ix;
790
791 if (before != NULL)
792 *before = 0;
793 if (after != NULL)
794 *after = 0;
795 ix = pindex / SWB_NPAGES;
796 if (ix >= object->un_pager.swp.swp_nblocks) {
797 return (FALSE);
798 }
799 swb = &object->un_pager.swp.swp_blocks[ix];
800 ix = pindex % SWB_NPAGES;
801
802 if (swb->swb_block[ix] != SWB_EMPTY) {
803
804 if (swb->swb_valid & (1 << ix)) {
805 int tix;
806 if (before) {
807 for(tix = ix - 1; tix >= 0; --tix) {
808 if ((swb->swb_valid & (1 << tix)) == 0)
809 break;
810 if ((swb->swb_block[tix] +
811 (ix - tix) * (PAGE_SIZE/DEV_BSIZE)) !=
812 swb->swb_block[ix])
813 break;
814 (*before)++;
815 }
816 }
817
818 if (after) {
819 for(tix = ix + 1; tix < SWB_NPAGES; tix++) {
820 if ((swb->swb_valid & (1 << tix)) == 0)
821 break;
822 if ((swb->swb_block[tix] -
823 (tix - ix) * (PAGE_SIZE/DEV_BSIZE)) !=
824 swb->swb_block[ix])
825 break;
826 (*after)++;
827 }
828 }
829
830 return TRUE;
831 }
832 }
833 return (FALSE);
834 }
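
/*
 * Readahead example for the before/after scans above, with 4K pages
 * (PAGE_SIZE/DEV_BSIZE == 8): if slots 2..5 are valid at disk blocks
 * 100, 108, 116 and 124, a query on slot 4 returns TRUE with
 * *before == 2 and *after == 1, because each neighbor sits exactly 8
 * blocks from its predecessor on the swap device.
 */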
835
836 /*
837 * swap_pager_freepage is a convenience routine that clears the busy
838 * bit and deallocates a page.
839 */
840 static void
841 swap_pager_freepage(m)
842 vm_page_t m;
843 {
844 PAGE_WAKEUP(m);
845 vm_page_free(m);
846 }
847
848 /*
849 * swap_pager_ridpages is a convenience routine that deallocates all
850 * but the required page. this is usually used in error returns that
851 * need to invalidate the "extra" readahead pages.
852 */
853 static void
854 swap_pager_ridpages(m, count, reqpage)
855 vm_page_t *m;
856 int count;
857 int reqpage;
858 {
859 int i;
860
861 for (i = 0; i < count; i++)
862 if (i != reqpage)
863 swap_pager_freepage(m[i]);
864 }
865
866 /*
867 * swap_pager_iodone1 is the completion routine for both reads and async writes
868 */
869 static void
870 swap_pager_iodone1(bp)
871 struct buf *bp;
872 {
873 bp->b_flags |= B_DONE;
874 bp->b_flags &= ~B_ASYNC;
875 wakeup(bp);
876 }
877
878 static int
879 swap_pager_getpages(object, m, count, reqpage)
880 vm_object_t object;
881 vm_page_t *m;
882 int count, reqpage;
883 {
884 register struct buf *bp;
885 sw_blk_t swb[count];
886 register int s;
887 int i;
888 boolean_t rv;
889 vm_offset_t kva, off[count];
890 swp_clean_t spc;
891 vm_pindex_t paging_offset;
892 int reqaddr[count];
893 int sequential;
894
895 int first, last;
896 int failed;
897 int reqdskregion;
898
899 object = m[reqpage]->object;
900 paging_offset = OFF_TO_IDX(object->paging_offset);
901 sequential = (m[reqpage]->pindex == (object->last_read + 1));
902
903 for (i = 0; i < count; i++) {
904 vm_pindex_t fidx = m[i]->pindex + paging_offset;
905 int ix = swap_pager_block_index(fidx);
906
907 if (ix >= object->un_pager.swp.swp_nblocks) {
908 int j;
909
910 if (i <= reqpage) {
911 swap_pager_ridpages(m, count, reqpage);
912 return (VM_PAGER_FAIL);
913 }
914 for (j = i; j < count; j++) {
915 swap_pager_freepage(m[j]);
916 }
917 count = i;
918 break;
919 }
920 swb[i] = &object->un_pager.swp.swp_blocks[ix];
921 off[i] = swap_pager_block_offset(fidx);
922 reqaddr[i] = swb[i]->swb_block[off[i]];
923 }
924
925 /* make sure that our required input request exists */
926
927 if (reqaddr[reqpage] == SWB_EMPTY ||
928 (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
929 swap_pager_ridpages(m, count, reqpage);
930 return (VM_PAGER_FAIL);
931 }
932 reqdskregion = reqaddr[reqpage] / dmmax;
933
934 /*
935 * search backwards for the first contiguous page to transfer
936 */
937 failed = 0;
938 first = 0;
939 for (i = reqpage - 1; i >= 0; --i) {
940 if (sequential || failed || (reqaddr[i] == SWB_EMPTY) ||
941 (swb[i]->swb_valid & (1 << off[i])) == 0 ||
942 (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
943 ((reqaddr[i] / dmmax) != reqdskregion)) {
944 failed = 1;
945 swap_pager_freepage(m[i]);
946 if (first == 0)
947 first = i + 1;
948 }
949 }
950 /*
951 * search forwards for the last contiguous page to transfer
952 */
953 failed = 0;
954 last = count;
955 for (i = reqpage + 1; i < count; i++) {
956 if (failed || (reqaddr[i] == SWB_EMPTY) ||
957 (swb[i]->swb_valid & (1 << off[i])) == 0 ||
958 (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
959 ((reqaddr[i] / dmmax) != reqdskregion)) {
960 failed = 1;
961 swap_pager_freepage(m[i]);
962 if (last == count)
963 last = i;
964 }
965 }
966
967 count = last;
968 if (first != 0) {
969 for (i = first; i < count; i++) {
970 m[i - first] = m[i];
971 reqaddr[i - first] = reqaddr[i];
972 off[i - first] = off[i];
973 }
974 count -= first;
975 reqpage -= first;
976 }
977 ++swb[reqpage]->swb_locked;
978
979 /*
980 * at this point: "m" is a pointer to the array of vm_page_t for
981 * paging I/O; "count" is the number of vm_page_t entries represented
982 * by "m"; "object" is the vm_object_t for I/O; "reqpage" is the index
983 * into "m" for the page actually faulted
984 */
985
986 spc = NULL;
987 if ((count == 1) && ((spc = TAILQ_FIRST(&swap_pager_free)) != NULL)) {
988 TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
989 swap_pager_free_count--;
990 kva = spc->spc_kva;
991 bp = spc->spc_bp;
992 bzero(bp, sizeof *bp);
993 bp->b_spc = spc;
994 bp->b_vnbufs.le_next = NOLIST;
995 } else {
996 /*
997 * Get a swap buffer header to perform the IO
998 */
999 bp = getpbuf();
1000 kva = (vm_offset_t) bp->b_data;
1001 }
1002
1003 /*
1004 * map our page(s) into kva for input
1005 */
1006 pmap_qenter(kva, m, count);
1007
1008 bp->b_flags = B_BUSY | B_READ | B_CALL | B_PAGING;
1009 bp->b_iodone = swap_pager_iodone1;
1010 bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */
1011 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
1012 crhold(bp->b_rcred);
1013 crhold(bp->b_wcred);
1014 bp->b_un.b_addr = (caddr_t) kva;
1015 bp->b_blkno = reqaddr[0];
1016 bp->b_bcount = PAGE_SIZE * count;
1017 bp->b_bufsize = PAGE_SIZE * count;
1018
1019 pbgetvp(swapdev_vp, bp);
1020
1021 cnt.v_swapin++;
1022 cnt.v_swappgsin += count;
1023 /*
1024 * perform the I/O
1025 */
1026 VOP_STRATEGY(bp);
1027
1028 /*
1029 * wait for the sync I/O to complete
1030 */
1031 s = splbio();
1032 while ((bp->b_flags & B_DONE) == 0) {
1033 if (tsleep(bp, PVM, "swread", hz*20)) {
1034 printf("swap_pager: indefinite wait buffer: device: %d, blkno: %d, size: %d\n",
1035 bp->b_dev, bp->b_blkno, bp->b_bcount);
1036 }
1037 }
1038
1039 if (bp->b_flags & B_ERROR) {
1040 printf("swap_pager: I/O error - pagein failed; blkno %d, size %d, error %d\n",
1041 bp->b_blkno, bp->b_bcount, bp->b_error);
1042 rv = VM_PAGER_ERROR;
1043 } else {
1044 rv = VM_PAGER_OK;
1045 }
1046
1047 /*
1048 * relpbuf does this, but we maintain our own buffer list also...
1049 */
1050 if (bp->b_vp)
1051 pbrelvp(bp);
1052
1053 splx(s);
1054 swb[reqpage]->swb_locked--;
1055
1056 /*
1057 * remove the mapping for kernel virtual
1058 */
1059 pmap_qremove(kva, count);
1060
1061 if (spc) {
1062 m[reqpage]->object->last_read = m[reqpage]->pindex;
1063 if (bp->b_flags & B_WANTED)
1064 wakeup(bp);
1065 /*
1066 * if we have used an spc, we need to free it.
1067 */
1068 if (bp->b_rcred != NOCRED)
1069 crfree(bp->b_rcred);
1070 if (bp->b_wcred != NOCRED)
1071 crfree(bp->b_wcred);
1072 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
1073 swap_pager_free_count++;
1074 if (swap_pager_needflags & SWAP_FREE_NEEDED) {
1075 wakeup(&swap_pager_free);
1076 }
1077 if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
1078 pagedaemon_wakeup();
1079 swap_pager_needflags &= ~(SWAP_FREE_NEEDED|SWAP_FREE_NEEDED_BY_PAGEOUT);
1080 if (rv == VM_PAGER_OK) {
1081 pmap_clear_modify(VM_PAGE_TO_PHYS(m[reqpage]));
1082 m[reqpage]->valid = VM_PAGE_BITS_ALL;
1083 m[reqpage]->dirty = 0;
1084 }
1085 } else {
1086 /*
1087 * release the physical I/O buffer
1088 */
1089 relpbuf(bp);
1090 /*
1091 * finish up input if everything is ok
1092 */
1093 if (rv == VM_PAGER_OK) {
1094 for (i = 0; i < count; i++) {
1095 pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
1096 m[i]->dirty = 0;
1097 m[i]->flags &= ~PG_ZERO;
1098 if (i != reqpage) {
1099 /*
1100 * whether or not to leave the page
1101 * activated is up in the air, but we
1102 * should put the page on a page queue
1103 * somewhere. (it already is in the
1104 * object). After some empirical
1105 * results, it is best to deactivate
1106 * the readahead pages.
1107 */
1108 vm_page_deactivate(m[i]);
1109
1110 /*
1111 * just in case someone was asking for
1112 * this page we now tell them that it
1113 * is ok to use
1114 */
1115 m[i]->valid = VM_PAGE_BITS_ALL;
1116 PAGE_WAKEUP(m[i]);
1117 }
1118 }
1119
1120 m[reqpage]->object->last_read = m[count-1]->pindex;
1121
1122 /*
1123 * If we're out of swap space, then attempt to free
1124 * some whenever multiple pages are brought in. We
1125 * must set the dirty bits so that the page contents
1126 * will be preserved.
1127 */
1128 if (SWAPLOW) {
1129 for (i = 0; i < count; i++) {
1130 m[i]->dirty = VM_PAGE_BITS_ALL;
1131 }
1132 swap_pager_freespace(object, m[0]->pindex + paging_offset, count);
1133 }
1134 } else {
1135 swap_pager_ridpages(m, count, reqpage);
1136 }
1137 }
1138 return (rv);
1139 }
1140
1141 int
1142 swap_pager_putpages(object, m, count, sync, rtvals)
1143 vm_object_t object;
1144 vm_page_t *m;
1145 int count;
1146 boolean_t sync;
1147 int *rtvals;
1148 {
1149 register struct buf *bp;
1150 sw_blk_t swb[count];
1151 register int s;
1152 int i, j, ix, firstidx, lastidx;
1153 boolean_t rv;
1154 vm_offset_t kva, off, fidx;
1155 swp_clean_t spc;
1156 vm_pindex_t paging_pindex;
1157 int reqaddr[count];
1158 int failed;
1159
1160 if (vm_swap_size)
1161 no_swap_space = 0;
1162
1163 if (no_swap_space) {
1164 for (i = 0; i < count; i++)
1165 rtvals[i] = VM_PAGER_FAIL;
1166 return VM_PAGER_FAIL;
1167 }
1168 spc = NULL;
1169
1170 object = m[0]->object;
1171 paging_pindex = OFF_TO_IDX(object->paging_offset);
1172
1173 failed = 0;
1174 for (j = 0; j < count; j++) {
1175 fidx = m[j]->pindex + paging_pindex;
1176 ix = swap_pager_block_index(fidx);
1177 swb[j] = 0;
1178 if (ix >= object->un_pager.swp.swp_nblocks) {
1179 rtvals[j] = VM_PAGER_FAIL;
1180 failed = 1;
1181 continue;
1182 } else {
1183 rtvals[j] = VM_PAGER_OK;
1184 }
1185 swb[j] = &object->un_pager.swp.swp_blocks[ix];
1186 swb[j]->swb_locked++;
1187 if (failed) {
1188 rtvals[j] = VM_PAGER_FAIL;
1189 continue;
1190 }
1191 off = swap_pager_block_offset(fidx);
1192 reqaddr[j] = swb[j]->swb_block[off];
1193 if (reqaddr[j] == SWB_EMPTY) {
1194 daddr_t blk;
1195 int tries;
1196 int ntoget;
1197
1198 tries = 0;
1199 s = splbio();
1200
1201 /*
1202 * if any other pages have been allocated in this
1203 * block, we only try to get one page.
1204 */
1205 for (i = 0; i < SWB_NPAGES; i++) {
1206 if (swb[j]->swb_block[i] != SWB_EMPTY)
1207 break;
1208 }
1209
1210 ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
1211 /*
1212 * this code is a little conservative, but works (the
1213 * intent of this code is to allocate small chunks for
1214 * small objects)
1215 */
1216 if ((off == 0) && ((fidx + ntoget) > object->size)) {
1217 ntoget = object->size - fidx;
1218 }
1219 retrygetspace:
1220 if (!swap_pager_full && ntoget > 1 &&
1221 swap_pager_getswapspace(object, ntoget * btodb(PAGE_SIZE),
1222 &blk)) {
1223
1224 for (i = 0; i < ntoget; i++) {
1225 swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
1226 swb[j]->swb_valid = 0;
1227 }
1228
1229 reqaddr[j] = swb[j]->swb_block[off];
1230 } else if (!swap_pager_getswapspace(object, btodb(PAGE_SIZE),
1231 &swb[j]->swb_block[off])) {
1232 /*
1233 * if the allocation has failed, we try to
1234 * reclaim space and retry.
1235 */
1236 if (++tries == 1) {
1237 swap_pager_reclaim();
1238 goto retrygetspace;
1239 }
1240 rtvals[j] = VM_PAGER_AGAIN;
1241 failed = 1;
1242 swap_pager_full = 1;
1243 } else {
1244 reqaddr[j] = swb[j]->swb_block[off];
1245 swb[j]->swb_valid &= ~(1 << off);
1246 }
1247 splx(s);
1248 }
1249 }
1250
1251 /*
1252 * search forwards for the last contiguous page to transfer
1253 */
1254 failed = 0;
1255 for (i = 0; i < count; i++) {
1256 if (failed ||
1257 (reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
1258 ((reqaddr[i] / dmmax) != (reqaddr[0] / dmmax)) ||
1259 (rtvals[i] != VM_PAGER_OK)) {
1260 failed = 1;
1261 if (rtvals[i] == VM_PAGER_OK)
1262 rtvals[i] = VM_PAGER_AGAIN;
1263 }
1264 }
1265
1266 ix = 0;
1267 firstidx = -1;
1268 for (i = 0; i < count; i++) {
1269 if (rtvals[i] == VM_PAGER_OK) {
1270 ix++;
1271 if (firstidx == -1) {
1272 firstidx = i;
1273 }
1274 } else if (firstidx >= 0) {
1275 break;
1276 }
1277 }
1278
1279 if (firstidx == -1) {
1280 if ((object->paging_in_progress == 0) &&
1281 (object->flags & OBJ_PIPWNT)) {
1282 object->flags &= ~OBJ_PIPWNT;
1283 wakeup(object);
1284 }
1285 return VM_PAGER_AGAIN;
1286 }
1287
1288 lastidx = firstidx + ix;
1289
1290 for (i = 0; i < firstidx; i++) {
1291 if (swb[i])
1292 swb[i]->swb_locked--;
1293 }
1294
1295 for (i = lastidx; i < count; i++) {
1296 if (swb[i])
1297 swb[i]->swb_locked--;
1298 }
1299
1300 for (i = firstidx; i < lastidx; i++) {
1301 if (reqaddr[i] == SWB_EMPTY) {
1302 printf("I/O to empty block???? -- pindex: %d, i: %d\n",
1303 m[i]->pindex, i);
1304 }
1305 }
1306
1307 /*
1308 * For synchronous writes, we clean up all completed async pageouts.
1309 */
1310 if (sync == TRUE) {
1311 swap_pager_sync();
1312 }
1313 kva = 0;
1314
1315 /*
1316 * get a swap pager clean data structure, block until we get it
1317 */
1318 if (swap_pager_free_count <= 3) {
1319 s = splbio();
1320 if (curproc == pageproc) {
1321 retryfree:
1322 /*
1323 * pageout daemon needs a swap control block
1324 */
1325 swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT|SWAP_FREE_NEEDED;
1326 /*
1327 * if it does not get one within a short time, then
1328 * there is a potential deadlock, so we go on trying
1329 * to free pages. It is important to block here as opposed
1330 * to returning, thereby allowing the pageout daemon to continue.
1331 * It is likely that pageout daemon will start suboptimally
1332 * reclaiming vnode backed pages if we don't block. Since the
1333 * I/O subsystem is probably already fully utilized, might as
1334 * well wait.
1335 */
1336 if (tsleep(&swap_pager_free, PVM, "swpfre", hz/5)) {
1337 swap_pager_sync();
1338 if (swap_pager_free_count <= 3) {
1339 for (i = firstidx; i < lastidx; i++) {
1340 rtvals[i] = VM_PAGER_AGAIN;
1341 }
1342 splx(s);
1343 return VM_PAGER_AGAIN;
1344 }
1345 } else {
1346 /*
1347 * we make sure that pageouts aren't taking up all of
1348 * the free swap control blocks.
1349 */
1350 swap_pager_sync();
1351 if (swap_pager_free_count <= 3) {
1352 goto retryfree;
1353 }
1354 }
1355 } else {
1356 pagedaemon_wakeup();
1357 while (swap_pager_free_count <= 3) {
1358 swap_pager_needflags |= SWAP_FREE_NEEDED;
1359 tsleep(&swap_pager_free, PVM, "swpfre", 0);
1360 pagedaemon_wakeup();
1361 }
1362 }
1363 splx(s);
1364 }
1365 spc = TAILQ_FIRST(&swap_pager_free);
1366 if (spc == NULL)
1367 panic("swap_pager_putpages: free queue is empty, %d expected\n",
1368 swap_pager_free_count);
1369 TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
1370 swap_pager_free_count--;
1371
1372 kva = spc->spc_kva;
1373
1374 /*
1375 * map our page(s) into kva for I/O
1376 */
1377 pmap_qenter(kva, &m[firstidx], ix);
1378
1379 /*
1380 * get the base I/O offset into the swap file
1381 */
1382 for (i = firstidx; i < lastidx ; i++) {
1383 fidx = m[i]->pindex + paging_pindex;
1384 off = swap_pager_block_offset(fidx);
1385 /*
1386 * set the valid bit
1387 */
1388 swb[i]->swb_valid |= (1 << off);
1389 /*
1390 * and unlock the data structure
1391 */
1392 swb[i]->swb_locked--;
1393 }
1394
1395 /*
1396 * Get a swap buffer header and perform the IO
1397 */
1398 bp = spc->spc_bp;
1399 bzero(bp, sizeof *bp);
1400 bp->b_spc = spc;
1401 bp->b_vnbufs.le_next = NOLIST;
1402
1403 bp->b_flags = B_BUSY | B_PAGING;
1404 bp->b_proc = &proc0; /* XXX (but without B_PHYS set this is ok) */
1405 bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
1406 if (bp->b_rcred != NOCRED)
1407 crhold(bp->b_rcred);
1408 if (bp->b_wcred != NOCRED)
1409 crhold(bp->b_wcred);
1410 bp->b_data = (caddr_t) kva;
1411 bp->b_blkno = reqaddr[firstidx];
1412 pbgetvp(swapdev_vp, bp);
1413
1414 bp->b_bcount = PAGE_SIZE * ix;
1415 bp->b_bufsize = PAGE_SIZE * ix;
1416 swapdev_vp->v_numoutput++;
1417
1418 /*
1419 * If this is an async write we set up additional buffer fields and
1420 * place a "cleaning" entry on the inuse queue.
1421 */
1422 s = splbio();
1423 if (sync == FALSE) {
1424 spc->spc_flags = 0;
1425 spc->spc_object = object;
1426 for (i = firstidx; i < lastidx; i++)
1427 spc->spc_m[i] = m[i];
1428 spc->spc_first = firstidx;
1429 spc->spc_count = ix;
1430 /*
1431 * the completion routine for async writes
1432 */
1433 bp->b_flags |= B_CALL;
1434 bp->b_iodone = swap_pager_iodone;
1435 bp->b_dirtyoff = 0;
1436 bp->b_dirtyend = bp->b_bcount;
1437 object->un_pager.swp.swp_poip++;
1438 TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
1439 } else {
1440 object->un_pager.swp.swp_poip++;
1441 bp->b_flags |= B_CALL;
1442 bp->b_iodone = swap_pager_iodone1;
1443 }
1444
1445 cnt.v_swapout++;
1446 cnt.v_swappgsout += ix;
1447 /*
1448 * perform the I/O
1449 */
1450 VOP_STRATEGY(bp);
1451 if (sync == FALSE) {
1452 if ((bp->b_flags & B_DONE) == B_DONE) {
1453 swap_pager_sync();
1454 }
1455 splx(s);
1456 for (i = firstidx; i < lastidx; i++) {
1457 rtvals[i] = VM_PAGER_PEND;
1458 }
1459 return VM_PAGER_PEND;
1460 }
1461 /*
1462 * wait for the sync I/O to complete
1463 */
1464 while ((bp->b_flags & B_DONE) == 0) {
1465 tsleep(bp, PVM, "swwrt", 0);
1466 }
1467 if (bp->b_flags & B_ERROR) {
1468 printf("swap_pager: I/O error - pageout failed; blkno %d, size %d, error %d\n",
1469 bp->b_blkno, bp->b_bcount, bp->b_error);
1470 rv = VM_PAGER_ERROR;
1471 } else {
1472 rv = VM_PAGER_OK;
1473 }
1474
1475 object->un_pager.swp.swp_poip--;
1476 if (object->un_pager.swp.swp_poip == 0)
1477 wakeup(object);
1478
1479 if (bp->b_vp)
1480 pbrelvp(bp);
1481 if (bp->b_flags & B_WANTED)
1482 wakeup(bp);
1483
1484 splx(s);
1485
1486 /*
1487 * remove the mapping for kernel virtual
1488 */
1489 pmap_qremove(kva, ix);
1490
1491 /*
1492 * if we have written the page, then indicate that the page is clean.
1493 */
1494 if (rv == VM_PAGER_OK) {
1495 for (i = firstidx; i < lastidx; i++) {
1496 if (rtvals[i] == VM_PAGER_OK) {
1497 pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
1498 m[i]->dirty = 0;
1499 /*
1500 * optimization, if a page has been read
1501 * during the pageout process, we activate it.
1502 */
1503 if ((m[i]->queue != PQ_ACTIVE) &&
1504 ((m[i]->flags & (PG_WANTED|PG_REFERENCED)) ||
1505 pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))) {
1506 vm_page_activate(m[i]);
1507 }
1508 }
1509 }
1510 } else {
1511 for (i = firstidx; i < lastidx; i++) {
1512 rtvals[i] = rv;
1513 }
1514 }
1515
1516 if (bp->b_rcred != NOCRED)
1517 crfree(bp->b_rcred);
1518 if (bp->b_wcred != NOCRED)
1519 crfree(bp->b_wcred);
1520 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
1521 swap_pager_free_count++;
1522 if (swap_pager_needflags & SWAP_FREE_NEEDED) {
1523 wakeup(&swap_pager_free);
1524 }
1525 if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
1526 pagedaemon_wakeup();
1527 swap_pager_needflags &= ~(SWAP_FREE_NEEDED|SWAP_FREE_NEEDED_BY_PAGEOUT);
1528 return (rv);
1529 }
1530
1531 static void
1532 swap_pager_sync()
1533 {
1534 register swp_clean_t spc, tspc;
1535 register int s;
1536
1537 tspc = NULL;
1538 if (TAILQ_FIRST(&swap_pager_done) == NULL)
1539 return;
1540 for (;;) {
1541 s = splbio();
1542 /*
1543 * Lookup and removal from the done list must be done at splbio()
1544 * to avoid conflicts with swap_pager_iodone.
1545 */
1546 while ((spc = TAILQ_FIRST(&swap_pager_done)) != 0) {
1547 pmap_qremove(spc->spc_kva, spc->spc_count);
1548 swap_pager_finish(spc);
1549 TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
1550 goto doclean;
1551 }
1552
1553 /*
1554 * No operations done; that's all we can do for now.
1555 */
1556
1557 splx(s);
1558 break;
1559
1560 /*
1561 * The desired page was found to be busy earlier in the scan
1562 * but has since completed.
1563 */
1564 doclean:
1565 if (tspc && tspc == spc) {
1566 tspc = NULL;
1567 }
1568 spc->spc_flags = 0;
1569 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
1570 swap_pager_free_count++;
1571 if (swap_pager_needflags & SWAP_FREE_NEEDED) {
1572 wakeup(&swap_pager_free);
1573 }
1574 if( swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
1575 pagedaemon_wakeup();
1576 swap_pager_needflags &= ~(SWAP_FREE_NEEDED|SWAP_FREE_NEEDED_BY_PAGEOUT);
1577 splx(s);
1578 }
1579
1580 return;
1581 }
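
/*
 * spc lifecycle sketch, as implemented above and in the putpages and
 * iodone routines: free -> (async putpages) -> inuse -> (iodone at
 * interrupt time) -> done -> (swap_pager_sync/swap_pager_finish) ->
 * free. Synchronous pageouts and one-page pageins borrow an spc but
 * return it directly without passing through the inuse/done lists.
 */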
1582
1583 void
1584 swap_pager_finish(spc)
1585 register swp_clean_t spc;
1586 {
1587 int lastidx = spc->spc_first + spc->spc_count;
1588 vm_page_t *ma = spc->spc_m;
1589 vm_object_t object = ma[spc->spc_first]->object;
1590 int i;
1591
1592 object->paging_in_progress -= spc->spc_count;
1593 if ((object->paging_in_progress == 0) &&
1594 (object->flags & OBJ_PIPWNT)) {
1595 object->flags &= ~OBJ_PIPWNT;
1596 wakeup(object);
1597 }
1598
1599 /*
1600 * If no error, mark as clean and inform the pmap system. If error,
1601 * mark as dirty so we will try again. (XXX could get stuck doing
1602 * this; should give up after a while)
1603 */
1604 if (spc->spc_flags & SPC_ERROR) {
1605 for (i = spc->spc_first; i < lastidx; i++) {
1606 printf("swap_pager_finish: I/O error, clean of page %lx failed\n",
1607 (u_long) VM_PAGE_TO_PHYS(ma[i]));
1608 }
1609 } else {
1610 for (i = spc->spc_first; i < lastidx; i++) {
1611 pmap_clear_modify(VM_PAGE_TO_PHYS(ma[i]));
1612 ma[i]->dirty = 0;
1613 if ((ma[i]->queue != PQ_ACTIVE) &&
1614 ((ma[i]->flags & PG_WANTED) ||
1615 pmap_ts_referenced(VM_PAGE_TO_PHYS(ma[i]))))
1616 vm_page_activate(ma[i]);
1617 }
1618 }
1619
1620
1621 for (i = spc->spc_first; i < lastidx; i++) {
1622 /*
1623 * we wake up any processes that are waiting on these pages.
1624 */
1625 PAGE_WAKEUP(ma[i]);
1626 }
1627 nswiodone -= spc->spc_count;
1628
1629 return;
1630 }
1631
1632 /*
1633 * swap_pager_iodone
1634 */
1635 static void
1636 swap_pager_iodone(bp)
1637 register struct buf *bp;
1638 {
1639 register swp_clean_t spc;
1640 int s;
1641
1642 s = splbio();
1643 spc = (swp_clean_t) bp->b_spc;
1644 TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
1645 TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
1646 if (bp->b_flags & B_ERROR) {
1647 spc->spc_flags |= SPC_ERROR;
1648 printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d\n",
1649 (bp->b_flags & B_READ) ? "pagein" : "pageout",
1650 (u_long) bp->b_blkno, bp->b_bcount, bp->b_error);
1651 }
1652
1653 if (bp->b_vp)
1654 pbrelvp(bp);
1655
1656 /*
1657 if (bp->b_flags & B_WANTED)
1658 */
1659 wakeup(bp);
1660
1661 if (bp->b_rcred != NOCRED)
1662 crfree(bp->b_rcred);
1663 if (bp->b_wcred != NOCRED)
1664 crfree(bp->b_wcred);
1665
1666 nswiodone += spc->spc_count;
1667 if (--spc->spc_object->un_pager.swp.swp_poip == 0) {
1668 wakeup(spc->spc_object);
1669 }
1670 if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
1671 TAILQ_FIRST(&swap_pager_inuse) == 0) {
1672 swap_pager_needflags &= ~SWAP_FREE_NEEDED;
1673 wakeup(&swap_pager_free);
1674 }
1675
1676 if( swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) {
1677 swap_pager_needflags &= ~SWAP_FREE_NEEDED_BY_PAGEOUT;
1678 pagedaemon_wakeup();
1679 }
1680
1681 if (vm_pageout_pages_needed) {
1682 wakeup(&vm_pageout_pages_needed);
1683 vm_pageout_pages_needed = 0;
1684 }
1685 if ((TAILQ_FIRST(&swap_pager_inuse) == NULL) ||
1686 ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min &&
1687 nswiodone + cnt.v_free_count + cnt.v_cache_count >= cnt.v_free_min)) {
1688 pagedaemon_wakeup();
1689 }
1690 splx(s);
1691 }