The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/vm/swap_pager.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1994 John S. Dyson
    3  * Copyright (c) 1990 University of Utah.
    4  * Copyright (c) 1991, 1993
    5  *      The Regents of the University of California.  All rights reserved.
    6  *
    7  * This code is derived from software contributed to Berkeley by
    8  * the Systems Programming Group of the University of Utah Computer
    9  * Science Department.
   10  *
   11  * Redistribution and use in source and binary forms, with or without
   12  * modification, are permitted provided that the following conditions
   13  * are met:
   14  * 1. Redistributions of source code must retain the above copyright
   15  *    notice, this list of conditions and the following disclaimer.
   16  * 2. Redistributions in binary form must reproduce the above copyright
   17  *    notice, this list of conditions and the following disclaimer in the
   18  *    documentation and/or other materials provided with the distribution.
   19  * 3. All advertising materials mentioning features or use of this software
   20  *    must display the following acknowledgement:
   21  *      This product includes software developed by the University of
   22  *      California, Berkeley and its contributors.
   23  * 4. Neither the name of the University nor the names of its contributors
   24  *    may be used to endorse or promote products derived from this software
   25  *    without specific prior written permission.
   26  *
   27  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   28  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   29  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   30  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   31  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   32  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   33  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   34  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   35  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   36  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   37  * SUCH DAMAGE.
   38  *
   39  * from: Utah $Hdr: swap_pager.c 1.4 91/04/30$
   40  *
   41  *      @(#)swap_pager.c        8.9 (Berkeley) 3/21/94
   42  * $FreeBSD: src/sys/vm/swap_pager.c,v 1.72.2.3 1999/09/05 08:24:19 peter Exp $
   43  */
   44 
   45 /*
   46  * Quick hack to page to dedicated partition(s).
   47  * TODO:
   48  *      Add multiprocessor locks
   49  *      Deal with async writes in a better fashion
   50  */
   51 
   52 #include <sys/param.h>
   53 #include <sys/systm.h>
   54 #include <sys/kernel.h>
   55 #include <sys/proc.h>
   56 #include <sys/buf.h>
   57 #include <sys/vnode.h>
   58 #include <sys/malloc.h>
   59 #include <sys/vmmeter.h>
   60 
   61 #include <miscfs/specfs/specdev.h>
   62 #include <sys/rlist.h>
   63 
   64 #include <vm/vm.h>
   65 #include <vm/vm_param.h>
   66 #include <vm/vm_prot.h>
   67 #include <vm/vm_object.h>
   68 #include <vm/vm_page.h>
   69 #include <vm/vm_pager.h>
   70 #include <vm/vm_pageout.h>
   71 #include <vm/swap_pager.h>
   72 #include <vm/vm_kern.h>
   73 #include <vm/vm_extern.h>
   74 
   75 #ifndef NPENDINGIO
   76 #define NPENDINGIO      10
   77 #endif
   78 
   79 static int nswiodone;
   80 int swap_pager_full;
   81 extern int vm_swap_size;
   82 static int no_swap_space = 1;
   83 struct rlisthdr swaplist;
   84 
   85 #define MAX_PAGEOUT_CLUSTER 16
   86 
   87 TAILQ_HEAD(swpclean, swpagerclean);
   88 
   89 typedef struct swpagerclean *swp_clean_t;
   90 
   91 static struct swpagerclean {
   92         TAILQ_ENTRY(swpagerclean) spc_list;
   93         int spc_flags;
   94         struct buf *spc_bp;
   95         vm_object_t spc_object;
   96         vm_offset_t spc_kva;
   97         int spc_first;
   98         int spc_count;
   99         vm_page_t spc_m[MAX_PAGEOUT_CLUSTER];
  100 } swcleanlist[NPENDINGIO];
  101 
  102 
  103 /* spc_flags values */
  104 #define SPC_ERROR       0x01
  105 
  106 #define SWB_EMPTY (-1)
  107 
  108 /* list of completed page cleans */
  109 static struct swpclean swap_pager_done;
  110 
  111 /* list of pending page cleans */
  112 static struct swpclean swap_pager_inuse;
  113 
  114 /* list of free pager clean structs */
  115 static struct swpclean swap_pager_free;
  116 int swap_pager_free_count;
  117 
  118 /* list of "named" anon region objects */
  119 static struct pagerlst swap_pager_object_list;
  120 
  121 /* list of "unnamed" anon region objects */
  122 struct pagerlst swap_pager_un_object_list;
  123 
  124 #define SWAP_FREE_NEEDED        0x1     /* need a swap block */
  125 #define SWAP_FREE_NEEDED_BY_PAGEOUT 0x2
  126 static int swap_pager_needflags;
  127 
  128 static struct pagerlst *swp_qs[] = {
  129         &swap_pager_object_list, &swap_pager_un_object_list, (struct pagerlst *) 0
  130 };
  131 
  132 /*
  133  * pagerops for OBJT_SWAP - "swap pager".
  134  */
  135 static vm_object_t
  136                 swap_pager_alloc __P((void *handle, vm_size_t size,
  137                                       vm_prot_t prot, vm_ooffset_t offset));
  138 static void     swap_pager_dealloc __P((vm_object_t object));
  139 static boolean_t
  140                 swap_pager_haspage __P((vm_object_t object, vm_pindex_t pindex,
  141                                         int *before, int *after));
  142 static int      swap_pager_getpages __P((vm_object_t, vm_page_t *, int, int));
  143 static void     swap_pager_init __P((void));
  144 static void     swap_pager_sync __P((void));
  145 
  146 struct pagerops swappagerops = {
  147         swap_pager_init,
  148         swap_pager_alloc,
  149         swap_pager_dealloc,
  150         swap_pager_getpages,
  151         swap_pager_putpages,
  152         swap_pager_haspage,
  153         swap_pager_sync
  154 };
  155 
  156 static int npendingio = NPENDINGIO;
  157 static int dmmin;
  158 int dmmax;
  159 
  160 static int      swap_pager_block_index __P((vm_pindex_t pindex));
  161 static int      swap_pager_block_offset __P((vm_pindex_t pindex));
  162 static daddr_t *swap_pager_diskaddr __P((vm_object_t object,
  163                                           vm_pindex_t pindex, int *valid));
  164 static void     swap_pager_finish __P((swp_clean_t spc));
  165 static void     swap_pager_freepage __P((vm_page_t m));
  166 static void     swap_pager_free_swap __P((vm_object_t object));
  167 static void     swap_pager_freeswapspace __P((vm_object_t object,
  168                                               unsigned int from,
  169                                               unsigned int to));
  170 static int      swap_pager_getswapspace __P((vm_object_t object,
  171                                              unsigned int amount,
  172                                              daddr_t *rtval));
  173 static void     swap_pager_iodone __P((struct buf *));
  174 static void     swap_pager_iodone1 __P((struct buf *bp));
  175 static void     swap_pager_reclaim __P((void));
  176 static void     swap_pager_ridpages __P((vm_page_t *m, int count,
  177                                          int reqpage));
  178 static void     swap_pager_setvalid __P((vm_object_t object,
  179                                          vm_offset_t offset, int valid));
  180 static void     swapsizecheck __P((void));
  181 
  182 #define SWAPLOW (vm_swap_size < (512 * btodb(PAGE_SIZE)))
  183 
  184 static __inline void
  185 swapsizecheck()
  186 {
  187         if (vm_swap_size < 128 * btodb(PAGE_SIZE)) {
  188                 if (swap_pager_full == 0)
  189                         printf("swap_pager: out of swap space\n");
  190                 swap_pager_full = 1;
  191         } else if (vm_swap_size > 192 * btodb(PAGE_SIZE))
  192                 swap_pager_full = 0;
  193 }
  194 
/*
 * swap_pager_init: one-time pager initialization, installed as the
 * init hook in swappagerops.  Sets up the named/unnamed object lists,
 * the (initially empty) pageout-clean lists, and the swap allocation
 * constants dmmin/dmmax.
 */
static void
swap_pager_init()
{
	TAILQ_INIT(&swap_pager_object_list);
	TAILQ_INIT(&swap_pager_un_object_list);

	/*
	 * Initialize clean lists
	 */
	TAILQ_INIT(&swap_pager_inuse);
	TAILQ_INIT(&swap_pager_done);
	TAILQ_INIT(&swap_pager_free);
	swap_pager_free_count = 0;

	/*
	 * Calculate the swap allocation constants.
	 */
	dmmin = PAGE_SIZE / DEV_BSIZE;	/* disk blocks per page */
	/* twice the disk blocks in one SWB_NPAGES cluster */
	dmmax = btodb(SWB_NPAGES * PAGE_SIZE) * 2;
}
  215 
  216 void
  217 swap_pager_swap_init()
  218 {
  219         swp_clean_t spc;
  220         struct buf *bp;
  221         int i;
  222 
  223         /*
  224          * kva's are allocated here so that we dont need to keep doing
  225          * kmem_alloc pageables at runtime
  226          */
  227         for (i = 0, spc = swcleanlist; i < npendingio; i++, spc++) {
  228                 spc->spc_kva = kmem_alloc_pageable(pager_map, PAGE_SIZE * MAX_PAGEOUT_CLUSTER);
  229                 if (!spc->spc_kva) {
  230                         break;
  231                 }
  232                 spc->spc_bp = malloc(sizeof(*bp), M_TEMP, M_KERNEL);
  233                 if (!spc->spc_bp) {
  234                         kmem_free_wakeup(pager_map, spc->spc_kva, PAGE_SIZE);
  235                         break;
  236                 }
  237                 spc->spc_flags = 0;
  238                 TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
  239                 swap_pager_free_count++;
  240         }
  241 }
  242 
/*
 * swap_pager_swp_alloc: allocate and initialize the swap block map
 * (array of sw_blk) for an OBJT_SWAP object, and link the object onto
 * the appropriate pager list — "named" if it has a handle, otherwise
 * the unnamed list.
 *
 * object - swap object to set up
 * wait   - malloc wait flag passed straight through (M_WAITOK/M_NOWAIT)
 *
 * Returns 0 on success, 1 if the block map could not be allocated.
 */
int
swap_pager_swp_alloc(object, wait)
	vm_object_t object;
	int wait;
{
	sw_blk_t swb;
	int nblocks;
	int i, j;

	/* one sw_blk covers SWB_NPAGES pages; round up */
	nblocks = (object->size + SWB_NPAGES - 1) / SWB_NPAGES;
	swb = malloc(nblocks * sizeof(*swb), M_VMPGDATA, wait);
	if (swb == NULL)
		return 1;

	/* every slot starts empty, with no page marked written */
	for (i = 0; i < nblocks; i++) {
		swb[i].swb_valid = 0;
		swb[i].swb_locked = 0;
		for (j = 0; j < SWB_NPAGES; j++)
			swb[i].swb_block[j] = SWB_EMPTY;
	}

	object->un_pager.swp.swp_nblocks = nblocks;
	object->un_pager.swp.swp_allocsize = 0;	/* swap space in use (see swap_pager_getswapspace) */
	object->un_pager.swp.swp_blocks = swb;
	object->un_pager.swp.swp_poip = 0;	/* pageouts in progress */

	if (object->handle != NULL) {
		TAILQ_INSERT_TAIL(&swap_pager_object_list, object, pager_object_list);
	} else {
		TAILQ_INSERT_TAIL(&swap_pager_un_object_list, object, pager_object_list);
	}

	return 0;
}
  277 
  278 /*
  279  * Allocate an object and associated resources.
  280  * Note that if we are called from the pageout daemon (handle == NULL)
   281  * we should not wait for memory as it could result in deadlock.
  282  */
/*
 * swap_pager_alloc: pagerops alloc hook.  Allocate an OBJT_SWAP object
 * (and its swap block map) for the given size/offset.  A non-NULL
 * handle identifies a "named" anonymous region that may be shared.
 *
 * NOTE(review): swap_pager_swp_alloc's return value is deliberately
 * ignored here; with M_WAITOK the malloc presumably cannot fail, but
 * that is not checked — confirm before relying on it.
 */
static vm_object_t
swap_pager_alloc(handle, size, prot, offset)
	void *handle;
	register vm_size_t size;
	vm_prot_t prot;
	vm_ooffset_t offset;
{
	vm_object_t object;

	/*
	 * If this is a "named" anonymous region, look it up and use the
	 * object if it exists, otherwise allocate a new one.
	 */
	if (handle) {
		object = vm_pager_object_lookup(&swap_pager_object_list, handle);
		if (object != NULL) {
			/* existing named region: share it */
			vm_object_reference(object);
		} else {
			/*
			 * XXX - there is a race condition here. Two processes
			 * can request the same named object simultaneously,
			 * and if one blocks for memory, the result is a disaster.
			 * Probably quite rare, but is yet another reason to just
			 * rip support of "named anonymous regions" out altogether.
			 */
			object = vm_object_allocate(OBJT_SWAP,
				OFF_TO_IDX(offset + PAGE_MASK) + size);
			object->handle = handle;
			(void) swap_pager_swp_alloc(object, M_WAITOK);
		}
	} else {
		/* unnamed (private) anonymous region */
		object = vm_object_allocate(OBJT_SWAP,
			OFF_TO_IDX(offset + PAGE_MASK) + size);
		(void) swap_pager_swp_alloc(object, M_WAITOK);
	}

	return (object);
}
  321 
  322 /*
  323  * returns disk block associated with pager and offset
  324  * additionally, as a side effect returns a flag indicating
  325  * if the block has been written
  326  */
  327 
  328 __inline static daddr_t *
  329 swap_pager_diskaddr(object, pindex, valid)
  330         vm_object_t object;
  331         vm_pindex_t pindex;
  332         int *valid;
  333 {
  334         register sw_blk_t swb;
  335         int ix;
  336 
  337         if (valid)
  338                 *valid = 0;
  339         ix = pindex / SWB_NPAGES;
  340         if ((ix >= object->un_pager.swp.swp_nblocks) ||
  341             (pindex >= object->size)) {
  342                 return (FALSE);
  343         }
  344         swb = &object->un_pager.swp.swp_blocks[ix];
  345         ix = pindex % SWB_NPAGES;
  346         if (valid)
  347                 *valid = swb->swb_valid & (1 << ix);
  348         return &swb->swb_block[ix];
  349 }
  350 
  351 /*
  352  * Utility routine to set the valid (written) bit for
  353  * a block associated with a pager and offset
  354  */
  355 static void
  356 swap_pager_setvalid(object, offset, valid)
  357         vm_object_t object;
  358         vm_offset_t offset;
  359         int valid;
  360 {
  361         register sw_blk_t swb;
  362         int ix;
  363 
  364         ix = offset / SWB_NPAGES;
  365         if (ix >= object->un_pager.swp.swp_nblocks)
  366                 return;
  367 
  368         swb = &object->un_pager.swp.swp_blocks[ix];
  369         ix = offset % SWB_NPAGES;
  370         if (valid)
  371                 swb->swb_valid |= (1 << ix);
  372         else
  373                 swb->swb_valid &= ~(1 << ix);
  374         return;
  375 }
  376 
  377 /*
  378  * this routine allocates swap space with a fragmentation
  379  * minimization policy.
  380  */
  381 static int
  382 swap_pager_getswapspace(object, amount, rtval)
  383         vm_object_t object;
  384         unsigned int amount;
  385         daddr_t *rtval;
  386 {
  387         unsigned location;
  388         vm_swap_size -= amount;
  389         if (!rlist_alloc(&swaplist, amount, &location)) {
  390                 vm_swap_size += amount;
  391                 return 0;
  392         } else {
  393                 swapsizecheck();
  394                 object->un_pager.swp.swp_allocsize += amount;
  395                 *rtval = location;
  396                 return 1;
  397         }
  398 }
  399 
  400 /*
  401  * this routine frees swap space with a fragmentation
  402  * minimization policy.
  403  */
  404 static void
  405 swap_pager_freeswapspace(object, from, to)
  406         vm_object_t object;
  407         unsigned int from;
  408         unsigned int to;
  409 {
  410         rlist_free(&swaplist, from, to);
  411         vm_swap_size += (to - from) + 1;
  412         object->un_pager.swp.swp_allocsize -= (to - from) + 1;
  413         swapsizecheck();
  414 }
  415 /*
  416  * this routine frees swap blocks from a specified pager
  417  */
  418 void
  419 swap_pager_freespace(object, start, size)
  420         vm_object_t object;
  421         vm_pindex_t start;
  422         vm_size_t size;
  423 {
  424         vm_pindex_t i;
  425         int s;
  426 
  427         s = splbio();
  428         for (i = start; i < start + size; i += 1) {
  429                 int valid;
  430                 daddr_t *addr = swap_pager_diskaddr(object, i, &valid);
  431 
  432                 if (addr && *addr != SWB_EMPTY) {
  433                         swap_pager_freeswapspace(object, *addr, *addr + btodb(PAGE_SIZE) - 1);
  434                         if (valid) {
  435                                 swap_pager_setvalid(object, i, 0);
  436                         }
  437                         *addr = SWB_EMPTY;
  438                 }
  439         }
  440         splx(s);
  441 }
  442 
  443 /*
  444  * same as freespace, but don't free, just force a DMZ next time
  445  */
  446 void
  447 swap_pager_dmzspace(object, start, size)
  448         vm_object_t object;
  449         vm_pindex_t start;
  450         vm_size_t size;
  451 {
  452         vm_pindex_t i;
  453         int s;
  454 
  455         s = splbio();
  456         for (i = start; i < start + size; i += 1) {
  457                 int valid;
  458                 daddr_t *addr = swap_pager_diskaddr(object, i, &valid);
  459 
  460                 if (addr && *addr != SWB_EMPTY) {
  461                         if (valid) {
  462                                 swap_pager_setvalid(object, i, 0);
  463                         }
  464                 }
  465         }
  466         splx(s);
  467 }
  468 
/*
 * swap_pager_free_swap: release every remaining swap block owned by
 * the object.  Adjacent disk blocks are coalesced into runs so each
 * swap_pager_freeswapspace call covers the longest contiguous span,
 * minimizing rlist traffic.  Runs at splbio.
 */
static void
swap_pager_free_swap(object)
	vm_object_t object;
{
	register int i, j;
	register sw_blk_t swb;
	/* current run: start address and length (disk blocks); 0 = no run */
	int first_block=0, block_count=0;
	int s;
	/*
	 * Free left over swap blocks
	 */
	s = splbio();
	for (i = 0, swb = object->un_pager.swp.swp_blocks;
	    i < object->un_pager.swp.swp_nblocks; i++, swb++) {
		for (j = 0; j < SWB_NPAGES; j++) {
			if (swb->swb_block[j] != SWB_EMPTY) {
				/*
				 * initially the length of the run is zero
				 */
				if (block_count == 0) {
					first_block = swb->swb_block[j];
					block_count = btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				/*
				 * if the new block can be included into the current run
				 */
				} else if (swb->swb_block[j] == first_block + block_count) {
					block_count += btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				/*
				 * terminate the previous run, and start a new one
				 */
				} else {
					swap_pager_freeswapspace(object, first_block,
					 (unsigned) first_block + block_count - 1);
					first_block = swb->swb_block[j];
					block_count = btodb(PAGE_SIZE);
					swb->swb_block[j] = SWB_EMPTY;
				}
			}
		}
	}

	/* flush the final run, if any */
	if (block_count) {
		swap_pager_freeswapspace(object, first_block,
			 (unsigned) first_block + block_count - 1);
	}
	splx(s);
}
  518 
  519 
  520 /*
  521  * swap_pager_reclaim frees up over-allocated space from all pagers
  522  * this eliminates internal fragmentation due to allocation of space
  523  * for segments that are never swapped to. It has been written so that
  524  * it does not block until the rlist_free operation occurs; it keeps
   525  * the queues consistent.
  526  */
  527 
  528 /*
  529  * Maximum number of blocks (pages) to reclaim per pass
  530  */
  531 #define MAXRECLAIM 128
  532 
/*
 * swap_pager_reclaim: walk every swap object on both pager queues and
 * reclaim swap blocks that were allocated but never written (valid bit
 * clear), up to MAXRECLAIM blocks per pass.  Addresses are first
 * detached from the block maps into a local list, then freed in a
 * second phase, so the queues stay consistent across the scan.
 *
 * Single-threaded via the static in_reclaim flag: a second caller
 * sleeps until the current pass finishes and then simply returns,
 * relying on the work the first caller did.
 */
static void
swap_pager_reclaim()
{
	vm_object_t object;
	int i, j, k;
	int s;
	int reclaimcount;
	/* addresses detached during the scan, freed in phase two */
	static struct {
		int address;
		vm_object_t object;
	} reclaims[MAXRECLAIM];
	static int in_reclaim;

	/*
	 * allow only one process to be in the swap_pager_reclaim subroutine
	 */
	s = splbio();
	if (in_reclaim) {
		tsleep(&in_reclaim, PSWP, "swrclm", 0);
		splx(s);
		return;
	}
	in_reclaim = 1;
	reclaimcount = 0;

	/* for each pager queue */
	for (k = 0; swp_qs[k]; k++) {

		object = TAILQ_FIRST(swp_qs[k]);
		while (object && (reclaimcount < MAXRECLAIM)) {

			/*
			 * see if any blocks associated with a pager has been
			 * allocated but not used (written)
			 */
			if ((object->flags & OBJ_DEAD) == 0 &&
				(object->paging_in_progress == 0)) {
				for (i = 0; i < object->un_pager.swp.swp_nblocks; i++) {
					sw_blk_t swb = &object->un_pager.swp.swp_blocks[i];

					/* skip clusters with I/O in flight */
					if (swb->swb_locked)
						continue;
					for (j = 0; j < SWB_NPAGES; j++) {
						if (swb->swb_block[j] != SWB_EMPTY &&
						    (swb->swb_valid & (1 << j)) == 0) {
							reclaims[reclaimcount].address = swb->swb_block[j];
							reclaims[reclaimcount++].object = object;
							swb->swb_block[j] = SWB_EMPTY;
							if (reclaimcount >= MAXRECLAIM)
								goto rfinished;
						}
					}
				}
			}
			object = TAILQ_NEXT(object, pager_object_list);
		}
	}

rfinished:

	/*
	 * free the blocks that have been added to the reclaim list
	 */
	for (i = 0; i < reclaimcount; i++) {
		swap_pager_freeswapspace(reclaims[i].object,
		    reclaims[i].address, reclaims[i].address + btodb(PAGE_SIZE) - 1);
	}
	splx(s);
	in_reclaim = 0;
	wakeup(&in_reclaim);
}
  604 
  605 
  606 /*
  607  * swap_pager_copy copies blocks from one pager to another and
  608  * destroys the source pager
  609  */
  610 
/*
 * swap_pager_copy: move valid swap blocks from srcobject to dstobject
 * (page i of the destination maps to page i + offset + srcoffset of
 * the source) and then tear down the source pager's swap state.
 *
 * Sequence: unlink the source from its pager queue, wait out any
 * pageouts in progress, sync completed cleans, transfer/free blocks
 * at splbio, free whatever blocks remain, and finally release the
 * source's block map.  The source object itself is not freed here.
 */
void
swap_pager_copy(srcobject, srcoffset, dstobject, dstoffset, offset)
	vm_object_t srcobject;
	vm_pindex_t srcoffset;
	vm_object_t dstobject;
	vm_pindex_t dstoffset;
	vm_pindex_t offset;
{
	vm_pindex_t i;
	int origsize;
	int s;

	if (vm_swap_size)
		no_swap_space = 0;

	/* remembered only for the consistency warning below */
	origsize = srcobject->un_pager.swp.swp_allocsize;

	/*
	 * remove the source object from the swap_pager internal queue
	 */
	if (srcobject->handle == NULL) {
		TAILQ_REMOVE(&swap_pager_un_object_list, srcobject, pager_object_list);
	} else {
		TAILQ_REMOVE(&swap_pager_object_list, srcobject, pager_object_list);
	}

	/* wait for pageouts in progress on the source to drain */
	s = splbio();
	while (srcobject->un_pager.swp.swp_poip) {
		tsleep(srcobject, PVM, "spgout", 0);
	}
	splx(s);

	/*
	 * clean all of the pages that are currently active and finished
	 */
	swap_pager_sync();

	s = splbio();
	/*
	 * transfer source to destination
	 */
	for (i = 0; i < dstobject->size; i += 1) {
		int srcvalid, dstvalid;
		daddr_t *srcaddrp = swap_pager_diskaddr(srcobject, i + offset + srcoffset,
						    &srcvalid);
		daddr_t *dstaddrp;

		/*
		 * see if the source has space allocated
		 */
		if (srcaddrp && *srcaddrp != SWB_EMPTY) {
			/*
			 * if the source is valid and the dest has no space,
			 * then copy the allocation from the source to the
			 * dest.
			 */
			if (srcvalid) {
				dstaddrp = swap_pager_diskaddr(dstobject, i + dstoffset,
							&dstvalid);
				/*
				 * if the dest already has a valid block,
				 * deallocate the source block without
				 * copying.
				 */
				if (!dstvalid && dstaddrp && *dstaddrp != SWB_EMPTY) {
					swap_pager_freeswapspace(dstobject, *dstaddrp,
						*dstaddrp + btodb(PAGE_SIZE) - 1);
					*dstaddrp = SWB_EMPTY;
				}
				if (dstaddrp && *dstaddrp == SWB_EMPTY) {
					/* move the block and its accounting */
					*dstaddrp = *srcaddrp;
					*srcaddrp = SWB_EMPTY;
					dstobject->un_pager.swp.swp_allocsize += btodb(PAGE_SIZE);
					srcobject->un_pager.swp.swp_allocsize -= btodb(PAGE_SIZE);
					swap_pager_setvalid(dstobject, i + dstoffset, 1);
				}
			}
			/*
			 * if the source is not empty at this point, then
			 * deallocate the space.
			 */
			if (*srcaddrp != SWB_EMPTY) {
				swap_pager_freeswapspace(srcobject, *srcaddrp,
					*srcaddrp + btodb(PAGE_SIZE) - 1);
				*srcaddrp = SWB_EMPTY;
			}
		}
	}
	splx(s);

	/*
	 * Free left over swap blocks
	 */
	swap_pager_free_swap(srcobject);

	/* accounting should now be zero; warn if blocks leaked */
	if (srcobject->un_pager.swp.swp_allocsize) {
		printf("swap_pager_copy: *warning* pager with %d blocks (orig: %d)\n",
		    srcobject->un_pager.swp.swp_allocsize, origsize);
	}

	free(srcobject->un_pager.swp.swp_blocks, M_VMPGDATA);
	srcobject->un_pager.swp.swp_blocks = NULL;

	return;
}
  716 
/*
 * swap_pager_dealloc: pagerops dealloc hook.  Tear down an OBJT_SWAP
 * object's pager state: unlink it from its pager queue, wait for
 * in-flight pageouts, sync completed cleans, free all of its swap
 * blocks, then free the block map itself.
 */
static void
swap_pager_dealloc(object)
	vm_object_t object;
{
	int s;

	/*
	 * Remove from list right away so lookups will fail if we block for
	 * pageout completion.
	 */
	if (object->handle == NULL) {
		TAILQ_REMOVE(&swap_pager_un_object_list, object, pager_object_list);
	} else {
		TAILQ_REMOVE(&swap_pager_object_list, object, pager_object_list);
	}

	/*
	 * Wait for all pageouts to finish and remove all entries from
	 * cleaning list.
	 */

	s = splbio();
	while (object->un_pager.swp.swp_poip) {
		tsleep(object, PVM, "swpout", 0);
	}
	splx(s);


	swap_pager_sync();

	/*
	 * Free left over swap blocks
	 */
	swap_pager_free_swap(object);

	/* accounting should be zero by now; warn if blocks leaked */
	if (object->un_pager.swp.swp_allocsize) {
		printf("swap_pager_dealloc: *warning* freeing pager with %d blocks\n",
		    object->un_pager.swp.swp_allocsize);
	}
	/*
	 * Free swap management resources
	 */
	free(object->un_pager.swp.swp_blocks, M_VMPGDATA);
	object->un_pager.swp.swp_blocks = NULL;
}
  762 
/*
 * swap_pager_block_index: index of the sw_blk covering page index
 * "pindex" (each sw_blk maps SWB_NPAGES pages).
 */
static __inline int
swap_pager_block_index(pindex)
	vm_pindex_t pindex;
{
	return (pindex / SWB_NPAGES);
}
  769 
/*
 * swap_pager_block_offset: slot of page index "pindex" within its
 * covering sw_blk (companion to swap_pager_block_index).
 */
static __inline int
swap_pager_block_offset(pindex)
	vm_pindex_t pindex;
{
	return (pindex % SWB_NPAGES);
}
  776 
  777 /*
  778  * swap_pager_haspage returns TRUE if the pager has data that has
  779  * been written out.
  780  */
/*
 * swap_pager_haspage: pagerops haspage hook.  Returns TRUE iff page
 * "pindex" of the object is backed by a written (valid) swap block.
 *
 * When TRUE and before/after are non-NULL, they are set to how many
 * immediately preceding/following pages within the SAME sw_blk are
 * both valid and physically contiguous on disk with this page — i.e.
 * how far a clustered I/O could extend in each direction.  They are
 * zeroed on entry, so a FALSE return leaves them 0.
 */
static boolean_t
swap_pager_haspage(object, pindex, before, after)
	vm_object_t object;
	vm_pindex_t pindex;
	int *before;
	int *after;
{
	register sw_blk_t swb;
	int ix;

	if (before != NULL)
		*before = 0;
	if (after != NULL)
		*after = 0;
	ix = pindex / SWB_NPAGES;
	if (ix >= object->un_pager.swp.swp_nblocks) {
		return (FALSE);
	}
	swb = &object->un_pager.swp.swp_blocks[ix];
	ix = pindex % SWB_NPAGES;

	if (swb->swb_block[ix] != SWB_EMPTY) {

		if (swb->swb_valid & (1 << ix)) {
			int tix;
			if (before) {
				/* count contiguous valid pages behind us */
				for(tix = ix - 1; tix >= 0; --tix) {
					if ((swb->swb_valid & (1 << tix)) == 0)
						break;
					/* must sit exactly (ix-tix) pages before on disk */
					if ((swb->swb_block[tix] +
						(ix - tix) * (PAGE_SIZE/DEV_BSIZE)) !=
						swb->swb_block[ix])
						break;
					(*before)++;
				}
			}

			if (after) {
				/* count contiguous valid pages ahead of us */
				for(tix = ix + 1; tix < SWB_NPAGES; tix++) {
					if ((swb->swb_valid & (1 << tix)) == 0)
						break;
					/* must sit exactly (tix-ix) pages after on disk */
					if ((swb->swb_block[tix] -
						(tix - ix) * (PAGE_SIZE/DEV_BSIZE)) !=
						swb->swb_block[ix])
						break;
					(*after)++;
				}
			}

			return TRUE;
		}
	}
	return (FALSE);
}
  835 
/*
 * swap_pager_freepage is a convenience routine that wakes any thread
 * sleeping on the busy page (PAGE_WAKEUP) and then releases the page
 * back to the free queue.  The wakeup must precede the free so that
 * sleepers are not left waiting on a page that no longer exists.
 */
static void
swap_pager_freepage(m)
	vm_page_t m;
{
	PAGE_WAKEUP(m);
	vm_page_free(m);
}
  847 
  848 /*
  849  * swap_pager_ridpages is a convienience routine that deallocates all
  850  * but the required page.  this is usually used in error returns that
  851  * need to invalidate the "extra" readahead pages.
  852  */
  853 static void
  854 swap_pager_ridpages(m, count, reqpage)
  855         vm_page_t *m;
  856         int count;
  857         int reqpage;
  858 {
  859         int i;
  860 
  861         for (i = 0; i < count; i++)
  862                 if (i != reqpage)
  863                         swap_pager_freepage(m[i]);
  864 }
  865 
  866 /*
  867  * swap_pager_iodone1 is the completion routine for both reads and async writes
  868  */
  869 static void
  870 swap_pager_iodone1(bp)
  871         struct buf *bp;
  872 {
  873         bp->b_flags |= B_DONE;
  874         bp->b_flags &= ~B_ASYNC;
  875         wakeup(bp);
  876 }
  877 
/*
 * swap_pager_getpages - read m[reqpage] (and as many of its neighbors in
 * m[] as are contiguous on the swap device) from swap in one I/O.
 * Pages that cannot be part of the transfer are freed before the I/O is
 * issued; on failure all pages except the required one are discarded.
 * Returns VM_PAGER_OK, VM_PAGER_FAIL or VM_PAGER_ERROR.
 */
static int
swap_pager_getpages(object, m, count, reqpage)
	vm_object_t object;
	vm_page_t *m;
	int count, reqpage;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i;
	boolean_t rv;
	vm_offset_t kva, off[count];
	swp_clean_t spc;
	vm_pindex_t paging_offset;
	int reqaddr[count];
	int sequential;

	int first, last;
	int failed;
	int reqdskregion;

	object = m[reqpage]->object;
	paging_offset = OFF_TO_IDX(object->paging_offset);
	/*
	 * Sequential access heuristic: if the faulted page directly follows
	 * the last page read, backward readahead is suppressed below.
	 */
	sequential = (m[reqpage]->pindex == (object->last_read + 1));

	/*
	 * Resolve each candidate page to its swap block structure and
	 * device block address.  Pages past the swap metadata are trimmed
	 * from the tail; if the required page itself is unmapped, fail.
	 */
	for (i = 0; i < count; i++) {
		vm_pindex_t fidx = m[i]->pindex + paging_offset;
		int ix = swap_pager_block_index(fidx);

		if (ix >= object->un_pager.swp.swp_nblocks) {
			int j;

			if (i <= reqpage) {
				swap_pager_ridpages(m, count, reqpage);
				return (VM_PAGER_FAIL);
			}
			for (j = i; j < count; j++) {
				swap_pager_freepage(m[j]);
			}
			count = i;
			break;
		}
		swb[i] = &object->un_pager.swp.swp_blocks[ix];
		off[i] = swap_pager_block_offset(fidx);
		reqaddr[i] = swb[i]->swb_block[off[i]];
	}

	/* make sure that our required input request is existent */

	if (reqaddr[reqpage] == SWB_EMPTY ||
	    (swb[reqpage]->swb_valid & (1 << off[reqpage])) == 0) {
		swap_pager_ridpages(m, count, reqpage);
		return (VM_PAGER_FAIL);
	}
	/*
	 * NOTE(review): dmmax appears to delimit swap interleave regions;
	 * clustered transfers must not cross a dmmax boundary -- confirm
	 * against the swap device striping code.
	 */
	reqdskregion = reqaddr[reqpage] / dmmax;

	/*
	 * search backwards for the first contiguous page to transfer
	 * (skipped entirely for sequential access; any gap frees the page
	 * and everything before it)
	 */
	failed = 0;
	first = 0;
	for (i = reqpage - 1; i >= 0; --i) {
		if (sequential || failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (first == 0)
				first = i + 1;
		}
	}
	/*
	 * search forwards for the last contiguous page to transfer
	 */
	failed = 0;
	last = count;
	for (i = reqpage + 1; i < count; i++) {
		if (failed || (reqaddr[i] == SWB_EMPTY) ||
		    (swb[i]->swb_valid & (1 << off[i])) == 0 ||
		    (reqaddr[i] != (reqaddr[reqpage] + (i - reqpage) * btodb(PAGE_SIZE))) ||
		    ((reqaddr[i] / dmmax) != reqdskregion)) {
			failed = 1;
			swap_pager_freepage(m[i]);
			if (last == count)
				last = i;
		}
	}

	/*
	 * Compact the surviving window [first, last) to the front of the
	 * arrays and rebase reqpage accordingly.
	 */
	count = last;
	if (first != 0) {
		for (i = first; i < count; i++) {
			m[i - first] = m[i];
			reqaddr[i - first] = reqaddr[i];
			off[i - first] = off[i];
		}
		count -= first;
		reqpage -= first;
	}
	/* Hold the swap block metadata across the I/O. */
	++swb[reqpage]->swb_locked;

	/*
	 * at this point: "m" is a pointer to the array of vm_page_t for
	 * paging I/O "count" is the number of vm_page_t entries represented
	 * by "m" "object" is the vm_object_t for I/O "reqpage" is the index
	 * into "m" for the page actually faulted
	 */

	/*
	 * Fast path: a single-page read can borrow a preallocated swap
	 * cleaning structure (and its buf/kva) instead of getpbuf().
	 */
	spc = NULL;
	if ((count == 1) && ((spc = TAILQ_FIRST(&swap_pager_free)) != NULL)) {
		TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
		swap_pager_free_count--;
		kva = spc->spc_kva;
		bp = spc->spc_bp;
		bzero(bp, sizeof *bp);
		bp->b_spc = spc;
		bp->b_vnbufs.le_next = NOLIST;
	} else {
		/*
		 * Get a swap buffer header to perform the IO
		 */
		bp = getpbuf();
		kva = (vm_offset_t) bp->b_data;
	}

	/*
	 * map our page(s) into kva for input
	 */
	pmap_qenter(kva, m, count);

	bp->b_flags = B_BUSY | B_READ | B_CALL | B_PAGING;
	bp->b_iodone = swap_pager_iodone1;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	crhold(bp->b_rcred);
	crhold(bp->b_wcred);
	bp->b_un.b_addr = (caddr_t) kva;
	bp->b_blkno = reqaddr[0];
	bp->b_bcount = PAGE_SIZE * count;
	bp->b_bufsize = PAGE_SIZE * count;

	pbgetvp(swapdev_vp, bp);

	cnt.v_swapin++;
	cnt.v_swappgsin += count;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);

	/*
	 * wait for the sync I/O to complete; the 20-second tsleep timeout
	 * only prints a diagnostic -- we keep waiting for B_DONE.
	 */
	s = splbio();
	while ((bp->b_flags & B_DONE) == 0) {
		if (tsleep(bp, PVM, "swread", hz*20)) {
			printf("swap_pager: indefinite wait buffer: device: %d, blkno: %d, size: %d\n",
				bp->b_dev, bp->b_blkno, bp->b_bcount);
		}
	}

	if (bp->b_flags & B_ERROR) {
		printf("swap_pager: I/O error - pagein failed; blkno %d, size %d, error %d\n",
		    bp->b_blkno, bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}

	/*
	 * relpbuf does this, but we maintain our own buffer list also...
	 */
	if (bp->b_vp)
		pbrelvp(bp);

	splx(s);
	swb[reqpage]->swb_locked--;

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, count);

	if (spc) {
		/*
		 * spc path (single page): return the cleaning structure to
		 * the free list, wake any waiters, and mark the page valid
		 * and clean on success.
		 */
		m[reqpage]->object->last_read = m[reqpage]->pindex;
		if (bp->b_flags & B_WANTED)
			wakeup(bp);
		/*
		 * if we have used an spc, we need to free it.
		 */
		if (bp->b_rcred != NOCRED)
			crfree(bp->b_rcred);
		if (bp->b_wcred != NOCRED)
			crfree(bp->b_wcred);
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		swap_pager_free_count++;
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			wakeup(&swap_pager_free);
		}
		if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
			pagedaemon_wakeup();
		swap_pager_needflags &= ~(SWAP_FREE_NEEDED|SWAP_FREE_NEEDED_BY_PAGEOUT);
		if (rv == VM_PAGER_OK) {
			pmap_clear_modify(VM_PAGE_TO_PHYS(m[reqpage]));
			m[reqpage]->valid = VM_PAGE_BITS_ALL;
			m[reqpage]->dirty = 0;
		}
	} else {
		/*
		 * release the physical I/O buffer
		 */
		relpbuf(bp);
		/*
		 * finish up input if everything is ok
		 */
		if (rv == VM_PAGER_OK) {
			for (i = 0; i < count; i++) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->dirty = 0;
				m[i]->flags &= ~PG_ZERO;
				if (i != reqpage) {
					/*
					 * whether or not to leave the page
					 * activated is up in the air, but we
					 * should put the page on a page queue
					 * somewhere. (it already is in the
					 * object). After some empirical
					 * results, it is best to deactivate
					 * the readahead pages.
					 */
					vm_page_deactivate(m[i]);

					/*
					 * just in case someone was asking for
					 * this page we now tell them that it
					 * is ok to use
					 */
					m[i]->valid = VM_PAGE_BITS_ALL;
					PAGE_WAKEUP(m[i]);
				}
			}

			m[reqpage]->object->last_read = m[count-1]->pindex;

			/*
			 * If we're out of swap space, then attempt to free
			 * some whenever multiple pages are brought in. We
			 * must set the dirty bits so that the page contents
			 * will be preserved.
			 */
			if (SWAPLOW) {
				for (i = 0; i < count; i++) {
					m[i]->dirty = VM_PAGE_BITS_ALL;
				}
				swap_pager_freespace(object, m[0]->pindex + paging_offset, count);
			}
		} else {
			swap_pager_ridpages(m, count, reqpage);
		}
	}
	return (rv);
}
 1140 
/*
 * swap_pager_putpages - write a run of pages in m[] out to swap,
 * allocating swap space on demand.  Only a contiguous-on-disk run
 * starting at the first OK page is actually written; per-page status
 * is returned in rtvals[] (VM_PAGER_OK / PEND / FAIL / AGAIN).  When
 * "sync" is FALSE the write is queued asynchronously and PEND is
 * returned; otherwise the routine sleeps until the I/O completes.
 */
int
swap_pager_putpages(object, m, count, sync, rtvals)
	vm_object_t object;
	vm_page_t *m;
	int count;
	boolean_t sync;
	int *rtvals;
{
	register struct buf *bp;
	sw_blk_t swb[count];
	register int s;
	int i, j, ix, firstidx, lastidx;
	boolean_t rv;
	vm_offset_t kva, off, fidx;
	swp_clean_t spc;
	vm_pindex_t paging_pindex;
	int reqaddr[count];
	int failed;

	/* Clear the no-space latch as soon as any swap is available. */
	if (vm_swap_size)
		no_swap_space = 0;

	if (no_swap_space) {
		for (i = 0; i < count; i++)
			rtvals[i] = VM_PAGER_FAIL;
		return VM_PAGER_FAIL;
	}
	spc = NULL;

	object = m[0]->object;
	paging_pindex = OFF_TO_IDX(object->paging_offset);

	/*
	 * Map each page to its swap block, locking the block metadata,
	 * and allocate swap space for pages that have none yet.  Once one
	 * page fails, the rest are marked FAIL (their blocks stay locked
	 * and are unlocked in the trimming pass below).
	 */
	failed = 0;
	for (j = 0; j < count; j++) {
		fidx = m[j]->pindex + paging_pindex;
		ix = swap_pager_block_index(fidx);
		swb[j] = 0;
		if (ix >= object->un_pager.swp.swp_nblocks) {
			rtvals[j] = VM_PAGER_FAIL;
			failed = 1;
			continue;
		} else {
			rtvals[j] = VM_PAGER_OK;
		}
		swb[j] = &object->un_pager.swp.swp_blocks[ix];
		swb[j]->swb_locked++;
		if (failed) {
			rtvals[j] = VM_PAGER_FAIL;
			continue;
		}
		off = swap_pager_block_offset(fidx);
		reqaddr[j] = swb[j]->swb_block[off];
		if (reqaddr[j] == SWB_EMPTY) {
			daddr_t blk;
			int tries;
			int ntoget;

			tries = 0;
			s = splbio();

			/*
			 * if any other pages have been allocated in this
			 * block, we only try to get one page.
			 */
			for (i = 0; i < SWB_NPAGES; i++) {
				if (swb[j]->swb_block[i] != SWB_EMPTY)
					break;
			}

			ntoget = (i == SWB_NPAGES) ? SWB_NPAGES : 1;
			/*
			 * this code is a little conservative, but works (the
			 * intent of this code is to allocate small chunks for
			 * small objects)
			 */
			if ((off == 0) && ((fidx + ntoget) > object->size)) {
				ntoget = object->size - fidx;
			}
	retrygetspace:
			if (!swap_pager_full && ntoget > 1 &&
			    swap_pager_getswapspace(object, ntoget * btodb(PAGE_SIZE),
				&blk)) {

				/* Got a multi-page chunk: carve it into slots. */
				for (i = 0; i < ntoget; i++) {
					swb[j]->swb_block[i] = blk + btodb(PAGE_SIZE) * i;
					swb[j]->swb_valid = 0;
				}

				reqaddr[j] = swb[j]->swb_block[off];
			} else if (!swap_pager_getswapspace(object, btodb(PAGE_SIZE),
				&swb[j]->swb_block[off])) {
				/*
				 * if the allocation has failed, we try to
				 * reclaim space and retry (once).
				 */
				if (++tries == 1) {
					swap_pager_reclaim();
					goto retrygetspace;
				}
				rtvals[j] = VM_PAGER_AGAIN;
				failed = 1;
				swap_pager_full = 1;
			} else {
				reqaddr[j] = swb[j]->swb_block[off];
				swb[j]->swb_valid &= ~(1 << off);
			}
			splx(s);
		}
	}

	/*
	 * search forwards for the last contiguous page to transfer:
	 * the run must be contiguous on disk relative to reqaddr[0] and
	 * stay within one dmmax region.
	 */
	failed = 0;
	for (i = 0; i < count; i++) {
		if (failed ||
			(reqaddr[i] != reqaddr[0] + i * btodb(PAGE_SIZE)) ||
		    ((reqaddr[i] / dmmax) != (reqaddr[0] / dmmax)) ||
		    (rtvals[i] != VM_PAGER_OK)) {
			failed = 1;
			if (rtvals[i] == VM_PAGER_OK)
				rtvals[i] = VM_PAGER_AGAIN;
		}
	}

	/*
	 * Find the first run of OK pages: firstidx is its start, ix its
	 * length.
	 */
	ix = 0;
	firstidx = -1;
	for (i = 0; i < count; i++) {
		if (rtvals[i] == VM_PAGER_OK) {
			ix++;
			if (firstidx == -1) {
				firstidx = i;
			}
		} else if (firstidx >= 0) {
			break;
		}
	}

	if (firstidx == -1) {
		/* Nothing writable; wake anyone waiting on paging activity. */
		if ((object->paging_in_progress == 0) &&
			(object->flags & OBJ_PIPWNT)) {
			object->flags &= ~OBJ_PIPWNT;
			wakeup(object);
		}
		return VM_PAGER_AGAIN;
	}

	lastidx = firstidx + ix;

	/* Unlock the swap blocks of pages excluded from the transfer. */
	for (i = 0; i < firstidx; i++) {
		if (swb[i])
			swb[i]->swb_locked--;
	}

	for (i = lastidx; i < count; i++) {
		if (swb[i])
			swb[i]->swb_locked--;
	}

	/* Sanity check: every page in the run must have a swap address. */
	for (i = firstidx; i < lastidx; i++) {
		if (reqaddr[i] == SWB_EMPTY) {
			printf("I/O to empty block???? -- pindex: %d, i: %d\n",
				m[i]->pindex, i);
		}
	}

	/*
	 * For synchronous writes, we clean up all completed async pageouts.
	 */
	if (sync == TRUE) {
		swap_pager_sync();
	}
	kva = 0;

	/*
	 * get a swap pager clean data structure, block until we get it.
	 * The pageout daemon must not sleep forever here (potential
	 * deadlock), so it uses a short timeout and keeps retrying.
	 */
	if (swap_pager_free_count <= 3) {
		s = splbio();
		if (curproc == pageproc) {
retryfree:
			/*
			 * pageout daemon needs a swap control block
			 */
			swap_pager_needflags |= SWAP_FREE_NEEDED_BY_PAGEOUT|SWAP_FREE_NEEDED;
			/*
			 * if it does not get one within a short time, then
			 * there is a potential deadlock, so we go-on trying
			 * to free pages.  It is important to block here as opposed
			 * to returning, thereby allowing the pageout daemon to continue.
			 * It is likely that pageout daemon will start suboptimally
			 * reclaiming vnode backed pages if we don't block.  Since the
			 * I/O subsystem is probably already fully utilized, might as
			 * well wait.
			 */
			if (tsleep(&swap_pager_free, PVM, "swpfre", hz/5)) {
				swap_pager_sync();
				if (swap_pager_free_count <= 3) {
					for (i = firstidx; i < lastidx; i++) {
						rtvals[i] = VM_PAGER_AGAIN;
					}
					splx(s);
					return VM_PAGER_AGAIN;
				}
			} else {
			/*
			 * we make sure that pageouts aren't taking up all of
			 * the free swap control blocks.
			 */
				swap_pager_sync();
				if (swap_pager_free_count <= 3) {
					goto retryfree;
				}
			}
		} else {
			/* Ordinary process: sleep until an spc is freed. */
			pagedaemon_wakeup();
			while (swap_pager_free_count <= 3) {
				swap_pager_needflags |= SWAP_FREE_NEEDED;
				tsleep(&swap_pager_free, PVM, "swpfre", 0);
				pagedaemon_wakeup();
			}
		}
		splx(s);
	}
	spc = TAILQ_FIRST(&swap_pager_free);
	if (spc == NULL)
		panic("swap_pager_putpages: free queue is empty, %d expected\n",
			swap_pager_free_count);
	TAILQ_REMOVE(&swap_pager_free, spc, spc_list);
	swap_pager_free_count--;

	kva = spc->spc_kva;

	/*
	 * map our page(s) into kva for I/O
	 */
	pmap_qenter(kva, &m[firstidx], ix);

	/*
	 * get the base I/O offset into the swap file
	 */
	for (i = firstidx; i < lastidx ; i++) {
		fidx = m[i]->pindex + paging_pindex;
		off = swap_pager_block_offset(fidx);
		/*
		 * set the valid bit
		 */
		swb[i]->swb_valid |= (1 << off);
		/*
		 * and unlock the data structure
		 */
		swb[i]->swb_locked--;
	}

	/*
	 * Get a swap buffer header and perform the IO
	 */
	bp = spc->spc_bp;
	bzero(bp, sizeof *bp);
	bp->b_spc = spc;
	bp->b_vnbufs.le_next = NOLIST;

	bp->b_flags = B_BUSY | B_PAGING;
	bp->b_proc = &proc0;	/* XXX (but without B_PHYS set this is ok) */
	bp->b_rcred = bp->b_wcred = bp->b_proc->p_ucred;
	if (bp->b_rcred != NOCRED)
		crhold(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crhold(bp->b_wcred);
	bp->b_data = (caddr_t) kva;
	bp->b_blkno = reqaddr[firstidx];
	pbgetvp(swapdev_vp, bp);

	bp->b_bcount = PAGE_SIZE * ix;
	bp->b_bufsize = PAGE_SIZE * ix;
	swapdev_vp->v_numoutput++;

	/*
	 * If this is an async write we set up additional buffer fields and
	 * place a "cleaning" entry on the inuse queue.
	 */
	s = splbio();
	if (sync == FALSE) {
		spc->spc_flags = 0;
		spc->spc_object = object;
		/* NOTE: spc_m is indexed by the absolute page index i
		 * (see spc_first/spc_count consumed by swap_pager_finish). */
		for (i = firstidx; i < lastidx; i++)
			spc->spc_m[i] = m[i];
		spc->spc_first = firstidx;
		spc->spc_count = ix;
		/*
		 * the completion routine for async writes
		 */
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone;
		bp->b_dirtyoff = 0;
		bp->b_dirtyend = bp->b_bcount;
		object->un_pager.swp.swp_poip++;
		TAILQ_INSERT_TAIL(&swap_pager_inuse, spc, spc_list);
	} else {
		object->un_pager.swp.swp_poip++;
		bp->b_flags |= B_CALL;
		bp->b_iodone = swap_pager_iodone1;
	}

	cnt.v_swapout++;
	cnt.v_swappgsout += ix;
	/*
	 * perform the I/O
	 */
	VOP_STRATEGY(bp);
	if (sync == FALSE) {
		/* Async: report PEND; completion is handled by the iodone
		 * routine and later reaped by swap_pager_sync(). */
		if ((bp->b_flags & B_DONE) == B_DONE) {
			swap_pager_sync();
		}
		splx(s);
		for (i = firstidx; i < lastidx; i++) {
			rtvals[i] = VM_PAGER_PEND;
		}
		return VM_PAGER_PEND;
	}
	/*
	 * wait for the sync I/O to complete
	 */
	while ((bp->b_flags & B_DONE) == 0) {
		tsleep(bp, PVM, "swwrt", 0);
	}
	if (bp->b_flags & B_ERROR) {
		printf("swap_pager: I/O error - pageout failed; blkno %d, size %d, error %d\n",
		    bp->b_blkno, bp->b_bcount, bp->b_error);
		rv = VM_PAGER_ERROR;
	} else {
		rv = VM_PAGER_OK;
	}

	object->un_pager.swp.swp_poip--;
	if (object->un_pager.swp.swp_poip == 0)
		wakeup(object);

	if (bp->b_vp)
		pbrelvp(bp);
	if (bp->b_flags & B_WANTED)
		wakeup(bp);

	splx(s);

	/*
	 * remove the mapping for kernel virtual
	 */
	pmap_qremove(kva, ix);

	/*
	 * if we have written the page, then indicate that the page is clean.
	 */
	if (rv == VM_PAGER_OK) {
		for (i = firstidx; i < lastidx; i++) {
			if (rtvals[i] == VM_PAGER_OK) {
				pmap_clear_modify(VM_PAGE_TO_PHYS(m[i]));
				m[i]->dirty = 0;
				/*
				 * optimization, if a page has been read
				 * during the pageout process, we activate it.
				 */
				if ((m[i]->queue != PQ_ACTIVE) &&
				    ((m[i]->flags & (PG_WANTED|PG_REFERENCED)) ||
				    pmap_is_referenced(VM_PAGE_TO_PHYS(m[i])))) {
					vm_page_activate(m[i]);
				}
			}
		}
	} else {
		for (i = firstidx; i < lastidx; i++) {
			rtvals[i] = rv;
		}
	}

	/* Release credentials and return the spc to the free list. */
	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);
	TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
	swap_pager_free_count++;
	if (swap_pager_needflags & SWAP_FREE_NEEDED) {
		wakeup(&swap_pager_free);
	}
	if (swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
		pagedaemon_wakeup();
	swap_pager_needflags &= ~(SWAP_FREE_NEEDED|SWAP_FREE_NEEDED_BY_PAGEOUT);
	return (rv);
}
 1530 
/*
 * swap_pager_sync - reap completed async pageouts: unmap each finished
 * cleaning structure (spc) from the done list, finish its pages, and
 * return it to the free list, waking any waiters.  Loops until the
 * done list is empty.  Note the goto-into-loop shape: "doclean" is
 * only ever reached via the goto inside the while; the bare break
 * after splx() is the normal exit.
 */
static void
swap_pager_sync()
{
	register swp_clean_t spc, tspc;
	register int s;

	tspc = NULL;
	/* Unlocked early-out; re-checked under splbio() below. */
	if (TAILQ_FIRST(&swap_pager_done) == NULL)
		return;
	for (;;) {
		s = splbio();
		/*
		 * Look up and removal from done list must be done at splbio()
		 * to avoid conflicts with swap_pager_iodone.
		 */
		while ((spc = TAILQ_FIRST(&swap_pager_done)) != 0) {
			pmap_qremove(spc->spc_kva, spc->spc_count);
			swap_pager_finish(spc);
			TAILQ_REMOVE(&swap_pager_done, spc, spc_list);
			goto doclean;
		}

		/*
		 * No operations done, that's all we can do for now.
		 */

		splx(s);
		break;

		/*
		 * The desired page was found to be busy earlier in the scan
		 * but has since completed.
		 */
doclean:
		if (tspc && tspc == spc) {
			tspc = NULL;
		}
		/* Recycle the spc and wake anyone waiting for a free one. */
		spc->spc_flags = 0;
		TAILQ_INSERT_TAIL(&swap_pager_free, spc, spc_list);
		swap_pager_free_count++;
		if (swap_pager_needflags & SWAP_FREE_NEEDED) {
			wakeup(&swap_pager_free);
		}
		if( swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT)
			pagedaemon_wakeup();
		swap_pager_needflags &= ~(SWAP_FREE_NEEDED|SWAP_FREE_NEEDED_BY_PAGEOUT);
		splx(s);
	}

	return;
}
 1582 
 1583 void
 1584 swap_pager_finish(spc)
 1585         register swp_clean_t spc;
 1586 {
 1587         int lastidx = spc->spc_first + spc->spc_count;
 1588         vm_page_t *ma = spc->spc_m;
 1589         vm_object_t object = ma[spc->spc_first]->object;
 1590         int i;
 1591 
 1592         object->paging_in_progress -= spc->spc_count;
 1593         if ((object->paging_in_progress == 0) &&
 1594             (object->flags & OBJ_PIPWNT)) {
 1595                 object->flags &= ~OBJ_PIPWNT;
 1596                 wakeup(object);
 1597         }
 1598 
 1599         /*
 1600          * If no error, mark as clean and inform the pmap system. If error,
 1601          * mark as dirty so we will try again. (XXX could get stuck doing
 1602          * this, should give up after awhile)
 1603          */
 1604         if (spc->spc_flags & SPC_ERROR) {
 1605                 for (i = spc->spc_first; i < lastidx; i++) {
 1606                         printf("swap_pager_finish: I/O error, clean of page %lx failed\n",
 1607                             (u_long) VM_PAGE_TO_PHYS(ma[i]));
 1608                 }
 1609         } else {
 1610                 for (i = spc->spc_first; i < lastidx; i++) {
 1611                         pmap_clear_modify(VM_PAGE_TO_PHYS(ma[i]));
 1612                         ma[i]->dirty = 0;
 1613                         if ((ma[i]->queue != PQ_ACTIVE) &&
 1614                            ((ma[i]->flags & PG_WANTED) ||
 1615                                  pmap_ts_referenced(VM_PAGE_TO_PHYS(ma[i]))))
 1616                                 vm_page_activate(ma[i]);
 1617                 }
 1618         }
 1619 
 1620 
 1621         for (i = spc->spc_first; i < lastidx; i++) {
 1622                 /*
 1623                  * we wakeup any processes that are waiting on these pages.
 1624                  */
 1625                 PAGE_WAKEUP(ma[i]);
 1626         }
 1627         nswiodone -= spc->spc_count;
 1628 
 1629         return;
 1630 }
 1631 
/*
 * swap_pager_iodone:
 *
 *	I/O-completion handler for asynchronous swap I/O; runs at
 *	interrupt time (other accessors protect themselves with
 *	splbio()).
 *
 *	Moves the cleaning structure from the in-use list to the done
 *	list — final processing happens later in swap_pager_sync() —
 *	records any I/O error, releases the buffer's vnode and
 *	credentials, and issues the various wakeups: sleepers on the
 *	buffer, on the object's pending-I/O count, on the free list,
 *	and the pagedaemon.
 */
static void
swap_pager_iodone(bp)
	register struct buf *bp;
{
	register swp_clean_t spc;
	int s;

	s = splbio();
	spc = (swp_clean_t) bp->b_spc;
	/* Hand the completed cluster over to swap_pager_sync(). */
	TAILQ_REMOVE(&swap_pager_inuse, spc, spc_list);
	TAILQ_INSERT_TAIL(&swap_pager_done, spc, spc_list);
	if (bp->b_flags & B_ERROR) {
		/* Latch the failure; swap_pager_finish() reports it per page. */
		spc->spc_flags |= SPC_ERROR;
		printf("swap_pager: I/O error - async %s failed; blkno %lu, size %ld, error %d\n",
		    (bp->b_flags & B_READ) ? "pagein" : "pageout",
		    (u_long) bp->b_blkno, bp->b_bcount, bp->b_error);
	}

	if (bp->b_vp)
		pbrelvp(bp);

/*
	if (bp->b_flags & B_WANTED)
*/
		/* Wakeup is unconditional; the B_WANTED test above was disabled. */
		wakeup(bp);

	if (bp->b_rcred != NOCRED)
		crfree(bp->b_rcred);
	if (bp->b_wcred != NOCRED)
		crfree(bp->b_wcred);

	/* Account for the pages whose I/O just completed. */
	nswiodone += spc->spc_count;
	/* Last pending I/O on this object: wake anyone sleeping on it. */
	if (--spc->spc_object->un_pager.swp.swp_poip == 0) {
		wakeup(spc->spc_object);
	}
	/* Wake threads waiting for a free cleaning structure. */
	if ((swap_pager_needflags & SWAP_FREE_NEEDED) ||
	    TAILQ_FIRST(&swap_pager_inuse) == 0) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED;
		wakeup(&swap_pager_free);
	}

	if( swap_pager_needflags & SWAP_FREE_NEEDED_BY_PAGEOUT) {
		swap_pager_needflags &= ~SWAP_FREE_NEEDED_BY_PAGEOUT;
		pagedaemon_wakeup();
	}

	if (vm_pageout_pages_needed) {
		wakeup(&vm_pageout_pages_needed);
		vm_pageout_pages_needed = 0;
	}
	/*
	 * Kick the pagedaemon when all async swap I/O has drained, or when
	 * the pages just completed would push free+cache memory back over
	 * the free minimum.
	 */
	if ((TAILQ_FIRST(&swap_pager_inuse) == NULL) ||
	    ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min &&
	    nswiodone + cnt.v_free_count + cnt.v_cache_count >= cnt.v_free_min)) {
		pagedaemon_wakeup();
	}
	splx(s);
}

Cache object: d9fa82aff75c01a63552caf6099a24ca


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.