FreeBSD/Linux Kernel Cross Reference
sys/vm/vm_swap.c
1 /*
2 * Copyright (c) 1982, 1986, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 * 3. All advertising materials mentioning features or use of this software
14 * must display the following acknowledgement:
15 * This product includes software developed by the University of
16 * California, Berkeley and its contributors.
17 * 4. Neither the name of the University nor the names of its contributors
18 * may be used to endorse or promote products derived from this software
19 * without specific prior written permission.
20 *
21 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
22 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
23 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
24 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
25 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
26 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
27 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
28 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
29 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
30 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
31 * SUCH DAMAGE.
32 *
33 * @(#)vm_swap.c 8.5 (Berkeley) 2/17/94
34 * $FreeBSD: releng/5.0/sys/vm/vm_swap.c 106023 2002-10-27 06:54:06Z rwatson $
35 */
36
37 #include "opt_mac.h"
38 #include "opt_swap.h"
39
40 #include <sys/param.h>
41 #include <sys/systm.h>
42 #include <sys/sysproto.h>
43 #include <sys/bio.h>
44 #include <sys/buf.h>
45 #include <sys/proc.h>
46 #include <sys/namei.h>
47 #include <sys/dmap.h> /* XXX */
48 #include <sys/vnode.h>
49 #include <sys/fcntl.h>
50 #include <sys/blist.h>
51 #include <sys/kernel.h>
52 #include <sys/lock.h>
53 #include <sys/conf.h>
54 #include <sys/stat.h>
55 #include <sys/sysctl.h>
56 #include <sys/mac.h>
57 #include <sys/mount.h>
58 #include <vm/vm.h>
59 #include <vm/vm_extern.h>
60 #include <vm/vm_param.h>
61 #include <vm/swap_pager.h>
62 #include <vm/uma.h>
63
64 /*
65 * Indirect driver for multi-controller paging.
66 */
67
68 #ifndef NSWAPDEV
69 #define NSWAPDEV 4
70 #endif
71 static struct swdevt should_be_malloced[NSWAPDEV];
72 struct swdevt *swdevt = should_be_malloced;
73 static int nswap; /* first block after the interleaved devs */
74 int nswdev = NSWAPDEV;
75 int vm_swap_size;
76
77 static int swapdev_strategy(struct vop_strategy_args *ap);
78 struct vnode *swapdev_vp;
79
80 /*
81 * swapdev_strategy:
82 *
83 * VOP_STRATEGY() for swapdev_vp.
84 * Perform swap strategy interleave device selection.
85 *
86 * The bp is expected to be locked and *not* B_DONE on call.
87 */
88 static int
89 swapdev_strategy(ap)
90 struct vop_strategy_args /* {
91 struct vnode *a_vp;
92 struct buf *a_bp;
93 } */ *ap;
94 {
95 int s, sz, off, seg, index;
96 struct swdevt *sp;
97 struct vnode *vp;
98 struct buf *bp;
99
100 bp = ap->a_bp;
101 sz = howmany(bp->b_bcount, PAGE_SIZE);
102
103 /*
104 * Convert interleaved swap into per-device swap. Note that
105 * the block size is left in PAGE_SIZE'd chunks (for the newswap)
106 * here.
107 */
108 if (nswdev > 1) {
109 off = bp->b_blkno % dmmax;
110 if (off + sz > dmmax) {
111 bp->b_error = EINVAL;
112 bp->b_ioflags |= BIO_ERROR;
113 bufdone(bp);
114 return 0;
115 }
116 seg = bp->b_blkno / dmmax;
117 index = seg % nswdev;
118 seg /= nswdev;
119 bp->b_blkno = seg * dmmax + off;
120 } else {
121 index = 0;
122 }
123 sp = &swdevt[index];
124 if (bp->b_blkno + sz > sp->sw_nblks) {
125 bp->b_error = EINVAL;
126 bp->b_ioflags |= BIO_ERROR;
127 bufdone(bp);
128 return 0;
129 }
130 bp->b_dev = sp->sw_device;
131 if (sp->sw_vp == NULL) {
132 bp->b_error = ENODEV;
133 bp->b_ioflags |= BIO_ERROR;
134 bufdone(bp);
135 return 0;
136 }
137
138 /*
139 * Convert from PAGE_SIZE'd to DEV_BSIZE'd chunks for the actual I/O
140 */
141 bp->b_blkno = ctodb(bp->b_blkno);
142
143 vhold(sp->sw_vp);
144 s = splvm();
145 if (bp->b_iocmd == BIO_WRITE) {
146 vp = bp->b_vp;
147 if (vp) {
148 VI_LOCK(vp);
149 vp->v_numoutput--;
150 if ((vp->v_iflag & VI_BWAIT) && vp->v_numoutput <= 0) {
151 vp->v_iflag &= ~VI_BWAIT;
152 wakeup(&vp->v_numoutput);
153 }
154 VI_UNLOCK(vp);
155 }
156 VI_LOCK(sp->sw_vp);
157 sp->sw_vp->v_numoutput++;
158 VI_UNLOCK(sp->sw_vp);
159 }
160 bp->b_vp = sp->sw_vp;
161 splx(s);
162 BUF_STRATEGY(bp);
163 return 0;
164 }
165
166 /*
167 * Create a special vnode op vector for swapdev_vp - we only use
168 * VOP_STRATEGY(), everything else returns an error.
169 */
170 vop_t **swapdev_vnodeop_p;
171 static struct vnodeopv_entry_desc swapdev_vnodeop_entries[] = {
172 { &vop_default_desc, (vop_t *) vop_defaultop },
173 { &vop_strategy_desc, (vop_t *) swapdev_strategy },
174 { NULL, NULL }
175 };
176 static struct vnodeopv_desc swapdev_vnodeop_opv_desc =
177 { &swapdev_vnodeop_p, swapdev_vnodeop_entries };
178
179 VNODEOP_SET(swapdev_vnodeop_opv_desc);
180
/*
 * Argument block for the swapon(name) system call, which enables swapping
 * on the named device.  The call returns EBUSY if the system is already
 * swapping on that device.
 *
 * Only declared here when <sys/sysproto.h> has not already provided it.
 */
#ifndef _SYS_SYSPROTO_H_
struct swapon_args {
	char	*name;		/* user-space path of the swap device */
};
#endif
191
192 /*
193 * MPSAFE
194 */
195 /* ARGSUSED */
196 int
197 swapon(td, uap)
198 struct thread *td;
199 struct swapon_args *uap;
200 {
201 struct vattr attr;
202 struct vnode *vp;
203 struct nameidata nd;
204 int error;
205
206 mtx_lock(&Giant);
207 error = suser(td);
208 if (error)
209 goto done2;
210
211 /*
212 * Swap metadata may not fit in the KVM if we have physical
213 * memory of >1GB.
214 */
215 if (swap_zone == NULL) {
216 error = ENOMEM;
217 goto done2;
218 }
219
220 NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->name, td);
221 error = namei(&nd);
222 if (error)
223 goto done2;
224
225 NDFREE(&nd, NDF_ONLY_PNBUF);
226 vp = nd.ni_vp;
227
228 if (vn_isdisk(vp, &error))
229 error = swaponvp(td, vp, vp->v_rdev, 0);
230 else if (vp->v_type == VREG &&
231 (vp->v_mount->mnt_vfc->vfc_flags & VFCF_NETWORK) != 0 &&
232 (error = VOP_GETATTR(vp, &attr, td->td_ucred, td)) == 0) {
233 /*
234 * Allow direct swapping to NFS regular files in the same
235 * way that nfs_mountroot() sets up diskless swapping.
236 */
237 error = swaponvp(td, vp, NODEV, attr.va_size / DEV_BSIZE);
238 }
239
240 if (error)
241 vrele(vp);
242 done2:
243 mtx_unlock(&Giant);
244 return (error);
245 }
246
247 /*
248 * Swfree(index) frees the index'th portion of the swap map.
249 * Each of the nswdev devices provides 1/nswdev'th of the swap
250 * space, which is laid out with blocks of dmmax pages circularly
251 * among the devices.
252 *
253 * The new swap code uses page-sized blocks. The old swap code used
254 * DEV_BSIZE'd chunks.
255 *
256 * XXX locking when multiple swapon's run in parallel
257 */
258 int
259 swaponvp(td, vp, dev, nblks)
260 struct thread *td;
261 struct vnode *vp;
262 dev_t dev;
263 u_long nblks;
264 {
265 int index;
266 struct swdevt *sp;
267 swblk_t vsbase;
268 long blk;
269 swblk_t dvbase;
270 int error;
271 u_long aligned_nblks;
272
273 if (!swapdev_vp) {
274 error = getnewvnode("none", NULL, swapdev_vnodeop_p,
275 &swapdev_vp);
276 if (error)
277 panic("Cannot get vnode for swapdev");
278 swapdev_vp->v_type = VNON; /* Untyped */
279 }
280
281 ASSERT_VOP_UNLOCKED(vp, "swaponvp");
282 for (sp = swdevt, index = 0 ; index < nswdev; index++, sp++) {
283 if (sp->sw_vp == vp)
284 return EBUSY;
285 if (!sp->sw_vp)
286 goto found;
287
288 }
289 return EINVAL;
290 found:
291 (void) vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
292 #ifdef MAC
293 error = mac_check_system_swapon(td->td_ucred, vp);
294 if (error == 0)
295 #endif
296 error = VOP_OPEN(vp, FREAD | FWRITE, td->td_ucred, td);
297 (void) VOP_UNLOCK(vp, 0, td);
298 if (error)
299 return (error);
300
301 if (nblks == 0 && dev != NODEV && (devsw(dev)->d_psize == 0 ||
302 (nblks = (*devsw(dev)->d_psize) (dev)) == -1)) {
303 (void) VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td);
304 return (ENXIO);
305 }
306 if (nblks == 0) {
307 (void) VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td);
308 return (ENXIO);
309 }
310
311 /*
312 * If we go beyond this, we get overflows in the radix
313 * tree bitmap code.
314 */
315 if (nblks > 0x40000000 / BLIST_META_RADIX / nswdev) {
316 printf("exceeded maximum of %d blocks per swap unit\n",
317 0x40000000 / BLIST_META_RADIX / nswdev);
318 (void) VOP_CLOSE(vp, FREAD | FWRITE, td->td_ucred, td);
319 return (ENXIO);
320 }
321 /*
322 * nblks is in DEV_BSIZE'd chunks, convert to PAGE_SIZE'd chunks.
323 * First chop nblks off to page-align it, then convert.
324 *
325 * sw->sw_nblks is in page-sized chunks now too.
326 */
327 nblks &= ~(ctodb(1) - 1);
328 nblks = dbtoc(nblks);
329
330 sp->sw_vp = vp;
331 sp->sw_dev = dev2udev(dev);
332 sp->sw_device = dev;
333 sp->sw_flags |= SW_FREED;
334 sp->sw_nblks = nblks;
335 sp->sw_used = 0;
336
337 /*
338 * nblks, nswap, and dmmax are PAGE_SIZE'd parameters now, not
339 * DEV_BSIZE'd. aligned_nblks is used to calculate the
340 * size of the swap bitmap, taking into account the stripe size.
341 */
342 aligned_nblks = (nblks + (dmmax - 1)) & ~(u_long)(dmmax - 1);
343
344 if (aligned_nblks * nswdev > nswap)
345 nswap = aligned_nblks * nswdev;
346
347 if (swapblist == NULL)
348 swapblist = blist_create(nswap);
349 else
350 blist_resize(&swapblist, nswap, 0);
351
352 for (dvbase = dmmax; dvbase < nblks; dvbase += dmmax) {
353 blk = min(nblks - dvbase, dmmax);
354 vsbase = index * dmmax + dvbase * nswdev;
355 blist_free(swapblist, vsbase, blk);
356 vm_swap_size += blk;
357 }
358
359 return (0);
360 }
361
362 static int
363 sysctl_vm_swap_info(SYSCTL_HANDLER_ARGS)
364 {
365 int *name = (int *)arg1;
366 int error, i, n;
367 struct xswdev xs;
368 struct swdevt *sp;
369
370 if (arg2 != 1) /* name length */
371 return (EINVAL);
372
373 for (sp = swdevt, i = 0, n = 0 ; i < nswdev; i++, sp++) {
374 if (sp->sw_vp) {
375 if (n == *name) {
376 xs.xsw_version = XSWDEV_VERSION;
377 xs.xsw_dev = sp->sw_dev;
378 xs.xsw_flags = sp->sw_flags;
379 xs.xsw_nblks = sp->sw_nblks;
380 xs.xsw_used = sp->sw_used;
381
382 error = SYSCTL_OUT(req, &xs, sizeof(xs));
383 return (error);
384 }
385 n++;
386 }
387
388 }
389 return (ENOENT);
390 }
391
392 SYSCTL_INT(_vm, OID_AUTO, nswapdev, CTLFLAG_RD, &nswdev, 0,
393 "Number of swap devices");
394 SYSCTL_NODE(_vm, OID_AUTO, swap_info, CTLFLAG_RD, sysctl_vm_swap_info,
395 "Swap statistics by device");
Cache object: a748fbbcdbf6be36e2461bffa27d33a2
|