/* FreeBSD/Linux Kernel Cross Reference: sys/sys/buf.h */
1 /*-
2 * Copyright (c) 1982, 1986, 1989, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 4. Neither the name of the University nor the names of its contributors
19 * may be used to endorse or promote products derived from this software
20 * without specific prior written permission.
21 *
22 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32 * SUCH DAMAGE.
33 *
34 * @(#)buf.h 8.9 (Berkeley) 3/30/95
35 * $FreeBSD: src/sys/sys/buf.h,v 1.167.2.4 2005/09/12 04:25:53 truckman Exp $
36 */
37
38 #ifndef _SYS_BUF_H_
39 #define _SYS_BUF_H_
40
41 #include <sys/queue.h>
42 #include <sys/lock.h>
43 #include <sys/lockmgr.h>
44
45 struct bio;
46 struct buf;
47 struct mount;
48 struct vnode;
49
/*
 * Declared here to avoid including <ufs/ffs/softdep.h>: the head type
 * for a list of soft dependency work items (see b_dep below).
 */
LIST_HEAD(workhead, worklist);
/*
 * These are currently used only by the soft dependency code, hence
 * are stored once in a global variable.  If other subsystems wanted
 * to use these hooks, a pointer to a set of bio_ops could be added
 * to each buffer.
 *
 * Each hook may be NULL; the buf_*() inline wrappers below check
 * before calling.
 */
extern struct bio_ops {
	/* Called before a write; its return value is propagated by
	 * buf_prewrite() (0 when no hook is installed). */
	int	(*io_prewrite)(struct vnode *, struct buf *);
	void	(*io_start)(struct buf *);	/* I/O is starting. */
	void	(*io_complete)(struct buf *);	/* I/O has completed. */
	void	(*io_deallocate)(struct buf *);	/* Buffer being released. */
	/* Transfer dependencies from the first buffer to the second. */
	void	(*io_movedeps)(struct buf *, struct buf *);
	/* Count dependencies on a buffer; meaning of the int argument is
	 * implementation-specific — see the soft dependency code. */
	int	(*io_countdeps)(struct buf *, int);
} bioops;
68
/*
 * Per-buffer method table, hung off b_op in struct buf.
 */
struct buf_ops {
	char	*bop_name;		/* Name identifying this ops set. */
	int	(*bop_write)(struct buf *);	/* Write the buffer out. */
};
73
74 extern struct buf_ops buf_ops_bio;
75
76 struct vm_object;
77
78 typedef unsigned char b_xflags_t;
79
80 /*
81 * The buffer header describes an I/O operation in the kernel.
82 *
83 * NOTES:
84 * b_bufsize, b_bcount. b_bufsize is the allocation size of the
85 * buffer, either DEV_BSIZE or PAGE_SIZE aligned. b_bcount is the
86 * originally requested buffer size and can serve as a bounds check
87 * against EOF. For most, but not all uses, b_bcount == b_bufsize.
88 *
89 * b_dirtyoff, b_dirtyend. Buffers support piecemeal, unaligned
90 * ranges of dirty data that need to be written to backing store.
91 * The range is typically clipped at b_bcount ( not b_bufsize ).
92 *
93 * b_resid. Number of bytes remaining in I/O. After an I/O operation
94 * completes, b_resid is usually 0 indicating 100% success.
95 *
96 * All fields are protected by the buffer lock except those marked:
97 * V - Protected by owning vnode lock
98 * Q - Protected by the buf queue lock
99 * D - Protected by an dependency implementation specific lock
100 */
struct buf {
	struct bio b_io;		/* "Builtin" I/O request. */
	/* Convenience aliases for the embedded bio's fields. */
#define	b_bcount	b_io.bio_bcount
#define	b_caller1	b_io.bio_caller1
#define	b_data		b_io.bio_data
#define	b_dev		b_io.bio_dev
#define	b_error		b_io.bio_error
#define	b_iocmd		b_io.bio_cmd
#define	b_ioflags	b_io.bio_flags
#define	b_iooffset	b_io.bio_offset
#define	b_resid		b_io.bio_resid
	struct buf_ops *b_op;		/* Buffer method table (bop_write). */
	unsigned b_magic;		/* Consistency-check magic number. */
#define	B_MAGIC_BIO	0x10b10b10	/* Ordinary bio-backed buffer. */
#define	B_MAGIC_NFS	0x67238234	/* NFS-owned buffer. */
	void	(*b_iodone)(struct buf *);	/* I/O completion routine. */
	daddr_t b_blkno;		/* Underlying physical block number. */
	off_t	b_offset;		/* Offset into file. */
	TAILQ_ENTRY(buf) b_vnbufs;	/* (V) Buffer's associated vnode. */
	struct buf *b_left;		/* (V) splay tree link */
	struct buf *b_right;		/* (V) splay tree link */
	uint32_t b_vflags;		/* (V) BV_* flags */
	TAILQ_ENTRY(buf) b_freelist;	/* (Q) Free list position inactive. */
	unsigned short b_qindex;	/* (Q) buffer queue index */
	uint32_t b_flags;		/* B_* flags. */
	b_xflags_t b_xflags;		/* extra flags (BX_*) */
	struct lock b_lock;		/* Buffer lock */
	long	b_bufsize;		/* Allocated buffer size. */
	long	b_runningbufspace;	/* when I/O is running, pipelining */
	caddr_t b_kvabase;		/* base kva for buffer */
	int	b_kvasize;		/* size of kva for buffer */
	daddr_t b_lblkno;		/* Logical block number. */
	struct vnode *b_vp;		/* Device vnode. */
	struct vm_object *b_object;	/* Object for vp */
	int	b_dirtyoff;		/* Offset in buffer of dirty region. */
	int	b_dirtyend;		/* Offset of end of dirty region. */
	struct ucred *b_rcred;		/* Read credentials reference. */
	struct ucred *b_wcred;		/* Write credentials reference. */
	void	*b_saveaddr;		/* Original b_addr for physio. */
	union	pager_info {		/* Pager-private state. */
		int	pg_reqpage;	/* NOTE(review): presumably the
					 * requested page index — confirm
					 * against the vnode pager. */
	} b_pager;
	union	cluster_info {		/* Cluster I/O linkage: a buffer is
					 * either a cluster head or a member
					 * of one, never both. */
		TAILQ_HEAD(cluster_list_head, buf) cluster_head;
		TAILQ_ENTRY(buf) cluster_entry;
	} b_cluster;
	struct	vm_page *b_pages[btoc(MAXPHYS)];	/* VM pages backing b_data. */
	int	b_npages;		/* Valid entries in b_pages[]. */
	struct	workhead b_dep;		/* (D) List of filesystem dependencies. */
};
151
152 /*
153 * These flags are kept in b_flags.
154 *
155 * Notes:
156 *
157 * B_ASYNC VOP calls on bp's are usually async whether or not
158 * B_ASYNC is set, but some subsystems, such as NFS, like
159 * to know what is best for the caller so they can
160 * optimize the I/O.
161 *
162 * B_PAGING Indicates that bp is being used by the paging system or
163 * some paging system and that the bp is not linked into
164 * the b_vp's clean/dirty linked lists or ref counts.
165 * Buffer vp reassignments are illegal in this case.
166 *
167 * B_CACHE This may only be set if the buffer is entirely valid.
168 * The situation where B_DELWRI is set and B_CACHE is
169 * clear MUST be committed to disk by getblk() so
170 * B_DELWRI can also be cleared. See the comments for
171 * getblk() in kern/vfs_bio.c. If B_CACHE is clear,
172 * the caller is expected to clear BIO_ERROR and B_INVAL,
173 * set BIO_READ, and initiate an I/O.
174 *
175 * The 'entire buffer' is defined to be the range from
176 * 0 through b_bcount.
177 *
178 * B_MALLOC Request that the buffer be allocated from the malloc
179 * pool, DEV_BSIZE aligned instead of PAGE_SIZE aligned.
180 *
181 * B_CLUSTEROK This flag is typically set for B_DELWRI buffers
182 * by filesystems that allow clustering when the buffer
183 * is fully dirty and indicates that it may be clustered
184 * with other adjacent dirty buffers. Note the clustering
185 * may not be used with the stage 1 data write under NFS
186 * but may be used for the commit rpc portion.
187 *
188 * B_VMIO Indicates that the buffer is tied into an VM object.
189 * The buffer's data is always PAGE_SIZE aligned even
190 * if b_bufsize and b_bcount are not. ( b_bufsize is
191 * always at least DEV_BSIZE aligned, though ).
192 *
193 * B_DIRECT Hint that we should attempt to completely free
194 * the pages underlying the buffer. B_DIRECT is
195 * sticky until the buffer is released and typically
196 * only has an effect when B_RELBUF is also set.
197 *
198 */
199
/*
 * b_flags values.  Bits marked "Available flag" are unused placeholders
 * named after their bit value.
 */
#define	B_AGE		0x00000001	/* Move to age queue when I/O done. */
#define	B_NEEDCOMMIT	0x00000002	/* Append-write in progress. */
#define	B_ASYNC		0x00000004	/* Start I/O, do not wait. */
#define	B_DIRECT	0x00000008	/* direct I/O flag (pls free vmio) */
#define	B_DEFERRED	0x00000010	/* Skipped over for cleaning */
#define	B_CACHE		0x00000020	/* Bread found us in the cache. */
#define	B_VALIDSUSPWRT	0x00000040	/* Valid write during suspension. */
#define	B_DELWRI	0x00000080	/* Delay I/O until buffer reused. */
#define	B_PERSISTENT	0x00000100	/* Perm. ref'ed while EXT2FS mounted. */
#define	B_DONE		0x00000200	/* I/O completed. */
#define	B_EINTR		0x00000400	/* I/O was interrupted */
#define	B_00000800	0x00000800	/* Available flag. */
#define	B_00001000	0x00001000	/* Available flag. */
#define	B_INVAL		0x00002000	/* Does not contain valid info. */
#define	B_00004000	0x00004000	/* Available flag. */
#define	B_NOCACHE	0x00008000	/* Do not cache block after use. */
#define	B_MALLOC	0x00010000	/* malloced b_data */
#define	B_CLUSTEROK	0x00020000	/* May be clustered w/ adjacent dirty
					   bufs (see notes above). */
#define	B_00040000	0x00040000	/* Available flag. */
#define	B_00080000	0x00080000	/* Available flag. */
#define	B_00100000	0x00100000	/* Available flag. */
#define	B_DIRTY		0x00200000	/* Needs writing later (in EXT2FS). */
#define	B_RELBUF	0x00400000	/* Release VMIO buffer. */
#define	B_00800000	0x00800000	/* Available flag. */
#define	B_WRITEINPROG	0x01000000	/* Write in progress. */
#define	B_02000000	0x02000000	/* Available flag. */
#define	B_PAGING	0x04000000	/* volatile paging I/O -- bypass VMIO */
#define	B_08000000	0x08000000	/* Available flag. */
#define	B_RAM		0x10000000	/* Read ahead mark (flag) */
#define	B_VMIO		0x20000000	/* VMIO flag */
#define	B_CLUSTER	0x40000000	/* pagein op, so swap() can count it */
#define	B_80000000	0x80000000	/* Available flag. */

/*
 * printf("%b") decode string for b_flags.  The leading "\20" (octal 16)
 * selects hexadecimal output of the value; each "\<n>name" entry labels
 * bit <n> (1-based).  The previous "\2" was a truncated "\20".
 */
#define	PRINT_BUF_FLAGS "\20\40b31\37cluster\36vmio\35ram\34b27" \
	"\33paging\32b25\31writeinprog\30b23\27relbuf\26dirty\25b20" \
	"\24b19\23b18\22clusterok\21malloc\20nocache\17b14\16inval" \
	"\15b12\14b11\13eintr\12done\11persistent\10delwri\7validsuspwrt" \
	"\6cache\5deferred\4direct\3async\2needcommit\1age"
238
239 /*
240 * These flags are kept in b_xflags.
241 */
242 #define BX_VNDIRTY 0x00000001 /* On vnode dirty list */
243 #define BX_VNCLEAN 0x00000002 /* On vnode clean list */
244 #define BX_BKGRDWRITE 0x00000010 /* Do writes in background */
245 #define BX_BKGRDMARKER 0x00000020 /* Mark buffer for splay tree */
246 #define BX_ALTDATA 0x00000040 /* Holds extended data */
247
248 #define NOOFFSET (-1LL) /* No buffer offset calculated yet */
249
250 /*
251 * These flags are kept in b_vflags.
252 */
253 #define BV_SCANNED 0x00000001 /* VOP_FSYNC funcs mark written bufs */
254 #define BV_BKGRDINPROG 0x00000002 /* Background write in progress */
255 #define BV_BKGRDWAIT 0x00000004 /* Background write waiting */
256
257 #ifdef _KERNEL
258 /*
259 * Buffer locking
260 */
261 extern const char *buf_wmesg; /* Default buffer lock message */
262 #define BUF_WMESG "bufwait"
263 #include <sys/proc.h> /* XXX for curthread */
264 #include <sys/mutex.h>
265
/*
 * Initialize a buffer's lock: sleep priority PRIBIO + 4, the default
 * "bufwait" wait message, no timeout, no flags.
 */
#define BUF_LOCKINIT(bp) \
	lockinit(&(bp)->b_lock, PRIBIO + 4, buf_wmesg, 0, 0)
/*
 * Get a lock, sleeping non-interruptibly until it becomes available.
 *
 * Returns the lockmgr() result (0 on success).  The default wait
 * message and priority are restored before locking, since a previous
 * BUF_TIMELOCK() call may have overridden them.  The interlock, if
 * any, is dropped by lockmgr().
 */
static __inline int BUF_LOCK(struct buf *, int, struct mtx *);
static __inline int
BUF_LOCK(struct buf *bp, int locktype, struct mtx *interlock)
{
	int s, ret;

	s = splbio();		/* Keep bio interrupts out while fiddling. */
	mtx_lock(bp->b_lock.lk_interlock);
	locktype |= LK_INTERNAL;	/* We already hold lk_interlock. */
	bp->b_lock.lk_wmesg = buf_wmesg;	/* Restore defaults. */
	bp->b_lock.lk_prio = PRIBIO + 4;
	ret = lockmgr(&(bp)->b_lock, locktype, interlock, curthread);
	splx(s);
	return ret;
}
/*
 * Get a lock, sleeping with the specified interruptibility and timeout.
 *
 * 'wmesg' and 'timo' are stored into the lock for this acquisition;
 * 'catch' is OR'ed into the sleep priority (NOTE(review): presumably
 * PCATCH to allow signals to interrupt the sleep — confirm callers).
 * Returns the lockmgr() result.
 */
static __inline int BUF_TIMELOCK(struct buf *, int, struct mtx *,
    char *, int, int);
static __inline int
BUF_TIMELOCK(struct buf *bp, int locktype, struct mtx *interlock,
    char *wmesg, int catch, int timo)
{
	int s, ret;

	s = splbio();		/* Keep bio interrupts out while fiddling. */
	mtx_lock(bp->b_lock.lk_interlock);
	locktype |= LK_INTERNAL | LK_TIMELOCK;	/* Interlock held; timed. */
	bp->b_lock.lk_wmesg = wmesg;
	bp->b_lock.lk_prio = (PRIBIO + 4) | catch;
	bp->b_lock.lk_timo = timo;
	ret = lockmgr(&(bp)->b_lock, (locktype), interlock, curthread);
	splx(s);
	return ret;
}
/*
 * Release a lock.  Only the acquiring process may free the lock unless
 * it has been handed off to biodone (see BUF_KERNPROC()).
 */
static __inline void BUF_UNLOCK(struct buf *);
static __inline void
BUF_UNLOCK(struct buf *bp)
{
	int s;

	s = splbio();		/* Block bio interrupts around the release. */
	lockmgr(&(bp)->b_lock, LK_RELEASE, NULL, curthread);
	splx(s);
}
325
/*
 * Free a buffer lock.  The lock must be unreferenced; destroying a
 * lock that is still held is a fatal error.
 */
#define BUF_LOCKFREE(bp) 			\
do {						\
	if (BUF_REFCNT(bp) > 0)			\
		panic("free locked buf");	\
	lockdestroy(&(bp)->b_lock);		\
} while (0)
335
336 #ifdef _SYS_PROC_H_ /* Avoid #include <sys/proc.h> pollution */
/*
 * When initiating asynchronous I/O, change ownership of the lock to the
 * kernel.  Once done, the lock may legally be released by biodone.  The
 * original owning process can no longer acquire it recursively, but must
 * wait until the I/O is completed and the lock has been freed by biodone.
 */
static __inline void BUF_KERNPROC(struct buf *);
static __inline void
BUF_KERNPROC(struct buf *bp)
{
	struct thread *td = curthread;

	/*
	 * Credit the lock back to the owner's held-lock count
	 * (presumably bumped at acquire time — see lockmgr) before
	 * handing ownership to the kernel.  The idle thread keeps no
	 * such count, hence the exclusion.
	 */
	if ((td != PCPU_GET(idlethread))
	    && bp->b_lock.lk_lockholder == td)
		td->td_locks--;
	bp->b_lock.lk_lockholder = LK_KERNPROC;
}
354 #endif
/*
 * Find out the number of references to a lock.
 *
 * Returns the lockmgr reference count on bp's lock, or always 1 while
 * the system is panicing (see below).
 */
static __inline int BUF_REFCNT(struct buf *);
static __inline int
BUF_REFCNT(struct buf *bp)
{
	int s, ret;

	/*
	 * When the system is panicing, the lock manager grants all lock
	 * requests whether or not the lock is available. To avoid "unlocked
	 * buffer" panics after a crash, we just claim that all buffers
	 * are locked when cleaning up after a system panic.
	 */
	if (panicstr != NULL)
		return (1);
	s = splbio();
	ret = lockcount(&(bp)->b_lock);
	splx(s);
	return ret;
}
377
378 #endif /* _KERNEL */
379
/*
 * Head of a queue of buffers.
 * NOTE(review): last_pblkno/insert_point/switch_point look like
 * disksort-style scheduling state — confirm against the consumers
 * of this structure before relying on that reading.
 */
struct buf_queue_head {
	TAILQ_HEAD(buf_queue, buf) queue;	/* The queued buffers. */
	daddr_t last_pblkno;
	struct	buf *insert_point;
	struct	buf *switch_point;
};
386
/*
 * This structure describes a clustered I/O.  It is stored in the
 * b_saveaddr field of the buffer on which I/O is done.  At I/O
 * completion, the cluster callback uses the structure to parcel the
 * I/O out to the individual buffers, and then frees this structure.
 */
struct cluster_save {
	long	bs_bcount;		/* Saved b_bcount. */
	long	bs_bufsize;		/* Saved b_bufsize. */
	void	*bs_saveaddr;		/* Saved b_addr. */
	int	bs_nchildren;		/* Number of associated buffers. */
	struct buf **bs_children;	/* List of associated buffers. */
};
400
401 #ifdef _KERNEL
402
403
404 static __inline int
405 buf_prewrite(struct vnode *vp, struct buf *bp)
406 {
407 if (bioops.io_prewrite)
408 return (*bioops.io_prewrite)(vp, bp);
409 else
410 return (0);
411 }
412
413 static __inline void
414 buf_start(struct buf *bp)
415 {
416 if (bioops.io_start)
417 (*bioops.io_start)(bp);
418 }
419
420 static __inline void
421 buf_complete(struct buf *bp)
422 {
423 if (bioops.io_complete)
424 (*bioops.io_complete)(bp);
425 }
426
427 static __inline void
428 buf_deallocate(struct buf *bp)
429 {
430 if (bioops.io_deallocate)
431 (*bioops.io_deallocate)(bp);
432 BUF_LOCKFREE(bp);
433 }
434
435 static __inline void
436 buf_movedeps(struct buf *bp, struct buf *bp2)
437 {
438 if (bioops.io_movedeps)
439 (*bioops.io_movedeps)(bp, bp2);
440 }
441
442 static __inline int
443 buf_countdeps(struct buf *bp, int i)
444 {
445 if (bioops.io_countdeps)
446 return ((*bioops.io_countdeps)(bp, i));
447 else
448 return (0);
449 }
450
451 #endif /* _KERNEL */
452
/*
 * Zero out the buffer's data area and mark the transfer complete
 * (b_resid = 0).
 *
 * Wrapped in do { } while (0) so the macro expands to a single
 * statement in every context: the previous bare-brace form broke
 * unbraced conditionals, e.g. "if (x) clrbuf(bp); else ..." was a
 * syntax error (CERT PRE10-C).
 */
#define clrbuf(bp) do {						\
	bzero((bp)->b_data, (u_int)(bp)->b_bcount);		\
	(bp)->b_resid = 0;					\
} while (0)
460
461 /*
462 * Flags for getblk's last parameter.
463 */
464 #define GB_LOCK_NOWAIT 0x0001 /* Fail if we block on a buf lock. */
465 #define GB_NOCREAT 0x0002 /* Don't create a buf if not found. */
466
467 #ifdef _KERNEL
468 extern int nbuf; /* The number of buffer headers */
469 extern int maxswzone; /* Max KVA for swap structures */
470 extern int maxbcache; /* Max KVA for buffer cache */
471 extern int runningbufspace;
472 extern int hibufspace;
473 extern int buf_maxio; /* nominal maximum I/O for buffer */
474 extern struct buf *buf; /* The buffer headers. */
475 extern char *buffers; /* The buffer contents. */
476 extern int bufpages; /* Number of memory pages in the buffer pool. */
477 extern struct buf *swbuf; /* Swap I/O buffer headers. */
478 extern int nswbuf; /* Number of swap I/O buffer headers. */
479 extern int cluster_pbuf_freecnt; /* Number of pbufs for clusters */
480 extern int vnode_pbuf_freecnt; /* Number of pbufs for vnode pager */
481
482 struct uio;
483
484 caddr_t kern_vfs_bio_buffer_alloc(caddr_t v, long physmem_est);
485 void bufinit(void);
486 void bwillwrite(void);
487 int buf_dirty_count_severe(void);
488 void bremfree(struct buf *);
489 int bread(struct vnode *, daddr_t, int, struct ucred *, struct buf **);
490 int breadn(struct vnode *, daddr_t, int, daddr_t *, int *, int,
491 struct ucred *, struct buf **);
492 int bwrite(struct buf *);
493 void bdwrite(struct buf *);
494 void bawrite(struct buf *);
495 void bdirty(struct buf *);
496 void bundirty(struct buf *);
497 void brelse(struct buf *);
498 void bqrelse(struct buf *);
499 int vfs_bio_awrite(struct buf *);
500 struct buf * getpbuf(int *);
501 struct buf *incore(struct vnode *, daddr_t);
502 struct buf *gbincore(struct vnode *, daddr_t);
503 int inmem(struct vnode *, daddr_t);
504 struct buf *getblk(struct vnode *, daddr_t, int, int, int, int);
505 struct buf *geteblk(int);
506 int bufwait(struct buf *);
507 void bufdone(struct buf *);
508
509 void cluster_callback(struct buf *);
510 int cluster_read(struct vnode *, u_quad_t, daddr_t, long,
511 struct ucred *, long, int, struct buf **);
512 int cluster_wbuild(struct vnode *, long, daddr_t, int);
513 void cluster_write(struct buf *, u_quad_t, int);
514 void vfs_bio_set_validclean(struct buf *, int base, int size);
515 void vfs_bio_clrbuf(struct buf *);
516 void vfs_busy_pages(struct buf *, int clear_modify);
517 void vfs_unbusy_pages(struct buf *);
518 void vwakeup(struct buf *);
519 int vmapbuf(struct buf *);
520 void vunmapbuf(struct buf *);
521 void relpbuf(struct buf *, int *);
522 void brelvp(struct buf *);
523 void bgetvp(struct vnode *, struct buf *);
524 void pbgetvp(struct vnode *, struct buf *);
525 void pbrelvp(struct buf *);
526 int allocbuf(struct buf *bp, int size);
527 void reassignbuf(struct buf *);
528 struct buf *trypbuf(int *);
529 void bwait(struct buf *, u_char, const char *);
530 void bdone(struct buf *);
531
532 #endif /* _KERNEL */
533
534 #endif /* !_SYS_BUF_H_ */
/* Cache object: 7d2f3be041f0f0704edc3f6670fe5e42 */