FreeBSD/Linux Kernel Cross Reference
sys/sys/buf.h
1 /* $NetBSD: buf.h,v 1.110 2008/07/31 05:38:05 simonb Exp $ */
2
3 /*-
4 * Copyright (c) 1999, 2000, 2007 The NetBSD Foundation, Inc.
5 * All rights reserved.
6 *
7 * This code is derived from software contributed to The NetBSD Foundation
8 * by Jason R. Thorpe of the Numerical Aerospace Simulation Facility,
9 * NASA Ames Research Center, and by Andrew Doran.
10 *
11 * Redistribution and use in source and binary forms, with or without
12 * modification, are permitted provided that the following conditions
13 * are met:
14 * 1. Redistributions of source code must retain the above copyright
15 * notice, this list of conditions and the following disclaimer.
16 * 2. Redistributions in binary form must reproduce the above copyright
17 * notice, this list of conditions and the following disclaimer in the
18 * documentation and/or other materials provided with the distribution.
19 *
20 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
21 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
22 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
23 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
24 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
25 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
26 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
27 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
28 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
29 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
30 * POSSIBILITY OF SUCH DAMAGE.
31 */
32
33 /*
34 * Copyright (c) 1982, 1986, 1989, 1993
35 * The Regents of the University of California. All rights reserved.
36 * (c) UNIX System Laboratories, Inc.
37 * All or some portions of this file are derived from material licensed
38 * to the University of California by American Telephone and Telegraph
39 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
40 * the permission of UNIX System Laboratories, Inc.
41 *
42 * Redistribution and use in source and binary forms, with or without
43 * modification, are permitted provided that the following conditions
44 * are met:
45 * 1. Redistributions of source code must retain the above copyright
46 * notice, this list of conditions and the following disclaimer.
47 * 2. Redistributions in binary form must reproduce the above copyright
48 * notice, this list of conditions and the following disclaimer in the
49 * documentation and/or other materials provided with the distribution.
50 * 3. Neither the name of the University nor the names of its contributors
51 * may be used to endorse or promote products derived from this software
52 * without specific prior written permission.
53 *
54 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
55 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
56 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
57 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
58 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
59 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
60 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
61 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
62 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
63 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
64 * SUCH DAMAGE.
65 *
66 * @(#)buf.h 8.9 (Berkeley) 3/30/95
67 */
68
69 #ifndef _SYS_BUF_H_
70 #define _SYS_BUF_H_
71
72 #include <sys/pool.h>
73 #include <sys/queue.h>
74 #include <sys/mutex.h>
75 #include <sys/condvar.h>
76 #if defined(_KERNEL)
77 #include <sys/workqueue.h>
78 #endif /* defined(_KERNEL) */
79
/*
 * Forward declarations for the structure types named further down.
 */
struct buf;
struct mount;
struct vnode;
struct kauth_cred;

/*
 * Magic, non-NULL buffer pointer value.
 * NOTE(review): presumably marks a buffer that is on no list -- confirm
 * against the users of NOLIST.
 */
#define	NOLIST	((struct buf *)0x87654321)

/*
 * Declare the list head type here so that <ufs/ffs/softdep.h> does not
 * have to be included.
 */
LIST_HEAD(workhead, worklist);
91
/*
 * Buffer I/O hooks.
 *
 * At present the soft dependency code is the only user, so one set of
 * hooks is kept in a global variable.  Should another subsystem want to
 * use them, each buffer could be given a pointer to its own bio_ops.
 */
struct bio_ops {
	void	(*io_start)(struct buf *);
	void	(*io_complete)(struct buf *);
	void	(*io_deallocate)(struct buf *);
	int	(*io_fsync)(struct vnode *, int);
	int	(*io_sync)(struct mount *);
	void	(*io_movedeps)(struct buf *, struct buf *);
	int	(*io_countdeps)(struct buf *, int);
	void	(*io_pageiodone)(struct buf *);
};
108
109 extern kmutex_t bufcache_lock;
110 extern kmutex_t buffer_lock;
111
112 /*
113 * The buffer header describes an I/O operation in the kernel.
114 *
115 * Field markings and the corresponding locks:
116 *
117 * b thread of execution that holds BC_BUSY, does not correspond
118 * directly to any particular LWP
119 * c bufcache_lock
120 * l b_objlock
121 *
122 * For buffers associated with a vnode, b_objlock points to vp->v_interlock.
123 * If not associated with a vnode, it points to the generic buffer_lock.
124 */
125 struct buf {
126 union {
127 TAILQ_ENTRY(buf) u_actq;
128 #if defined(_KERNEL) /* u_work is smaller than u_actq. XXX */
129 struct work u_work;
130 #endif /* defined(_KERNEL) */
131 } b_u; /* b: device driver queue */
132 #define b_actq b_u.u_actq
133 #define b_work b_u.u_work
134 void (*b_iodone)(struct buf *);/* b: call when done */
135 int b_error; /* b: errno value. */
136 int b_resid; /* b: remaining I/O. */
137 u_int b_flags; /* b: B_* flags */
138 int b_prio; /* b: priority for queue */
139 int b_bufsize; /* b: allocated size */
140 int b_bcount; /* b: valid bytes in buffer */
141 dev_t b_dev; /* b: associated device */
142 void *b_data; /* b: fs private data */
143 daddr_t b_blkno; /* b: physical block number
144 (partition relative) */
145 daddr_t b_rawblkno; /* b: raw physical block number
146 (volume relative) */
147 struct proc *b_proc; /* b: proc if BB_PHYS */
148 void *b_saveaddr; /* b: saved b_data for physio */
149
150 /*
151 * b: private data for owner.
152 * - buffer cache buffers are owned by corresponding filesystem.
153 * - non-buffer cache buffers are owned by subsystem which
154 * allocated them. (filesystem, disk driver, etc)
155 */
156 void *b_private;
157 off_t b_dcookie; /* NFS: Offset cookie if dir block */
158
159 kcondvar_t b_busy; /* c: threads waiting on buf */
160 u_int b_refcnt; /* c: refcount for b_busy */
161 struct workhead b_dep; /* c: softdep */
162 LIST_ENTRY(buf) b_hash; /* c: hash chain */
163 LIST_ENTRY(buf) b_vnbufs; /* c: associated vnode */
164 TAILQ_ENTRY(buf) b_freelist; /* c: position if not active */
165 LIST_ENTRY(buf) b_wapbllist; /* c: transaction buffer list */
166 daddr_t b_lblkno; /* c: logical block number */
167 int b_freelistindex;/* c: free list index (BQ_) */
168 u_int b_cflags; /* c: BC_* flags */
169 struct vnode *b_vp; /* c: file vnode */
170
171 kcondvar_t b_done; /* o: waiting on completion */
172 u_int b_oflags; /* o: BO_* flags */
173 kmutex_t *b_objlock; /* o: completion lock */
174 };
175
/*
 * Historic industry practice keeps the cylinder number used by disksort()
 * in the `b_resid' field; for portability with that practice the field is
 * given a second name here.
 */
#define	b_cylinder	b_resid
181
/*
 * b_cflags bits; these belong to the buffer cache.
 */
#define	BC_AGE		0x00000001	/* Goes to the age queue once I/O is done. */
#define	BC_BUSY		0x00000010	/* I/O in progress. */
#define	BC_SCANNED	0x00000020	/* Block was already pushed during sync. */
#define	BC_INVAL	0x00002000	/* Holds no valid info. */
#define	BC_NOCACHE	0x00008000	/* Block is not to be cached after use. */
#define	BC_WANTED	0x00800000	/* A process wants this buffer. */
#define	BC_VFLUSH	0x04000000	/* Buffer is being synced. */

/*
 * b_oflags bits; these belong to the associated object.
 */
#define	BO_DELWRI	0x00000080	/* I/O is delayed until buffer reuse. */
#define	BO_DONE		0x00000200	/* I/O completed. */
198
/*
 * b_flags bits; these belong to whoever holds the buffer.
 */
#define	B_WRITE		0x00000000	/* Write buffer (pseudo flag: no bit). */
#define	B_ASYNC		0x00000004	/* Start the I/O without waiting. */
#define	B_COWDONE	0x00000400	/* Copy-on-write already done. */
#define	B_GATHERED	0x00001000	/* LFS: already in a segment. */
#define	B_LOCKED	0x00004000	/* Locked in core (not reusable). */
#define	B_PHYS		0x00040000	/* I/O to user memory. */
#define	B_RAW		0x00080000	/* Set by physio for raw transfers. */
#define	B_READ		0x00100000	/* Read buffer. */
#define	B_DEVPRIVATE	0x02000000	/* Device driver private flag. */

/*
 * Names for the BC_*, BO_* and B_* bits.  Each name is preceded by an
 * octal escape giving the 1-based position of its bit (e.g. \5BUSY for
 * 0x00000010).
 * NOTE(review): presumably fed to a bitmask formatter, with the leading
 * \2 byte as its format selector -- confirm against the users.
 * NOTE(review): \4BAD has no matching flag in this header, and BC_WANTED
 * has no entry here.
 */
#define	BUF_FLAGBITS \
    "\2" \
    "\1AGE\3ASYNC\4BAD\5BUSY" \
    "\6SCANNED\10DELWRI\12DONE\13COWDONE" \
    "\15GATHERED\16INVAL\17LOCKED\20NOCACHE" \
    "\23PHYS\24RAW\25READ" \
    "\32DEVPRIVATE\33VFLUSH"

/*
 * B_WRITE is a "pseudo flag" with the value zero, so it cannot be tested
 * with a plain mask; use these predicates instead of open-coding it.
 */
#define	BUF_ISREAD(bp)	(((bp)->b_flags & B_READ) != 0)
#define	BUF_ISWRITE(bp)	(!BUF_ISREAD(bp))
220
/*
 * Bookkeeping for one clustered I/O.
 *
 * The structure hangs off the b_saveaddr field of the buffer the I/O is
 * done on.  When the I/O completes, the cluster callback uses it to
 * parcel the I/O out to the individual buffers and then frees it.
 */
struct cluster_save {
	long	bs_bcount;		/* Saved b_bcount. */
	long	bs_bufsize;		/* Saved b_bufsize. */
	void	*bs_saveaddr;		/* Saved address (historically
					   "b_addr").  NOTE(review):
					   struct buf has no b_addr here;
					   presumably the displaced
					   b_saveaddr -- confirm. */
	int	bs_nchildren;		/* Number of associated buffers. */
	struct buf *bs_children;	/* List of associated buffers. */
};
234
/*
 * clrbuf(bp): fill the first b_bcount bytes at bp->b_data with zeros and
 * reset b_resid to 0.
 *
 * `bp' is expanded more than once, so it must be free of side effects.
 * The do/while wrapper makes the macro behave as a single statement.
 */
#define	clrbuf(bp)							\
do {									\
	memset((bp)->b_data, 0, (u_int)(bp)->b_bcount);			\
	(bp)->b_resid = 0;						\
} while (/* CONSTCOND */ 0)
243
/*
 * Flags for the low-level allocation routines.
 */
#define	B_CLRBUF	0x01	/* Clear the buffer that gets allocated. */
#define	B_SYNC		0x02	/* Perform every allocation synchronously. */
#define	B_METAONLY	0x04	/* Hand back the indirect block buffer. */
#define	B_CONTIG	0x08	/* Allocate the file contiguously. */

/*
 * Flags for bread(), breadn() and breada().
 */
#define	B_MODIFY	0x01	/* Hint: the caller might modify the buffer. */
252
253 #ifdef _KERNEL
254
/*
 * Accessors for the queueing priority kept in b_prio.
 *
 *	BIO_GETPRIO(bp)		   yields bp's priority.
 *	BIO_SETPRIO(bp, prio)	   stores prio in bp; the value of the
 *				   expression is the stored priority.
 *	BIO_COPYPRIO(bp1, bp2)	   copies bp2's priority into bp1.
 *
 * Every expansion is fully parenthesized so that it stays one operand
 * when used inside a larger expression: without the outer parentheses
 * `BIO_SETPRIO(bp, p) + 1' would store p + 1 in the buffer.
 */
#define	BIO_GETPRIO(bp)		((bp)->b_prio)
#define	BIO_SETPRIO(bp, prio)	((bp)->b_prio = (prio))
#define	BIO_COPYPRIO(bp1, bp2)	BIO_SETPRIO(bp1, BIO_GETPRIO(bp2))

/*
 * Priority levels.  BPRIO_NPRIO is the number of levels (0 .. 2).
 */
#define	BPRIO_NPRIO		3
#define	BPRIO_TIMECRITICAL	2
#define	BPRIO_TIMELIMITED	1
#define	BPRIO_TIMENONCRITICAL	0
#define	BPRIO_DEFAULT		BPRIO_TIMELIMITED
264
265 extern const struct bio_ops *bioopsp;
266 extern u_int nbuf; /* The number of buffer headers */
267
/*
 * The buffer free lists: how many there are, and the index of each.
 */
#define	BQUEUES		4	/* Number of free buffer queues. */

#define	BQ_LOCKED	0	/* Super-blocks and the like. */
#define	BQ_LRU		1	/* LRU queue: useful buffers. */
#define	BQ_AGE		2	/* Rubbish. */
#define	BQ_EMPTY	3	/* Buffer headers that have no memory. */
277
278 struct bqueue {
279 TAILQ_HEAD(, buf) bq_queue;
280 uint64_t bq_bytes;
281 buf_t *bq_marker;
282 };
283
284 extern struct bqueue bufqueues[BQUEUES];
285 extern struct simplelock bqueue_slock;
286
287 __BEGIN_DECLS
288 int allocbuf(buf_t *, int, int);
289 void bawrite(buf_t *);
290 void bdirty(buf_t *);
291 void bdwrite(buf_t *);
292 void biodone(buf_t *);
293 int biowait(buf_t *);
294 int bread(struct vnode *, daddr_t, int, struct kauth_cred *, int, buf_t **);
295 int breada(struct vnode *, daddr_t, int, daddr_t, int, struct kauth_cred *,
296 int, buf_t **);
297 int breadn(struct vnode *, daddr_t, int, daddr_t *, int *, int,
298 struct kauth_cred *, int, buf_t **);
299 void brelsel(buf_t *, int);
300 void brelse(buf_t *, int);
301 void bremfree(buf_t *);
302 void bufinit(void);
303 void bufinit2(void);
304 int bwrite(buf_t *);
305 buf_t *getblk(struct vnode *, daddr_t, int, int, int);
306 buf_t *geteblk(int);
307 buf_t *incore(struct vnode *, daddr_t);
308
309 void minphys(buf_t *);
310 int physio(void (*)(buf_t *), buf_t *, dev_t, int,
311 void (*)(buf_t *), struct uio *);
312
313 void brelvp(buf_t *);
314 void reassignbuf(buf_t *, struct vnode *);
315 void bgetvp(struct vnode *, buf_t *);
316 int buf_syncwait(void);
317 u_long buf_memcalc(void);
318 int buf_drain(int);
319 int buf_setvalimit(vsize_t);
320 #ifdef DDB
321 void vfs_buf_print(buf_t *, int, void (*)(const char *, ...));
322 #endif
323 buf_t *getiobuf(struct vnode *, bool);
324 void putiobuf(buf_t *);
325 void buf_init(buf_t *);
326 void buf_destroy(buf_t *);
327 int bbusy(buf_t *, bool, int, kmutex_t *);
328
329 void nestiobuf_iodone(buf_t *);
330 void nestiobuf_setup(buf_t *, buf_t *, int, size_t);
331 void nestiobuf_done(buf_t *, int, int);
332
333 __END_DECLS
334 #endif /* _KERNEL */
335 #endif /* !_SYS_BUF_H_ */
Cache object: 9162edb9c984eaa3951182606c1ab3e5
|