/*	$NetBSD: ufs_readwrite.c,v 1.55 2003/08/07 16:34:46 agc Exp $	*/

/*-
 * Copyright (c) 1993
 *	The Regents of the University of California.  All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 * 3. Neither the name of the University nor the names of its contributors
 *    may be used to endorse or promote products derived from this software
 *    without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *	@(#)ufs_readwrite.c	8.11 (Berkeley) 5/8/95
 */

#include <sys/cdefs.h>
__KERNEL_RCSID(0, "$NetBSD: ufs_readwrite.c,v 1.55 2003/08/07 16:34:46 agc Exp $");

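/*
 * This file is not compiled on its own; ffs_vnops.c and lfs_vnops.c
 * #include it, with LFS_READWRITE defined for the LFS build.  The
 * macros below select the filesystem structure, inode field, and
 * entry point names for whichever filesystem is being built.
 */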
#ifdef LFS_READWRITE
#define	BLKSIZE(a, b, c)	blksize(a, b, c)
#define	FS			struct lfs
#define	I_FS			i_lfs
#define	READ			lfs_read
#define	READ_S			"lfs_read"
#define	WRITE			lfs_write
#define	WRITE_S			"lfs_write"
#define	fs_bsize		lfs_bsize
#define	fs_maxfilesize		lfs_maxfilesize
#else
#define	BLKSIZE(a, b, c)	blksize(a, b, c)
#define	FS			struct fs
#define	I_FS			i_fs
#define	READ			ffs_read
#define	READ_S			"ffs_read"
#define	WRITE			ffs_write
#define	WRITE_S			"ffs_write"
#endif

/*
 * Vnode op for reading.
 */
/* ARGSUSED */
int
READ(void *v)
{
	struct vop_read_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap = v;
	struct vnode *vp;
	struct inode *ip;
	struct uio *uio;
	FS *fs;
	void *win;
	vsize_t bytelen;
	struct buf *bp;
	daddr_t lbn, nextlbn;
	off_t bytesinfile;
	long size, xfersize, blkoffset;
	int error;
	boolean_t usepc = FALSE;

	vp = ap->a_vp;
	ip = VTOI(vp);
	uio = ap->a_uio;
	error = 0;

#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_READ)
		panic("%s: mode", READ_S);

	if (vp->v_type == VLNK) {
		if ((int)ip->i_size < vp->v_mount->mnt_maxsymlinklen ||
		    (vp->v_mount->mnt_maxsymlinklen == 0 &&
		     DIP(ip, blocks) == 0))
			panic("%s: short symlink", READ_S);
	} else if (vp->v_type != VREG && vp->v_type != VDIR)
		panic("%s: type %d", READ_S, vp->v_type);
#endif
	fs = ip->I_FS;
	if ((u_int64_t)uio->uio_offset > fs->fs_maxfilesize)
		return (EFBIG);
	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset >= ip->i_size) {
		goto out;
	}

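	/*
	 * Regular files are read through the page cache (UBC); the
	 * buffer cache loop below handles directories and long symlinks.
	 * Under LFS the Ifile also goes through the buffer cache.
	 */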
#ifdef LFS_READWRITE
	usepc = (vp->v_type == VREG && ip->i_number != LFS_IFILE_INUM);
#else /* !LFS_READWRITE */
	usepc = vp->v_type == VREG;
#endif /* !LFS_READWRITE */
	if (usepc) {
		while (uio->uio_resid > 0) {
			bytelen = MIN(ip->i_size - uio->uio_offset,
			    uio->uio_resid);
			if (bytelen == 0)
				break;

			win = ubc_alloc(&vp->v_uobj, uio->uio_offset,
			    &bytelen, UBC_READ);
			error = uiomove(win, bytelen, uio);
			ubc_release(win, 0);
			if (error)
				break;
		}
		goto out;
	}

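	/*
	 * Buffer cache path: read one logical block at a time, starting
	 * an asynchronous read-ahead of the next block when more data
	 * remains in the file.
	 */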
	for (error = 0, bp = NULL; uio->uio_resid > 0; bp = NULL) {
		bytesinfile = ip->i_size - uio->uio_offset;
		if (bytesinfile <= 0)
			break;
		lbn = lblkno(fs, uio->uio_offset);
		nextlbn = lbn + 1;
		size = BLKSIZE(fs, ip, lbn);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = MIN(MIN(fs->fs_bsize - blkoffset, uio->uio_resid),
		    bytesinfile);

		if (lblktosize(fs, nextlbn) >= ip->i_size)
			error = bread(vp, lbn, size, NOCRED, &bp);
		else {
			int nextsize = BLKSIZE(fs, ip, nextlbn);
			error = breadn(vp, lbn,
			    size, &nextlbn, &nextsize, 1, NOCRED, &bp);
		}
		if (error)
			break;

		/*
		 * We should only get non-zero b_resid when an I/O error
		 * has occurred, which should cause us to break above.
		 * However, if the short read did not cause an error,
		 * then we want to ensure that we do not uiomove bad
		 * or uninitialized data.
		 */
		size -= bp->b_resid;
		if (size < xfersize) {
			if (size == 0)
				break;
			xfersize = size;
		}
		error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);
		if (error)
			break;
		brelse(bp);
	}
	if (bp != NULL)
		brelse(bp);

 out:
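	/*
	 * Mark the inode for an access-time update unless the filesystem
	 * is mounted noatime; for IO_SYNC reads the update is pushed to
	 * disk before returning.
	 */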
	if (!(vp->v_mount->mnt_flag & MNT_NOATIME)) {
		ip->i_flag |= IN_ACCESS;
		if ((ap->a_ioflag & IO_SYNC) == IO_SYNC)
			error = VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
	}
	return (error);
}

/*
 * Vnode op for writing.
 */
int
WRITE(void *v)
{
	struct vop_write_args /* {
		struct vnode *a_vp;
		struct uio *a_uio;
		int a_ioflag;
		struct ucred *a_cred;
	} */ *ap = v;
	struct vnode *vp;
	struct uio *uio;
	struct inode *ip;
	struct genfs_node *gp;
	FS *fs;
	struct buf *bp;
	struct proc *p;
	struct ucred *cred;
	daddr_t lbn;
	off_t osize, origoff, oldoff, preallocoff, endallocoff, nsize;
	int blkoffset, error, flags, ioflag, resid, size, xfersize;
	int bsize, aflag;
	int ubc_alloc_flags;
	int extended = 0;
	void *win;
	vsize_t bytelen;
	boolean_t async;
	boolean_t usepc = FALSE;
#ifdef LFS_READWRITE
	boolean_t need_unreserve = FALSE;
#endif

	cred = ap->a_cred;
	ioflag = ap->a_ioflag;
	uio = ap->a_uio;
	vp = ap->a_vp;
	ip = VTOI(vp);
	gp = VTOG(vp);

	KASSERT(vp->v_size == ip->i_size);
#ifdef DIAGNOSTIC
	if (uio->uio_rw != UIO_WRITE)
		panic("%s: mode", WRITE_S);
#endif

	switch (vp->v_type) {
	case VREG:
		if (ioflag & IO_APPEND)
			uio->uio_offset = ip->i_size;
		if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size)
			return (EPERM);
		/* FALLTHROUGH */
	case VLNK:
		break;
	case VDIR:
		if ((ioflag & IO_SYNC) == 0)
			panic("%s: nonsync dir write", WRITE_S);
		break;
	default:
		panic("%s: type", WRITE_S);
	}

	fs = ip->I_FS;
	if (uio->uio_offset < 0 ||
	    (u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize)
		return (EFBIG);
#ifdef LFS_READWRITE
	/* Disallow writes to the Ifile, even if noschg flag is removed */
	/* XXX can this go away when the Ifile is no longer in the namespace? */
	if (vp == fs->lfs_ivnode)
		return (EPERM);
#endif

	/*
	 * Maybe this should be above the vnode op call, but so long as
	 * file servers have no limits, I don't think it matters.
	 */
	p = uio->uio_procp;
	if (vp->v_type == VREG && p &&
	    uio->uio_offset + uio->uio_resid >
	    p->p_rlimit[RLIMIT_FSIZE].rlim_cur) {
		psignal(p, SIGXFSZ);
		return (EFBIG);
	}
	if (uio->uio_resid == 0)
		return (0);

	flags = ioflag & IO_SYNC ? B_SYNC : 0;
	async = vp->v_mount->mnt_flag & MNT_ASYNC;
	origoff = uio->uio_offset;
	resid = uio->uio_resid;
	osize = ip->i_size;
	bsize = fs->fs_bsize;
	error = 0;

	usepc = vp->v_type == VREG;
#ifdef LFS_READWRITE
	async = TRUE;

	/* Account writes.  This overcounts if pages are already dirty. */
	if (usepc) {
		simple_lock(&lfs_subsys_lock);
		lfs_subsys_pages += round_page(uio->uio_resid) >> PAGE_SHIFT;
		simple_unlock(&lfs_subsys_lock);
	}
	lfs_check(vp, LFS_UNUSED_LBN, 0);
#endif /* LFS_READWRITE */
	if (!usepc) {
		goto bcache;
	}

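	/*
	 * [preallocoff, endallocoff) covers the part of the write that
	 * consists only of whole new blocks: preallocoff is the old EOF
	 * (or the start of the write, if later) rounded up to a block
	 * and page boundary, and endallocoff is the new EOF rounded down
	 * to a block boundary.  Blocks in this range can be allocated
	 * without initializing their pages first.
	 */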
	preallocoff = round_page(blkroundup(fs, MAX(osize, uio->uio_offset)));
	aflag = ioflag & IO_SYNC ? B_SYNC : 0;
	nsize = MAX(osize, uio->uio_offset + uio->uio_resid);
	endallocoff = nsize - blkoff(fs, nsize);

	/*
	 * if we're increasing the file size, deal with expanding
	 * the fragment if there is one.
	 */

	if (nsize > osize && lblkno(fs, osize) < NDADDR &&
	    lblkno(fs, osize) != lblkno(fs, nsize) &&
	    blkroundup(fs, osize) != osize) {
		error = ufs_balloc_range(vp, osize, blkroundup(fs, osize) -
		    osize, cred, aflag);
		if (error) {
			goto out;
		}
		if (flags & B_SYNC) {
			vp->v_size = blkroundup(fs, osize);
			simple_lock(&vp->v_interlock);
			VOP_PUTPAGES(vp, trunc_page(osize & ~(bsize - 1)),
			    round_page(vp->v_size), PGO_CLEANIT | PGO_SYNCIO);
		}
	}

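	/*
	 * Main write loop: for each chunk (at most one block), make sure
	 * backing store is allocated, then map the pages through UBC and
	 * copy the user data in.
	 */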
	ubc_alloc_flags = UBC_WRITE;
	while (uio->uio_resid > 0) {
		boolean_t extending; /* if we're extending a whole block */
		off_t newoff;

		oldoff = uio->uio_offset;
		blkoffset = blkoff(fs, uio->uio_offset);
		bytelen = MIN(fs->fs_bsize - blkoffset, uio->uio_resid);

		/*
		 * if we're filling in a hole, allocate the blocks now and
		 * initialize the pages first.  if we're extending the file,
		 * we can safely allocate blocks without initializing pages
		 * since the new blocks will be inaccessible until the write
		 * is complete.
		 */
		extending = uio->uio_offset >= preallocoff &&
		    uio->uio_offset < endallocoff;

		if (!extending) {
			error = ufs_balloc_range(vp, uio->uio_offset, bytelen,
			    cred, aflag);
			if (error) {
				break;
			}
			ubc_alloc_flags &= ~UBC_FAULTBUSY;
		} else {
			lockmgr(&gp->g_glock, LK_EXCLUSIVE, NULL);
			error = GOP_ALLOC(vp, uio->uio_offset, bytelen,
			    aflag, cred);
			lockmgr(&gp->g_glock, LK_RELEASE, NULL);
			if (error) {
				break;
			}
			ubc_alloc_flags |= UBC_FAULTBUSY;
		}

		/*
		 * copy the data.
		 */

		win = ubc_alloc(&vp->v_uobj, uio->uio_offset, &bytelen,
		    ubc_alloc_flags);
		error = uiomove(win, bytelen, uio);
		if (error && extending) {
			/*
			 * if we haven't initialized the pages yet,
			 * do it now.  it's safe to use memset here
			 * because we just mapped the pages above.
			 */
			memset(win, 0, bytelen);
		}
		ubc_release(win, 0);

		/*
		 * update UVM's notion of the size now that we've
		 * copied the data into the vnode's pages.
		 *
		 * we should update the size even when uiomove failed.
		 * otherwise ffs_truncate can't flush soft update states.
		 */

		newoff = oldoff + bytelen;
		if (vp->v_size < newoff) {
			uvm_vnp_setsize(vp, newoff);
			extended = 1;
		}

		if (error) {
			break;
		}

		/*
		 * flush what we just wrote if necessary.
		 * XXXUBC simplistic async flushing.
		 */

		if (!async && oldoff >> 16 != uio->uio_offset >> 16) {
			simple_lock(&vp->v_interlock);
			error = VOP_PUTPAGES(vp, (oldoff >> 16) << 16,
			    (uio->uio_offset >> 16) << 16, PGO_CLEANIT);
			if (error) {
				break;
			}
		}
	}
	if (error == 0 && ioflag & IO_SYNC) {
		simple_lock(&vp->v_interlock);
		error = VOP_PUTPAGES(vp, trunc_page(origoff & ~(bsize - 1)),
		    round_page(blkroundup(fs, uio->uio_offset)),
		    PGO_CLEANIT | PGO_SYNCIO);
	}
	goto out;

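	/*
	 * Buffer cache path, used when not going through the page cache
	 * (directories and symlinks): flush and free any cached pages
	 * over the written range so the two caches stay coherent, then
	 * copy the data one buffer at a time.
	 */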
 bcache:
	simple_lock(&vp->v_interlock);
	VOP_PUTPAGES(vp, trunc_page(origoff), round_page(origoff + resid),
	    PGO_CLEANIT | PGO_FREE | PGO_SYNCIO);
	while (uio->uio_resid > 0) {
		lbn = lblkno(fs, uio->uio_offset);
		blkoffset = blkoff(fs, uio->uio_offset);
		xfersize = MIN(fs->fs_bsize - blkoffset, uio->uio_resid);
		if (fs->fs_bsize > xfersize)
			flags |= B_CLRBUF;
		else
			flags &= ~B_CLRBUF;

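		/*
		 * Under LFS, reserve segment space for the data block
		 * plus a worst-case chain of NIADDR indirect blocks
		 * before allocating; the reservation is released again
		 * once the block has been written.
		 */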
#ifdef LFS_READWRITE
		error = lfs_reserve(fs, vp, NULL,
		    btofsb(fs, (NIADDR + 1) << fs->lfs_bshift));
		if (error)
			break;
		need_unreserve = TRUE;
#endif
		error = VOP_BALLOC(vp, uio->uio_offset, xfersize,
		    ap->a_cred, flags, &bp);

		if (error)
			break;
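		/*
		 * If this write extends the file, update the inode size
		 * and UVM's notion of it before copying the data.
		 */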
		if (uio->uio_offset + xfersize > ip->i_size) {
			ip->i_size = uio->uio_offset + xfersize;
			DIP_ASSIGN(ip, size, ip->i_size);
			uvm_vnp_setsize(vp, ip->i_size);
			extended = 1;
		}
		size = BLKSIZE(fs, ip, lbn) - bp->b_resid;
		if (xfersize > size)
			xfersize = size;

		error = uiomove((char *)bp->b_data + blkoffset, xfersize, uio);

		/*
		 * if we didn't clear the block and the uiomove failed,
		 * the buf will now contain part of some other file,
		 * so we need to invalidate it.
		 */
		if (error && (flags & B_CLRBUF) == 0) {
			bp->b_flags |= B_INVAL;
			brelse(bp);
			break;
		}
#ifdef LFS_READWRITE
		(void)VOP_BWRITE(bp);
		lfs_reserve(fs, vp, NULL,
		    -btofsb(fs, (NIADDR + 1) << fs->lfs_bshift));
		need_unreserve = FALSE;
#else
		if (ioflag & IO_SYNC)
			(void)bwrite(bp);
		else if (xfersize + blkoffset == fs->fs_bsize)
			bawrite(bp);
		else
			bdwrite(bp);
#endif
		if (error || xfersize == 0)
			break;
	}
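	/*
	 * If the loop above exited between reserving and releasing
	 * (e.g. after a failed VOP_BALLOC), drop the outstanding LFS
	 * reservation here.
	 */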
#ifdef LFS_READWRITE
	if (need_unreserve) {
		lfs_reserve(fs, vp, NULL,
		    -btofsb(fs, (NIADDR + 1) << fs->lfs_bshift));
	}
#endif

	/*
	 * If we successfully wrote any data and we are not the superuser,
	 * we clear the setuid and setgid bits as a precaution against
	 * tampering.
	 */
 out:
	ip->i_flag |= IN_CHANGE | IN_UPDATE;
	if (resid > uio->uio_resid && ap->a_cred && ap->a_cred->cr_uid != 0) {
		ip->i_mode &= ~(ISUID | ISGID);
		DIP_ASSIGN(ip, mode, ip->i_mode);
	}
	if (resid > uio->uio_resid)
		VN_KNOTE(vp, NOTE_WRITE | (extended ? NOTE_EXTEND : 0));
	if (error) {
		(void) VOP_TRUNCATE(vp, osize, ioflag & IO_SYNC, ap->a_cred,
		    uio->uio_procp);
		uio->uio_offset -= resid - uio->uio_resid;
		uio->uio_resid = resid;
	} else if (resid > uio->uio_resid && (ioflag & IO_SYNC) == IO_SYNC)
		error = VOP_UPDATE(vp, NULL, NULL, UPDATE_WAIT);
	KASSERT(vp->v_size == ip->i_size);
	return (error);
}