1 /*
2 * Copyright (c) 1982, 1986, 1988, 1993
3 * The Regents of the University of California. All rights reserved.
4 * (c) UNIX System Laboratories, Inc.
5 * All or some portions of this file are derived from material licensed
6 * to the University of California by American Telephone and Telegraph
7 * Co. or Unix System Laboratories, Inc. and are reproduced herein with
8 * the permission of UNIX System Laboratories, Inc.
9 *
10 * Redistribution and use in source and binary forms, with or without
11 * modification, are permitted provided that the following conditions
12 * are met:
13 * 1. Redistributions of source code must retain the above copyright
14 * notice, this list of conditions and the following disclaimer.
15 * 2. Redistributions in binary form must reproduce the above copyright
16 * notice, this list of conditions and the following disclaimer in the
17 * documentation and/or other materials provided with the distribution.
18 * 3. All advertising materials mentioning features or use of this software
19 * must display the following acknowledgement:
20 * This product includes software developed by the University of
21 * California, Berkeley and its contributors.
22 * 4. Neither the name of the University nor the names of its contributors
23 * may be used to endorse or promote products derived from this software
24 * without specific prior written permission.
25 *
26 * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
27 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
28 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
29 * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
30 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
31 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
32 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
33 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
34 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
35 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
36 * SUCH DAMAGE.
37 *
38 * @(#)ufs_disksubr.c 8.5 (Berkeley) 1/21/94
39 * $FreeBSD$
40 */
41
42 #include <sys/param.h>
43 #include <sys/systm.h>
44 #include <sys/buf.h>
45 #include <sys/conf.h>
46 #include <sys/disklabel.h>
47 #include <sys/diskslice.h>
48 #include <sys/syslog.h>
49
50 /*
51 * Seek sort for disks.
52 *
53 * The buf_queue keep two queues, sorted in ascending block order. The first
54 * queue holds those requests which are positioned after the current block
55 * (in the first request); the second, which starts at queue->switch_point,
56 * holds requests which came in after their block number was passed. Thus
57 * we implement a one way scan, retracting after reaching the end of the drive
58 * to the first request on the second queue, at which time it becomes the
59 * first queue.
60 *
61 * A one-way scan is natural because of the way UNIX read-ahead blocks are
62 * allocated.
63 */
64
65 void
66 bufqdisksort(bufq, bp)
67 struct buf_queue_head *bufq;
68 struct buf *bp;
69 {
70 struct buf *bq;
71 struct buf *bn;
72 struct buf *be;
73
74 be = TAILQ_LAST(&bufq->queue, buf_queue);
75 /*
76 * If the queue is empty or we are an
77 * ordered transaction, then it's easy.
78 */
79 if ((bq = bufq_first(bufq)) == NULL
80 || (bp->b_flags & B_ORDERED) != 0) {
81 bufq_insert_tail(bufq, bp);
82 return;
83 } else if (bufq->insert_point != NULL) {
84
85 /*
86 * A certain portion of the list is
87 * "locked" to preserve ordering, so
88 * we can only insert after the insert
89 * point.
90 */
91 bq = bufq->insert_point;
92 } else {
93
94 /*
95 * If we lie before the last removed (currently active)
96 * request, and are not inserting ourselves into the
97 * "locked" portion of the list, then we must add ourselves
98 * to the second request list.
99 */
100 if (bp->b_pblkno < bufq->last_pblkno) {
101
102 bq = bufq->switch_point;
103 /*
104 * If we are starting a new secondary list,
105 * then it's easy.
106 */
107 if (bq == NULL) {
108 bufq->switch_point = bp;
109 bufq_insert_tail(bufq, bp);
110 return;
111 }
112 /*
113 * If we lie ahead of the current switch point,
114 * insert us before the switch point and move
115 * the switch point.
116 */
117 if (bp->b_pblkno < bq->b_pblkno) {
118 bufq->switch_point = bp;
119 TAILQ_INSERT_BEFORE(bq, bp, b_act);
120 return;
121 }
122 } else {
123 if (bufq->switch_point != NULL)
124 be = TAILQ_PREV(bufq->switch_point,
125 buf_queue, b_act);
126 /*
127 * If we lie between last_pblkno and bq,
128 * insert before bq.
129 */
130 if (bp->b_pblkno < bq->b_pblkno) {
131 TAILQ_INSERT_BEFORE(bq, bp, b_act);
132 return;
133 }
134 }
135 }
136
137 /*
138 * Request is at/after our current position in the list.
139 * Optimize for sequential I/O by seeing if we go at the tail.
140 */
141 if (bp->b_pblkno > be->b_pblkno) {
142 TAILQ_INSERT_AFTER(&bufq->queue, be, bp, b_act);
143 return;
144 }
145
146 /* Otherwise, insertion sort */
147 while ((bn = TAILQ_NEXT(bq, b_act)) != NULL) {
148
149 /*
150 * We want to go after the current request if it is the end
151 * of the first request list, or if the next request is a
152 * larger cylinder than our request.
153 */
154 if (bn == bufq->switch_point
155 || bp->b_pblkno < bn->b_pblkno)
156 break;
157 bq = bn;
158 }
159 TAILQ_INSERT_AFTER(&bufq->queue, bq, bp, b_act);
160 }
161
162
163 /*
164 * Attempt to read a disk label from a device using the indicated strategy
165 * routine. The label must be partly set up before this: secpercyl, secsize
166 * and anything required in the strategy routine (e.g., dummy bounds for the
167 * partition containing the label) must be filled in before calling us.
168 * Returns NULL on success and an error string on failure.
169 */
170 char *
171 readdisklabel(dev, strat, lp)
172 dev_t dev;
173 d_strategy_t *strat;
174 register struct disklabel *lp;
175 {
176 register struct buf *bp;
177 struct disklabel *dlp;
178 char *msg = NULL;
179
180 bp = geteblk((int)lp->d_secsize);
181 bp->b_dev = dev;
182 bp->b_blkno = LABELSECTOR * ((int)lp->d_secsize/DEV_BSIZE);
183 bp->b_bcount = lp->d_secsize;
184 bp->b_flags &= ~B_INVAL;
185 bp->b_flags |= B_BUSY | B_READ;
186 (*strat)(bp);
187 if (biowait(bp))
188 msg = "I/O error";
189 else for (dlp = (struct disklabel *)bp->b_data;
190 dlp <= (struct disklabel *)((char *)bp->b_data +
191 lp->d_secsize - sizeof(*dlp));
192 dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
193 if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) {
194 if (msg == NULL)
195 msg = "no disk label";
196 } else if (dlp->d_npartitions > MAXPARTITIONS ||
197 dkcksum(dlp) != 0)
198 msg = "disk label corrupted";
199 else {
200 *lp = *dlp;
201 msg = NULL;
202 break;
203 }
204 }
205 bp->b_flags |= B_INVAL | B_AGE;
206 brelse(bp);
207 return (msg);
208 }
209
210 /*
211 * Check new disk label for sensibility before setting it.
212 */
213 int
214 setdisklabel(olp, nlp, openmask)
215 register struct disklabel *olp, *nlp;
216 u_long openmask;
217 {
218 register int i;
219 register struct partition *opp, *npp;
220
221 /*
222 * Check it is actually a disklabel we are looking at.
223 */
224 if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC ||
225 dkcksum(nlp) != 0)
226 return (EINVAL);
227 /*
228 * For each partition that we think is open,
229 */
230 while ((i = ffs((long)openmask)) != 0) {
231 i--;
232 /*
233 * Check it is not changing....
234 */
235 openmask &= ~(1 << i);
236 if (nlp->d_npartitions <= i)
237 return (EBUSY);
238 opp = &olp->d_partitions[i];
239 npp = &nlp->d_partitions[i];
240 if (npp->p_offset != opp->p_offset || npp->p_size < opp->p_size)
241 return (EBUSY);
242 /*
243 * Copy internally-set partition information
244 * if new label doesn't include it. XXX
245 * (If we are using it then we had better stay the same type)
246 * This is possibly dubious, as someone else noted (XXX)
247 */
248 if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) {
249 npp->p_fstype = opp->p_fstype;
250 npp->p_fsize = opp->p_fsize;
251 npp->p_frag = opp->p_frag;
252 npp->p_cpg = opp->p_cpg;
253 }
254 }
255 nlp->d_checksum = 0;
256 nlp->d_checksum = dkcksum(nlp);
257 *olp = *nlp;
258 return (0);
259 }
260
261 /*
262 * Write disk label back to device after modification.
263 */
264 int
265 writedisklabel(dev, strat, lp)
266 dev_t dev;
267 d_strategy_t *strat;
268 register struct disklabel *lp;
269 {
270 struct buf *bp;
271 struct disklabel *dlp;
272 int error = 0;
273
274 if (lp->d_partitions[RAW_PART].p_offset != 0)
275 return (EXDEV); /* not quite right */
276 bp = geteblk((int)lp->d_secsize);
277 bp->b_dev = dkmodpart(dev, RAW_PART);
278 bp->b_blkno = LABELSECTOR * ((int)lp->d_secsize/DEV_BSIZE);
279 bp->b_bcount = lp->d_secsize;
280 #if 1
281 /*
282 * We read the label first to see if it's there,
283 * in which case we will put ours at the same offset into the block..
284 * (I think this is stupid [Julian])
285 * Note that you can't write a label out over a corrupted label!
286 * (also stupid.. how do you write the first one? by raw writes?)
287 */
288 bp->b_flags &= ~B_INVAL;
289 bp->b_flags |= B_BUSY | B_READ;
290 (*strat)(bp);
291 error = biowait(bp);
292 if (error)
293 goto done;
294 for (dlp = (struct disklabel *)bp->b_data;
295 dlp <= (struct disklabel *)
296 ((char *)bp->b_data + lp->d_secsize - sizeof(*dlp));
297 dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
298 if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC &&
299 dkcksum(dlp) == 0) {
300 *dlp = *lp;
301 bp->b_flags &= ~(B_DONE | B_READ);
302 bp->b_flags |= B_BUSY | B_WRITE;
303 #ifdef __alpha__
304 alpha_fix_srm_checksum(bp);
305 #endif
306 (*strat)(bp);
307 error = biowait(bp);
308 goto done;
309 }
310 }
311 error = ESRCH;
312 done:
313 #else
314 bzero(bp->b_data, lp->d_secsize);
315 dlp = (struct disklabel *)bp->b_data;
316 *dlp = *lp;
317 bp->b_flags &= ~B_INVAL;
318 bp->b_flags |= B_BUSY | B_WRITE;
319 (*strat)(bp);
320 error = biowait(bp);
321 #endif
322 bp->b_flags |= B_INVAL | B_AGE;
323 brelse(bp);
324 return (error);
325 }
326
327 /*
328 * Compute checksum for disk label.
329 */
330 u_int
331 dkcksum(lp)
332 register struct disklabel *lp;
333 {
334 register u_short *start, *end;
335 register u_short sum = 0;
336
337 start = (u_short *)lp;
338 end = (u_short *)&lp->d_partitions[lp->d_npartitions];
339 while (start < end)
340 sum ^= *start++;
341 return (sum);
342 }
343
344 /*
345 * Disk error is the preface to plaintive error messages
346 * about failing disk transfers. It prints messages of the form
347
348 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d)
349
350 * if the offset of the error in the transfer and a disk label
351 * are both available. blkdone should be -1 if the position of the error
352 * is unknown; the disklabel pointer may be null from drivers that have not
353 * been converted to use them. The message is printed with printf
354 * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
355 * The message should be completed (with at least a newline) with printf
356 * or addlog, respectively. There is no trailing space.
357 */
358 void
359 diskerr(bp, dname, what, pri, blkdone, lp)
360 register struct buf *bp;
361 char *dname, *what;
362 int pri, blkdone;
363 register struct disklabel *lp;
364 {
365 int unit = dkunit(bp->b_dev);
366 int slice = dkslice(bp->b_dev);
367 int part = dkpart(bp->b_dev);
368 register int (*pr) __P((const char *, ...));
369 char partname[2];
370 char *sname;
371 daddr_t sn;
372
373 if (pri != LOG_PRINTF) {
374 log(pri, "%s", "");
375 pr = addlog;
376 } else
377 pr = printf;
378 sname = dsname(dname, unit, slice, part, partname);
379 (*pr)("%s%s: %s %sing fsbn ", sname, partname, what,
380 bp->b_flags & B_READ ? "read" : "writ");
381 sn = bp->b_blkno;
382 if (bp->b_bcount <= DEV_BSIZE)
383 (*pr)("%ld", (long)sn);
384 else {
385 if (blkdone >= 0) {
386 sn += blkdone;
387 (*pr)("%ld of ", (long)sn);
388 }
389 (*pr)("%ld-%ld", (long)bp->b_blkno,
390 (long)(bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE));
391 }
392 if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) {
393 #ifdef tahoe
394 sn *= DEV_BSIZE / lp->d_secsize; /* XXX */
395 #endif
396 sn += lp->d_partitions[part].p_offset;
397 /*
398 * XXX should add slice offset and not print the slice,
399 * but we don't know the slice pointer.
400 * XXX should print bp->b_pblkno so that this will work
401 * independent of slices, labels and bad sector remapping,
402 * but some drivers don't set bp->b_pblkno.
403 */
404 (*pr)(" (%s bn %ld; cn %ld", sname, (long)sn,
405 (long)(sn / lp->d_secpercyl));
406 sn %= (long)lp->d_secpercyl;
407 (*pr)(" tn %ld sn %ld)", (long)(sn / lp->d_nsectors),
408 (long)(sn % lp->d_nsectors));
409 }
410 }
Cache object: 9bad6566471237b6fc011098a5350b68
|