The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/ufs/ufs/ufs_disksubr.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright (c) 1982, 1986, 1988, 1993
    3  *      The Regents of the University of California.  All rights reserved.
    4  * (c) UNIX System Laboratories, Inc.
    5  * All or some portions of this file are derived from material licensed
    6  * to the University of California by American Telephone and Telegraph
    7  * Co. or Unix System Laboratories, Inc. and are reproduced herein with
    8  * the permission of UNIX System Laboratories, Inc.
    9  *
   10  * Redistribution and use in source and binary forms, with or without
   11  * modification, are permitted provided that the following conditions
   12  * are met:
   13  * 1. Redistributions of source code must retain the above copyright
   14  *    notice, this list of conditions and the following disclaimer.
   15  * 2. Redistributions in binary form must reproduce the above copyright
   16  *    notice, this list of conditions and the following disclaimer in the
   17  *    documentation and/or other materials provided with the distribution.
   18  * 3. All advertising materials mentioning features or use of this software
   19  *    must display the following acknowledgement:
   20  *      This product includes software developed by the University of
   21  *      California, Berkeley and its contributors.
   22  * 4. Neither the name of the University nor the names of its contributors
   23  *    may be used to endorse or promote products derived from this software
   24  *    without specific prior written permission.
   25  *
   26  * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
   27  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
   28  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
   29  * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
   30  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
   31  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
   32  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
   33  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
   34  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
   35  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
   36  * SUCH DAMAGE.
   37  *
   38  *      @(#)ufs_disksubr.c      8.5 (Berkeley) 1/21/94
   39  * $FreeBSD$
   40  */
   41 
   42 #include <sys/param.h>
   43 #include <sys/systm.h>
   44 #include <sys/buf.h>
   45 #include <sys/conf.h>
   46 #include <sys/disklabel.h>
   47 #include <sys/diskslice.h>
   48 #include <sys/syslog.h>
   49 
   50 /*
   51  * Seek sort for disks.
   52  *
   53  * The buf_queue keep two queues, sorted in ascending block order.  The first
   54  * queue holds those requests which are positioned after the current block
   55  * (in the first request); the second, which starts at queue->switch_point,
   56  * holds requests which came in after their block number was passed.  Thus
   57  * we implement a one way scan, retracting after reaching the end of the drive
   58  * to the first request on the second queue, at which time it becomes the
   59  * first queue.
   60  *
   61  * A one-way scan is natural because of the way UNIX read-ahead blocks are
   62  * allocated.
   63  */
   64 
   65 void
   66 bufqdisksort(bufq, bp)
   67         struct buf_queue_head *bufq;
   68         struct buf *bp;
   69 {
   70         struct buf *bq;
   71         struct buf *bn;
   72         struct buf *be;
   73         
   74         be = TAILQ_LAST(&bufq->queue, buf_queue);
   75         /*
   76          * If the queue is empty or we are an
   77          * ordered transaction, then it's easy.
   78          */
   79         if ((bq = bufq_first(bufq)) == NULL
   80          || (bp->b_flags & B_ORDERED) != 0) {
   81                 bufq_insert_tail(bufq, bp);
   82                 return;
   83         } else if (bufq->insert_point != NULL) {
   84 
   85                 /*
   86                  * A certain portion of the list is
   87                  * "locked" to preserve ordering, so
   88                  * we can only insert after the insert
   89                  * point.
   90                  */
   91                 bq = bufq->insert_point;
   92         } else {
   93 
   94                 /*
   95                  * If we lie before the last removed (currently active)
   96                  * request, and are not inserting ourselves into the
   97                  * "locked" portion of the list, then we must add ourselves
   98                  * to the second request list.
   99                  */
  100                 if (bp->b_pblkno < bufq->last_pblkno) {
  101 
  102                         bq = bufq->switch_point;
  103                         /*
  104                          * If we are starting a new secondary list,
  105                          * then it's easy.
  106                          */
  107                         if (bq == NULL) {
  108                                 bufq->switch_point = bp;
  109                                 bufq_insert_tail(bufq, bp);
  110                                 return;
  111                         }
  112                         /*
  113                          * If we lie ahead of the current switch point,
  114                          * insert us before the switch point and move
  115                          * the switch point.
  116                          */
  117                         if (bp->b_pblkno < bq->b_pblkno) {
  118                                 bufq->switch_point = bp;
  119                                 TAILQ_INSERT_BEFORE(bq, bp, b_act);
  120                                 return;
  121                         }
  122                 } else {
  123                         if (bufq->switch_point != NULL)
  124                                 be = TAILQ_PREV(bufq->switch_point,
  125                                                 buf_queue, b_act);
  126                         /*
  127                          * If we lie between last_pblkno and bq,
  128                          * insert before bq.
  129                          */
  130                         if (bp->b_pblkno < bq->b_pblkno) {
  131                                 TAILQ_INSERT_BEFORE(bq, bp, b_act);
  132                                 return;
  133                         }
  134                 }
  135         }
  136 
  137         /*
  138          * Request is at/after our current position in the list.
  139          * Optimize for sequential I/O by seeing if we go at the tail.
  140          */
  141         if (bp->b_pblkno > be->b_pblkno) {
  142                 TAILQ_INSERT_AFTER(&bufq->queue, be, bp, b_act);
  143                 return;
  144         }
  145 
  146         /* Otherwise, insertion sort */
  147         while ((bn = TAILQ_NEXT(bq, b_act)) != NULL) {
  148                 
  149                 /*
  150                  * We want to go after the current request if it is the end
  151                  * of the first request list, or if the next request is a
  152                  * larger cylinder than our request.
  153                  */
  154                 if (bn == bufq->switch_point
  155                  || bp->b_pblkno < bn->b_pblkno)
  156                         break;
  157                 bq = bn;
  158         }
  159         TAILQ_INSERT_AFTER(&bufq->queue, bq, bp, b_act);
  160 }
  161 
  162 
  163 /*
  164  * Attempt to read a disk label from a device using the indicated strategy
  165  * routine.  The label must be partly set up before this: secpercyl, secsize
  166  * and anything required in the strategy routine (e.g., dummy bounds for the
  167  * partition containing the label) must be filled in before calling us.
  168  * Returns NULL on success and an error string on failure.
  169  */
  170 char *
  171 readdisklabel(dev, strat, lp)
  172         dev_t dev;
  173         d_strategy_t *strat;
  174         register struct disklabel *lp;
  175 {
  176         register struct buf *bp;
  177         struct disklabel *dlp;
  178         char *msg = NULL;
  179 
  180         bp = geteblk((int)lp->d_secsize);
  181         bp->b_dev = dev;
  182         bp->b_blkno = LABELSECTOR * ((int)lp->d_secsize/DEV_BSIZE);
  183         bp->b_bcount = lp->d_secsize;
  184         bp->b_flags &= ~B_INVAL;
  185         bp->b_flags |= B_BUSY | B_READ;
  186         (*strat)(bp);
  187         if (biowait(bp))
  188                 msg = "I/O error";
  189         else for (dlp = (struct disklabel *)bp->b_data;
  190             dlp <= (struct disklabel *)((char *)bp->b_data +
  191             lp->d_secsize - sizeof(*dlp));
  192             dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
  193                 if (dlp->d_magic != DISKMAGIC || dlp->d_magic2 != DISKMAGIC) {
  194                         if (msg == NULL)
  195                                 msg = "no disk label";
  196                 } else if (dlp->d_npartitions > MAXPARTITIONS ||
  197                            dkcksum(dlp) != 0)
  198                         msg = "disk label corrupted";
  199                 else {
  200                         *lp = *dlp;
  201                         msg = NULL;
  202                         break;
  203                 }
  204         }
  205         bp->b_flags |= B_INVAL | B_AGE;
  206         brelse(bp);
  207         return (msg);
  208 }
  209 
  210 /*
  211  * Check new disk label for sensibility before setting it.
  212  */
  213 int
  214 setdisklabel(olp, nlp, openmask)
  215         register struct disklabel *olp, *nlp;
  216         u_long openmask;
  217 {
  218         register int i;
  219         register struct partition *opp, *npp;
  220 
  221         /*
  222          * Check it is actually a disklabel we are looking at.
  223          */
  224         if (nlp->d_magic != DISKMAGIC || nlp->d_magic2 != DISKMAGIC ||
  225             dkcksum(nlp) != 0)
  226                 return (EINVAL);
  227         /*
  228          * For each partition that we think is open,
  229          */
  230         while ((i = ffs((long)openmask)) != 0) {
  231                 i--;
  232                 /*
  233                  * Check it is not changing....
  234                  */
  235                 openmask &= ~(1 << i);
  236                 if (nlp->d_npartitions <= i)
  237                         return (EBUSY);
  238                 opp = &olp->d_partitions[i];
  239                 npp = &nlp->d_partitions[i];
  240                 if (npp->p_offset != opp->p_offset || npp->p_size < opp->p_size)
  241                         return (EBUSY);
  242                 /*
  243                  * Copy internally-set partition information
  244                  * if new label doesn't include it.             XXX
  245                  * (If we are using it then we had better stay the same type)
  246                  * This is possibly dubious, as someone else noted (XXX)
  247                  */
  248                 if (npp->p_fstype == FS_UNUSED && opp->p_fstype != FS_UNUSED) {
  249                         npp->p_fstype = opp->p_fstype;
  250                         npp->p_fsize = opp->p_fsize;
  251                         npp->p_frag = opp->p_frag;
  252                         npp->p_cpg = opp->p_cpg;
  253                 }
  254         }
  255         nlp->d_checksum = 0;
  256         nlp->d_checksum = dkcksum(nlp);
  257         *olp = *nlp;
  258         return (0);
  259 }
  260 
  261 /*
  262  * Write disk label back to device after modification.
  263  */
  264 int
  265 writedisklabel(dev, strat, lp)
  266         dev_t dev;
  267         d_strategy_t *strat;
  268         register struct disklabel *lp;
  269 {
  270         struct buf *bp;
  271         struct disklabel *dlp;
  272         int error = 0;
  273 
  274         if (lp->d_partitions[RAW_PART].p_offset != 0)
  275                 return (EXDEV);                 /* not quite right */
  276         bp = geteblk((int)lp->d_secsize);
  277         bp->b_dev = dkmodpart(dev, RAW_PART);
  278         bp->b_blkno = LABELSECTOR * ((int)lp->d_secsize/DEV_BSIZE);
  279         bp->b_bcount = lp->d_secsize;
  280 #if 1
  281         /*
  282          * We read the label first to see if it's there,
  283          * in which case we will put ours at the same offset into the block..
  284          * (I think this is stupid [Julian])
  285          * Note that you can't write a label out over a corrupted label!
  286          * (also stupid.. how do you write the first one? by raw writes?)
  287          */
  288         bp->b_flags &= ~B_INVAL;
  289         bp->b_flags |= B_BUSY | B_READ;
  290         (*strat)(bp);
  291         error = biowait(bp);
  292         if (error)
  293                 goto done;
  294         for (dlp = (struct disklabel *)bp->b_data;
  295             dlp <= (struct disklabel *)
  296               ((char *)bp->b_data + lp->d_secsize - sizeof(*dlp));
  297             dlp = (struct disklabel *)((char *)dlp + sizeof(long))) {
  298                 if (dlp->d_magic == DISKMAGIC && dlp->d_magic2 == DISKMAGIC &&
  299                     dkcksum(dlp) == 0) {
  300                         *dlp = *lp;
  301                         bp->b_flags &= ~(B_DONE | B_READ);
  302                         bp->b_flags |= B_BUSY | B_WRITE;
  303 #ifdef __alpha__
  304                         alpha_fix_srm_checksum(bp);
  305 #endif
  306                         (*strat)(bp);
  307                         error = biowait(bp);
  308                         goto done;
  309                 }
  310         }
  311         error = ESRCH;
  312 done:
  313 #else
  314         bzero(bp->b_data, lp->d_secsize);
  315         dlp = (struct disklabel *)bp->b_data;
  316         *dlp = *lp;
  317         bp->b_flags &= ~B_INVAL;
  318         bp->b_flags |= B_BUSY | B_WRITE;
  319         (*strat)(bp);
  320         error = biowait(bp);
  321 #endif
  322         bp->b_flags |= B_INVAL | B_AGE;
  323         brelse(bp);
  324         return (error);
  325 }
  326 
  327 /*
  328  * Compute checksum for disk label.
  329  */
  330 u_int
  331 dkcksum(lp)
  332         register struct disklabel *lp;
  333 {
  334         register u_short *start, *end;
  335         register u_short sum = 0;
  336 
  337         start = (u_short *)lp;
  338         end = (u_short *)&lp->d_partitions[lp->d_npartitions];
  339         while (start < end)
  340                 sum ^= *start++;
  341         return (sum);
  342 }
  343 
  344 /*
  345  * Disk error is the preface to plaintive error messages
  346  * about failing disk transfers.  It prints messages of the form
  347 
  348 hp0g: hard error reading fsbn 12345 of 12344-12347 (hp0 bn %d cn %d tn %d sn %d)
  349 
  350  * if the offset of the error in the transfer and a disk label
  351  * are both available.  blkdone should be -1 if the position of the error
  352  * is unknown; the disklabel pointer may be null from drivers that have not
  353  * been converted to use them.  The message is printed with printf
  354  * if pri is LOG_PRINTF, otherwise it uses log at the specified priority.
  355  * The message should be completed (with at least a newline) with printf
  356  * or addlog, respectively.  There is no trailing space.
  357  */
  358 void
  359 diskerr(bp, dname, what, pri, blkdone, lp)
  360         register struct buf *bp;
  361         char *dname, *what;
  362         int pri, blkdone;
  363         register struct disklabel *lp;
  364 {
  365         int unit = dkunit(bp->b_dev);
  366         int slice = dkslice(bp->b_dev);
  367         int part = dkpart(bp->b_dev);
  368         register int (*pr) __P((const char *, ...));
  369         char partname[2];
  370         char *sname;
  371         daddr_t sn;
  372 
  373         if (pri != LOG_PRINTF) {
  374                 log(pri, "%s", "");
  375                 pr = addlog;
  376         } else
  377                 pr = printf;
  378         sname = dsname(dname, unit, slice, part, partname);
  379         (*pr)("%s%s: %s %sing fsbn ", sname, partname, what,
  380               bp->b_flags & B_READ ? "read" : "writ");
  381         sn = bp->b_blkno;
  382         if (bp->b_bcount <= DEV_BSIZE)
  383                 (*pr)("%ld", (long)sn);
  384         else {
  385                 if (blkdone >= 0) {
  386                         sn += blkdone;
  387                         (*pr)("%ld of ", (long)sn);
  388                 }
  389                 (*pr)("%ld-%ld", (long)bp->b_blkno,
  390                     (long)(bp->b_blkno + (bp->b_bcount - 1) / DEV_BSIZE));
  391         }
  392         if (lp && (blkdone >= 0 || bp->b_bcount <= lp->d_secsize)) {
  393 #ifdef tahoe
  394                 sn *= DEV_BSIZE / lp->d_secsize;                /* XXX */
  395 #endif
  396                 sn += lp->d_partitions[part].p_offset;
  397                 /*
  398                  * XXX should add slice offset and not print the slice,
  399                  * but we don't know the slice pointer.
  400                  * XXX should print bp->b_pblkno so that this will work
  401                  * independent of slices, labels and bad sector remapping,
  402                  * but some drivers don't set bp->b_pblkno.
  403                  */
  404                 (*pr)(" (%s bn %ld; cn %ld", sname, (long)sn,
  405                     (long)(sn / lp->d_secpercyl));
  406                 sn %= (long)lp->d_secpercyl;
  407                 (*pr)(" tn %ld sn %ld)", (long)(sn / lp->d_nsectors),
  408                     (long)(sn % lp->d_nsectors));
  409         }
  410 }

Cache object: 9bad6566471237b6fc011098a5350b68


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.