The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/fs/jfs/jfs_xtree.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  *   Copyright (c) International Business Machines Corp., 2000-2002
    3  *
    4  *   This program is free software;  you can redistribute it and/or modify
    5  *   it under the terms of the GNU General Public License as published by
    6  *   the Free Software Foundation; either version 2 of the License, or 
    7  *   (at your option) any later version.
    8  * 
    9  *   This program is distributed in the hope that it will be useful,
   10  *   but WITHOUT ANY WARRANTY;  without even the implied warranty of
   11  *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See
   12  *   the GNU General Public License for more details.
   13  *
   14  *   You should have received a copy of the GNU General Public License
   15  *   along with this program;  if not, write to the Free Software 
   16  *   Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
   17  */
   18 /*
   19  *      jfs_xtree.c: extent allocation descriptor B+-tree manager
   20  */
   21 
   22 #include <linux/fs.h>
   23 #include "jfs_incore.h"
   24 #include "jfs_filsys.h"
   25 #include "jfs_metapage.h"
   26 #include "jfs_dmap.h"
   27 #include "jfs_dinode.h"
   28 #include "jfs_superblock.h"
   29 #include "jfs_debug.h"
   30 
   31 /*
   32  * xtree local flag
   33  */
   34 #define XT_INSERT       0x00000001
   35 
   36 /*
   37  *       xtree key/entry comparison: extent offset
   38  *
   39  * return:
   40  *      -1: k < start of extent
   41  *       0: start_of_extent <= k <= end_of_extent
   42  *       1: k > end_of_extent
   43  */
   44 #define XT_CMP(CMP, K, X, OFFSET64)\
   45 {\
   46         OFFSET64 = offsetXAD(X);\
   47         (CMP) = ((K) >= OFFSET64 + lengthXAD(X)) ? 1 :\
   48               ((K) < OFFSET64) ? -1 : 0;\
   49 }
   50 
   51 /* write a xad entry */
   52 #define XT_PUTENTRY(XAD, FLAG, OFF, LEN, ADDR)\
   53 {\
   54         (XAD)->flag = (FLAG);\
   55         XADoffset((XAD), (OFF));\
   56         XADlength((XAD), (LEN));\
   57         XADaddress((XAD), (ADDR));\
   58 }
   59 
   60 #define XT_PAGE(IP, MP) BT_PAGE(IP, MP, xtpage_t, i_xtroot)
   61 
   62 /* get page buffer for specified block address */
   63 #define XT_GETPAGE(IP, BN, MP, SIZE, P, RC)\
   64 {\
   65         BT_GETPAGE(IP, BN, MP, xtpage_t, SIZE, P, RC, i_xtroot)\
   66         if (!(RC))\
   67         {\
   68                 if ((le16_to_cpu((P)->header.nextindex) < XTENTRYSTART) ||\
   69                     (le16_to_cpu((P)->header.nextindex) > le16_to_cpu((P)->header.maxentry)) ||\
   70                     (le16_to_cpu((P)->header.maxentry) > (((BN)==0)?XTROOTMAXSLOT:PSIZE>>L2XTSLOTSIZE)))\
   71                 {\
   72                         jfs_err("XT_GETPAGE: xtree page corrupt");\
   73                         BT_PUTPAGE(MP);\
   74                         updateSuper((IP)->i_sb, FM_DIRTY);\
   75                         MP = NULL;\
   76                         RC = EIO;\
   77                 }\
   78         }\
   79 }
   80 
   81 /* for consistency */
   82 #define XT_PUTPAGE(MP) BT_PUTPAGE(MP)
   83 
   84 #define XT_GETSEARCH(IP, LEAF, BN, MP,  P, INDEX) \
   85         BT_GETSEARCH(IP, LEAF, BN, MP, xtpage_t, P, INDEX, i_xtroot)
   86 /* xtree entry parameter descriptor */
   87 struct xtsplit {
   88         struct metapage *mp;
   89         s16 index;
   90         u8 flag;
   91         s64 off;
   92         s64 addr;
   93         int len;
   94         struct pxdlist *pxdlist;
   95 };
   96 
   97 
   98 /*
   99  *      statistics
  100  */
  101 #ifdef CONFIG_JFS_STATISTICS
  102 static struct {
  103         uint search;
  104         uint fastSearch;
  105         uint split;
  106 } xtStat;
  107 #endif
  108 
  109 
  110 /*
  111  * forward references
  112  */
  113 static int xtSearch(struct inode *ip,
  114                     s64 xoff, int *cmpp, struct btstack * btstack, int flag);
  115 
  116 static int xtSplitUp(tid_t tid,
  117                      struct inode *ip,
  118                      struct xtsplit * split, struct btstack * btstack);
  119 
  120 static int xtSplitPage(tid_t tid, struct inode *ip, struct xtsplit * split,
  121                        struct metapage ** rmpp, s64 * rbnp);
  122 
  123 static int xtSplitRoot(tid_t tid, struct inode *ip,
  124                        struct xtsplit * split, struct metapage ** rmpp);
  125 
  126 #ifdef _STILL_TO_PORT
  127 static int xtDeleteUp(tid_t tid, struct inode *ip, struct metapage * fmp,
  128                       xtpage_t * fp, struct btstack * btstack);
  129 
  130 static int xtSearchNode(struct inode *ip,
  131                         xad_t * xad,
  132                         int *cmpp, struct btstack * btstack, int flag);
  133 
  134 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * fp);
  135 #endif                          /*  _STILL_TO_PORT */
  136 
  137 /* External references */
  138 
  139 /*
  140  *      debug control
  141  */
  142 /*      #define _JFS_DEBUG_XTREE        1 */
  143 
  144 
  145 /*
  146  *      xtLookup()
  147  *
  148  * function: map a single page into a physical extent;
  149  */
  150 int xtLookup(struct inode *ip, s64 lstart,
  151              s64 llen, int *pflag, s64 * paddr, s32 * plen, int no_check)
  152 {
  153         int rc = 0;
  154         struct btstack btstack;
  155         int cmp;
  156         s64 bn;
  157         struct metapage *mp;
  158         xtpage_t *p;
  159         int index;
  160         xad_t *xad;
  161         s64 size, xoff, xend;
  162         int xlen;
  163         s64 xaddr;
  164 
  165         *plen = 0;
  166 
  167         if (!no_check) {
  168                 /* is lookup offset beyond eof ? */
  169                 size = ((u64) ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
  170                     JFS_SBI(ip->i_sb)->l2bsize;
  171                 if (lstart >= size) {
  172                         jfs_err("xtLookup: lstart (0x%lx) >= size (0x%lx)",
  173                                 (ulong) lstart, (ulong) size);
  174                         return 0;
  175                 }
  176         }
  177 
  178         /*
  179          * search for the xad entry covering the logical extent
  180          */
  181 //search:
  182         if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0))) {
  183                 jfs_err("xtLookup: xtSearch returned %d", rc);
  184                 return rc;
  185         }
  186 
  187         /*
  188          *      compute the physical extent covering logical extent
  189          *
  190          * N.B. search may have failed (e.g., hole in sparse file),
  191          * and returned the index of the next entry.
  192          */
  193         /* retrieve search result */
  194         XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
  195 
  196         /* is xad found covering start of logical extent ?
  197          * lstart is a page start address,
  198          * i.e., lstart cannot start in a hole;
  199          */
  200         if (cmp)
  201                 goto out;
  202 
  203         /*
  204          * lxd covered by xad
  205          */
  206         xad = &p->xad[index];
  207         xoff = offsetXAD(xad);
  208         xlen = lengthXAD(xad);
  209         xend = xoff + xlen;
  210         xaddr = addressXAD(xad);
  211 
  212         /* initialize new pxd */
  213         *pflag = xad->flag;
  214         *paddr = xaddr + (lstart - xoff);
  215         /* a page must be fully covered by an xad */
  216         *plen = min(xend - lstart, llen);
  217 
  218       out:
  219         XT_PUTPAGE(mp);
  220 
  221         return rc;
  222 }
  223 
  224 
  225 /*
  226  *      xtLookupList()
  227  *
  228  * function: map a single logical extent into a list of physical extent;
  229  *
  230  * parameter:
  231  *      struct inode    *ip,
  232  *      struct lxdlist  *lxdlist,       lxd list (in)
  233  *      struct xadlist  *xadlist,       xad list (in/out)
  234  *      int             flag)
  235  *
  236  * coverage of lxd by xad under assumption of
  237  * . lxd's are ordered and disjoint.
  238  * . xad's are ordered and disjoint.
  239  *
  240  * return:
  241  *      0:      success
  242  *
  243  * note: a page being written (even a single byte) is backed fully,
  244  *      except the last page which is only backed with blocks
  245  *      required to cover the last byte;
  246  *      the extent backing a page is fully contained within an xad;
  247  */
  248 int xtLookupList(struct inode *ip, struct lxdlist * lxdlist,
  249                  struct xadlist * xadlist, int flag)
  250 {
  251         int rc = 0;
  252         struct btstack btstack;
  253         int cmp;
  254         s64 bn;
  255         struct metapage *mp;
  256         xtpage_t *p;
  257         int index;
  258         lxd_t *lxd;
  259         xad_t *xad, *pxd;
  260         s64 size, lstart, lend, xstart, xend, pstart;
  261         s64 llen, xlen, plen;
  262         s64 xaddr, paddr;
  263         int nlxd, npxd, maxnpxd;
  264 
  265         npxd = xadlist->nxad = 0;
  266         maxnpxd = xadlist->maxnxad;
  267         pxd = xadlist->xad;
  268 
  269         nlxd = lxdlist->nlxd;
  270         lxd = lxdlist->lxd;
  271 
  272         lstart = offsetLXD(lxd);
  273         llen = lengthLXD(lxd);
  274         lend = lstart + llen;
  275 
  276         size = (ip->i_size + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
  277             JFS_SBI(ip->i_sb)->l2bsize;
  278 
  279         /*
  280          * search for the xad entry covering the logical extent
  281          */
  282       search:
  283         if (lstart >= size)
  284                 return 0;
  285 
  286         if ((rc = xtSearch(ip, lstart, &cmp, &btstack, 0)))
  287                 return rc;
  288 
  289         /*
  290          *      compute the physical extent covering logical extent
  291          *
  292          * N.B. search may have failed (e.g., hole in sparse file),
  293          * and returned the index of the next entry.
  294          */
  295 //map:
  296         /* retrieve search result */
  297         XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
  298 
  299         /* is xad on the next sibling page ? */
  300         if (index == le16_to_cpu(p->header.nextindex)) {
  301                 if (p->header.flag & BT_ROOT)
  302                         goto mapend;
  303 
  304                 if ((bn = le64_to_cpu(p->header.next)) == 0)
  305                         goto mapend;
  306 
  307                 XT_PUTPAGE(mp);
  308 
  309                 /* get next sibling page */
  310                 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
  311                 if (rc)
  312                         return rc;
  313 
  314                 index = XTENTRYSTART;
  315         }
  316 
  317         xad = &p->xad[index];
  318 
  319         /*
  320          * is lxd covered by xad ?
  321          */
  322       compare:
  323         xstart = offsetXAD(xad);
  324         xlen = lengthXAD(xad);
  325         xend = xstart + xlen;
  326         xaddr = addressXAD(xad);
  327 
  328       compare1:
  329         if (xstart < lstart)
  330                 goto compare2;
  331 
  332         /* (lstart <= xstart) */
  333 
  334         /* lxd is NOT covered by xad */
  335         if (lend <= xstart) {
  336                 /*
  337                  * get next lxd
  338                  */
  339                 if (--nlxd == 0)
  340                         goto mapend;
  341                 lxd++;
  342 
  343                 lstart = offsetLXD(lxd);
  344                 llen = lengthLXD(lxd);
  345                 lend = lstart + llen;
  346                 if (lstart >= size)
  347                         goto mapend;
  348 
  349                 /* compare with the current xad  */
  350                 goto compare1;
  351         }
  352         /* lxd is covered by xad */
  353         else {                  /* (xstart < lend) */
  354 
  355                 /* initialize new pxd */
  356                 pstart = xstart;
  357                 plen = min(lend - xstart, xlen);
  358                 paddr = xaddr;
  359 
  360                 goto cover;
  361         }
  362 
  363         /* (xstart < lstart) */
  364       compare2:
  365         /* lxd is covered by xad */
  366         if (lstart < xend) {
  367                 /* initialize new pxd */
  368                 pstart = lstart;
  369                 plen = min(xend - lstart, llen);
  370                 paddr = xaddr + (lstart - xstart);
  371 
  372                 goto cover;
  373         }
  374         /* lxd is NOT covered by xad */
  375         else {                  /* (xend <= lstart) */
  376 
  377                 /*
  378                  * get next xad
  379                  *
  380                  * linear search next xad covering lxd on
  381                  * the current xad page, and then tree search
  382                  */
  383                 if (index == le16_to_cpu(p->header.nextindex) - 1) {
  384                         if (p->header.flag & BT_ROOT)
  385                                 goto mapend;
  386 
  387                         XT_PUTPAGE(mp);
  388                         goto search;
  389                 } else {
  390                         index++;
  391                         xad++;
  392 
  393                         /* compare with new xad */
  394                         goto compare;
  395                 }
  396         }
  397 
  398         /*
  399          * lxd is covered by xad and a new pxd has been initialized
  400          * (lstart <= xstart < lend) or (xstart < lstart < xend)
  401          */
  402       cover:
  403         /* finalize pxd corresponding to current xad */
  404         XT_PUTENTRY(pxd, xad->flag, pstart, plen, paddr);
  405 
  406         if (++npxd >= maxnpxd)
  407                 goto mapend;
  408         pxd++;
  409 
  410         /*
  411          * lxd is fully covered by xad
  412          */
  413         if (lend <= xend) {
  414                 /*
  415                  * get next lxd
  416                  */
  417                 if (--nlxd == 0)
  418                         goto mapend;
  419                 lxd++;
  420 
  421                 lstart = offsetLXD(lxd);
  422                 llen = lengthLXD(lxd);
  423                 lend = lstart + llen;
  424                 if (lstart >= size)
  425                         goto mapend;
  426 
  427                 /*
  428                  * test for old xad covering new lxd
  429                  * (old xstart < new lstart)
  430                  */
  431                 goto compare2;
  432         }
  433         /*
  434          * lxd is partially covered by xad
  435          */
  436         else {                  /* (xend < lend)  */
  437 
  438                 /*
  439                  * get next xad
  440                  *
  441                  * linear search next xad covering lxd on
  442                  * the current xad page, and then next xad page search
  443                  */
  444                 if (index == le16_to_cpu(p->header.nextindex) - 1) {
  445                         if (p->header.flag & BT_ROOT)
  446                                 goto mapend;
  447 
  448                         if ((bn = le64_to_cpu(p->header.next)) == 0)
  449                                 goto mapend;
  450 
  451                         XT_PUTPAGE(mp);
  452 
  453                         /* get next sibling page */
  454                         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
  455                         if (rc)
  456                                 return rc;
  457 
  458                         index = XTENTRYSTART;
  459                         xad = &p->xad[index];
  460                 } else {
  461                         index++;
  462                         xad++;
  463                 }
  464 
  465                 /*
  466                  * test for new xad covering old lxd
  467                  * (old lstart < new xstart)
  468                  */
  469                 goto compare;
  470         }
  471 
  472       mapend:
  473         xadlist->nxad = npxd;
  474 
  475 //out:
  476         XT_PUTPAGE(mp);
  477 
  478         return rc;
  479 }
  480 
  481 
  482 /*
  483  *      xtSearch()
  484  *
  485  * function:    search for the xad entry covering specified offset.
  486  *
  487  * parameters:
  488  *      ip      - file object;
  489  *      xoff    - extent offset;
  490  *      cmpp    - comparison result:
  491  *      btstack - traverse stack;
  492  *      flag    - search process flag (XT_INSERT);
  493  *
  494  * returns:
  495  *      btstack contains (bn, index) of search path traversed to the entry.
  496  *      *cmpp is set to result of comparison with the entry returned.
  497  *      the page containing the entry is pinned at exit.
  498  */
  499 static int xtSearch(struct inode *ip, s64 xoff, /* offset of extent */
  500                     int *cmpp, struct btstack * btstack, int flag)
  501 {
  502         struct jfs_inode_info *jfs_ip = JFS_IP(ip);
  503         int rc = 0;
  504         int cmp = 1;            /* init for empty page */
  505         s64 bn;                 /* block number */
  506         struct metapage *mp;    /* page buffer */
  507         xtpage_t *p;            /* page */
  508         xad_t *xad;
  509         int base, index, lim, btindex;
  510         struct btframe *btsp;
  511         int nsplit = 0;         /* number of pages to split */
  512         s64 t64;
  513 
  514         INCREMENT(xtStat.search);
  515 
  516         BT_CLR(btstack);
  517 
  518         btstack->nsplit = 0;
  519 
  520         /*
  521          *      search down tree from root:
  522          *
  523          * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
  524          * internal page, child page Pi contains entry with k, Ki <= K < Kj.
  525          *
  526          * if entry with search key K is not found
  527          * internal page search find the entry with largest key Ki
  528          * less than K which point to the child page to search;
  529          * leaf page search find the entry with smallest key Kj
  530          * greater than K so that the returned index is the position of
  531          * the entry to be shifted right for insertion of new entry.
  532          * for empty tree, search key is greater than any key of the tree.
  533          *
  534          * by convention, root bn = 0.
  535          */
  536         for (bn = 0;;) {
  537                 /* get/pin the page to search */
  538                 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
  539                 if (rc)
  540                         return rc;
  541 
  542                 /* try sequential access heuristics with the previous
  543                  * access entry in target leaf page:
  544                  * once search narrowed down into the target leaf,
  545                  * key must either match an entry in the leaf or
  546                  * key entry does not exist in the tree;
  547                  */
  548 //fastSearch:
  549                 if ((jfs_ip->btorder & BT_SEQUENTIAL) &&
  550                     (p->header.flag & BT_LEAF) &&
  551                     (index = jfs_ip->btindex) <
  552                     le16_to_cpu(p->header.nextindex)) {
  553                         xad = &p->xad[index];
  554                         t64 = offsetXAD(xad);
  555                         if (xoff < t64 + lengthXAD(xad)) {
  556                                 if (xoff >= t64) {
  557                                         *cmpp = 0;
  558                                         goto out;
  559                                 }
  560 
  561                                 /* stop sequential access heuristics */
  562                                 goto binarySearch;
  563                         } else {        /* (t64 + lengthXAD(xad)) <= xoff */
  564 
  565                                 /* try next sequential entry */
  566                                 index++;
  567                                 if (index <
  568                                     le16_to_cpu(p->header.nextindex)) {
  569                                         xad++;
  570                                         t64 = offsetXAD(xad);
  571                                         if (xoff < t64 + lengthXAD(xad)) {
  572                                                 if (xoff >= t64) {
  573                                                         *cmpp = 0;
  574                                                         goto out;
  575                                                 }
  576 
  577                                                 /* miss: key falls between
  578                                                  * previous and this entry
  579                                                  */
  580                                                 *cmpp = 1;
  581                                                 goto out;
  582                                         }
  583 
  584                                         /* (xoff >= t64 + lengthXAD(xad));
  585                                          * matching entry may be further out:
  586                                          * stop heuristic search
  587                                          */
  588                                         /* stop sequential access heuristics */
  589                                         goto binarySearch;
  590                                 }
  591 
  592                                 /* (index == p->header.nextindex);
  593                                  * miss: key entry does not exist in
  594                                  * the target leaf/tree
  595                                  */
  596                                 *cmpp = 1;
  597                                 goto out;
  598                         }
  599 
  600                         /*
  601                          * if hit, return index of the entry found, and
  602                          * if miss, where new entry with search key is
  603                          * to be inserted;
  604                          */
  605                       out:
  606                         /* compute number of pages to split */
  607                         if (flag & XT_INSERT) {
  608                                 if (p->header.nextindex ==      /* little-endian */
  609                                     p->header.maxentry)
  610                                         nsplit++;
  611                                 else
  612                                         nsplit = 0;
  613                                 btstack->nsplit = nsplit;
  614                         }
  615 
  616                         /* save search result */
  617                         btsp = btstack->top;
  618                         btsp->bn = bn;
  619                         btsp->index = index;
  620                         btsp->mp = mp;
  621 
  622                         /* update sequential access heuristics */
  623                         jfs_ip->btindex = index;
  624 
  625                         INCREMENT(xtStat.fastSearch);
  626                         return 0;
  627                 }
  628 
  629                 /* well, ... full search now */
  630               binarySearch:
  631                 lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
  632 
  633                 /*
  634                  * binary search with search key K on the current page
  635                  */
  636                 for (base = XTENTRYSTART; lim; lim >>= 1) {
  637                         index = base + (lim >> 1);
  638 
  639                         XT_CMP(cmp, xoff, &p->xad[index], t64);
  640                         if (cmp == 0) {
  641                                 /*
  642                                  *      search hit
  643                                  */
  644                                 /* search hit - leaf page:
  645                                  * return the entry found
  646                                  */
  647                                 if (p->header.flag & BT_LEAF) {
  648                                         *cmpp = cmp;
  649 
  650                                         /* compute number of pages to split */
  651                                         if (flag & XT_INSERT) {
  652                                                 if (p->header.nextindex ==
  653                                                     p->header.maxentry)
  654                                                         nsplit++;
  655                                                 else
  656                                                         nsplit = 0;
  657                                                 btstack->nsplit = nsplit;
  658                                         }
  659 
  660                                         /* save search result */
  661                                         btsp = btstack->top;
  662                                         btsp->bn = bn;
  663                                         btsp->index = index;
  664                                         btsp->mp = mp;
  665 
  666                                         /* init sequential access heuristics */
  667                                         btindex = jfs_ip->btindex;
  668                                         if (index == btindex ||
  669                                             index == btindex + 1)
  670                                                 jfs_ip->btorder = BT_SEQUENTIAL;
  671                                         else
  672                                                 jfs_ip->btorder = BT_RANDOM;
  673                                         jfs_ip->btindex = index;
  674 
  675                                         return 0;
  676                                 }
  677 
  678                                 /* search hit - internal page:
  679                                  * descend/search its child page
  680                                  */
  681                                 goto next;
  682                         }
  683 
  684                         if (cmp > 0) {
  685                                 base = index + 1;
  686                                 --lim;
  687                         }
  688                 }
  689 
  690                 /*
  691                  *      search miss
  692                  *
  693                  * base is the smallest index with key (Kj) greater than
  694                  * search key (K) and may be zero or maxentry index.
  695                  */
  696                 /*
  697                  * search miss - leaf page:
  698                  *
  699                  * return location of entry (base) where new entry with
  700                  * search key K is to be inserted.
  701                  */
  702                 if (p->header.flag & BT_LEAF) {
  703                         *cmpp = cmp;
  704 
  705                         /* compute number of pages to split */
  706                         if (flag & XT_INSERT) {
  707                                 if (p->header.nextindex ==
  708                                     p->header.maxentry)
  709                                         nsplit++;
  710                                 else
  711                                         nsplit = 0;
  712                                 btstack->nsplit = nsplit;
  713                         }
  714 
  715                         /* save search result */
  716                         btsp = btstack->top;
  717                         btsp->bn = bn;
  718                         btsp->index = base;
  719                         btsp->mp = mp;
  720 
  721                         /* init sequential access heuristics */
  722                         btindex = jfs_ip->btindex;
  723                         if (base == btindex || base == btindex + 1)
  724                                 jfs_ip->btorder = BT_SEQUENTIAL;
  725                         else
  726                                 jfs_ip->btorder = BT_RANDOM;
  727                         jfs_ip->btindex = base;
  728 
  729                         return 0;
  730                 }
  731 
  732                 /*
  733                  * search miss - non-leaf page:
  734                  *
  735                  * if base is non-zero, decrement base by one to get the parent
  736                  * entry of the child page to search.
  737                  */
  738                 index = base ? base - 1 : base;
  739 
  740                 /*
  741                  * go down to child page
  742                  */
  743               next:
  744                 /* update number of pages to split */
  745                 if (p->header.nextindex == p->header.maxentry)
  746                         nsplit++;
  747                 else
  748                         nsplit = 0;
  749 
  750                 /* push (bn, index) of the parent page/entry */
  751                 BT_PUSH(btstack, bn, index);
  752 
  753                 /* get the child page block number */
  754                 bn = addressXAD(&p->xad[index]);
  755 
  756                 /* unpin the parent page */
  757                 XT_PUTPAGE(mp);
  758         }
  759 }
  760 
  761 /*
  762  *      xtInsert()
  763  *
  764  * function:
  765  *
  766  * parameter:
  767  *      tid     - transaction id;
  768  *      ip      - file object;
  769  *      xflag   - extent flag (XAD_NOTRECORDED):
  770  *      xoff    - extent offset;
  771  *      xlen    - extent length;
  772  *      xaddrp  - extent address pointer (in/out):
  773  *              if (*xaddrp)
  774  *                      caller allocated data extent at *xaddrp;
  775  *              else
  776  *                      allocate data extent and return its xaddr;
  777  *      flag    -
  778  *
  779  * return:
  780  */
  781 int xtInsert(tid_t tid,         /* transaction id */
  782              struct inode *ip, int xflag, s64 xoff, s32 xlen, s64 * xaddrp,
  783              int flag)
  784 {
  785         int rc = 0;
  786         s64 xaddr, hint;
  787         struct metapage *mp;    /* meta-page buffer */
  788         xtpage_t *p;            /* base B+-tree index page */
  789         s64 bn;
  790         int index, nextindex;
  791         struct btstack btstack; /* traverse stack */
  792         struct xtsplit split;   /* split information */
  793         xad_t *xad;
  794         int cmp;
  795         struct tlock *tlck;
  796         struct xtlock *xtlck;
  797 
  798         jfs_info("xtInsert: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
  799 
  800         /*
  801          *      search for the entry location at which to insert:
  802          *
  803          * xtFastSearch() and xtSearch() both returns (leaf page
  804          * pinned, index at which to insert).
  805          * n.b. xtSearch() may return index of maxentry of
  806          * the full page.
  807          */
  808         if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT)))
  809                 return rc;
  810 
  811         /* retrieve search result */
  812         XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
  813 
  814         /* This test must follow XT_GETSEARCH since mp must be valid if
  815          * we branch to out: */
  816         if (cmp == 0) {
  817                 rc = EEXIST;
  818                 goto out;
  819         }
  820 
  821         /*
  822          * allocate data extent requested
  823          *
  824          * allocation hint: last xad
  825          */
  826         if ((xaddr = *xaddrp) == 0) {
  827                 if (index > XTENTRYSTART) {
  828                         xad = &p->xad[index - 1];
  829                         hint = addressXAD(xad) + lengthXAD(xad) - 1;
  830                 } else
  831                         hint = 0;
  832                 if ((rc = dbAlloc(ip, hint, (s64) xlen, &xaddr)))
  833                         goto out;
  834         }
  835 
  836         /*
  837          *      insert entry for new extent
  838          */
  839         xflag |= XAD_NEW;
  840 
  841         /*
  842          *      if the leaf page is full, split the page and
  843          *      propagate up the router entry for the new page from split
  844          *
  845          * The xtSplitUp() will insert the entry and unpin the leaf page.
  846          */
  847         nextindex = le16_to_cpu(p->header.nextindex);
  848         if (nextindex == le16_to_cpu(p->header.maxentry)) {
  849                 split.mp = mp;
  850                 split.index = index;
  851                 split.flag = xflag;
  852                 split.off = xoff;
  853                 split.len = xlen;
  854                 split.addr = xaddr;
  855                 split.pxdlist = NULL;
  856                 if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
  857                         /* undo data extent allocation */
  858                         if (*xaddrp == 0)
  859                                 dbFree(ip, xaddr, (s64) xlen);
  860                         return rc;
  861                 }
  862 
  863                 *xaddrp = xaddr;
  864                 return 0;
  865         }
  866 
  867         /*
  868          *      insert the new entry into the leaf page
  869          */
  870         /*
  871          * acquire a transaction lock on the leaf page;
  872          *
  873          * action: xad insertion/extension;
  874          */
  875         BT_MARK_DIRTY(mp, ip);
  876 
  877         /* if insert into middle, shift right remaining entries. */
  878         if (index < nextindex)
  879                 memmove(&p->xad[index + 1], &p->xad[index],
  880                         (nextindex - index) * sizeof(xad_t));
  881 
  882         /* insert the new entry: mark the entry NEW */
  883         xad = &p->xad[index];
  884         XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
  885 
  886         /* advance next available entry index */
  887         p->header.nextindex =
  888             cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
  889 
  890         /* Don't log it if there are no links to the file */
  891         if (!test_cflag(COMMIT_Nolink, ip)) {
  892                 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
  893                 xtlck = (struct xtlock *) & tlck->lock;
  894                 xtlck->lwm.offset =
  895                     (xtlck->lwm.offset) ? min(index,
  896                                               (int)xtlck->lwm.offset) : index;
  897                 xtlck->lwm.length =
  898                     le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
  899         }
  900 
  901         *xaddrp = xaddr;
  902 
  903       out:
  904         /* unpin the leaf page */
  905         XT_PUTPAGE(mp);
  906 
  907         return rc;
  908 }
  909 
  910 
  911 /*
  912  *      xtSplitUp()
  913  *
  914  * function:
  915  *      split full pages as propagating insertion up the tree
  916  *
  917  * parameter:
  918  *      tid     - transaction id;
  919  *      ip      - file object;
  920  *      split   - entry parameter descriptor;
  921  *      btstack - traverse stack from xtSearch()
  922  *
  923  * return:
  924  */
  925 static int
  926 xtSplitUp(tid_t tid,
  927           struct inode *ip, struct xtsplit * split, struct btstack * btstack)
  928 {
  929         int rc = 0;
  930         struct metapage *smp;
  931         xtpage_t *sp;           /* split page */
  932         struct metapage *rmp;
  933         s64 rbn;                /* new right page block number */
  934         struct metapage *rcmp;
  935         xtpage_t *rcp;          /* right child page */
  936         s64 rcbn;               /* right child page block number */
  937         int skip;               /* index of entry of insertion */
  938         int nextindex;          /* next available entry index of p */
  939         struct btframe *parent; /* parent page entry on traverse stack */
  940         xad_t *xad;
  941         s64 xaddr;
  942         int xlen;
  943         int nsplit;             /* number of pages split */
  944         struct pxdlist pxdlist;
  945         pxd_t *pxd;
  946         struct tlock *tlck;
  947         struct xtlock *xtlck;
  948 
  949         smp = split->mp;
  950         sp = XT_PAGE(ip, smp);
  951 
  952         /* is inode xtree root extension/inline EA area free ? */
  953         if ((sp->header.flag & BT_ROOT) && (!S_ISDIR(ip->i_mode)) &&
  954             (sp->header.maxentry < cpu_to_le16(XTROOTMAXSLOT)) &&
  955             (JFS_IP(ip)->mode2 & INLINEEA)) {
  956                 sp->header.maxentry = cpu_to_le16(XTROOTMAXSLOT);
  957                 JFS_IP(ip)->mode2 &= ~INLINEEA;
  958 
  959                 BT_MARK_DIRTY(smp, ip);
  960                 /*
  961                  * acquire a transaction lock on the leaf page;
  962                  *
  963                  * action: xad insertion/extension;
  964                  */
  965 
  966                 /* if insert into middle, shift right remaining entries. */
  967                 skip = split->index;
  968                 nextindex = le16_to_cpu(sp->header.nextindex);
  969                 if (skip < nextindex)
  970                         memmove(&sp->xad[skip + 1], &sp->xad[skip],
  971                                 (nextindex - skip) * sizeof(xad_t));
  972 
  973                 /* insert the new entry: mark the entry NEW */
  974                 xad = &sp->xad[skip];
  975                 XT_PUTENTRY(xad, split->flag, split->off, split->len,
  976                             split->addr);
  977 
  978                 /* advance next available entry index */
  979                 sp->header.nextindex =
  980                     cpu_to_le16(le16_to_cpu(sp->header.nextindex) + 1);
  981 
  982                 /* Don't log it if there are no links to the file */
  983                 if (!test_cflag(COMMIT_Nolink, ip)) {
  984                         tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
  985                         xtlck = (struct xtlock *) & tlck->lock;
  986                         xtlck->lwm.offset = (xtlck->lwm.offset) ?
  987                             min(skip, (int)xtlck->lwm.offset) : skip;
  988                         xtlck->lwm.length =
  989                             le16_to_cpu(sp->header.nextindex) -
  990                             xtlck->lwm.offset;
  991                 }
  992 
  993                 return 0;
  994         }
  995 
  996         /*
  997          * allocate new index blocks to cover index page split(s)
  998          *
  999          * allocation hint: ?
 1000          */
 1001         if (split->pxdlist == NULL) {
 1002                 nsplit = btstack->nsplit;
 1003                 split->pxdlist = &pxdlist;
 1004                 pxdlist.maxnpxd = pxdlist.npxd = 0;
 1005                 pxd = &pxdlist.pxd[0];
 1006                 xlen = JFS_SBI(ip->i_sb)->nbperpage;
 1007                 for (; nsplit > 0; nsplit--, pxd++) {
 1008                         if ((rc = dbAlloc(ip, (s64) 0, (s64) xlen, &xaddr))
 1009                             == 0) {
 1010                                 PXDaddress(pxd, xaddr);
 1011                                 PXDlength(pxd, xlen);
 1012 
 1013                                 pxdlist.maxnpxd++;
 1014 
 1015                                 continue;
 1016                         }
 1017 
 1018                         /* undo allocation */
 1019 
 1020                         XT_PUTPAGE(smp);
 1021                         return rc;
 1022                 }
 1023         }
 1024 
 1025         /*
 1026          * Split leaf page <sp> into <sp> and a new right page <rp>.
 1027          *
 1028          * The split routines insert the new entry into the leaf page,
 1029          * and acquire txLock as appropriate.
 1030          * return <rp> pinned and its block number <rpbn>.
 1031          */
 1032         rc = (sp->header.flag & BT_ROOT) ?
 1033             xtSplitRoot(tid, ip, split, &rmp) :
 1034             xtSplitPage(tid, ip, split, &rmp, &rbn);
 1035         if (rc)
 1036                 return EIO;
 1037 
 1038         XT_PUTPAGE(smp);
 1039 
 1040         /*
 1041          * propagate up the router entry for the leaf page just split
 1042          *
 1043          * insert a router entry for the new page into the parent page,
 1044          * propagate the insert/split up the tree by walking back the stack
 1045          * of (bn of parent page, index of child page entry in parent page)
 1046          * that were traversed during the search for the page that split.
 1047          *
 1048          * the propagation of insert/split up the tree stops if the root
 1049          * splits or the page inserted into doesn't have to split to hold
 1050          * the new entry.
 1051          *
 1052          * the parent entry for the split page remains the same, and
 1053          * a new entry is inserted at its right with the first key and
 1054          * block number of the new right page.
 1055          *
 1056          * There are a maximum of 3 pages pinned at any time:
 1057          * right child, left parent and right parent (when the parent splits)
 1058          * to keep the child page pinned while working on the parent.
 1059          * make sure that all pins are released at exit.
 1060          */
 1061         while ((parent = BT_POP(btstack)) != NULL) {
 1062                 /* parent page specified by stack frame <parent> */
 1063 
 1064                 /* keep current child pages <rcp> pinned */
 1065                 rcmp = rmp;
 1066                 rcbn = rbn;
 1067                 rcp = XT_PAGE(ip, rcmp);
 1068 
 1069                 /*
 1070                  * insert router entry in parent for new right child page <rp>
 1071                  */
 1072                 /* get/pin the parent page <sp> */
 1073                 XT_GETPAGE(ip, parent->bn, smp, PSIZE, sp, rc);
 1074                 if (rc)
 1075                         goto errout2;
 1076 
 1077                 /*
 1078                  * The new key entry goes ONE AFTER the index of parent entry,
 1079                  * because the split was to the right.
 1080                  */
 1081                 skip = parent->index + 1;
 1082 
 1083                 /*
 1084                  * split or shift right remaining entries of the parent page
 1085                  */
 1086                 nextindex = le16_to_cpu(sp->header.nextindex);
 1087                 /*
 1088                  * parent page is full - split the parent page
 1089                  */
 1090                 if (nextindex == le16_to_cpu(sp->header.maxentry)) {
 1091                         /* init for parent page split */
 1092                         split->mp = smp;
 1093                         split->index = skip;    /* index at insert */
 1094                         split->flag = XAD_NEW;
 1095                         split->off = offsetXAD(&rcp->xad[XTENTRYSTART]);
 1096                         split->len = JFS_SBI(ip->i_sb)->nbperpage;
 1097                         split->addr = rcbn;
 1098 
 1099                         /* unpin previous right child page */
 1100                         XT_PUTPAGE(rcmp);
 1101 
 1102                         /* The split routines insert the new entry,
 1103                          * and acquire txLock as appropriate.
 1104                          * return <rp> pinned and its block number <rpbn>.
 1105                          */
 1106                         rc = (sp->header.flag & BT_ROOT) ?
 1107                             xtSplitRoot(tid, ip, split, &rmp) :
 1108                             xtSplitPage(tid, ip, split, &rmp, &rbn);
 1109                         if (rc)
 1110                                 goto errout1;
 1111 
 1112                         XT_PUTPAGE(smp);
 1113                         /* keep new child page <rp> pinned */
 1114                 }
 1115                 /*
 1116                  * parent page is not full - insert in parent page
 1117                  */
 1118                 else {
 1119                         /*
 1120                          * insert router entry in parent for the right child
 1121                          * page from the first entry of the right child page:
 1122                          */
 1123                         /*
 1124                          * acquire a transaction lock on the parent page;
 1125                          *
 1126                          * action: router xad insertion;
 1127                          */
 1128                         BT_MARK_DIRTY(smp, ip);
 1129 
 1130                         /*
 1131                          * if insert into middle, shift right remaining entries
 1132                          */
 1133                         if (skip < nextindex)
 1134                                 memmove(&sp->xad[skip + 1], &sp->xad[skip],
 1135                                         (nextindex -
 1136                                          skip) << L2XTSLOTSIZE);
 1137 
 1138                         /* insert the router entry */
 1139                         xad = &sp->xad[skip];
 1140                         XT_PUTENTRY(xad, XAD_NEW,
 1141                                     offsetXAD(&rcp->xad[XTENTRYSTART]),
 1142                                     JFS_SBI(ip->i_sb)->nbperpage, rcbn);
 1143 
 1144                         /* advance next available entry index. */
 1145                         sp->header.nextindex =
 1146                             cpu_to_le16(le16_to_cpu(sp->header.nextindex) +
 1147                                         1);
 1148 
 1149                         /* Don't log it if there are no links to the file */
 1150                         if (!test_cflag(COMMIT_Nolink, ip)) {
 1151                                 tlck = txLock(tid, ip, smp,
 1152                                               tlckXTREE | tlckGROW);
 1153                                 xtlck = (struct xtlock *) & tlck->lock;
 1154                                 xtlck->lwm.offset = (xtlck->lwm.offset) ?
 1155                                     min(skip, (int)xtlck->lwm.offset) : skip;
 1156                                 xtlck->lwm.length =
 1157                                     le16_to_cpu(sp->header.nextindex) -
 1158                                     xtlck->lwm.offset;
 1159                         }
 1160 
 1161                         /* unpin parent page */
 1162                         XT_PUTPAGE(smp);
 1163 
 1164                         /* exit propagate up */
 1165                         break;
 1166                 }
 1167         }
 1168 
 1169         /* unpin current right page */
 1170         XT_PUTPAGE(rmp);
 1171 
 1172         return 0;
 1173 
 1174         /*
 1175          * If something fails in the above loop we were already walking back
 1176          * up the tree and the tree is now inconsistent.
 1177          * release all pages we're holding.
 1178          */
 1179       errout1:
 1180         XT_PUTPAGE(smp);
 1181 
 1182       errout2:
 1183         XT_PUTPAGE(rcmp);
 1184 
 1185         return rc;
 1186 }
 1187 
 1188 
 1189 /*
 1190  *      xtSplitPage()
 1191  *
 1192  * function:
 1193  *      split a full non-root page into
 1194  *      original/split/left page and new right page
 1195  *      i.e., the original/split page remains as left page.
 1196  *
 1197  * parameter:
 1198  *      int             tid,
 1199  *      struct inode    *ip,
 1200  *      struct xtsplit  *split,
 1201  *      struct metapage **rmpp,
 1202  *      u64             *rbnp,
 1203  *
 1204  * return:
 1205  *      Pointer to page in which to insert or NULL on error.
 1206  */
 1207 static int
 1208 xtSplitPage(tid_t tid, struct inode *ip,
 1209             struct xtsplit * split, struct metapage ** rmpp, s64 * rbnp)
 1210 {
 1211         int rc = 0;
 1212         struct metapage *smp;
 1213         xtpage_t *sp;
 1214         struct metapage *rmp;
 1215         xtpage_t *rp;           /* new right page allocated */
 1216         s64 rbn;                /* new right page block number */
 1217         struct metapage *mp;
 1218         xtpage_t *p;
 1219         s64 nextbn;
 1220         int skip, maxentry, middle, righthalf, n;
 1221         xad_t *xad;
 1222         struct pxdlist *pxdlist;
 1223         pxd_t *pxd;
 1224         struct tlock *tlck;
 1225         struct xtlock *sxtlck = 0, *rxtlck = 0;
 1226 
 1227         smp = split->mp;
 1228         sp = XT_PAGE(ip, smp);
 1229 
 1230         INCREMENT(xtStat.split);
 1231 
 1232         /*
 1233          * allocate the new right page for the split
 1234          */
 1235         pxdlist = split->pxdlist;
 1236         pxd = &pxdlist->pxd[pxdlist->npxd];
 1237         pxdlist->npxd++;
 1238         rbn = addressPXD(pxd);
 1239         rmp = get_metapage(ip, rbn, PSIZE, 1);
 1240         if (rmp == NULL)
 1241                 return EIO;
 1242 
 1243         jfs_info("xtSplitPage: ip:0x%p smp:0x%p rmp:0x%p", ip, smp, rmp);
 1244 
 1245         BT_MARK_DIRTY(rmp, ip);
 1246         /*
 1247          * action: new page;
 1248          */
 1249 
 1250         rp = (xtpage_t *) rmp->data;
 1251         rp->header.self = *pxd;
 1252         rp->header.flag = sp->header.flag & BT_TYPE;
 1253         rp->header.maxentry = sp->header.maxentry;      /* little-endian */
 1254         rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
 1255 
 1256         BT_MARK_DIRTY(smp, ip);
 1257         /* Don't log it if there are no links to the file */
 1258         if (!test_cflag(COMMIT_Nolink, ip)) {
 1259                 /*
 1260                  * acquire a transaction lock on the new right page;
 1261                  */
 1262                 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
 1263                 rxtlck = (struct xtlock *) & tlck->lock;
 1264                 rxtlck->lwm.offset = XTENTRYSTART;
 1265                 /*
 1266                  * acquire a transaction lock on the split page
 1267                  */
 1268                 tlck = txLock(tid, ip, smp, tlckXTREE | tlckGROW);
 1269                 sxtlck = (struct xtlock *) & tlck->lock;
 1270         }
 1271 
 1272         /*
 1273          * initialize/update sibling pointers of <sp> and <rp>
 1274          */
 1275         nextbn = le64_to_cpu(sp->header.next);
 1276         rp->header.next = cpu_to_le64(nextbn);
 1277         rp->header.prev = cpu_to_le64(addressPXD(&sp->header.self));
 1278         sp->header.next = cpu_to_le64(rbn);
 1279 
 1280         skip = split->index;
 1281 
 1282         /*
 1283          *      sequential append at tail (after last entry of last page)
 1284          *
 1285          * if splitting the last page on a level because of appending
 1286          * a entry to it (skip is maxentry), it's likely that the access is
 1287          * sequential. adding an empty page on the side of the level is less
 1288          * work and can push the fill factor much higher than normal.
 1289          * if we're wrong it's no big deal -  we will do the split the right
 1290          * way next time.
 1291          * (it may look like it's equally easy to do a similar hack for
 1292          * reverse sorted data, that is, split the tree left, but it's not.
 1293          * Be my guest.)
 1294          */
 1295         if (nextbn == 0 && skip == le16_to_cpu(sp->header.maxentry)) {
 1296                 /*
 1297                  * acquire a transaction lock on the new/right page;
 1298                  *
 1299                  * action: xad insertion;
 1300                  */
 1301                 /* insert entry at the first entry of the new right page */
 1302                 xad = &rp->xad[XTENTRYSTART];
 1303                 XT_PUTENTRY(xad, split->flag, split->off, split->len,
 1304                             split->addr);
 1305 
 1306                 rp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
 1307 
 1308                 if (!test_cflag(COMMIT_Nolink, ip)) {
 1309                         /* rxtlck->lwm.offset = XTENTRYSTART; */
 1310                         rxtlck->lwm.length = 1;
 1311                 }
 1312 
 1313                 *rmpp = rmp;
 1314                 *rbnp = rbn;
 1315 
 1316                 ip->i_blocks += LBLK2PBLK(ip->i_sb, lengthPXD(pxd));
 1317 
 1318                 jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
 1319                 return 0;
 1320         }
 1321 
 1322         /*
 1323          *      non-sequential insert (at possibly middle page)
 1324          */
 1325 
 1326         /*
 1327          * update previous pointer of old next/right page of <sp>
 1328          */
 1329         if (nextbn != 0) {
 1330                 XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
 1331                 if (rc) {
 1332                         XT_PUTPAGE(rmp);
 1333                         return rc;
 1334                 }
 1335 
 1336                 BT_MARK_DIRTY(mp, ip);
 1337                 /*
 1338                  * acquire a transaction lock on the next page;
 1339                  *
 1340                  * action:sibling pointer update;
 1341                  */
 1342                 if (!test_cflag(COMMIT_Nolink, ip))
 1343                         tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
 1344 
 1345                 p->header.prev = cpu_to_le64(rbn);
 1346 
 1347                 /* sibling page may have been updated previously, or
 1348                  * it may be updated later;
 1349                  */
 1350 
 1351                 XT_PUTPAGE(mp);
 1352         }
 1353 
 1354         /*
 1355          * split the data between the split and new/right pages
 1356          */
 1357         maxentry = le16_to_cpu(sp->header.maxentry);
 1358         middle = maxentry >> 1;
 1359         righthalf = maxentry - middle;
 1360 
 1361         /*
 1362          * skip index in old split/left page - insert into left page:
 1363          */
 1364         if (skip <= middle) {
 1365                 /* move right half of split page to the new right page */
 1366                 memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
 1367                         righthalf << L2XTSLOTSIZE);
 1368 
 1369                 /* shift right tail of left half to make room for new entry */
 1370                 if (skip < middle)
 1371                         memmove(&sp->xad[skip + 1], &sp->xad[skip],
 1372                                 (middle - skip) << L2XTSLOTSIZE);
 1373 
 1374                 /* insert new entry */
 1375                 xad = &sp->xad[skip];
 1376                 XT_PUTENTRY(xad, split->flag, split->off, split->len,
 1377                             split->addr);
 1378 
 1379                 /* update page header */
 1380                 sp->header.nextindex = cpu_to_le16(middle + 1);
 1381                 if (!test_cflag(COMMIT_Nolink, ip)) {
 1382                         sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
 1383                             min(skip, (int)sxtlck->lwm.offset) : skip;
 1384                 }
 1385 
 1386                 rp->header.nextindex =
 1387                     cpu_to_le16(XTENTRYSTART + righthalf);
 1388         }
 1389         /*
 1390          * skip index in new right page - insert into right page:
 1391          */
 1392         else {
 1393                 /* move left head of right half to right page */
 1394                 n = skip - middle;
 1395                 memmove(&rp->xad[XTENTRYSTART], &sp->xad[middle],
 1396                         n << L2XTSLOTSIZE);
 1397 
 1398                 /* insert new entry */
 1399                 n += XTENTRYSTART;
 1400                 xad = &rp->xad[n];
 1401                 XT_PUTENTRY(xad, split->flag, split->off, split->len,
 1402                             split->addr);
 1403 
 1404                 /* move right tail of right half to right page */
 1405                 if (skip < maxentry)
 1406                         memmove(&rp->xad[n + 1], &sp->xad[skip],
 1407                                 (maxentry - skip) << L2XTSLOTSIZE);
 1408 
 1409                 /* update page header */
 1410                 sp->header.nextindex = cpu_to_le16(middle);
 1411                 if (!test_cflag(COMMIT_Nolink, ip)) {
 1412                         sxtlck->lwm.offset = (sxtlck->lwm.offset) ?
 1413                             min(middle, (int)sxtlck->lwm.offset) : middle;
 1414                 }
 1415 
 1416                 rp->header.nextindex = cpu_to_le16(XTENTRYSTART +
 1417                                                    righthalf + 1);
 1418         }
 1419 
 1420         if (!test_cflag(COMMIT_Nolink, ip)) {
 1421                 sxtlck->lwm.length = le16_to_cpu(sp->header.nextindex) -
 1422                     sxtlck->lwm.offset;
 1423 
 1424                 /* rxtlck->lwm.offset = XTENTRYSTART; */
 1425                 rxtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
 1426                     XTENTRYSTART;
 1427         }
 1428 
 1429         *rmpp = rmp;
 1430         *rbnp = rbn;
 1431 
 1432         ip->i_blocks += LBLK2PBLK(ip->i_sb, lengthPXD(pxd));
 1433 
 1434         jfs_info("xtSplitPage: sp:0x%p rp:0x%p", sp, rp);
 1435         return rc;
 1436 }
 1437 
 1438 
 1439 /*
 1440  *      xtSplitRoot()
 1441  *
 1442  * function:
 1443  *      split the full root page into
 1444  *      original/root/split page and new right page
 1445  *      i.e., root remains fixed in tree anchor (inode) and
 1446  *      the root is copied to a single new right child page
 1447  *      since root page << non-root page, and
 1448  *      the split root page contains a single entry for the
 1449  *      new right child page.
 1450  *
 1451  * parameter:
 1452  *      int             tid,
 1453  *      struct inode    *ip,
 1454  *      struct xtsplit  *split,
 1455  *      struct metapage **rmpp)
 1456  *
 1457  * return:
 1458  *      Pointer to page in which to insert or NULL on error.
 1459  */
 1460 static int
 1461 xtSplitRoot(tid_t tid,
 1462             struct inode *ip, struct xtsplit * split, struct metapage ** rmpp)
 1463 {
 1464         xtpage_t *sp;
 1465         struct metapage *rmp;
 1466         xtpage_t *rp;
 1467         s64 rbn;
 1468         int skip, nextindex;
 1469         xad_t *xad;
 1470         pxd_t *pxd;
 1471         struct pxdlist *pxdlist;
 1472         struct tlock *tlck;
 1473         struct xtlock *xtlck;
 1474 
 1475         sp = &JFS_IP(ip)->i_xtroot;
 1476 
 1477         INCREMENT(xtStat.split);
 1478 
 1479         /*
 1480          *      allocate a single (right) child page
 1481          */
 1482         pxdlist = split->pxdlist;
 1483         pxd = &pxdlist->pxd[pxdlist->npxd];
 1484         pxdlist->npxd++;
 1485         rbn = addressPXD(pxd);
 1486         rmp = get_metapage(ip, rbn, PSIZE, 1);
 1487         if (rmp == NULL)
 1488                 return EIO;
 1489 
 1490         jfs_info("xtSplitRoot: ip:0x%p rmp:0x%p", ip, rmp);
 1491 
 1492         /*
 1493          * acquire a transaction lock on the new right page;
 1494          *
 1495          * action: new page;
 1496          */
 1497         BT_MARK_DIRTY(rmp, ip);
 1498 
 1499         rp = (xtpage_t *) rmp->data;
 1500         rp->header.flag =
 1501             (sp->header.flag & BT_LEAF) ? BT_LEAF : BT_INTERNAL;
 1502         rp->header.self = *pxd;
 1503         rp->header.nextindex = cpu_to_le16(XTENTRYSTART);
 1504         rp->header.maxentry = cpu_to_le16(PSIZE >> L2XTSLOTSIZE);
 1505 
 1506         /* initialize sibling pointers */
 1507         rp->header.next = 0;
 1508         rp->header.prev = 0;
 1509 
 1510         /*
 1511          * copy the in-line root page into new right page extent
 1512          */
 1513         nextindex = le16_to_cpu(sp->header.maxentry);
 1514         memmove(&rp->xad[XTENTRYSTART], &sp->xad[XTENTRYSTART],
 1515                 (nextindex - XTENTRYSTART) << L2XTSLOTSIZE);
 1516 
 1517         /*
 1518          * insert the new entry into the new right/child page
 1519          * (skip index in the new right page will not change)
 1520          */
 1521         skip = split->index;
 1522         /* if insert into middle, shift right remaining entries */
 1523         if (skip != nextindex)
 1524                 memmove(&rp->xad[skip + 1], &rp->xad[skip],
 1525                         (nextindex - skip) * sizeof(xad_t));
 1526 
 1527         xad = &rp->xad[skip];
 1528         XT_PUTENTRY(xad, split->flag, split->off, split->len, split->addr);
 1529 
 1530         /* update page header */
 1531         rp->header.nextindex = cpu_to_le16(nextindex + 1);
 1532 
 1533         if (!test_cflag(COMMIT_Nolink, ip)) {
 1534                 tlck = txLock(tid, ip, rmp, tlckXTREE | tlckNEW);
 1535                 xtlck = (struct xtlock *) & tlck->lock;
 1536                 xtlck->lwm.offset = XTENTRYSTART;
 1537                 xtlck->lwm.length = le16_to_cpu(rp->header.nextindex) -
 1538                     XTENTRYSTART;
 1539         }
 1540 
 1541         /*
 1542          *      reset the root
 1543          *
 1544          * init root with the single entry for the new right page
 1545          * set the 1st entry offset to 0, which force the left-most key
 1546          * at any level of the tree to be less than any search key.
 1547          */
 1548         /*
 1549          * acquire a transaction lock on the root page (in-memory inode);
 1550          *
 1551          * action: root split;
 1552          */
 1553         BT_MARK_DIRTY(split->mp, ip);
 1554 
 1555         xad = &sp->xad[XTENTRYSTART];
 1556         XT_PUTENTRY(xad, XAD_NEW, 0, JFS_SBI(ip->i_sb)->nbperpage, rbn);
 1557 
 1558         /* update page header of root */
 1559         sp->header.flag &= ~BT_LEAF;
 1560         sp->header.flag |= BT_INTERNAL;
 1561 
 1562         sp->header.nextindex = cpu_to_le16(XTENTRYSTART + 1);
 1563 
 1564         if (!test_cflag(COMMIT_Nolink, ip)) {
 1565                 tlck = txLock(tid, ip, split->mp, tlckXTREE | tlckGROW);
 1566                 xtlck = (struct xtlock *) & tlck->lock;
 1567                 xtlck->lwm.offset = XTENTRYSTART;
 1568                 xtlck->lwm.length = 1;
 1569         }
 1570 
 1571         *rmpp = rmp;
 1572 
 1573         ip->i_blocks += LBLK2PBLK(ip->i_sb, lengthPXD(pxd));
 1574 
 1575         jfs_info("xtSplitRoot: sp:0x%p rp:0x%p", sp, rp);
 1576         return 0;
 1577 }
 1578 
 1579 
 1580 /*
 1581  *      xtExtend()
 1582  *
 1583  * function: extend in-place;
 1584  *
 1585  * note: existing extent may or may not have been committed.
 1586  * caller is responsible for pager buffer cache update, and
 1587  * working block allocation map update;
 1588  * update pmap: alloc whole extended extent;
 1589  */
 1590 int xtExtend(tid_t tid,         /* transaction id */
 1591              struct inode *ip, s64 xoff,        /* delta extent offset */
 1592              s32 xlen,          /* delta extent length */
 1593              int flag)
 1594 {
 1595         int rc = 0;
 1596         int cmp;
 1597         struct metapage *mp;    /* meta-page buffer */
 1598         xtpage_t *p;            /* base B+-tree index page */
 1599         s64 bn;
 1600         int index, nextindex, len;
 1601         struct btstack btstack; /* traverse stack */
 1602         struct xtsplit split;   /* split information */
 1603         xad_t *xad;
 1604         s64 xaddr;
 1605         struct tlock *tlck;
 1606         struct xtlock *xtlck = 0;
 1607         int rootsplit = 0;
 1608 
 1609         jfs_info("xtExtend: nxoff:0x%lx nxlen:0x%x", (ulong) xoff, xlen);
 1610 
 1611         /* there must exist extent to be extended */
 1612         if ((rc = xtSearch(ip, xoff - 1, &cmp, &btstack, XT_INSERT)))
 1613                 return rc;
 1614         assert(cmp == 0);
 1615 
 1616         /* retrieve search result */
 1617         XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
 1618 
 1619         /* extension must be contiguous */
 1620         xad = &p->xad[index];
 1621         assert((offsetXAD(xad) + lengthXAD(xad)) == xoff);
 1622 
 1623         /*
 1624          * acquire a transaction lock on the leaf page;
 1625          *
 1626          * action: xad insertion/extension;
 1627          */
 1628         BT_MARK_DIRTY(mp, ip);
 1629         if (!test_cflag(COMMIT_Nolink, ip)) {
 1630                 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
 1631                 xtlck = (struct xtlock *) & tlck->lock;
 1632         }
 1633 
 1634         /* extend will overflow extent ? */
 1635         xlen = lengthXAD(xad) + xlen;
 1636         if ((len = xlen - MAXXLEN) <= 0)
 1637                 goto extendOld;
 1638 
 1639         /*
 1640          *      extent overflow: insert entry for new extent
 1641          */
 1642 //insertNew:
 1643         xoff = offsetXAD(xad) + MAXXLEN;
 1644         xaddr = addressXAD(xad) + MAXXLEN;
 1645         nextindex = le16_to_cpu(p->header.nextindex);
 1646 
 1647         /*
 1648          *      if the leaf page is full, insert the new entry and
 1649          *      propagate up the router entry for the new page from split
 1650          *
 1651          * The xtSplitUp() will insert the entry and unpin the leaf page.
 1652          */
 1653         if (nextindex == le16_to_cpu(p->header.maxentry)) {
 1654                 rootsplit = p->header.flag & BT_ROOT;
 1655 
 1656                 /* xtSpliUp() unpins leaf pages */
 1657                 split.mp = mp;
 1658                 split.index = index + 1;
 1659                 split.flag = XAD_NEW;
 1660                 split.off = xoff;       /* split offset */
 1661                 split.len = len;
 1662                 split.addr = xaddr;
 1663                 split.pxdlist = NULL;
 1664                 if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
 1665                         return rc;
 1666 
 1667                 /*
 1668                  * if leaf root has been split, original root has been
 1669                  * copied to new child page, i.e., original entry now
 1670                  * resides on the new child page;
 1671                  */
 1672                 if (rootsplit) {
 1673                         if (p->header.nextindex ==
 1674                             cpu_to_le16(XTENTRYSTART + 1)) {
 1675                                 xad = &p->xad[XTENTRYSTART];
 1676                                 bn = addressXAD(xad);
 1677 
 1678                                 /* get new child page */
 1679                                 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 1680 
 1681                                 BT_MARK_DIRTY(mp, ip);
 1682                                 if (!test_cflag(COMMIT_Nolink, ip)) {
 1683                                         tlck = txLock(tid, ip, mp,
 1684                                                       tlckXTREE |
 1685                                                       tlckGROW);
 1686                                         xtlck = (struct xtlock *) & tlck->lock;
 1687                                 }
 1688                         }
 1689                 } else
 1690                         /* get back old page */
 1691                         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 1692         }
 1693         /*
 1694          *      insert the new entry into the leaf page
 1695          */
 1696         else {
 1697                 /* insert the new entry: mark the entry NEW */
 1698                 xad = &p->xad[index + 1];
 1699                 XT_PUTENTRY(xad, XAD_NEW, xoff, len, xaddr);
 1700 
 1701                 /* advance next available entry index */
 1702                 p->header.nextindex =
 1703                     cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
 1704         }
 1705 
 1706         /* get back old entry */
 1707         xad = &p->xad[index];
 1708         xlen = MAXXLEN;
 1709 
 1710         /*
 1711          * extend old extent
 1712          */
 1713       extendOld:
 1714         XADlength(xad, xlen);
 1715         if (!(xad->flag & XAD_NEW))
 1716                 xad->flag |= XAD_EXTENDED;
 1717 
 1718         if (!test_cflag(COMMIT_Nolink, ip)) {
 1719                 xtlck->lwm.offset =
 1720                     (xtlck->lwm.offset) ? min(index,
 1721                                               (int)xtlck->lwm.offset) : index;
 1722                 xtlck->lwm.length =
 1723                     le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
 1724         }
 1725 
 1726         /* unpin the leaf page */
 1727         XT_PUTPAGE(mp);
 1728 
 1729         return rc;
 1730 }
 1731 
 1732 
 1733 /*
 1734  *      xtTailgate()
 1735  *
 1736  * function: split existing 'tail' extent
 1737  *      (split offset >= start offset of tail extent), and
 1738  *      relocate and extend the split tail half;
 1739  *
 1740  * note: existing extent may or may not have been committed.
 1741  * caller is responsible for pager buffer cache update, and
 1742  * working block allocation map update;
 1743  * update pmap: free old split tail extent, alloc new extent;
 1744  */
 1745 int xtTailgate(tid_t tid,               /* transaction id */
 1746                struct inode *ip, s64 xoff,      /* split/new extent offset */
 1747                s32 xlen,        /* new extent length */
 1748                s64 xaddr,       /* new extent address */
 1749                int flag)
 1750 {
 1751         int rc = 0;
 1752         int cmp;
 1753         struct metapage *mp;    /* meta-page buffer */
 1754         xtpage_t *p;            /* base B+-tree index page */
 1755         s64 bn;
 1756         int index, nextindex, llen, rlen;
 1757         struct btstack btstack; /* traverse stack */
 1758         struct xtsplit split;   /* split information */
 1759         xad_t *xad;
 1760         struct tlock *tlck;
 1761         struct xtlock *xtlck = 0;
 1762         struct tlock *mtlck;
 1763         struct maplock *pxdlock;
 1764         int rootsplit = 0;
 1765 
 1766 /*
 1767 printf("xtTailgate: nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
 1768         (ulong)xoff, xlen, (ulong)xaddr);
 1769 */
 1770 
 1771         /* there must exist extent to be tailgated */
 1772         if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT)))
 1773                 return rc;
 1774         assert(cmp == 0);
 1775 
 1776         /* retrieve search result */
 1777         XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
 1778 
 1779         /* entry found must be last entry */
 1780         nextindex = le16_to_cpu(p->header.nextindex);
 1781         assert(index == nextindex - 1);
 1782 
 1783         BT_MARK_DIRTY(mp, ip);
 1784         /*
 1785          * acquire tlock of the leaf page containing original entry
 1786          */
 1787         if (!test_cflag(COMMIT_Nolink, ip)) {
 1788                 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
 1789                 xtlck = (struct xtlock *) & tlck->lock;
 1790         }
 1791 
 1792         /* completely replace extent ? */
 1793         xad = &p->xad[index];
 1794 /*
 1795 printf("xtTailgate: xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
 1796         (ulong)offsetXAD(xad), lengthXAD(xad), (ulong)addressXAD(xad));
 1797 */
 1798         if ((llen = xoff - offsetXAD(xad)) == 0)
 1799                 goto updateOld;
 1800 
 1801         /*
 1802          *      partially replace extent: insert entry for new extent
 1803          */
 1804 //insertNew:
 1805         /*
 1806          *      if the leaf page is full, insert the new entry and
 1807          *      propagate up the router entry for the new page from split
 1808          *
 1809          * The xtSplitUp() will insert the entry and unpin the leaf page.
 1810          */
 1811         if (nextindex == le16_to_cpu(p->header.maxentry)) {
 1812                 rootsplit = p->header.flag & BT_ROOT;
 1813 
 1814                 /* xtSpliUp() unpins leaf pages */
 1815                 split.mp = mp;
 1816                 split.index = index + 1;
 1817                 split.flag = XAD_NEW;
 1818                 split.off = xoff;       /* split offset */
 1819                 split.len = xlen;
 1820                 split.addr = xaddr;
 1821                 split.pxdlist = NULL;
 1822                 if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
 1823                         return rc;
 1824 
 1825                 /*
 1826                  * if leaf root has been split, original root has been
 1827                  * copied to new child page, i.e., original entry now
 1828                  * resides on the new child page;
 1829                  */
 1830                 if (rootsplit) {
 1831                         if (p->header.nextindex ==
 1832                             cpu_to_le16(XTENTRYSTART + 1)) {
 1833                                 xad = &p->xad[XTENTRYSTART];
 1834                                 bn = addressXAD(xad);
 1835 
 1836                                 /* get new child page */
 1837                                 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 1838 
 1839                                 BT_MARK_DIRTY(mp, ip);
 1840                                 if (!test_cflag(COMMIT_Nolink, ip)) {
 1841                                         tlck = txLock(tid, ip, mp,
 1842                                                       tlckXTREE |
 1843                                                       tlckGROW);
 1844                                         xtlck = (struct xtlock *) & tlck->lock;
 1845                                 }
 1846                         }
 1847                 } else
 1848                         /* get back old page */
 1849                         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 1850         }
 1851         /*
 1852          *      insert the new entry into the leaf page
 1853          */
 1854         else {
 1855                 /* insert the new entry: mark the entry NEW */
 1856                 xad = &p->xad[index + 1];
 1857                 XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);
 1858 
 1859                 /* advance next available entry index */
 1860                 p->header.nextindex =
 1861                     cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
 1862         }
 1863 
 1864         /* get back old XAD */
 1865         xad = &p->xad[index];
 1866 
 1867         /*
 1868          * truncate/relocate old extent at split offset
 1869          */
 1870       updateOld:
 1871         /* update dmap for old/committed/truncated extent */
 1872         rlen = lengthXAD(xad) - llen;
 1873         if (!(xad->flag & XAD_NEW)) {
 1874                 /* free from PWMAP at commit */
 1875                 if (!test_cflag(COMMIT_Nolink, ip)) {
 1876                         mtlck = txMaplock(tid, ip, tlckMAP);
 1877                         pxdlock = (struct maplock *) & mtlck->lock;
 1878                         pxdlock->flag = mlckFREEPXD;
 1879                         PXDaddress(&pxdlock->pxd, addressXAD(xad) + llen);
 1880                         PXDlength(&pxdlock->pxd, rlen);
 1881                         pxdlock->index = 1;
 1882                 }
 1883         } else
 1884                 /* free from WMAP */
 1885                 dbFree(ip, addressXAD(xad) + llen, (s64) rlen);
 1886 
 1887         if (llen)
 1888                 /* truncate */
 1889                 XADlength(xad, llen);
 1890         else
 1891                 /* replace */
 1892                 XT_PUTENTRY(xad, XAD_NEW, xoff, xlen, xaddr);
 1893 
 1894         if (!test_cflag(COMMIT_Nolink, ip)) {
 1895                 xtlck->lwm.offset = (xtlck->lwm.offset) ?
 1896                     min(index, (int)xtlck->lwm.offset) : index;
 1897                 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
 1898                     xtlck->lwm.offset;
 1899         }
 1900 
 1901         /* unpin the leaf page */
 1902         XT_PUTPAGE(mp);
 1903 
 1904         return rc;
 1905 }
 1906 
 1907 
 1908 /*
 1909  *      xtUpdate()
 1910  *
 1911  * function: update XAD;
 1912  *
 1913  *      update extent for allocated_but_not_recorded or
 1914  *      compressed extent;
 1915  *
 1916  * parameter:
 1917  *      nxad    - new XAD;
 1918  *                logical extent of the specified XAD must be completely
 1919  *                contained by an existing XAD;
 1920  */
 1921 int xtUpdate(tid_t tid, struct inode *ip, xad_t * nxad)
 1922 {                               /* new XAD */
 1923         int rc = 0;
 1924         int cmp;
 1925         struct metapage *mp;    /* meta-page buffer */
 1926         xtpage_t *p;            /* base B+-tree index page */
 1927         s64 bn;
 1928         int index0, index, newindex, nextindex;
 1929         struct btstack btstack; /* traverse stack */
 1930         struct xtsplit split;   /* split information */
 1931         xad_t *xad, *lxad, *rxad;
 1932         int xflag;
 1933         s64 nxoff, xoff;
 1934         int nxlen, xlen, lxlen, rxlen;
 1935         s64 nxaddr, xaddr;
 1936         struct tlock *tlck;
 1937         struct xtlock *xtlck = 0;
 1938         int rootsplit = 0, newpage = 0;
 1939 
 1940         /* there must exist extent to be tailgated */
 1941         nxoff = offsetXAD(nxad);
 1942         nxlen = lengthXAD(nxad);
 1943         nxaddr = addressXAD(nxad);
 1944 /*
 1945 printf("xtUpdate: nxflag:0x%x nxoff:0x%lx nxlen:0x%x nxaddr:0x%lx\n",
 1946         nxad->flag, (ulong)nxoff, nxlen, (ulong)nxaddr);
 1947 */
 1948         if ((rc = xtSearch(ip, nxoff, &cmp, &btstack, XT_INSERT)))
 1949                 return rc;
 1950         assert(cmp == 0);
 1951 
 1952         /* retrieve search result */
 1953         XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
 1954 
 1955         BT_MARK_DIRTY(mp, ip);
 1956         /*
 1957          * acquire tlock of the leaf page containing original entry
 1958          */
 1959         if (!test_cflag(COMMIT_Nolink, ip)) {
 1960                 tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
 1961                 xtlck = (struct xtlock *) & tlck->lock;
 1962         }
 1963 
 1964         xad = &p->xad[index0];
 1965         xflag = xad->flag;
 1966         xoff = offsetXAD(xad);
 1967         xlen = lengthXAD(xad);
 1968         xaddr = addressXAD(xad);
 1969 /*
 1970 printf("xtUpdate: xflag:0x%x xoff:0x%lx xlen:0x%x xaddr:0x%lx\n",
 1971         xflag, (ulong)xoff, xlen, (ulong)xaddr);
 1972 */
 1973 
 1974         /* nXAD must be completely contained within XAD */
 1975         assert(xoff <= nxoff);
 1976         assert(nxoff + nxlen <= xoff + xlen);
 1977 
 1978         index = index0;
 1979         newindex = index + 1;
 1980         nextindex = le16_to_cpu(p->header.nextindex);
 1981 
 1982 #ifdef  _JFS_WIP_NOCOALESCE
 1983         if (xoff < nxoff)
 1984                 goto updateRight;
 1985 
 1986         /*
 1987          * replace XAD with nXAD
 1988          */
 1989       replace:                  /* (nxoff == xoff) */
 1990         if (nxlen == xlen) {
 1991                 /* replace XAD with nXAD:recorded */
 1992                 *xad = *nxad;
 1993                 xad->flag = xflag & ~XAD_NOTRECORDED;
 1994 
 1995                 goto out;
 1996         } else                  /* (nxlen < xlen) */
 1997                 goto updateLeft;
 1998 #endif                          /* _JFS_WIP_NOCOALESCE */
 1999 
 2000 /* #ifdef _JFS_WIP_COALESCE */
 2001         if (xoff < nxoff)
 2002                 goto coalesceRight;
 2003 
 2004         /*
 2005          * coalesce with left XAD
 2006          */
 2007 //coalesceLeft: /* (xoff == nxoff) */
 2008         /* is XAD first entry of page ? */
 2009         if (index == XTENTRYSTART)
 2010                 goto replace;
 2011 
 2012         /* is nXAD logically and physically contiguous with lXAD ? */
 2013         lxad = &p->xad[index - 1];
 2014         lxlen = lengthXAD(lxad);
 2015         if (!(lxad->flag & XAD_NOTRECORDED) &&
 2016             (nxoff == offsetXAD(lxad) + lxlen) &&
 2017             (nxaddr == addressXAD(lxad) + lxlen) &&
 2018             (lxlen + nxlen < MAXXLEN)) {
 2019                 /* extend right lXAD */
 2020                 index0 = index - 1;
 2021                 XADlength(lxad, lxlen + nxlen);
 2022 
 2023                 /* If we just merged two extents together, need to make sure the
 2024                  * right extent gets logged.  If the left one is marked XAD_NEW,
 2025                  * then we know it will be logged.  Otherwise, mark as
 2026                  * XAD_EXTENDED
 2027                  */
 2028                 if (!(lxad->flag & XAD_NEW))
 2029                         lxad->flag |= XAD_EXTENDED;
 2030 
 2031                 if (xlen > nxlen) {
 2032                         /* truncate XAD */
 2033                         XADoffset(xad, xoff + nxlen);
 2034                         XADlength(xad, xlen - nxlen);
 2035                         XADaddress(xad, xaddr + nxlen);
 2036                         goto out;
 2037                 } else {        /* (xlen == nxlen) */
 2038 
 2039                         /* remove XAD */
 2040                         if (index < nextindex - 1)
 2041                                 memmove(&p->xad[index], &p->xad[index + 1],
 2042                                         (nextindex - index -
 2043                                          1) << L2XTSLOTSIZE);
 2044 
 2045                         p->header.nextindex =
 2046                             cpu_to_le16(le16_to_cpu(p->header.nextindex) -
 2047                                         1);
 2048 
 2049                         index = index0;
 2050                         newindex = index + 1;
 2051                         nextindex = le16_to_cpu(p->header.nextindex);
 2052                         xoff = nxoff = offsetXAD(lxad);
 2053                         xlen = nxlen = lxlen + nxlen;
 2054                         xaddr = nxaddr = addressXAD(lxad);
 2055                         goto coalesceRight;
 2056                 }
 2057         }
 2058 
 2059         /*
 2060          * replace XAD with nXAD
 2061          */
 2062       replace:                  /* (nxoff == xoff) */
 2063         if (nxlen == xlen) {
 2064                 /* replace XAD with nXAD:recorded */
 2065                 *xad = *nxad;
 2066                 xad->flag = xflag & ~XAD_NOTRECORDED;
 2067 
 2068                 goto coalesceRight;
 2069         } else                  /* (nxlen < xlen) */
 2070                 goto updateLeft;
 2071 
 2072         /*
 2073          * coalesce with right XAD
 2074          */
 2075       coalesceRight:            /* (xoff <= nxoff) */
 2076         /* is XAD last entry of page ? */
 2077         if (newindex == nextindex) {
 2078                 if (xoff == nxoff)
 2079                         goto out;
 2080                 goto updateRight;
 2081         }
 2082 
 2083         /* is nXAD logically and physically contiguous with rXAD ? */
 2084         rxad = &p->xad[index + 1];
 2085         rxlen = lengthXAD(rxad);
 2086         if (!(rxad->flag & XAD_NOTRECORDED) &&
 2087             (nxoff + nxlen == offsetXAD(rxad)) &&
 2088             (nxaddr + nxlen == addressXAD(rxad)) &&
 2089             (rxlen + nxlen < MAXXLEN)) {
 2090                 /* extend left rXAD */
 2091                 XADoffset(rxad, nxoff);
 2092                 XADlength(rxad, rxlen + nxlen);
 2093                 XADaddress(rxad, nxaddr);
 2094 
 2095                 /* If we just merged two extents together, need to make sure
 2096                  * the left extent gets logged.  If the right one is marked
 2097                  * XAD_NEW, then we know it will be logged.  Otherwise, mark as
 2098                  * XAD_EXTENDED
 2099                  */
 2100                 if (!(rxad->flag & XAD_NEW))
 2101                         rxad->flag |= XAD_EXTENDED;
 2102 
 2103                 if (xlen > nxlen)
 2104                         /* truncate XAD */
 2105                         XADlength(xad, xlen - nxlen);
 2106                 else {          /* (xlen == nxlen) */
 2107 
 2108                         /* remove XAD */
 2109                         memmove(&p->xad[index], &p->xad[index + 1],
 2110                                 (nextindex - index - 1) << L2XTSLOTSIZE);
 2111 
 2112                         p->header.nextindex =
 2113                             cpu_to_le16(le16_to_cpu(p->header.nextindex) -
 2114                                         1);
 2115                 }
 2116 
 2117                 goto out;
 2118         } else if (xoff == nxoff)
 2119                 goto out;
 2120 
 2121         assert(xoff < nxoff);
 2122 /* #endif _JFS_WIP_COALESCE */
 2123 
 2124         /*
 2125          * split XAD into (lXAD, nXAD):
 2126          *
 2127          *          |---nXAD--->
 2128          * --|----------XAD----------|--
 2129          *   |-lXAD-|
 2130          */
 2131       updateRight:              /* (xoff < nxoff) */
 2132         /* truncate old XAD as lXAD:not_recorded */
 2133         xad = &p->xad[index];
 2134         XADlength(xad, nxoff - xoff);
 2135 
 2136         /* insert nXAD:recorded */
 2137         if (nextindex == le16_to_cpu(p->header.maxentry)) {
 2138 /*
 2139 printf("xtUpdate.updateRight.split p:0x%p\n", p);
 2140 */
 2141                 rootsplit = p->header.flag & BT_ROOT;
 2142 
 2143                 /* xtSpliUp() unpins leaf pages */
 2144                 split.mp = mp;
 2145                 split.index = newindex;
 2146                 split.flag = xflag & ~XAD_NOTRECORDED;
 2147                 split.off = nxoff;
 2148                 split.len = nxlen;
 2149                 split.addr = nxaddr;
 2150                 split.pxdlist = NULL;
 2151                 if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
 2152                         return rc;
 2153 
 2154                 /*
 2155                  * if leaf root has been split, original root has been
 2156                  * copied to new child page, i.e., original entry now
 2157                  * resides on the new child page;
 2158                  */
 2159                 if (rootsplit) {
 2160                         if (p->header.nextindex ==
 2161                             cpu_to_le16(XTENTRYSTART + 1)) {
 2162                                 xad = &p->xad[XTENTRYSTART];
 2163                                 bn = addressXAD(xad);
 2164 
 2165                                 /* get new child page */
 2166                                 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 2167 
 2168                                 BT_MARK_DIRTY(mp, ip);
 2169                                 if (!test_cflag(COMMIT_Nolink, ip)) {
 2170                                         tlck = txLock(tid, ip, mp,
 2171                                                       tlckXTREE |
 2172                                                       tlckGROW);
 2173                                         xtlck = (struct xtlock *) & tlck->lock;
 2174                                 }
 2175                         }
 2176                 } else {
 2177                         /* get back old page */
 2178                         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 2179 
 2180                         /* is nXAD on new page ? */
 2181                         if (newindex >
 2182                             (le16_to_cpu(p->header.maxentry) >> 1)) {
 2183                                 newindex =
 2184                                     newindex -
 2185                                     le16_to_cpu(p->header.nextindex) +
 2186                                     XTENTRYSTART;
 2187                                 newpage = 1;
 2188                         }
 2189                 }
 2190         } else {
 2191                 /* if insert into middle, shift right remaining entries */
 2192                 if (newindex < nextindex)
 2193                         memmove(&p->xad[newindex + 1], &p->xad[newindex],
 2194                                 (nextindex - newindex) << L2XTSLOTSIZE);
 2195 
 2196                 /* insert the entry */
 2197                 xad = &p->xad[newindex];
 2198                 *xad = *nxad;
 2199                 xad->flag = xflag & ~XAD_NOTRECORDED;
 2200 
 2201                 /* advance next available entry index. */
 2202                 p->header.nextindex =
 2203                     cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
 2204         }
 2205 
 2206         /*
 2207          * does nXAD force 3-way split ?
 2208          *
 2209          *          |---nXAD--->|
 2210          * --|----------XAD-------------|--
 2211          *   |-lXAD-|           |-rXAD -|
 2212          */
 2213         if (nxoff + nxlen == xoff + xlen)
 2214                 goto out;
 2215 
 2216         /* reorient nXAD as XAD for further split XAD into (nXAD, rXAD) */
 2217         if (newpage) {
 2218                 /* close out old page */
 2219                 if (!test_cflag(COMMIT_Nolink, ip)) {
 2220                         xtlck->lwm.offset = (xtlck->lwm.offset) ?
 2221                             min(index0, (int)xtlck->lwm.offset) : index0;
 2222                         xtlck->lwm.length =
 2223                             le16_to_cpu(p->header.nextindex) -
 2224                             xtlck->lwm.offset;
 2225                 }
 2226 
 2227                 bn = le64_to_cpu(p->header.next);
 2228                 XT_PUTPAGE(mp);
 2229 
 2230                 /* get new right page */
 2231                 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 2232 
 2233                 BT_MARK_DIRTY(mp, ip);
 2234                 if (!test_cflag(COMMIT_Nolink, ip)) {
 2235                         tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
 2236                         xtlck = (struct xtlock *) & tlck->lock;
 2237                 }
 2238 
 2239                 index0 = index = newindex;
 2240         } else
 2241                 index++;
 2242 
 2243         newindex = index + 1;
 2244         nextindex = le16_to_cpu(p->header.nextindex);
 2245         xlen = xlen - (nxoff - xoff);
 2246         xoff = nxoff;
 2247         xaddr = nxaddr;
 2248 
 2249         /* recompute split pages */
 2250         if (nextindex == le16_to_cpu(p->header.maxentry)) {
 2251 /*
 2252 printf("xtUpdate: updateRight+Left recompute split pages: p:0x%p\n", p);
 2253 */
 2254                 XT_PUTPAGE(mp);
 2255 
 2256                 if ((rc = xtSearch(ip, nxoff, &cmp, &btstack, XT_INSERT)))
 2257                         return rc;
 2258                 assert(cmp == 0);
 2259 
 2260                 /* retrieve search result */
 2261                 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index0);
 2262                 assert(index0 == index);
 2263         }
 2264 
 2265         /*
 2266          * split XAD into (nXAD, rXAD)
 2267          *
 2268          *          ---nXAD---|
 2269          * --|----------XAD----------|--
 2270          *                    |-rXAD-|
 2271          */
 2272       updateLeft:               /* (nxoff == xoff) && (nxlen < xlen) */
 2273         /* update old XAD with nXAD:recorded */
 2274         xad = &p->xad[index];
 2275         *xad = *nxad;
 2276         xad->flag = xflag & ~XAD_NOTRECORDED;
 2277 
 2278         /* insert rXAD:not_recorded */
 2279         xoff = xoff + nxlen;
 2280         xlen = xlen - nxlen;
 2281         xaddr = xaddr + nxlen;
 2282         if (nextindex == le16_to_cpu(p->header.maxentry)) {
 2283                 rootsplit = p->header.flag & BT_ROOT;
 2284 
 2285 /*
 2286 printf("xtUpdate.updateLeft.split p:0x%p\n", p);
 2287 */
 2288                 /* xtSpliUp() unpins leaf pages */
 2289                 split.mp = mp;
 2290                 split.index = newindex;
 2291                 split.flag = xflag;
 2292                 split.off = xoff;
 2293                 split.len = xlen;
 2294                 split.addr = xaddr;
 2295                 split.pxdlist = NULL;
 2296                 if ((rc = xtSplitUp(tid, ip, &split, &btstack)))
 2297                         return rc;
 2298 
 2299                 /*
 2300                  * if leaf root has been split, original root has been
 2301                  * copied to new child page, i.e., original entry now
 2302                  * resides on the new child page;
 2303                  */
 2304                 if (rootsplit) {
 2305                         if (p->header.nextindex ==
 2306                             cpu_to_le16(XTENTRYSTART + 1)) {
 2307                                 xad = &p->xad[XTENTRYSTART];
 2308                                 bn = addressXAD(xad);
 2309 
 2310                                 /* get new child page */
 2311                                 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 2312 
 2313                                 BT_MARK_DIRTY(mp, ip);
 2314                                 if (!test_cflag(COMMIT_Nolink, ip)) {
 2315                                         tlck = txLock(tid, ip, mp,
 2316                                                       tlckXTREE |
 2317                                                       tlckGROW);
 2318                                         xtlck = (struct xtlock *) & tlck->lock;
 2319                                 }
 2320                         }
 2321                 } else
 2322                         /* get back old page */
 2323                         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 2324         } else {
 2325                 /* if insert into middle, shift right remaining entries */
 2326                 if (newindex < nextindex)
 2327                         memmove(&p->xad[newindex + 1], &p->xad[newindex],
 2328                                 (nextindex - newindex) << L2XTSLOTSIZE);
 2329 
 2330                 /* insert the entry */
 2331                 xad = &p->xad[newindex];
 2332                 XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
 2333 
 2334                 /* advance next available entry index. */
 2335                 p->header.nextindex =
 2336                     cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
 2337         }
 2338 
 2339       out:
 2340         if (!test_cflag(COMMIT_Nolink, ip)) {
 2341                 xtlck->lwm.offset = (xtlck->lwm.offset) ?
 2342                     min(index0, (int)xtlck->lwm.offset) : index0;
 2343                 xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
 2344                     xtlck->lwm.offset;
 2345         }
 2346 
 2347         /* unpin the leaf page */
 2348         XT_PUTPAGE(mp);
 2349 
 2350         return rc;
 2351 }
 2352 
 2353 
 2354 /*
 2355  *      xtAppend()
 2356  *
 2357  * function: grow in append mode from contiguous region specified ;
 2358  *
 2359  * parameter:
 2360  *      tid             - transaction id;
 2361  *      ip              - file object;
 2362  *      xflag           - extent flag:
 2363  *      xoff            - extent offset;
 2364  *      maxblocks       - max extent length;
 2365  *      xlen            - extent length (in/out);
 2366  *      xaddrp          - extent address pointer (in/out):
 2367  *      flag            -
 2368  *
 2369  * return:
 2370  */
 2371 int xtAppend(tid_t tid,         /* transaction id */
 2372              struct inode *ip, int xflag, s64 xoff, s32 maxblocks,      
 2373              s32 * xlenp,       /* (in/out) */
 2374              s64 * xaddrp,      /* (in/out) */
 2375              int flag)
 2376 {
 2377         int rc = 0;
 2378         struct metapage *mp;    /* meta-page buffer */
 2379         xtpage_t *p;            /* base B+-tree index page */
 2380         s64 bn, xaddr;
 2381         int index, nextindex;
 2382         struct btstack btstack; /* traverse stack */
 2383         struct xtsplit split;   /* split information */
 2384         xad_t *xad;
 2385         int cmp;
 2386         struct tlock *tlck;
 2387         struct xtlock *xtlck;
 2388         int nsplit, nblocks, xlen;
 2389         struct pxdlist pxdlist;
 2390         pxd_t *pxd;
 2391 
 2392         xaddr = *xaddrp;
 2393         xlen = *xlenp;
 2394         jfs_info("xtAppend: xoff:0x%lx maxblocks:%d xlen:%d xaddr:0x%lx",
 2395                  (ulong) xoff, maxblocks, xlen, (ulong) xaddr);
 2396 
 2397         /*
 2398          *      search for the entry location at which to insert:
 2399          *
 2400          * xtFastSearch() and xtSearch() both returns (leaf page
 2401          * pinned, index at which to insert).
 2402          * n.b. xtSearch() may return index of maxentry of
 2403          * the full page.
 2404          */
 2405         if ((rc = xtSearch(ip, xoff, &cmp, &btstack, XT_INSERT)))
 2406                 return rc;
 2407 
 2408         /* retrieve search result */
 2409         XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
 2410 
 2411         if (cmp == 0) {
 2412                 rc = EEXIST;
 2413                 goto out;
 2414         }
 2415 //insert:
 2416         /*
 2417          *      insert entry for new extent
 2418          */
 2419         xflag |= XAD_NEW;
 2420 
 2421         /*
 2422          *      if the leaf page is full, split the page and
 2423          *      propagate up the router entry for the new page from split
 2424          *
 2425          * The xtSplitUp() will insert the entry and unpin the leaf page.
 2426          */
 2427         nextindex = le16_to_cpu(p->header.nextindex);
 2428         if (nextindex < le16_to_cpu(p->header.maxentry))
 2429                 goto insertLeaf;
 2430 
 2431         /*
 2432          * allocate new index blocks to cover index page split(s)
 2433          */
 2434         nsplit = btstack.nsplit;
 2435         split.pxdlist = &pxdlist;
 2436         pxdlist.maxnpxd = pxdlist.npxd = 0;
 2437         pxd = &pxdlist.pxd[0];
 2438         nblocks = JFS_SBI(ip->i_sb)->nbperpage;
 2439         for (; nsplit > 0; nsplit--, pxd++, xaddr += nblocks, maxblocks -= nblocks) {   
 2440                 if ((rc = dbAllocBottomUp(ip, xaddr, (s64) nblocks)) == 0) {
 2441                         PXDaddress(pxd, xaddr);
 2442                         PXDlength(pxd, nblocks);
 2443 
 2444                         pxdlist.maxnpxd++;
 2445 
 2446                         continue;
 2447                 }
 2448 
 2449                 /* undo allocation */
 2450 
 2451                 goto out;
 2452         }
 2453 
 2454         xlen = min(xlen, maxblocks);    
 2455 
 2456         /*
 2457          * allocate data extent requested
 2458          */
 2459         if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
 2460                 goto out;
 2461 
 2462         split.mp = mp;
 2463         split.index = index;
 2464         split.flag = xflag;
 2465         split.off = xoff;
 2466         split.len = xlen;
 2467         split.addr = xaddr;
 2468         if ((rc = xtSplitUp(tid, ip, &split, &btstack))) {
 2469                 /* undo data extent allocation */
 2470                 dbFree(ip, *xaddrp, (s64) * xlenp);
 2471 
 2472                 return rc;
 2473         }
 2474 
 2475         *xaddrp = xaddr;
 2476         *xlenp = xlen;
 2477         return 0;
 2478 
 2479         /*
 2480          *      insert the new entry into the leaf page
 2481          */
 2482       insertLeaf:
 2483         /*
 2484          * allocate data extent requested
 2485          */
 2486         if ((rc = dbAllocBottomUp(ip, xaddr, (s64) xlen)))
 2487                 goto out;
 2488 
 2489         BT_MARK_DIRTY(mp, ip);
 2490         /*
 2491          * acquire a transaction lock on the leaf page;
 2492          *
 2493          * action: xad insertion/extension;
 2494          */
 2495         tlck = txLock(tid, ip, mp, tlckXTREE | tlckGROW);
 2496         xtlck = (struct xtlock *) & tlck->lock;
 2497 
 2498         /* insert the new entry: mark the entry NEW */
 2499         xad = &p->xad[index];
 2500         XT_PUTENTRY(xad, xflag, xoff, xlen, xaddr);
 2501 
 2502         /* advance next available entry index */
 2503         p->header.nextindex =
 2504             cpu_to_le16(le16_to_cpu(p->header.nextindex) + 1);
 2505 
 2506         xtlck->lwm.offset =
 2507             (xtlck->lwm.offset) ? min(index,(int) xtlck->lwm.offset) : index;
 2508         xtlck->lwm.length = le16_to_cpu(p->header.nextindex) -
 2509             xtlck->lwm.offset;
 2510 
 2511         *xaddrp = xaddr;
 2512         *xlenp = xlen;
 2513 
 2514       out:
 2515         /* unpin the leaf page */
 2516         XT_PUTPAGE(mp);
 2517 
 2518         return rc;
 2519 }
 2520 #ifdef _STILL_TO_PORT
 2521 
 2522 /* - TBD for defragmentaion/reorganization -
 2523  *
 2524  *      xtDelete()
 2525  *
 2526  * function:
 2527  *      delete the entry with the specified key.
 2528  *
 2529  *      N.B.: whole extent of the entry is assumed to be deleted.
 2530  *
 2531  * parameter:
 2532  *
 2533  * return:
 2534  *       ENOENT: if the entry is not found.
 2535  *
 2536  * exception:
 2537  */
 2538 int xtDelete(tid_t tid, struct inode *ip, s64 xoff, s32 xlen, int flag)
 2539 {
 2540         int rc = 0;
 2541         struct btstack btstack;
 2542         int cmp;
 2543         s64 bn;
 2544         struct metapage *mp;
 2545         xtpage_t *p;
 2546         int index, nextindex;
 2547         struct tlock *tlck;
 2548         struct xtlock *xtlck;
 2549 
 2550         /*
 2551          * find the matching entry; xtSearch() pins the page
 2552          */
 2553         if ((rc = xtSearch(ip, xoff, &cmp, &btstack, 0)))
 2554                 return rc;
 2555 
 2556         XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
 2557         if (cmp) {
 2558                 /* unpin the leaf page */
 2559                 XT_PUTPAGE(mp);
 2560                 return ENOENT;
 2561         }
 2562 
 2563         /*
 2564          * delete the entry from the leaf page
 2565          */
 2566         nextindex = le16_to_cpu(p->header.nextindex);
 2567         p->header.nextindex =
 2568             cpu_to_le16(le16_to_cpu(p->header.nextindex) - 1);
 2569 
 2570         /*
 2571          * if the leaf page bocome empty, free the page
 2572          */
 2573         if (p->header.nextindex == cpu_to_le16(XTENTRYSTART))
 2574                 return (xtDeleteUp(tid, ip, mp, p, &btstack));
 2575 
 2576         BT_MARK_DIRTY(mp, ip);
 2577         /*
 2578          * acquire a transaction lock on the leaf page;
 2579          *
 2580          * action:xad deletion;
 2581          */
 2582         tlck = txLock(tid, ip, mp, tlckXTREE);
 2583         xtlck = (struct xtlock *) & tlck->lock;
 2584         xtlck->lwm.offset =
 2585             (xtlck->lwm.offset) ? min(index, xtlck->lwm.offset) : index;
 2586 
 2587         /* if delete from middle, shift left/compact the remaining entries */
 2588         if (index < nextindex - 1)
 2589                 memmove(&p->xad[index], &p->xad[index + 1],
 2590                         (nextindex - index - 1) * sizeof(xad_t));
 2591 
 2592         XT_PUTPAGE(mp);
 2593 
 2594         return 0;
 2595 }
 2596 
 2597 
 2598 /* - TBD for defragmentaion/reorganization -
 2599  *
 2600  *      xtDeleteUp()
 2601  *
 2602  * function:
 2603  *      free empty pages as propagating deletion up the tree
 2604  *
 2605  * parameter:
 2606  *
 2607  * return:
 2608  */
 2609 static int
 2610 xtDeleteUp(tid_t tid, struct inode *ip,
 2611            struct metapage * fmp, xtpage_t * fp, struct btstack * btstack)
 2612 {
 2613         int rc = 0;
 2614         struct metapage *mp;
 2615         xtpage_t *p;
 2616         int index, nextindex;
 2617         s64 xaddr;
 2618         int xlen;
 2619         struct btframe *parent;
 2620         struct tlock *tlck;
 2621         struct xtlock *xtlck;
 2622 
 2623         /*
 2624          * keep root leaf page which has become empty
 2625          */
 2626         if (fp->header.flag & BT_ROOT) {
 2627                 /* keep the root page */
 2628                 fp->header.flag &= ~BT_INTERNAL;
 2629                 fp->header.flag |= BT_LEAF;
 2630                 fp->header.nextindex = cpu_to_le16(XTENTRYSTART);
 2631 
 2632                 /* XT_PUTPAGE(fmp); */
 2633 
 2634                 return 0;
 2635         }
 2636 
 2637         /*
 2638          * free non-root leaf page
 2639          */
 2640         if ((rc = xtRelink(tid, ip, fp)))
 2641                 return rc;
 2642 
 2643         xaddr = addressPXD(&fp->header.self);
 2644         xlen = lengthPXD(&fp->header.self);
 2645         /* free the page extent */
 2646         dbFree(ip, xaddr, (s64) xlen);
 2647 
 2648         /* free the buffer page */
 2649         discard_metapage(fmp);
 2650 
 2651         /*
 2652          * propagate page deletion up the index tree
 2653          *
 2654          * If the delete from the parent page makes it empty,
 2655          * continue all the way up the tree.
 2656          * stop if the root page is reached (which is never deleted) or
 2657          * if the entry deletion does not empty the page.
 2658          */
 2659         while ((parent = BT_POP(btstack)) != NULL) {
 2660                 /* get/pin the parent page <sp> */
 2661                 XT_GETPAGE(ip, parent->bn, mp, PSIZE, p, rc);
 2662                 if (rc)
 2663                         return rc;
 2664 
 2665                 index = parent->index;
 2666 
 2667                 /* delete the entry for the freed child page from parent.
 2668                  */
 2669                 nextindex = le16_to_cpu(p->header.nextindex);
 2670 
 2671                 /*
 2672                  * the parent has the single entry being deleted:
 2673                  * free the parent page which has become empty.
 2674                  */
 2675                 if (nextindex == 1) {
 2676                         if (p->header.flag & BT_ROOT) {
 2677                                 /* keep the root page */
 2678                                 p->header.flag &= ~BT_INTERNAL;
 2679                                 p->header.flag |= BT_LEAF;
 2680                                 p->header.nextindex =
 2681                                     cpu_to_le16(XTENTRYSTART);
 2682 
 2683                                 /* XT_PUTPAGE(fmp); */
 2684 
 2685                                 break;
 2686                         } else {
 2687                                 /* free the parent page */
 2688                                 if ((rc = xtRelink(tid, ip, p)))
 2689                                         return rc;
 2690 
 2691                                 xaddr = addressPXD(&p->header.self);
 2692                                 /* free the page extent */
 2693                                 dbFree(ip, xaddr,
 2694                                        (s64) JFS_SBI(ip->i_sb)->nbperpage);
 2695 
 2696                                 /* unpin/free the buffer page */
 2697                                 discard_metapage(fmp);
 2698 
 2699                                 /* propagate up */
 2700                                 continue;
 2701                         }
 2702                 }
 2703                 /*
 2704                  * the parent has other entries remaining:
 2705                  * delete the router entry from the parent page.
 2706                  */
 2707                 else {
 2708                         BT_MARK_DIRTY(mp, ip);
 2709                         /*
 2710                          * acquire a transaction lock on the leaf page;
 2711                          *
 2712                          * action:xad deletion;
 2713                          */
 2714                         tlck = txLock(tid, ip, mp, tlckXTREE);
 2715                         xtlck = (struct xtlock *) & tlck->lock;
 2716                         xtlck->lwm.offset =
 2717                             (xtlck->lwm.offset) ? min(index,
 2718                                                       xtlck->lwm.
 2719                                                       offset) : index;
 2720 
 2721                         /* if delete from middle,
 2722                          * shift left/compact the remaining entries in the page
 2723                          */
 2724                         if (index < nextindex - 1)
 2725                                 memmove(&p->xad[index], &p->xad[index + 1],
 2726                                         (nextindex - index -
 2727                                          1) << L2XTSLOTSIZE);
 2728 
 2729                         p->header.nextindex =
 2730                             cpu_to_le16(le16_to_cpu(p->header.nextindex) -
 2731                                         1);
 2732                         jfs_info("xtDeleteUp(entry): 0x%lx[%d]",
 2733                                  (ulong) parent->bn, index);
 2734                 }
 2735 
 2736                 /* unpin the parent page */
 2737                 XT_PUTPAGE(mp);
 2738 
 2739                 /* exit propagation up */
 2740                 break;
 2741         }
 2742 
 2743         return 0;
 2744 }
 2745 
 2746 
 2747 /*
 2748  * NAME:        xtRelocate()
 2749  *
 2750  * FUNCTION:    relocate xtpage or data extent of regular file;
 2751  *              This function is mainly used by defragfs utility.
 2752  *
 2753  * NOTE:        This routine does not have the logic to handle
 2754  *              uncommitted allocated extent. The caller should call
 2755  *              txCommit() to commit all the allocation before call
 2756  *              this routine.
 2757  */
 2758 xtRelocate(tid_t tid, struct inode * ip, xad_t * oxad,  /* old XAD */
 2759            s64 nxaddr,          /* new xaddr */
 2760            int xtype)
 2761 {                               /* extent type: XTPAGE or DATAEXT */
 2762         int rc = 0;
 2763         struct tblock *tblk;
 2764         struct tlock *tlck;
 2765         struct xtlock *xtlck;
 2766         struct metapage *mp, *pmp, *lmp, *rmp;  /* meta-page buffer */
 2767         xtpage_t *p, *pp, *rp, *lp;     /* base B+-tree index page */
 2768         xad_t *xad;
 2769         pxd_t *pxd;
 2770         s64 xoff, xsize;
 2771         int xlen;
 2772         s64 oxaddr, sxaddr, dxaddr, nextbn, prevbn;
 2773         cbuf_t *cp;
 2774         s64 offset, nbytes, nbrd, pno;
 2775         int nb, npages, nblks;
 2776         s64 bn;
 2777         int cmp;
 2778         int index;
 2779         struct pxd_lock *pxdlock;
 2780         struct btstack btstack; /* traverse stack */
 2781 
 2782         xtype = xtype & EXTENT_TYPE;
 2783 
 2784         xoff = offsetXAD(oxad);
 2785         oxaddr = addressXAD(oxad);
 2786         xlen = lengthXAD(oxad);
 2787 
 2788         /* validate extent offset */
 2789         offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
 2790         if (offset >= ip->i_size)
 2791                 return ESTALE;  /* stale extent */
 2792 
 2793         jfs_info("xtRelocate: xtype:%d xoff:0x%lx xlen:0x%x xaddr:0x%lx:0x%lx",
 2794                  xtype, (ulong) xoff, xlen, (ulong) oxaddr, (ulong) nxaddr);
 2795 
 2796         /*
 2797          *      1. get and validate the parent xtpage/xad entry
 2798          *      covering the source extent to be relocated;
 2799          */
 2800         if (xtype == DATAEXT) {
 2801                 /* search in leaf entry */
 2802                 rc = xtSearch(ip, xoff, &cmp, &btstack, 0);
 2803                 if (rc)
 2804                         return rc;
 2805                 if (cmp) {
 2806                         XT_PUTPAGE(pmp);
 2807                         return ESTALE;
 2808                 }
 2809 
 2810                 /* retrieve search result */
 2811                 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
 2812 
 2813                 /* validate for exact match with a single entry */
 2814                 xad = &pp->xad[index];
 2815                 if (addressXAD(xad) != oxaddr || lengthXAD(xad) != xlen) {
 2816                         XT_PUTPAGE(pmp);
 2817                         return ESTALE;
 2818                 }
 2819         } else {                /* (xtype == XTPAGE) */
 2820 
 2821                 /* search in internal entry */
 2822                 rc = xtSearchNode(ip, oxad, &cmp, &btstack, 0);
 2823                 if (rc)
 2824                         return rc;
 2825                 if (cmp) {
 2826                         XT_PUTPAGE(pmp);
 2827                         return ESTALE;
 2828                 }
 2829 
 2830                 /* retrieve search result */
 2831                 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
 2832 
 2833                 /* xtSearchNode() validated for exact match with a single entry
 2834                  */
 2835                 xad = &pp->xad[index];
 2836         }
 2837         jfs_info("xtRelocate: parent xad entry validated.");
 2838 
 2839         /*
 2840          *      2. relocate the extent
 2841          */
 2842         if (xtype == DATAEXT) {
 2843                 /* if the extent is allocated-but-not-recorded
 2844                  * there is no real data to be moved in this extent,
 2845                  */
 2846                 if (xad->flag & XAD_NOTRECORDED)
 2847                         goto out;
 2848                 else
 2849                         /* release xtpage for cmRead()/xtLookup() */
 2850                         XT_PUTPAGE(pmp);
 2851 
 2852                 /*
 2853                  *      cmRelocate()
 2854                  *
 2855                  * copy target data pages to be relocated;
 2856                  *
 2857                  * data extent must start at page boundary and
 2858                  * multiple of page size (except the last data extent);
 2859                  * read in each page of the source data extent into cbuf,
 2860                  * update the cbuf extent descriptor of the page to be
 2861                  * homeward bound to new dst data extent
 2862                  * copy the data from the old extent to new extent.
 2863                  * copy is essential for compressed files to avoid problems
 2864                  * that can arise if there was a change in compression
 2865                  * algorithms.
 2866                  * it is a good strategy because it may disrupt cache
 2867                  * policy to keep the pages in memory afterwards.
 2868                  */
 2869                 offset = xoff << JFS_SBI(ip->i_sb)->l2bsize;
 2870                 assert((offset & CM_OFFSET) == 0);
 2871                 nbytes = xlen << JFS_SBI(ip->i_sb)->l2bsize;
 2872                 pno = offset >> CM_L2BSIZE;
 2873                 npages = (nbytes + (CM_BSIZE - 1)) >> CM_L2BSIZE;
 2874 /*
 2875                 npages = ((offset + nbytes - 1) >> CM_L2BSIZE) -
 2876                          (offset >> CM_L2BSIZE) + 1;
 2877 */
 2878                 sxaddr = oxaddr;
 2879                 dxaddr = nxaddr;
 2880 
 2881                 /* process the request one cache buffer at a time */
 2882                 for (nbrd = 0; nbrd < nbytes; nbrd += nb,
 2883                      offset += nb, pno++, npages--) {
 2884                         /* compute page size */
 2885                         nb = min(nbytes - nbrd, CM_BSIZE);
 2886 
 2887                         /* get the cache buffer of the page */
 2888                         if (rc = cmRead(ip, offset, npages, &cp))
 2889                                 break;
 2890 
 2891                         assert(addressPXD(&cp->cm_pxd) == sxaddr);
 2892                         assert(!cp->cm_modified);
 2893 
 2894                         /* bind buffer with the new extent address */
 2895                         nblks = nb >> JFS_IP(ip->i_sb)->l2bsize;
 2896                         cmSetXD(ip, cp, pno, dxaddr, nblks);
 2897 
 2898                         /* release the cbuf, mark it as modified */
 2899                         cmPut(cp, TRUE);
 2900 
 2901                         dxaddr += nblks;
 2902                         sxaddr += nblks;
 2903                 }
 2904 
 2905                 /* get back parent page */
 2906                 rc = xtSearch(ip, xoff, &cmp, &btstack, 0);
 2907                 XT_GETSEARCH(ip, btstack.top, bn, pmp, pp, index);
 2908                 jfs_info("xtRelocate: target data extent relocated.");
 2909         } else {                /* (xtype  == XTPAGE) */
 2910 
 2911                 /*
 2912                  * read in the target xtpage from the source extent;
 2913                  */
 2914                 XT_GETPAGE(ip, oxaddr, mp, PSIZE, p, rc);
 2915                 if (rc) {
 2916                         XT_PUTPAGE(pmp);
 2917                         return rc;
 2918                 }
 2919 
 2920                 /*
 2921                  * read in sibling pages if any to update sibling pointers;
 2922                  */
 2923                 rmp = NULL;
 2924                 if (p->header.next) {
 2925                         nextbn = le64_to_cpu(p->header.next);
 2926                         XT_GETPAGE(ip, nextbn, rmp, PSIZE, rp, rc);
 2927                         if (rc) {
 2928                                 XT_PUTPAGE(pmp);
 2929                                 XT_PUTPAGE(mp);
 2930                                 return (rc);
 2931                         }
 2932                 }
 2933 
 2934                 lmp = NULL;
 2935                 if (p->header.prev) {
 2936                         prevbn = le64_to_cpu(p->header.prev);
 2937                         XT_GETPAGE(ip, prevbn, lmp, PSIZE, lp, rc);
 2938                         if (rc) {
 2939                                 XT_PUTPAGE(pmp);
 2940                                 XT_PUTPAGE(mp);
 2941                                 if (rmp)
 2942                                         XT_PUTPAGE(rmp);
 2943                                 return (rc);
 2944                         }
 2945                 }
 2946 
 2947                 /* at this point, all xtpages to be updated are in memory */
 2948 
 2949                 /*
 2950                  * update sibling pointers of sibling xtpages if any;
 2951                  */
 2952                 if (lmp) {
 2953                         BT_MARK_DIRTY(lmp, ip);
 2954                         tlck =
 2955                             txLock(tid, ip, lmp, tlckXTREE | tlckRELINK);
 2956                         lp->header.next = cpu_to_le64(nxaddr);
 2957                         XT_PUTPAGE(lmp);
 2958                 }
 2959 
 2960                 if (rmp) {
 2961                         BT_MARK_DIRTY(rmp, ip);
 2962                         tlck =
 2963                             txLock(tid, ip, rmp, tlckXTREE | tlckRELINK);
 2964                         rp->header.prev = cpu_to_le64(nxaddr);
 2965                         XT_PUTPAGE(rmp);
 2966                 }
 2967 
 2968                 /*
 2969                  * update the target xtpage to be relocated
 2970                  *
 2971                  * update the self address of the target page
 2972                  * and write to destination extent;
 2973                  * redo image covers the whole xtpage since it is new page
 2974                  * to the destination extent;
 2975                  * update of bmap for the free of source extent
 2976                  * of the target xtpage itself:
 2977                  * update of bmap for the allocation of destination extent
 2978                  * of the target xtpage itself:
 2979                  * update of bmap for the extents covered by xad entries in
 2980                  * the target xtpage is not necessary since they are not
 2981                  * updated;
 2982                  * if not committed before this relocation,
 2983                  * target page may contain XAD_NEW entries which must
 2984                  * be scanned for bmap update (logredo() always
 2985                  * scan xtpage REDOPAGE image for bmap update);
 2986                  * if committed before this relocation (tlckRELOCATE),
 2987                  * scan may be skipped by commit() and logredo();
 2988                  */
 2989                 BT_MARK_DIRTY(mp, ip);
 2990                 /* tlckNEW init  xtlck->lwm.offset = XTENTRYSTART; */
 2991                 tlck = txLock(tid, ip, mp, tlckXTREE | tlckNEW);
 2992                 xtlck = (struct xtlock *) & tlck->lock;
 2993 
 2994                 /* update the self address in the xtpage header */
 2995                 pxd = &p->header.self;
 2996                 PXDaddress(pxd, nxaddr);
 2997 
 2998                 /* linelock for the after image of the whole page */
 2999                 xtlck->lwm.length =
 3000                     le16_to_cpu(p->header.nextindex) - xtlck->lwm.offset;
 3001 
 3002                 /* update the buffer extent descriptor of target xtpage */
 3003                 xsize = xlen << JFS_SBI(ip->i_sb)->l2bsize;
 3004                 bmSetXD(mp, nxaddr, xsize);
 3005 
 3006                 /* unpin the target page to new homeward bound */
 3007                 XT_PUTPAGE(mp);
 3008                 jfs_info("xtRelocate: target xtpage relocated.");
 3009         }
 3010 
 3011         /*
 3012          *      3. acquire maplock for the source extent to be freed;
 3013          *
 3014          * acquire a maplock saving the src relocated extent address;
 3015          * to free of the extent at commit time;
 3016          */
 3017       out:
 3018         /* if DATAEXT relocation, write a LOG_UPDATEMAP record for
 3019          * free PXD of the source data extent (logredo() will update
 3020          * bmap for free of source data extent), and update bmap for
 3021          * free of the source data extent;
 3022          */
 3023         if (xtype == DATAEXT)
 3024                 tlck = txMaplock(tid, ip, tlckMAP);
 3025         /* if XTPAGE relocation, write a LOG_NOREDOPAGE record
 3026          * for the source xtpage (logredo() will init NoRedoPage
 3027          * filter and will also update bmap for free of the source
 3028          * xtpage), and update bmap for free of the source xtpage;
 3029          * N.B. We use tlckMAP instead of tlkcXTREE because there
 3030          *      is no buffer associated with this lock since the buffer
 3031          *      has been redirected to the target location.
 3032          */
 3033         else                    /* (xtype  == XTPAGE) */
 3034                 tlck = txMaplock(tid, ip, tlckMAP | tlckRELOCATE);
 3035 
 3036         pxdlock = (struct pxd_lock *) & tlck->lock;
 3037         pxdlock->flag = mlckFREEPXD;
 3038         PXDaddress(&pxdlock->pxd, oxaddr);
 3039         PXDlength(&pxdlock->pxd, xlen);
 3040         pxdlock->index = 1;
 3041 
 3042         /*
 3043          *      4. update the parent xad entry for relocation;
 3044          *
 3045          * acquire tlck for the parent entry with XAD_NEW as entry
 3046          * update which will write LOG_REDOPAGE and update bmap for
 3047          * allocation of XAD_NEW destination extent;
 3048          */
 3049         jfs_info("xtRelocate: update parent xad entry.");
 3050         BT_MARK_DIRTY(pmp, ip);
 3051         tlck = txLock(tid, ip, pmp, tlckXTREE | tlckGROW);
 3052         xtlck = (struct xtlock *) & tlck->lock;
 3053 
 3054         /* update the XAD with the new destination extent; */
 3055         xad = &pp->xad[index];
 3056         xad->flag |= XAD_NEW;
 3057         XADaddress(xad, nxaddr);
 3058 
 3059         xtlck->lwm.offset = min(index, xtlck->lwm.offset);
 3060         xtlck->lwm.length = le16_to_cpu(pp->header.nextindex) -
 3061             xtlck->lwm.offset;
 3062 
 3063         /* unpin the parent xtpage */
 3064         XT_PUTPAGE(pmp);
 3065 
 3066         return rc;
 3067 }
 3068 
 3069 
 3070 /*
 3071  *      xtSearchNode()
 3072  *
 3073  * function:    search for the internal xad entry covering specified extent.
 3074  *              This function is mainly used by defragfs utility.
 3075  *
 3076  * parameters:
 3077  *      ip      - file object;
 3078  *      xad     - extent to find;
 3079  *      cmpp    - comparison result:
 3080  *      btstack - traverse stack;
 3081  *      flag    - search process flag;
 3082  *
 3083  * returns:
 3084  *      btstack contains (bn, index) of search path traversed to the entry.
 3085  *      *cmpp is set to result of comparison with the entry returned.
 3086  *      the page containing the entry is pinned at exit.
 3087  */
 3088 static int xtSearchNode(struct inode *ip, xad_t * xad,  /* required XAD entry */
 3089                         int *cmpp, struct btstack * btstack, int flag)
 3090 {
 3091         int rc = 0;
 3092         s64 xoff, xaddr;
 3093         int xlen;
 3094         int cmp = 1;            /* init for empty page */
 3095         s64 bn;                 /* block number */
 3096         struct metapage *mp;    /* meta-page buffer */
 3097         xtpage_t *p;            /* page */
 3098         int base, index, lim;
 3099         struct btframe *btsp;
 3100         s64 t64;
 3101 
 3102         BT_CLR(btstack);
 3103 
 3104         xoff = offsetXAD(xad);
 3105         xlen = lengthXAD(xad);
 3106         xaddr = addressXAD(xad);
 3107 
 3108         /*
 3109          *      search down tree from root:
 3110          *
 3111          * between two consecutive entries of <Ki, Pi> and <Kj, Pj> of
 3112          * internal page, child page Pi contains entry with k, Ki <= K < Kj.
 3113          *
 3114          * if entry with search key K is not found
 3115          * internal page search find the entry with largest key Ki
 3116          * less than K which point to the child page to search;
 3117          * leaf page search find the entry with smallest key Kj
 3118          * greater than K so that the returned index is the position of
 3119          * the entry to be shifted right for insertion of new entry.
 3120          * for empty tree, search key is greater than any key of the tree.
 3121          *
 3122          * by convention, root bn = 0.
 3123          */
 3124         for (bn = 0;;) {
 3125                 /* get/pin the page to search */
 3126                 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 3127                 if (rc)
 3128                         return rc;
 3129                 if (p->header.flag & BT_LEAF)
 3130                         return ESTALE;
 3131 
 3132                 lim = le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
 3133 
 3134                 /*
 3135                  * binary search with search key K on the current page
 3136                  */
 3137                 for (base = XTENTRYSTART; lim; lim >>= 1) {
 3138                         index = base + (lim >> 1);
 3139 
 3140                         XT_CMP(cmp, xoff, &p->xad[index], t64);
 3141                         if (cmp == 0) {
 3142                                 /*
 3143                                  *      search hit
 3144                                  *
 3145                                  * verify for exact match;
 3146                                  */
 3147                                 if (xaddr == addressXAD(&p->xad[index]) &&
 3148                                     xoff == offsetXAD(&p->xad[index])) {
 3149                                         *cmpp = cmp;
 3150 
 3151                                         /* save search result */
 3152                                         btsp = btstack->top;
 3153                                         btsp->bn = bn;
 3154                                         btsp->index = index;
 3155                                         btsp->mp = mp;
 3156 
 3157                                         return 0;
 3158                                 }
 3159 
 3160                                 /* descend/search its child page */
 3161                                 goto next;
 3162                         }
 3163 
 3164                         if (cmp > 0) {
 3165                                 base = index + 1;
 3166                                 --lim;
 3167                         }
 3168                 }
 3169 
 3170                 /*
 3171                  *      search miss - non-leaf page:
 3172                  *
 3173                  * base is the smallest index with key (Kj) greater than
 3174                  * search key (K) and may be zero or maxentry index.
 3175                  * if base is non-zero, decrement base by one to get the parent
 3176                  * entry of the child page to search.
 3177                  */
 3178                 index = base ? base - 1 : base;
 3179 
 3180                 /*
 3181                  * go down to child page
 3182                  */
 3183               next:
 3184                 /* get the child page block number */
 3185                 bn = addressXAD(&p->xad[index]);
 3186 
 3187                 /* unpin the parent page */
 3188                 XT_PUTPAGE(mp);
 3189         }
 3190 }
 3191 
 3192 
 3193 /*
 3194  *      xtRelink()
 3195  *
 3196  * function:
 3197  *      link around a freed page.
 3198  *
 3199  * Parameter:
 3200  *      int           tid,
 3201  *      struct inode    *ip,
 3202  *      xtpage_t        *p)
 3203  *
 3204  * returns:
 3205  */
 3206 static int xtRelink(tid_t tid, struct inode *ip, xtpage_t * p)
 3207 {
 3208         int rc = 0;
 3209         struct metapage *mp;
 3210         s64 nextbn, prevbn;
 3211         struct tlock *tlck;
 3212 
 3213         nextbn = le64_to_cpu(p->header.next);
 3214         prevbn = le64_to_cpu(p->header.prev);
 3215 
 3216         /* update prev pointer of the next page */
 3217         if (nextbn != 0) {
 3218                 XT_GETPAGE(ip, nextbn, mp, PSIZE, p, rc);
 3219                 if (rc)
 3220                         return rc;
 3221 
 3222                 /*
 3223                  * acquire a transaction lock on the page;
 3224                  *
 3225                  * action: update prev pointer;
 3226                  */
 3227                 BT_MARK_DIRTY(mp, ip);
 3228                 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
 3229 
 3230                 /* the page may already have been tlock'd */
 3231 
 3232                 p->header.prev = cpu_to_le64(prevbn);
 3233 
 3234                 XT_PUTPAGE(mp);
 3235         }
 3236 
 3237         /* update next pointer of the previous page */
 3238         if (prevbn != 0) {
 3239                 XT_GETPAGE(ip, prevbn, mp, PSIZE, p, rc);
 3240                 if (rc)
 3241                         return rc;
 3242 
 3243                 /*
 3244                  * acquire a transaction lock on the page;
 3245                  *
 3246                  * action: update next pointer;
 3247                  */
 3248                 BT_MARK_DIRTY(mp, ip);
 3249                 tlck = txLock(tid, ip, mp, tlckXTREE | tlckRELINK);
 3250 
 3251                 /* the page may already have been tlock'd */
 3252 
 3253                 p->header.next = le64_to_cpu(nextbn);
 3254 
 3255                 XT_PUTPAGE(mp);
 3256         }
 3257 
 3258         return 0;
 3259 }
 3260 #endif                          /*  _STILL_TO_PORT */
 3261 
 3262 
 3263 /*
 3264  *      xtInitRoot()
 3265  *
 3266  * initialize file root (inline in inode)
 3267  */
 3268 void xtInitRoot(tid_t tid, struct inode *ip)
 3269 {
 3270         xtpage_t *p;
 3271 
 3272         /*
 3273          * acquire a transaction lock on the root
 3274          *
 3275          * action:
 3276          */
 3277         txLock(tid, ip, (struct metapage *) &JFS_IP(ip)->bxflag,
 3278                       tlckXTREE | tlckNEW);
 3279         p = &JFS_IP(ip)->i_xtroot;
 3280 
 3281         p->header.flag = DXD_INDEX | BT_ROOT | BT_LEAF;
 3282         p->header.nextindex = cpu_to_le16(XTENTRYSTART);
 3283 
 3284         if (S_ISDIR(ip->i_mode))
 3285                 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT_DIR);
 3286         else {
 3287                 p->header.maxentry = cpu_to_le16(XTROOTINITSLOT);
 3288                 ip->i_size = 0;
 3289         }
 3290 
 3291 
 3292         return;
 3293 }
 3294 
 3295 
 3296 /*
 3297  * We can run into a deadlock truncating a file with a large number of
 3298  * xtree pages (large fragmented file).  A robust fix would entail a
 3299  * reservation system where we would reserve a number of metadata pages
 3300  * and tlocks which we would be guaranteed without a deadlock.  Without
 3301  * this, a partial fix is to limit number of metadata pages we will lock
 3302  * in a single transaction.  Currently we will truncate the file so that
 3303  * no more than 50 leaf pages will be locked.  The caller of xtTruncate
 3304  * will be responsible for ensuring that the current transaction gets
 3305  * committed, and that subsequent transactions are created to truncate
 3306  * the file further if needed.
 3307  */
 3308 #define MAX_TRUNCATE_LEAVES 50
 3309 
 3310 /*
 3311  *      xtTruncate()
 3312  *
 3313  * function:
 3314  *      traverse for truncation logging backward bottom up;
 3315  *      terminate at the last extent entry at the current subtree
 3316  *      root page covering new down size.
 3317  *      truncation may occur within the last extent entry.
 3318  *
 3319  * parameter:
 3320  *      int           tid,
 3321  *      struct inode    *ip,
 3322  *      s64           newsize,
 3323  *      int           type)   {PWMAP, PMAP, WMAP; DELETE, TRUNCATE}
 3324  *
 3325  * return:
 3326  *
 3327  * note:
 3328  *      PWMAP:
 3329  *       1. truncate (non-COMMIT_NOLINK file)
 3330  *          by jfs_truncate() or jfs_open(O_TRUNC):
 3331  *          xtree is updated;
 3332  *       2. truncate index table of directory when last entry removed
 3333  *       map update via tlock at commit time;
 3334  *      PMAP:
 3335  *       Call xtTruncate_pmap instead
 3336  *      WMAP:
 3337  *       1. remove (free zero link count) on last reference release
 3338  *          (pmap has been freed at commit zero link count);
 3339  *       2. truncate (COMMIT_NOLINK file, i.e., tmp file):
 3340  *          xtree is updated;
 3341  *       map update directly at truncation time;
 3342  *
 3343  *      if (DELETE)
 3344  *              no LOG_NOREDOPAGE is required (NOREDOFILE is sufficient);
 3345  *      else if (TRUNCATE)
 3346  *              must write LOG_NOREDOPAGE for deleted index page;
 3347  *
 3348  * pages may already have been tlocked by anonymous transactions
 3349  * during file growth (i.e., write) before truncation;
 3350  *
 3351  * except last truncated entry, deleted entries remains as is
 3352  * in the page (nextindex is updated) for other use
 3353  * (e.g., log/update allocation map): this avoid copying the page
 3354  * info but delay free of pages;
 3355  *
 3356  */
 3357 s64 xtTruncate(tid_t tid, struct inode *ip, s64 newsize, int flag)
 3358 {
 3359         int rc = 0;
 3360         s64 teof;
 3361         struct metapage *mp;
 3362         xtpage_t *p;
 3363         s64 bn;
 3364         int index, nextindex;
 3365         xad_t *xad;
 3366         s64 xoff, xaddr;
 3367         int xlen, len, freexlen;
 3368         struct btstack btstack;
 3369         struct btframe *parent;
 3370         struct tblock *tblk = 0;
 3371         struct tlock *tlck = 0;
 3372         struct xtlock *xtlck = 0;
 3373         struct xdlistlock xadlock;      /* maplock for COMMIT_WMAP */
 3374         struct pxd_lock *pxdlock;               /* maplock for COMMIT_WMAP */
 3375         s64 nfreed;
 3376         int freed, log;
 3377         int locked_leaves = 0;
 3378 
 3379         /* save object truncation type */
 3380         if (tid) {
 3381                 tblk = tid_to_tblock(tid);
 3382                 tblk->xflag |= flag;
 3383         }
 3384 
 3385         nfreed = 0;
 3386 
 3387         flag &= COMMIT_MAP;
 3388         assert(flag != COMMIT_PMAP);
 3389 
 3390         if (flag == COMMIT_PWMAP)
 3391                 log = 1;
 3392         else {
 3393                 log = 0;
 3394                 xadlock.flag = mlckFREEXADLIST;
 3395                 xadlock.index = 1;
 3396         }
 3397 
 3398         /*
 3399          * if the newsize is not an integral number of pages,
 3400          * the file between newsize and next page boundary will
 3401          * be cleared.
 3402          * if truncating into a file hole, it will cause
 3403          * a full block to be allocated for the logical block.
 3404          */
 3405 
 3406         /*
 3407          * release page blocks of truncated region <teof, eof>
 3408          *
 3409          * free the data blocks from the leaf index blocks.
 3410          * delete the parent index entries corresponding to
 3411          * the freed child data/index blocks.
 3412          * free the index blocks themselves which aren't needed
 3413          * in new sized file.
 3414          *
 3415          * index blocks are updated only if the blocks are to be
 3416          * retained in the new sized file.
 3417          * if type is PMAP, the data and index pages are NOT
 3418          * freed, and the data and index blocks are NOT freed
 3419          * from  working map.
 3420          * (this will allow continued access of data/index of
 3421          * temporary file (zerolink count file truncated to zero-length)).
 3422          */
 3423         teof = (newsize + (JFS_SBI(ip->i_sb)->bsize - 1)) >>
 3424             JFS_SBI(ip->i_sb)->l2bsize;
 3425 
 3426         /* clear stack */
 3427         BT_CLR(&btstack);
 3428 
 3429         /*
 3430          * start with root
 3431          *
 3432          * root resides in the inode
 3433          */
 3434         bn = 0;
 3435 
 3436         /*
 3437          * first access of each page:
 3438          */
 3439       getPage:
 3440         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 3441         if (rc)
 3442                 return -rc;
 3443 
 3444         /* process entries backward from last index */
 3445         index = le16_to_cpu(p->header.nextindex) - 1;
 3446 
 3447         if (p->header.flag & BT_INTERNAL)
 3448                 goto getChild;
 3449 
 3450         /*
 3451          *      leaf page
 3452          */
 3453 
 3454         /* Since this is the rightmost leaf, and we may have already freed
 3455          * a page that was formerly to the right, let's make sure that the
 3456          * next pointer is zero.
 3457          */
 3458         p->header.next = 0;
 3459 
 3460         freed = 0;
 3461 
 3462         /* does region covered by leaf page precede Teof ? */
 3463         xad = &p->xad[index];
 3464         xoff = offsetXAD(xad);
 3465         xlen = lengthXAD(xad);
 3466         if (teof >= xoff + xlen) {
 3467                 XT_PUTPAGE(mp);
 3468                 goto getParent;
 3469         }
 3470 
 3471         /* (re)acquire tlock of the leaf page */
 3472         if (log) {
 3473                 if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
 3474                         /*
 3475                          * We need to limit the size of the transaction
 3476                          * to avoid exhausting pagecache & tlocks
 3477                          */
 3478                         XT_PUTPAGE(mp);
 3479                         newsize = (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
 3480                         goto getParent;
 3481                 }
 3482                 tlck = txLock(tid, ip, mp, tlckXTREE);
 3483                 tlck->type = tlckXTREE | tlckTRUNCATE;
 3484                 xtlck = (struct xtlock *) & tlck->lock;
 3485                 xtlck->hwm.offset = le16_to_cpu(p->header.nextindex) - 1;
 3486         }
 3487         BT_MARK_DIRTY(mp, ip);
 3488 
 3489         /*
 3490          * scan backward leaf page entries
 3491          */
 3492         for (; index >= XTENTRYSTART; index--) {
 3493                 xad = &p->xad[index];
 3494                 xoff = offsetXAD(xad);
 3495                 xlen = lengthXAD(xad);
 3496                 xaddr = addressXAD(xad);
 3497 
 3498                 /*
 3499                  * The "data" for a directory is indexed by the block
 3500                  * device's address space.  This metadata must be invalidated
 3501                  * here
 3502                  */
 3503                 if (S_ISDIR(ip->i_mode) && (teof == 0))
 3504                         invalidate_xad_metapages(ip, *xad);
 3505                 /*
 3506                  * entry beyond eof: continue scan of current page
 3507                  *          xad
 3508                  * ---|---=======------->
 3509                  *   eof
 3510                  */
 3511                 if (teof < xoff) {
 3512                         nfreed += xlen;
 3513                         continue;
 3514                 }
 3515 
 3516                 /*
 3517                  * (xoff <= teof): last entry to be deleted from page;
 3518                  * If other entries remain in page: keep and update the page.
 3519                  */
 3520 
 3521                 /*
 3522                  * eof == entry_start: delete the entry
 3523                  *           xad
 3524                  * -------|=======------->
 3525                  *       eof
 3526                  *
 3527                  */
 3528                 if (teof == xoff) {
 3529                         nfreed += xlen;
 3530 
 3531                         if (index == XTENTRYSTART)
 3532                                 break;
 3533 
 3534                         nextindex = index;
 3535                 }
 3536                 /*
 3537                  * eof within the entry: truncate the entry.
 3538                  *          xad
 3539                  * -------===|===------->
 3540                  *          eof
 3541                  */
 3542                 else if (teof < xoff + xlen) {
 3543                         /* update truncated entry */
 3544                         len = teof - xoff;
 3545                         freexlen = xlen - len;
 3546                         XADlength(xad, len);
 3547 
 3548                         /* save pxd of truncated extent in tlck */
 3549                         xaddr += len;
 3550                         if (log) {      /* COMMIT_PWMAP */
 3551                                 xtlck->lwm.offset = (xtlck->lwm.offset) ?
 3552                                     min(index, (int)xtlck->lwm.offset) : index;
 3553                                 xtlck->lwm.length = index + 1 -
 3554                                     xtlck->lwm.offset;
 3555                                 xtlck->twm.offset = index;
 3556                                 pxdlock = (struct pxd_lock *) & xtlck->pxdlock;
 3557                                 pxdlock->flag = mlckFREEPXD;
 3558                                 PXDaddress(&pxdlock->pxd, xaddr);
 3559                                 PXDlength(&pxdlock->pxd, freexlen);
 3560                         }
 3561                         /* free truncated extent */
 3562                         else {  /* COMMIT_WMAP */
 3563 
 3564                                 pxdlock = (struct pxd_lock *) & xadlock;
 3565                                 pxdlock->flag = mlckFREEPXD;
 3566                                 PXDaddress(&pxdlock->pxd, xaddr);
 3567                                 PXDlength(&pxdlock->pxd, freexlen);
 3568                                 txFreeMap(ip, pxdlock, 0, COMMIT_WMAP);
 3569 
 3570                                 /* reset map lock */
 3571                                 xadlock.flag = mlckFREEXADLIST;
 3572                         }
 3573 
 3574                         /* current entry is new last entry; */
 3575                         nextindex = index + 1;
 3576 
 3577                         nfreed += freexlen;
 3578                 }
 3579                 /*
 3580                  * eof beyond the entry:
 3581                  *          xad
 3582                  * -------=======---|--->
 3583                  *                 eof
 3584                  */
 3585                 else {          /* (xoff + xlen < teof) */
 3586 
 3587                         nextindex = index + 1;
 3588                 }
 3589 
 3590                 if (nextindex < le16_to_cpu(p->header.nextindex)) {
 3591                         if (!log) {     /* COMMIT_WAMP */
 3592                                 xadlock.xdlist = &p->xad[nextindex];
 3593                                 xadlock.count =
 3594                                     le16_to_cpu(p->header.nextindex) -
 3595                                     nextindex;
 3596                                 txFreeMap(ip, (struct maplock *) & xadlock, 0,
 3597                                           COMMIT_WMAP);
 3598                         }
 3599                         p->header.nextindex = cpu_to_le16(nextindex);
 3600                 }
 3601 
 3602                 XT_PUTPAGE(mp);
 3603 
 3604                 /* assert(freed == 0); */
 3605                 goto getParent;
 3606         }                       /* end scan of leaf page entries */
 3607 
 3608         freed = 1;
 3609 
 3610         /*
 3611          * leaf page become empty: free the page if type != PMAP
 3612          */
 3613         if (log) {              /* COMMIT_PWMAP */
 3614                 /* txCommit() with tlckFREE:
 3615                  * free data extents covered by leaf [XTENTRYSTART:hwm);
 3616                  * invalidate leaf if COMMIT_PWMAP;
 3617                  * if (TRUNCATE), will write LOG_NOREDOPAGE;
 3618                  */
 3619                 tlck->type = tlckXTREE | tlckFREE;
 3620         } else {                /* COMMIT_WAMP */
 3621 
 3622                 /* free data extents covered by leaf */
 3623                 xadlock.xdlist = &p->xad[XTENTRYSTART];
 3624                 xadlock.count =
 3625                     le16_to_cpu(p->header.nextindex) - XTENTRYSTART;
 3626                 txFreeMap(ip, (struct maplock *) & xadlock, 0, COMMIT_WMAP);
 3627         }
 3628 
 3629         if (p->header.flag & BT_ROOT) {
 3630                 p->header.flag &= ~BT_INTERNAL;
 3631                 p->header.flag |= BT_LEAF;
 3632                 p->header.nextindex = cpu_to_le16(XTENTRYSTART);
 3633 
 3634                 XT_PUTPAGE(mp); /* debug */
 3635                 goto out;
 3636         } else {
 3637                 if (log) {      /* COMMIT_PWMAP */
 3638                         /* page will be invalidated at tx completion
 3639                          */
 3640                         XT_PUTPAGE(mp);
 3641                 } else {        /* COMMIT_WMAP */
 3642 
 3643                         if (mp->lid)
 3644                                 lid_to_tlock(mp->lid)->flag |= tlckFREELOCK;
 3645 
 3646                         /* invalidate empty leaf page */
 3647                         discard_metapage(mp);
 3648                 }
 3649         }
 3650 
 3651         /*
 3652          * the leaf page become empty: delete the parent entry
 3653          * for the leaf page if the parent page is to be kept
 3654          * in the new sized file.
 3655          */
 3656 
 3657         /*
 3658          * go back up to the parent page
 3659          */
 3660       getParent:
 3661         /* pop/restore parent entry for the current child page */
 3662         if ((parent = BT_POP(&btstack)) == NULL)
 3663                 /* current page must have been root */
 3664                 goto out;
 3665 
 3666         /* get back the parent page */
 3667         bn = parent->bn;
 3668         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 3669         if (rc)
 3670                 return -rc;
 3671 
 3672         index = parent->index;
 3673 
 3674         /*
 3675          * child page was not empty:
 3676          */
 3677         if (freed == 0) {
 3678                 /* has any entry deleted from parent ? */
 3679                 if (index < le16_to_cpu(p->header.nextindex) - 1) {
 3680                         /* (re)acquire tlock on the parent page */
 3681                         if (log) {      /* COMMIT_PWMAP */
 3682                                 /* txCommit() with tlckTRUNCATE:
 3683                                  * free child extents covered by parent [);
 3684                                  */
 3685                                 tlck = txLock(tid, ip, mp, tlckXTREE);
 3686                                 xtlck = (struct xtlock *) & tlck->lock;
 3687                                 if (!(tlck->type & tlckTRUNCATE)) {
 3688                                         xtlck->hwm.offset =
 3689                                             le16_to_cpu(p->header.
 3690                                                         nextindex) - 1;
 3691                                         tlck->type =
 3692                                             tlckXTREE | tlckTRUNCATE;
 3693                                 }
 3694                         } else {        /* COMMIT_WMAP */
 3695 
 3696                                 /* free child extents covered by parent */
 3697                                 xadlock.xdlist = &p->xad[index + 1];
 3698                                 xadlock.count =
 3699                                     le16_to_cpu(p->header.nextindex) -
 3700                                     index - 1;
 3701                                 txFreeMap(ip, (struct maplock *) & xadlock, 0,
 3702                                           COMMIT_WMAP);
 3703                         }
 3704                         BT_MARK_DIRTY(mp, ip);
 3705 
 3706                         p->header.nextindex = cpu_to_le16(index + 1);
 3707                 }
 3708                 XT_PUTPAGE(mp);
 3709                 goto getParent;
 3710         }
 3711 
 3712         /*
 3713          * child page was empty:
 3714          */
 3715         nfreed += lengthXAD(&p->xad[index]);
 3716 
 3717         /*
 3718          * During working map update, child page's tlock must be handled
 3719          * before parent's.  This is because the parent's tlock will cause
 3720          * the child's disk space to be marked available in the wmap, so
 3721          * it's important that the child page be released by that time.
 3722          *
 3723          * ToDo:  tlocks should be on doubly-linked list, so we can
 3724          * quickly remove it and add it to the end.
 3725          */
 3726 
 3727         /*
 3728          * Move parent page's tlock to the end of the tid's tlock list
 3729          */
 3730         if (log && mp->lid && (tblk->last != mp->lid) &&
 3731             lid_to_tlock(mp->lid)->tid) {
 3732                 lid_t lid = mp->lid;
 3733                 struct tlock *prev;
 3734 
 3735                 tlck = lid_to_tlock(lid);
 3736 
 3737                 if (tblk->next == lid)
 3738                         tblk->next = tlck->next;
 3739                 else {
 3740                         for (prev = lid_to_tlock(tblk->next);
 3741                              prev->next != lid;
 3742                              prev = lid_to_tlock(prev->next)) {
 3743                                 assert(prev->next);
 3744                         }
 3745                         prev->next = tlck->next;
 3746                 }
 3747                 lid_to_tlock(tblk->last)->next = lid;
 3748                 tlck->next = 0;
 3749                 tblk->last = lid;
 3750         }
 3751 
 3752         /*
 3753          * parent page become empty: free the page
 3754          */
 3755         if (index == XTENTRYSTART) {
 3756                 if (log) {      /* COMMIT_PWMAP */
 3757                         /* txCommit() with tlckFREE:
 3758                          * free child extents covered by parent;
 3759                          * invalidate parent if COMMIT_PWMAP;
 3760                          */
 3761                         tlck = txLock(tid, ip, mp, tlckXTREE);
 3762                         xtlck = (struct xtlock *) & tlck->lock;
 3763                         xtlck->hwm.offset =
 3764                             le16_to_cpu(p->header.nextindex) - 1;
 3765                         tlck->type = tlckXTREE | tlckFREE;
 3766                 } else {        /* COMMIT_WMAP */
 3767 
 3768                         /* free child extents covered by parent */
 3769                         xadlock.xdlist = &p->xad[XTENTRYSTART];
 3770                         xadlock.count =
 3771                             le16_to_cpu(p->header.nextindex) -
 3772                             XTENTRYSTART;
 3773                         txFreeMap(ip, (struct maplock *) & xadlock, 0,
 3774                                   COMMIT_WMAP);
 3775                 }
 3776                 BT_MARK_DIRTY(mp, ip);
 3777 
 3778                 if (p->header.flag & BT_ROOT) {
 3779                         p->header.flag &= ~BT_INTERNAL;
 3780                         p->header.flag |= BT_LEAF;
 3781                         p->header.nextindex = cpu_to_le16(XTENTRYSTART);
 3782                         if (le16_to_cpu(p->header.maxentry) == XTROOTMAXSLOT) {
 3783                                 /*
 3784                                  * Shrink root down to allow inline
 3785                                  * EA (otherwise fsck complains)
 3786                                  */
 3787                                 p->header.maxentry =
 3788                                     cpu_to_le16(XTROOTINITSLOT);
 3789                                 JFS_IP(ip)->mode2 |= INLINEEA;
 3790                         }
 3791 
 3792                         XT_PUTPAGE(mp); /* debug */
 3793                         goto out;
 3794                 } else {
 3795                         if (log) {      /* COMMIT_PWMAP */
 3796                                 /* page will be invalidated at tx completion
 3797                                  */
 3798                                 XT_PUTPAGE(mp);
 3799                         } else {        /* COMMIT_WMAP */
 3800 
 3801                                 if (mp->lid)
 3802                                         lid_to_tlock(mp->lid)->flag |=
 3803                                                 tlckFREELOCK;
 3804 
 3805                                 /* invalidate parent page */
 3806                                 discard_metapage(mp);
 3807                         }
 3808 
 3809                         /* parent has become empty and freed:
 3810                          * go back up to its parent page
 3811                          */
 3812                         /* freed = 1; */
 3813                         goto getParent;
 3814                 }
 3815         }
 3816         /*
 3817          * parent page still has entries for front region;
 3818          */
 3819         else {
 3820                 /* try truncate region covered by preceding entry
 3821                  * (process backward)
 3822                  */
 3823                 index--;
 3824 
 3825                 /* go back down to the child page corresponding
 3826                  * to the entry
 3827                  */
 3828                 goto getChild;
 3829         }
 3830 
 3831         /*
 3832          *      internal page: go down to child page of current entry
 3833          */
 3834       getChild:
 3835         /* save current parent entry for the child page */
 3836         BT_PUSH(&btstack, bn, index);
 3837 
 3838         /* get child page */
 3839         xad = &p->xad[index];
 3840         bn = addressXAD(xad);
 3841 
 3842         /*
 3843          * first access of each internal entry:
 3844          */
 3845         /* release parent page */
 3846         XT_PUTPAGE(mp);
 3847 
 3848         /* process the child page */
 3849         goto getPage;
 3850 
 3851       out:
 3852         /*
 3853          * update file resource stat
 3854          */
 3855         /* set size
 3856          */
 3857         if (S_ISDIR(ip->i_mode) && !newsize)
 3858                 ip->i_size = 1; /* fsck hates zero-length directories */
 3859         else
 3860                 ip->i_size = newsize;
 3861 
 3862         /* update nblocks to reflect freed blocks */
 3863         ip->i_blocks -= LBLK2PBLK(ip->i_sb, nfreed);
 3864 
 3865         /*
 3866          * free tlock of invalidated pages
 3867          */
 3868         if (flag == COMMIT_WMAP)
 3869                 txFreelock(ip);
 3870 
 3871         return newsize;
 3872 }
 3873 
 3874 
 3875 /*
 3876  *      xtTruncate_pmap()
 3877  *
 3878  * function:
 3879  *      Perform truncate to zero lenghth for deleted file, leaving the
 3880  *      the xtree and working map untouched.  This allows the file to
 3881  *      be accessed via open file handles, while the delete of the file
 3882  *      is committed to disk.
 3883  *
 3884  * parameter:
 3885  *      tid_t           tid,
 3886  *      struct inode    *ip,
 3887  *      s64             committed_size)
 3888  *
 3889  * return: new committed size
 3890  *
 3891  * note:
 3892  *
 3893  *      To avoid deadlock by holding too many transaction locks, the
 3894  *      truncation may be broken up into multiple transactions.
 3895  *      The committed_size keeps track of part of the file has been
 3896  *      freed from the pmaps.
 3897  */
 3898 s64 xtTruncate_pmap(tid_t tid, struct inode *ip, s64 committed_size)
 3899 {
 3900         s64 bn;
 3901         struct btstack btstack;
 3902         int cmp;
 3903         int index;
 3904         int locked_leaves = 0;
 3905         struct metapage *mp;
 3906         xtpage_t *p;
 3907         struct btframe *parent;
 3908         int rc;
 3909         struct tblock *tblk;
 3910         struct tlock *tlck = 0;
 3911         xad_t *xad;
 3912         int xlen;
 3913         s64 xoff;
 3914         struct xtlock *xtlck = 0;
 3915 
 3916         /* save object truncation type */
 3917         tblk = tid_to_tblock(tid);
 3918         tblk->xflag |= COMMIT_PMAP;
 3919 
 3920         /* clear stack */
 3921         BT_CLR(&btstack);
 3922 
 3923         if (committed_size) {
 3924                 xoff = (committed_size >> JFS_SBI(ip->i_sb)->l2bsize) - 1;
 3925                 rc = xtSearch(ip, xoff, &cmp, &btstack, 0);
 3926                 if (rc)
 3927                         return -rc;
 3928                 assert(cmp == 0);
 3929                 XT_GETSEARCH(ip, btstack.top, bn, mp, p, index);
 3930         } else {
 3931                 /*
 3932                  * start with root
 3933                  *
 3934                  * root resides in the inode
 3935                  */
 3936                 bn = 0;
 3937 
 3938                 /*
 3939                  * first access of each page:
 3940                  */
 3941       getPage:
 3942                 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 3943                 if (rc)
 3944                         return -rc;
 3945 
 3946                 /* process entries backward from last index */
 3947                 index = le16_to_cpu(p->header.nextindex) - 1;
 3948 
 3949                 if (p->header.flag & BT_INTERNAL)
 3950                         goto getChild;
 3951         }
 3952 
 3953         /*
 3954          *      leaf page
 3955          */
 3956 
 3957         if (++locked_leaves > MAX_TRUNCATE_LEAVES) {
 3958                 /*
 3959                  * We need to limit the size of the transaction
 3960                  * to avoid exhausting pagecache & tlocks
 3961                  */
 3962                 xad = &p->xad[index];
 3963                 xoff = offsetXAD(xad);
 3964                 xlen = lengthXAD(xad);
 3965                 XT_PUTPAGE(mp);
 3966                 return  (xoff + xlen) << JFS_SBI(ip->i_sb)->l2bsize;
 3967         }
 3968         tlck = txLock(tid, ip, mp, tlckXTREE);
 3969         tlck->type = tlckXTREE | tlckFREE;
 3970         xtlck = (struct xtlock *) & tlck->lock;
 3971         xtlck->hwm.offset = index;
 3972 
 3973 
 3974         XT_PUTPAGE(mp);
 3975 
 3976         /*
 3977          * go back up to the parent page
 3978          */
 3979       getParent:
 3980         /* pop/restore parent entry for the current child page */
 3981         if ((parent = BT_POP(&btstack)) == NULL)
 3982                 /* current page must have been root */
 3983                 goto out;
 3984 
 3985         /* get back the parent page */
 3986         bn = parent->bn;
 3987         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 3988         if (rc)
 3989                 return -rc;
 3990 
 3991         index = parent->index;
 3992 
 3993         /*
 3994          * parent page become empty: free the page
 3995          */
 3996         if (index == XTENTRYSTART) {
 3997                 /* txCommit() with tlckFREE:
 3998                  * free child extents covered by parent;
 3999                  * invalidate parent if COMMIT_PWMAP;
 4000                  */
 4001                 tlck = txLock(tid, ip, mp, tlckXTREE);
 4002                 xtlck = (struct xtlock *) & tlck->lock;
 4003                 xtlck->hwm.offset =
 4004                     le16_to_cpu(p->header.nextindex) - 1;
 4005                 tlck->type = tlckXTREE | tlckFREE;
 4006 
 4007                 XT_PUTPAGE(mp);
 4008 
 4009                 if (p->header.flag & BT_ROOT) {
 4010 
 4011                         goto out;
 4012                 } else {
 4013                         goto getParent;
 4014                 }
 4015         }
 4016         /*
 4017          * parent page still has entries for front region;
 4018          */
 4019         else
 4020                 index--;
 4021         /*
 4022          *      internal page: go down to child page of current entry
 4023          */
 4024       getChild:
 4025         /* save current parent entry for the child page */
 4026         BT_PUSH(&btstack, bn, index);
 4027 
 4028         /* get child page */
 4029         xad = &p->xad[index];
 4030         bn = addressXAD(xad);
 4031 
 4032         /*
 4033          * first access of each internal entry:
 4034          */
 4035         /* release parent page */
 4036         XT_PUTPAGE(mp);
 4037 
 4038         /* process the child page */
 4039         goto getPage;
 4040 
 4041       out:
 4042 
 4043         return 0;
 4044 }
 4045 
 4046 
 4047 #ifdef _JFS_DEBUG_XTREE
 4048 /*
 4049  *      xtDisplayTree()
 4050  *
 4051  * function: traverse forward
 4052  */
 4053 int xtDisplayTree(struct inode *ip)
 4054 {
 4055         int rc = 0;
 4056         struct metapage *mp;
 4057         xtpage_t *p;
 4058         s64 bn, pbn;
 4059         int index, lastindex, v, h;
 4060         xad_t *xad;
 4061         struct btstack btstack;
 4062         struct btframe *btsp;
 4063         struct btframe *parent;
 4064 
 4065         printk("display B+-tree.\n");
 4066 
 4067         /* clear stack */
 4068         btsp = btstack.stack;
 4069 
 4070         /*
 4071          * start with root
 4072          *
 4073          * root resides in the inode
 4074          */
 4075         bn = 0;
 4076         v = h = 0;
 4077 
 4078         /*
 4079          * first access of each page:
 4080          */
 4081       getPage:
 4082         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 4083         if (rc)
 4084                 return rc;
 4085 
 4086         /* process entries forward from first index */
 4087         index = XTENTRYSTART;
 4088         lastindex = le16_to_cpu(p->header.nextindex) - 1;
 4089 
 4090         if (p->header.flag & BT_INTERNAL) {
 4091                 /*
 4092                  * first access of each internal page
 4093                  */
 4094                 goto getChild;
 4095         } else {                /* (p->header.flag & BT_LEAF) */
 4096 
 4097                 /*
 4098                  * first access of each leaf page
 4099                  */
 4100                 printf("leaf page ");
 4101                 xtDisplayPage(ip, bn, p);
 4102 
 4103                 /* unpin the leaf page */
 4104                 XT_PUTPAGE(mp);
 4105         }
 4106 
 4107         /*
 4108          * go back up to the parent page
 4109          */
 4110       getParent:
 4111         /* pop/restore parent entry for the current child page */
 4112         if ((parent = (btsp == btstack.stack ? NULL : --btsp)) == NULL)
 4113                 /* current page must have been root */
 4114                 return;
 4115 
 4116         /*
 4117          * parent page scan completed
 4118          */
 4119         if ((index = parent->index) == (lastindex = parent->lastindex)) {
 4120                 /* go back up to the parent page */
 4121                 goto getParent;
 4122         }
 4123 
 4124         /*
 4125          * parent page has entries remaining
 4126          */
 4127         /* get back the parent page */
 4128         bn = parent->bn;
 4129         /* v = parent->level; */
 4130         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 4131         if (rc)
 4132                 return rc;
 4133 
 4134         /* get next parent entry */
 4135         index++;
 4136 
 4137         /*
 4138          * internal page: go down to child page of current entry
 4139          */
 4140       getChild:
 4141         /* push/save current parent entry for the child page */
 4142         btsp->bn = pbn = bn;
 4143         btsp->index = index;
 4144         btsp->lastindex = lastindex;
 4145         /* btsp->level = v; */
 4146         /* btsp->node = h; */
 4147         ++btsp;
 4148 
 4149         /* get child page */
 4150         xad = &p->xad[index];
 4151         bn = addressXAD(xad);
 4152 
 4153         /*
 4154          * first access of each internal entry:
 4155          */
 4156         /* release parent page */
 4157         XT_PUTPAGE(mp);
 4158 
 4159         printk("traverse down 0x%lx[%d]->0x%lx\n", (ulong) pbn, index,
 4160                (ulong) bn);
 4161         v++;
 4162         h = index;
 4163 
 4164         /* process the child page */
 4165         goto getPage;
 4166 }
 4167 
 4168 
 4169 /*
 4170  *      xtDisplayPage()
 4171  *
 4172  * function: display page
 4173  */
 4174 int xtDisplayPage(struct inode *ip, s64 bn, xtpage_t * p)
 4175 {
 4176         int rc = 0;
 4177         struct metapage *mp;
 4178         xad_t *xad;
 4179         s64 xaddr, xoff;
 4180         int xlen, i, j;
 4181 
 4182         if (p == NULL) {
 4183                 XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 4184                 if (rc)
 4185                         return rc;
 4186         }
 4187 
 4188         /* display page control */
 4189         printf("bn:0x%lx flag:0x%x nextindex:%d\n",
 4190                (ulong) bn, p->header.flag,
 4191                le16_to_cpu(p->header.nextindex));
 4192 
 4193         /* display entries */
 4194         xad = &p->xad[XTENTRYSTART];
 4195                 for (i = XTENTRYSTART, j = 1; i < le16_to_cpu(p->header.nextindex);
 4196                      i++, xad++, j++) {
 4197                         xoff = offsetXAD(xad);
 4198                         xaddr = addressXAD(xad);
 4199                         xlen = lengthXAD(xad);
 4200                         printf("\t[%d] 0x%lx:0x%lx(0x%x)", i, (ulong) xoff,
 4201                                (ulong) xaddr, xlen);
 4202 
 4203                         if (j == 4) {
 4204                                 printf("\n");
 4205                                 j = 0;
 4206                 }
 4207         }
 4208 
 4209         printf("\n");
 4210 }
 4211 #endif                          /* _JFS_DEBUG_XTREE */
 4212 
 4213 
 4214 #ifdef _JFS_WIP
 4215 /*
 4216  *      xtGather()
 4217  *
 4218  * function:
 4219  *      traverse for allocation acquiring tlock at commit time
 4220  *      (vs at the time of update) logging backward top down
 4221  *
 4222  * note:
 4223  *      problem - establishing that all new allocation have been
 4224  *      processed both for append and random write in sparse file
 4225  *      at the current entry at the current subtree root page
 4226  *
 4227  */
 4228 int xtGather(t)
 4229 btree_t *t;
 4230 {
 4231         int rc = 0;
 4232         xtpage_t *p;
 4233         u64 bn;
 4234         int index;
 4235         btentry_t *e;
 4236         struct btstack btstack;
 4237         struct btsf *parent;
 4238 
 4239         /* clear stack */
 4240         BT_CLR(&btstack);
 4241 
 4242         /*
 4243          * start with root
 4244          *
 4245          * root resides in the inode
 4246          */
 4247         bn = 0;
 4248         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 4249         if (rc)
 4250                 return rc;
 4251 
 4252         /* new root is NOT pointed by a new entry
 4253            if (p->header.flag & NEW)
 4254            allocate new page lock;
 4255            write a NEWPAGE log;
 4256          */
 4257 
 4258       dopage:
 4259         /*
 4260          * first access of each page:
 4261          */
 4262         /* process entries backward from last index */
 4263         index = le16_to_cpu(p->header.nextindex) - 1;
 4264 
 4265         if (p->header.flag & BT_LEAF) {
 4266                 /*
 4267                  * first access of each leaf page
 4268                  */
 4269                 /* process leaf page entries backward */
 4270                 for (; index >= XTENTRYSTART; index--) {
 4271                         e = &p->xad[index];
 4272                         /*
 4273                          * if newpage, log NEWPAGE.
 4274                          *
 4275                          if (e->flag & XAD_NEW) {
 4276                          nfound =+ entry->length;
 4277                          update current page lock for the entry;
 4278                          newpage(entry);
 4279                          *
 4280                          * if moved, log move.
 4281                          *
 4282                          } else if (e->flag & XAD_MOVED) {
 4283                          reset flag;
 4284                          update current page lock for the entry;
 4285                          }
 4286                          */
 4287                 }
 4288 
 4289                 /* unpin the leaf page */
 4290                 XT_PUTPAGE(mp);
 4291 
 4292                 /*
 4293                  * go back up to the parent page
 4294                  */
 4295               getParent:
 4296                 /* restore parent entry for the current child page */
 4297                 if ((parent = BT_POP(&btstack)) == NULL)
 4298                         /* current page must have been root */
 4299                         return 0;
 4300 
 4301                 if ((index = parent->index) == XTENTRYSTART) {
 4302                         /*
 4303                          * parent page scan completed
 4304                          */
 4305                         /* go back up to the parent page */
 4306                         goto getParent;
 4307                 } else {
 4308                         /*
 4309                          * parent page has entries remaining
 4310                          */
 4311                         /* get back the parent page */
 4312                         bn = parent->bn;
 4313                         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 4314                         if (rc)
 4315                                 return EIO;
 4316 
 4317                         /* first subroot page which
 4318                          * covers all new allocated blocks
 4319                          * itself not new/modified.
 4320                          * (if modified from split of descendent,
 4321                          * go down path of split page)
 4322 
 4323                          if (nfound == nnew &&
 4324                          !(p->header.flag & (NEW | MOD)))
 4325                          exit scan;
 4326                          */
 4327 
 4328                         /* process parent page entries backward */
 4329                         index--;
 4330                 }
 4331         } else {
 4332                 /*
 4333                  * first access of each internal page
 4334                  */
 4335         }
 4336 
 4337         /*
 4338          * internal page: go down to child page of current entry
 4339          */
 4340 
 4341         /* save current parent entry for the child page */
 4342         BT_PUSH(&btstack, bn, index);
 4343 
 4344         /* get current entry for the child page */
 4345         e = &p->xad[index];
 4346 
 4347         /*
 4348          * first access of each internal entry:
 4349          */
 4350         /*
 4351          * if new entry, log btree_tnewentry.
 4352          *
 4353          if (e->flag & XAD_NEW)
 4354          update parent page lock for the entry;
 4355          */
 4356 
 4357         /* release parent page */
 4358         XT_PUTPAGE(mp);
 4359 
 4360         /* get child page */
 4361         bn = e->bn;
 4362         XT_GETPAGE(ip, bn, mp, PSIZE, p, rc);
 4363         if (rc)
 4364                 return rc;
 4365 
 4366         /*
 4367          * first access of each non-root page:
 4368          */
 4369         /*
 4370          * if new, log btree_newpage.
 4371          *
 4372          if (p->header.flag & NEW)
 4373          allocate new page lock;
 4374          write a NEWPAGE log (next, prev);
 4375          */
 4376 
 4377         /* process the child page */
 4378         goto dopage;
 4379 
 4380       out:
 4381         return 0;
 4382 }
 4383 #endif                          /* _JFS_WIP */
 4384 
 4385 
 4386 #ifdef CONFIG_JFS_STATISTICS
 4387 int jfs_xtstat_read(char *buffer, char **start, off_t offset, int length,
 4388                     int *eof, void *data)
 4389 {
 4390         int len = 0;
 4391         off_t begin;
 4392 
 4393         len += sprintf(buffer,
 4394                        "JFS Xtree statistics\n"
 4395                        "====================\n"
 4396                        "searches = %d\n"
 4397                        "fast searches = %d\n"
 4398                        "splits = %d\n",
 4399                        xtStat.search,
 4400                        xtStat.fastSearch,
 4401                        xtStat.split);
 4402 
 4403         begin = offset;
 4404         *start = buffer + begin;
 4405         len -= begin;
 4406 
 4407         if (len > length)
 4408                 len = length;
 4409         else
 4410                 *eof = 1;
 4411 
 4412         if (len < 0)
 4413                 len = 0;
 4414 
 4415         return len;
 4416 }
 4417 #endif

Cache object: 20bc293ffb4722a3f76fd63c82addfbf


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.