The Design and Implementation of the FreeBSD Operating System, Second Edition
Now available: The Design and Implementation of the FreeBSD Operating System (Second Edition)


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]

FreeBSD/Linux Kernel Cross Reference
sys/fs/reiserfs/do_balan.c

Version: -  FREEBSD  -  FREEBSD-13-STABLE  -  FREEBSD-13-0  -  FREEBSD-12-STABLE  -  FREEBSD-12-0  -  FREEBSD-11-STABLE  -  FREEBSD-11-0  -  FREEBSD-10-STABLE  -  FREEBSD-10-0  -  FREEBSD-9-STABLE  -  FREEBSD-9-0  -  FREEBSD-8-STABLE  -  FREEBSD-8-0  -  FREEBSD-7-STABLE  -  FREEBSD-7-0  -  FREEBSD-6-STABLE  -  FREEBSD-6-0  -  FREEBSD-5-STABLE  -  FREEBSD-5-0  -  FREEBSD-4-STABLE  -  FREEBSD-3-STABLE  -  FREEBSD22  -  l41  -  OPENBSD  -  linux-2.6  -  MK84  -  PLAN9  -  xnu-8792 
SearchContext: -  none  -  3  -  10 

    1 /*
    2  * Copyright 2000-2002 by Hans Reiser, licensing governed by reiserfs/README
    3  */
    4 
    5 /* Now we have all buffers that must be used in balancing of the tree   */
    6 /* Further calculations can not cause schedule(), and thus the buffer   */
    7 /* tree will be stable until the balancing will be finished             */
    8 /* balance the tree according to the analysis made before,              */
    9 /* and using buffers obtained after all above.                          */
   10 
   11 
   12 /**
   13  ** balance_leaf_when_delete
   14  ** balance_leaf
   15  ** do_balance
   16  **
   17  **/
   18 
   19 #include <linux/config.h>
   20 #include <asm/uaccess.h>
   21 #include <linux/sched.h>
   22 #include <linux/reiserfs_fs.h>
   23 
   24 #ifdef CONFIG_REISERFS_CHECK
   25 
   26 struct tree_balance * cur_tb = NULL; /* detects whether more than one
   27                                         copy of tb exists as a means
   28                                         of checking whether schedule
   29                                         is interrupting do_balance */
   30 #endif
   31 
   32 
   33 inline void do_balance_mark_leaf_dirty (struct tree_balance * tb, 
   34                                         struct buffer_head * bh, int flag)
   35 {
   36     if (reiserfs_dont_log(tb->tb_sb)) {
   37         if (!test_and_set_bit(BH_Dirty, &bh->b_state)) {
   38             __mark_buffer_dirty(bh) ;
   39             tb->need_balance_dirty = 1;
   40         }
   41     } else {
   42         int windex = push_journal_writer("do_balance") ;
   43         journal_mark_dirty(tb->transaction_handle, tb->transaction_handle->t_super, bh) ;
   44         pop_journal_writer(windex) ;
   45     }
   46 }
   47 
   48 #define do_balance_mark_internal_dirty do_balance_mark_leaf_dirty
   49 #define do_balance_mark_sb_dirty do_balance_mark_leaf_dirty
   50 
   51 
   52 /* summary: 
   53  if deleting something ( tb->insert_size[0] < 0 )
   54    return(balance_leaf_when_delete()); (flag d handled here)
   55  else
   56    if lnum is larger than 0 we put items into the left node
   57    if rnum is larger than 0 we put items into the right node
   58    if snum1 is larger than 0 we put items into the new node s1
   59    if snum2 is larger than 0 we put items into the new node s2 
   60 Note that all *num* count new items being created.
   61 
   62 It would be easier to read balance_leaf() if each of these summary
   63 lines was a separate procedure rather than being inlined.  I think
   64 that there are many passages here and in balance_leaf_when_delete() in
   65 which two calls to one procedure can replace two passages, and it
   66 might save cache space and improve software maintenance costs to do so.  
   67 
   68 Vladimir made the perceptive comment that we should offload most of
   69 the decision making in this function into fix_nodes/check_balance, and
   70 then create some sort of structure in tb that says what actions should
   71 be performed by do_balance.
   72 
   73 -Hans */
   74 
   75 
   76 
   77 /* Balance leaf node in case of delete or cut: insert_size[0] < 0
   78  *
   79  * lnum, rnum can have values >= -1
   80  *      -1 means that the neighbor must be joined with S
   81  *       0 means that nothing should be done with the neighbor
   82  *      >0 means to shift entirely or partly the specified number of items to the neighbor
   83  */
   84 static int balance_leaf_when_delete (struct tree_balance * tb, int flag)
   85 {
   86     struct buffer_head * tbS0 = PATH_PLAST_BUFFER (tb->tb_path);
   87     int item_pos = PATH_LAST_POSITION (tb->tb_path);
   88     int pos_in_item = tb->tb_path->pos_in_item;
   89     struct buffer_info bi;
   90     int n;
   91     struct item_head * ih;
   92 
   93     RFALSE( tb->FR[0] && B_LEVEL (tb->FR[0]) != DISK_LEAF_NODE_LEVEL + 1,
   94             "vs- 12000: level: wrong FR %z\n", tb->FR[0]);
   95     RFALSE( tb->blknum[0] > 1,
   96             "PAP-12005: tb->blknum == %d, can not be > 1", tb->blknum[0]);
   97     RFALSE( ! tb->blknum[0] && ! PATH_H_PPARENT(tb->tb_path, 0),
   98             "PAP-12010: tree can not be empty");
   99 
  100     ih = B_N_PITEM_HEAD (tbS0, item_pos);
  101 
  102     /* Delete or truncate the item */
  103 
  104     switch (flag) {
  105     case M_DELETE:   /* delete item in S[0] */
  106 
  107         RFALSE( ih_item_len(ih) + IH_SIZE != -tb->insert_size[0],
  108                 "vs-12013: mode Delete, insert size %d, ih to be deleted %h",
  109                  -tb->insert_size [0], ih);
  110 
  111         bi.tb = tb;
  112         bi.bi_bh = tbS0;
  113         bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
  114         bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
  115         leaf_delete_items (&bi, 0, item_pos, 1, -1);
  116 
  117         if ( ! item_pos && tb->CFL[0] ) {
  118             if ( B_NR_ITEMS(tbS0) ) {
  119                 replace_key(tb, tb->CFL[0],tb->lkey[0],tbS0,0);
  120             }
  121             else {
  122                 if ( ! PATH_H_POSITION (tb->tb_path, 1) )
  123                     replace_key(tb, tb->CFL[0],tb->lkey[0],PATH_H_PPARENT(tb->tb_path, 0),0);
  124             }
  125         } 
  126 
  127         RFALSE( ! item_pos && !tb->CFL[0],
  128                 "PAP-12020: tb->CFL[0]==%p, tb->L[0]==%p", tb->CFL[0], tb->L[0]);
  129     
  130         break;
  131 
  132     case M_CUT: {  /* cut item in S[0] */
  133         bi.tb = tb;
  134         bi.bi_bh = tbS0;
  135         bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
  136         bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
  137         if (is_direntry_le_ih (ih)) {
  138 
  139             /* UFS unlink semantics are such that you can only delete one directory entry at a time. */
  140             /* when we cut a directory tb->insert_size[0] means number of entries to be cut (always 1) */
  141             tb->insert_size[0] = -1;
  142             leaf_cut_from_buffer (&bi, item_pos, pos_in_item, -tb->insert_size[0]);
  143 
  144             RFALSE( ! item_pos && ! pos_in_item && ! tb->CFL[0],
  145                     "PAP-12030: can not change delimiting key. CFL[0]=%p", 
  146                     tb->CFL[0]);
  147 
  148             if ( ! item_pos && ! pos_in_item && tb->CFL[0] ) {
  149                 replace_key(tb, tb->CFL[0],tb->lkey[0],tbS0,0);
  150             }
  151         } else {
  152             leaf_cut_from_buffer (&bi, item_pos, pos_in_item, -tb->insert_size[0]);
  153 
  154             RFALSE( ! ih_item_len(ih),
  155                 "PAP-12035: cut must leave non-zero dynamic length of item");
  156         }
  157         break;
  158     }
  159 
  160     default:
  161         print_cur_tb ("12040");
  162         reiserfs_panic (tb->tb_sb, "PAP-12040: balance_leaf_when_delete: unexpectable mode: %s(%d)",
  163                         (flag == M_PASTE) ? "PASTE" : ((flag == M_INSERT) ? "INSERT" : "UNKNOWN"), flag);
  164     }
  165 
  166     /* the rule is that no shifting occurs unless by shifting a node can be freed */
  167     n = B_NR_ITEMS(tbS0);
  168     if ( tb->lnum[0] )     /* L[0] takes part in balancing */
  169     {
  170         if ( tb->lnum[0] == -1 )    /* L[0] must be joined with S[0] */
  171         {
  172             if ( tb->rnum[0] == -1 )    /* R[0] must be also joined with S[0] */
  173             {                   
  174                 if ( tb->FR[0] == PATH_H_PPARENT(tb->tb_path, 0) )
  175                 {
  176                     /* all contents of all the 3 buffers will be in L[0] */
  177                     if ( PATH_H_POSITION (tb->tb_path, 1) == 0 && 1 < B_NR_ITEMS(tb->FR[0]) )
  178                         replace_key(tb, tb->CFL[0],tb->lkey[0],tb->FR[0],1);
  179 
  180                     leaf_move_items (LEAF_FROM_S_TO_L, tb, n, -1, 0);
  181                     leaf_move_items (LEAF_FROM_R_TO_L, tb, B_NR_ITEMS(tb->R[0]), -1, 0);
  182 
  183                     reiserfs_invalidate_buffer (tb, tbS0);
  184                     reiserfs_invalidate_buffer (tb, tb->R[0]);
  185 
  186                     return 0;
  187                 }
  188                 /* all contents of all the 3 buffers will be in R[0] */
  189                 leaf_move_items (LEAF_FROM_S_TO_R, tb, n, -1, 0);
  190                 leaf_move_items (LEAF_FROM_L_TO_R, tb, B_NR_ITEMS(tb->L[0]), -1, 0);
  191 
  192                 /* right_delimiting_key is correct in R[0] */
  193                 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
  194 
  195                 reiserfs_invalidate_buffer (tb, tbS0);
  196                 reiserfs_invalidate_buffer (tb, tb->L[0]);
  197 
  198                 return -1;
  199             }
  200 
  201             RFALSE( tb->rnum[0] != 0, 
  202                     "PAP-12045: rnum must be 0 (%d)", tb->rnum[0]);
  203             /* all contents of L[0] and S[0] will be in L[0] */
  204             leaf_shift_left(tb, n, -1);
  205 
  206             reiserfs_invalidate_buffer (tb, tbS0);
  207 
  208             return 0;
  209         }
  210         /* a part of contents of S[0] will be in L[0] and the rest part of S[0] will be in R[0] */
  211 
  212         RFALSE( ( tb->lnum[0] + tb->rnum[0] < n ) || 
  213                 ( tb->lnum[0] + tb->rnum[0] > n+1 ),
  214                 "PAP-12050: rnum(%d) and lnum(%d) and item number(%d) in S[0] are not consistent",
  215                 tb->rnum[0], tb->lnum[0], n);
  216         RFALSE( ( tb->lnum[0] + tb->rnum[0] == n ) && 
  217                 (tb->lbytes != -1 || tb->rbytes != -1),
  218                 "PAP-12055: bad rbytes (%d)/lbytes (%d) parameters when items are not split", 
  219                 tb->rbytes, tb->lbytes);
  220         RFALSE( ( tb->lnum[0] + tb->rnum[0] == n + 1 ) && 
  221                 (tb->lbytes < 1 || tb->rbytes != -1),
  222                 "PAP-12060: bad rbytes (%d)/lbytes (%d) parameters when items are split", 
  223                 tb->rbytes, tb->lbytes);
  224 
  225         leaf_shift_left (tb, tb->lnum[0], tb->lbytes);
  226         leaf_shift_right(tb, tb->rnum[0], tb->rbytes);
  227 
  228         reiserfs_invalidate_buffer (tb, tbS0);
  229 
  230         return 0;
  231     }
  232 
  233     if ( tb->rnum[0] == -1 ) {
  234         /* all contents of R[0] and S[0] will be in R[0] */
  235         leaf_shift_right(tb, n, -1);
  236         reiserfs_invalidate_buffer (tb, tbS0);
  237         return 0;
  238     }
  239 
  240     RFALSE( tb->rnum[0], 
  241             "PAP-12065: bad rnum parameter must be 0 (%d)", tb->rnum[0]);
  242     return 0;
  243 }
  244 
  245 
  246 static int balance_leaf (struct tree_balance * tb,
  247                          struct item_head * ih,         /* item header of inserted item (this is on little endian) */
  248                          const char * body,             /* body  of inserted item or bytes to paste */
  249                          int flag,                      /* i - insert, d - delete, c - cut, p - paste
  250                                                            (see comment to do_balance) */
  251                          struct item_head * insert_key,  /* in our processing of one level we sometimes determine what
  252                                                             must be inserted into the next higher level.  This insertion
  253                                                             consists of a key or two keys and their corresponding
  254                                                             pointers */
  255                          struct buffer_head ** insert_ptr /* inserted node-ptrs for the next level */
  256     )
  257 {
  258     struct buffer_head * tbS0 = PATH_PLAST_BUFFER (tb->tb_path);
  259     int item_pos = PATH_LAST_POSITION (tb->tb_path);    /*  index into the array of item headers in S[0] 
  260                                                             of the affected item */
  261     struct buffer_info bi;
  262     struct buffer_head *S_new[2];  /* new nodes allocated to hold what could not fit into S */
  263     int snum[2];            /* number of items that will be placed
  264                                into S_new (includes partially shifted
  265                                items) */
  266     int sbytes[2];          /* if an item is partially shifted into S_new then 
  267                                if it is a directory item 
  268                                it is the number of entries from the item that are shifted into S_new
  269                                else
  270                                it is the number of bytes from the item that are shifted into S_new
  271                             */
  272     int n, i;
  273     int ret_val;
  274     int pos_in_item;
  275     int zeros_num;
  276 
  277     PROC_INFO_INC( tb -> tb_sb, balance_at[ 0 ] );
  278 
  279     /* Make balance in case insert_size[0] < 0 */
  280     if ( tb->insert_size[0] < 0 )
  281         return balance_leaf_when_delete (tb, flag);
  282   
  283     zeros_num = 0;
  284     if (flag == M_INSERT && body == 0)
  285         zeros_num = ih_item_len( ih );
  286 
  287     pos_in_item = tb->tb_path->pos_in_item;
  288     /* for indirect item pos_in_item is measured in unformatted node
  289        pointers. Recalculate to bytes */
  290     if (flag != M_INSERT && is_indirect_le_ih (B_N_PITEM_HEAD (tbS0, item_pos)))
  291         pos_in_item *= UNFM_P_SIZE;
  292 
  293     if ( tb->lnum[0] > 0 ) {
  294         /* Shift lnum[0] items from S[0] to the left neighbor L[0] */
  295         if ( item_pos < tb->lnum[0] ) {
  296             /* new item or it part falls to L[0], shift it too */
  297             n = B_NR_ITEMS(tb->L[0]);
  298 
  299             switch (flag) {
  300             case M_INSERT:   /* insert item into L[0] */
  301 
  302                 if ( item_pos == tb->lnum[0] - 1 && tb->lbytes != -1 ) {
  303                     /* part of new item falls into L[0] */
  304                     int new_item_len;
  305                     int version;
  306 
  307                     ret_val = leaf_shift_left (tb, tb->lnum[0]-1, -1);
  308 
  309                     /* Calculate item length to insert to S[0] */
  310                     new_item_len = ih_item_len(ih) - tb->lbytes;
  311                     /* Calculate and check item length to insert to L[0] */
  312                     put_ih_item_len(ih, ih_item_len(ih) - new_item_len );
  313 
  314                     RFALSE( ih_item_len(ih) <= 0,
  315                             "PAP-12080: there is nothing to insert into L[0]: ih_item_len=%d",
  316                             ih_item_len(ih));
  317 
  318                     /* Insert new item into L[0] */
  319                     bi.tb = tb;
  320                     bi.bi_bh = tb->L[0];
  321                     bi.bi_parent = tb->FL[0];
  322                     bi.bi_position = get_left_neighbor_position (tb, 0);
  323                     leaf_insert_into_buf (&bi, n + item_pos - ret_val, ih, body,
  324                                           zeros_num > ih_item_len(ih) ? ih_item_len(ih) : zeros_num);
  325 
  326                     version = ih_version (ih);
  327 
  328                     /* Calculate key component, item length and body to insert into S[0] */
  329                     set_le_ih_k_offset( ih, le_ih_k_offset( ih ) + (tb->lbytes << (is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits-UNFM_P_SHIFT:0)) );
  330 
  331                     put_ih_item_len( ih, new_item_len );
  332                     if ( tb->lbytes >  zeros_num ) {
  333                         body += (tb->lbytes - zeros_num);
  334                         zeros_num = 0;
  335                     }
  336                     else
  337                         zeros_num -= tb->lbytes;
  338 
  339                     RFALSE( ih_item_len(ih) <= 0,
  340                         "PAP-12085: there is nothing to insert into S[0]: ih_item_len=%d",
  341                         ih_item_len(ih));
  342                 } else {
  343                     /* new item in whole falls into L[0] */
  344                     /* Shift lnum[0]-1 items to L[0] */
  345                     ret_val = leaf_shift_left(tb, tb->lnum[0]-1, tb->lbytes);
  346                     /* Insert new item into L[0] */
  347                     bi.tb = tb;
  348                     bi.bi_bh = tb->L[0];
  349                     bi.bi_parent = tb->FL[0];
  350                     bi.bi_position = get_left_neighbor_position (tb, 0);
  351                     leaf_insert_into_buf (&bi, n + item_pos - ret_val, ih, body, zeros_num);
  352                     tb->insert_size[0] = 0;
  353                     zeros_num = 0;
  354                 }
  355                 break;
  356 
  357             case M_PASTE:   /* append item in L[0] */
  358 
  359                 if ( item_pos == tb->lnum[0] - 1 && tb->lbytes != -1 ) {
  360                     /* we must shift the part of the appended item */
  361                     if ( is_direntry_le_ih (B_N_PITEM_HEAD (tbS0, item_pos))) {
  362 
  363                         RFALSE( zeros_num,
  364                                 "PAP-12090: illegal parameter in case of a directory");
  365                         /* directory item */
  366                         if ( tb->lbytes > pos_in_item ) {
  367                             /* new directory entry falls into L[0] */
  368                             struct item_head * pasted;
  369                             int l_pos_in_item = pos_in_item;
  370                                                           
  371                             /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 entries from given directory item */
  372                             ret_val = leaf_shift_left(tb, tb->lnum[0], tb->lbytes - 1);
  373                             if ( ret_val && ! item_pos ) {
  374                                 pasted =  B_N_PITEM_HEAD(tb->L[0],B_NR_ITEMS(tb->L[0])-1);
  375                                 l_pos_in_item += I_ENTRY_COUNT(pasted) - (tb->lbytes-1);
  376                             }
  377 
  378                             /* Append given directory entry to directory item */
  379                             bi.tb = tb;
  380                             bi.bi_bh = tb->L[0];
  381                             bi.bi_parent = tb->FL[0];
  382                             bi.bi_position = get_left_neighbor_position (tb, 0);
  383                             leaf_paste_in_buffer (&bi, n + item_pos - ret_val, l_pos_in_item,
  384                                                   tb->insert_size[0], body, zeros_num);
  385 
  386                             /* previous string prepared space for pasting new entry, following string pastes this entry */
  387 
  388                             /* when we have merge directory item, pos_in_item has been changed too */
  389 
  390                             /* paste new directory entry. 1 is entry number */
  391                             leaf_paste_entries (bi.bi_bh, n + item_pos - ret_val, l_pos_in_item, 1,
  392                                                 (struct reiserfs_de_head *)body, 
  393                                                 body + DEH_SIZE, tb->insert_size[0]
  394                                 );
  395                             tb->insert_size[0] = 0;
  396                         } else {
  397                             /* new directory item doesn't fall into L[0] */
  398                             /* Shift lnum[0]-1 items in whole. Shift lbytes directory entries from directory item number lnum[0] */
  399                             leaf_shift_left (tb, tb->lnum[0], tb->lbytes);
  400                         }
  401                         /* Calculate new position to append in item body */
  402                         pos_in_item -= tb->lbytes;
  403                     }
  404                     else {
  405                         /* regular object */
  406                         RFALSE( tb->lbytes <= 0,
  407                                 "PAP-12095: there is nothing to shift to L[0]. lbytes=%d",
  408                                 tb->lbytes);
  409                         RFALSE( pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0, item_pos)),
  410                                 "PAP-12100: incorrect position to paste: item_len=%d, pos_in_item=%d",
  411                                 ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)), pos_in_item);
  412 
  413                         if ( tb->lbytes >= pos_in_item ) {
  414                             /* appended item will be in L[0] in whole */
  415                             int l_n;
  416 
  417                             /* this bytes number must be appended to the last item of L[h] */
  418                             l_n = tb->lbytes - pos_in_item;
  419 
  420                             /* Calculate new insert_size[0] */
  421                             tb->insert_size[0] -= l_n;
  422 
  423                             RFALSE( tb->insert_size[0] <= 0,
  424                                     "PAP-12105: there is nothing to paste into L[0]. insert_size=%d",
  425                                     tb->insert_size[0]);
  426                             ret_val =  leaf_shift_left(tb,tb->lnum[0], 
  427                                                        ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)));
  428                             /* Append to body of item in L[0] */
  429                             bi.tb = tb;
  430                             bi.bi_bh = tb->L[0];
  431                             bi.bi_parent = tb->FL[0];
  432                             bi.bi_position = get_left_neighbor_position (tb, 0);
  433                             leaf_paste_in_buffer(
  434                                 &bi,n + item_pos - ret_val,
  435                                 ih_item_len( B_N_PITEM_HEAD(tb->L[0],n+item_pos-ret_val)),
  436                                 l_n,body, zeros_num > l_n ? l_n : zeros_num
  437                                 );
  438                             /* 0-th item in S0 can be only of DIRECT type when l_n != 0*/
  439                             {
  440                                 int version;
  441                                 int temp_l = l_n;
  442                                 
  443                                 RFALSE (ih_item_len (B_N_PITEM_HEAD (tbS0, 0)),
  444                                         "PAP-12106: item length must be 0");
  445                                 RFALSE (comp_short_le_keys (B_N_PKEY (tbS0, 0),
  446                                                             B_N_PKEY (tb->L[0],
  447                                                                             n + item_pos - ret_val)),
  448                                         "PAP-12107: items must be of the same file");
  449                                 if (is_indirect_le_ih(B_N_PITEM_HEAD (tb->L[0],
  450                                                                       n + item_pos - ret_val))) {
  451                                     temp_l = l_n << (tb->tb_sb->s_blocksize_bits - UNFM_P_SHIFT);
  452                                 }
  453                                 /* update key of first item in S0 */
  454                                 version = ih_version (B_N_PITEM_HEAD (tbS0, 0));
  455                                 set_le_key_k_offset (version, B_N_PKEY (tbS0, 0), 
  456                                                      le_key_k_offset (version, B_N_PKEY (tbS0, 0)) + temp_l);
  457                                 /* update left delimiting key */
  458                                 set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]),
  459                                                      le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0])) + temp_l);
  460                             }
  461 
  462                             /* Calculate new body, position in item and insert_size[0] */
  463                             if ( l_n > zeros_num ) {
  464                                 body += (l_n - zeros_num);
  465                                 zeros_num = 0;
  466                             }
  467                             else
  468                                 zeros_num -= l_n;
  469                             pos_in_item = 0;    
  470 
  471                             RFALSE( comp_short_le_keys 
  472                                     (B_N_PKEY(tbS0,0),
  473                                      B_N_PKEY(tb->L[0],B_NR_ITEMS(tb->L[0])-1)) ||
  474                                 
  475                                     !op_is_left_mergeable 
  476                                     (B_N_PKEY (tbS0, 0), tbS0->b_size) ||
  477                                     !op_is_left_mergeable
  478                                     (B_N_PDELIM_KEY(tb->CFL[0],tb->lkey[0]), 
  479                                      tbS0->b_size),
  480                                     "PAP-12120: item must be merge-able with left neighboring item");
  481                         }
  482                         else /* only part of the appended item will be in L[0] */
  483                         {
  484                             /* Calculate position in item for append in S[0] */
  485                             pos_in_item -= tb->lbytes;
  486 
  487                             RFALSE( pos_in_item <= 0,
  488                                     "PAP-12125: no place for paste. pos_in_item=%d", pos_in_item);
  489 
  490                             /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
  491                             leaf_shift_left(tb,tb->lnum[0],tb->lbytes);
  492                         }
  493                     }
  494                 }
  495                 else /* appended item will be in L[0] in whole */
  496                 {
  497                     struct item_head * pasted;
  498 
  499                         if ( ! item_pos  && op_is_left_mergeable (B_N_PKEY (tbS0, 0), tbS0->b_size) )
  500                         { /* if we paste into first item of S[0] and it is left mergable */
  501                             /* then increment pos_in_item by the size of the last item in L[0] */
  502                             pasted = B_N_PITEM_HEAD(tb->L[0],n-1);
  503                             if ( is_direntry_le_ih (pasted) )
  504                                 pos_in_item += ih_entry_count(pasted);
  505                             else
  506                                 pos_in_item += ih_item_len(pasted);
  507                         }
  508 
  509                     /* Shift lnum[0] - 1 items in whole. Shift lbytes - 1 byte from item number lnum[0] */
  510                     ret_val = leaf_shift_left(tb,tb->lnum[0],tb->lbytes);
  511                     /* Append to body of item in L[0] */
  512                     bi.tb = tb;
  513                     bi.bi_bh = tb->L[0];
  514                     bi.bi_parent = tb->FL[0];
  515                     bi.bi_position = get_left_neighbor_position (tb, 0);
  516                     leaf_paste_in_buffer (&bi, n + item_pos - ret_val, pos_in_item, tb->insert_size[0],
  517                                           body, zeros_num);
  518 
  519                     /* if appended item is directory, paste entry */
  520                     pasted = B_N_PITEM_HEAD (tb->L[0], n + item_pos - ret_val);
  521                     if (is_direntry_le_ih (pasted))
  522                         leaf_paste_entries (
  523                             bi.bi_bh, n + item_pos - ret_val, pos_in_item, 1, 
  524                             (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]
  525                             );
  526                     /* if appended item is indirect item, put unformatted node into un list */
  527                     if (is_indirect_le_ih (pasted))
  528                         set_ih_free_space (pasted, 0);
  529                     tb->insert_size[0] = 0;
  530                     zeros_num = 0;
  531                 }
  532                 break;
  533             default:    /* cases d and t */
  534                 reiserfs_panic (tb->tb_sb, "PAP-12130: balance_leaf: lnum > 0: unexpectable mode: %s(%d)",
  535                                 (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
  536             }
  537         } else { 
  538             /* new item doesn't fall into L[0] */
  539             leaf_shift_left(tb,tb->lnum[0],tb->lbytes);
  540         }
  541     }   /* tb->lnum[0] > 0 */
  542 
  543     /* Calculate new item position */
  544     item_pos -= ( tb->lnum[0] - (( tb->lbytes != -1 ) ? 1 : 0));
  545 
  546     if ( tb->rnum[0] > 0 ) {
  547         /* shift rnum[0] items from S[0] to the right neighbor R[0] */
  548         n = B_NR_ITEMS(tbS0);
  549         switch ( flag ) {
  550 
  551         case M_INSERT:   /* insert item */
  552             if ( n - tb->rnum[0] < item_pos )
  553             { /* new item or its part falls to R[0] */
  554                 if ( item_pos == n - tb->rnum[0] + 1 && tb->rbytes != -1 )
  555                 { /* part of new item falls into R[0] */
  556                     loff_t old_key_comp, old_len, r_zeros_number;
  557                     const char * r_body;
  558                     int version;
  559                     loff_t offset;
  560 
  561                     leaf_shift_right(tb,tb->rnum[0]-1,-1);
  562 
  563                     version = ih_version(ih);
  564                     /* Remember key component and item length */
  565                     old_key_comp = le_ih_k_offset( ih );
  566                     old_len = ih_item_len(ih);
  567 
  568                     /* Calculate key component and item length to insert into R[0] */
  569                     offset = le_ih_k_offset( ih ) + ((old_len - tb->rbytes )<<(is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits-UNFM_P_SHIFT:0));
  570                     set_le_ih_k_offset( ih, offset );
  571                     put_ih_item_len( ih, tb->rbytes);
  572                     /* Insert part of the item into R[0] */
  573                     bi.tb = tb;
  574                     bi.bi_bh = tb->R[0];
  575                     bi.bi_parent = tb->FR[0];
  576                     bi.bi_position = get_right_neighbor_position (tb, 0);
  577                     if ( (old_len - tb->rbytes) > zeros_num ) {
  578                         r_zeros_number = 0;
  579                         r_body = body + (old_len - tb->rbytes) - zeros_num;
  580                     }
  581                     else {
  582                         r_body = body;
  583                         r_zeros_number = zeros_num - (old_len - tb->rbytes);
  584                         zeros_num -= r_zeros_number;
  585                     }
  586 
  587                     leaf_insert_into_buf (&bi, 0, ih, r_body, r_zeros_number);
  588 
  589                     /* Replace right delimiting key by first key in R[0] */
  590                     replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
  591 
  592                     /* Calculate key component and item length to insert into S[0] */
  593                     set_le_ih_k_offset( ih, old_key_comp );
  594                     put_ih_item_len( ih, old_len - tb->rbytes );
  595 
  596                     tb->insert_size[0] -= tb->rbytes;
  597 
  598                 }
  599                 else /* whole new item falls into R[0] */
  600                 {                                         
  601                     /* Shift rnum[0]-1 items to R[0] */
  602                     ret_val = leaf_shift_right(tb,tb->rnum[0]-1,tb->rbytes);
  603                     /* Insert new item into R[0] */
  604                     bi.tb = tb;
  605                     bi.bi_bh = tb->R[0];
  606                     bi.bi_parent = tb->FR[0];
  607                     bi.bi_position = get_right_neighbor_position (tb, 0);
  608                     leaf_insert_into_buf (&bi, item_pos - n + tb->rnum[0] - 1, ih, body, zeros_num);
  609 
  610                     if ( item_pos - n + tb->rnum[0] - 1 == 0 ) {
  611                         replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
  612 
  613                     }
  614                     zeros_num = tb->insert_size[0] = 0;
  615                 }
  616             }
  617             else /* new item or part of it doesn't fall into R[0] */
  618             {
  619                 leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
  620             }
  621             break;
  622 
  623         case M_PASTE:   /* append item */
  624 
  625             if ( n - tb->rnum[0] <= item_pos )  /* pasted item or part of it falls to R[0] */
  626             {
  627                 if ( item_pos == n - tb->rnum[0] && tb->rbytes != -1 )
  628                 { /* we must shift the part of the appended item */
  629                     if ( is_direntry_le_ih (B_N_PITEM_HEAD(tbS0, item_pos)))
  630                     { /* we append to directory item */
  631                         int entry_count;
  632 
  633                         RFALSE( zeros_num,
  634                                 "PAP-12145: illegal parametr in case of a directory");
  635                         entry_count = I_ENTRY_COUNT(B_N_PITEM_HEAD(tbS0, item_pos));
  636                         if ( entry_count - tb->rbytes < pos_in_item )
  637                             /* new directory entry falls into R[0] */
  638                         {
  639                             int paste_entry_position;
  640 
  641                             RFALSE( tb->rbytes - 1 >= entry_count || 
  642                                     ! tb->insert_size[0],
  643                                     "PAP-12150: no enough of entries to shift to R[0]: rbytes=%d, entry_count=%d",
  644                                     tb->rbytes, entry_count);
  645                             /* Shift rnum[0]-1 items in whole. Shift rbytes-1 directory entries from directory item number rnum[0] */
  646                             leaf_shift_right(tb,tb->rnum[0],tb->rbytes - 1);
  647                             /* Paste given directory entry to directory item */
  648                             paste_entry_position = pos_in_item - entry_count + tb->rbytes - 1;
  649                             bi.tb = tb;
  650                             bi.bi_bh = tb->R[0];
  651                             bi.bi_parent = tb->FR[0];
  652                             bi.bi_position = get_right_neighbor_position (tb, 0);
  653                             leaf_paste_in_buffer (&bi, 0, paste_entry_position,
  654                                                   tb->insert_size[0],body,zeros_num);
  655                             /* paste entry */
  656                             leaf_paste_entries (
  657                                 bi.bi_bh, 0, paste_entry_position, 1, (struct reiserfs_de_head *)body, 
  658                                 body + DEH_SIZE, tb->insert_size[0]
  659                                 );                                                              
  660                                                 
  661                             if ( paste_entry_position == 0 ) {
  662                                 /* change delimiting keys */
  663                                 replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
  664                             }
  665 
  666                             tb->insert_size[0] = 0;
  667                             pos_in_item++;
  668                         }
  669                         else /* new directory entry doesn't fall into R[0] */
  670                         {
  671                             leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
  672                         }
  673                     }
  674                     else /* regular object */
  675                     {
  676                         int n_shift, n_rem, r_zeros_number;
  677                         const char * r_body;
  678 
  679                         /* Calculate number of bytes which must be shifted from appended item */
  680                         if ( (n_shift = tb->rbytes - tb->insert_size[0]) < 0 )
  681                             n_shift = 0;
  682 
  683                         RFALSE(pos_in_item != ih_item_len(B_N_PITEM_HEAD (tbS0, item_pos)),
  684                                "PAP-12155: invalid position to paste. ih_item_len=%d, pos_in_item=%d",
  685                                pos_in_item, ih_item_len( B_N_PITEM_HEAD(tbS0,item_pos)));
  686 
  687                         leaf_shift_right(tb,tb->rnum[0],n_shift);
  688                         /* Calculate number of bytes which must remain in body after appending to R[0] */
  689                         if ( (n_rem = tb->insert_size[0] - tb->rbytes) < 0 )
  690                             n_rem = 0;
  691                         
  692                         {
  693                           int version;
  694                           unsigned long temp_rem = n_rem;
  695                           
  696                           version = ih_version (B_N_PITEM_HEAD (tb->R[0],0));
  697                           if (is_indirect_le_key(version,B_N_PKEY(tb->R[0],0))){
  698                               temp_rem = n_rem << (tb->tb_sb->s_blocksize_bits -
  699                                          UNFM_P_SHIFT);
  700                           }
  701                           set_le_key_k_offset (version, B_N_PKEY(tb->R[0],0), 
  702                                                le_key_k_offset (version, B_N_PKEY(tb->R[0],0)) + temp_rem);
  703                           set_le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0]), 
  704                                                le_key_k_offset (version, B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) + temp_rem);
  705                         }
  706 /*                k_offset (B_N_PKEY(tb->R[0],0)) += n_rem;
  707                   k_offset (B_N_PDELIM_KEY(tb->CFR[0],tb->rkey[0])) += n_rem;*/
  708                         do_balance_mark_internal_dirty (tb, tb->CFR[0], 0);
  709 
  710                         /* Append part of body into R[0] */
  711                         bi.tb = tb;
  712                         bi.bi_bh = tb->R[0];
  713                         bi.bi_parent = tb->FR[0];
  714                         bi.bi_position = get_right_neighbor_position (tb, 0);
  715                         if ( n_rem > zeros_num ) {
  716                             r_zeros_number = 0;
  717                             r_body = body + n_rem - zeros_num;
  718                         }
  719                         else {
  720                             r_body = body;
  721                             r_zeros_number = zeros_num - n_rem;
  722                             zeros_num -= r_zeros_number;
  723                         }
  724 
  725                         leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0] - n_rem, r_body, r_zeros_number);
  726 
  727                         if (is_indirect_le_ih (B_N_PITEM_HEAD(tb->R[0],0))) {
  728 #if 0
  729                             RFALSE( n_rem,
  730                                     "PAP-12160: paste more than one unformatted node pointer");
  731 #endif
  732                             set_ih_free_space (B_N_PITEM_HEAD(tb->R[0],0), 0);
  733                         }
  734                         tb->insert_size[0] = n_rem;
  735                         if ( ! n_rem )
  736                             pos_in_item ++;
  737                     }
  738                 }
  739                 else /* pasted item in whole falls into R[0] */
  740                 {
  741                     struct item_head * pasted;
  742 
  743                     ret_val = leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
  744                     /* append item in R[0] */
  745                     if ( pos_in_item >= 0 ) {
  746                         bi.tb = tb;
  747                         bi.bi_bh = tb->R[0];
  748                         bi.bi_parent = tb->FR[0];
  749                         bi.bi_position = get_right_neighbor_position (tb, 0);
  750                         leaf_paste_in_buffer(&bi,item_pos - n + tb->rnum[0], pos_in_item,
  751                                              tb->insert_size[0],body, zeros_num);
  752                     }
  753 
  754                     /* paste new entry, if item is directory item */
  755                     pasted = B_N_PITEM_HEAD(tb->R[0], item_pos - n + tb->rnum[0]);
  756                     if (is_direntry_le_ih (pasted) && pos_in_item >= 0 ) {
  757                         leaf_paste_entries (
  758                             bi.bi_bh, item_pos - n + tb->rnum[0], pos_in_item, 1, 
  759                             (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]
  760                             );
  761                         if ( ! pos_in_item ) {
  762 
  763                             RFALSE( item_pos - n + tb->rnum[0],
  764                                     "PAP-12165: directory item must be first item of node when pasting is in 0th position");
  765 
  766                             /* update delimiting keys */
  767                             replace_key(tb, tb->CFR[0],tb->rkey[0],tb->R[0],0);
  768                         }
  769                     }
  770 
  771                     if (is_indirect_le_ih (pasted))
  772                         set_ih_free_space (pasted, 0);
  773                     zeros_num = tb->insert_size[0] = 0;
  774                 }
  775             }
  776             else /* new item doesn't fall into R[0] */
  777             {
  778                 leaf_shift_right(tb,tb->rnum[0],tb->rbytes);
  779             }
  780             break;
  781         default:    /* cases d and t */
  782             reiserfs_panic (tb->tb_sb, "PAP-12175: balance_leaf: rnum > 0: unexpectable mode: %s(%d)",
  783                             (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
  784         }
  785     
  786     }   /* tb->rnum[0] > 0 */
  787 
  788 
  789     RFALSE( tb->blknum[0] > 3,
  790             "PAP-12180: blknum can not be %d. It must be <= 3",  tb->blknum[0]);
  791     RFALSE( tb->blknum[0] < 0,
  792             "PAP-12185: blknum can not be %d. It must be >= 0",  tb->blknum[0]);
  793 
  794     /* if while adding to a node we discover that it is possible to split
  795        it in two, and merge the left part into the left neighbor and the
  796        right part into the right neighbor, eliminating the node */
  797     if ( tb->blknum[0] == 0 ) { /* node S[0] is empty now */
  798 
  799         RFALSE( ! tb->lnum[0] || ! tb->rnum[0],
  800                 "PAP-12190: lnum and rnum must not be zero");
  801         /* if insertion was done before 0-th position in R[0], right
  802            delimiting key of the tb->L[0]'s and left delimiting key are
  803            not set correctly */
  804         if (tb->CFL[0]) {
  805             if (!tb->CFR[0])
  806                 reiserfs_panic (tb->tb_sb, "vs-12195: balance_leaf: CFR not initialized");
  807             copy_key (B_N_PDELIM_KEY (tb->CFL[0], tb->lkey[0]), B_N_PDELIM_KEY (tb->CFR[0], tb->rkey[0]));
  808             do_balance_mark_internal_dirty (tb, tb->CFL[0], 0);
  809         }
  810 
  811         reiserfs_invalidate_buffer(tb,tbS0);                                                                    
  812         return 0;
  813     }
  814 
  815 
  816     /* Fill new nodes that appear in place of S[0] */
  817 
  818     /* I am told that this copying is because we need an array to enable
  819        the looping code. -Hans */
  820     snum[0] = tb->s1num,
  821         snum[1] = tb->s2num;
  822     sbytes[0] = tb->s1bytes;
  823     sbytes[1] = tb->s2bytes;
  824     for( i = tb->blknum[0] - 2; i >= 0; i-- ) {
  825 
  826         RFALSE( !snum[i], "PAP-12200: snum[%d] == %d. Must be > 0", i, snum[i]);
  827 
  828         /* here we shift from S to S_new nodes */
  829 
  830         S_new[i] = get_FEB(tb);
  831 
  832         /* initialized block type and tree level */
  833         set_blkh_level( B_BLK_HEAD(S_new[i]), DISK_LEAF_NODE_LEVEL );
  834 
  835 
  836         n = B_NR_ITEMS(tbS0);
  837         
  838         switch (flag) {
  839         case M_INSERT:   /* insert item */
  840 
  841             if ( n - snum[i] < item_pos )
  842             { /* new item or it's part falls to first new node S_new[i]*/
  843                 if ( item_pos == n - snum[i] + 1 && sbytes[i] != -1 )
  844                 { /* part of new item falls into S_new[i] */
  845                     int old_key_comp, old_len, r_zeros_number;
  846                     const char * r_body;
  847                     int version;
  848 
  849                     /* Move snum[i]-1 items from S[0] to S_new[i] */
  850                     leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, -1, S_new[i]);
  851                     /* Remember key component and item length */
  852                     version = ih_version (ih);
  853                     old_key_comp = le_ih_k_offset( ih );
  854                     old_len = ih_item_len(ih);
  855 
  856                     /* Calculate key component and item length to insert into S_new[i] */
  857                     set_le_ih_k_offset( ih,
  858                                 le_ih_k_offset(ih) + ((old_len - sbytes[i] )<<(is_indirect_le_ih(ih)?tb->tb_sb->s_blocksize_bits-UNFM_P_SHIFT:0) ));
  859 
  860                     put_ih_item_len( ih, sbytes[i] );
  861 
  862                     /* Insert part of the item into S_new[i] before 0-th item */
  863                     bi.tb = tb;
  864                     bi.bi_bh = S_new[i];
  865                     bi.bi_parent = 0;
  866                     bi.bi_position = 0;
  867 
  868                     if ( (old_len - sbytes[i]) > zeros_num ) {
  869                         r_zeros_number = 0;
  870                         r_body = body + (old_len - sbytes[i]) - zeros_num;
  871                     }
  872                     else {
  873                         r_body = body;
  874                         r_zeros_number = zeros_num - (old_len - sbytes[i]);
  875                         zeros_num -= r_zeros_number;
  876                     }
  877 
  878                     leaf_insert_into_buf (&bi, 0, ih, r_body, r_zeros_number);
  879 
  880                     /* Calculate key component and item length to insert into S[i] */
  881                     set_le_ih_k_offset( ih, old_key_comp );
  882                     put_ih_item_len( ih, old_len - sbytes[i] );
  883                     tb->insert_size[0] -= sbytes[i];
  884                 }
  885                 else /* whole new item falls into S_new[i] */
  886                 {
  887                     /* Shift snum[0] - 1 items to S_new[i] (sbytes[i] of split item) */
  888                     leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i] - 1, sbytes[i], S_new[i]);
  889 
  890                     /* Insert new item into S_new[i] */
  891                     bi.tb = tb;
  892                     bi.bi_bh = S_new[i];
  893                     bi.bi_parent = 0;
  894                     bi.bi_position = 0;
  895                     leaf_insert_into_buf (&bi, item_pos - n + snum[i] - 1, ih, body, zeros_num);
  896 
  897                     zeros_num = tb->insert_size[0] = 0;
  898                 }
  899             }
  900 
  901             else /* new item or it part don't falls into S_new[i] */
  902             {
  903                 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
  904             }
  905             break;
  906 
  907         case M_PASTE:   /* append item */
  908 
  909             if ( n - snum[i] <= item_pos )  /* pasted item or part if it falls to S_new[i] */
  910             {
  911                 if ( item_pos == n - snum[i] && sbytes[i] != -1 )
  912                 { /* we must shift part of the appended item */
  913                     struct item_head * aux_ih;
  914 
  915                     RFALSE( ih, "PAP-12210: ih must be 0");
  916 
  917                     if ( is_direntry_le_ih (aux_ih = B_N_PITEM_HEAD(tbS0,item_pos))) {
  918                         /* we append to directory item */
  919 
  920                         int entry_count;
  921                 
  922                         entry_count = ih_entry_count(aux_ih);
  923 
  924                         if ( entry_count - sbytes[i] < pos_in_item  && pos_in_item <= entry_count ) {
  925                             /* new directory entry falls into S_new[i] */
  926                   
  927                             RFALSE( ! tb->insert_size[0],
  928                                     "PAP-12215: insert_size is already 0");
  929                             RFALSE( sbytes[i] - 1 >= entry_count,
  930                                     "PAP-12220: there are no so much entries (%d), only %d",
  931                                     sbytes[i] - 1, entry_count);
  932 
  933                             /* Shift snum[i]-1 items in whole. Shift sbytes[i] directory entries from directory item number snum[i] */
  934                             leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i]-1, S_new[i]);
  935                             /* Paste given directory entry to directory item */
  936                             bi.tb = tb;
  937                             bi.bi_bh = S_new[i];
  938                             bi.bi_parent = 0;
  939                             bi.bi_position = 0;
  940                             leaf_paste_in_buffer (&bi, 0, pos_in_item - entry_count + sbytes[i] - 1,
  941                                                   tb->insert_size[0], body,zeros_num);
  942                             /* paste new directory entry */
  943                             leaf_paste_entries (
  944                                 bi.bi_bh, 0, pos_in_item - entry_count + sbytes[i] - 1,
  945                                 1, (struct reiserfs_de_head *)body, body + DEH_SIZE,
  946                                 tb->insert_size[0]
  947                                 );
  948                             tb->insert_size[0] = 0;
  949                             pos_in_item++;
  950                         } else { /* new directory entry doesn't fall into S_new[i] */
  951                             leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
  952                         }
  953                     }
  954                     else /* regular object */
  955                     {
  956                         int n_shift, n_rem, r_zeros_number;
  957                         const char * r_body;
  958 
  959                         RFALSE( pos_in_item != ih_item_len(B_N_PITEM_HEAD(tbS0,item_pos)) ||
  960                                 tb->insert_size[0] <= 0,
  961                                 "PAP-12225: item too short or insert_size <= 0");
  962 
  963                         /* Calculate number of bytes which must be shifted from appended item */
  964                         n_shift = sbytes[i] - tb->insert_size[0];
  965                         if ( n_shift < 0 )
  966                             n_shift = 0;
  967                         leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], n_shift, S_new[i]);
  968 
  969                         /* Calculate number of bytes which must remain in body after append to S_new[i] */
  970                         n_rem = tb->insert_size[0] - sbytes[i];
  971                         if ( n_rem < 0 )
  972                             n_rem = 0;
  973                         /* Append part of body into S_new[0] */
  974                         bi.tb = tb;
  975                         bi.bi_bh = S_new[i];
  976                         bi.bi_parent = 0;
  977                         bi.bi_position = 0;
  978 
  979                         if ( n_rem > zeros_num ) {
  980                             r_zeros_number = 0;
  981                             r_body = body + n_rem - zeros_num;
  982                         }
  983                         else {
  984                             r_body = body;
  985                             r_zeros_number = zeros_num - n_rem;
  986                             zeros_num -= r_zeros_number;
  987                         }
  988 
  989                         leaf_paste_in_buffer(&bi, 0, n_shift, tb->insert_size[0]-n_rem, r_body,r_zeros_number);
  990                         {
  991                             struct item_head * tmp;
  992 
  993                             tmp = B_N_PITEM_HEAD(S_new[i],0);
  994                             if (is_indirect_le_ih (tmp)) {
  995                                 set_ih_free_space (tmp, 0);
  996                                 set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) + 
  997                                                     (n_rem << (tb->tb_sb->s_blocksize_bits -
  998                                                      UNFM_P_SHIFT)));
  999                             } else {
 1000                                 set_le_ih_k_offset( tmp, le_ih_k_offset(tmp) + 
 1001                                                     n_rem );
 1002                             }
 1003                         }
 1004 
 1005                         tb->insert_size[0] = n_rem;
 1006                         if ( ! n_rem )
 1007                             pos_in_item++;
 1008                     }
 1009                 }
 1010                 else
 1011                     /* item falls wholly into S_new[i] */
 1012                 {
 1013                     int ret_val;
 1014                     struct item_head * pasted;
 1015 
 1016 #ifdef CONFIG_REISERFS_CHECK
 1017                     struct item_head * ih = B_N_PITEM_HEAD(tbS0,item_pos);
 1018 
 1019                     if ( ! is_direntry_le_ih(ih) && (pos_in_item != ih_item_len(ih) ||
 1020                                                      tb->insert_size[0] <= 0) )
 1021                         reiserfs_panic (tb->tb_sb, "PAP-12235: balance_leaf: pos_in_item must be equal to ih_item_len");
 1022 #endif /* CONFIG_REISERFS_CHECK */
 1023 
 1024                     ret_val = leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
 1025 
 1026                     RFALSE( ret_val,
 1027                             "PAP-12240: unexpected value returned by leaf_move_items (%d)",
 1028                             ret_val);
 1029 
 1030                     /* paste into item */
 1031                     bi.tb = tb;
 1032                     bi.bi_bh = S_new[i];
 1033                     bi.bi_parent = 0;
 1034                     bi.bi_position = 0;
 1035                     leaf_paste_in_buffer(&bi, item_pos - n + snum[i], pos_in_item, tb->insert_size[0], body, zeros_num);
 1036 
 1037                     pasted = B_N_PITEM_HEAD(S_new[i], item_pos - n + snum[i]);
 1038                     if (is_direntry_le_ih (pasted))
 1039                     {
 1040                         leaf_paste_entries (
 1041                             bi.bi_bh, item_pos - n + snum[i], pos_in_item, 1, 
 1042                             (struct reiserfs_de_head *)body, body + DEH_SIZE, tb->insert_size[0]
 1043                             );
 1044                     }
 1045 
 1046                     /* if we paste to indirect item update ih_free_space */
 1047                     if (is_indirect_le_ih (pasted))
 1048                         set_ih_free_space (pasted, 0);
 1049                     zeros_num = tb->insert_size[0] = 0;
 1050                 }
 1051             }
 1052 
 1053             else /* pasted item doesn't fall into S_new[i] */
 1054             {
 1055                 leaf_move_items (LEAF_FROM_S_TO_SNEW, tb, snum[i], sbytes[i], S_new[i]);
 1056             }
 1057             break;
 1058         default:    /* cases d and t */
 1059             reiserfs_panic (tb->tb_sb, "PAP-12245: balance_leaf: blknum > 2: unexpectable mode: %s(%d)",
 1060                             (flag == M_DELETE) ? "DELETE" : ((flag == M_CUT) ? "CUT" : "UNKNOWN"), flag);
 1061         }
 1062 
 1063         memcpy (insert_key + i,B_N_PKEY(S_new[i],0),KEY_SIZE);
 1064         insert_ptr[i] = S_new[i];
 1065 
 1066         RFALSE( (atomic_read (&(S_new[i]->b_count)) != 1) &&
 1067                 (atomic_read(&(S_new[i]->b_count)) != 2 ||
 1068                  !(buffer_journaled(S_new[i]) || 
 1069                    buffer_journal_dirty(S_new[i]))), 
 1070                 "PAP-12247: S_new[%d] : (%b)\n", i, S_new[i]);
 1071 
 1072 
 1073     }
 1074 
 1075     /* if the affected item was not wholly shifted then we perform all necessary operations on that part or whole of the
 1076        affected item which remains in S */
 1077     if ( 0 <= item_pos && item_pos < tb->s0num )
 1078     { /* if we must insert or append into buffer S[0] */
 1079 
 1080         switch (flag)
 1081         {
 1082         case M_INSERT:   /* insert item into S[0] */
 1083             bi.tb = tb;
 1084             bi.bi_bh = tbS0;
 1085             bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
 1086             bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
 1087             leaf_insert_into_buf (&bi, item_pos, ih, body, zeros_num);
 1088 
 1089             /* If we insert the first key change the delimiting key */
 1090             if( item_pos == 0 ) {
 1091                 if (tb->CFL[0]) /* can be 0 in reiserfsck */
 1092                     replace_key(tb, tb->CFL[0], tb->lkey[0],tbS0,0);
 1093 
 1094             }
 1095             break;
 1096 
 1097         case M_PASTE: {  /* append item in S[0] */
 1098             struct item_head * pasted;
 1099 
 1100             pasted = B_N_PITEM_HEAD (tbS0, item_pos);
 1101             /* when directory, may be new entry already pasted */
 1102             if (is_direntry_le_ih (pasted)) {
 1103                 if ( pos_in_item >= 0 &&
 1104                     pos_in_item <= ih_entry_count(pasted) ) {
 1105 
 1106                     RFALSE( ! tb->insert_size[0], 
 1107                             "PAP-12260: insert_size is 0 already");
 1108 
 1109                     /* prepare space */
 1110                     bi.tb = tb;
 1111                     bi.bi_bh = tbS0;
 1112                     bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
 1113                     bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
 1114                     leaf_paste_in_buffer(&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num);
 1115 
 1116                     /* paste entry */
 1117                     leaf_paste_entries (
 1118                         bi.bi_bh, item_pos, pos_in_item, 1, (struct reiserfs_de_head *)body,
 1119                         body + DEH_SIZE, tb->insert_size[0]
 1120                         );
 1121                     if ( ! item_pos && ! pos_in_item ) {
 1122                         RFALSE( !tb->CFL[0] || !tb->L[0], 
 1123                                 "PAP-12270: CFL[0]/L[0] must be specified");
 1124                         if (tb->CFL[0]) {
 1125                             replace_key(tb, tb->CFL[0], tb->lkey[0],tbS0,0);
 1126 
 1127                         }
 1128                     }
 1129                     tb->insert_size[0] = 0;
 1130                 }
 1131             } else { /* regular object */
 1132                 if ( pos_in_item == ih_item_len(pasted) ) {
 1133 
 1134                     RFALSE( tb->insert_size[0] <= 0,
 1135                             "PAP-12275: insert size must not be %d",
 1136                             tb->insert_size[0]);
 1137                     bi.tb = tb;
 1138                     bi.bi_bh = tbS0;
 1139                     bi.bi_parent = PATH_H_PPARENT (tb->tb_path, 0);
 1140                     bi.bi_position = PATH_H_POSITION (tb->tb_path, 1);
 1141                     leaf_paste_in_buffer (&bi, item_pos, pos_in_item, tb->insert_size[0], body, zeros_num);
 1142 
 1143                     if (is_indirect_le_ih (pasted)) {
 1144 #if 0
 1145                         RFALSE( tb->insert_size[0] != UNFM_P_SIZE,
 1146                                 "PAP-12280: insert_size for indirect item must be %d, not %d",
 1147                                 UNFM_P_SIZE, tb->insert_size[0]);
 1148 #endif
 1149                         set_ih_free_space (pasted, 0);
 1150                     }
 1151                     tb->insert_size[0] = 0;
 1152                 }
 1153 
 1154 #ifdef CONFIG_REISERFS_CHECK
 1155                 else {
 1156                     if ( tb->insert_size[0] ) {
 1157                         print_cur_tb ("12285");
 1158                         reiserfs_panic (tb->tb_sb, "PAP-12285: balance_leaf: insert_size must be 0 (%d)", tb->insert_size[0]);
 1159                     }
 1160                 }
 1161 #endif /* CONFIG_REISERFS_CHECK */
 1162             
 1163             }
 1164         } /* case M_PASTE: */
 1165         }
 1166     }
 1167 
 1168 #ifdef CONFIG_REISERFS_CHECK
 1169     if ( flag == M_PASTE && tb->insert_size[0] ) {
 1170         print_cur_tb ("12290");
 1171         reiserfs_panic (tb->tb_sb, "PAP-12290: balance_leaf: insert_size is still not 0 (%d)", tb->insert_size[0]);
 1172     }
 1173 #endif /* CONFIG_REISERFS_CHECK */
 1174 
 1175     return 0;
 1176 } /* Leaf level of the tree is balanced (end of balance_leaf) */
 1177 
 1178 
 1179 
 1180 /* Make empty node */
 1181 void make_empty_node (struct buffer_info * bi)
 1182 {
 1183     struct block_head * blkh;
 1184 
 1185     RFALSE( bi->bi_bh == NULL, "PAP-12295: pointer to the buffer is NULL");
 1186 
 1187     blkh = B_BLK_HEAD(bi->bi_bh);
 1188     set_blkh_nr_item( blkh, 0 );
 1189     set_blkh_free_space( blkh, MAX_CHILD_SIZE(bi->bi_bh) );
 1190 
 1191     if (bi->bi_parent)
 1192         B_N_CHILD (bi->bi_parent, bi->bi_position)->dc_size = 0; /* Endian safe if 0 */
 1193 }
 1194 
 1195 
 1196 /* Get first empty buffer */
 1197 struct buffer_head * get_FEB (struct tree_balance * tb)
 1198 {
 1199     int i;
 1200     struct buffer_head * first_b;
 1201     struct buffer_info bi;
 1202 
 1203     for (i = 0; i < MAX_FEB_SIZE; i ++)
 1204         if (tb->FEB[i] != 0)
 1205             break;
 1206 
 1207     if (i == MAX_FEB_SIZE)
 1208         reiserfs_panic(tb->tb_sb, "vs-12300: get_FEB: FEB list is empty");
 1209 
 1210     bi.tb = tb;
 1211     bi.bi_bh = first_b = tb->FEB[i];
 1212     bi.bi_parent = 0;
 1213     bi.bi_position = 0;
 1214     make_empty_node (&bi);
 1215     set_bit(BH_Uptodate, &first_b->b_state);
 1216     tb->FEB[i] = 0;
 1217     tb->used[i] = first_b;
 1218 
 1219     return(first_b);
 1220 }
 1221 
 1222 
 1223 /* This is now used because reiserfs_free_block has to be able to
 1224 ** schedule.
 1225 */
 1226 static void store_thrown (struct tree_balance * tb, struct buffer_head * bh)
 1227 {
 1228     int i;
 1229 
 1230     if (buffer_dirty (bh))
 1231       reiserfs_warning (tb->tb_sb, "store_thrown deals with dirty buffer\n");
 1232     for (i = 0; i < sizeof (tb->thrown)/sizeof (tb->thrown[0]); i ++)
 1233         if (!tb->thrown[i]) {
 1234             tb->thrown[i] = bh;
 1235             get_bh(bh) ; /* free_thrown puts this */
 1236             return;
 1237         }
 1238     reiserfs_warning (tb->tb_sb, "store_thrown: too many thrown buffers\n");
 1239 }
 1240 
 1241 static void free_thrown(struct tree_balance *tb) {
 1242     int i ;
 1243     unsigned long blocknr ;
 1244     for (i = 0; i < sizeof (tb->thrown)/sizeof (tb->thrown[0]); i++) {
 1245         if (tb->thrown[i]) {
 1246             blocknr = tb->thrown[i]->b_blocknr ;
 1247             if (buffer_dirty (tb->thrown[i]))
 1248               reiserfs_warning (tb->tb_sb, "free_thrown deals with dirty buffer %ld\n", blocknr);
 1249             brelse(tb->thrown[i]) ; /* incremented in store_thrown */
 1250             reiserfs_free_block (tb->transaction_handle, blocknr);
 1251         }
 1252     }
 1253 }
 1254 
 1255 void reiserfs_invalidate_buffer (struct tree_balance * tb, struct buffer_head * bh)
 1256 {
 1257     struct block_head *blkh;
 1258     blkh = B_BLK_HEAD(bh);
 1259     set_blkh_level( blkh, FREE_LEVEL );
 1260     set_blkh_nr_item( blkh, 0 );
 1261     
 1262     mark_buffer_clean (bh);
 1263     /* reiserfs_free_block is no longer schedule safe 
 1264     reiserfs_free_block (tb->transaction_handle, tb->tb_sb, bh->b_blocknr);
 1265     */
 1266 
 1267     store_thrown (tb, bh);
 1268 }
 1269 
 1270 /* Replace n_dest'th key in buffer dest by n_src'th key of buffer src.*/
 1271 void replace_key (struct tree_balance * tb, struct buffer_head * dest, int n_dest,
 1272                   struct buffer_head * src, int n_src)
 1273 {
 1274 
 1275     RFALSE( dest == NULL || src == NULL,
 1276             "vs-12305: source or destination buffer is 0 (src=%p, dest=%p)",
 1277             src, dest);
 1278     RFALSE( ! B_IS_KEYS_LEVEL (dest),
 1279             "vs-12310: invalid level (%z) for destination buffer. dest must be leaf",
 1280             dest);
 1281     RFALSE( n_dest < 0 || n_src < 0,
 1282             "vs-12315: src(%d) or dest(%d) key number < 0", n_src, n_dest);
 1283     RFALSE( n_dest >= B_NR_ITEMS(dest) || n_src >= B_NR_ITEMS(src),
 1284             "vs-12320: src(%d(%d)) or dest(%d(%d)) key number is too big",
 1285             n_src, B_NR_ITEMS(src), n_dest, B_NR_ITEMS(dest));
 1286    
 1287     if (B_IS_ITEMS_LEVEL (src))
 1288         /* source buffer contains leaf node */
 1289         memcpy (B_N_PDELIM_KEY(dest,n_dest), B_N_PITEM_HEAD(src,n_src), KEY_SIZE);
 1290     else
 1291         memcpy (B_N_PDELIM_KEY(dest,n_dest), B_N_PDELIM_KEY(src,n_src), KEY_SIZE);
 1292 
 1293     do_balance_mark_internal_dirty (tb, dest, 0);
 1294 }
 1295 
 1296 
 1297 int get_left_neighbor_position (
 1298                                 struct tree_balance * tb, 
 1299                                 int h
 1300                                 )
 1301 {
 1302   int Sh_position = PATH_H_POSITION (tb->tb_path, h + 1);
 1303 
 1304   RFALSE( PATH_H_PPARENT (tb->tb_path, h) == 0 || tb->FL[h] == 0,
 1305           "vs-12325: FL[%d](%p) or F[%d](%p) does not exist", 
 1306           h, tb->FL[h], h, PATH_H_PPARENT (tb->tb_path, h));
 1307 
 1308   if (Sh_position == 0)
 1309     return B_NR_ITEMS (tb->FL[h]);
 1310   else
 1311     return Sh_position - 1;
 1312 }
 1313 
 1314 
 1315 int get_right_neighbor_position (struct tree_balance * tb, int h)
 1316 {
 1317   int Sh_position = PATH_H_POSITION (tb->tb_path, h + 1);
 1318 
 1319   RFALSE( PATH_H_PPARENT (tb->tb_path, h) == 0 || tb->FR[h] == 0,
 1320           "vs-12330: F[%d](%p) or FR[%d](%p) does not exist", 
 1321           h, PATH_H_PPARENT (tb->tb_path, h), h, tb->FR[h]);
 1322 
 1323   if (Sh_position == B_NR_ITEMS (PATH_H_PPARENT (tb->tb_path, h)))
 1324     return 0;
 1325   else
 1326     return Sh_position + 1;
 1327 }
 1328 
 1329 
 1330 #ifdef CONFIG_REISERFS_CHECK
 1331 
 1332 int is_reusable (struct super_block * s, unsigned long block, int bit_value);
 1333 static void check_internal_node (struct super_block * s, struct buffer_head * bh, char * mes)
 1334 {
 1335   struct disk_child * dc;
 1336   int i;
 1337 
 1338   RFALSE( !bh, "PAP-12336: bh == 0");
 1339 
 1340   if (!bh || !B_IS_IN_TREE (bh))
 1341     return;
 1342  
 1343   RFALSE( !buffer_dirty (bh) && 
 1344           !(buffer_journaled(bh) || buffer_journal_dirty(bh)),
 1345           "PAP-12337: buffer (%b) must be dirty", bh);
 1346   dc = B_N_CHILD (bh, 0);
 1347 
 1348   for (i = 0; i <= B_NR_ITEMS (bh); i ++, dc ++) {
 1349     if (!is_reusable (s, dc_block_number(dc), 1) ) {
 1350       print_cur_tb (mes);
 1351       reiserfs_panic (s, "PAP-12338: check_internal_node: invalid child pointer %y in %b", dc, bh);
 1352     }
 1353   }
 1354 }
 1355 
 1356 
 1357 static int locked_or_not_in_tree (struct buffer_head * bh, char * which)
 1358 {
 1359   if ( buffer_locked (bh) || !B_IS_IN_TREE (bh) ) {
 1360     reiserfs_warning (NULL, "vs-12339: locked_or_not_in_tree: %s (%b)\n", which, bh);
 1361     return 1;
 1362   } 
 1363   return 0;
 1364 }
 1365 
 1366 
 1367 static int check_before_balancing (struct tree_balance * tb)
 1368 {
 1369   int retval = 0;       
 1370 
 1371   if ( cur_tb ) {
 1372     reiserfs_panic (tb->tb_sb, "vs-12335: check_before_balancing: "
 1373                     "suspect that schedule occurred based on cur_tb not being null at this point in code. "
 1374                     "do_balance cannot properly handle schedule occuring while it runs.");
 1375   }
 1376   
 1377   /* double check that buffers that we will modify are unlocked. (fix_nodes should already have
 1378      prepped all of these for us). */
 1379   if ( tb->lnum[0] ) {
 1380     retval |= locked_or_not_in_tree (tb->L[0], "L[0]");
 1381     retval |= locked_or_not_in_tree (tb->FL[0], "FL[0]");
 1382     retval |= locked_or_not_in_tree (tb->CFL[0], "CFL[0]");
 1383     check_leaf (tb->L[0]);
 1384   }
 1385   if ( tb->rnum[0] ) {
 1386     retval |= locked_or_not_in_tree (tb->R[0], "R[0]");
 1387     retval |= locked_or_not_in_tree (tb->FR[0], "FR[0]");
 1388     retval |= locked_or_not_in_tree (tb->CFR[0], "CFR[0]");
 1389     check_leaf (tb->R[0]);
 1390   }
 1391   retval |= locked_or_not_in_tree (PATH_PLAST_BUFFER (tb->tb_path), "S[0]");
 1392   check_leaf (PATH_PLAST_BUFFER (tb->tb_path));
 1393 
 1394   return retval;
 1395 }
 1396 
 1397 
 1398 void check_after_balance_leaf (struct tree_balance * tb)
 1399 {
 1400     if (tb->lnum[0]) {
 1401         if (B_FREE_SPACE (tb->L[0]) != 
 1402             MAX_CHILD_SIZE (tb->L[0]) - dc_size(B_N_CHILD (tb->FL[0], get_left_neighbor_position (tb, 0)))) {
 1403             print_cur_tb ("12221");
 1404             reiserfs_panic (tb->tb_sb, "PAP-12355: check_after_balance_leaf: shift to left was incorrect");
 1405         }
 1406     }
 1407     if (tb->rnum[0]) {
 1408         if (B_FREE_SPACE (tb->R[0]) != 
 1409             MAX_CHILD_SIZE (tb->R[0]) - dc_size(B_N_CHILD (tb->FR[0], get_right_neighbor_position (tb, 0)))) {
 1410             print_cur_tb ("12222");
 1411             reiserfs_panic (tb->tb_sb, "PAP-12360: check_after_balance_leaf: shift to right was incorrect");
 1412         }
 1413     }
 1414     if (PATH_H_PBUFFER(tb->tb_path,1) &&
 1415         (B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) != 
 1416                     (MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)) -
 1417                     dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1),
 1418                     PATH_H_POSITION (tb->tb_path, 1)))) )) {
 1419         int left = B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0));
 1420         int right = (MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)) -
 1421                     dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1),
 1422                         PATH_H_POSITION (tb->tb_path, 1))));
 1423         print_cur_tb ("12223");
 1424         reiserfs_warning( tb->tb_sb, 
 1425             "B_FREE_SPACE (PATH_H_PBUFFER(tb->tb_path,0)) = %d; "
 1426             "MAX_CHILD_SIZE (%d) - dc_size( %y, %d ) [%d] = %d\n",
 1427             left,
 1428             MAX_CHILD_SIZE (PATH_H_PBUFFER(tb->tb_path,0)),
 1429             PATH_H_PBUFFER(tb->tb_path,1),
 1430             PATH_H_POSITION (tb->tb_path, 1),
 1431             dc_size(B_N_CHILD (PATH_H_PBUFFER(tb->tb_path,1), PATH_H_POSITION (tb->tb_path, 1 )) ),
 1432             right );
 1433         reiserfs_panic (tb->tb_sb, "PAP-12365: check_after_balance_leaf: S is incorrect");
 1434     }
 1435 }
 1436 
 1437 
 1438 void check_leaf_level (struct tree_balance * tb)
 1439 {
 1440   check_leaf (tb->L[0]);
 1441   check_leaf (tb->R[0]);
 1442   check_leaf (PATH_PLAST_BUFFER (tb->tb_path));
 1443 }
 1444 
 1445 void check_internal_levels (struct tree_balance * tb)
 1446 {
 1447   int h;
 1448 
 1449   /* check all internal nodes */
 1450   for (h = 1; tb->insert_size[h]; h ++) {
 1451     check_internal_node (tb->tb_sb, PATH_H_PBUFFER (tb->tb_path, h), "BAD BUFFER ON PATH");
 1452     if (tb->lnum[h])
 1453       check_internal_node (tb->tb_sb, tb->L[h], "BAD L");
 1454     if (tb->rnum[h])
 1455       check_internal_node (tb->tb_sb, tb->R[h], "BAD R");
 1456   }
 1457 
 1458 }
 1459 
 1460 #endif
 1461 
 1462 
 1463 
 1464 
 1465 
 1466 
 1467 /* Now we have all of the buffers that must be used in balancing of
 1468    the tree.  We rely on the assumption that schedule() will not occur
 1469    while do_balance works. ( Only interrupt handlers are acceptable.)
 1470    We balance the tree according to the analysis made before this,
 1471    using buffers already obtained.  For SMP support it will someday be
 1472    necessary to add ordered locking of tb. */
 1473 
 1474 /* Some interesting rules of balancing:
 1475 
 1476    we delete a maximum of two nodes per level per balancing: we never
 1477    delete R, when we delete two of three nodes L, S, R then we move
 1478    them into R.
 1479 
 1480    we only delete L if we are deleting two nodes, if we delete only
 1481    one node we delete S
 1482 
 1483    if we shift leaves then we shift as much as we can: this is a
 1484    deliberate policy of extremism in node packing which results in
 1485    higher average utilization after repeated random balance operations
 1486    at the cost of more memory copies and more balancing as a result of
 1487    small insertions to full nodes.
 1488 
 1489    if we shift internal nodes we try to evenly balance the node
 1490    utilization, with consequent less balancing at the cost of lower
 1491    utilization.
 1492 
 1493    one could argue that the policy for directories in leaves should be
 1494    that of internal nodes, but we will wait until another day to
 1495    evaluate this....  It would be nice to someday measure and prove
 1496    these assumptions as to what is optimal....
 1497 
 1498 */
 1499 
 1500 static inline void do_balance_starts (struct tree_balance *tb)
 1501 {
 1502     /* use print_cur_tb() to see initial state of struct
 1503        tree_balance */
 1504 
 1505     /* store_print_tb (tb); */
 1506 
 1507     /* do not delete, just comment it out */
 1508 /*    print_tb(flag, PATH_LAST_POSITION(tb->tb_path), tb->tb_path->pos_in_item, tb, 
 1509              "check");*/
 1510     RFALSE( check_before_balancing (tb), "PAP-12340: locked buffers in TB");
 1511 #ifdef CONFIG_REISERFS_CHECK
 1512     cur_tb = tb;
 1513 #endif
 1514 }
 1515 
 1516 
 1517 static inline void do_balance_completed (struct tree_balance * tb)
 1518 {
 1519     
 1520 #ifdef CONFIG_REISERFS_CHECK
 1521     check_leaf_level (tb);
 1522     check_internal_levels (tb);
 1523     cur_tb = NULL;
 1524 #endif
 1525 
 1526     /* reiserfs_free_block is no longer schedule safe.  So, we need to
 1527     ** put the buffers we want freed on the thrown list during do_balance,
 1528     ** and then free them now
 1529     */
 1530 
 1531     tb->tb_sb->u.reiserfs_sb.s_do_balance ++;
 1532 
 1533 
 1534     /* release all nodes hold to perform the balancing */
 1535     unfix_nodes(tb);
 1536 
 1537     free_thrown(tb) ;
 1538 }
 1539 
 1540 
 1541 
 1542 
 1543 
 1544 void do_balance (struct tree_balance * tb, /* tree_balance structure */
 1545                  struct item_head * ih,    /* item header of inserted item */
 1546                  const char * body,  /* body  of inserted item or bytes to paste */
 1547                  int flag)  /* i - insert, d - delete
 1548                                c - cut, p - paste
 1549                                                       
 1550                                Cut means delete part of an item
 1551                                (includes removing an entry from a
 1552                                directory).
 1553                                                       
 1554                                Delete means delete whole item.
 1555                                                       
 1556                                Insert means add a new item into the
 1557                                tree.
 1558                                                                                                       
 1559                                Paste means to append to the end of an
 1560                                existing file or to insert a directory
 1561                                entry.  */
 1562 {
 1563     int child_pos, /* position of a child node in its parent */
 1564         h;         /* level of the tree being processed */
 1565     struct item_head insert_key[2]; /* in our processing of one level
 1566                                        we sometimes determine what
 1567                                        must be inserted into the next
 1568                                        higher level.  This insertion
 1569                                        consists of a key or two keys
 1570                                        and their corresponding
 1571                                        pointers */
 1572     struct buffer_head *insert_ptr[2]; /* inserted node-ptrs for the next
 1573                                           level */
 1574 
 1575     tb->tb_mode = flag;
 1576     tb->need_balance_dirty = 0;
 1577 
 1578     if (FILESYSTEM_CHANGED_TB(tb)) {
 1579         reiserfs_panic(tb->tb_sb, "clm-6000: do_balance, fs generation has changed\n") ;
 1580     }
 1581     /* if we have no real work to do  */
 1582     if ( ! tb->insert_size[0] ) {
 1583         reiserfs_warning (tb->tb_sb, "PAP-12350: do_balance: insert_size == 0, mode == %c\n ",
 1584                           flag);
 1585         unfix_nodes(tb);
 1586         return;
 1587     }
 1588 
 1589     atomic_inc (&(fs_generation (tb->tb_sb)));
 1590     do_balance_starts (tb);
 1591     
 1592         /* balance leaf returns 0 except if combining L R and S into
 1593            one node.  see balance_internal() for explanation of this
 1594            line of code.*/
 1595         child_pos = PATH_H_B_ITEM_ORDER (tb->tb_path, 0) +
 1596           balance_leaf (tb, ih, body, flag, insert_key, insert_ptr);
 1597 
 1598 #ifdef CONFIG_REISERFS_CHECK
 1599     check_after_balance_leaf (tb);
 1600 #endif
 1601 
 1602     /* Balance internal level of the tree. */
 1603     for ( h = 1; h < MAX_HEIGHT && tb->insert_size[h]; h++ )
 1604         child_pos = balance_internal (tb, h, child_pos, insert_key, insert_ptr);
 1605 
 1606 
 1607     do_balance_completed (tb);
 1608 
 1609 }

Cache object: 1a83211900301398e077ab99716b7a5d


[ source navigation ] [ diff markup ] [ identifier search ] [ freetext search ] [ file search ] [ list types ] [ track identifier ]


This page is part of the FreeBSD/Linux Linux Kernel Cross-Reference, and was automatically generated using a modified version of the LXR engine.