]> Pileus Git - ~andy/linux/blob - fs/xfs/xfs_ialloc_btree.c
[XFS] Remove xfs_macros.c, xfs_macros.h, rework headers a whole lot.
[~andy/linux] / fs / xfs / xfs_ialloc_btree.c
1 /*
2  * Copyright (c) 2000-2001 Silicon Graphics, Inc.  All Rights Reserved.
3  *
4  * This program is free software; you can redistribute it and/or modify it
5  * under the terms of version 2 of the GNU General Public License as
6  * published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it would be useful, but
9  * WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
11  *
12  * Further, this software is distributed without any warranty that it is
13  * free of the rightful claim of any third person regarding infringement
14  * or the like.  Any license provided herein, whether implied or
15  * otherwise, applies only to this software file.  Patent licenses, if
16  * any, provided herein do not apply to combinations of this program with
17  * other software, or any other product whatsoever.
18  *
19  * You should have received a copy of the GNU General Public License along
20  * with this program; if not, write the Free Software Foundation, Inc., 59
21  * Temple Place - Suite 330, Boston MA 02111-1307, USA.
22  *
23  * Contact information: Silicon Graphics, Inc., 1600 Amphitheatre Pkwy,
24  * Mountain View, CA  94043, or:
25  *
26  * http://www.sgi.com
27  *
28  * For further information regarding this notice, see:
29  *
30  * http://oss.sgi.com/projects/GenInfo/SGIGPLNoticeExplan/
31  */
32 #include "xfs.h"
33 #include "xfs_fs.h"
34 #include "xfs_types.h"
35 #include "xfs_bit.h"
36 #include "xfs_log.h"
37 #include "xfs_inum.h"
38 #include "xfs_trans.h"
39 #include "xfs_sb.h"
40 #include "xfs_ag.h"
41 #include "xfs_dir.h"
42 #include "xfs_dir2.h"
43 #include "xfs_dmapi.h"
44 #include "xfs_mount.h"
45 #include "xfs_bmap_btree.h"
46 #include "xfs_alloc_btree.h"
47 #include "xfs_ialloc_btree.h"
48 #include "xfs_dir_sf.h"
49 #include "xfs_dir2_sf.h"
50 #include "xfs_attr_sf.h"
51 #include "xfs_dinode.h"
52 #include "xfs_inode.h"
53 #include "xfs_btree.h"
54 #include "xfs_ialloc.h"
55 #include "xfs_alloc.h"
56 #include "xfs_error.h"
57
58 /*
59  * Inode allocation management for XFS.
60  */
61
62 /*
63  * Prototypes for internal functions.
64  */
65
66 STATIC void xfs_inobt_log_block(xfs_trans_t *, xfs_buf_t *, int);
67 STATIC void xfs_inobt_log_keys(xfs_btree_cur_t *, xfs_buf_t *, int, int);
68 STATIC void xfs_inobt_log_ptrs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
69 STATIC void xfs_inobt_log_recs(xfs_btree_cur_t *, xfs_buf_t *, int, int);
70 STATIC int xfs_inobt_lshift(xfs_btree_cur_t *, int, int *);
71 STATIC int xfs_inobt_newroot(xfs_btree_cur_t *, int *);
72 STATIC int xfs_inobt_rshift(xfs_btree_cur_t *, int, int *);
73 STATIC int xfs_inobt_split(xfs_btree_cur_t *, int, xfs_agblock_t *,
74                 xfs_inobt_key_t *, xfs_btree_cur_t **, int *);
75 STATIC int xfs_inobt_updkey(xfs_btree_cur_t *, xfs_inobt_key_t *, int);
76
77 /*
78  * Internal functions.
79  */
80
81 /*
82  * Single level of the xfs_inobt_delete record deletion routine.
83  * Delete record pointed to by cur/level.
84  * Remove the record from its block then rebalance the tree.
85  * Return 0 for error, 1 for done, 2 to go on to the next level.
86  */
87 STATIC int                              /* error */
88 xfs_inobt_delrec(
89         xfs_btree_cur_t         *cur,   /* btree cursor */
90         int                     level,  /* level removing record from */
91         int                     *stat)  /* fail/done/go-on */
92 {
93         xfs_buf_t               *agbp;  /* buffer for a.g. inode header */
94         xfs_mount_t             *mp;    /* mount structure */
95         xfs_agi_t               *agi;   /* allocation group inode header */
96         xfs_inobt_block_t       *block; /* btree block record/key lives in */
97         xfs_agblock_t           bno;    /* btree block number */
98         xfs_buf_t               *bp;    /* buffer for block */
99         int                     error;  /* error return value */
100         int                     i;      /* loop index */
101         xfs_inobt_key_t         key;    /* kp points here if block is level 0 */
102         xfs_inobt_key_t         *kp = NULL;     /* pointer to btree keys */
103         xfs_agblock_t           lbno;   /* left block's block number */
104         xfs_buf_t               *lbp;   /* left block's buffer pointer */
105         xfs_inobt_block_t       *left;  /* left btree block */
106         xfs_inobt_key_t         *lkp;   /* left block key pointer */
107         xfs_inobt_ptr_t         *lpp;   /* left block address pointer */
108         int                     lrecs = 0;      /* number of records in left block */
109         xfs_inobt_rec_t         *lrp;   /* left block record pointer */
110         xfs_inobt_ptr_t         *pp = NULL;     /* pointer to btree addresses */
111         int                     ptr;    /* index in btree block for this rec */
112         xfs_agblock_t           rbno;   /* right block's block number */
113         xfs_buf_t               *rbp;   /* right block's buffer pointer */
114         xfs_inobt_block_t       *right; /* right btree block */
115         xfs_inobt_key_t         *rkp;   /* right block key pointer */
116         xfs_inobt_rec_t         *rp;    /* pointer to btree records */
117         xfs_inobt_ptr_t         *rpp;   /* right block address pointer */
118         int                     rrecs = 0;      /* number of records in right block */
119         int                     numrecs;
120         xfs_inobt_rec_t         *rrp;   /* right block record pointer */
121         xfs_btree_cur_t         *tcur;  /* temporary btree cursor */
122
123         mp = cur->bc_mp;
124
125         /*
126          * Get the index of the entry being deleted, check for nothing there.
127          */
128         ptr = cur->bc_ptrs[level];
129         if (ptr == 0) {
130                 *stat = 0;
131                 return 0;
132         }
133
134         /*
135          * Get the buffer & block containing the record or key/ptr.
136          */
137         bp = cur->bc_bufs[level];
138         block = XFS_BUF_TO_INOBT_BLOCK(bp);
139 #ifdef DEBUG
140         if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
141                 return error;
142 #endif
143         /*
144          * Fail if we're off the end of the block.
145          */
146
147         numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
148         if (ptr > numrecs) {
149                 *stat = 0;
150                 return 0;
151         }
152         /*
153          * It's a nonleaf.  Excise the key and ptr being deleted, by
154          * sliding the entries past them down one.
155          * Log the changed areas of the block.
156          */
157         if (level > 0) {
158                 kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
159                 pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
160 #ifdef DEBUG
161                 for (i = ptr; i < numrecs; i++) {
162                         if ((error = xfs_btree_check_sptr(cur, INT_GET(pp[i], ARCH_CONVERT), level)))
163                                 return error;
164                 }
165 #endif
166                 if (ptr < numrecs) {
167                         memmove(&kp[ptr - 1], &kp[ptr],
168                                 (numrecs - ptr) * sizeof(*kp));
169                         memmove(&pp[ptr - 1], &pp[ptr],
170                                 (numrecs - ptr) * sizeof(*kp));
171                         xfs_inobt_log_keys(cur, bp, ptr, numrecs - 1);
172                         xfs_inobt_log_ptrs(cur, bp, ptr, numrecs - 1);
173                 }
174         }
175         /*
176          * It's a leaf.  Excise the record being deleted, by sliding the
177          * entries past it down one.  Log the changed areas of the block.
178          */
179         else {
180                 rp = XFS_INOBT_REC_ADDR(block, 1, cur);
181                 if (ptr < numrecs) {
182                         memmove(&rp[ptr - 1], &rp[ptr],
183                                 (numrecs - ptr) * sizeof(*rp));
184                         xfs_inobt_log_recs(cur, bp, ptr, numrecs - 1);
185                 }
186                 /*
187                  * If it's the first record in the block, we'll need a key
188                  * structure to pass up to the next level (updkey).
189                  */
190                 if (ptr == 1) {
191                         key.ir_startino = rp->ir_startino;
192                         kp = &key;
193                 }
194         }
195         /*
196          * Decrement and log the number of entries in the block.
197          */
198         numrecs--;
199         INT_SET(block->bb_numrecs, ARCH_CONVERT, numrecs);
200         xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
201         /*
202          * Is this the root level?  If so, we're almost done.
203          */
204         if (level == cur->bc_nlevels - 1) {
205                 /*
206                  * If this is the root level,
207                  * and there's only one entry left,
208                  * and it's NOT the leaf level,
209                  * then we can get rid of this level.
210                  */
211                 if (numrecs == 1 && level > 0) {
212                         agbp = cur->bc_private.i.agbp;
213                         agi = XFS_BUF_TO_AGI(agbp);
214                         /*
215                          * pp is still set to the first pointer in the block.
216                          * Make it the new root of the btree.
217                          */
218                         bno = INT_GET(agi->agi_root, ARCH_CONVERT);
219                         agi->agi_root = *pp;
220                         INT_MOD(agi->agi_level, ARCH_CONVERT, -1);
221                         /*
222                          * Free the block.
223                          */
224                         if ((error = xfs_free_extent(cur->bc_tp,
225                                 XFS_AGB_TO_FSB(mp, cur->bc_private.i.agno, bno), 1)))
226                                 return error;
227                         xfs_trans_binval(cur->bc_tp, bp);
228                         xfs_ialloc_log_agi(cur->bc_tp, agbp,
229                                 XFS_AGI_ROOT | XFS_AGI_LEVEL);
230                         /*
231                          * Update the cursor so there's one fewer level.
232                          */
233                         cur->bc_bufs[level] = NULL;
234                         cur->bc_nlevels--;
235                 } else if (level > 0 &&
236                            (error = xfs_inobt_decrement(cur, level, &i)))
237                         return error;
238                 *stat = 1;
239                 return 0;
240         }
241         /*
242          * If we deleted the leftmost entry in the block, update the
243          * key values above us in the tree.
244          */
245         if (ptr == 1 && (error = xfs_inobt_updkey(cur, kp, level + 1)))
246                 return error;
247         /*
248          * If the number of records remaining in the block is at least
249          * the minimum, we're done.
250          */
251         if (numrecs >= XFS_INOBT_BLOCK_MINRECS(level, cur)) {
252                 if (level > 0 &&
253                     (error = xfs_inobt_decrement(cur, level, &i)))
254                         return error;
255                 *stat = 1;
256                 return 0;
257         }
258         /*
259          * Otherwise, we have to move some records around to keep the
260          * tree balanced.  Look at the left and right sibling blocks to
261          * see if we can re-balance by moving only one record.
262          */
263         rbno = INT_GET(block->bb_rightsib, ARCH_CONVERT);
264         lbno = INT_GET(block->bb_leftsib, ARCH_CONVERT);
265         bno = NULLAGBLOCK;
266         ASSERT(rbno != NULLAGBLOCK || lbno != NULLAGBLOCK);
267         /*
268          * Duplicate the cursor so our btree manipulations here won't
269          * disrupt the next level up.
270          */
271         if ((error = xfs_btree_dup_cursor(cur, &tcur)))
272                 return error;
273         /*
274          * If there's a right sibling, see if it's ok to shift an entry
275          * out of it.
276          */
277         if (rbno != NULLAGBLOCK) {
278                 /*
279                  * Move the temp cursor to the last entry in the next block.
280                  * Actually any entry but the first would suffice.
281                  */
282                 i = xfs_btree_lastrec(tcur, level);
283                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
284                 if ((error = xfs_inobt_increment(tcur, level, &i)))
285                         goto error0;
286                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
287                 i = xfs_btree_lastrec(tcur, level);
288                 XFS_WANT_CORRUPTED_GOTO(i == 1, error0);
289                 /*
290                  * Grab a pointer to the block.
291                  */
292                 rbp = tcur->bc_bufs[level];
293                 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
294 #ifdef DEBUG
295                 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
296                         goto error0;
297 #endif
298                 /*
299                  * Grab the current block number, for future use.
300                  */
301                 bno = INT_GET(right->bb_leftsib, ARCH_CONVERT);
302                 /*
303                  * If right block is full enough so that removing one entry
304                  * won't make it too empty, and left-shifting an entry out
305                  * of right to us works, we're done.
306                  */
307                 if (INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1 >=
308                      XFS_INOBT_BLOCK_MINRECS(level, cur)) {
309                         if ((error = xfs_inobt_lshift(tcur, level, &i)))
310                                 goto error0;
311                         if (i) {
312                                 ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >=
313                                        XFS_INOBT_BLOCK_MINRECS(level, cur));
314                                 xfs_btree_del_cursor(tcur,
315                                                      XFS_BTREE_NOERROR);
316                                 if (level > 0 &&
317                                     (error = xfs_inobt_decrement(cur, level,
318                                                 &i)))
319                                         return error;
320                                 *stat = 1;
321                                 return 0;
322                         }
323                 }
324                 /*
325                  * Otherwise, grab the number of records in right for
326                  * future reference, and fix up the temp cursor to point
327                  * to our block again (last record).
328                  */
329                 rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT);
330                 if (lbno != NULLAGBLOCK) {
331                         xfs_btree_firstrec(tcur, level);
332                         if ((error = xfs_inobt_decrement(tcur, level, &i)))
333                                 goto error0;
334                 }
335         }
336         /*
337          * If there's a left sibling, see if it's ok to shift an entry
338          * out of it.
339          */
340         if (lbno != NULLAGBLOCK) {
341                 /*
342                  * Move the temp cursor to the first entry in the
343                  * previous block.
344                  */
345                 xfs_btree_firstrec(tcur, level);
346                 if ((error = xfs_inobt_decrement(tcur, level, &i)))
347                         goto error0;
348                 xfs_btree_firstrec(tcur, level);
349                 /*
350                  * Grab a pointer to the block.
351                  */
352                 lbp = tcur->bc_bufs[level];
353                 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
354 #ifdef DEBUG
355                 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
356                         goto error0;
357 #endif
358                 /*
359                  * Grab the current block number, for future use.
360                  */
361                 bno = INT_GET(left->bb_rightsib, ARCH_CONVERT);
362                 /*
363                  * If left block is full enough so that removing one entry
364                  * won't make it too empty, and right-shifting an entry out
365                  * of left to us works, we're done.
366                  */
367                 if (INT_GET(left->bb_numrecs, ARCH_CONVERT) - 1 >=
368                      XFS_INOBT_BLOCK_MINRECS(level, cur)) {
369                         if ((error = xfs_inobt_rshift(tcur, level, &i)))
370                                 goto error0;
371                         if (i) {
372                                 ASSERT(INT_GET(block->bb_numrecs, ARCH_CONVERT) >=
373                                        XFS_INOBT_BLOCK_MINRECS(level, cur));
374                                 xfs_btree_del_cursor(tcur,
375                                                      XFS_BTREE_NOERROR);
376                                 if (level == 0)
377                                         cur->bc_ptrs[0]++;
378                                 *stat = 1;
379                                 return 0;
380                         }
381                 }
382                 /*
383                  * Otherwise, grab the number of records in right for
384                  * future reference.
385                  */
386                 lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT);
387         }
388         /*
389          * Delete the temp cursor, we're done with it.
390          */
391         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
392         /*
393          * If here, we need to do a join to keep the tree balanced.
394          */
395         ASSERT(bno != NULLAGBLOCK);
396         /*
397          * See if we can join with the left neighbor block.
398          */
399         if (lbno != NULLAGBLOCK &&
400             lrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
401                 /*
402                  * Set "right" to be the starting block,
403                  * "left" to be the left neighbor.
404                  */
405                 rbno = bno;
406                 right = block;
407                 rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT);
408                 rbp = bp;
409                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
410                                 cur->bc_private.i.agno, lbno, 0, &lbp,
411                                 XFS_INO_BTREE_REF)))
412                         return error;
413                 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
414                 lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT);
415                 if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
416                         return error;
417         }
418         /*
419          * If that won't work, see if we can join with the right neighbor block.
420          */
421         else if (rbno != NULLAGBLOCK &&
422                  rrecs + numrecs <= XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
423                 /*
424                  * Set "left" to be the starting block,
425                  * "right" to be the right neighbor.
426                  */
427                 lbno = bno;
428                 left = block;
429                 lrecs = INT_GET(left->bb_numrecs, ARCH_CONVERT);
430                 lbp = bp;
431                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
432                                 cur->bc_private.i.agno, rbno, 0, &rbp,
433                                 XFS_INO_BTREE_REF)))
434                         return error;
435                 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
436                 rrecs = INT_GET(right->bb_numrecs, ARCH_CONVERT);
437                 if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
438                         return error;
439         }
440         /*
441          * Otherwise, we can't fix the imbalance.
442          * Just return.  This is probably a logic error, but it's not fatal.
443          */
444         else {
445                 if (level > 0 && (error = xfs_inobt_decrement(cur, level, &i)))
446                         return error;
447                 *stat = 1;
448                 return 0;
449         }
450         /*
451          * We're now going to join "left" and "right" by moving all the stuff
452          * in "right" to "left" and deleting "right".
453          */
454         if (level > 0) {
455                 /*
456                  * It's a non-leaf.  Move keys and pointers.
457                  */
458                 lkp = XFS_INOBT_KEY_ADDR(left, lrecs + 1, cur);
459                 lpp = XFS_INOBT_PTR_ADDR(left, lrecs + 1, cur);
460                 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
461                 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
462 #ifdef DEBUG
463                 for (i = 0; i < rrecs; i++) {
464                         if ((error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level)))
465                                 return error;
466                 }
467 #endif
468                 memcpy(lkp, rkp, rrecs * sizeof(*lkp));
469                 memcpy(lpp, rpp, rrecs * sizeof(*lpp));
470                 xfs_inobt_log_keys(cur, lbp, lrecs + 1, lrecs + rrecs);
471                 xfs_inobt_log_ptrs(cur, lbp, lrecs + 1, lrecs + rrecs);
472         } else {
473                 /*
474                  * It's a leaf.  Move records.
475                  */
476                 lrp = XFS_INOBT_REC_ADDR(left, lrecs + 1, cur);
477                 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
478                 memcpy(lrp, rrp, rrecs * sizeof(*lrp));
479                 xfs_inobt_log_recs(cur, lbp, lrecs + 1, lrecs + rrecs);
480         }
481         /*
482          * If we joined with the left neighbor, set the buffer in the
483          * cursor to the left block, and fix up the index.
484          */
485         if (bp != lbp) {
486                 xfs_btree_setbuf(cur, level, lbp);
487                 cur->bc_ptrs[level] += lrecs;
488         }
489         /*
490          * If we joined with the right neighbor and there's a level above
491          * us, increment the cursor at that level.
492          */
493         else if (level + 1 < cur->bc_nlevels &&
494                  (error = xfs_alloc_increment(cur, level + 1, &i)))
495                 return error;
496         /*
497          * Fix up the number of records in the surviving block.
498          */
499         lrecs += rrecs;
500         INT_SET(left->bb_numrecs, ARCH_CONVERT, lrecs);
501         /*
502          * Fix up the right block pointer in the surviving block, and log it.
503          */
504         left->bb_rightsib = right->bb_rightsib;
505         xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
506         /*
507          * If there is a right sibling now, make it point to the
508          * remaining block.
509          */
510         if (INT_GET(left->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
511                 xfs_inobt_block_t       *rrblock;
512                 xfs_buf_t               *rrbp;
513
514                 if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
515                                 cur->bc_private.i.agno, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0,
516                                 &rrbp, XFS_INO_BTREE_REF)))
517                         return error;
518                 rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
519                 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
520                         return error;
521                 INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, lbno);
522                 xfs_inobt_log_block(cur->bc_tp, rrbp, XFS_BB_LEFTSIB);
523         }
524         /*
525          * Free the deleting block.
526          */
527         if ((error = xfs_free_extent(cur->bc_tp, XFS_AGB_TO_FSB(mp,
528                                      cur->bc_private.i.agno, rbno), 1)))
529                 return error;
530         xfs_trans_binval(cur->bc_tp, rbp);
531         /*
532          * Readjust the ptr at this level if it's not a leaf, since it's
533          * still pointing at the deletion point, which makes the cursor
534          * inconsistent.  If this makes the ptr 0, the caller fixes it up.
535          * We can't use decrement because it would change the next level up.
536          */
537         if (level > 0)
538                 cur->bc_ptrs[level]--;
539         /*
540          * Return value means the next level up has something to do.
541          */
542         *stat = 2;
543         return 0;
544
545 error0:
546         xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
547         return error;
548 }
549
550 /*
551  * Insert one record/level.  Return information to the caller
552  * allowing the next level up to proceed if necessary.
553  */
554 STATIC int                              /* error */
555 xfs_inobt_insrec(
556         xfs_btree_cur_t         *cur,   /* btree cursor */
557         int                     level,  /* level to insert record at */
558         xfs_agblock_t           *bnop,  /* i/o: block number inserted */
559         xfs_inobt_rec_t         *recp,  /* i/o: record data inserted */
560         xfs_btree_cur_t         **curp, /* output: new cursor replacing cur */
561         int                     *stat)  /* success/failure */
562 {
563         xfs_inobt_block_t       *block; /* btree block record/key lives in */
564         xfs_buf_t               *bp;    /* buffer for block */
565         int                     error;  /* error return value */
566         int                     i;      /* loop index */
567         xfs_inobt_key_t         key;    /* key value being inserted */
568         xfs_inobt_key_t         *kp=NULL;       /* pointer to btree keys */
569         xfs_agblock_t           nbno;   /* block number of allocated block */
570         xfs_btree_cur_t         *ncur;  /* new cursor to be used at next lvl */
571         xfs_inobt_key_t         nkey;   /* new key value, from split */
572         xfs_inobt_rec_t         nrec;   /* new record value, for caller */
573         int                     numrecs;
574         int                     optr;   /* old ptr value */
575         xfs_inobt_ptr_t         *pp;    /* pointer to btree addresses */
576         int                     ptr;    /* index in btree block for this rec */
577         xfs_inobt_rec_t         *rp=NULL;       /* pointer to btree records */
578
579         /*
580          * If we made it to the root level, allocate a new root block
581          * and we're done.
582          */
583         if (level >= cur->bc_nlevels) {
584                 error = xfs_inobt_newroot(cur, &i);
585                 *bnop = NULLAGBLOCK;
586                 *stat = i;
587                 return error;
588         }
589         /*
590          * Make a key out of the record data to be inserted, and save it.
591          */
592         key.ir_startino = recp->ir_startino; /* INT_: direct copy */
593         optr = ptr = cur->bc_ptrs[level];
594         /*
595          * If we're off the left edge, return failure.
596          */
597         if (ptr == 0) {
598                 *stat = 0;
599                 return 0;
600         }
601         /*
602          * Get pointers to the btree buffer and block.
603          */
604         bp = cur->bc_bufs[level];
605         block = XFS_BUF_TO_INOBT_BLOCK(bp);
606         numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
607 #ifdef DEBUG
608         if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
609                 return error;
610         /*
611          * Check that the new entry is being inserted in the right place.
612          */
613         if (ptr <= numrecs) {
614                 if (level == 0) {
615                         rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
616                         xfs_btree_check_rec(cur->bc_btnum, recp, rp);
617                 } else {
618                         kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
619                         xfs_btree_check_key(cur->bc_btnum, &key, kp);
620                 }
621         }
622 #endif
623         nbno = NULLAGBLOCK;
624         ncur = (xfs_btree_cur_t *)0;
625         /*
626          * If the block is full, we can't insert the new entry until we
627          * make the block un-full.
628          */
629         if (numrecs == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
630                 /*
631                  * First, try shifting an entry to the right neighbor.
632                  */
633                 if ((error = xfs_inobt_rshift(cur, level, &i)))
634                         return error;
635                 if (i) {
636                         /* nothing */
637                 }
638                 /*
639                  * Next, try shifting an entry to the left neighbor.
640                  */
641                 else {
642                         if ((error = xfs_inobt_lshift(cur, level, &i)))
643                                 return error;
644                         if (i) {
645                                 optr = ptr = cur->bc_ptrs[level];
646                         } else {
647                                 /*
648                                  * Next, try splitting the current block
649                                  * in half. If this works we have to
650                                  * re-set our variables because
651                                  * we could be in a different block now.
652                                  */
653                                 if ((error = xfs_inobt_split(cur, level, &nbno,
654                                                 &nkey, &ncur, &i)))
655                                         return error;
656                                 if (i) {
657                                         bp = cur->bc_bufs[level];
658                                         block = XFS_BUF_TO_INOBT_BLOCK(bp);
659 #ifdef DEBUG
660                                         if ((error = xfs_btree_check_sblock(cur,
661                                                         block, level, bp)))
662                                                 return error;
663 #endif
664                                         ptr = cur->bc_ptrs[level];
665                                         nrec.ir_startino = nkey.ir_startino; /* INT_: direct copy */
666                                 } else {
667                                         /*
668                                          * Otherwise the insert fails.
669                                          */
670                                         *stat = 0;
671                                         return 0;
672                                 }
673                         }
674                 }
675         }
676         /*
677          * At this point we know there's room for our new entry in the block
678          * we're pointing at.
679          */
680         numrecs = INT_GET(block->bb_numrecs, ARCH_CONVERT);
681         if (level > 0) {
682                 /*
683                  * It's a non-leaf entry.  Make a hole for the new data
684                  * in the key and ptr regions of the block.
685                  */
686                 kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
687                 pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
688 #ifdef DEBUG
689                 for (i = numrecs; i >= ptr; i--) {
690                         if ((error = xfs_btree_check_sptr(cur, INT_GET(pp[i - 1], ARCH_CONVERT), level)))
691                                 return error;
692                 }
693 #endif
694                 memmove(&kp[ptr], &kp[ptr - 1],
695                         (numrecs - ptr + 1) * sizeof(*kp));
696                 memmove(&pp[ptr], &pp[ptr - 1],
697                         (numrecs - ptr + 1) * sizeof(*pp));
698                 /*
699                  * Now stuff the new data in, bump numrecs and log the new data.
700                  */
701 #ifdef DEBUG
702                 if ((error = xfs_btree_check_sptr(cur, *bnop, level)))
703                         return error;
704 #endif
705                 kp[ptr - 1] = key; /* INT_: struct copy */
706                 INT_SET(pp[ptr - 1], ARCH_CONVERT, *bnop);
707                 numrecs++;
708                 INT_SET(block->bb_numrecs, ARCH_CONVERT, numrecs);
709                 xfs_inobt_log_keys(cur, bp, ptr, numrecs);
710                 xfs_inobt_log_ptrs(cur, bp, ptr, numrecs);
711         } else {
712                 /*
713                  * It's a leaf entry.  Make a hole for the new record.
714                  */
715                 rp = XFS_INOBT_REC_ADDR(block, 1, cur);
716                 memmove(&rp[ptr], &rp[ptr - 1],
717                         (numrecs - ptr + 1) * sizeof(*rp));
718                 /*
719                  * Now stuff the new record in, bump numrecs
720                  * and log the new data.
721                  */
722                 rp[ptr - 1] = *recp; /* INT_: struct copy */
723                 numrecs++;
724                 INT_SET(block->bb_numrecs, ARCH_CONVERT, numrecs);
725                 xfs_inobt_log_recs(cur, bp, ptr, numrecs);
726         }
727         /*
728          * Log the new number of records in the btree header.
729          */
730         xfs_inobt_log_block(cur->bc_tp, bp, XFS_BB_NUMRECS);
731 #ifdef DEBUG
732         /*
733          * Check that the key/record is in the right place, now.
734          */
735         if (ptr < numrecs) {
736                 if (level == 0)
737                         xfs_btree_check_rec(cur->bc_btnum, rp + ptr - 1,
738                                 rp + ptr);
739                 else
740                         xfs_btree_check_key(cur->bc_btnum, kp + ptr - 1,
741                                 kp + ptr);
742         }
743 #endif
744         /*
745          * If we inserted at the start of a block, update the parents' keys.
746          */
747         if (optr == 1 && (error = xfs_inobt_updkey(cur, &key, level + 1)))
748                 return error;
749         /*
750          * Return the new block number, if any.
751          * If there is one, give back a record value and a cursor too.
752          */
753         *bnop = nbno;
754         if (nbno != NULLAGBLOCK) {
755                 *recp = nrec; /* INT_: struct copy */
756                 *curp = ncur;
757         }
758         *stat = 1;
759         return 0;
760 }
761
762 /*
763  * Log header fields from a btree block.
764  */
765 STATIC void
766 xfs_inobt_log_block(
767         xfs_trans_t             *tp,    /* transaction pointer */
768         xfs_buf_t               *bp,    /* buffer containing btree block */
769         int                     fields) /* mask of fields: XFS_BB_... */
770 {
771         int                     first;  /* first byte offset logged */
772         int                     last;   /* last byte offset logged */
773         static const short      offsets[] = {   /* table of offsets */
774                 offsetof(xfs_inobt_block_t, bb_magic),
775                 offsetof(xfs_inobt_block_t, bb_level),
776                 offsetof(xfs_inobt_block_t, bb_numrecs),
777                 offsetof(xfs_inobt_block_t, bb_leftsib),
778                 offsetof(xfs_inobt_block_t, bb_rightsib),
779                 sizeof(xfs_inobt_block_t)
780         };
781
782         xfs_btree_offsets(fields, offsets, XFS_BB_NUM_BITS, &first, &last);
783         xfs_trans_log_buf(tp, bp, first, last);
784 }
785
786 /*
787  * Log keys from a btree block (nonleaf).
788  */
789 STATIC void
790 xfs_inobt_log_keys(
791         xfs_btree_cur_t         *cur,   /* btree cursor */
792         xfs_buf_t               *bp,    /* buffer containing btree block */
793         int                     kfirst, /* index of first key to log */
794         int                     klast)  /* index of last key to log */
795 {
796         xfs_inobt_block_t       *block; /* btree block to log from */
797         int                     first;  /* first byte offset logged */
798         xfs_inobt_key_t         *kp;    /* key pointer in btree block */
799         int                     last;   /* last byte offset logged */
800
801         block = XFS_BUF_TO_INOBT_BLOCK(bp);
802         kp = XFS_INOBT_KEY_ADDR(block, 1, cur);
803         first = (int)((xfs_caddr_t)&kp[kfirst - 1] - (xfs_caddr_t)block);
804         last = (int)(((xfs_caddr_t)&kp[klast] - 1) - (xfs_caddr_t)block);
805         xfs_trans_log_buf(cur->bc_tp, bp, first, last);
806 }
807
808 /*
809  * Log block pointer fields from a btree block (nonleaf).
810  */
811 STATIC void
812 xfs_inobt_log_ptrs(
813         xfs_btree_cur_t         *cur,   /* btree cursor */
814         xfs_buf_t               *bp,    /* buffer containing btree block */
815         int                     pfirst, /* index of first pointer to log */
816         int                     plast)  /* index of last pointer to log */
817 {
818         xfs_inobt_block_t       *block; /* btree block to log from */
819         int                     first;  /* first byte offset logged */
820         int                     last;   /* last byte offset logged */
821         xfs_inobt_ptr_t         *pp;    /* block-pointer pointer in btree blk */
822
823         block = XFS_BUF_TO_INOBT_BLOCK(bp);
824         pp = XFS_INOBT_PTR_ADDR(block, 1, cur);
825         first = (int)((xfs_caddr_t)&pp[pfirst - 1] - (xfs_caddr_t)block);
826         last = (int)(((xfs_caddr_t)&pp[plast] - 1) - (xfs_caddr_t)block);
827         xfs_trans_log_buf(cur->bc_tp, bp, first, last);
828 }
829
830 /*
831  * Log records from a btree block (leaf).
832  */
833 STATIC void
834 xfs_inobt_log_recs(
835         xfs_btree_cur_t         *cur,   /* btree cursor */
836         xfs_buf_t               *bp,    /* buffer containing btree block */
837         int                     rfirst, /* index of first record to log */
838         int                     rlast)  /* index of last record to log */
839 {
840         xfs_inobt_block_t       *block; /* btree block to log from */
841         int                     first;  /* first byte offset logged */
842         int                     last;   /* last byte offset logged */
843         xfs_inobt_rec_t         *rp;    /* record pointer for btree block */
844
845         block = XFS_BUF_TO_INOBT_BLOCK(bp);
846         rp = XFS_INOBT_REC_ADDR(block, 1, cur);
847         first = (int)((xfs_caddr_t)&rp[rfirst - 1] - (xfs_caddr_t)block);
848         last = (int)(((xfs_caddr_t)&rp[rlast] - 1) - (xfs_caddr_t)block);
849         xfs_trans_log_buf(cur->bc_tp, bp, first, last);
850 }
851
852 /*
853  * Lookup the record.  The cursor is made to point to it, based on dir.
854  * Return 0 if can't find any such record, 1 for success.
855  */
856 STATIC int                              /* error */
857 xfs_inobt_lookup(
858         xfs_btree_cur_t         *cur,   /* btree cursor */
859         xfs_lookup_t            dir,    /* <=, ==, or >= */
860         int                     *stat)  /* success/failure */
861 {
862         xfs_agblock_t           agbno;  /* a.g. relative btree block number */
863         xfs_agnumber_t          agno;   /* allocation group number */
864         xfs_inobt_block_t       *block=NULL;    /* current btree block */
865         __int64_t               diff;   /* difference for the current key */
866         int                     error;  /* error return value */
867         int                     keyno=0;        /* current key number */
868         int                     level;  /* level in the btree */
869         xfs_mount_t             *mp;    /* file system mount point */
870
871         /*
872          * Get the allocation group header, and the root block number.
873          */
874         mp = cur->bc_mp;
875         {
876                 xfs_agi_t       *agi;   /* a.g. inode header */
877
878                 agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp);
879                 agno = INT_GET(agi->agi_seqno, ARCH_CONVERT);
880                 agbno = INT_GET(agi->agi_root, ARCH_CONVERT);
881         }
882         /*
883          * Iterate over each level in the btree, starting at the root.
884          * For each level above the leaves, find the key we need, based
885          * on the lookup record, then follow the corresponding block
886          * pointer down to the next level.
887          */
888         for (level = cur->bc_nlevels - 1, diff = 1; level >= 0; level--) {
889                 xfs_buf_t       *bp;    /* buffer pointer for btree block */
890                 xfs_daddr_t     d;      /* disk address of btree block */
891
892                 /*
893                  * Get the disk address we're looking for.
894                  */
895                 d = XFS_AGB_TO_DADDR(mp, agno, agbno);
896                 /*
897                  * If the old buffer at this level is for a different block,
898                  * throw it away, otherwise just use it.
899                  */
900                 bp = cur->bc_bufs[level];
901                 if (bp && XFS_BUF_ADDR(bp) != d)
902                         bp = (xfs_buf_t *)0;
903                 if (!bp) {
904                         /*
905                          * Need to get a new buffer.  Read it, then
906                          * set it in the cursor, releasing the old one.
907                          */
908                         if ((error = xfs_btree_read_bufs(mp, cur->bc_tp,
909                                         agno, agbno, 0, &bp, XFS_INO_BTREE_REF)))
910                                 return error;
911                         xfs_btree_setbuf(cur, level, bp);
912                         /*
913                          * Point to the btree block, now that we have the buffer
914                          */
915                         block = XFS_BUF_TO_INOBT_BLOCK(bp);
916                         if ((error = xfs_btree_check_sblock(cur, block, level,
917                                         bp)))
918                                 return error;
919                 } else
920                         block = XFS_BUF_TO_INOBT_BLOCK(bp);
921                 /*
922                  * If we already had a key match at a higher level, we know
923                  * we need to use the first entry in this block.
924                  */
925                 if (diff == 0)
926                         keyno = 1;
927                 /*
928                  * Otherwise we need to search this block.  Do a binary search.
929                  */
930                 else {
931                         int             high;   /* high entry number */
932                         xfs_inobt_key_t *kkbase=NULL;/* base of keys in block */
933                         xfs_inobt_rec_t *krbase=NULL;/* base of records in block */
934                         int             low;    /* low entry number */
935
936                         /*
937                          * Get a pointer to keys or records.
938                          */
939                         if (level > 0)
940                                 kkbase = XFS_INOBT_KEY_ADDR(block, 1, cur);
941                         else
942                                 krbase = XFS_INOBT_REC_ADDR(block, 1, cur);
943                         /*
944                          * Set low and high entry numbers, 1-based.
945                          */
946                         low = 1;
947                         if (!(high = INT_GET(block->bb_numrecs, ARCH_CONVERT))) {
948                                 /*
949                                  * If the block is empty, the tree must
950                                  * be an empty leaf.
951                                  */
952                                 ASSERT(level == 0 && cur->bc_nlevels == 1);
953                                 cur->bc_ptrs[0] = dir != XFS_LOOKUP_LE;
954                                 *stat = 0;
955                                 return 0;
956                         }
957                         /*
958                          * Binary search the block.
959                          */
960                         while (low <= high) {
961                                 xfs_agino_t     startino;       /* key value */
962
963                                 /*
964                                  * keyno is average of low and high.
965                                  */
966                                 keyno = (low + high) >> 1;
967                                 /*
968                                  * Get startino.
969                                  */
970                                 if (level > 0) {
971                                         xfs_inobt_key_t *kkp;
972
973                                         kkp = kkbase + keyno - 1;
974                                         startino = INT_GET(kkp->ir_startino, ARCH_CONVERT);
975                                 } else {
976                                         xfs_inobt_rec_t *krp;
977
978                                         krp = krbase + keyno - 1;
979                                         startino = INT_GET(krp->ir_startino, ARCH_CONVERT);
980                                 }
981                                 /*
982                                  * Compute difference to get next direction.
983                                  */
984                                 diff = (__int64_t)
985                                         startino - cur->bc_rec.i.ir_startino;
986                                 /*
987                                  * Less than, move right.
988                                  */
989                                 if (diff < 0)
990                                         low = keyno + 1;
991                                 /*
992                                  * Greater than, move left.
993                                  */
994                                 else if (diff > 0)
995                                         high = keyno - 1;
996                                 /*
997                                  * Equal, we're done.
998                                  */
999                                 else
1000                                         break;
1001                         }
1002                 }
1003                 /*
1004                  * If there are more levels, set up for the next level
1005                  * by getting the block number and filling in the cursor.
1006                  */
1007                 if (level > 0) {
1008                         /*
1009                          * If we moved left, need the previous key number,
1010                          * unless there isn't one.
1011                          */
1012                         if (diff > 0 && --keyno < 1)
1013                                 keyno = 1;
1014                         agbno = INT_GET(*XFS_INOBT_PTR_ADDR(block, keyno, cur), ARCH_CONVERT);
1015 #ifdef DEBUG
1016                         if ((error = xfs_btree_check_sptr(cur, agbno, level)))
1017                                 return error;
1018 #endif
1019                         cur->bc_ptrs[level] = keyno;
1020                 }
1021         }
1022         /*
1023          * Done with the search.
1024          * See if we need to adjust the results.
1025          */
1026         if (dir != XFS_LOOKUP_LE && diff < 0) {
1027                 keyno++;
1028                 /*
1029                  * If ge search and we went off the end of the block, but it's
1030                  * not the last block, we're in the wrong block.
1031                  */
1032                 if (dir == XFS_LOOKUP_GE &&
1033                     keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT) &&
1034                     INT_GET(block->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
1035                         int     i;
1036
1037                         cur->bc_ptrs[0] = keyno;
1038                         if ((error = xfs_inobt_increment(cur, 0, &i)))
1039                                 return error;
1040                         ASSERT(i == 1);
1041                         *stat = 1;
1042                         return 0;
1043                 }
1044         }
1045         else if (dir == XFS_LOOKUP_LE && diff > 0)
1046                 keyno--;
1047         cur->bc_ptrs[0] = keyno;
1048         /*
1049          * Return if we succeeded or not.
1050          */
1051         if (keyno == 0 || keyno > INT_GET(block->bb_numrecs, ARCH_CONVERT))
1052                 *stat = 0;
1053         else
1054                 *stat = ((dir != XFS_LOOKUP_EQ) || (diff == 0));
1055         return 0;
1056 }
1057
1058 /*
1059  * Move 1 record left from cur/level if possible.
1060  * Update cur to reflect the new path.
1061  */
1062 STATIC int                              /* error */
1063 xfs_inobt_lshift(
1064         xfs_btree_cur_t         *cur,   /* btree cursor */
1065         int                     level,  /* level to shift record on */
1066         int                     *stat)  /* success/failure */
1067 {
1068         int                     error;  /* error return value */
1069 #ifdef DEBUG
1070         int                     i;      /* loop index */
1071 #endif
1072         xfs_inobt_key_t         key;    /* key value for leaf level upward */
1073         xfs_buf_t               *lbp;   /* buffer for left neighbor block */
1074         xfs_inobt_block_t       *left;  /* left neighbor btree block */
1075         xfs_inobt_key_t         *lkp=NULL;      /* key pointer for left block */
1076         xfs_inobt_ptr_t         *lpp;   /* address pointer for left block */
1077         xfs_inobt_rec_t         *lrp=NULL;      /* record pointer for left block */
1078         int                     nrec;   /* new number of left block entries */
1079         xfs_buf_t               *rbp;   /* buffer for right (current) block */
1080         xfs_inobt_block_t       *right; /* right (current) btree block */
1081         xfs_inobt_key_t         *rkp=NULL;      /* key pointer for right block */
1082         xfs_inobt_ptr_t         *rpp=NULL;      /* address pointer for right block */
1083         xfs_inobt_rec_t         *rrp=NULL;      /* record pointer for right block */
1084
1085         /*
1086          * Set up variables for this block as "right".
1087          */
1088         rbp = cur->bc_bufs[level];
1089         right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1090 #ifdef DEBUG
1091         if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1092                 return error;
1093 #endif
1094         /*
1095          * If we've got no left sibling then we can't shift an entry left.
1096          */
1097         if (INT_GET(right->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) {
1098                 *stat = 0;
1099                 return 0;
1100         }
1101         /*
1102          * If the cursor entry is the one that would be moved, don't
1103          * do it... it's too complicated.
1104          */
1105         if (cur->bc_ptrs[level] <= 1) {
1106                 *stat = 0;
1107                 return 0;
1108         }
1109         /*
1110          * Set up the left neighbor as "left".
1111          */
1112         if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1113                         cur->bc_private.i.agno, INT_GET(right->bb_leftsib, ARCH_CONVERT), 0, &lbp,
1114                         XFS_INO_BTREE_REF)))
1115                 return error;
1116         left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1117         if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1118                 return error;
1119         /*
1120          * If it's full, it can't take another entry.
1121          */
1122         if (INT_GET(left->bb_numrecs, ARCH_CONVERT) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
1123                 *stat = 0;
1124                 return 0;
1125         }
1126         nrec = INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1;
1127         /*
1128          * If non-leaf, copy a key and a ptr to the left block.
1129          */
1130         if (level > 0) {
1131                 lkp = XFS_INOBT_KEY_ADDR(left, nrec, cur);
1132                 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1133                 *lkp = *rkp;
1134                 xfs_inobt_log_keys(cur, lbp, nrec, nrec);
1135                 lpp = XFS_INOBT_PTR_ADDR(left, nrec, cur);
1136                 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1137 #ifdef DEBUG
1138                 if ((error = xfs_btree_check_sptr(cur, INT_GET(*rpp, ARCH_CONVERT), level)))
1139                         return error;
1140 #endif
1141                 *lpp = *rpp; /* INT_: no-change copy */
1142                 xfs_inobt_log_ptrs(cur, lbp, nrec, nrec);
1143         }
1144         /*
1145          * If leaf, copy a record to the left block.
1146          */
1147         else {
1148                 lrp = XFS_INOBT_REC_ADDR(left, nrec, cur);
1149                 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1150                 *lrp = *rrp;
1151                 xfs_inobt_log_recs(cur, lbp, nrec, nrec);
1152         }
1153         /*
1154          * Bump and log left's numrecs, decrement and log right's numrecs.
1155          */
1156         INT_MOD(left->bb_numrecs, ARCH_CONVERT, +1);
1157         xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1158 #ifdef DEBUG
1159         if (level > 0)
1160                 xfs_btree_check_key(cur->bc_btnum, lkp - 1, lkp);
1161         else
1162                 xfs_btree_check_rec(cur->bc_btnum, lrp - 1, lrp);
1163 #endif
1164         INT_MOD(right->bb_numrecs, ARCH_CONVERT, -1);
1165         xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1166         /*
1167          * Slide the contents of right down one entry.
1168          */
1169         if (level > 0) {
1170 #ifdef DEBUG
1171                 for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
1172                         if ((error = xfs_btree_check_sptr(cur, INT_GET(rpp[i + 1], ARCH_CONVERT),
1173                                         level)))
1174                                 return error;
1175                 }
1176 #endif
1177                 memmove(rkp, rkp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
1178                 memmove(rpp, rpp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
1179                 xfs_inobt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
1180                 xfs_inobt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
1181         } else {
1182                 memmove(rrp, rrp + 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
1183                 xfs_inobt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
1184                 key.ir_startino = rrp->ir_startino; /* INT_: direct copy */
1185                 rkp = &key;
1186         }
1187         /*
1188          * Update the parent key values of right.
1189          */
1190         if ((error = xfs_inobt_updkey(cur, rkp, level + 1)))
1191                 return error;
1192         /*
1193          * Slide the cursor value left one.
1194          */
1195         cur->bc_ptrs[level]--;
1196         *stat = 1;
1197         return 0;
1198 }
1199
1200 /*
1201  * Allocate a new root block, fill it in.
1202  */
1203 STATIC int                              /* error */
1204 xfs_inobt_newroot(
1205         xfs_btree_cur_t         *cur,   /* btree cursor */
1206         int                     *stat)  /* success/failure */
1207 {
1208         xfs_agi_t               *agi;   /* a.g. inode header */
1209         xfs_alloc_arg_t         args;   /* allocation argument structure */
1210         xfs_inobt_block_t       *block; /* one half of the old root block */
1211         xfs_buf_t               *bp;    /* buffer containing block */
1212         int                     error;  /* error return value */
1213         xfs_inobt_key_t         *kp;    /* btree key pointer */
1214         xfs_agblock_t           lbno;   /* left block number */
1215         xfs_buf_t               *lbp;   /* left buffer pointer */
1216         xfs_inobt_block_t       *left;  /* left btree block */
1217         xfs_buf_t               *nbp;   /* new (root) buffer */
1218         xfs_inobt_block_t       *new;   /* new (root) btree block */
1219         int                     nptr;   /* new value for key index, 1 or 2 */
1220         xfs_inobt_ptr_t         *pp;    /* btree address pointer */
1221         xfs_agblock_t           rbno;   /* right block number */
1222         xfs_buf_t               *rbp;   /* right buffer pointer */
1223         xfs_inobt_block_t       *right; /* right btree block */
1224         xfs_inobt_rec_t         *rp;    /* btree record pointer */
1225
1226         ASSERT(cur->bc_nlevels < XFS_IN_MAXLEVELS(cur->bc_mp));
1227
1228         /*
1229          * Get a block & a buffer.
1230          */
1231         agi = XFS_BUF_TO_AGI(cur->bc_private.i.agbp);
1232         args.tp = cur->bc_tp;
1233         args.mp = cur->bc_mp;
1234         args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno,
1235                 INT_GET(agi->agi_root, ARCH_CONVERT));
1236         args.mod = args.minleft = args.alignment = args.total = args.wasdel =
1237                 args.isfl = args.userdata = args.minalignslop = 0;
1238         args.minlen = args.maxlen = args.prod = 1;
1239         args.type = XFS_ALLOCTYPE_NEAR_BNO;
1240         if ((error = xfs_alloc_vextent(&args)))
1241                 return error;
1242         /*
1243          * None available, we fail.
1244          */
1245         if (args.fsbno == NULLFSBLOCK) {
1246                 *stat = 0;
1247                 return 0;
1248         }
1249         ASSERT(args.len == 1);
1250         nbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
1251         new = XFS_BUF_TO_INOBT_BLOCK(nbp);
1252         /*
1253          * Set the root data in the a.g. inode structure.
1254          */
1255         INT_SET(agi->agi_root, ARCH_CONVERT, args.agbno);
1256         INT_MOD(agi->agi_level, ARCH_CONVERT, 1);
1257         xfs_ialloc_log_agi(args.tp, cur->bc_private.i.agbp,
1258                 XFS_AGI_ROOT | XFS_AGI_LEVEL);
1259         /*
1260          * At the previous root level there are now two blocks: the old
1261          * root, and the new block generated when it was split.
1262          * We don't know which one the cursor is pointing at, so we
1263          * set up variables "left" and "right" for each case.
1264          */
1265         bp = cur->bc_bufs[cur->bc_nlevels - 1];
1266         block = XFS_BUF_TO_INOBT_BLOCK(bp);
1267 #ifdef DEBUG
1268         if ((error = xfs_btree_check_sblock(cur, block, cur->bc_nlevels - 1, bp)))
1269                 return error;
1270 #endif
1271         if (INT_GET(block->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
1272                 /*
1273                  * Our block is left, pick up the right block.
1274                  */
1275                 lbp = bp;
1276                 lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
1277                 left = block;
1278                 rbno = INT_GET(left->bb_rightsib, ARCH_CONVERT);
1279                 if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1280                                 rbno, 0, &rbp, XFS_INO_BTREE_REF)))
1281                         return error;
1282                 bp = rbp;
1283                 right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1284                 if ((error = xfs_btree_check_sblock(cur, right,
1285                                 cur->bc_nlevels - 1, rbp)))
1286                         return error;
1287                 nptr = 1;
1288         } else {
1289                 /*
1290                  * Our block is right, pick up the left block.
1291                  */
1292                 rbp = bp;
1293                 rbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(rbp));
1294                 right = block;
1295                 lbno = INT_GET(right->bb_leftsib, ARCH_CONVERT);
1296                 if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1297                                 lbno, 0, &lbp, XFS_INO_BTREE_REF)))
1298                         return error;
1299                 bp = lbp;
1300                 left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1301                 if ((error = xfs_btree_check_sblock(cur, left,
1302                                 cur->bc_nlevels - 1, lbp)))
1303                         return error;
1304                 nptr = 2;
1305         }
1306         /*
1307          * Fill in the new block's btree header and log it.
1308          */
1309         INT_SET(new->bb_magic, ARCH_CONVERT, xfs_magics[cur->bc_btnum]);
1310         INT_SET(new->bb_level, ARCH_CONVERT, (__uint16_t)cur->bc_nlevels);
1311         INT_SET(new->bb_numrecs, ARCH_CONVERT, 2);
1312         INT_SET(new->bb_leftsib, ARCH_CONVERT, NULLAGBLOCK);
1313         INT_SET(new->bb_rightsib, ARCH_CONVERT, NULLAGBLOCK);
1314         xfs_inobt_log_block(args.tp, nbp, XFS_BB_ALL_BITS);
1315         ASSERT(lbno != NULLAGBLOCK && rbno != NULLAGBLOCK);
1316         /*
1317          * Fill in the key data in the new root.
1318          */
1319         kp = XFS_INOBT_KEY_ADDR(new, 1, cur);
1320         if (INT_GET(left->bb_level, ARCH_CONVERT) > 0) {
1321                 kp[0] = *XFS_INOBT_KEY_ADDR(left, 1, cur); /* INT_: struct copy */
1322                 kp[1] = *XFS_INOBT_KEY_ADDR(right, 1, cur); /* INT_: struct copy */
1323         } else {
1324                 rp = XFS_INOBT_REC_ADDR(left, 1, cur);
1325                 INT_COPY(kp[0].ir_startino, rp->ir_startino, ARCH_CONVERT);
1326                 rp = XFS_INOBT_REC_ADDR(right, 1, cur);
1327                 INT_COPY(kp[1].ir_startino, rp->ir_startino, ARCH_CONVERT);
1328         }
1329         xfs_inobt_log_keys(cur, nbp, 1, 2);
1330         /*
1331          * Fill in the pointer data in the new root.
1332          */
1333         pp = XFS_INOBT_PTR_ADDR(new, 1, cur);
1334         INT_SET(pp[0], ARCH_CONVERT, lbno);
1335         INT_SET(pp[1], ARCH_CONVERT, rbno);
1336         xfs_inobt_log_ptrs(cur, nbp, 1, 2);
1337         /*
1338          * Fix up the cursor.
1339          */
1340         xfs_btree_setbuf(cur, cur->bc_nlevels, nbp);
1341         cur->bc_ptrs[cur->bc_nlevels] = nptr;
1342         cur->bc_nlevels++;
1343         *stat = 1;
1344         return 0;
1345 }
1346
1347 /*
1348  * Move 1 record right from cur/level if possible.
1349  * Update cur to reflect the new path.
1350  */
1351 STATIC int                              /* error */
1352 xfs_inobt_rshift(
1353         xfs_btree_cur_t         *cur,   /* btree cursor */
1354         int                     level,  /* level to shift record on */
1355         int                     *stat)  /* success/failure */
1356 {
1357         int                     error;  /* error return value */
1358         int                     i;      /* loop index */
1359         xfs_inobt_key_t         key;    /* key value for leaf level upward */
1360         xfs_buf_t               *lbp;   /* buffer for left (current) block */
1361         xfs_inobt_block_t       *left;  /* left (current) btree block */
1362         xfs_inobt_key_t         *lkp;   /* key pointer for left block */
1363         xfs_inobt_ptr_t         *lpp;   /* address pointer for left block */
1364         xfs_inobt_rec_t         *lrp;   /* record pointer for left block */
1365         xfs_buf_t               *rbp;   /* buffer for right neighbor block */
1366         xfs_inobt_block_t       *right; /* right neighbor btree block */
1367         xfs_inobt_key_t         *rkp;   /* key pointer for right block */
1368         xfs_inobt_ptr_t         *rpp;   /* address pointer for right block */
1369         xfs_inobt_rec_t         *rrp=NULL;      /* record pointer for right block */
1370         xfs_btree_cur_t         *tcur;  /* temporary cursor */
1371
1372         /*
1373          * Set up variables for this block as "left".
1374          */
1375         lbp = cur->bc_bufs[level];
1376         left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1377 #ifdef DEBUG
1378         if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1379                 return error;
1380 #endif
1381         /*
1382          * If we've got no right sibling then we can't shift an entry right.
1383          */
1384         if (INT_GET(left->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) {
1385                 *stat = 0;
1386                 return 0;
1387         }
1388         /*
1389          * If the cursor entry is the one that would be moved, don't
1390          * do it... it's too complicated.
1391          */
1392         if (cur->bc_ptrs[level] >= INT_GET(left->bb_numrecs, ARCH_CONVERT)) {
1393                 *stat = 0;
1394                 return 0;
1395         }
1396         /*
1397          * Set up the right neighbor as "right".
1398          */
1399         if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1400                         cur->bc_private.i.agno, INT_GET(left->bb_rightsib, ARCH_CONVERT), 0, &rbp,
1401                         XFS_INO_BTREE_REF)))
1402                 return error;
1403         right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1404         if ((error = xfs_btree_check_sblock(cur, right, level, rbp)))
1405                 return error;
1406         /*
1407          * If it's full, it can't take another entry.
1408          */
1409         if (INT_GET(right->bb_numrecs, ARCH_CONVERT) == XFS_INOBT_BLOCK_MAXRECS(level, cur)) {
1410                 *stat = 0;
1411                 return 0;
1412         }
1413         /*
1414          * Make a hole at the start of the right neighbor block, then
1415          * copy the last left block entry to the hole.
1416          */
1417         if (level > 0) {
1418                 lkp = XFS_INOBT_KEY_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
1419                 lpp = XFS_INOBT_PTR_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
1420                 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1421                 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1422 #ifdef DEBUG
1423                 for (i = INT_GET(right->bb_numrecs, ARCH_CONVERT) - 1; i >= 0; i--) {
1424                         if ((error = xfs_btree_check_sptr(cur, INT_GET(rpp[i], ARCH_CONVERT), level)))
1425                                 return error;
1426                 }
1427 #endif
1428                 memmove(rkp + 1, rkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
1429                 memmove(rpp + 1, rpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
1430 #ifdef DEBUG
1431                 if ((error = xfs_btree_check_sptr(cur, INT_GET(*lpp, ARCH_CONVERT), level)))
1432                         return error;
1433 #endif
1434                 *rkp = *lkp; /* INT_: no change copy */
1435                 *rpp = *lpp; /* INT_: no change copy */
1436                 xfs_inobt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
1437                 xfs_inobt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
1438         } else {
1439                 lrp = XFS_INOBT_REC_ADDR(left, INT_GET(left->bb_numrecs, ARCH_CONVERT), cur);
1440                 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1441                 memmove(rrp + 1, rrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
1442                 *rrp = *lrp;
1443                 xfs_inobt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1);
1444                 key.ir_startino = rrp->ir_startino; /* INT_: direct copy */
1445                 rkp = &key;
1446         }
1447         /*
1448          * Decrement and log left's numrecs, bump and log right's numrecs.
1449          */
1450         INT_MOD(left->bb_numrecs, ARCH_CONVERT, -1);
1451         xfs_inobt_log_block(cur->bc_tp, lbp, XFS_BB_NUMRECS);
1452         INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
1453 #ifdef DEBUG
1454         if (level > 0)
1455                 xfs_btree_check_key(cur->bc_btnum, rkp, rkp + 1);
1456         else
1457                 xfs_btree_check_rec(cur->bc_btnum, rrp, rrp + 1);
1458 #endif
1459         xfs_inobt_log_block(cur->bc_tp, rbp, XFS_BB_NUMRECS);
1460         /*
1461          * Using a temporary cursor, update the parent key values of the
1462          * block on the right.
1463          */
1464         if ((error = xfs_btree_dup_cursor(cur, &tcur)))
1465                 return error;
1466         xfs_btree_lastrec(tcur, level);
1467         if ((error = xfs_inobt_increment(tcur, level, &i)) ||
1468             (error = xfs_inobt_updkey(tcur, rkp, level + 1))) {
1469                 xfs_btree_del_cursor(tcur, XFS_BTREE_ERROR);
1470                 return error;
1471         }
1472         xfs_btree_del_cursor(tcur, XFS_BTREE_NOERROR);
1473         *stat = 1;
1474         return 0;
1475 }
1476
1477 /*
1478  * Split cur/level block in half.
1479  * Return new block number and its first record (to be inserted into parent).
1480  */
1481 STATIC int                              /* error */
1482 xfs_inobt_split(
1483         xfs_btree_cur_t         *cur,   /* btree cursor */
1484         int                     level,  /* level to split */
1485         xfs_agblock_t           *bnop,  /* output: block number allocated */
1486         xfs_inobt_key_t         *keyp,  /* output: first key of new block */
1487         xfs_btree_cur_t         **curp, /* output: new cursor */
1488         int                     *stat)  /* success/failure */
1489 {
1490         xfs_alloc_arg_t         args;   /* allocation argument structure */
1491         int                     error;  /* error return value */
1492         int                     i;      /* loop index/record number */
1493         xfs_agblock_t           lbno;   /* left (current) block number */
1494         xfs_buf_t               *lbp;   /* buffer for left block */
1495         xfs_inobt_block_t       *left;  /* left (current) btree block */
1496         xfs_inobt_key_t         *lkp;   /* left btree key pointer */
1497         xfs_inobt_ptr_t         *lpp;   /* left btree address pointer */
1498         xfs_inobt_rec_t         *lrp;   /* left btree record pointer */
1499         xfs_buf_t               *rbp;   /* buffer for right block */
1500         xfs_inobt_block_t       *right; /* right (new) btree block */
1501         xfs_inobt_key_t         *rkp;   /* right btree key pointer */
1502         xfs_inobt_ptr_t         *rpp;   /* right btree address pointer */
1503         xfs_inobt_rec_t         *rrp;   /* right btree record pointer */
1504
1505         /*
1506          * Set up left block (current one).
1507          */
1508         lbp = cur->bc_bufs[level];
1509         args.tp = cur->bc_tp;
1510         args.mp = cur->bc_mp;
1511         lbno = XFS_DADDR_TO_AGBNO(args.mp, XFS_BUF_ADDR(lbp));
1512         /*
1513          * Allocate the new block.
1514          * If we can't do it, we're toast.  Give up.
1515          */
1516         args.fsbno = XFS_AGB_TO_FSB(args.mp, cur->bc_private.i.agno, lbno);
1517         args.mod = args.minleft = args.alignment = args.total = args.wasdel =
1518                 args.isfl = args.userdata = args.minalignslop = 0;
1519         args.minlen = args.maxlen = args.prod = 1;
1520         args.type = XFS_ALLOCTYPE_NEAR_BNO;
1521         if ((error = xfs_alloc_vextent(&args)))
1522                 return error;
1523         if (args.fsbno == NULLFSBLOCK) {
1524                 *stat = 0;
1525                 return 0;
1526         }
1527         ASSERT(args.len == 1);
1528         rbp = xfs_btree_get_bufs(args.mp, args.tp, args.agno, args.agbno, 0);
1529         /*
1530          * Set up the new block as "right".
1531          */
1532         right = XFS_BUF_TO_INOBT_BLOCK(rbp);
1533         /*
1534          * "Left" is the current (according to the cursor) block.
1535          */
1536         left = XFS_BUF_TO_INOBT_BLOCK(lbp);
1537 #ifdef DEBUG
1538         if ((error = xfs_btree_check_sblock(cur, left, level, lbp)))
1539                 return error;
1540 #endif
1541         /*
1542          * Fill in the btree header for the new block.
1543          */
1544         INT_SET(right->bb_magic, ARCH_CONVERT, xfs_magics[cur->bc_btnum]);
1545         right->bb_level = left->bb_level; /* INT_: direct copy */
1546         INT_SET(right->bb_numrecs, ARCH_CONVERT, (__uint16_t)(INT_GET(left->bb_numrecs, ARCH_CONVERT) / 2));
1547         /*
1548          * Make sure that if there's an odd number of entries now, that
1549          * each new block will have the same number of entries.
1550          */
1551         if ((INT_GET(left->bb_numrecs, ARCH_CONVERT) & 1) &&
1552             cur->bc_ptrs[level] <= INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1)
1553                 INT_MOD(right->bb_numrecs, ARCH_CONVERT, +1);
1554         i = INT_GET(left->bb_numrecs, ARCH_CONVERT) - INT_GET(right->bb_numrecs, ARCH_CONVERT) + 1;
1555         /*
1556          * For non-leaf blocks, copy keys and addresses over to the new block.
1557          */
1558         if (level > 0) {
1559                 lkp = XFS_INOBT_KEY_ADDR(left, i, cur);
1560                 lpp = XFS_INOBT_PTR_ADDR(left, i, cur);
1561                 rkp = XFS_INOBT_KEY_ADDR(right, 1, cur);
1562                 rpp = XFS_INOBT_PTR_ADDR(right, 1, cur);
1563 #ifdef DEBUG
1564                 for (i = 0; i < INT_GET(right->bb_numrecs, ARCH_CONVERT); i++) {
1565                         if ((error = xfs_btree_check_sptr(cur, INT_GET(lpp[i], ARCH_CONVERT), level)))
1566                                 return error;
1567                 }
1568 #endif
1569                 memcpy(rkp, lkp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rkp));
1570                 memcpy(rpp, lpp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rpp));
1571                 xfs_inobt_log_keys(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
1572                 xfs_inobt_log_ptrs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
1573                 *keyp = *rkp;
1574         }
1575         /*
1576          * For leaf blocks, copy records over to the new block.
1577          */
1578         else {
1579                 lrp = XFS_INOBT_REC_ADDR(left, i, cur);
1580                 rrp = XFS_INOBT_REC_ADDR(right, 1, cur);
1581                 memcpy(rrp, lrp, INT_GET(right->bb_numrecs, ARCH_CONVERT) * sizeof(*rrp));
1582                 xfs_inobt_log_recs(cur, rbp, 1, INT_GET(right->bb_numrecs, ARCH_CONVERT));
1583                 keyp->ir_startino = rrp->ir_startino; /* INT_: direct copy */
1584         }
1585         /*
1586          * Find the left block number by looking in the buffer.
1587          * Adjust numrecs, sibling pointers.
1588          */
1589         INT_MOD(left->bb_numrecs, ARCH_CONVERT, -(INT_GET(right->bb_numrecs, ARCH_CONVERT)));
1590         right->bb_rightsib = left->bb_rightsib; /* INT_: direct copy */
1591         INT_SET(left->bb_rightsib, ARCH_CONVERT, args.agbno);
1592         INT_SET(right->bb_leftsib, ARCH_CONVERT, lbno);
1593         xfs_inobt_log_block(args.tp, rbp, XFS_BB_ALL_BITS);
1594         xfs_inobt_log_block(args.tp, lbp, XFS_BB_NUMRECS | XFS_BB_RIGHTSIB);
1595         /*
1596          * If there's a block to the new block's right, make that block
1597          * point back to right instead of to left.
1598          */
1599         if (INT_GET(right->bb_rightsib, ARCH_CONVERT) != NULLAGBLOCK) {
1600                 xfs_inobt_block_t       *rrblock;       /* rr btree block */
1601                 xfs_buf_t               *rrbp;          /* buffer for rrblock */
1602
1603                 if ((error = xfs_btree_read_bufs(args.mp, args.tp, args.agno,
1604                                 INT_GET(right->bb_rightsib, ARCH_CONVERT), 0, &rrbp,
1605                                 XFS_INO_BTREE_REF)))
1606                         return error;
1607                 rrblock = XFS_BUF_TO_INOBT_BLOCK(rrbp);
1608                 if ((error = xfs_btree_check_sblock(cur, rrblock, level, rrbp)))
1609                         return error;
1610                 INT_SET(rrblock->bb_leftsib, ARCH_CONVERT, args.agbno);
1611                 xfs_inobt_log_block(args.tp, rrbp, XFS_BB_LEFTSIB);
1612         }
1613         /*
1614          * If the cursor is really in the right block, move it there.
1615          * If it's just pointing past the last entry in left, then we'll
1616          * insert there, so don't change anything in that case.
1617          */
1618         if (cur->bc_ptrs[level] > INT_GET(left->bb_numrecs, ARCH_CONVERT) + 1) {
1619                 xfs_btree_setbuf(cur, level, rbp);
1620                 cur->bc_ptrs[level] -= INT_GET(left->bb_numrecs, ARCH_CONVERT);
1621         }
1622         /*
1623          * If there are more levels, we'll need another cursor which refers
1624          * the right block, no matter where this cursor was.
1625          */
1626         if (level + 1 < cur->bc_nlevels) {
1627                 if ((error = xfs_btree_dup_cursor(cur, curp)))
1628                         return error;
1629                 (*curp)->bc_ptrs[level + 1]++;
1630         }
1631         *bnop = args.agbno;
1632         *stat = 1;
1633         return 0;
1634 }
1635
1636 /*
1637  * Update keys at all levels from here to the root along the cursor's path.
1638  */
1639 STATIC int                              /* error */
1640 xfs_inobt_updkey(
1641         xfs_btree_cur_t         *cur,   /* btree cursor */
1642         xfs_inobt_key_t         *keyp,  /* new key value to update to */
1643         int                     level)  /* starting level for update */
1644 {
1645         int                     ptr;    /* index of key in block */
1646
1647         /*
1648          * Go up the tree from this level toward the root.
1649          * At each level, update the key value to the value input.
1650          * Stop when we reach a level where the cursor isn't pointing
1651          * at the first entry in the block.
1652          */
1653         for (ptr = 1; ptr == 1 && level < cur->bc_nlevels; level++) {
1654                 xfs_buf_t               *bp;    /* buffer for block */
1655                 xfs_inobt_block_t       *block; /* btree block */
1656 #ifdef DEBUG
1657                 int                     error;  /* error return value */
1658 #endif
1659                 xfs_inobt_key_t         *kp;    /* ptr to btree block keys */
1660
1661                 bp = cur->bc_bufs[level];
1662                 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1663 #ifdef DEBUG
1664                 if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1665                         return error;
1666 #endif
1667                 ptr = cur->bc_ptrs[level];
1668                 kp = XFS_INOBT_KEY_ADDR(block, ptr, cur);
1669                 *kp = *keyp;
1670                 xfs_inobt_log_keys(cur, bp, ptr, ptr);
1671         }
1672         return 0;
1673 }
1674
1675 /*
1676  * Externally visible routines.
1677  */
1678
1679 /*
1680  * Decrement cursor by one record at the level.
1681  * For nonzero levels the leaf-ward information is untouched.
1682  */
1683 int                                     /* error */
1684 xfs_inobt_decrement(
1685         xfs_btree_cur_t         *cur,   /* btree cursor */
1686         int                     level,  /* level in btree, 0 is leaf */
1687         int                     *stat)  /* success/failure */
1688 {
1689         xfs_inobt_block_t       *block; /* btree block */
1690         int                     error;
1691         int                     lev;    /* btree level */
1692
1693         ASSERT(level < cur->bc_nlevels);
1694         /*
1695          * Read-ahead to the left at this level.
1696          */
1697         xfs_btree_readahead(cur, level, XFS_BTCUR_LEFTRA);
1698         /*
1699          * Decrement the ptr at this level.  If we're still in the block
1700          * then we're done.
1701          */
1702         if (--cur->bc_ptrs[level] > 0) {
1703                 *stat = 1;
1704                 return 0;
1705         }
1706         /*
1707          * Get a pointer to the btree block.
1708          */
1709         block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[level]);
1710 #ifdef DEBUG
1711         if ((error = xfs_btree_check_sblock(cur, block, level,
1712                         cur->bc_bufs[level])))
1713                 return error;
1714 #endif
1715         /*
1716          * If we just went off the left edge of the tree, return failure.
1717          */
1718         if (INT_GET(block->bb_leftsib, ARCH_CONVERT) == NULLAGBLOCK) {
1719                 *stat = 0;
1720                 return 0;
1721         }
1722         /*
1723          * March up the tree decrementing pointers.
1724          * Stop when we don't go off the left edge of a block.
1725          */
1726         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1727                 if (--cur->bc_ptrs[lev] > 0)
1728                         break;
1729                 /*
1730                  * Read-ahead the left block, we're going to read it
1731                  * in the next loop.
1732                  */
1733                 xfs_btree_readahead(cur, lev, XFS_BTCUR_LEFTRA);
1734         }
1735         /*
1736          * If we went off the root then we are seriously confused.
1737          */
1738         ASSERT(lev < cur->bc_nlevels);
1739         /*
1740          * Now walk back down the tree, fixing up the cursor's buffer
1741          * pointers and key numbers.
1742          */
1743         for (block = XFS_BUF_TO_INOBT_BLOCK(cur->bc_bufs[lev]); lev > level; ) {
1744                 xfs_agblock_t   agbno;  /* block number of btree block */
1745                 xfs_buf_t       *bp;    /* buffer containing btree block */
1746
1747                 agbno = INT_GET(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
1748                 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1749                                 cur->bc_private.i.agno, agbno, 0, &bp,
1750                                 XFS_INO_BTREE_REF)))
1751                         return error;
1752                 lev--;
1753                 xfs_btree_setbuf(cur, lev, bp);
1754                 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1755                 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1756                         return error;
1757                 cur->bc_ptrs[lev] = INT_GET(block->bb_numrecs, ARCH_CONVERT);
1758         }
1759         *stat = 1;
1760         return 0;
1761 }
1762
1763 /*
1764  * Delete the record pointed to by cur.
1765  * The cursor refers to the place where the record was (could be inserted)
1766  * when the operation returns.
1767  */
1768 int                                     /* error */
1769 xfs_inobt_delete(
1770         xfs_btree_cur_t *cur,           /* btree cursor */
1771         int             *stat)          /* success/failure */
1772 {
1773         int             error;
1774         int             i;              /* result code */
1775         int             level;          /* btree level */
1776
1777         /*
1778          * Go up the tree, starting at leaf level.
1779          * If 2 is returned then a join was done; go to the next level.
1780          * Otherwise we are done.
1781          */
1782         for (level = 0, i = 2; i == 2; level++) {
1783                 if ((error = xfs_inobt_delrec(cur, level, &i)))
1784                         return error;
1785         }
1786         if (i == 0) {
1787                 for (level = 1; level < cur->bc_nlevels; level++) {
1788                         if (cur->bc_ptrs[level] == 0) {
1789                                 if ((error = xfs_inobt_decrement(cur, level, &i)))
1790                                         return error;
1791                                 break;
1792                         }
1793                 }
1794         }
1795         *stat = i;
1796         return 0;
1797 }
1798
1799
1800 /*
1801  * Get the data from the pointed-to record.
1802  */
1803 int                                     /* error */
1804 xfs_inobt_get_rec(
1805         xfs_btree_cur_t         *cur,   /* btree cursor */
1806         xfs_agino_t             *ino,   /* output: starting inode of chunk */
1807         __int32_t               *fcnt,  /* output: number of free inodes */
1808         xfs_inofree_t           *free,  /* output: free inode mask */
1809         int                     *stat)  /* output: success/failure */
1810 {
1811         xfs_inobt_block_t       *block; /* btree block */
1812         xfs_buf_t               *bp;    /* buffer containing btree block */
1813 #ifdef DEBUG
1814         int                     error;  /* error return value */
1815 #endif
1816         int                     ptr;    /* record number */
1817         xfs_inobt_rec_t         *rec;   /* record data */
1818
1819         bp = cur->bc_bufs[0];
1820         ptr = cur->bc_ptrs[0];
1821         block = XFS_BUF_TO_INOBT_BLOCK(bp);
1822 #ifdef DEBUG
1823         if ((error = xfs_btree_check_sblock(cur, block, 0, bp)))
1824                 return error;
1825 #endif
1826         /*
1827          * Off the right end or left end, return failure.
1828          */
1829         if (ptr > INT_GET(block->bb_numrecs, ARCH_CONVERT) || ptr <= 0) {
1830                 *stat = 0;
1831                 return 0;
1832         }
1833         /*
1834          * Point to the record and extract its data.
1835          */
1836         rec = XFS_INOBT_REC_ADDR(block, ptr, cur);
1837         *ino = INT_GET(rec->ir_startino, ARCH_CONVERT);
1838         *fcnt = INT_GET(rec->ir_freecount, ARCH_CONVERT);
1839         *free = INT_GET(rec->ir_free, ARCH_CONVERT);
1840         *stat = 1;
1841         return 0;
1842 }
1843
1844 /*
1845  * Increment cursor by one record at the level.
1846  * For nonzero levels the leaf-ward information is untouched.
1847  */
1848 int                                     /* error */
1849 xfs_inobt_increment(
1850         xfs_btree_cur_t         *cur,   /* btree cursor */
1851         int                     level,  /* level in btree, 0 is leaf */
1852         int                     *stat)  /* success/failure */
1853 {
1854         xfs_inobt_block_t       *block; /* btree block */
1855         xfs_buf_t               *bp;    /* buffer containing btree block */
1856         int                     error;  /* error return value */
1857         int                     lev;    /* btree level */
1858
1859         ASSERT(level < cur->bc_nlevels);
1860         /*
1861          * Read-ahead to the right at this level.
1862          */
1863         xfs_btree_readahead(cur, level, XFS_BTCUR_RIGHTRA);
1864         /*
1865          * Get a pointer to the btree block.
1866          */
1867         bp = cur->bc_bufs[level];
1868         block = XFS_BUF_TO_INOBT_BLOCK(bp);
1869 #ifdef DEBUG
1870         if ((error = xfs_btree_check_sblock(cur, block, level, bp)))
1871                 return error;
1872 #endif
1873         /*
1874          * Increment the ptr at this level.  If we're still in the block
1875          * then we're done.
1876          */
1877         if (++cur->bc_ptrs[level] <= INT_GET(block->bb_numrecs, ARCH_CONVERT)) {
1878                 *stat = 1;
1879                 return 0;
1880         }
1881         /*
1882          * If we just went off the right edge of the tree, return failure.
1883          */
1884         if (INT_GET(block->bb_rightsib, ARCH_CONVERT) == NULLAGBLOCK) {
1885                 *stat = 0;
1886                 return 0;
1887         }
1888         /*
1889          * March up the tree incrementing pointers.
1890          * Stop when we don't go off the right edge of a block.
1891          */
1892         for (lev = level + 1; lev < cur->bc_nlevels; lev++) {
1893                 bp = cur->bc_bufs[lev];
1894                 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1895 #ifdef DEBUG
1896                 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1897                         return error;
1898 #endif
1899                 if (++cur->bc_ptrs[lev] <= INT_GET(block->bb_numrecs, ARCH_CONVERT))
1900                         break;
1901                 /*
1902                  * Read-ahead the right block, we're going to read it
1903                  * in the next loop.
1904                  */
1905                 xfs_btree_readahead(cur, lev, XFS_BTCUR_RIGHTRA);
1906         }
1907         /*
1908          * If we went off the root then we are seriously confused.
1909          */
1910         ASSERT(lev < cur->bc_nlevels);
1911         /*
1912          * Now walk back down the tree, fixing up the cursor's buffer
1913          * pointers and key numbers.
1914          */
1915         for (bp = cur->bc_bufs[lev], block = XFS_BUF_TO_INOBT_BLOCK(bp);
1916              lev > level; ) {
1917                 xfs_agblock_t   agbno;  /* block number of btree block */
1918
1919                 agbno = INT_GET(*XFS_INOBT_PTR_ADDR(block, cur->bc_ptrs[lev], cur), ARCH_CONVERT);
1920                 if ((error = xfs_btree_read_bufs(cur->bc_mp, cur->bc_tp,
1921                                 cur->bc_private.i.agno, agbno, 0, &bp,
1922                                 XFS_INO_BTREE_REF)))
1923                         return error;
1924                 lev--;
1925                 xfs_btree_setbuf(cur, lev, bp);
1926                 block = XFS_BUF_TO_INOBT_BLOCK(bp);
1927                 if ((error = xfs_btree_check_sblock(cur, block, lev, bp)))
1928                         return error;
1929                 cur->bc_ptrs[lev] = 1;
1930         }
1931         *stat = 1;
1932         return 0;
1933 }
1934
1935 /*
1936  * Insert the current record at the point referenced by cur.
1937  * The cursor may be inconsistent on return if splits have been done.
1938  */
1939 int                                     /* error */
1940 xfs_inobt_insert(
1941         xfs_btree_cur_t *cur,           /* btree cursor */
1942         int             *stat)          /* success/failure */
1943 {
1944         int             error;          /* error return value */
1945         int             i;              /* result value, 0 for failure */
1946         int             level;          /* current level number in btree */
1947         xfs_agblock_t   nbno;           /* new block number (split result) */
1948         xfs_btree_cur_t *ncur;          /* new cursor (split result) */
1949         xfs_inobt_rec_t nrec;           /* record being inserted this level */
1950         xfs_btree_cur_t *pcur;          /* previous level's cursor */
1951
1952         level = 0;
1953         nbno = NULLAGBLOCK;
1954         INT_SET(nrec.ir_startino, ARCH_CONVERT, cur->bc_rec.i.ir_startino);
1955         INT_SET(nrec.ir_freecount, ARCH_CONVERT, cur->bc_rec.i.ir_freecount);
1956         INT_SET(nrec.ir_free, ARCH_CONVERT, cur->bc_rec.i.ir_free);
1957         ncur = (xfs_btree_cur_t *)0;
1958         pcur = cur;
1959         /*
1960          * Loop going up the tree, starting at the leaf level.
1961          * Stop when we don't get a split block, that must mean that
1962          * the insert is finished with this level.
1963          */
1964         do {
1965                 /*
1966                  * Insert nrec/nbno into this level of the tree.
1967                  * Note if we fail, nbno will be null.
1968                  */
1969                 if ((error = xfs_inobt_insrec(pcur, level++, &nbno, &nrec, &ncur,
1970                                 &i))) {
1971                         if (pcur != cur)
1972                                 xfs_btree_del_cursor(pcur, XFS_BTREE_ERROR);
1973                         return error;
1974                 }
1975                 /*
1976                  * See if the cursor we just used is trash.
1977                  * Can't trash the caller's cursor, but otherwise we should
1978                  * if ncur is a new cursor or we're about to be done.
1979                  */
1980                 if (pcur != cur && (ncur || nbno == NULLAGBLOCK)) {
1981                         cur->bc_nlevels = pcur->bc_nlevels;
1982                         xfs_btree_del_cursor(pcur, XFS_BTREE_NOERROR);
1983                 }
1984                 /*
1985                  * If we got a new cursor, switch to it.
1986                  */
1987                 if (ncur) {
1988                         pcur = ncur;
1989                         ncur = (xfs_btree_cur_t *)0;
1990                 }
1991         } while (nbno != NULLAGBLOCK);
1992         *stat = i;
1993         return 0;
1994 }
1995
1996 /*
1997  * Lookup the record equal to ino in the btree given by cur.
1998  */
1999 int                                     /* error */
2000 xfs_inobt_lookup_eq(
2001         xfs_btree_cur_t *cur,           /* btree cursor */
2002         xfs_agino_t     ino,            /* starting inode of chunk */
2003         __int32_t       fcnt,           /* free inode count */
2004         xfs_inofree_t   free,           /* free inode mask */
2005         int             *stat)          /* success/failure */
2006 {
2007         cur->bc_rec.i.ir_startino = ino;
2008         cur->bc_rec.i.ir_freecount = fcnt;
2009         cur->bc_rec.i.ir_free = free;
2010         return xfs_inobt_lookup(cur, XFS_LOOKUP_EQ, stat);
2011 }
2012
2013 /*
2014  * Lookup the first record greater than or equal to ino
2015  * in the btree given by cur.
2016  */
2017 int                                     /* error */
2018 xfs_inobt_lookup_ge(
2019         xfs_btree_cur_t *cur,           /* btree cursor */
2020         xfs_agino_t     ino,            /* starting inode of chunk */
2021         __int32_t       fcnt,           /* free inode count */
2022         xfs_inofree_t   free,           /* free inode mask */
2023         int             *stat)          /* success/failure */
2024 {
2025         cur->bc_rec.i.ir_startino = ino;
2026         cur->bc_rec.i.ir_freecount = fcnt;
2027         cur->bc_rec.i.ir_free = free;
2028         return xfs_inobt_lookup(cur, XFS_LOOKUP_GE, stat);
2029 }
2030
2031 /*
2032  * Lookup the first record less than or equal to ino
2033  * in the btree given by cur.
2034  */
2035 int                                     /* error */
2036 xfs_inobt_lookup_le(
2037         xfs_btree_cur_t *cur,           /* btree cursor */
2038         xfs_agino_t     ino,            /* starting inode of chunk */
2039         __int32_t       fcnt,           /* free inode count */
2040         xfs_inofree_t   free,           /* free inode mask */
2041         int             *stat)          /* success/failure */
2042 {
2043         cur->bc_rec.i.ir_startino = ino;
2044         cur->bc_rec.i.ir_freecount = fcnt;
2045         cur->bc_rec.i.ir_free = free;
2046         return xfs_inobt_lookup(cur, XFS_LOOKUP_LE, stat);
2047 }
2048
2049 /*
2050  * Update the record referred to by cur, to the value given
2051  * by [ino, fcnt, free].
2052  * This either works (return 0) or gets an EFSCORRUPTED error.
2053  */
2054 int                                     /* error */
2055 xfs_inobt_update(
2056         xfs_btree_cur_t         *cur,   /* btree cursor */
2057         xfs_agino_t             ino,    /* starting inode of chunk */
2058         __int32_t               fcnt,   /* free inode count */
2059         xfs_inofree_t           free)   /* free inode mask */
2060 {
2061         xfs_inobt_block_t       *block; /* btree block to update */
2062         xfs_buf_t               *bp;    /* buffer containing btree block */
2063         int                     error;  /* error return value */
2064         int                     ptr;    /* current record number (updating) */
2065         xfs_inobt_rec_t         *rp;    /* pointer to updated record */
2066
2067         /*
2068          * Pick up the current block.
2069          */
2070         bp = cur->bc_bufs[0];
2071         block = XFS_BUF_TO_INOBT_BLOCK(bp);
2072 #ifdef DEBUG
2073         if ((error = xfs_btree_check_sblock(cur, block, 0, bp)))
2074                 return error;
2075 #endif
2076         /*
2077          * Get the address of the rec to be updated.
2078          */
2079         ptr = cur->bc_ptrs[0];
2080         rp = XFS_INOBT_REC_ADDR(block, ptr, cur);
2081         /*
2082          * Fill in the new contents and log them.
2083          */
2084         INT_SET(rp->ir_startino, ARCH_CONVERT, ino);
2085         INT_SET(rp->ir_freecount, ARCH_CONVERT, fcnt);
2086         INT_SET(rp->ir_free, ARCH_CONVERT, free);
2087         xfs_inobt_log_recs(cur, bp, ptr, ptr);
2088         /*
2089          * Updating first record in leaf. Pass new key value up to our parent.
2090          */
2091         if (ptr == 1) {
2092                 xfs_inobt_key_t key;    /* key containing [ino] */
2093
2094                 INT_SET(key.ir_startino, ARCH_CONVERT, ino);
2095                 if ((error = xfs_inobt_updkey(cur, &key, 1)))
2096                         return error;
2097         }
2098         return 0;
2099 }