Pileus Git - ~andy/linux/blob - fs/btrfs/extent-tree.c
Btrfs: Fix cache_block_group to catch holes at the start of the group
[~andy/linux] / fs / btrfs / extent-tree.c
1 /*
2  * Copyright (C) 2007 Oracle.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 02111-1307, USA.
17  */
18
19 #include <linux/sched.h>
20 #include "ctree.h"
21 #include "disk-io.h"
22 #include "print-tree.h"
23 #include "transaction.h"
24
/* Forward declarations for helpers that are called before they are defined. */
static int finish_current_insert(struct btrfs_trans_handle *trans,
				 struct btrfs_root *extent_root);
static int del_pending_extents(struct btrfs_trans_handle *trans,
			       struct btrfs_root *extent_root);
29
30 static int cache_block_group(struct btrfs_root *root,
31                              struct btrfs_block_group_cache *block_group)
32 {
33         struct btrfs_path *path;
34         int ret;
35         struct btrfs_key key;
36         struct btrfs_leaf *leaf;
37         struct radix_tree_root *extent_radix;
38         int slot;
39         u64 i;
40         u64 last = 0;
41         u64 hole_size;
42         u64 first_free;
43         int found = 0;
44
45         root = root->fs_info->extent_root;
46         extent_radix = &root->fs_info->extent_map_radix;
47
48         if (block_group->cached)
49                 return 0;
50         if (block_group->data)
51                 return 0;
52         path = btrfs_alloc_path();
53         if (!path)
54                 return -ENOMEM;
55
56         path->reada = 2;
57         first_free = block_group->key.objectid;
58         key.objectid = block_group->key.objectid;
59         key.flags = 0;
60         key.offset = 0;
61
62         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
63         ret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
64
65         if (ret < 0)
66                 return ret;
67
68         if (ret && path->slots[0] > 0)
69                 path->slots[0]--;
70
71         while(1) {
72                 leaf = btrfs_buffer_leaf(path->nodes[0]);
73                 slot = path->slots[0];
74                 if (slot >= btrfs_header_nritems(&leaf->header)) {
75                         ret = btrfs_next_leaf(root, path);
76                         if (ret < 0)
77                                 goto err;
78                         if (ret == 0) {
79                                 continue;
80                         } else {
81                                 break;
82                         }
83                 }
84
85                 btrfs_disk_key_to_cpu(&key, &leaf->items[slot].key);
86                 if (key.objectid < block_group->key.objectid) {
87                         if (key.objectid + key.offset > first_free)
88                                 first_free = key.objectid + key.offset;
89                         goto next;
90                 }
91
92                 if (key.objectid >= block_group->key.objectid +
93                     block_group->key.offset) {
94                         break;
95                 }
96
97                 if (btrfs_key_type(&key) == BTRFS_EXTENT_ITEM_KEY) {
98                         if (!found) {
99                                 last = first_free;
100                                 found = 1;
101                         }
102                         hole_size = key.objectid - last;
103                         for (i = 0; i < hole_size; i++) {
104                                 set_radix_bit(extent_radix, last + i);
105                         }
106                         last = key.objectid + key.offset;
107                 }
108 next:
109                 path->slots[0]++;
110         }
111
112         if (!found)
113                 last = first_free;
114         if (block_group->key.objectid +
115             block_group->key.offset > last) {
116                 hole_size = block_group->key.objectid +
117                         block_group->key.offset - last;
118                 for (i = 0; i < hole_size; i++) {
119                         set_radix_bit(extent_radix,
120                                         last + i);
121                 }
122         }
123         block_group->cached = 1;
124 err:
125         btrfs_free_path(path);
126         return 0;
127 }
128
129 struct btrfs_block_group_cache *btrfs_lookup_block_group(struct
130                                                          btrfs_fs_info *info,
131                                                          u64 blocknr)
132 {
133         struct btrfs_block_group_cache *block_group;
134         int ret;
135
136         ret = radix_tree_gang_lookup(&info->block_group_radix,
137                                      (void **)&block_group,
138                                      blocknr, 1);
139         if (ret) {
140                 if (block_group->key.objectid <= blocknr && blocknr <=
141                     block_group->key.objectid + block_group->key.offset)
142                         return block_group;
143         }
144         ret = radix_tree_gang_lookup(&info->block_group_data_radix,
145                                      (void **)&block_group,
146                                      blocknr, 1);
147         if (ret) {
148                 if (block_group->key.objectid <= blocknr && blocknr <=
149                     block_group->key.objectid + block_group->key.offset)
150                         return block_group;
151         }
152         return NULL;
153 }
154
155 static u64 leaf_range(struct btrfs_root *root)
156 {
157         u64 size = BTRFS_LEAF_DATA_SIZE(root);
158         do_div(size, sizeof(struct btrfs_extent_item) +
159                 sizeof(struct btrfs_item));
160         return size;
161 }
162
163 static u64 find_search_start(struct btrfs_root *root,
164                              struct btrfs_block_group_cache **cache_ret,
165                              u64 search_start, int num)
166 {
167         unsigned long gang[8];
168         int ret;
169         struct btrfs_block_group_cache *cache = *cache_ret;
170         u64 last = max(search_start, cache->key.objectid);
171
172         if (cache->data)
173                 goto out;
174 again:
175         ret = cache_block_group(root, cache);
176         if (ret)
177                 goto out;
178         while(1) {
179                 ret = find_first_radix_bit(&root->fs_info->extent_map_radix,
180                                            gang, last, ARRAY_SIZE(gang));
181                 if (!ret)
182                         goto out;
183                 last = gang[ret-1] + 1;
184                 if (num > 1) {
185                         if (ret != ARRAY_SIZE(gang)) {
186                                 goto new_group;
187                         }
188                         if (gang[ret-1] - gang[0] > leaf_range(root)) {
189                                 continue;
190                         }
191                 }
192                 if (gang[0] >= cache->key.objectid + cache->key.offset) {
193                         goto new_group;
194                 }
195                 return gang[0];
196         }
197 out:
198         return max(cache->last_alloc, search_start);
199
200 new_group:
201         cache = btrfs_lookup_block_group(root->fs_info,
202                                          last + cache->key.offset - 1);
203         if (!cache) {
204                 return max((*cache_ret)->last_alloc, search_start);
205         }
206         cache = btrfs_find_block_group(root, cache,
207                                        last + cache->key.offset - 1, 0, 0);
208         *cache_ret = cache;
209         goto again;
210 }
211
212 static u64 div_factor(u64 num, int factor)
213 {
214         num *= factor;
215         do_div(num, 10);
216         return num;
217 }
218
219 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
220                                                  struct btrfs_block_group_cache
221                                                  *hint, u64 search_start,
222                                                  int data, int owner)
223 {
224         struct btrfs_block_group_cache *cache[8];
225         struct btrfs_block_group_cache *found_group = NULL;
226         struct btrfs_fs_info *info = root->fs_info;
227         struct radix_tree_root *radix;
228         struct radix_tree_root *swap_radix;
229         u64 used;
230         u64 last = 0;
231         u64 hint_last;
232         int i;
233         int ret;
234         int full_search = 0;
235         int factor = 8;
236         int data_swap = 0;
237
238         if (!owner)
239                 factor = 5;
240
241         if (data) {
242                 radix = &info->block_group_data_radix;
243                 swap_radix = &info->block_group_radix;
244         } else {
245                 radix = &info->block_group_radix;
246                 swap_radix = &info->block_group_data_radix;
247         }
248
249         if (search_start) {
250                 struct btrfs_block_group_cache *shint;
251                 shint = btrfs_lookup_block_group(info, search_start);
252                 if (shint && shint->data == data) {
253                         used = btrfs_block_group_used(&shint->item);
254                         if (used + shint->pinned <
255                             div_factor(shint->key.offset, factor)) {
256                                 return shint;
257                         }
258                 }
259         }
260         if (hint && hint->data == data) {
261                 used = btrfs_block_group_used(&hint->item);
262                 if (used + hint->pinned <
263                     div_factor(hint->key.offset, factor)) {
264                         return hint;
265                 }
266                 if (used >= div_factor(hint->key.offset, 8)) {
267                         radix_tree_tag_clear(radix,
268                                              hint->key.objectid +
269                                              hint->key.offset - 1,
270                                              BTRFS_BLOCK_GROUP_AVAIL);
271                 }
272                 last = hint->key.offset * 3;
273                 if (hint->key.objectid >= last)
274                         last = max(search_start + hint->key.offset - 1,
275                                    hint->key.objectid - last);
276                 else
277                         last = hint->key.objectid + hint->key.offset;
278                 hint_last = last;
279         } else {
280                 if (hint)
281                         hint_last = max(hint->key.objectid, search_start);
282                 else
283                         hint_last = search_start;
284
285                 last = hint_last;
286         }
287         while(1) {
288                 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
289                                                  last, ARRAY_SIZE(cache),
290                                                  BTRFS_BLOCK_GROUP_AVAIL);
291                 if (!ret)
292                         break;
293                 for (i = 0; i < ret; i++) {
294                         last = cache[i]->key.objectid +
295                                 cache[i]->key.offset;
296                         used = btrfs_block_group_used(&cache[i]->item);
297                         if (used + cache[i]->pinned <
298                             div_factor(cache[i]->key.offset, factor)) {
299                                 found_group = cache[i];
300                                 goto found;
301                         }
302                         if (used >= div_factor(cache[i]->key.offset, 8)) {
303                                 radix_tree_tag_clear(radix,
304                                                      cache[i]->key.objectid +
305                                                      cache[i]->key.offset - 1,
306                                                      BTRFS_BLOCK_GROUP_AVAIL);
307                         }
308                 }
309                 cond_resched();
310         }
311         last = hint_last;
312 again:
313         while(1) {
314                 ret = radix_tree_gang_lookup(radix, (void **)cache,
315                                              last, ARRAY_SIZE(cache));
316                 if (!ret)
317                         break;
318                 for (i = 0; i < ret; i++) {
319                         last = cache[i]->key.objectid +
320                                 cache[i]->key.offset;
321                         used = btrfs_block_group_used(&cache[i]->item);
322                         if (used + cache[i]->pinned < cache[i]->key.offset) {
323                                 found_group = cache[i];
324                                 goto found;
325                         }
326                         if (used >= cache[i]->key.offset) {
327                                 radix_tree_tag_clear(radix,
328                                                      cache[i]->key.objectid +
329                                                      cache[i]->key.offset - 1,
330                                                      BTRFS_BLOCK_GROUP_AVAIL);
331                         }
332                 }
333                 cond_resched();
334         }
335         if (!full_search) {
336                 last = search_start;
337                 full_search = 1;
338                 goto again;
339         }
340         if (!data_swap) {
341                 struct radix_tree_root *tmp = radix;
342                 data_swap = 1;
343                 radix = swap_radix;
344                 swap_radix = tmp;
345                 last = search_start;
346                 goto again;
347         }
348         if (!found_group) {
349                 ret = radix_tree_gang_lookup(radix,
350                                              (void **)&found_group, 0, 1);
351                 if (ret == 0) {
352                         ret = radix_tree_gang_lookup(swap_radix,
353                                                      (void **)&found_group,
354                                                      0, 1);
355                 }
356                 BUG_ON(ret != 1);
357         }
358 found:
359         return found_group;
360 }
361
362 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
363                                 struct btrfs_root *root,
364                                 u64 blocknr, u64 num_blocks)
365 {
366         struct btrfs_path *path;
367         int ret;
368         struct btrfs_key key;
369         struct btrfs_leaf *l;
370         struct btrfs_extent_item *item;
371         u32 refs;
372
373         path = btrfs_alloc_path();
374         if (!path)
375                 return -ENOMEM;
376
377         key.objectid = blocknr;
378         key.flags = 0;
379         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
380         key.offset = num_blocks;
381         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
382                                 0, 1);
383         if (ret < 0)
384                 return ret;
385         if (ret != 0) {
386                 BUG();
387         }
388         BUG_ON(ret != 0);
389         l = btrfs_buffer_leaf(path->nodes[0]);
390         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
391         refs = btrfs_extent_refs(item);
392         btrfs_set_extent_refs(item, refs + 1);
393         btrfs_mark_buffer_dirty(path->nodes[0]);
394
395         btrfs_release_path(root->fs_info->extent_root, path);
396         btrfs_free_path(path);
397         finish_current_insert(trans, root->fs_info->extent_root);
398         del_pending_extents(trans, root->fs_info->extent_root);
399         return 0;
400 }
401
402 int btrfs_extent_post_op(struct btrfs_trans_handle *trans,
403                          struct btrfs_root *root)
404 {
405         finish_current_insert(trans, root->fs_info->extent_root);
406         del_pending_extents(trans, root->fs_info->extent_root);
407         return 0;
408 }
409
410 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
411                              struct btrfs_root *root, u64 blocknr,
412                              u64 num_blocks, u32 *refs)
413 {
414         struct btrfs_path *path;
415         int ret;
416         struct btrfs_key key;
417         struct btrfs_leaf *l;
418         struct btrfs_extent_item *item;
419
420         path = btrfs_alloc_path();
421         key.objectid = blocknr;
422         key.offset = num_blocks;
423         key.flags = 0;
424         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
425         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
426                                 0, 0);
427         if (ret < 0)
428                 goto out;
429         if (ret != 0)
430                 BUG();
431         l = btrfs_buffer_leaf(path->nodes[0]);
432         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
433         *refs = btrfs_extent_refs(item);
434 out:
435         btrfs_free_path(path);
436         return 0;
437 }
438
439 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
440                        struct btrfs_root *root)
441 {
442         return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1);
443 }
444
445 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
446                   struct buffer_head *buf)
447 {
448         u64 blocknr;
449         struct btrfs_node *buf_node;
450         struct btrfs_leaf *buf_leaf;
451         struct btrfs_disk_key *key;
452         struct btrfs_file_extent_item *fi;
453         int i;
454         int leaf;
455         int ret;
456         int faili;
457         int err;
458
459         if (!root->ref_cows)
460                 return 0;
461         buf_node = btrfs_buffer_node(buf);
462         leaf = btrfs_is_leaf(buf_node);
463         buf_leaf = btrfs_buffer_leaf(buf);
464         for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) {
465                 if (leaf) {
466                         u64 disk_blocknr;
467                         key = &buf_leaf->items[i].key;
468                         if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
469                                 continue;
470                         fi = btrfs_item_ptr(buf_leaf, i,
471                                             struct btrfs_file_extent_item);
472                         if (btrfs_file_extent_type(fi) ==
473                             BTRFS_FILE_EXTENT_INLINE)
474                                 continue;
475                         disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
476                         if (disk_blocknr == 0)
477                                 continue;
478                         ret = btrfs_inc_extent_ref(trans, root, disk_blocknr,
479                                     btrfs_file_extent_disk_num_blocks(fi));
480                         if (ret) {
481                                 faili = i;
482                                 goto fail;
483                         }
484                 } else {
485                         blocknr = btrfs_node_blockptr(buf_node, i);
486                         ret = btrfs_inc_extent_ref(trans, root, blocknr, 1);
487                         if (ret) {
488                                 faili = i;
489                                 goto fail;
490                         }
491                 }
492         }
493         return 0;
494 fail:
495         WARN_ON(1);
496         for (i =0; i < faili; i++) {
497                 if (leaf) {
498                         u64 disk_blocknr;
499                         key = &buf_leaf->items[i].key;
500                         if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
501                                 continue;
502                         fi = btrfs_item_ptr(buf_leaf, i,
503                                             struct btrfs_file_extent_item);
504                         if (btrfs_file_extent_type(fi) ==
505                             BTRFS_FILE_EXTENT_INLINE)
506                                 continue;
507                         disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
508                         if (disk_blocknr == 0)
509                                 continue;
510                         err = btrfs_free_extent(trans, root, disk_blocknr,
511                                     btrfs_file_extent_disk_num_blocks(fi), 0);
512                         BUG_ON(err);
513                 } else {
514                         blocknr = btrfs_node_blockptr(buf_node, i);
515                         err = btrfs_free_extent(trans, root, blocknr, 1, 0);
516                         BUG_ON(err);
517                 }
518         }
519         return ret;
520 }
521
522 static int write_one_cache_group(struct btrfs_trans_handle *trans,
523                                  struct btrfs_root *root,
524                                  struct btrfs_path *path,
525                                  struct btrfs_block_group_cache *cache)
526 {
527         int ret;
528         int pending_ret;
529         struct btrfs_root *extent_root = root->fs_info->extent_root;
530         struct btrfs_block_group_item *bi;
531
532         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
533         if (ret < 0)
534                 goto fail;
535         BUG_ON(ret);
536         bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
537                             struct btrfs_block_group_item);
538         memcpy(bi, &cache->item, sizeof(*bi));
539         btrfs_mark_buffer_dirty(path->nodes[0]);
540         btrfs_release_path(extent_root, path);
541 fail:
542         finish_current_insert(trans, extent_root);
543         pending_ret = del_pending_extents(trans, extent_root);
544         if (ret)
545                 return ret;
546         if (pending_ret)
547                 return pending_ret;
548         if (cache->data)
549                 cache->last_alloc = cache->first_free;
550         return 0;
551
552 }
553
554 static int write_dirty_block_radix(struct btrfs_trans_handle *trans,
555                                    struct btrfs_root *root,
556                                    struct radix_tree_root *radix)
557 {
558         struct btrfs_block_group_cache *cache[8];
559         int ret;
560         int err = 0;
561         int werr = 0;
562         int i;
563         struct btrfs_path *path;
564         unsigned long off = 0;
565
566         path = btrfs_alloc_path();
567         if (!path)
568                 return -ENOMEM;
569
570         while(1) {
571                 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
572                                                  off, ARRAY_SIZE(cache),
573                                                  BTRFS_BLOCK_GROUP_DIRTY);
574                 if (!ret)
575                         break;
576                 for (i = 0; i < ret; i++) {
577                         err = write_one_cache_group(trans, root,
578                                                     path, cache[i]);
579                         /*
580                          * if we fail to write the cache group, we want
581                          * to keep it marked dirty in hopes that a later
582                          * write will work
583                          */
584                         if (err) {
585                                 werr = err;
586                                 off = cache[i]->key.objectid +
587                                         cache[i]->key.offset;
588                                 continue;
589                         }
590
591                         radix_tree_tag_clear(radix, cache[i]->key.objectid +
592                                              cache[i]->key.offset - 1,
593                                              BTRFS_BLOCK_GROUP_DIRTY);
594                 }
595         }
596         btrfs_free_path(path);
597         return werr;
598 }
599
600 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
601                                    struct btrfs_root *root)
602 {
603         int ret;
604         int ret2;
605         ret = write_dirty_block_radix(trans, root,
606                                       &root->fs_info->block_group_radix);
607         ret2 = write_dirty_block_radix(trans, root,
608                                       &root->fs_info->block_group_data_radix);
609         if (ret)
610                 return ret;
611         if (ret2)
612                 return ret2;
613         return 0;
614 }
615
616 static int update_block_group(struct btrfs_trans_handle *trans,
617                               struct btrfs_root *root,
618                               u64 blocknr, u64 num, int alloc, int mark_free,
619                               int data)
620 {
621         struct btrfs_block_group_cache *cache;
622         struct btrfs_fs_info *info = root->fs_info;
623         u64 total = num;
624         u64 old_val;
625         u64 block_in_group;
626         u64 i;
627         int ret;
628
629         while(total) {
630                 cache = btrfs_lookup_block_group(info, blocknr);
631                 if (!cache) {
632                         return -1;
633                 }
634                 block_in_group = blocknr - cache->key.objectid;
635                 WARN_ON(block_in_group > cache->key.offset);
636                 radix_tree_tag_set(cache->radix, cache->key.objectid +
637                                    cache->key.offset - 1,
638                                    BTRFS_BLOCK_GROUP_DIRTY);
639
640                 old_val = btrfs_block_group_used(&cache->item);
641                 num = min(total, cache->key.offset - block_in_group);
642                 if (alloc) {
643                         if (blocknr > cache->last_alloc)
644                                 cache->last_alloc = blocknr;
645                         if (!cache->data) {
646                                 for (i = 0; i < num; i++) {
647                                         clear_radix_bit(&info->extent_map_radix,
648                                                         blocknr + i);
649                                 }
650                         }
651                         if (cache->data != data &&
652                             old_val < (cache->key.offset >> 1)) {
653                                 cache->data = data;
654                                 radix_tree_delete(cache->radix,
655                                                   cache->key.objectid +
656                                                   cache->key.offset - 1);
657
658                                 if (data) {
659                                         cache->radix =
660                                                 &info->block_group_data_radix;
661                                         cache->item.flags |=
662                                                 BTRFS_BLOCK_GROUP_DATA;
663                                 } else {
664                                         cache->radix = &info->block_group_radix;
665                                         cache->item.flags &=
666                                                 ~BTRFS_BLOCK_GROUP_DATA;
667                                 }
668                                 ret = radix_tree_insert(cache->radix,
669                                                         cache->key.objectid +
670                                                         cache->key.offset - 1,
671                                                         (void *)cache);
672                         }
673                         old_val += num;
674                 } else {
675                         old_val -= num;
676                         if (blocknr < cache->first_free)
677                                 cache->first_free = blocknr;
678                         if (!cache->data && mark_free) {
679                                 for (i = 0; i < num; i++) {
680                                         set_radix_bit(&info->extent_map_radix,
681                                                       blocknr + i);
682                                 }
683                         }
684                         if (old_val < (cache->key.offset >> 1) &&
685                             old_val + num >= (cache->key.offset >> 1)) {
686                                 radix_tree_tag_set(cache->radix,
687                                                    cache->key.objectid +
688                                                    cache->key.offset - 1,
689                                                    BTRFS_BLOCK_GROUP_AVAIL);
690                         }
691                 }
692                 btrfs_set_block_group_used(&cache->item, old_val);
693                 total -= num;
694                 blocknr += num;
695         }
696         return 0;
697 }
698
699 int btrfs_copy_pinned(struct btrfs_root *root, struct radix_tree_root *copy)
700 {
701         unsigned long gang[8];
702         u64 last = 0;
703         struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
704         int ret;
705         int i;
706
707         while(1) {
708                 ret = find_first_radix_bit(pinned_radix, gang, last,
709                                            ARRAY_SIZE(gang));
710                 if (!ret)
711                         break;
712                 for (i = 0 ; i < ret; i++) {
713                         set_radix_bit(copy, gang[i]);
714                         last = gang[i] + 1;
715                 }
716         }
717         ret = find_first_radix_bit(&root->fs_info->extent_ins_radix, gang, 0,
718                                    ARRAY_SIZE(gang));
719         WARN_ON(ret);
720         return 0;
721 }
722
723 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans,
724                                struct btrfs_root *root,
725                                struct radix_tree_root *unpin_radix)
726 {
727         unsigned long gang[8];
728         struct btrfs_block_group_cache *block_group;
729         u64 first = 0;
730         int ret;
731         int i;
732         struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
733         struct radix_tree_root *extent_radix = &root->fs_info->extent_map_radix;
734
735         while(1) {
736                 ret = find_first_radix_bit(unpin_radix, gang, 0,
737                                            ARRAY_SIZE(gang));
738                 if (!ret)
739                         break;
740                 if (!first)
741                         first = gang[0];
742                 for (i = 0; i < ret; i++) {
743                         clear_radix_bit(pinned_radix, gang[i]);
744                         clear_radix_bit(unpin_radix, gang[i]);
745                         block_group = btrfs_lookup_block_group(root->fs_info,
746                                                                gang[i]);
747                         if (block_group) {
748                                 WARN_ON(block_group->pinned == 0);
749                                 block_group->pinned--;
750                                 if (gang[i] < block_group->last_alloc)
751                                         block_group->last_alloc = gang[i];
752                                 if (!block_group->data)
753                                         set_radix_bit(extent_radix, gang[i]);
754                         }
755                 }
756         }
757         return 0;
758 }
759
760 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
761                                  btrfs_root *extent_root)
762 {
763         struct btrfs_key ins;
764         struct btrfs_extent_item extent_item;
765         int i;
766         int ret;
767         int err;
768         unsigned long gang[8];
769         struct btrfs_fs_info *info = extent_root->fs_info;
770
771         btrfs_set_extent_refs(&extent_item, 1);
772         ins.offset = 1;
773         ins.flags = 0;
774         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
775         btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid);
776
777         while(1) {
778                 ret = find_first_radix_bit(&info->extent_ins_radix, gang, 0,
779                                            ARRAY_SIZE(gang));
780                 if (!ret)
781                         break;
782
783                 for (i = 0; i < ret; i++) {
784                         ins.objectid = gang[i];
785                         err = btrfs_insert_item(trans, extent_root, &ins,
786                                                 &extent_item,
787                                                 sizeof(extent_item));
788                         clear_radix_bit(&info->extent_ins_radix, gang[i]);
789                         WARN_ON(err);
790                 }
791         }
792         return 0;
793 }
794
795 static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending)
796 {
797         int err;
798         struct btrfs_header *header;
799         struct buffer_head *bh;
800
801         if (!pending) {
802                 bh = btrfs_find_tree_block(root, blocknr);
803                 if (bh) {
804                         if (buffer_uptodate(bh)) {
805                                 u64 transid =
806                                     root->fs_info->running_transaction->transid;
807                                 header = btrfs_buffer_header(bh);
808                                 if (btrfs_header_generation(header) ==
809                                     transid) {
810                                         btrfs_block_release(root, bh);
811                                         return 0;
812                                 }
813                         }
814                         btrfs_block_release(root, bh);
815                 }
816                 err = set_radix_bit(&root->fs_info->pinned_radix, blocknr);
817                 if (!err) {
818                         struct btrfs_block_group_cache *cache;
819                         cache = btrfs_lookup_block_group(root->fs_info,
820                                                          blocknr);
821                         if (cache)
822                                 cache->pinned++;
823                 }
824         } else {
825                 err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr);
826         }
827         BUG_ON(err < 0);
828         return 0;
829 }
830
831 /*
832  * remove an extent from the root, returns 0 on success
833  */
834 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
835                          *root, u64 blocknr, u64 num_blocks, int pin,
836                          int mark_free)
837 {
838         struct btrfs_path *path;
839         struct btrfs_key key;
840         struct btrfs_fs_info *info = root->fs_info;
841         struct btrfs_root *extent_root = info->extent_root;
842         int ret;
843         struct btrfs_extent_item *ei;
844         u32 refs;
845
846         key.objectid = blocknr;
847         key.flags = 0;
848         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
849         key.offset = num_blocks;
850
851         path = btrfs_alloc_path();
852         if (!path)
853                 return -ENOMEM;
854
855         ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
856         if (ret < 0)
857                 return ret;
858         BUG_ON(ret);
859         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
860                             struct btrfs_extent_item);
861         BUG_ON(ei->refs == 0);
862         refs = btrfs_extent_refs(ei) - 1;
863         btrfs_set_extent_refs(ei, refs);
864         btrfs_mark_buffer_dirty(path->nodes[0]);
865         if (refs == 0) {
866                 u64 super_blocks_used, root_blocks_used;
867
868                 if (pin) {
869                         ret = pin_down_block(root, blocknr, 0);
870                         BUG_ON(ret);
871                 }
872
873                 /* block accounting for super block */
874                 super_blocks_used = btrfs_super_blocks_used(&info->super_copy);
875                 btrfs_set_super_blocks_used(&info->super_copy,
876                                             super_blocks_used - num_blocks);
877
878                 /* block accounting for root item */
879                 root_blocks_used = btrfs_root_blocks_used(&root->root_item);
880                 btrfs_set_root_blocks_used(&root->root_item,
881                                            root_blocks_used - num_blocks);
882
883                 ret = btrfs_del_item(trans, extent_root, path);
884                 if (ret) {
885                         return ret;
886                 }
887                 ret = update_block_group(trans, root, blocknr, num_blocks, 0,
888                                          mark_free, 0);
889                 BUG_ON(ret);
890         }
891         btrfs_free_path(path);
892         finish_current_insert(trans, extent_root);
893         return ret;
894 }
895
896 /*
897  * find all the blocks marked as pending in the radix tree and remove
898  * them from the extent map
899  */
900 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
901                                btrfs_root *extent_root)
902 {
903         int ret;
904         int wret;
905         int err = 0;
906         unsigned long gang[4];
907         int i;
908         struct radix_tree_root *pending_radix;
909         struct radix_tree_root *pinned_radix;
910         struct btrfs_block_group_cache *cache;
911
912         pending_radix = &extent_root->fs_info->pending_del_radix;
913         pinned_radix = &extent_root->fs_info->pinned_radix;
914
915         while(1) {
916                 ret = find_first_radix_bit(pending_radix, gang, 0,
917                                            ARRAY_SIZE(gang));
918                 if (!ret)
919                         break;
920                 for (i = 0; i < ret; i++) {
921                         wret = set_radix_bit(pinned_radix, gang[i]);
922                         if (wret == 0) {
923                                 cache =
924                                   btrfs_lookup_block_group(extent_root->fs_info,
925                                                            gang[i]);
926                                 if (cache)
927                                         cache->pinned++;
928                         }
929                         if (wret < 0) {
930                                 printk(KERN_CRIT "set_radix_bit, err %d\n",
931                                        wret);
932                                 BUG_ON(wret < 0);
933                         }
934                         wret = clear_radix_bit(pending_radix, gang[i]);
935                         BUG_ON(wret);
936                         wret = __free_extent(trans, extent_root,
937                                              gang[i], 1, 0, 0);
938                         if (wret)
939                                 err = wret;
940                 }
941         }
942         return err;
943 }
944
945 /*
946  * remove an extent from the root, returns 0 on success
947  */
948 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
949                       *root, u64 blocknr, u64 num_blocks, int pin)
950 {
951         struct btrfs_root *extent_root = root->fs_info->extent_root;
952         int pending_ret;
953         int ret;
954
955         if (root == extent_root) {
956                 pin_down_block(root, blocknr, 1);
957                 return 0;
958         }
959         ret = __free_extent(trans, root, blocknr, num_blocks, pin, pin == 0);
960         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
961         return ret ? ret : pending_ret;
962 }
963
964 /*
965  * walks the btree of allocated extents and find a hole of a given size.
966  * The key ins is changed to record the hole:
967  * ins->objectid == block start
968  * ins->flags = BTRFS_EXTENT_ITEM_KEY
969  * ins->offset == number of blocks
970  * Any available blocks before search_start are skipped.
971  */
972 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
973                             *orig_root, u64 num_blocks, u64 empty_size,
974                             u64 search_start, u64 search_end, u64 hint_block,
975                             struct btrfs_key *ins, u64 exclude_start,
976                             u64 exclude_nr, int data)
977 {
978         struct btrfs_path *path;
979         struct btrfs_key key;
980         int ret;
981         u64 hole_size = 0;
982         int slot = 0;
983         u64 last_block = 0;
984         u64 test_block;
985         u64 orig_search_start = search_start;
986         int start_found;
987         struct btrfs_leaf *l;
988         struct btrfs_root * root = orig_root->fs_info->extent_root;
989         struct btrfs_fs_info *info = root->fs_info;
990         int total_needed = num_blocks;
991         int level;
992         struct btrfs_block_group_cache *block_group;
993         int full_scan = 0;
994         int wrapped = 0;
995
996         WARN_ON(num_blocks < 1);
997         ins->flags = 0;
998         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
999
1000         level = btrfs_header_level(btrfs_buffer_header(root->node));
1001         if (search_end == (u64)-1)
1002                 search_end = btrfs_super_total_blocks(&info->super_copy);
1003         if (hint_block) {
1004                 block_group = btrfs_lookup_block_group(info, hint_block);
1005                 block_group = btrfs_find_block_group(root, block_group,
1006                                                      hint_block, data, 1);
1007         } else {
1008                 block_group = btrfs_find_block_group(root,
1009                                                      trans->block_group, 0,
1010                                                      data, 1);
1011         }
1012
1013         total_needed += empty_size;
1014         path = btrfs_alloc_path();
1015
1016 check_failed:
1017         if (!block_group->data)
1018                 search_start = find_search_start(root, &block_group,
1019                                                  search_start, total_needed);
1020         else if (!full_scan)
1021                 search_start = max(block_group->last_alloc, search_start);
1022
1023         btrfs_init_path(path);
1024         ins->objectid = search_start;
1025         ins->offset = 0;
1026         start_found = 0;
1027         path->reada = 2;
1028
1029         ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
1030         if (ret < 0)
1031                 goto error;
1032
1033         if (path->slots[0] > 0) {
1034                 path->slots[0]--;
1035         }
1036
1037         l = btrfs_buffer_leaf(path->nodes[0]);
1038         btrfs_disk_key_to_cpu(&key, &l->items[path->slots[0]].key);
1039         /*
1040          * a rare case, go back one key if we hit a block group item
1041          * instead of an extent item
1042          */
1043         if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY &&
1044             key.objectid + key.offset >= search_start) {
1045                 ins->objectid = key.objectid;
1046                 ins->offset = key.offset - 1;
1047                 btrfs_release_path(root, path);
1048                 ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
1049                 if (ret < 0)
1050                         goto error;
1051
1052                 if (path->slots[0] > 0) {
1053                         path->slots[0]--;
1054                 }
1055         }
1056
1057         while (1) {
1058                 l = btrfs_buffer_leaf(path->nodes[0]);
1059                 slot = path->slots[0];
1060                 if (slot >= btrfs_header_nritems(&l->header)) {
1061                         ret = btrfs_next_leaf(root, path);
1062                         if (ret == 0)
1063                                 continue;
1064                         if (ret < 0)
1065                                 goto error;
1066                         if (!start_found) {
1067                                 ins->objectid = search_start;
1068                                 ins->offset = search_end - search_start;
1069                                 start_found = 1;
1070                                 goto check_pending;
1071                         }
1072                         ins->objectid = last_block > search_start ?
1073                                         last_block : search_start;
1074                         ins->offset = search_end - ins->objectid;
1075                         goto check_pending;
1076                 }
1077
1078                 btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
1079                 if (key.objectid >= search_start && key.objectid > last_block &&
1080                     start_found) {
1081                         if (last_block < search_start)
1082                                 last_block = search_start;
1083                         hole_size = key.objectid - last_block;
1084                         if (hole_size >= num_blocks) {
1085                                 ins->objectid = last_block;
1086                                 ins->offset = hole_size;
1087                                 goto check_pending;
1088                         }
1089                 }
1090
1091                 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
1092                         goto next;
1093
1094                 start_found = 1;
1095                 last_block = key.objectid + key.offset;
1096                 if (!full_scan && last_block >= block_group->key.objectid +
1097                     block_group->key.offset) {
1098                         btrfs_release_path(root, path);
1099                         search_start = block_group->key.objectid +
1100                                 block_group->key.offset * 2;
1101                         goto new_group;
1102                 }
1103 next:
1104                 path->slots[0]++;
1105                 cond_resched();
1106         }
1107 check_pending:
1108         /* we have to make sure we didn't find an extent that has already
1109          * been allocated by the map tree or the original allocation
1110          */
1111         btrfs_release_path(root, path);
1112         BUG_ON(ins->objectid < search_start);
1113
1114         if (ins->objectid + num_blocks >= search_end) {
1115                 if (full_scan) {
1116                         ret = -ENOSPC;
1117                         goto error;
1118                 }
1119                 search_start = orig_search_start;
1120                 if (wrapped) {
1121                         if (!full_scan)
1122                                 total_needed -= empty_size;
1123                         full_scan = 1;
1124                 } else
1125                         wrapped = 1;
1126                 goto new_group;
1127         }
1128         for (test_block = ins->objectid;
1129              test_block < ins->objectid + num_blocks; test_block++) {
1130                 if (test_radix_bit(&info->pinned_radix, test_block) ||
1131                     test_radix_bit(&info->extent_ins_radix, test_block)) {
1132                         search_start = test_block + 1;
1133                         goto new_group;
1134                 }
1135         }
1136         if (exclude_nr > 0 && (ins->objectid + num_blocks > exclude_start &&
1137             ins->objectid < exclude_start + exclude_nr)) {
1138                 search_start = exclude_start + exclude_nr;
1139                 goto new_group;
1140         }
1141         if (!data) {
1142                 block_group = btrfs_lookup_block_group(info, ins->objectid);
1143                 if (block_group)
1144                         trans->block_group = block_group;
1145         }
1146         ins->offset = num_blocks;
1147         btrfs_free_path(path);
1148         return 0;
1149
1150 new_group:
1151         if (search_start + num_blocks >= search_end) {
1152                 search_start = orig_search_start;
1153                 if (full_scan) {
1154                         ret = -ENOSPC;
1155                         goto error;
1156                 }
1157                 if (wrapped) {
1158                         if (!full_scan)
1159                                 total_needed -= empty_size;
1160                         full_scan = 1;
1161                 } else
1162                         wrapped = 1;
1163         }
1164         block_group = btrfs_lookup_block_group(info, search_start);
1165         cond_resched();
1166         if (!full_scan)
1167                 block_group = btrfs_find_block_group(root, block_group,
1168                                                      search_start, data, 0);
1169         goto check_failed;
1170
1171 error:
1172         btrfs_release_path(root, path);
1173         btrfs_free_path(path);
1174         return ret;
1175 }
1176 /*
1177  * finds a free extent and does all the dirty work required for allocation
1178  * returns the key for the extent through ins, and a tree buffer for
1179  * the first block of the extent through buf.
1180  *
1181  * returns 0 if everything worked, non-zero otherwise.
1182  */
1183 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
1184                        struct btrfs_root *root, u64 owner,
1185                        u64 num_blocks, u64 empty_size, u64 hint_block,
1186                        u64 search_end, struct btrfs_key *ins, int data)
1187 {
1188         int ret;
1189         int pending_ret;
1190         u64 super_blocks_used, root_blocks_used;
1191         u64 search_start = 0;
1192         struct btrfs_fs_info *info = root->fs_info;
1193         struct btrfs_root *extent_root = info->extent_root;
1194         struct btrfs_extent_item extent_item;
1195
1196         btrfs_set_extent_refs(&extent_item, 1);
1197         btrfs_set_extent_owner(&extent_item, owner);
1198
1199         WARN_ON(num_blocks < 1);
1200         ret = find_free_extent(trans, root, num_blocks, empty_size,
1201                                search_start, search_end, hint_block, ins,
1202                                trans->alloc_exclude_start,
1203                                trans->alloc_exclude_nr, data);
1204         BUG_ON(ret);
1205         if (ret)
1206                 return ret;
1207
1208         /* block accounting for super block */
1209         super_blocks_used = btrfs_super_blocks_used(&info->super_copy);
1210         btrfs_set_super_blocks_used(&info->super_copy, super_blocks_used +
1211                                     num_blocks);
1212
1213         /* block accounting for root item */
1214         root_blocks_used = btrfs_root_blocks_used(&root->root_item);
1215         btrfs_set_root_blocks_used(&root->root_item, root_blocks_used +
1216                                    num_blocks);
1217
1218         if (root == extent_root) {
1219                 BUG_ON(num_blocks != 1);
1220                 set_radix_bit(&root->fs_info->extent_ins_radix, ins->objectid);
1221                 goto update_block;
1222         }
1223
1224         WARN_ON(trans->alloc_exclude_nr);
1225         trans->alloc_exclude_start = ins->objectid;
1226         trans->alloc_exclude_nr = ins->offset;
1227         ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
1228                                 sizeof(extent_item));
1229
1230         trans->alloc_exclude_start = 0;
1231         trans->alloc_exclude_nr = 0;
1232
1233         BUG_ON(ret);
1234         finish_current_insert(trans, extent_root);
1235         pending_ret = del_pending_extents(trans, extent_root);
1236         if (ret) {
1237                 return ret;
1238         }
1239         if (pending_ret) {
1240                 return pending_ret;
1241         }
1242
1243 update_block:
1244         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1, 0,
1245                                  data);
1246         BUG_ON(ret);
1247         return 0;
1248 }
1249
1250 /*
1251  * helper function to allocate a block for a given tree
1252  * returns the tree buffer or NULL.
1253  */
1254 struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
1255                                            struct btrfs_root *root, u64 hint,
1256                                            u64 empty_size)
1257 {
1258         struct btrfs_key ins;
1259         int ret;
1260         struct buffer_head *buf;
1261
1262         ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
1263                                  1, empty_size, hint, (u64)-1, &ins, 0);
1264         if (ret) {
1265                 BUG_ON(ret > 0);
1266                 return ERR_PTR(ret);
1267         }
1268         buf = btrfs_find_create_tree_block(root, ins.objectid);
1269         if (!buf) {
1270                 btrfs_free_extent(trans, root, ins.objectid, 1, 0);
1271                 return ERR_PTR(-ENOMEM);
1272         }
1273         WARN_ON(buffer_dirty(buf));
1274         set_buffer_uptodate(buf);
1275         set_buffer_checked(buf);
1276         set_buffer_defrag(buf);
1277         set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index);
1278         return buf;
1279 }
1280
1281 static int drop_leaf_ref(struct btrfs_trans_handle *trans,
1282                          struct btrfs_root *root, struct buffer_head *cur)
1283 {
1284         struct btrfs_disk_key *key;
1285         struct btrfs_leaf *leaf;
1286         struct btrfs_file_extent_item *fi;
1287         int i;
1288         int nritems;
1289         int ret;
1290
1291         BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur)));
1292         leaf = btrfs_buffer_leaf(cur);
1293         nritems = btrfs_header_nritems(&leaf->header);
1294         for (i = 0; i < nritems; i++) {
1295                 u64 disk_blocknr;
1296                 key = &leaf->items[i].key;
1297                 if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
1298                         continue;
1299                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
1300                 if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE)
1301                         continue;
1302                 /*
1303                  * FIXME make sure to insert a trans record that
1304                  * repeats the snapshot del on crash
1305                  */
1306                 disk_blocknr = btrfs_file_extent_disk_blocknr(fi);
1307                 if (disk_blocknr == 0)
1308                         continue;
1309                 ret = btrfs_free_extent(trans, root, disk_blocknr,
1310                                         btrfs_file_extent_disk_num_blocks(fi),
1311                                         0);
1312                 BUG_ON(ret);
1313         }
1314         return 0;
1315 }
1316
1317 static void reada_walk_down(struct btrfs_root *root,
1318                             struct btrfs_node *node)
1319 {
1320         int i;
1321         u32 nritems;
1322         u64 blocknr;
1323         int ret;
1324         u32 refs;
1325
1326         nritems = btrfs_header_nritems(&node->header);
1327         for (i = 0; i < nritems; i++) {
1328                 blocknr = btrfs_node_blockptr(node, i);
1329                 ret = lookup_extent_ref(NULL, root, blocknr, 1, &refs);
1330                 BUG_ON(ret);
1331                 if (refs != 1)
1332                         continue;
1333                 mutex_unlock(&root->fs_info->fs_mutex);
1334                 ret = readahead_tree_block(root, blocknr);
1335                 cond_resched();
1336                 mutex_lock(&root->fs_info->fs_mutex);
1337                 if (ret)
1338                         break;
1339         }
1340 }
1341
1342 /*
1343  * helper function for drop_snapshot, this walks down the tree dropping ref
1344  * counts as it goes.
1345  */
1346 static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1347                           *root, struct btrfs_path *path, int *level)
1348 {
1349         struct buffer_head *next;
1350         struct buffer_head *cur;
1351         u64 blocknr;
1352         int ret;
1353         u32 refs;
1354
1355         WARN_ON(*level < 0);
1356         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1357         ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]),
1358                                1, &refs);
1359         BUG_ON(ret);
1360         if (refs > 1)
1361                 goto out;
1362
1363         /*
1364          * walk down to the last node level and free all the leaves
1365          */
1366         while(*level >= 0) {
1367                 WARN_ON(*level < 0);
1368                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
1369                 cur = path->nodes[*level];
1370
1371                 if (*level > 0 && path->slots[*level] == 0)
1372                         reada_walk_down(root, btrfs_buffer_node(cur));
1373
1374                 if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
1375                         WARN_ON(1);
1376
1377                 if (path->slots[*level] >=
1378                     btrfs_header_nritems(btrfs_buffer_header(cur)))
1379                         break;
1380                 if (*level == 0) {
1381                         ret = drop_leaf_ref(trans, root, cur);
1382                         BUG_ON(ret);
1383                         break;
1384                 }
1385                 blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
1386                                               path->slots[*level]);
1387                 ret = lookup_extent_ref(trans, root, blocknr, 1, &refs);
1388                 BUG_ON(ret);
1389                 if (refs != 1) {
1390                         path->slots[*level]++;
1391                         ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
1392                         BUG_ON(ret);
1393                         continue;
1394                 }
1395                 next = btrfs_find_tree_block(root, blocknr);
1396                 if (!next || !buffer_uptodate(next)) {
1397                         brelse(next);
1398                         mutex_unlock(&root->fs_info->fs_mutex);
1399                         next = read_tree_block(root, blocknr);
1400                         mutex_lock(&root->fs_info->fs_mutex);
1401
1402                         /* we dropped the lock, check one more time */
1403                         ret = lookup_extent_ref(trans, root, blocknr, 1, &refs);
1404                         BUG_ON(ret);
1405                         if (refs != 1) {
1406                                 path->slots[*level]++;
1407                                 brelse(next);
1408                                 ret = btrfs_free_extent(trans, root,
1409                                                         blocknr, 1, 1);
1410                                 BUG_ON(ret);
1411                                 continue;
1412                         }
1413                 }
1414                 WARN_ON(*level <= 0);
1415                 if (path->nodes[*level-1])
1416                         btrfs_block_release(root, path->nodes[*level-1]);
1417                 path->nodes[*level-1] = next;
1418                 *level = btrfs_header_level(btrfs_buffer_header(next));
1419                 path->slots[*level] = 0;
1420         }
1421 out:
1422         WARN_ON(*level < 0);
1423         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1424         ret = btrfs_free_extent(trans, root,
1425                                 bh_blocknr(path->nodes[*level]), 1, 1);
1426         btrfs_block_release(root, path->nodes[*level]);
1427         path->nodes[*level] = NULL;
1428         *level += 1;
1429         BUG_ON(ret);
1430         return 0;
1431 }
1432
1433 /*
1434  * helper for dropping snapshots.  This walks back up the tree in the path
1435  * to find the first node higher up where we haven't yet gone through
1436  * all the slots
1437  */
1438 static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1439                         *root, struct btrfs_path *path, int *level)
1440 {
1441         int i;
1442         int slot;
1443         int ret;
1444         struct btrfs_root_item *root_item = &root->root_item;
1445
1446         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1447                 slot = path->slots[i];
1448                 if (slot < btrfs_header_nritems(
1449                     btrfs_buffer_header(path->nodes[i])) - 1) {
1450                         struct btrfs_node *node;
1451                         node = btrfs_buffer_node(path->nodes[i]);
1452                         path->slots[i]++;
1453                         *level = i;
1454                         WARN_ON(*level == 0);
1455                         memcpy(&root_item->drop_progress,
1456                                &node->ptrs[path->slots[i]].key,
1457                                sizeof(root_item->drop_progress));
1458                         root_item->drop_level = i;
1459                         return 0;
1460                 } else {
1461                         ret = btrfs_free_extent(trans, root,
1462                                                 bh_blocknr(path->nodes[*level]),
1463                                                 1, 1);
1464                         BUG_ON(ret);
1465                         btrfs_block_release(root, path->nodes[*level]);
1466                         path->nodes[*level] = NULL;
1467                         *level = i + 1;
1468                 }
1469         }
1470         return 1;
1471 }
1472
1473 /*
1474  * drop the reference count on the tree rooted at 'snap'.  This traverses
1475  * the tree freeing any blocks that have a ref count of zero after being
1476  * decremented.
1477  */
1478 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1479                         *root)
1480 {
1481         int ret = 0;
1482         int wret;
1483         int level;
1484         struct btrfs_path *path;
1485         int i;
1486         int orig_level;
1487         struct btrfs_root_item *root_item = &root->root_item;
1488
1489         path = btrfs_alloc_path();
1490         BUG_ON(!path);
1491
1492         level = btrfs_header_level(btrfs_buffer_header(root->node));
1493         orig_level = level;
1494         if (btrfs_disk_key_objectid(&root_item->drop_progress) == 0) {
1495                 path->nodes[level] = root->node;
1496                 path->slots[level] = 0;
1497         } else {
1498                 struct btrfs_key key;
1499                 struct btrfs_disk_key *found_key;
1500                 struct btrfs_node *node;
1501
1502                 btrfs_disk_key_to_cpu(&key, &root_item->drop_progress);
1503                 level = root_item->drop_level;
1504                 path->lowest_level = level;
1505                 wret = btrfs_search_slot(NULL, root, &key, path, 0, 0);
1506                 if (wret < 0) {
1507                         ret = wret;
1508                         goto out;
1509                 }
1510                 node = btrfs_buffer_node(path->nodes[level]);
1511                 found_key = &node->ptrs[path->slots[level]].key;
1512                 WARN_ON(memcmp(found_key, &root_item->drop_progress,
1513                                sizeof(*found_key)));
1514         }
1515         while(1) {
1516                 wret = walk_down_tree(trans, root, path, &level);
1517                 if (wret > 0)
1518                         break;
1519                 if (wret < 0)
1520                         ret = wret;
1521
1522                 wret = walk_up_tree(trans, root, path, &level);
1523                 if (wret > 0)
1524                         break;
1525                 if (wret < 0)
1526                         ret = wret;
1527                 ret = -EAGAIN;
1528                 get_bh(root->node);
1529                 break;
1530         }
1531         for (i = 0; i <= orig_level; i++) {
1532                 if (path->nodes[i]) {
1533                         btrfs_block_release(root, path->nodes[i]);
1534                         path->nodes[i] = 0;
1535                 }
1536         }
1537 out:
1538         btrfs_free_path(path);
1539         return ret;
1540 }
1541
1542 static int free_block_group_radix(struct radix_tree_root *radix)
1543 {
1544         int ret;
1545         struct btrfs_block_group_cache *cache[8];
1546         int i;
1547
1548         while(1) {
1549                 ret = radix_tree_gang_lookup(radix, (void **)cache, 0,
1550                                              ARRAY_SIZE(cache));
1551                 if (!ret)
1552                         break;
1553                 for (i = 0; i < ret; i++) {
1554                         radix_tree_delete(radix, cache[i]->key.objectid +
1555                                           cache[i]->key.offset - 1);
1556                         kfree(cache[i]);
1557                 }
1558         }
1559         return 0;
1560 }
1561
1562 int btrfs_free_block_groups(struct btrfs_fs_info *info)
1563 {
1564         int ret;
1565         int ret2;
1566         unsigned long gang[16];
1567         int i;
1568
1569         ret = free_block_group_radix(&info->block_group_radix);
1570         ret2 = free_block_group_radix(&info->block_group_data_radix);
1571         if (ret)
1572                 return ret;
1573         if (ret2)
1574                 return ret2;
1575
1576         while(1) {
1577                 ret = find_first_radix_bit(&info->extent_map_radix,
1578                                            gang, 0, ARRAY_SIZE(gang));
1579                 if (!ret)
1580                         break;
1581                 for (i = 0; i < ret; i++) {
1582                         clear_radix_bit(&info->extent_map_radix, gang[i]);
1583                 }
1584         }
1585         return 0;
1586 }
1587
1588 int btrfs_read_block_groups(struct btrfs_root *root)
1589 {
1590         struct btrfs_path *path;
1591         int ret;
1592         int err = 0;
1593         struct btrfs_block_group_item *bi;
1594         struct btrfs_block_group_cache *cache;
1595         struct btrfs_fs_info *info = root->fs_info;
1596         struct radix_tree_root *radix;
1597         struct btrfs_key key;
1598         struct btrfs_key found_key;
1599         struct btrfs_leaf *leaf;
1600         u64 group_size_blocks;
1601         u64 used;
1602
1603         group_size_blocks = BTRFS_BLOCK_GROUP_SIZE >>
1604                 root->fs_info->sb->s_blocksize_bits;
1605         root = info->extent_root;
1606         key.objectid = 0;
1607         key.offset = group_size_blocks;
1608         key.flags = 0;
1609         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
1610
1611         path = btrfs_alloc_path();
1612         if (!path)
1613                 return -ENOMEM;
1614
1615         while(1) {
1616                 ret = btrfs_search_slot(NULL, info->extent_root,
1617                                         &key, path, 0, 0);
1618                 if (ret != 0) {
1619                         err = ret;
1620                         break;
1621                 }
1622                 leaf = btrfs_buffer_leaf(path->nodes[0]);
1623                 btrfs_disk_key_to_cpu(&found_key,
1624                                       &leaf->items[path->slots[0]].key);
1625                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
1626                 if (!cache) {
1627                         err = -1;
1628                         break;
1629                 }
1630
1631                 bi = btrfs_item_ptr(leaf, path->slots[0],
1632                                     struct btrfs_block_group_item);
1633                 if (bi->flags & BTRFS_BLOCK_GROUP_DATA) {
1634                         radix = &info->block_group_data_radix;
1635                         cache->data = 1;
1636                 } else {
1637                         radix = &info->block_group_radix;
1638                         cache->data = 0;
1639                 }
1640
1641                 memcpy(&cache->item, bi, sizeof(*bi));
1642                 memcpy(&cache->key, &found_key, sizeof(found_key));
1643                 cache->last_alloc = cache->key.objectid;
1644                 cache->first_free = cache->key.objectid;
1645                 cache->pinned = 0;
1646                 cache->cached = 0;
1647
1648                 cache->radix = radix;
1649
1650                 key.objectid = found_key.objectid + found_key.offset;
1651                 btrfs_release_path(root, path);
1652                 ret = radix_tree_insert(radix, found_key.objectid +
1653                                         found_key.offset - 1,
1654                                         (void *)cache);
1655                 BUG_ON(ret);
1656                 used = btrfs_block_group_used(bi);
1657                 if (used < div_factor(key.offset, 8)) {
1658                         radix_tree_tag_set(radix, found_key.objectid +
1659                                            found_key.offset - 1,
1660                                            BTRFS_BLOCK_GROUP_AVAIL);
1661                 }
1662                 if (key.objectid >=
1663                     btrfs_super_total_blocks(&info->super_copy))
1664                         break;
1665         }
1666
1667         btrfs_free_path(path);
1668         return 0;
1669 }