]> Pileus Git - ~andy/linux/blob - fs/btrfs/extent-tree.c
Btrfs: allocator and tuning
[~andy/linux] / fs / btrfs / extent-tree.c
1 #include <linux/module.h>
2 #include "ctree.h"
3 #include "disk-io.h"
4 #include "print-tree.h"
5 #include "transaction.h"
6
7 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
8                             *orig_root, u64 num_blocks, u64 search_start, u64
9                             search_end, struct btrfs_key *ins, int data);
10 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
11                                  btrfs_root *extent_root);
12 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
13                                btrfs_root *extent_root);
14
15 static struct btrfs_block_group_cache *lookup_block_group(struct
16                                                           btrfs_fs_info *info,
17                                                           u64 blocknr)
18 {
19         struct btrfs_block_group_cache *block_group;
20         int ret;
21
22         ret = radix_tree_gang_lookup(&info->block_group_radix,
23                                      (void **)&block_group,
24                                      blocknr, 1);
25         if (ret) {
26                 if (block_group->key.objectid <= blocknr && blocknr <=
27                     block_group->key.objectid + block_group->key.offset)
28                         return block_group;
29         }
30         ret = radix_tree_gang_lookup(&info->block_group_data_radix,
31                                      (void **)&block_group,
32                                      blocknr, 1);
33         if (ret) {
34                 if (block_group->key.objectid <= blocknr && blocknr <=
35                     block_group->key.objectid + block_group->key.offset)
36                         return block_group;
37         }
38         WARN_ON(1);
39         printk("lookup_block_group fails for blocknr %Lu\n", blocknr);
40         printk("last ret was %d\n", ret);
41         if (ret) {
42                 printk("last block group was %Lu %Lu\n", block_group->key.objectid, block_group->key.offset);
43         }
44         return NULL;
45 }
46
47 struct btrfs_block_group_cache *btrfs_find_block_group(struct btrfs_root *root,
48                                                  struct btrfs_block_group_cache
49                                                  *hint, u64 search_start,
50                                                  int data)
51 {
52         struct btrfs_block_group_cache *cache[8];
53         struct btrfs_block_group_cache *found_group = NULL;
54         struct btrfs_fs_info *info = root->fs_info;
55         struct radix_tree_root *radix;
56         u64 used;
57         u64 last = 0;
58         u64 hint_last;
59         int i;
60         int ret;
61         int full_search = 0;
62
63         if (data)
64                 radix = &info->block_group_data_radix;
65         else
66                 radix = &info->block_group_radix;
67
68         if (search_start) {
69                 struct btrfs_block_group_cache *shint;
70                 shint = lookup_block_group(info, search_start);
71                 if (shint->data == data) {
72                         used = btrfs_block_group_used(&shint->item);
73                         if (used + shint->pinned <
74                             (shint->key.offset * 8) / 10) {
75                                 return shint;
76                         }
77                 }
78         }
79         if (hint && hint->data == data) {
80                 used = btrfs_block_group_used(&hint->item);
81                 if (used + hint->pinned < (hint->key.offset * 8) / 10) {
82                         return hint;
83                 }
84                 if (used >= (hint->key.offset * 8) / 10) {
85                         radix_tree_tag_clear(radix,
86                                              hint->key.objectid +
87                                              hint->key.offset - 1,
88                                              BTRFS_BLOCK_GROUP_AVAIL);
89                 }
90                 last = hint->key.offset * 2;
91                 if (hint->key.objectid >= last)
92                         last = max(search_start, hint->key.objectid - last);
93                 else
94                         last = hint->key.objectid + hint->key.offset;
95                 hint_last = last;
96         } else {
97                 hint_last = search_start;
98                 last = search_start;
99         }
100         while(1) {
101                 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
102                                                  last, ARRAY_SIZE(cache),
103                                                  BTRFS_BLOCK_GROUP_AVAIL);
104                 if (!ret)
105                         break;
106                 for (i = 0; i < ret; i++) {
107                         last = cache[i]->key.objectid +
108                                 cache[i]->key.offset;
109                         used = btrfs_block_group_used(&cache[i]->item);
110                         if (used + cache[i]->pinned <
111                             (cache[i]->key.offset * 8) / 10) {
112                                 found_group = cache[i];
113                                 goto found;
114                         }
115                         if (used >= (cache[i]->key.offset * 8) / 10) {
116                                 radix_tree_tag_clear(radix,
117                                                      cache[i]->key.objectid +
118                                                      cache[i]->key.offset - 1,
119                                                      BTRFS_BLOCK_GROUP_AVAIL);
120                         }
121                 }
122         }
123         last = hint_last;
124 again:
125         while(1) {
126                 ret = radix_tree_gang_lookup(radix, (void **)cache,
127                                              last, ARRAY_SIZE(cache));
128                 if (!ret)
129                         break;
130                 for (i = 0; i < ret; i++) {
131                         last = cache[i]->key.objectid +
132                                 cache[i]->key.offset;
133                         used = btrfs_block_group_used(&cache[i]->item);
134                         if (used + cache[i]->pinned < cache[i]->key.offset) {
135                                 found_group = cache[i];
136                                 goto found;
137                         }
138                         if (used >= cache[i]->key.offset) {
139                                 radix_tree_tag_clear(radix,
140                                                      cache[i]->key.objectid +
141                                                      cache[i]->key.offset - 1,
142                                                      BTRFS_BLOCK_GROUP_AVAIL);
143                         }
144                 }
145         }
146         if (!full_search) {
147                 last = search_start;
148                 full_search = 1;
149                 goto again;
150         }
151         if (!found_group) {
152                 ret = radix_tree_gang_lookup(radix,
153                                              (void **)&found_group, 0, 1);
154                 BUG_ON(ret != 1);
155         }
156 found:
157         return found_group;
158 }
159
160 int btrfs_inc_extent_ref(struct btrfs_trans_handle *trans,
161                                 struct btrfs_root *root,
162                                 u64 blocknr, u64 num_blocks)
163 {
164         struct btrfs_path *path;
165         int ret;
166         struct btrfs_key key;
167         struct btrfs_leaf *l;
168         struct btrfs_extent_item *item;
169         struct btrfs_key ins;
170         u32 refs;
171
172         find_free_extent(trans, root->fs_info->extent_root, 0, 0, (u64)-1,
173                          &ins, 0);
174         path = btrfs_alloc_path();
175         BUG_ON(!path);
176         btrfs_init_path(path);
177         key.objectid = blocknr;
178         key.flags = 0;
179         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
180         key.offset = num_blocks;
181         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
182                                 0, 1);
183         if (ret != 0) {
184 printk("can't find block %Lu %Lu\n", blocknr, num_blocks);
185                 BUG();
186         }
187         BUG_ON(ret != 0);
188         l = btrfs_buffer_leaf(path->nodes[0]);
189         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
190         refs = btrfs_extent_refs(item);
191         btrfs_set_extent_refs(item, refs + 1);
192         btrfs_mark_buffer_dirty(path->nodes[0]);
193
194         btrfs_release_path(root->fs_info->extent_root, path);
195         btrfs_free_path(path);
196         finish_current_insert(trans, root->fs_info->extent_root);
197         del_pending_extents(trans, root->fs_info->extent_root);
198         return 0;
199 }
200
201 static int lookup_extent_ref(struct btrfs_trans_handle *trans,
202                              struct btrfs_root *root, u64 blocknr,
203                              u64 num_blocks, u32 *refs)
204 {
205         struct btrfs_path *path;
206         int ret;
207         struct btrfs_key key;
208         struct btrfs_leaf *l;
209         struct btrfs_extent_item *item;
210
211         path = btrfs_alloc_path();
212         btrfs_init_path(path);
213         key.objectid = blocknr;
214         key.offset = num_blocks;
215         key.flags = 0;
216         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
217         ret = btrfs_search_slot(trans, root->fs_info->extent_root, &key, path,
218                                 0, 0);
219         if (ret != 0)
220                 BUG();
221         l = btrfs_buffer_leaf(path->nodes[0]);
222         item = btrfs_item_ptr(l, path->slots[0], struct btrfs_extent_item);
223         *refs = btrfs_extent_refs(item);
224         btrfs_release_path(root->fs_info->extent_root, path);
225         btrfs_free_path(path);
226         return 0;
227 }
228
229 int btrfs_inc_root_ref(struct btrfs_trans_handle *trans,
230                        struct btrfs_root *root)
231 {
232         return btrfs_inc_extent_ref(trans, root, bh_blocknr(root->node), 1);
233 }
234
235 int btrfs_inc_ref(struct btrfs_trans_handle *trans, struct btrfs_root *root,
236                   struct buffer_head *buf)
237 {
238         u64 blocknr;
239         struct btrfs_node *buf_node;
240         struct btrfs_leaf *buf_leaf;
241         struct btrfs_disk_key *key;
242         struct btrfs_file_extent_item *fi;
243         int i;
244         int leaf;
245         int ret;
246
247         if (!root->ref_cows)
248                 return 0;
249         buf_node = btrfs_buffer_node(buf);
250         leaf = btrfs_is_leaf(buf_node);
251         buf_leaf = btrfs_buffer_leaf(buf);
252         for (i = 0; i < btrfs_header_nritems(&buf_node->header); i++) {
253                 if (leaf) {
254                         key = &buf_leaf->items[i].key;
255                         if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
256                                 continue;
257                         fi = btrfs_item_ptr(buf_leaf, i,
258                                             struct btrfs_file_extent_item);
259                         if (btrfs_file_extent_type(fi) ==
260                             BTRFS_FILE_EXTENT_INLINE)
261                                 continue;
262                         ret = btrfs_inc_extent_ref(trans, root,
263                                     btrfs_file_extent_disk_blocknr(fi),
264                                     btrfs_file_extent_disk_num_blocks(fi));
265                         BUG_ON(ret);
266                 } else {
267                         blocknr = btrfs_node_blockptr(buf_node, i);
268                         ret = btrfs_inc_extent_ref(trans, root, blocknr, 1);
269                         BUG_ON(ret);
270                 }
271         }
272         return 0;
273 }
274
275 static int write_one_cache_group(struct btrfs_trans_handle *trans,
276                                  struct btrfs_root *root,
277                                  struct btrfs_path *path,
278                                  struct btrfs_block_group_cache *cache)
279 {
280         int ret;
281         int pending_ret;
282         struct btrfs_root *extent_root = root->fs_info->extent_root;
283         struct btrfs_block_group_item *bi;
284         struct btrfs_key ins;
285
286         find_free_extent(trans, extent_root, 0, 0, (u64)-1, &ins, 0);
287         ret = btrfs_search_slot(trans, extent_root, &cache->key, path, 0, 1);
288         BUG_ON(ret);
289         bi = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
290                             struct btrfs_block_group_item);
291         memcpy(bi, &cache->item, sizeof(*bi));
292         mark_buffer_dirty(path->nodes[0]);
293         btrfs_release_path(extent_root, path);
294
295         finish_current_insert(trans, extent_root);
296         pending_ret = del_pending_extents(trans, extent_root);
297         if (ret)
298                 return ret;
299         if (pending_ret)
300                 return pending_ret;
301         if (cache->data)
302                 cache->last_alloc = cache->first_free;
303         return 0;
304
305 }
306
307 static int write_dirty_block_radix(struct btrfs_trans_handle *trans,
308                                    struct btrfs_root *root,
309                                    struct radix_tree_root *radix)
310 {
311         struct btrfs_block_group_cache *cache[8];
312         int ret;
313         int err = 0;
314         int werr = 0;
315         int i;
316         struct btrfs_path *path;
317
318         path = btrfs_alloc_path();
319         if (!path)
320                 return -ENOMEM;
321
322         while(1) {
323                 ret = radix_tree_gang_lookup_tag(radix, (void **)cache,
324                                                  0, ARRAY_SIZE(cache),
325                                                  BTRFS_BLOCK_GROUP_DIRTY);
326                 if (!ret)
327                         break;
328                 for (i = 0; i < ret; i++) {
329                         radix_tree_tag_clear(radix, cache[i]->key.objectid +
330                                              cache[i]->key.offset - 1,
331                                              BTRFS_BLOCK_GROUP_DIRTY);
332                         err = write_one_cache_group(trans, root,
333                                                     path, cache[i]);
334                         if (err)
335                                 werr = err;
336                 }
337         }
338         btrfs_free_path(path);
339         return werr;
340 }
341
342 int btrfs_write_dirty_block_groups(struct btrfs_trans_handle *trans,
343                                    struct btrfs_root *root)
344 {
345         int ret;
346         int ret2;
347         ret = write_dirty_block_radix(trans, root,
348                                       &root->fs_info->block_group_radix);
349         ret2 = write_dirty_block_radix(trans, root,
350                                       &root->fs_info->block_group_data_radix);
351         if (ret)
352                 return ret;
353         if (ret2)
354                 return ret2;
355         return 0;
356 }
357
358 static int update_block_group(struct btrfs_trans_handle *trans,
359                               struct btrfs_root *root,
360                               u64 blocknr, u64 num, int alloc)
361 {
362         struct btrfs_block_group_cache *cache;
363         struct btrfs_fs_info *info = root->fs_info;
364         u64 total = num;
365         u64 old_val;
366         u64 block_in_group;
367
368         while(total) {
369                 cache = lookup_block_group(info, blocknr);
370                 if (!cache) {
371                         printk(KERN_CRIT "blocknr %Lu lookup failed\n",
372                                blocknr);
373                         return -1;
374                 }
375                 block_in_group = blocknr - cache->key.objectid;
376                 WARN_ON(block_in_group > cache->key.offset);
377                 radix_tree_tag_set(cache->radix, cache->key.objectid +
378                                    cache->key.offset - 1,
379                                    BTRFS_BLOCK_GROUP_DIRTY);
380
381                 old_val = btrfs_block_group_used(&cache->item);
382                 num = min(total, cache->key.offset - block_in_group);
383                 total -= num;
384                 blocknr += num;
385                 if (alloc) {
386                         old_val += num;
387                         if (blocknr > cache->last_alloc)
388                                 cache->last_alloc = blocknr;
389                 } else {
390                         old_val -= num;
391                         if (blocknr < cache->first_free)
392                                 cache->first_free = blocknr;
393                 }
394                 btrfs_set_block_group_used(&cache->item, old_val);
395         }
396         return 0;
397 }
398
399 static int try_remove_page(struct address_space *mapping, unsigned long index)
400 {
401         int ret;
402         ret = invalidate_mapping_pages(mapping, index, index);
403         return ret;
404 }
405
406 int btrfs_finish_extent_commit(struct btrfs_trans_handle *trans, struct
407                                btrfs_root *root)
408 {
409         unsigned long gang[8];
410         struct inode *btree_inode = root->fs_info->btree_inode;
411         struct btrfs_block_group_cache *block_group;
412         u64 first = 0;
413         int ret;
414         int i;
415         struct radix_tree_root *pinned_radix = &root->fs_info->pinned_radix;
416
417         while(1) {
418                 ret = find_first_radix_bit(pinned_radix, gang,
419                                            ARRAY_SIZE(gang));
420                 if (!ret)
421                         break;
422                 if (!first)
423                         first = gang[0];
424                 for (i = 0; i < ret; i++) {
425                         clear_radix_bit(pinned_radix, gang[i]);
426                         block_group = lookup_block_group(root->fs_info,
427                                                          gang[i]);
428                         if (block_group) {
429                                 WARN_ON(block_group->pinned == 0);
430                                 block_group->pinned--;
431                                 if (gang[i] < block_group->last_alloc)
432                                         block_group->last_alloc = gang[i];
433                         }
434                         try_remove_page(btree_inode->i_mapping,
435                                         gang[i] << (PAGE_CACHE_SHIFT -
436                                                     btree_inode->i_blkbits));
437                 }
438         }
439         return 0;
440 }
441
442 static int finish_current_insert(struct btrfs_trans_handle *trans, struct
443                                  btrfs_root *extent_root)
444 {
445         struct btrfs_key ins;
446         struct btrfs_extent_item extent_item;
447         int i;
448         int ret;
449         u64 super_blocks_used;
450         struct btrfs_fs_info *info = extent_root->fs_info;
451
452         btrfs_set_extent_refs(&extent_item, 1);
453         ins.offset = 1;
454         ins.flags = 0;
455         btrfs_set_key_type(&ins, BTRFS_EXTENT_ITEM_KEY);
456         btrfs_set_extent_owner(&extent_item, extent_root->root_key.objectid);
457
458         for (i = 0; i < extent_root->fs_info->extent_tree_insert_nr; i++) {
459                 ins.objectid = extent_root->fs_info->extent_tree_insert[i];
460                 super_blocks_used = btrfs_super_blocks_used(info->disk_super);
461                 btrfs_set_super_blocks_used(info->disk_super,
462                                             super_blocks_used + 1);
463                 ret = btrfs_insert_item(trans, extent_root, &ins, &extent_item,
464                                         sizeof(extent_item));
465                 BUG_ON(ret);
466         }
467         extent_root->fs_info->extent_tree_insert_nr = 0;
468         extent_root->fs_info->extent_tree_prealloc_nr = 0;
469         return 0;
470 }
471
472 static int pin_down_block(struct btrfs_root *root, u64 blocknr, int pending)
473 {
474         int err;
475         struct btrfs_header *header;
476         struct buffer_head *bh;
477
478         if (!pending) {
479                 bh = btrfs_find_tree_block(root, blocknr);
480                 if (bh) {
481                         if (buffer_uptodate(bh)) {
482                                 u64 transid =
483                                     root->fs_info->running_transaction->transid;
484                                 header = btrfs_buffer_header(bh);
485                                 if (btrfs_header_generation(header) ==
486                                     transid) {
487                                         btrfs_block_release(root, bh);
488                                         return 0;
489                                 }
490                         }
491                         btrfs_block_release(root, bh);
492                 }
493                 err = set_radix_bit(&root->fs_info->pinned_radix, blocknr);
494                 if (!err) {
495                         struct btrfs_block_group_cache *cache;
496                         cache = lookup_block_group(root->fs_info, blocknr);
497                         if (cache)
498                                 cache->pinned++;
499                 }
500         } else {
501                 err = set_radix_bit(&root->fs_info->pending_del_radix, blocknr);
502         }
503         BUG_ON(err < 0);
504         return 0;
505 }
506
507 /*
508  * remove an extent from the root, returns 0 on success
509  */
510 static int __free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
511                          *root, u64 blocknr, u64 num_blocks, int pin)
512 {
513         struct btrfs_path *path;
514         struct btrfs_key key;
515         struct btrfs_fs_info *info = root->fs_info;
516         struct btrfs_root *extent_root = info->extent_root;
517         int ret;
518         struct btrfs_extent_item *ei;
519         struct btrfs_key ins;
520         u32 refs;
521
522         key.objectid = blocknr;
523         key.flags = 0;
524         btrfs_set_key_type(&key, BTRFS_EXTENT_ITEM_KEY);
525         key.offset = num_blocks;
526
527         find_free_extent(trans, root, 0, 0, (u64)-1, &ins, 0);
528         path = btrfs_alloc_path();
529         BUG_ON(!path);
530         btrfs_init_path(path);
531
532         ret = btrfs_search_slot(trans, extent_root, &key, path, -1, 1);
533         if (ret) {
534                 printk("failed to find %Lu\n", key.objectid);
535                 btrfs_print_tree(extent_root, extent_root->node);
536                 printk("failed to find %Lu\n", key.objectid);
537                 BUG();
538         }
539         ei = btrfs_item_ptr(btrfs_buffer_leaf(path->nodes[0]), path->slots[0],
540                             struct btrfs_extent_item);
541         BUG_ON(ei->refs == 0);
542         refs = btrfs_extent_refs(ei) - 1;
543         btrfs_set_extent_refs(ei, refs);
544         btrfs_mark_buffer_dirty(path->nodes[0]);
545         if (refs == 0) {
546                 u64 super_blocks_used;
547
548                 if (pin) {
549                         ret = pin_down_block(root, blocknr, 0);
550                         BUG_ON(ret);
551                 }
552
553                 super_blocks_used = btrfs_super_blocks_used(info->disk_super);
554                 btrfs_set_super_blocks_used(info->disk_super,
555                                             super_blocks_used - num_blocks);
556                 ret = btrfs_del_item(trans, extent_root, path);
557                 if (ret)
558                         BUG();
559                 ret = update_block_group(trans, root, blocknr, num_blocks, 0);
560                 BUG_ON(ret);
561         }
562         btrfs_release_path(extent_root, path);
563         btrfs_free_path(path);
564         finish_current_insert(trans, extent_root);
565         return ret;
566 }
567
568 /*
569  * find all the blocks marked as pending in the radix tree and remove
570  * them from the extent map
571  */
572 static int del_pending_extents(struct btrfs_trans_handle *trans, struct
573                                btrfs_root *extent_root)
574 {
575         int ret;
576         int wret;
577         int err = 0;
578         unsigned long gang[4];
579         int i;
580         struct radix_tree_root *pending_radix;
581         struct radix_tree_root *pinned_radix;
582         struct btrfs_block_group_cache *cache;
583
584         pending_radix = &extent_root->fs_info->pending_del_radix;
585         pinned_radix = &extent_root->fs_info->pinned_radix;
586
587         while(1) {
588                 ret = find_first_radix_bit(pending_radix, gang,
589                                            ARRAY_SIZE(gang));
590                 if (!ret)
591                         break;
592                 for (i = 0; i < ret; i++) {
593                         wret = set_radix_bit(pinned_radix, gang[i]);
594                         if (wret == 0) {
595                                 cache = lookup_block_group(extent_root->fs_info,
596                                                            gang[i]);
597                                 if (cache)
598                                         cache->pinned++;
599                         }
600                         if (wret < 0) {
601                                 printk(KERN_CRIT "set_radix_bit, err %d\n",
602                                        wret);
603                                 BUG_ON(wret < 0);
604                         }
605                         wret = clear_radix_bit(pending_radix, gang[i]);
606                         BUG_ON(wret);
607                         wret = __free_extent(trans, extent_root,
608                                              gang[i], 1, 0);
609                         if (wret)
610                                 err = wret;
611                 }
612         }
613         return err;
614 }
615
616 /*
617  * remove an extent from the root, returns 0 on success
618  */
619 int btrfs_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
620                       *root, u64 blocknr, u64 num_blocks, int pin)
621 {
622         struct btrfs_root *extent_root = root->fs_info->extent_root;
623         int pending_ret;
624         int ret;
625
626         if (root == extent_root) {
627                 pin_down_block(root, blocknr, 1);
628                 return 0;
629         }
630         ret = __free_extent(trans, root, blocknr, num_blocks, pin);
631         pending_ret = del_pending_extents(trans, root->fs_info->extent_root);
632         return ret ? ret : pending_ret;
633 }
634
635 /*
636  * walks the btree of allocated extents and find a hole of a given size.
637  * The key ins is changed to record the hole:
638  * ins->objectid == block start
639  * ins->flags = BTRFS_EXTENT_ITEM_KEY
640  * ins->offset == number of blocks
641  * Any available blocks before search_start are skipped.
642  */
643 static int find_free_extent(struct btrfs_trans_handle *trans, struct btrfs_root
644                             *orig_root, u64 num_blocks, u64 search_start, u64
645                             search_end, struct btrfs_key *ins, int data)
646 {
647         struct btrfs_path *path;
648         struct btrfs_key key;
649         int ret;
650         u64 hole_size = 0;
651         int slot = 0;
652         u64 last_block = 0;
653         u64 test_block;
654         u64 orig_search_start = search_start;
655         int start_found;
656         struct btrfs_leaf *l;
657         struct btrfs_root * root = orig_root->fs_info->extent_root;
658         struct btrfs_fs_info *info = root->fs_info;
659         int total_needed = num_blocks;
660         int total_found = 0;
661         int fill_prealloc = 0;
662         int level;
663         struct btrfs_block_group_cache *block_group;
664         int full_scan = 0;
665
666         path = btrfs_alloc_path();
667         ins->flags = 0;
668         btrfs_set_key_type(ins, BTRFS_EXTENT_ITEM_KEY);
669
670         level = btrfs_header_level(btrfs_buffer_header(root->node));
671         if (num_blocks == 0) {
672                 fill_prealloc = 1;
673                 num_blocks = 1;
674                 total_needed = (min(level + 1, BTRFS_MAX_LEVEL) + 2) * 3;
675         }
676         if (search_end == (u64)-1)
677                 search_end = btrfs_super_total_blocks(info->disk_super);
678         if (search_start) {
679                 block_group = lookup_block_group(info, search_start);
680                 block_group = btrfs_find_block_group(root, block_group,
681                                                      search_start, data);
682         } else {
683                 block_group = btrfs_find_block_group(root,
684                                                      trans->block_group, 0,
685                                                      data);
686         }
687
688 check_failed:
689         if (!full_scan && block_group->data != data)
690                 WARN_ON(1);
691         if (block_group->last_alloc > search_start)
692                 search_start = block_group->last_alloc;
693         btrfs_init_path(path);
694         ins->objectid = search_start;
695         ins->offset = 0;
696         start_found = 0;
697         ret = btrfs_search_slot(trans, root, ins, path, 0, 0);
698         if (ret < 0)
699                 goto error;
700
701         if (path->slots[0] > 0)
702                 path->slots[0]--;
703
704         while (1) {
705                 l = btrfs_buffer_leaf(path->nodes[0]);
706                 slot = path->slots[0];
707                 if (slot >= btrfs_header_nritems(&l->header)) {
708                         if (fill_prealloc) {
709                                 info->extent_tree_prealloc_nr = 0;
710                                 total_found = 0;
711                         }
712                         ret = btrfs_next_leaf(root, path);
713                         if (ret == 0)
714                                 continue;
715                         if (ret < 0)
716                                 goto error;
717                         if (!start_found) {
718                                 ins->objectid = search_start;
719                                 ins->offset = search_end - search_start;
720                                 start_found = 1;
721                                 goto check_pending;
722                         }
723                         ins->objectid = last_block > search_start ?
724                                         last_block : search_start;
725                         ins->offset = search_end - ins->objectid;
726                         goto check_pending;
727                 }
728                 btrfs_disk_key_to_cpu(&key, &l->items[slot].key);
729                 if (btrfs_key_type(&key) != BTRFS_EXTENT_ITEM_KEY)
730                         goto next;
731                 if (key.objectid >= search_start) {
732                         if (start_found) {
733                                 if (last_block < search_start)
734                                         last_block = search_start;
735                                 hole_size = key.objectid - last_block;
736                                 if (hole_size >= num_blocks) {
737                                         ins->objectid = last_block;
738                                         ins->offset = hole_size;
739                                         goto check_pending;
740                                 }
741                         }
742                 }
743                 start_found = 1;
744                 last_block = key.objectid + key.offset;
745                 if (last_block >= block_group->key.objectid +
746                     block_group->key.offset) {
747                         btrfs_release_path(root, path);
748                         search_start = block_group->key.objectid +
749                                 block_group->key.offset * 2;
750                         goto new_group;
751                 }
752 next:
753                 path->slots[0]++;
754         }
755         // FIXME -ENOSPC
756 check_pending:
757         /* we have to make sure we didn't find an extent that has already
758          * been allocated by the map tree or the original allocation
759          */
760         btrfs_release_path(root, path);
761         BUG_ON(ins->objectid < search_start);
762         if (ins->objectid + num_blocks >= search_end) {
763                 if (full_scan)
764                         return -ENOSPC;
765                 search_start = orig_search_start;
766                 full_scan = 1;
767                 goto new_group;
768         }
769         for (test_block = ins->objectid;
770              test_block < ins->objectid + num_blocks; test_block++) {
771                 if (test_radix_bit(&info->pinned_radix, test_block)) {
772                         search_start = test_block + 1;
773                         goto new_group;
774                 }
775         }
776         if (!fill_prealloc && info->extent_tree_insert_nr) {
777                 u64 last =
778                   info->extent_tree_insert[info->extent_tree_insert_nr - 1];
779                 if (ins->objectid + num_blocks >
780                     info->extent_tree_insert[0] &&
781                     ins->objectid <= last) {
782                         search_start = last + 1;
783                         WARN_ON(1);
784                         goto new_group;
785                 }
786         }
787         if (!fill_prealloc && info->extent_tree_prealloc_nr) {
788                 u64 first =
789                   info->extent_tree_prealloc[info->extent_tree_prealloc_nr - 1];
790                 if (ins->objectid + num_blocks > first &&
791                     ins->objectid <= info->extent_tree_prealloc[0]) {
792                         search_start = info->extent_tree_prealloc[0] + 1;
793                         WARN_ON(1);
794                         goto new_group;
795                 }
796         }
797         if (fill_prealloc) {
798                 int nr;
799                 test_block = ins->objectid;
800                 while(test_block < ins->objectid + ins->offset &&
801                       total_found < total_needed) {
802                         nr = total_needed - total_found - 1;
803                         BUG_ON(nr < 0);
804                         info->extent_tree_prealloc[nr] = test_block;
805                         total_found++;
806                         test_block++;
807                 }
808                 if (total_found < total_needed) {
809                         search_start = test_block;
810                         goto new_group;
811                 }
812                 info->extent_tree_prealloc_nr = total_found;
813         }
814         block_group = lookup_block_group(info, ins->objectid);
815         if (block_group) {
816                 block_group->last_alloc = ins->objectid;
817                 if (!data)
818                         trans->block_group = block_group;
819         }
820         ins->offset = num_blocks;
821         btrfs_free_path(path);
822         return 0;
823
824 new_group:
825         if (search_start + num_blocks >= search_end) {
826                 search_start = orig_search_start;
827                 full_scan = 1;
828         }
829         block_group = lookup_block_group(info, search_start);
830         if (!full_scan)
831                 block_group = btrfs_find_block_group(root, block_group,
832                                                      search_start, data);
833         goto check_failed;
834
835 error:
836         btrfs_release_path(root, path);
837         btrfs_free_path(path);
838         return ret;
839 }
840 /*
841  * finds a free extent and does all the dirty work required for allocation
842  * returns the key for the extent through ins, and a tree buffer for
843  * the first block of the extent through buf.
844  *
845  * returns 0 if everything worked, non-zero otherwise.
846  */
847 int btrfs_alloc_extent(struct btrfs_trans_handle *trans,
848                        struct btrfs_root *root, u64 owner,
849                        u64 num_blocks, u64 search_start,
850                        u64 search_end, struct btrfs_key *ins, int data)
851 {
852         int ret;
853         int pending_ret;
854         u64 super_blocks_used;
855         struct btrfs_fs_info *info = root->fs_info;
856         struct btrfs_root *extent_root = info->extent_root;
857         struct btrfs_extent_item extent_item;
858         struct btrfs_key prealloc_key;
859
860         btrfs_set_extent_refs(&extent_item, 1);
861         btrfs_set_extent_owner(&extent_item, owner);
862
863         if (root == extent_root) {
864                 int nr;
865                 BUG_ON(info->extent_tree_prealloc_nr == 0);
866                 BUG_ON(num_blocks != 1);
867                 ins->offset = 1;
868                 info->extent_tree_prealloc_nr--;
869                 nr = info->extent_tree_prealloc_nr;
870                 ins->objectid = info->extent_tree_prealloc[nr];
871                 info->extent_tree_insert[info->extent_tree_insert_nr++] =
872                         ins->objectid;
873                 ret = update_block_group(trans, root,
874                                          ins->objectid, ins->offset, 1);
875                 BUG_ON(ret);
876                 return 0;
877         }
878         /* do the real allocation */
879         ret = find_free_extent(trans, root, num_blocks, search_start,
880                                search_end, ins, data);
881         if (ret)
882                 return ret;
883
884         /* then do prealloc for the extent tree */
885         if (ins->objectid + ins->offset >= search_end)
886                 search_end = ins->objectid - 1;
887         else
888                 search_start = ins->objectid + ins->offset;
889
890         ret = find_free_extent(trans, root, 0, search_start,
891                                search_end, &prealloc_key, 0);
892         if (ret)
893                 return ret;
894
895         super_blocks_used = btrfs_super_blocks_used(info->disk_super);
896         btrfs_set_super_blocks_used(info->disk_super, super_blocks_used +
897                                     num_blocks);
898         ret = btrfs_insert_item(trans, extent_root, ins, &extent_item,
899                                 sizeof(extent_item));
900
901         finish_current_insert(trans, extent_root);
902         pending_ret = del_pending_extents(trans, extent_root);
903         if (ret)
904                 return ret;
905         if (pending_ret)
906                 return pending_ret;
907         ret = update_block_group(trans, root, ins->objectid, ins->offset, 1);
908         return 0;
909 }
910
911 /*
912  * helper function to allocate a block for a given tree
913  * returns the tree buffer or NULL.
914  */
915 struct buffer_head *btrfs_alloc_free_block(struct btrfs_trans_handle *trans,
916                                            struct btrfs_root *root, u64 hint)
917 {
918         struct btrfs_key ins;
919         int ret;
920         struct buffer_head *buf;
921
922         ret = btrfs_alloc_extent(trans, root, root->root_key.objectid,
923                                  1, hint, (unsigned long)-1, &ins, 0);
924         if (ret) {
925                 BUG();
926                 return NULL;
927         }
928         BUG_ON(ret);
929         buf = btrfs_find_create_tree_block(root, ins.objectid);
930         set_buffer_uptodate(buf);
931         set_buffer_checked(buf);
932         set_radix_bit(&trans->transaction->dirty_pages, buf->b_page->index);
933         return buf;
934 }
935
936 static int drop_leaf_ref(struct btrfs_trans_handle *trans,
937                          struct btrfs_root *root, struct buffer_head *cur)
938 {
939         struct btrfs_disk_key *key;
940         struct btrfs_leaf *leaf;
941         struct btrfs_file_extent_item *fi;
942         int i;
943         int nritems;
944         int ret;
945
946         BUG_ON(!btrfs_is_leaf(btrfs_buffer_node(cur)));
947         leaf = btrfs_buffer_leaf(cur);
948         nritems = btrfs_header_nritems(&leaf->header);
949         for (i = 0; i < nritems; i++) {
950                 key = &leaf->items[i].key;
951                 if (btrfs_disk_key_type(key) != BTRFS_EXTENT_DATA_KEY)
952                         continue;
953                 fi = btrfs_item_ptr(leaf, i, struct btrfs_file_extent_item);
954                 if (btrfs_file_extent_type(fi) == BTRFS_FILE_EXTENT_INLINE)
955                         continue;
956                 /*
957                  * FIXME make sure to insert a trans record that
958                  * repeats the snapshot del on crash
959                  */
960                 ret = btrfs_free_extent(trans, root,
961                                         btrfs_file_extent_disk_blocknr(fi),
962                                         btrfs_file_extent_disk_num_blocks(fi),
963                                         0);
964                 BUG_ON(ret);
965         }
966         return 0;
967 }
968
969 /*
970  * helper function for drop_snapshot, this walks down the tree dropping ref
971  * counts as it goes.
972  */
973 static int walk_down_tree(struct btrfs_trans_handle *trans, struct btrfs_root
974                           *root, struct btrfs_path *path, int *level)
975 {
976         struct buffer_head *next;
977         struct buffer_head *cur;
978         u64 blocknr;
979         int ret;
980         u32 refs;
981
982         WARN_ON(*level < 0);
983         WARN_ON(*level >= BTRFS_MAX_LEVEL);
984         ret = lookup_extent_ref(trans, root, bh_blocknr(path->nodes[*level]),
985                                1, &refs);
986         BUG_ON(ret);
987         if (refs > 1)
988                 goto out;
989         /*
990          * walk down to the last node level and free all the leaves
991          */
992         while(*level >= 0) {
993                 WARN_ON(*level < 0);
994                 WARN_ON(*level >= BTRFS_MAX_LEVEL);
995                 cur = path->nodes[*level];
996                 if (btrfs_header_level(btrfs_buffer_header(cur)) != *level)
997                         WARN_ON(1);
998                 if (path->slots[*level] >=
999                     btrfs_header_nritems(btrfs_buffer_header(cur)))
1000                         break;
1001                 if (*level == 0) {
1002                         ret = drop_leaf_ref(trans, root, cur);
1003                         BUG_ON(ret);
1004                         break;
1005                 }
1006                 blocknr = btrfs_node_blockptr(btrfs_buffer_node(cur),
1007                                               path->slots[*level]);
1008                 ret = lookup_extent_ref(trans, root, blocknr, 1, &refs);
1009                 BUG_ON(ret);
1010                 if (refs != 1) {
1011                         path->slots[*level]++;
1012                         ret = btrfs_free_extent(trans, root, blocknr, 1, 1);
1013                         BUG_ON(ret);
1014                         continue;
1015                 }
1016                 next = read_tree_block(root, blocknr);
1017                 WARN_ON(*level <= 0);
1018                 if (path->nodes[*level-1])
1019                         btrfs_block_release(root, path->nodes[*level-1]);
1020                 path->nodes[*level-1] = next;
1021                 *level = btrfs_header_level(btrfs_buffer_header(next));
1022                 path->slots[*level] = 0;
1023         }
1024 out:
1025         WARN_ON(*level < 0);
1026         WARN_ON(*level >= BTRFS_MAX_LEVEL);
1027         ret = btrfs_free_extent(trans, root,
1028                                 bh_blocknr(path->nodes[*level]), 1, 1);
1029         btrfs_block_release(root, path->nodes[*level]);
1030         path->nodes[*level] = NULL;
1031         *level += 1;
1032         BUG_ON(ret);
1033         return 0;
1034 }
1035
1036 /*
1037  * helper for dropping snapshots.  This walks back up the tree in the path
1038  * to find the first node higher up where we haven't yet gone through
1039  * all the slots
1040  */
1041 static int walk_up_tree(struct btrfs_trans_handle *trans, struct btrfs_root
1042                         *root, struct btrfs_path *path, int *level)
1043 {
1044         int i;
1045         int slot;
1046         int ret;
1047         for(i = *level; i < BTRFS_MAX_LEVEL - 1 && path->nodes[i]; i++) {
1048                 slot = path->slots[i];
1049                 if (slot < btrfs_header_nritems(
1050                     btrfs_buffer_header(path->nodes[i])) - 1) {
1051                         path->slots[i]++;
1052                         *level = i;
1053                         return 0;
1054                 } else {
1055                         ret = btrfs_free_extent(trans, root,
1056                                                 bh_blocknr(path->nodes[*level]),
1057                                                 1, 1);
1058                         BUG_ON(ret);
1059                         btrfs_block_release(root, path->nodes[*level]);
1060                         path->nodes[*level] = NULL;
1061                         *level = i + 1;
1062                 }
1063         }
1064         return 1;
1065 }
1066
1067 /*
1068  * drop the reference count on the tree rooted at 'snap'.  This traverses
1069  * the tree freeing any blocks that have a ref count of zero after being
1070  * decremented.
1071  */
1072 int btrfs_drop_snapshot(struct btrfs_trans_handle *trans, struct btrfs_root
1073                         *root, struct buffer_head *snap)
1074 {
1075         int ret = 0;
1076         int wret;
1077         int level;
1078         struct btrfs_path *path;
1079         int i;
1080         int orig_level;
1081
1082         path = btrfs_alloc_path();
1083         BUG_ON(!path);
1084         btrfs_init_path(path);
1085
1086         level = btrfs_header_level(btrfs_buffer_header(snap));
1087         orig_level = level;
1088         path->nodes[level] = snap;
1089         path->slots[level] = 0;
1090         while(1) {
1091                 wret = walk_down_tree(trans, root, path, &level);
1092                 if (wret > 0)
1093                         break;
1094                 if (wret < 0)
1095                         ret = wret;
1096
1097                 wret = walk_up_tree(trans, root, path, &level);
1098                 if (wret > 0)
1099                         break;
1100                 if (wret < 0)
1101                         ret = wret;
1102                 btrfs_btree_balance_dirty(root);
1103         }
1104         for (i = 0; i <= orig_level; i++) {
1105                 if (path->nodes[i]) {
1106                         btrfs_block_release(root, path->nodes[i]);
1107                 }
1108         }
1109         btrfs_free_path(path);
1110         return ret;
1111 }
1112
1113 static int free_block_group_radix(struct radix_tree_root *radix)
1114 {
1115         int ret;
1116         struct btrfs_block_group_cache *cache[8];
1117         int i;
1118
1119         while(1) {
1120                 ret = radix_tree_gang_lookup(radix, (void **)cache, 0,
1121                                              ARRAY_SIZE(cache));
1122                 if (!ret)
1123                         break;
1124                 for (i = 0; i < ret; i++) {
1125                         radix_tree_delete(radix, cache[i]->key.objectid +
1126                                           cache[i]->key.offset - 1);
1127                         kfree(cache[i]);
1128                 }
1129         }
1130         return 0;
1131 }
1132
1133 int btrfs_free_block_groups(struct btrfs_fs_info *info)
1134 {
1135         int ret;
1136         int ret2;
1137
1138         ret = free_block_group_radix(&info->block_group_radix);
1139         ret2 = free_block_group_radix(&info->block_group_data_radix);
1140         if (ret)
1141                 return ret;
1142         if (ret2)
1143                 return ret2;
1144         return 0;
1145 }
1146
1147 int btrfs_read_block_groups(struct btrfs_root *root)
1148 {
1149         struct btrfs_path *path;
1150         int ret;
1151         int err = 0;
1152         struct btrfs_block_group_item *bi;
1153         struct btrfs_block_group_cache *cache;
1154         struct btrfs_fs_info *info = root->fs_info;
1155         struct radix_tree_root *radix;
1156         struct btrfs_key key;
1157         struct btrfs_key found_key;
1158         struct btrfs_leaf *leaf;
1159         u64 group_size_blocks = BTRFS_BLOCK_GROUP_SIZE / root->blocksize;
1160         u64 used;
1161         u64 nr = 0;
1162
1163         root = info->extent_root;
1164         key.objectid = 0;
1165         key.offset = group_size_blocks;
1166         key.flags = 0;
1167         btrfs_set_key_type(&key, BTRFS_BLOCK_GROUP_ITEM_KEY);
1168
1169         path = btrfs_alloc_path();
1170         if (!path)
1171                 return -ENOMEM;
1172
1173         while(1) {
1174                 ret = btrfs_search_slot(NULL, info->extent_root,
1175                                         &key, path, 0, 0);
1176                 if (ret != 0) {
1177                         err = ret;
1178                         break;
1179                 }
1180                 leaf = btrfs_buffer_leaf(path->nodes[0]);
1181                 btrfs_disk_key_to_cpu(&found_key,
1182                                       &leaf->items[path->slots[0]].key);
1183                 cache = kmalloc(sizeof(*cache), GFP_NOFS);
1184                 if (!cache) {
1185                         err = -1;
1186                         break;
1187                 }
1188
1189                 if (nr & 1)
1190                         radix = &info->block_group_data_radix;
1191                 else
1192                         radix = &info->block_group_radix;
1193
1194                 bi = btrfs_item_ptr(leaf, path->slots[0],
1195                                     struct btrfs_block_group_item);
1196                 memcpy(&cache->item, bi, sizeof(*bi));
1197                 memcpy(&cache->key, &found_key, sizeof(found_key));
1198                 cache->last_alloc = cache->key.objectid;
1199                 cache->first_free = cache->key.objectid;
1200                 cache->pinned = 0;
1201                 cache->data = (nr & 1);
1202                 cache->radix = radix;
1203
1204                 key.objectid = found_key.objectid + found_key.offset;
1205                 btrfs_release_path(root, path);
1206                 ret = radix_tree_insert(radix, found_key.objectid +
1207                                         found_key.offset - 1,
1208                                         (void *)cache);
1209                 BUG_ON(ret);
1210                 used = btrfs_block_group_used(bi);
1211                 if (used < (key.offset * 8) / 10) {
1212                         radix_tree_tag_set(radix, found_key.objectid +
1213                                            found_key.offset - 1,
1214                                            BTRFS_BLOCK_GROUP_AVAIL);
1215                 }
1216                 if (key.objectid >=
1217                     btrfs_super_total_blocks(info->disk_super))
1218                         break;
1219                 nr++;
1220         }
1221
1222         btrfs_free_path(path);
1223         return 0;
1224 }