]> Pileus Git - ~andy/linux/blob - fs/btrfs/check-integrity.c
Merge branch 'x86-reboot-for-linus' of git://git.kernel.org/pub/scm/linux/kernel...
[~andy/linux] / fs / btrfs / check-integrity.c
1 /*
2  * Copyright (C) STRATO AG 2011.  All rights reserved.
3  *
4  * This program is free software; you can redistribute it and/or
5  * modify it under the terms of the GNU General Public
6  * License v2 as published by the Free Software Foundation.
7  *
8  * This program is distributed in the hope that it will be useful,
9  * but WITHOUT ANY WARRANTY; without even the implied warranty of
10  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
11  * General Public License for more details.
12  *
13  * You should have received a copy of the GNU General Public
14  * License along with this program; if not, write to the
15  * Free Software Foundation, Inc., 59 Temple Place - Suite 330,
16  * Boston, MA 021110-1307, USA.
17  */
18
19 /*
20  * This module can be used to catch cases when the btrfs kernel
21  * code executes write requests to the disk that bring the file
22  * system in an inconsistent state. In such a state, a power-loss
23  * or kernel panic event would cause that the data on disk is
24  * lost or at least damaged.
25  *
26  * Code is added that examines all block write requests during
27  * runtime (including writes of the super block). Three rules
28  * are verified and an error is printed on violation of the
29  * rules:
30  * 1. It is not allowed to write a disk block which is
31  *    currently referenced by the super block (either directly
32  *    or indirectly).
33  * 2. When a super block is written, it is verified that all
34  *    referenced (directly or indirectly) blocks fulfill the
35  *    following requirements:
36  *    2a. All referenced blocks have either been present when
37  *        the file system was mounted, (i.e., they have been
38  *        referenced by the super block) or they have been
39  *        written since then and the write completion callback
40  *        was called and a FLUSH request to the device where
41  *        these blocks are located was received and completed.
42  *    2b. All referenced blocks need to have a generation
43  *        number which is equal to the parent's number.
44  *
45  * One issue that was found using this module was that the log
46  * tree on disk became temporarily corrupted because disk blocks
47  * that had been in use for the log tree had been freed and
48  * reused too early, while being referenced by the written super
49  * block.
50  *
51  * The search term in the kernel log that can be used to filter
52  * on the existence of detected integrity issues is
53  * "btrfs: attempt".
54  *
55  * The integrity check is enabled via mount options. These
56  * mount options are only supported if the integrity check
57  * tool is compiled by defining BTRFS_FS_CHECK_INTEGRITY.
58  *
59  * Example #1, apply integrity checks to all metadata:
60  * mount /dev/sdb1 /mnt -o check_int
61  *
62  * Example #2, apply integrity checks to all metadata and
63  * to data extents:
64  * mount /dev/sdb1 /mnt -o check_int_data
65  *
66  * Example #3, apply integrity checks to all metadata and dump
67  * the tree that the super block references to kernel messages
68  * each time after a super block was written:
69  * mount /dev/sdb1 /mnt -o check_int,check_int_print_mask=263
70  *
71  * If the integrity check tool is included and activated in
72  * the mount options, plenty of kernel memory is used, and
73  * plenty of additional CPU cycles are spent. Enabling this
74  * functionality is not intended for normal use. In most
75  * cases, unless you are a btrfs developer who needs to verify
76  * the integrity of (super)-block write requests, do not
77  * enable the config option BTRFS_FS_CHECK_INTEGRITY to
78  * include and compile the integrity check tool.
79  */
80
81 #include <linux/sched.h>
82 #include <linux/slab.h>
83 #include <linux/buffer_head.h>
84 #include <linux/mutex.h>
85 #include <linux/crc32c.h>
86 #include <linux/genhd.h>
87 #include <linux/blkdev.h>
88 #include "ctree.h"
89 #include "disk-io.h"
90 #include "transaction.h"
91 #include "extent_io.h"
92 #include "volumes.h"
93 #include "print-tree.h"
94 #include "locking.h"
95 #include "check-integrity.h"
96 #include "rcu-string.h"
97
98 #define BTRFSIC_BLOCK_HASHTABLE_SIZE 0x10000
99 #define BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE 0x10000
100 #define BTRFSIC_DEV2STATE_HASHTABLE_SIZE 0x100
101 #define BTRFSIC_BLOCK_MAGIC_NUMBER 0x14491051
102 #define BTRFSIC_BLOCK_LINK_MAGIC_NUMBER 0x11070807
103 #define BTRFSIC_DEV2STATE_MAGIC_NUMBER 0x20111530
104 #define BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER 20111300
105 #define BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL (200 - 6)    /* in characters,
106                                                          * excluding " [...]" */
107 #define BTRFSIC_GENERATION_UNKNOWN ((u64)-1)
108
109 /*
110  * The definition of the bitmask fields for the print_mask.
111  * They are specified with the mount option check_integrity_print_mask.
112  */
113 #define BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE                     0x00000001
114 #define BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION         0x00000002
115 #define BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE                  0x00000004
116 #define BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE                 0x00000008
117 #define BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH                        0x00000010
118 #define BTRFSIC_PRINT_MASK_END_IO_BIO_BH                        0x00000020
119 #define BTRFSIC_PRINT_MASK_VERBOSE                              0x00000040
120 #define BTRFSIC_PRINT_MASK_VERY_VERBOSE                         0x00000080
121 #define BTRFSIC_PRINT_MASK_INITIAL_TREE                         0x00000100
122 #define BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES                    0x00000200
123 #define BTRFSIC_PRINT_MASK_INITIAL_DATABASE                     0x00000400
124 #define BTRFSIC_PRINT_MASK_NUM_COPIES                           0x00000800
125 #define BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS                0x00001000
126
127 struct btrfsic_dev_state;
128 struct btrfsic_state;
129
130 struct btrfsic_block {
131         u32 magic_num;          /* only used for debug purposes */
132         unsigned int is_metadata:1;     /* if it is meta-data, not data-data */
133         unsigned int is_superblock:1;   /* if it is one of the superblocks */
134         unsigned int is_iodone:1;       /* if is done by lower subsystem */
135         unsigned int iodone_w_error:1;  /* error was indicated to endio */
136         unsigned int never_written:1;   /* block was added because it was
137                                          * referenced, not because it was
138                                          * written */
139         unsigned int mirror_num:2;      /* large enough to hold
140                                          * BTRFS_SUPER_MIRROR_MAX */
141         struct btrfsic_dev_state *dev_state;
142         u64 dev_bytenr;         /* key, physical byte num on disk */
143         u64 logical_bytenr;     /* logical byte num on disk */
144         u64 generation;
145         struct btrfs_disk_key disk_key; /* extra info to print in case of
146                                          * issues, will not always be correct */
147         struct list_head collision_resolving_node;      /* list node */
148         struct list_head all_blocks_node;       /* list node */
149
150         /* the following two lists contain block_link items */
151         struct list_head ref_to_list;   /* list */
152         struct list_head ref_from_list; /* list */
153         struct btrfsic_block *next_in_same_bio;
154         void *orig_bio_bh_private;
155         union {
156                 bio_end_io_t *bio;
157                 bh_end_io_t *bh;
158         } orig_bio_bh_end_io;
159         int submit_bio_bh_rw;
160         u64 flush_gen; /* only valid if !never_written */
161 };
162
163 /*
164  * Elements of this type are allocated dynamically and required because
165  * each block object can refer to and can be ref from multiple blocks.
166  * The key to lookup them in the hashtable is the dev_bytenr of
167  * the block ref to plus the one from the block refered from.
168  * The fact that they are searchable via a hashtable and that a
169  * ref_cnt is maintained is not required for the btrfs integrity
170  * check algorithm itself, it is only used to make the output more
171  * beautiful in case that an error is detected (an error is defined
172  * as a write operation to a block while that block is still referenced).
173  */
174 struct btrfsic_block_link {
175         u32 magic_num;          /* only used for debug purposes */
176         u32 ref_cnt;
177         struct list_head node_ref_to;   /* list node */
178         struct list_head node_ref_from; /* list node */
179         struct list_head collision_resolving_node;      /* list node */
180         struct btrfsic_block *block_ref_to;
181         struct btrfsic_block *block_ref_from;
182         u64 parent_generation;
183 };
184
185 struct btrfsic_dev_state {
186         u32 magic_num;          /* only used for debug purposes */
187         struct block_device *bdev;
188         struct btrfsic_state *state;
189         struct list_head collision_resolving_node;      /* list node */
190         struct btrfsic_block dummy_block_for_bio_bh_flush;
191         u64 last_flush_gen;
192         char name[BDEVNAME_SIZE];
193 };
194
195 struct btrfsic_block_hashtable {
196         struct list_head table[BTRFSIC_BLOCK_HASHTABLE_SIZE];
197 };
198
199 struct btrfsic_block_link_hashtable {
200         struct list_head table[BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE];
201 };
202
203 struct btrfsic_dev_state_hashtable {
204         struct list_head table[BTRFSIC_DEV2STATE_HASHTABLE_SIZE];
205 };
206
207 struct btrfsic_block_data_ctx {
208         u64 start;              /* virtual bytenr */
209         u64 dev_bytenr;         /* physical bytenr on device */
210         u32 len;
211         struct btrfsic_dev_state *dev;
212         char **datav;
213         struct page **pagev;
214         void *mem_to_free;
215 };
216
217 /* This structure is used to implement recursion without occupying
218  * any stack space, refer to btrfsic_process_metablock() */
219 struct btrfsic_stack_frame {
220         u32 magic;
221         u32 nr;
222         int error;
223         int i;
224         int limit_nesting;
225         int num_copies;
226         int mirror_num;
227         struct btrfsic_block *block;
228         struct btrfsic_block_data_ctx *block_ctx;
229         struct btrfsic_block *next_block;
230         struct btrfsic_block_data_ctx next_block_ctx;
231         struct btrfs_header *hdr;
232         struct btrfsic_stack_frame *prev;
233 };
234
235 /* Some state per mounted filesystem */
236 struct btrfsic_state {
237         u32 print_mask;
238         int include_extent_data;
239         int csum_size;
240         struct list_head all_blocks_list;
241         struct btrfsic_block_hashtable block_hashtable;
242         struct btrfsic_block_link_hashtable block_link_hashtable;
243         struct btrfs_root *root;
244         u64 max_superblock_generation;
245         struct btrfsic_block *latest_superblock;
246         u32 metablock_size;
247         u32 datablock_size;
248 };
249
250 static void btrfsic_block_init(struct btrfsic_block *b);
251 static struct btrfsic_block *btrfsic_block_alloc(void);
252 static void btrfsic_block_free(struct btrfsic_block *b);
253 static void btrfsic_block_link_init(struct btrfsic_block_link *n);
254 static struct btrfsic_block_link *btrfsic_block_link_alloc(void);
255 static void btrfsic_block_link_free(struct btrfsic_block_link *n);
256 static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds);
257 static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void);
258 static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds);
259 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h);
260 static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
261                                         struct btrfsic_block_hashtable *h);
262 static void btrfsic_block_hashtable_remove(struct btrfsic_block *b);
263 static struct btrfsic_block *btrfsic_block_hashtable_lookup(
264                 struct block_device *bdev,
265                 u64 dev_bytenr,
266                 struct btrfsic_block_hashtable *h);
267 static void btrfsic_block_link_hashtable_init(
268                 struct btrfsic_block_link_hashtable *h);
269 static void btrfsic_block_link_hashtable_add(
270                 struct btrfsic_block_link *l,
271                 struct btrfsic_block_link_hashtable *h);
272 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l);
273 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
274                 struct block_device *bdev_ref_to,
275                 u64 dev_bytenr_ref_to,
276                 struct block_device *bdev_ref_from,
277                 u64 dev_bytenr_ref_from,
278                 struct btrfsic_block_link_hashtable *h);
279 static void btrfsic_dev_state_hashtable_init(
280                 struct btrfsic_dev_state_hashtable *h);
281 static void btrfsic_dev_state_hashtable_add(
282                 struct btrfsic_dev_state *ds,
283                 struct btrfsic_dev_state_hashtable *h);
284 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds);
285 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
286                 struct block_device *bdev,
287                 struct btrfsic_dev_state_hashtable *h);
288 static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void);
289 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf);
290 static int btrfsic_process_superblock(struct btrfsic_state *state,
291                                       struct btrfs_fs_devices *fs_devices);
292 static int btrfsic_process_metablock(struct btrfsic_state *state,
293                                      struct btrfsic_block *block,
294                                      struct btrfsic_block_data_ctx *block_ctx,
295                                      int limit_nesting, int force_iodone_flag);
296 static void btrfsic_read_from_block_data(
297         struct btrfsic_block_data_ctx *block_ctx,
298         void *dst, u32 offset, size_t len);
299 static int btrfsic_create_link_to_next_block(
300                 struct btrfsic_state *state,
301                 struct btrfsic_block *block,
302                 struct btrfsic_block_data_ctx
303                 *block_ctx, u64 next_bytenr,
304                 int limit_nesting,
305                 struct btrfsic_block_data_ctx *next_block_ctx,
306                 struct btrfsic_block **next_blockp,
307                 int force_iodone_flag,
308                 int *num_copiesp, int *mirror_nump,
309                 struct btrfs_disk_key *disk_key,
310                 u64 parent_generation);
311 static int btrfsic_handle_extent_data(struct btrfsic_state *state,
312                                       struct btrfsic_block *block,
313                                       struct btrfsic_block_data_ctx *block_ctx,
314                                       u32 item_offset, int force_iodone_flag);
315 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
316                              struct btrfsic_block_data_ctx *block_ctx_out,
317                              int mirror_num);
318 static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
319                                   u32 len, struct block_device *bdev,
320                                   struct btrfsic_block_data_ctx *block_ctx_out);
321 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx);
322 static int btrfsic_read_block(struct btrfsic_state *state,
323                               struct btrfsic_block_data_ctx *block_ctx);
324 static void btrfsic_dump_database(struct btrfsic_state *state);
325 static void btrfsic_complete_bio_end_io(struct bio *bio, int err);
326 static int btrfsic_test_for_metadata(struct btrfsic_state *state,
327                                      char **datav, unsigned int num_pages);
328 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
329                                           u64 dev_bytenr, char **mapped_datav,
330                                           unsigned int num_pages,
331                                           struct bio *bio, int *bio_is_patched,
332                                           struct buffer_head *bh,
333                                           int submit_bio_bh_rw);
334 static int btrfsic_process_written_superblock(
335                 struct btrfsic_state *state,
336                 struct btrfsic_block *const block,
337                 struct btrfs_super_block *const super_hdr);
338 static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status);
339 static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate);
340 static int btrfsic_is_block_ref_by_superblock(const struct btrfsic_state *state,
341                                               const struct btrfsic_block *block,
342                                               int recursion_level);
343 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
344                                         struct btrfsic_block *const block,
345                                         int recursion_level);
346 static void btrfsic_print_add_link(const struct btrfsic_state *state,
347                                    const struct btrfsic_block_link *l);
348 static void btrfsic_print_rem_link(const struct btrfsic_state *state,
349                                    const struct btrfsic_block_link *l);
350 static char btrfsic_get_block_type(const struct btrfsic_state *state,
351                                    const struct btrfsic_block *block);
352 static void btrfsic_dump_tree(const struct btrfsic_state *state);
353 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
354                                   const struct btrfsic_block *block,
355                                   int indent_level);
356 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
357                 struct btrfsic_state *state,
358                 struct btrfsic_block_data_ctx *next_block_ctx,
359                 struct btrfsic_block *next_block,
360                 struct btrfsic_block *from_block,
361                 u64 parent_generation);
362 static struct btrfsic_block *btrfsic_block_lookup_or_add(
363                 struct btrfsic_state *state,
364                 struct btrfsic_block_data_ctx *block_ctx,
365                 const char *additional_string,
366                 int is_metadata,
367                 int is_iodone,
368                 int never_written,
369                 int mirror_num,
370                 int *was_created);
371 static int btrfsic_process_superblock_dev_mirror(
372                 struct btrfsic_state *state,
373                 struct btrfsic_dev_state *dev_state,
374                 struct btrfs_device *device,
375                 int superblock_mirror_num,
376                 struct btrfsic_dev_state **selected_dev_state,
377                 struct btrfs_super_block *selected_super);
378 static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
379                 struct block_device *bdev);
380 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
381                                            u64 bytenr,
382                                            struct btrfsic_dev_state *dev_state,
383                                            u64 dev_bytenr);
384
385 static struct mutex btrfsic_mutex;
386 static int btrfsic_is_initialized;
387 static struct btrfsic_dev_state_hashtable btrfsic_dev_state_hashtable;
388
389
390 static void btrfsic_block_init(struct btrfsic_block *b)
391 {
392         b->magic_num = BTRFSIC_BLOCK_MAGIC_NUMBER;
393         b->dev_state = NULL;
394         b->dev_bytenr = 0;
395         b->logical_bytenr = 0;
396         b->generation = BTRFSIC_GENERATION_UNKNOWN;
397         b->disk_key.objectid = 0;
398         b->disk_key.type = 0;
399         b->disk_key.offset = 0;
400         b->is_metadata = 0;
401         b->is_superblock = 0;
402         b->is_iodone = 0;
403         b->iodone_w_error = 0;
404         b->never_written = 0;
405         b->mirror_num = 0;
406         b->next_in_same_bio = NULL;
407         b->orig_bio_bh_private = NULL;
408         b->orig_bio_bh_end_io.bio = NULL;
409         INIT_LIST_HEAD(&b->collision_resolving_node);
410         INIT_LIST_HEAD(&b->all_blocks_node);
411         INIT_LIST_HEAD(&b->ref_to_list);
412         INIT_LIST_HEAD(&b->ref_from_list);
413         b->submit_bio_bh_rw = 0;
414         b->flush_gen = 0;
415 }
416
417 static struct btrfsic_block *btrfsic_block_alloc(void)
418 {
419         struct btrfsic_block *b;
420
421         b = kzalloc(sizeof(*b), GFP_NOFS);
422         if (NULL != b)
423                 btrfsic_block_init(b);
424
425         return b;
426 }
427
428 static void btrfsic_block_free(struct btrfsic_block *b)
429 {
430         BUG_ON(!(NULL == b || BTRFSIC_BLOCK_MAGIC_NUMBER == b->magic_num));
431         kfree(b);
432 }
433
434 static void btrfsic_block_link_init(struct btrfsic_block_link *l)
435 {
436         l->magic_num = BTRFSIC_BLOCK_LINK_MAGIC_NUMBER;
437         l->ref_cnt = 1;
438         INIT_LIST_HEAD(&l->node_ref_to);
439         INIT_LIST_HEAD(&l->node_ref_from);
440         INIT_LIST_HEAD(&l->collision_resolving_node);
441         l->block_ref_to = NULL;
442         l->block_ref_from = NULL;
443 }
444
445 static struct btrfsic_block_link *btrfsic_block_link_alloc(void)
446 {
447         struct btrfsic_block_link *l;
448
449         l = kzalloc(sizeof(*l), GFP_NOFS);
450         if (NULL != l)
451                 btrfsic_block_link_init(l);
452
453         return l;
454 }
455
456 static void btrfsic_block_link_free(struct btrfsic_block_link *l)
457 {
458         BUG_ON(!(NULL == l || BTRFSIC_BLOCK_LINK_MAGIC_NUMBER == l->magic_num));
459         kfree(l);
460 }
461
462 static void btrfsic_dev_state_init(struct btrfsic_dev_state *ds)
463 {
464         ds->magic_num = BTRFSIC_DEV2STATE_MAGIC_NUMBER;
465         ds->bdev = NULL;
466         ds->state = NULL;
467         ds->name[0] = '\0';
468         INIT_LIST_HEAD(&ds->collision_resolving_node);
469         ds->last_flush_gen = 0;
470         btrfsic_block_init(&ds->dummy_block_for_bio_bh_flush);
471         ds->dummy_block_for_bio_bh_flush.is_iodone = 1;
472         ds->dummy_block_for_bio_bh_flush.dev_state = ds;
473 }
474
475 static struct btrfsic_dev_state *btrfsic_dev_state_alloc(void)
476 {
477         struct btrfsic_dev_state *ds;
478
479         ds = kzalloc(sizeof(*ds), GFP_NOFS);
480         if (NULL != ds)
481                 btrfsic_dev_state_init(ds);
482
483         return ds;
484 }
485
486 static void btrfsic_dev_state_free(struct btrfsic_dev_state *ds)
487 {
488         BUG_ON(!(NULL == ds ||
489                  BTRFSIC_DEV2STATE_MAGIC_NUMBER == ds->magic_num));
490         kfree(ds);
491 }
492
493 static void btrfsic_block_hashtable_init(struct btrfsic_block_hashtable *h)
494 {
495         int i;
496
497         for (i = 0; i < BTRFSIC_BLOCK_HASHTABLE_SIZE; i++)
498                 INIT_LIST_HEAD(h->table + i);
499 }
500
501 static void btrfsic_block_hashtable_add(struct btrfsic_block *b,
502                                         struct btrfsic_block_hashtable *h)
503 {
504         const unsigned int hashval =
505             (((unsigned int)(b->dev_bytenr >> 16)) ^
506              ((unsigned int)((uintptr_t)b->dev_state->bdev))) &
507              (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
508
509         list_add(&b->collision_resolving_node, h->table + hashval);
510 }
511
512 static void btrfsic_block_hashtable_remove(struct btrfsic_block *b)
513 {
514         list_del(&b->collision_resolving_node);
515 }
516
517 static struct btrfsic_block *btrfsic_block_hashtable_lookup(
518                 struct block_device *bdev,
519                 u64 dev_bytenr,
520                 struct btrfsic_block_hashtable *h)
521 {
522         const unsigned int hashval =
523             (((unsigned int)(dev_bytenr >> 16)) ^
524              ((unsigned int)((uintptr_t)bdev))) &
525              (BTRFSIC_BLOCK_HASHTABLE_SIZE - 1);
526         struct list_head *elem;
527
528         list_for_each(elem, h->table + hashval) {
529                 struct btrfsic_block *const b =
530                     list_entry(elem, struct btrfsic_block,
531                                collision_resolving_node);
532
533                 if (b->dev_state->bdev == bdev && b->dev_bytenr == dev_bytenr)
534                         return b;
535         }
536
537         return NULL;
538 }
539
540 static void btrfsic_block_link_hashtable_init(
541                 struct btrfsic_block_link_hashtable *h)
542 {
543         int i;
544
545         for (i = 0; i < BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE; i++)
546                 INIT_LIST_HEAD(h->table + i);
547 }
548
549 static void btrfsic_block_link_hashtable_add(
550                 struct btrfsic_block_link *l,
551                 struct btrfsic_block_link_hashtable *h)
552 {
553         const unsigned int hashval =
554             (((unsigned int)(l->block_ref_to->dev_bytenr >> 16)) ^
555              ((unsigned int)(l->block_ref_from->dev_bytenr >> 16)) ^
556              ((unsigned int)((uintptr_t)l->block_ref_to->dev_state->bdev)) ^
557              ((unsigned int)((uintptr_t)l->block_ref_from->dev_state->bdev)))
558              & (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
559
560         BUG_ON(NULL == l->block_ref_to);
561         BUG_ON(NULL == l->block_ref_from);
562         list_add(&l->collision_resolving_node, h->table + hashval);
563 }
564
565 static void btrfsic_block_link_hashtable_remove(struct btrfsic_block_link *l)
566 {
567         list_del(&l->collision_resolving_node);
568 }
569
570 static struct btrfsic_block_link *btrfsic_block_link_hashtable_lookup(
571                 struct block_device *bdev_ref_to,
572                 u64 dev_bytenr_ref_to,
573                 struct block_device *bdev_ref_from,
574                 u64 dev_bytenr_ref_from,
575                 struct btrfsic_block_link_hashtable *h)
576 {
577         const unsigned int hashval =
578             (((unsigned int)(dev_bytenr_ref_to >> 16)) ^
579              ((unsigned int)(dev_bytenr_ref_from >> 16)) ^
580              ((unsigned int)((uintptr_t)bdev_ref_to)) ^
581              ((unsigned int)((uintptr_t)bdev_ref_from))) &
582              (BTRFSIC_BLOCK_LINK_HASHTABLE_SIZE - 1);
583         struct list_head *elem;
584
585         list_for_each(elem, h->table + hashval) {
586                 struct btrfsic_block_link *const l =
587                     list_entry(elem, struct btrfsic_block_link,
588                                collision_resolving_node);
589
590                 BUG_ON(NULL == l->block_ref_to);
591                 BUG_ON(NULL == l->block_ref_from);
592                 if (l->block_ref_to->dev_state->bdev == bdev_ref_to &&
593                     l->block_ref_to->dev_bytenr == dev_bytenr_ref_to &&
594                     l->block_ref_from->dev_state->bdev == bdev_ref_from &&
595                     l->block_ref_from->dev_bytenr == dev_bytenr_ref_from)
596                         return l;
597         }
598
599         return NULL;
600 }
601
602 static void btrfsic_dev_state_hashtable_init(
603                 struct btrfsic_dev_state_hashtable *h)
604 {
605         int i;
606
607         for (i = 0; i < BTRFSIC_DEV2STATE_HASHTABLE_SIZE; i++)
608                 INIT_LIST_HEAD(h->table + i);
609 }
610
611 static void btrfsic_dev_state_hashtable_add(
612                 struct btrfsic_dev_state *ds,
613                 struct btrfsic_dev_state_hashtable *h)
614 {
615         const unsigned int hashval =
616             (((unsigned int)((uintptr_t)ds->bdev)) &
617              (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
618
619         list_add(&ds->collision_resolving_node, h->table + hashval);
620 }
621
622 static void btrfsic_dev_state_hashtable_remove(struct btrfsic_dev_state *ds)
623 {
624         list_del(&ds->collision_resolving_node);
625 }
626
627 static struct btrfsic_dev_state *btrfsic_dev_state_hashtable_lookup(
628                 struct block_device *bdev,
629                 struct btrfsic_dev_state_hashtable *h)
630 {
631         const unsigned int hashval =
632             (((unsigned int)((uintptr_t)bdev)) &
633              (BTRFSIC_DEV2STATE_HASHTABLE_SIZE - 1));
634         struct list_head *elem;
635
636         list_for_each(elem, h->table + hashval) {
637                 struct btrfsic_dev_state *const ds =
638                     list_entry(elem, struct btrfsic_dev_state,
639                                collision_resolving_node);
640
641                 if (ds->bdev == bdev)
642                         return ds;
643         }
644
645         return NULL;
646 }
647
648 static int btrfsic_process_superblock(struct btrfsic_state *state,
649                                       struct btrfs_fs_devices *fs_devices)
650 {
651         int ret = 0;
652         struct btrfs_super_block *selected_super;
653         struct list_head *dev_head = &fs_devices->devices;
654         struct btrfs_device *device;
655         struct btrfsic_dev_state *selected_dev_state = NULL;
656         int pass;
657
658         BUG_ON(NULL == state);
659         selected_super = kzalloc(sizeof(*selected_super), GFP_NOFS);
660         if (NULL == selected_super) {
661                 printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
662                 return -1;
663         }
664
665         list_for_each_entry(device, dev_head, dev_list) {
666                 int i;
667                 struct btrfsic_dev_state *dev_state;
668
669                 if (!device->bdev || !device->name)
670                         continue;
671
672                 dev_state = btrfsic_dev_state_lookup(device->bdev);
673                 BUG_ON(NULL == dev_state);
674                 for (i = 0; i < BTRFS_SUPER_MIRROR_MAX; i++) {
675                         ret = btrfsic_process_superblock_dev_mirror(
676                                         state, dev_state, device, i,
677                                         &selected_dev_state, selected_super);
678                         if (0 != ret && 0 == i) {
679                                 kfree(selected_super);
680                                 return ret;
681                         }
682                 }
683         }
684
685         if (NULL == state->latest_superblock) {
686                 printk(KERN_INFO "btrfsic: no superblock found!\n");
687                 kfree(selected_super);
688                 return -1;
689         }
690
691         state->csum_size = btrfs_super_csum_size(selected_super);
692
693         for (pass = 0; pass < 3; pass++) {
694                 int num_copies;
695                 int mirror_num;
696                 u64 next_bytenr;
697
698                 switch (pass) {
699                 case 0:
700                         next_bytenr = btrfs_super_root(selected_super);
701                         if (state->print_mask &
702                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
703                                 printk(KERN_INFO "root@%llu\n",
704                                        (unsigned long long)next_bytenr);
705                         break;
706                 case 1:
707                         next_bytenr = btrfs_super_chunk_root(selected_super);
708                         if (state->print_mask &
709                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
710                                 printk(KERN_INFO "chunk@%llu\n",
711                                        (unsigned long long)next_bytenr);
712                         break;
713                 case 2:
714                         next_bytenr = btrfs_super_log_root(selected_super);
715                         if (0 == next_bytenr)
716                                 continue;
717                         if (state->print_mask &
718                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
719                                 printk(KERN_INFO "log@%llu\n",
720                                        (unsigned long long)next_bytenr);
721                         break;
722                 }
723
724                 num_copies =
725                     btrfs_num_copies(&state->root->fs_info->mapping_tree,
726                                      next_bytenr, state->metablock_size);
727                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
728                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
729                                (unsigned long long)next_bytenr, num_copies);
730
731                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
732                         struct btrfsic_block *next_block;
733                         struct btrfsic_block_data_ctx tmp_next_block_ctx;
734                         struct btrfsic_block_link *l;
735
736                         ret = btrfsic_map_block(state, next_bytenr,
737                                                 state->metablock_size,
738                                                 &tmp_next_block_ctx,
739                                                 mirror_num);
740                         if (ret) {
741                                 printk(KERN_INFO "btrfsic:"
742                                        " btrfsic_map_block(root @%llu,"
743                                        " mirror %d) failed!\n",
744                                        (unsigned long long)next_bytenr,
745                                        mirror_num);
746                                 kfree(selected_super);
747                                 return -1;
748                         }
749
750                         next_block = btrfsic_block_hashtable_lookup(
751                                         tmp_next_block_ctx.dev->bdev,
752                                         tmp_next_block_ctx.dev_bytenr,
753                                         &state->block_hashtable);
754                         BUG_ON(NULL == next_block);
755
756                         l = btrfsic_block_link_hashtable_lookup(
757                                         tmp_next_block_ctx.dev->bdev,
758                                         tmp_next_block_ctx.dev_bytenr,
759                                         state->latest_superblock->dev_state->
760                                         bdev,
761                                         state->latest_superblock->dev_bytenr,
762                                         &state->block_link_hashtable);
763                         BUG_ON(NULL == l);
764
765                         ret = btrfsic_read_block(state, &tmp_next_block_ctx);
766                         if (ret < (int)PAGE_CACHE_SIZE) {
767                                 printk(KERN_INFO
768                                        "btrfsic: read @logical %llu failed!\n",
769                                        (unsigned long long)
770                                        tmp_next_block_ctx.start);
771                                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
772                                 kfree(selected_super);
773                                 return -1;
774                         }
775
776                         ret = btrfsic_process_metablock(state,
777                                                         next_block,
778                                                         &tmp_next_block_ctx,
779                                                         BTRFS_MAX_LEVEL + 3, 1);
780                         btrfsic_release_block_ctx(&tmp_next_block_ctx);
781                 }
782         }
783
784         kfree(selected_super);
785         return ret;
786 }
787
788 static int btrfsic_process_superblock_dev_mirror(
789                 struct btrfsic_state *state,
790                 struct btrfsic_dev_state *dev_state,
791                 struct btrfs_device *device,
792                 int superblock_mirror_num,
793                 struct btrfsic_dev_state **selected_dev_state,
794                 struct btrfs_super_block *selected_super)
795 {
796         struct btrfs_super_block *super_tmp;
797         u64 dev_bytenr;
798         struct buffer_head *bh;
799         struct btrfsic_block *superblock_tmp;
800         int pass;
801         struct block_device *const superblock_bdev = device->bdev;
802
803         /* super block bytenr is always the unmapped device bytenr */
804         dev_bytenr = btrfs_sb_offset(superblock_mirror_num);
805         if (dev_bytenr + BTRFS_SUPER_INFO_SIZE > device->total_bytes)
806                 return -1;
807         bh = __bread(superblock_bdev, dev_bytenr / 4096,
808                      BTRFS_SUPER_INFO_SIZE);
809         if (NULL == bh)
810                 return -1;
811         super_tmp = (struct btrfs_super_block *)
812             (bh->b_data + (dev_bytenr & 4095));
813
814         if (btrfs_super_bytenr(super_tmp) != dev_bytenr ||
815             strncmp((char *)(&(super_tmp->magic)), BTRFS_MAGIC,
816                     sizeof(super_tmp->magic)) ||
817             memcmp(device->uuid, super_tmp->dev_item.uuid, BTRFS_UUID_SIZE) ||
818             btrfs_super_nodesize(super_tmp) != state->metablock_size ||
819             btrfs_super_leafsize(super_tmp) != state->metablock_size ||
820             btrfs_super_sectorsize(super_tmp) != state->datablock_size) {
821                 brelse(bh);
822                 return 0;
823         }
824
825         superblock_tmp =
826             btrfsic_block_hashtable_lookup(superblock_bdev,
827                                            dev_bytenr,
828                                            &state->block_hashtable);
829         if (NULL == superblock_tmp) {
830                 superblock_tmp = btrfsic_block_alloc();
831                 if (NULL == superblock_tmp) {
832                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
833                         brelse(bh);
834                         return -1;
835                 }
836                 /* for superblock, only the dev_bytenr makes sense */
837                 superblock_tmp->dev_bytenr = dev_bytenr;
838                 superblock_tmp->dev_state = dev_state;
839                 superblock_tmp->logical_bytenr = dev_bytenr;
840                 superblock_tmp->generation = btrfs_super_generation(super_tmp);
841                 superblock_tmp->is_metadata = 1;
842                 superblock_tmp->is_superblock = 1;
843                 superblock_tmp->is_iodone = 1;
844                 superblock_tmp->never_written = 0;
845                 superblock_tmp->mirror_num = 1 + superblock_mirror_num;
846                 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
847                         printk_in_rcu(KERN_INFO "New initial S-block (bdev %p, %s)"
848                                      " @%llu (%s/%llu/%d)\n",
849                                      superblock_bdev,
850                                      rcu_str_deref(device->name),
851                                      (unsigned long long)dev_bytenr,
852                                      dev_state->name,
853                                      (unsigned long long)dev_bytenr,
854                                      superblock_mirror_num);
855                 list_add(&superblock_tmp->all_blocks_node,
856                          &state->all_blocks_list);
857                 btrfsic_block_hashtable_add(superblock_tmp,
858                                             &state->block_hashtable);
859         }
860
861         /* select the one with the highest generation field */
862         if (btrfs_super_generation(super_tmp) >
863             state->max_superblock_generation ||
864             0 == state->max_superblock_generation) {
865                 memcpy(selected_super, super_tmp, sizeof(*selected_super));
866                 *selected_dev_state = dev_state;
867                 state->max_superblock_generation =
868                     btrfs_super_generation(super_tmp);
869                 state->latest_superblock = superblock_tmp;
870         }
871
872         for (pass = 0; pass < 3; pass++) {
873                 u64 next_bytenr;
874                 int num_copies;
875                 int mirror_num;
876                 const char *additional_string = NULL;
877                 struct btrfs_disk_key tmp_disk_key;
878
879                 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
880                 tmp_disk_key.offset = 0;
881                 switch (pass) {
882                 case 0:
883                         tmp_disk_key.objectid =
884                             cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
885                         additional_string = "initial root ";
886                         next_bytenr = btrfs_super_root(super_tmp);
887                         break;
888                 case 1:
889                         tmp_disk_key.objectid =
890                             cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
891                         additional_string = "initial chunk ";
892                         next_bytenr = btrfs_super_chunk_root(super_tmp);
893                         break;
894                 case 2:
895                         tmp_disk_key.objectid =
896                             cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
897                         additional_string = "initial log ";
898                         next_bytenr = btrfs_super_log_root(super_tmp);
899                         if (0 == next_bytenr)
900                                 continue;
901                         break;
902                 }
903
904                 num_copies =
905                     btrfs_num_copies(&state->root->fs_info->mapping_tree,
906                                      next_bytenr, state->metablock_size);
907                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
908                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
909                                (unsigned long long)next_bytenr, num_copies);
910                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
911                         struct btrfsic_block *next_block;
912                         struct btrfsic_block_data_ctx tmp_next_block_ctx;
913                         struct btrfsic_block_link *l;
914
915                         if (btrfsic_map_block(state, next_bytenr,
916                                               state->metablock_size,
917                                               &tmp_next_block_ctx,
918                                               mirror_num)) {
919                                 printk(KERN_INFO "btrfsic: btrfsic_map_block("
920                                        "bytenr @%llu, mirror %d) failed!\n",
921                                        (unsigned long long)next_bytenr,
922                                        mirror_num);
923                                 brelse(bh);
924                                 return -1;
925                         }
926
927                         next_block = btrfsic_block_lookup_or_add(
928                                         state, &tmp_next_block_ctx,
929                                         additional_string, 1, 1, 0,
930                                         mirror_num, NULL);
931                         if (NULL == next_block) {
932                                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
933                                 brelse(bh);
934                                 return -1;
935                         }
936
937                         next_block->disk_key = tmp_disk_key;
938                         next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
939                         l = btrfsic_block_link_lookup_or_add(
940                                         state, &tmp_next_block_ctx,
941                                         next_block, superblock_tmp,
942                                         BTRFSIC_GENERATION_UNKNOWN);
943                         btrfsic_release_block_ctx(&tmp_next_block_ctx);
944                         if (NULL == l) {
945                                 brelse(bh);
946                                 return -1;
947                         }
948                 }
949         }
950         if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_ALL_TREES)
951                 btrfsic_dump_tree_sub(state, superblock_tmp, 0);
952
953         brelse(bh);
954         return 0;
955 }
956
957 static struct btrfsic_stack_frame *btrfsic_stack_frame_alloc(void)
958 {
959         struct btrfsic_stack_frame *sf;
960
961         sf = kzalloc(sizeof(*sf), GFP_NOFS);
962         if (NULL == sf)
963                 printk(KERN_INFO "btrfsic: alloc memory failed!\n");
964         else
965                 sf->magic = BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER;
966         return sf;
967 }
968
969 static void btrfsic_stack_frame_free(struct btrfsic_stack_frame *sf)
970 {
971         BUG_ON(!(NULL == sf ||
972                  BTRFSIC_BLOCK_STACK_FRAME_MAGIC_NUMBER == sf->magic));
973         kfree(sf);
974 }
975
976 static int btrfsic_process_metablock(
977                 struct btrfsic_state *state,
978                 struct btrfsic_block *const first_block,
979                 struct btrfsic_block_data_ctx *const first_block_ctx,
980                 int first_limit_nesting, int force_iodone_flag)
981 {
982         struct btrfsic_stack_frame initial_stack_frame = { 0 };
983         struct btrfsic_stack_frame *sf;
984         struct btrfsic_stack_frame *next_stack;
985         struct btrfs_header *const first_hdr =
986                 (struct btrfs_header *)first_block_ctx->datav[0];
987
988         BUG_ON(!first_hdr);
989         sf = &initial_stack_frame;
990         sf->error = 0;
991         sf->i = -1;
992         sf->limit_nesting = first_limit_nesting;
993         sf->block = first_block;
994         sf->block_ctx = first_block_ctx;
995         sf->next_block = NULL;
996         sf->hdr = first_hdr;
997         sf->prev = NULL;
998
999 continue_with_new_stack_frame:
1000         sf->block->generation = le64_to_cpu(sf->hdr->generation);
1001         if (0 == sf->hdr->level) {
1002                 struct btrfs_leaf *const leafhdr =
1003                     (struct btrfs_leaf *)sf->hdr;
1004
1005                 if (-1 == sf->i) {
1006                         sf->nr = le32_to_cpu(leafhdr->header.nritems);
1007
1008                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1009                                 printk(KERN_INFO
1010                                        "leaf %llu items %d generation %llu"
1011                                        " owner %llu\n",
1012                                        (unsigned long long)
1013                                        sf->block_ctx->start,
1014                                        sf->nr,
1015                                        (unsigned long long)
1016                                        le64_to_cpu(leafhdr->header.generation),
1017                                        (unsigned long long)
1018                                        le64_to_cpu(leafhdr->header.owner));
1019                 }
1020
1021 continue_with_current_leaf_stack_frame:
1022                 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
1023                         sf->i++;
1024                         sf->num_copies = 0;
1025                 }
1026
1027                 if (sf->i < sf->nr) {
1028                         struct btrfs_item disk_item;
1029                         u32 disk_item_offset =
1030                                 (uintptr_t)(leafhdr->items + sf->i) -
1031                                 (uintptr_t)leafhdr;
1032                         struct btrfs_disk_key *disk_key;
1033                         u8 type;
1034                         u32 item_offset;
1035
1036                         if (disk_item_offset + sizeof(struct btrfs_item) >
1037                             sf->block_ctx->len) {
1038 leaf_item_out_of_bounce_error:
1039                                 printk(KERN_INFO
1040                                        "btrfsic: leaf item out of bounce at logical %llu, dev %s\n",
1041                                        sf->block_ctx->start,
1042                                        sf->block_ctx->dev->name);
1043                                 goto one_stack_frame_backwards;
1044                         }
1045                         btrfsic_read_from_block_data(sf->block_ctx,
1046                                                      &disk_item,
1047                                                      disk_item_offset,
1048                                                      sizeof(struct btrfs_item));
1049                         item_offset = le32_to_cpu(disk_item.offset);
1050                         disk_key = &disk_item.key;
1051                         type = disk_key->type;
1052
1053                         if (BTRFS_ROOT_ITEM_KEY == type) {
1054                                 struct btrfs_root_item root_item;
1055                                 u32 root_item_offset;
1056                                 u64 next_bytenr;
1057
1058                                 root_item_offset = item_offset +
1059                                         offsetof(struct btrfs_leaf, items);
1060                                 if (root_item_offset +
1061                                     sizeof(struct btrfs_root_item) >
1062                                     sf->block_ctx->len)
1063                                         goto leaf_item_out_of_bounce_error;
1064                                 btrfsic_read_from_block_data(
1065                                         sf->block_ctx, &root_item,
1066                                         root_item_offset,
1067                                         sizeof(struct btrfs_root_item));
1068                                 next_bytenr = le64_to_cpu(root_item.bytenr);
1069
1070                                 sf->error =
1071                                     btrfsic_create_link_to_next_block(
1072                                                 state,
1073                                                 sf->block,
1074                                                 sf->block_ctx,
1075                                                 next_bytenr,
1076                                                 sf->limit_nesting,
1077                                                 &sf->next_block_ctx,
1078                                                 &sf->next_block,
1079                                                 force_iodone_flag,
1080                                                 &sf->num_copies,
1081                                                 &sf->mirror_num,
1082                                                 disk_key,
1083                                                 le64_to_cpu(root_item.
1084                                                 generation));
1085                                 if (sf->error)
1086                                         goto one_stack_frame_backwards;
1087
1088                                 if (NULL != sf->next_block) {
1089                                         struct btrfs_header *const next_hdr =
1090                                             (struct btrfs_header *)
1091                                             sf->next_block_ctx.datav[0];
1092
1093                                         next_stack =
1094                                             btrfsic_stack_frame_alloc();
1095                                         if (NULL == next_stack) {
1096                                                 btrfsic_release_block_ctx(
1097                                                                 &sf->
1098                                                                 next_block_ctx);
1099                                                 goto one_stack_frame_backwards;
1100                                         }
1101
1102                                         next_stack->i = -1;
1103                                         next_stack->block = sf->next_block;
1104                                         next_stack->block_ctx =
1105                                             &sf->next_block_ctx;
1106                                         next_stack->next_block = NULL;
1107                                         next_stack->hdr = next_hdr;
1108                                         next_stack->limit_nesting =
1109                                             sf->limit_nesting - 1;
1110                                         next_stack->prev = sf;
1111                                         sf = next_stack;
1112                                         goto continue_with_new_stack_frame;
1113                                 }
1114                         } else if (BTRFS_EXTENT_DATA_KEY == type &&
1115                                    state->include_extent_data) {
1116                                 sf->error = btrfsic_handle_extent_data(
1117                                                 state,
1118                                                 sf->block,
1119                                                 sf->block_ctx,
1120                                                 item_offset,
1121                                                 force_iodone_flag);
1122                                 if (sf->error)
1123                                         goto one_stack_frame_backwards;
1124                         }
1125
1126                         goto continue_with_current_leaf_stack_frame;
1127                 }
1128         } else {
1129                 struct btrfs_node *const nodehdr = (struct btrfs_node *)sf->hdr;
1130
1131                 if (-1 == sf->i) {
1132                         sf->nr = le32_to_cpu(nodehdr->header.nritems);
1133
1134                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1135                                 printk(KERN_INFO "node %llu level %d items %d"
1136                                        " generation %llu owner %llu\n",
1137                                        (unsigned long long)
1138                                        sf->block_ctx->start,
1139                                        nodehdr->header.level, sf->nr,
1140                                        (unsigned long long)
1141                                        le64_to_cpu(nodehdr->header.generation),
1142                                        (unsigned long long)
1143                                        le64_to_cpu(nodehdr->header.owner));
1144                 }
1145
1146 continue_with_current_node_stack_frame:
1147                 if (0 == sf->num_copies || sf->mirror_num > sf->num_copies) {
1148                         sf->i++;
1149                         sf->num_copies = 0;
1150                 }
1151
1152                 if (sf->i < sf->nr) {
1153                         struct btrfs_key_ptr key_ptr;
1154                         u32 key_ptr_offset;
1155                         u64 next_bytenr;
1156
1157                         key_ptr_offset = (uintptr_t)(nodehdr->ptrs + sf->i) -
1158                                           (uintptr_t)nodehdr;
1159                         if (key_ptr_offset + sizeof(struct btrfs_key_ptr) >
1160                             sf->block_ctx->len) {
1161                                 printk(KERN_INFO
1162                                        "btrfsic: node item out of bounce at logical %llu, dev %s\n",
1163                                        sf->block_ctx->start,
1164                                        sf->block_ctx->dev->name);
1165                                 goto one_stack_frame_backwards;
1166                         }
1167                         btrfsic_read_from_block_data(
1168                                 sf->block_ctx, &key_ptr, key_ptr_offset,
1169                                 sizeof(struct btrfs_key_ptr));
1170                         next_bytenr = le64_to_cpu(key_ptr.blockptr);
1171
1172                         sf->error = btrfsic_create_link_to_next_block(
1173                                         state,
1174                                         sf->block,
1175                                         sf->block_ctx,
1176                                         next_bytenr,
1177                                         sf->limit_nesting,
1178                                         &sf->next_block_ctx,
1179                                         &sf->next_block,
1180                                         force_iodone_flag,
1181                                         &sf->num_copies,
1182                                         &sf->mirror_num,
1183                                         &key_ptr.key,
1184                                         le64_to_cpu(key_ptr.generation));
1185                         if (sf->error)
1186                                 goto one_stack_frame_backwards;
1187
1188                         if (NULL != sf->next_block) {
1189                                 struct btrfs_header *const next_hdr =
1190                                     (struct btrfs_header *)
1191                                     sf->next_block_ctx.datav[0];
1192
1193                                 next_stack = btrfsic_stack_frame_alloc();
1194                                 if (NULL == next_stack)
1195                                         goto one_stack_frame_backwards;
1196
1197                                 next_stack->i = -1;
1198                                 next_stack->block = sf->next_block;
1199                                 next_stack->block_ctx = &sf->next_block_ctx;
1200                                 next_stack->next_block = NULL;
1201                                 next_stack->hdr = next_hdr;
1202                                 next_stack->limit_nesting =
1203                                     sf->limit_nesting - 1;
1204                                 next_stack->prev = sf;
1205                                 sf = next_stack;
1206                                 goto continue_with_new_stack_frame;
1207                         }
1208
1209                         goto continue_with_current_node_stack_frame;
1210                 }
1211         }
1212
1213 one_stack_frame_backwards:
1214         if (NULL != sf->prev) {
1215                 struct btrfsic_stack_frame *const prev = sf->prev;
1216
1217                 /* the one for the initial block is freed in the caller */
1218                 btrfsic_release_block_ctx(sf->block_ctx);
1219
1220                 if (sf->error) {
1221                         prev->error = sf->error;
1222                         btrfsic_stack_frame_free(sf);
1223                         sf = prev;
1224                         goto one_stack_frame_backwards;
1225                 }
1226
1227                 btrfsic_stack_frame_free(sf);
1228                 sf = prev;
1229                 goto continue_with_new_stack_frame;
1230         } else {
1231                 BUG_ON(&initial_stack_frame != sf);
1232         }
1233
1234         return sf->error;
1235 }
1236
1237 static void btrfsic_read_from_block_data(
1238         struct btrfsic_block_data_ctx *block_ctx,
1239         void *dstv, u32 offset, size_t len)
1240 {
1241         size_t cur;
1242         size_t offset_in_page;
1243         char *kaddr;
1244         char *dst = (char *)dstv;
1245         size_t start_offset = block_ctx->start & ((u64)PAGE_CACHE_SIZE - 1);
1246         unsigned long i = (start_offset + offset) >> PAGE_CACHE_SHIFT;
1247
1248         WARN_ON(offset + len > block_ctx->len);
1249         offset_in_page = (start_offset + offset) &
1250                          ((unsigned long)PAGE_CACHE_SIZE - 1);
1251
1252         while (len > 0) {
1253                 cur = min(len, ((size_t)PAGE_CACHE_SIZE - offset_in_page));
1254                 BUG_ON(i >= (block_ctx->len + PAGE_CACHE_SIZE - 1) >>
1255                             PAGE_CACHE_SHIFT);
1256                 kaddr = block_ctx->datav[i];
1257                 memcpy(dst, kaddr + offset_in_page, cur);
1258
1259                 dst += cur;
1260                 len -= cur;
1261                 offset_in_page = 0;
1262                 i++;
1263         }
1264 }
1265
1266 static int btrfsic_create_link_to_next_block(
1267                 struct btrfsic_state *state,
1268                 struct btrfsic_block *block,
1269                 struct btrfsic_block_data_ctx *block_ctx,
1270                 u64 next_bytenr,
1271                 int limit_nesting,
1272                 struct btrfsic_block_data_ctx *next_block_ctx,
1273                 struct btrfsic_block **next_blockp,
1274                 int force_iodone_flag,
1275                 int *num_copiesp, int *mirror_nump,
1276                 struct btrfs_disk_key *disk_key,
1277                 u64 parent_generation)
1278 {
1279         struct btrfsic_block *next_block = NULL;
1280         int ret;
1281         struct btrfsic_block_link *l;
1282         int did_alloc_block_link;
1283         int block_was_created;
1284
1285         *next_blockp = NULL;
1286         if (0 == *num_copiesp) {
1287                 *num_copiesp =
1288                     btrfs_num_copies(&state->root->fs_info->mapping_tree,
1289                                      next_bytenr, state->metablock_size);
1290                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1291                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1292                                (unsigned long long)next_bytenr, *num_copiesp);
1293                 *mirror_nump = 1;
1294         }
1295
1296         if (*mirror_nump > *num_copiesp)
1297                 return 0;
1298
1299         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1300                 printk(KERN_INFO
1301                        "btrfsic_create_link_to_next_block(mirror_num=%d)\n",
1302                        *mirror_nump);
1303         ret = btrfsic_map_block(state, next_bytenr,
1304                                 state->metablock_size,
1305                                 next_block_ctx, *mirror_nump);
1306         if (ret) {
1307                 printk(KERN_INFO
1308                        "btrfsic: btrfsic_map_block(@%llu, mirror=%d) failed!\n",
1309                        (unsigned long long)next_bytenr, *mirror_nump);
1310                 btrfsic_release_block_ctx(next_block_ctx);
1311                 *next_blockp = NULL;
1312                 return -1;
1313         }
1314
1315         next_block = btrfsic_block_lookup_or_add(state,
1316                                                  next_block_ctx, "referenced ",
1317                                                  1, force_iodone_flag,
1318                                                  !force_iodone_flag,
1319                                                  *mirror_nump,
1320                                                  &block_was_created);
1321         if (NULL == next_block) {
1322                 btrfsic_release_block_ctx(next_block_ctx);
1323                 *next_blockp = NULL;
1324                 return -1;
1325         }
1326         if (block_was_created) {
1327                 l = NULL;
1328                 next_block->generation = BTRFSIC_GENERATION_UNKNOWN;
1329         } else {
1330                 if (next_block->logical_bytenr != next_bytenr &&
1331                     !(!next_block->is_metadata &&
1332                       0 == next_block->logical_bytenr)) {
1333                         printk(KERN_INFO
1334                                "Referenced block @%llu (%s/%llu/%d)"
1335                                " found in hash table, %c,"
1336                                " bytenr mismatch (!= stored %llu).\n",
1337                                (unsigned long long)next_bytenr,
1338                                next_block_ctx->dev->name,
1339                                (unsigned long long)next_block_ctx->dev_bytenr,
1340                                *mirror_nump,
1341                                btrfsic_get_block_type(state, next_block),
1342                                (unsigned long long)next_block->logical_bytenr);
1343                 } else if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1344                         printk(KERN_INFO
1345                                "Referenced block @%llu (%s/%llu/%d)"
1346                                " found in hash table, %c.\n",
1347                                (unsigned long long)next_bytenr,
1348                                next_block_ctx->dev->name,
1349                                (unsigned long long)next_block_ctx->dev_bytenr,
1350                                *mirror_nump,
1351                                btrfsic_get_block_type(state, next_block));
1352                 next_block->logical_bytenr = next_bytenr;
1353
1354                 next_block->mirror_num = *mirror_nump;
1355                 l = btrfsic_block_link_hashtable_lookup(
1356                                 next_block_ctx->dev->bdev,
1357                                 next_block_ctx->dev_bytenr,
1358                                 block_ctx->dev->bdev,
1359                                 block_ctx->dev_bytenr,
1360                                 &state->block_link_hashtable);
1361         }
1362
1363         next_block->disk_key = *disk_key;
1364         if (NULL == l) {
1365                 l = btrfsic_block_link_alloc();
1366                 if (NULL == l) {
1367                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
1368                         btrfsic_release_block_ctx(next_block_ctx);
1369                         *next_blockp = NULL;
1370                         return -1;
1371                 }
1372
1373                 did_alloc_block_link = 1;
1374                 l->block_ref_to = next_block;
1375                 l->block_ref_from = block;
1376                 l->ref_cnt = 1;
1377                 l->parent_generation = parent_generation;
1378
1379                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1380                         btrfsic_print_add_link(state, l);
1381
1382                 list_add(&l->node_ref_to, &block->ref_to_list);
1383                 list_add(&l->node_ref_from, &next_block->ref_from_list);
1384
1385                 btrfsic_block_link_hashtable_add(l,
1386                                                  &state->block_link_hashtable);
1387         } else {
1388                 did_alloc_block_link = 0;
1389                 if (0 == limit_nesting) {
1390                         l->ref_cnt++;
1391                         l->parent_generation = parent_generation;
1392                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1393                                 btrfsic_print_add_link(state, l);
1394                 }
1395         }
1396
1397         if (limit_nesting > 0 && did_alloc_block_link) {
1398                 ret = btrfsic_read_block(state, next_block_ctx);
1399                 if (ret < (int)next_block_ctx->len) {
1400                         printk(KERN_INFO
1401                                "btrfsic: read block @logical %llu failed!\n",
1402                                (unsigned long long)next_bytenr);
1403                         btrfsic_release_block_ctx(next_block_ctx);
1404                         *next_blockp = NULL;
1405                         return -1;
1406                 }
1407
1408                 *next_blockp = next_block;
1409         } else {
1410                 *next_blockp = NULL;
1411         }
1412         (*mirror_nump)++;
1413
1414         return 0;
1415 }
1416
1417 static int btrfsic_handle_extent_data(
1418                 struct btrfsic_state *state,
1419                 struct btrfsic_block *block,
1420                 struct btrfsic_block_data_ctx *block_ctx,
1421                 u32 item_offset, int force_iodone_flag)
1422 {
1423         int ret;
1424         struct btrfs_file_extent_item file_extent_item;
1425         u64 file_extent_item_offset;
1426         u64 next_bytenr;
1427         u64 num_bytes;
1428         u64 generation;
1429         struct btrfsic_block_link *l;
1430
1431         file_extent_item_offset = offsetof(struct btrfs_leaf, items) +
1432                                   item_offset;
1433         if (file_extent_item_offset +
1434             offsetof(struct btrfs_file_extent_item, disk_num_bytes) >
1435             block_ctx->len) {
1436                 printk(KERN_INFO
1437                        "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1438                        block_ctx->start, block_ctx->dev->name);
1439                 return -1;
1440         }
1441
1442         btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1443                 file_extent_item_offset,
1444                 offsetof(struct btrfs_file_extent_item, disk_num_bytes));
1445         if (BTRFS_FILE_EXTENT_REG != file_extent_item.type ||
1446             ((u64)0) == le64_to_cpu(file_extent_item.disk_bytenr)) {
1447                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1448                         printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu\n",
1449                                file_extent_item.type,
1450                                (unsigned long long)
1451                                le64_to_cpu(file_extent_item.disk_bytenr));
1452                 return 0;
1453         }
1454
1455         if (file_extent_item_offset + sizeof(struct btrfs_file_extent_item) >
1456             block_ctx->len) {
1457                 printk(KERN_INFO
1458                        "btrfsic: file item out of bounce at logical %llu, dev %s\n",
1459                        block_ctx->start, block_ctx->dev->name);
1460                 return -1;
1461         }
1462         btrfsic_read_from_block_data(block_ctx, &file_extent_item,
1463                                      file_extent_item_offset,
1464                                      sizeof(struct btrfs_file_extent_item));
1465         next_bytenr = le64_to_cpu(file_extent_item.disk_bytenr) +
1466                       le64_to_cpu(file_extent_item.offset);
1467         generation = le64_to_cpu(file_extent_item.generation);
1468         num_bytes = le64_to_cpu(file_extent_item.num_bytes);
1469         generation = le64_to_cpu(file_extent_item.generation);
1470
1471         if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1472                 printk(KERN_INFO "extent_data: type %u, disk_bytenr = %llu,"
1473                        " offset = %llu, num_bytes = %llu\n",
1474                        file_extent_item.type,
1475                        (unsigned long long)
1476                        le64_to_cpu(file_extent_item.disk_bytenr),
1477                        (unsigned long long)le64_to_cpu(file_extent_item.offset),
1478                        (unsigned long long)num_bytes);
1479         while (num_bytes > 0) {
1480                 u32 chunk_len;
1481                 int num_copies;
1482                 int mirror_num;
1483
1484                 if (num_bytes > state->datablock_size)
1485                         chunk_len = state->datablock_size;
1486                 else
1487                         chunk_len = num_bytes;
1488
1489                 num_copies =
1490                     btrfs_num_copies(&state->root->fs_info->mapping_tree,
1491                                      next_bytenr, state->datablock_size);
1492                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
1493                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
1494                                (unsigned long long)next_bytenr, num_copies);
1495                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
1496                         struct btrfsic_block_data_ctx next_block_ctx;
1497                         struct btrfsic_block *next_block;
1498                         int block_was_created;
1499
1500                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1501                                 printk(KERN_INFO "btrfsic_handle_extent_data("
1502                                        "mirror_num=%d)\n", mirror_num);
1503                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERY_VERBOSE)
1504                                 printk(KERN_INFO
1505                                        "\tdisk_bytenr = %llu, num_bytes %u\n",
1506                                        (unsigned long long)next_bytenr,
1507                                        chunk_len);
1508                         ret = btrfsic_map_block(state, next_bytenr,
1509                                                 chunk_len, &next_block_ctx,
1510                                                 mirror_num);
1511                         if (ret) {
1512                                 printk(KERN_INFO
1513                                        "btrfsic: btrfsic_map_block(@%llu,"
1514                                        " mirror=%d) failed!\n",
1515                                        (unsigned long long)next_bytenr,
1516                                        mirror_num);
1517                                 return -1;
1518                         }
1519
1520                         next_block = btrfsic_block_lookup_or_add(
1521                                         state,
1522                                         &next_block_ctx,
1523                                         "referenced ",
1524                                         0,
1525                                         force_iodone_flag,
1526                                         !force_iodone_flag,
1527                                         mirror_num,
1528                                         &block_was_created);
1529                         if (NULL == next_block) {
1530                                 printk(KERN_INFO
1531                                        "btrfsic: error, kmalloc failed!\n");
1532                                 btrfsic_release_block_ctx(&next_block_ctx);
1533                                 return -1;
1534                         }
1535                         if (!block_was_created) {
1536                                 if (next_block->logical_bytenr != next_bytenr &&
1537                                     !(!next_block->is_metadata &&
1538                                       0 == next_block->logical_bytenr)) {
1539                                         printk(KERN_INFO
1540                                                "Referenced block"
1541                                                " @%llu (%s/%llu/%d)"
1542                                                " found in hash table, D,"
1543                                                " bytenr mismatch"
1544                                                " (!= stored %llu).\n",
1545                                                (unsigned long long)next_bytenr,
1546                                                next_block_ctx.dev->name,
1547                                                (unsigned long long)
1548                                                next_block_ctx.dev_bytenr,
1549                                                mirror_num,
1550                                                (unsigned long long)
1551                                                next_block->logical_bytenr);
1552                                 }
1553                                 next_block->logical_bytenr = next_bytenr;
1554                                 next_block->mirror_num = mirror_num;
1555                         }
1556
1557                         l = btrfsic_block_link_lookup_or_add(state,
1558                                                              &next_block_ctx,
1559                                                              next_block, block,
1560                                                              generation);
1561                         btrfsic_release_block_ctx(&next_block_ctx);
1562                         if (NULL == l)
1563                                 return -1;
1564                 }
1565
1566                 next_bytenr += chunk_len;
1567                 num_bytes -= chunk_len;
1568         }
1569
1570         return 0;
1571 }
1572
1573 static int btrfsic_map_block(struct btrfsic_state *state, u64 bytenr, u32 len,
1574                              struct btrfsic_block_data_ctx *block_ctx_out,
1575                              int mirror_num)
1576 {
1577         int ret;
1578         u64 length;
1579         struct btrfs_bio *multi = NULL;
1580         struct btrfs_device *device;
1581
1582         length = len;
1583         ret = btrfs_map_block(&state->root->fs_info->mapping_tree, READ,
1584                               bytenr, &length, &multi, mirror_num);
1585
1586         device = multi->stripes[0].dev;
1587         block_ctx_out->dev = btrfsic_dev_state_lookup(device->bdev);
1588         block_ctx_out->dev_bytenr = multi->stripes[0].physical;
1589         block_ctx_out->start = bytenr;
1590         block_ctx_out->len = len;
1591         block_ctx_out->datav = NULL;
1592         block_ctx_out->pagev = NULL;
1593         block_ctx_out->mem_to_free = NULL;
1594
1595         if (0 == ret)
1596                 kfree(multi);
1597         if (NULL == block_ctx_out->dev) {
1598                 ret = -ENXIO;
1599                 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#1)!\n");
1600         }
1601
1602         return ret;
1603 }
1604
1605 static int btrfsic_map_superblock(struct btrfsic_state *state, u64 bytenr,
1606                                   u32 len, struct block_device *bdev,
1607                                   struct btrfsic_block_data_ctx *block_ctx_out)
1608 {
1609         block_ctx_out->dev = btrfsic_dev_state_lookup(bdev);
1610         block_ctx_out->dev_bytenr = bytenr;
1611         block_ctx_out->start = bytenr;
1612         block_ctx_out->len = len;
1613         block_ctx_out->datav = NULL;
1614         block_ctx_out->pagev = NULL;
1615         block_ctx_out->mem_to_free = NULL;
1616         if (NULL != block_ctx_out->dev) {
1617                 return 0;
1618         } else {
1619                 printk(KERN_INFO "btrfsic: error, cannot lookup dev (#2)!\n");
1620                 return -ENXIO;
1621         }
1622 }
1623
1624 static void btrfsic_release_block_ctx(struct btrfsic_block_data_ctx *block_ctx)
1625 {
1626         if (block_ctx->mem_to_free) {
1627                 unsigned int num_pages;
1628
1629                 BUG_ON(!block_ctx->datav);
1630                 BUG_ON(!block_ctx->pagev);
1631                 num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1632                             PAGE_CACHE_SHIFT;
1633                 while (num_pages > 0) {
1634                         num_pages--;
1635                         if (block_ctx->datav[num_pages]) {
1636                                 kunmap(block_ctx->pagev[num_pages]);
1637                                 block_ctx->datav[num_pages] = NULL;
1638                         }
1639                         if (block_ctx->pagev[num_pages]) {
1640                                 __free_page(block_ctx->pagev[num_pages]);
1641                                 block_ctx->pagev[num_pages] = NULL;
1642                         }
1643                 }
1644
1645                 kfree(block_ctx->mem_to_free);
1646                 block_ctx->mem_to_free = NULL;
1647                 block_ctx->pagev = NULL;
1648                 block_ctx->datav = NULL;
1649         }
1650 }
1651
1652 static int btrfsic_read_block(struct btrfsic_state *state,
1653                               struct btrfsic_block_data_ctx *block_ctx)
1654 {
1655         unsigned int num_pages;
1656         unsigned int i;
1657         u64 dev_bytenr;
1658         int ret;
1659
1660         BUG_ON(block_ctx->datav);
1661         BUG_ON(block_ctx->pagev);
1662         BUG_ON(block_ctx->mem_to_free);
1663         if (block_ctx->dev_bytenr & ((u64)PAGE_CACHE_SIZE - 1)) {
1664                 printk(KERN_INFO
1665                        "btrfsic: read_block() with unaligned bytenr %llu\n",
1666                        (unsigned long long)block_ctx->dev_bytenr);
1667                 return -1;
1668         }
1669
1670         num_pages = (block_ctx->len + (u64)PAGE_CACHE_SIZE - 1) >>
1671                     PAGE_CACHE_SHIFT;
1672         block_ctx->mem_to_free = kzalloc((sizeof(*block_ctx->datav) +
1673                                           sizeof(*block_ctx->pagev)) *
1674                                          num_pages, GFP_NOFS);
1675         if (!block_ctx->mem_to_free)
1676                 return -1;
1677         block_ctx->datav = block_ctx->mem_to_free;
1678         block_ctx->pagev = (struct page **)(block_ctx->datav + num_pages);
1679         for (i = 0; i < num_pages; i++) {
1680                 block_ctx->pagev[i] = alloc_page(GFP_NOFS);
1681                 if (!block_ctx->pagev[i])
1682                         return -1;
1683         }
1684
1685         dev_bytenr = block_ctx->dev_bytenr;
1686         for (i = 0; i < num_pages;) {
1687                 struct bio *bio;
1688                 unsigned int j;
1689                 DECLARE_COMPLETION_ONSTACK(complete);
1690
1691                 bio = bio_alloc(GFP_NOFS, num_pages - i);
1692                 if (!bio) {
1693                         printk(KERN_INFO
1694                                "btrfsic: bio_alloc() for %u pages failed!\n",
1695                                num_pages - i);
1696                         return -1;
1697                 }
1698                 bio->bi_bdev = block_ctx->dev->bdev;
1699                 bio->bi_sector = dev_bytenr >> 9;
1700                 bio->bi_end_io = btrfsic_complete_bio_end_io;
1701                 bio->bi_private = &complete;
1702
1703                 for (j = i; j < num_pages; j++) {
1704                         ret = bio_add_page(bio, block_ctx->pagev[j],
1705                                            PAGE_CACHE_SIZE, 0);
1706                         if (PAGE_CACHE_SIZE != ret)
1707                                 break;
1708                 }
1709                 if (j == i) {
1710                         printk(KERN_INFO
1711                                "btrfsic: error, failed to add a single page!\n");
1712                         return -1;
1713                 }
1714                 submit_bio(READ, bio);
1715
1716                 /* this will also unplug the queue */
1717                 wait_for_completion(&complete);
1718
1719                 if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) {
1720                         printk(KERN_INFO
1721                                "btrfsic: read error at logical %llu dev %s!\n",
1722                                block_ctx->start, block_ctx->dev->name);
1723                         bio_put(bio);
1724                         return -1;
1725                 }
1726                 bio_put(bio);
1727                 dev_bytenr += (j - i) * PAGE_CACHE_SIZE;
1728                 i = j;
1729         }
1730         for (i = 0; i < num_pages; i++) {
1731                 block_ctx->datav[i] = kmap(block_ctx->pagev[i]);
1732                 if (!block_ctx->datav[i]) {
1733                         printk(KERN_INFO "btrfsic: kmap() failed (dev %s)!\n",
1734                                block_ctx->dev->name);
1735                         return -1;
1736                 }
1737         }
1738
1739         return block_ctx->len;
1740 }
1741
1742 static void btrfsic_complete_bio_end_io(struct bio *bio, int err)
1743 {
1744         complete((struct completion *)bio->bi_private);
1745 }
1746
1747 static void btrfsic_dump_database(struct btrfsic_state *state)
1748 {
1749         struct list_head *elem_all;
1750
1751         BUG_ON(NULL == state);
1752
1753         printk(KERN_INFO "all_blocks_list:\n");
1754         list_for_each(elem_all, &state->all_blocks_list) {
1755                 const struct btrfsic_block *const b_all =
1756                     list_entry(elem_all, struct btrfsic_block,
1757                                all_blocks_node);
1758                 struct list_head *elem_ref_to;
1759                 struct list_head *elem_ref_from;
1760
1761                 printk(KERN_INFO "%c-block @%llu (%s/%llu/%d)\n",
1762                        btrfsic_get_block_type(state, b_all),
1763                        (unsigned long long)b_all->logical_bytenr,
1764                        b_all->dev_state->name,
1765                        (unsigned long long)b_all->dev_bytenr,
1766                        b_all->mirror_num);
1767
1768                 list_for_each(elem_ref_to, &b_all->ref_to_list) {
1769                         const struct btrfsic_block_link *const l =
1770                             list_entry(elem_ref_to,
1771                                        struct btrfsic_block_link,
1772                                        node_ref_to);
1773
1774                         printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
1775                                " refers %u* to"
1776                                " %c @%llu (%s/%llu/%d)\n",
1777                                btrfsic_get_block_type(state, b_all),
1778                                (unsigned long long)b_all->logical_bytenr,
1779                                b_all->dev_state->name,
1780                                (unsigned long long)b_all->dev_bytenr,
1781                                b_all->mirror_num,
1782                                l->ref_cnt,
1783                                btrfsic_get_block_type(state, l->block_ref_to),
1784                                (unsigned long long)
1785                                l->block_ref_to->logical_bytenr,
1786                                l->block_ref_to->dev_state->name,
1787                                (unsigned long long)l->block_ref_to->dev_bytenr,
1788                                l->block_ref_to->mirror_num);
1789                 }
1790
1791                 list_for_each(elem_ref_from, &b_all->ref_from_list) {
1792                         const struct btrfsic_block_link *const l =
1793                             list_entry(elem_ref_from,
1794                                        struct btrfsic_block_link,
1795                                        node_ref_from);
1796
1797                         printk(KERN_INFO " %c @%llu (%s/%llu/%d)"
1798                                " is ref %u* from"
1799                                " %c @%llu (%s/%llu/%d)\n",
1800                                btrfsic_get_block_type(state, b_all),
1801                                (unsigned long long)b_all->logical_bytenr,
1802                                b_all->dev_state->name,
1803                                (unsigned long long)b_all->dev_bytenr,
1804                                b_all->mirror_num,
1805                                l->ref_cnt,
1806                                btrfsic_get_block_type(state, l->block_ref_from),
1807                                (unsigned long long)
1808                                l->block_ref_from->logical_bytenr,
1809                                l->block_ref_from->dev_state->name,
1810                                (unsigned long long)
1811                                l->block_ref_from->dev_bytenr,
1812                                l->block_ref_from->mirror_num);
1813                 }
1814
1815                 printk(KERN_INFO "\n");
1816         }
1817 }
1818
1819 /*
1820  * Test whether the disk block contains a tree block (leaf or node)
1821  * (note that this test fails for the super block)
1822  */
1823 static int btrfsic_test_for_metadata(struct btrfsic_state *state,
1824                                      char **datav, unsigned int num_pages)
1825 {
1826         struct btrfs_header *h;
1827         u8 csum[BTRFS_CSUM_SIZE];
1828         u32 crc = ~(u32)0;
1829         unsigned int i;
1830
1831         if (num_pages * PAGE_CACHE_SIZE < state->metablock_size)
1832                 return 1; /* not metadata */
1833         num_pages = state->metablock_size >> PAGE_CACHE_SHIFT;
1834         h = (struct btrfs_header *)datav[0];
1835
1836         if (memcmp(h->fsid, state->root->fs_info->fsid, BTRFS_UUID_SIZE))
1837                 return 1;
1838
1839         for (i = 0; i < num_pages; i++) {
1840                 u8 *data = i ? datav[i] : (datav[i] + BTRFS_CSUM_SIZE);
1841                 size_t sublen = i ? PAGE_CACHE_SIZE :
1842                                     (PAGE_CACHE_SIZE - BTRFS_CSUM_SIZE);
1843
1844                 crc = crc32c(crc, data, sublen);
1845         }
1846         btrfs_csum_final(crc, csum);
1847         if (memcmp(csum, h->csum, state->csum_size))
1848                 return 1;
1849
1850         return 0; /* is metadata */
1851 }
1852
1853 static void btrfsic_process_written_block(struct btrfsic_dev_state *dev_state,
1854                                           u64 dev_bytenr, char **mapped_datav,
1855                                           unsigned int num_pages,
1856                                           struct bio *bio, int *bio_is_patched,
1857                                           struct buffer_head *bh,
1858                                           int submit_bio_bh_rw)
1859 {
1860         int is_metadata;
1861         struct btrfsic_block *block;
1862         struct btrfsic_block_data_ctx block_ctx;
1863         int ret;
1864         struct btrfsic_state *state = dev_state->state;
1865         struct block_device *bdev = dev_state->bdev;
1866         unsigned int processed_len;
1867
1868         if (NULL != bio_is_patched)
1869                 *bio_is_patched = 0;
1870
1871 again:
1872         if (num_pages == 0)
1873                 return;
1874
1875         processed_len = 0;
1876         is_metadata = (0 == btrfsic_test_for_metadata(state, mapped_datav,
1877                                                       num_pages));
1878
1879         block = btrfsic_block_hashtable_lookup(bdev, dev_bytenr,
1880                                                &state->block_hashtable);
1881         if (NULL != block) {
1882                 u64 bytenr = 0;
1883                 struct list_head *elem_ref_to;
1884                 struct list_head *tmp_ref_to;
1885
1886                 if (block->is_superblock) {
1887                         bytenr = le64_to_cpu(((struct btrfs_super_block *)
1888                                               mapped_datav[0])->bytenr);
1889                         if (num_pages * PAGE_CACHE_SIZE <
1890                             BTRFS_SUPER_INFO_SIZE) {
1891                                 printk(KERN_INFO
1892                                        "btrfsic: cannot work with too short bios!\n");
1893                                 return;
1894                         }
1895                         is_metadata = 1;
1896                         BUG_ON(BTRFS_SUPER_INFO_SIZE & (PAGE_CACHE_SIZE - 1));
1897                         processed_len = BTRFS_SUPER_INFO_SIZE;
1898                         if (state->print_mask &
1899                             BTRFSIC_PRINT_MASK_TREE_BEFORE_SB_WRITE) {
1900                                 printk(KERN_INFO
1901                                        "[before new superblock is written]:\n");
1902                                 btrfsic_dump_tree_sub(state, block, 0);
1903                         }
1904                 }
1905                 if (is_metadata) {
1906                         if (!block->is_superblock) {
1907                                 if (num_pages * PAGE_CACHE_SIZE <
1908                                     state->metablock_size) {
1909                                         printk(KERN_INFO
1910                                                "btrfsic: cannot work with too short bios!\n");
1911                                         return;
1912                                 }
1913                                 processed_len = state->metablock_size;
1914                                 bytenr = le64_to_cpu(((struct btrfs_header *)
1915                                                       mapped_datav[0])->bytenr);
1916                                 btrfsic_cmp_log_and_dev_bytenr(state, bytenr,
1917                                                                dev_state,
1918                                                                dev_bytenr);
1919                         }
1920                         if (block->logical_bytenr != bytenr) {
1921                                 printk(KERN_INFO
1922                                        "Written block @%llu (%s/%llu/%d)"
1923                                        " found in hash table, %c,"
1924                                        " bytenr mismatch"
1925                                        " (!= stored %llu).\n",
1926                                        (unsigned long long)bytenr,
1927                                        dev_state->name,
1928                                        (unsigned long long)dev_bytenr,
1929                                        block->mirror_num,
1930                                        btrfsic_get_block_type(state, block),
1931                                        (unsigned long long)
1932                                        block->logical_bytenr);
1933                                 block->logical_bytenr = bytenr;
1934                         } else if (state->print_mask &
1935                                    BTRFSIC_PRINT_MASK_VERBOSE)
1936                                 printk(KERN_INFO
1937                                        "Written block @%llu (%s/%llu/%d)"
1938                                        " found in hash table, %c.\n",
1939                                        (unsigned long long)bytenr,
1940                                        dev_state->name,
1941                                        (unsigned long long)dev_bytenr,
1942                                        block->mirror_num,
1943                                        btrfsic_get_block_type(state, block));
1944                 } else {
1945                         if (num_pages * PAGE_CACHE_SIZE <
1946                             state->datablock_size) {
1947                                 printk(KERN_INFO
1948                                        "btrfsic: cannot work with too short bios!\n");
1949                                 return;
1950                         }
1951                         processed_len = state->datablock_size;
1952                         bytenr = block->logical_bytenr;
1953                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1954                                 printk(KERN_INFO
1955                                        "Written block @%llu (%s/%llu/%d)"
1956                                        " found in hash table, %c.\n",
1957                                        (unsigned long long)bytenr,
1958                                        dev_state->name,
1959                                        (unsigned long long)dev_bytenr,
1960                                        block->mirror_num,
1961                                        btrfsic_get_block_type(state, block));
1962                 }
1963
1964                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
1965                         printk(KERN_INFO
1966                                "ref_to_list: %cE, ref_from_list: %cE\n",
1967                                list_empty(&block->ref_to_list) ? ' ' : '!',
1968                                list_empty(&block->ref_from_list) ? ' ' : '!');
1969                 if (btrfsic_is_block_ref_by_superblock(state, block, 0)) {
1970                         printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
1971                                " @%llu (%s/%llu/%d), old(gen=%llu,"
1972                                " objectid=%llu, type=%d, offset=%llu),"
1973                                " new(gen=%llu),"
1974                                " which is referenced by most recent superblock"
1975                                " (superblockgen=%llu)!\n",
1976                                btrfsic_get_block_type(state, block),
1977                                (unsigned long long)bytenr,
1978                                dev_state->name,
1979                                (unsigned long long)dev_bytenr,
1980                                block->mirror_num,
1981                                (unsigned long long)block->generation,
1982                                (unsigned long long)
1983                                le64_to_cpu(block->disk_key.objectid),
1984                                block->disk_key.type,
1985                                (unsigned long long)
1986                                le64_to_cpu(block->disk_key.offset),
1987                                (unsigned long long)
1988                                le64_to_cpu(((struct btrfs_header *)
1989                                             mapped_datav[0])->generation),
1990                                (unsigned long long)
1991                                state->max_superblock_generation);
1992                         btrfsic_dump_tree(state);
1993                 }
1994
1995                 if (!block->is_iodone && !block->never_written) {
1996                         printk(KERN_INFO "btrfs: attempt to overwrite %c-block"
1997                                " @%llu (%s/%llu/%d), oldgen=%llu, newgen=%llu,"
1998                                " which is not yet iodone!\n",
1999                                btrfsic_get_block_type(state, block),
2000                                (unsigned long long)bytenr,
2001                                dev_state->name,
2002                                (unsigned long long)dev_bytenr,
2003                                block->mirror_num,
2004                                (unsigned long long)block->generation,
2005                                (unsigned long long)
2006                                le64_to_cpu(((struct btrfs_header *)
2007                                             mapped_datav[0])->generation));
2008                         /* it would not be safe to go on */
2009                         btrfsic_dump_tree(state);
2010                         goto continue_loop;
2011                 }
2012
2013                 /*
2014                  * Clear all references of this block. Do not free
2015                  * the block itself even if is not referenced anymore
2016                  * because it still carries valueable information
2017                  * like whether it was ever written and IO completed.
2018                  */
2019                 list_for_each_safe(elem_ref_to, tmp_ref_to,
2020                                    &block->ref_to_list) {
2021                         struct btrfsic_block_link *const l =
2022                             list_entry(elem_ref_to,
2023                                        struct btrfsic_block_link,
2024                                        node_ref_to);
2025
2026                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2027                                 btrfsic_print_rem_link(state, l);
2028                         l->ref_cnt--;
2029                         if (0 == l->ref_cnt) {
2030                                 list_del(&l->node_ref_to);
2031                                 list_del(&l->node_ref_from);
2032                                 btrfsic_block_link_hashtable_remove(l);
2033                                 btrfsic_block_link_free(l);
2034                         }
2035                 }
2036
2037                 if (block->is_superblock)
2038                         ret = btrfsic_map_superblock(state, bytenr,
2039                                                      processed_len,
2040                                                      bdev, &block_ctx);
2041                 else
2042                         ret = btrfsic_map_block(state, bytenr, processed_len,
2043                                                 &block_ctx, 0);
2044                 if (ret) {
2045                         printk(KERN_INFO
2046                                "btrfsic: btrfsic_map_block(root @%llu)"
2047                                " failed!\n", (unsigned long long)bytenr);
2048                         goto continue_loop;
2049                 }
2050                 block_ctx.datav = mapped_datav;
2051                 /* the following is required in case of writes to mirrors,
2052                  * use the same that was used for the lookup */
2053                 block_ctx.dev = dev_state;
2054                 block_ctx.dev_bytenr = dev_bytenr;
2055
2056                 if (is_metadata || state->include_extent_data) {
2057                         block->never_written = 0;
2058                         block->iodone_w_error = 0;
2059                         if (NULL != bio) {
2060                                 block->is_iodone = 0;
2061                                 BUG_ON(NULL == bio_is_patched);
2062                                 if (!*bio_is_patched) {
2063                                         block->orig_bio_bh_private =
2064                                             bio->bi_private;
2065                                         block->orig_bio_bh_end_io.bio =
2066                                             bio->bi_end_io;
2067                                         block->next_in_same_bio = NULL;
2068                                         bio->bi_private = block;
2069                                         bio->bi_end_io = btrfsic_bio_end_io;
2070                                         *bio_is_patched = 1;
2071                                 } else {
2072                                         struct btrfsic_block *chained_block =
2073                                             (struct btrfsic_block *)
2074                                             bio->bi_private;
2075
2076                                         BUG_ON(NULL == chained_block);
2077                                         block->orig_bio_bh_private =
2078                                             chained_block->orig_bio_bh_private;
2079                                         block->orig_bio_bh_end_io.bio =
2080                                             chained_block->orig_bio_bh_end_io.
2081                                             bio;
2082                                         block->next_in_same_bio = chained_block;
2083                                         bio->bi_private = block;
2084                                 }
2085                         } else if (NULL != bh) {
2086                                 block->is_iodone = 0;
2087                                 block->orig_bio_bh_private = bh->b_private;
2088                                 block->orig_bio_bh_end_io.bh = bh->b_end_io;
2089                                 block->next_in_same_bio = NULL;
2090                                 bh->b_private = block;
2091                                 bh->b_end_io = btrfsic_bh_end_io;
2092                         } else {
2093                                 block->is_iodone = 1;
2094                                 block->orig_bio_bh_private = NULL;
2095                                 block->orig_bio_bh_end_io.bio = NULL;
2096                                 block->next_in_same_bio = NULL;
2097                         }
2098                 }
2099
2100                 block->flush_gen = dev_state->last_flush_gen + 1;
2101                 block->submit_bio_bh_rw = submit_bio_bh_rw;
2102                 if (is_metadata) {
2103                         block->logical_bytenr = bytenr;
2104                         block->is_metadata = 1;
2105                         if (block->is_superblock) {
2106                                 BUG_ON(PAGE_CACHE_SIZE !=
2107                                        BTRFS_SUPER_INFO_SIZE);
2108                                 ret = btrfsic_process_written_superblock(
2109                                                 state,
2110                                                 block,
2111                                                 (struct btrfs_super_block *)
2112                                                 mapped_datav[0]);
2113                                 if (state->print_mask &
2114                                     BTRFSIC_PRINT_MASK_TREE_AFTER_SB_WRITE) {
2115                                         printk(KERN_INFO
2116                                         "[after new superblock is written]:\n");
2117                                         btrfsic_dump_tree_sub(state, block, 0);
2118                                 }
2119                         } else {
2120                                 block->mirror_num = 0;  /* unknown */
2121                                 ret = btrfsic_process_metablock(
2122                                                 state,
2123                                                 block,
2124                                                 &block_ctx,
2125                                                 0, 0);
2126                         }
2127                         if (ret)
2128                                 printk(KERN_INFO
2129                                        "btrfsic: btrfsic_process_metablock"
2130                                        "(root @%llu) failed!\n",
2131                                        (unsigned long long)dev_bytenr);
2132                 } else {
2133                         block->is_metadata = 0;
2134                         block->mirror_num = 0;  /* unknown */
2135                         block->generation = BTRFSIC_GENERATION_UNKNOWN;
2136                         if (!state->include_extent_data
2137                             && list_empty(&block->ref_from_list)) {
2138                                 /*
2139                                  * disk block is overwritten with extent
2140                                  * data (not meta data) and we are configured
2141                                  * to not include extent data: take the
2142                                  * chance and free the block's memory
2143                                  */
2144                                 btrfsic_block_hashtable_remove(block);
2145                                 list_del(&block->all_blocks_node);
2146                                 btrfsic_block_free(block);
2147                         }
2148                 }
2149                 btrfsic_release_block_ctx(&block_ctx);
2150         } else {
2151                 /* block has not been found in hash table */
2152                 u64 bytenr;
2153
2154                 if (!is_metadata) {
2155                         processed_len = state->datablock_size;
2156                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2157                                 printk(KERN_INFO "Written block (%s/%llu/?)"
2158                                        " !found in hash table, D.\n",
2159                                        dev_state->name,
2160                                        (unsigned long long)dev_bytenr);
2161                         if (!state->include_extent_data) {
2162                                 /* ignore that written D block */
2163                                 goto continue_loop;
2164                         }
2165
2166                         /* this is getting ugly for the
2167                          * include_extent_data case... */
2168                         bytenr = 0;     /* unknown */
2169                         block_ctx.start = bytenr;
2170                         block_ctx.len = processed_len;
2171                         block_ctx.mem_to_free = NULL;
2172                         block_ctx.pagev = NULL;
2173                 } else {
2174                         processed_len = state->metablock_size;
2175                         bytenr = le64_to_cpu(((struct btrfs_header *)
2176                                               mapped_datav[0])->bytenr);
2177                         btrfsic_cmp_log_and_dev_bytenr(state, bytenr, dev_state,
2178                                                        dev_bytenr);
2179                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2180                                 printk(KERN_INFO
2181                                        "Written block @%llu (%s/%llu/?)"
2182                                        " !found in hash table, M.\n",
2183                                        (unsigned long long)bytenr,
2184                                        dev_state->name,
2185                                        (unsigned long long)dev_bytenr);
2186
2187                         ret = btrfsic_map_block(state, bytenr, processed_len,
2188                                                 &block_ctx, 0);
2189                         if (ret) {
2190                                 printk(KERN_INFO
2191                                        "btrfsic: btrfsic_map_block(root @%llu)"
2192                                        " failed!\n",
2193                                        (unsigned long long)dev_bytenr);
2194                                 goto continue_loop;
2195                         }
2196                 }
2197                 block_ctx.datav = mapped_datav;
2198                 /* the following is required in case of writes to mirrors,
2199                  * use the same that was used for the lookup */
2200                 block_ctx.dev = dev_state;
2201                 block_ctx.dev_bytenr = dev_bytenr;
2202
2203                 block = btrfsic_block_alloc();
2204                 if (NULL == block) {
2205                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2206                         btrfsic_release_block_ctx(&block_ctx);
2207                         goto continue_loop;
2208                 }
2209                 block->dev_state = dev_state;
2210                 block->dev_bytenr = dev_bytenr;
2211                 block->logical_bytenr = bytenr;
2212                 block->is_metadata = is_metadata;
2213                 block->never_written = 0;
2214                 block->iodone_w_error = 0;
2215                 block->mirror_num = 0;  /* unknown */
2216                 block->flush_gen = dev_state->last_flush_gen + 1;
2217                 block->submit_bio_bh_rw = submit_bio_bh_rw;
2218                 if (NULL != bio) {
2219                         block->is_iodone = 0;
2220                         BUG_ON(NULL == bio_is_patched);
2221                         if (!*bio_is_patched) {
2222                                 block->orig_bio_bh_private = bio->bi_private;
2223                                 block->orig_bio_bh_end_io.bio = bio->bi_end_io;
2224                                 block->next_in_same_bio = NULL;
2225                                 bio->bi_private = block;
2226                                 bio->bi_end_io = btrfsic_bio_end_io;
2227                                 *bio_is_patched = 1;
2228                         } else {
2229                                 struct btrfsic_block *chained_block =
2230                                     (struct btrfsic_block *)
2231                                     bio->bi_private;
2232
2233                                 BUG_ON(NULL == chained_block);
2234                                 block->orig_bio_bh_private =
2235                                     chained_block->orig_bio_bh_private;
2236                                 block->orig_bio_bh_end_io.bio =
2237                                     chained_block->orig_bio_bh_end_io.bio;
2238                                 block->next_in_same_bio = chained_block;
2239                                 bio->bi_private = block;
2240                         }
2241                 } else if (NULL != bh) {
2242                         block->is_iodone = 0;
2243                         block->orig_bio_bh_private = bh->b_private;
2244                         block->orig_bio_bh_end_io.bh = bh->b_end_io;
2245                         block->next_in_same_bio = NULL;
2246                         bh->b_private = block;
2247                         bh->b_end_io = btrfsic_bh_end_io;
2248                 } else {
2249                         block->is_iodone = 1;
2250                         block->orig_bio_bh_private = NULL;
2251                         block->orig_bio_bh_end_io.bio = NULL;
2252                         block->next_in_same_bio = NULL;
2253                 }
2254                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2255                         printk(KERN_INFO
2256                                "New written %c-block @%llu (%s/%llu/%d)\n",
2257                                is_metadata ? 'M' : 'D',
2258                                (unsigned long long)block->logical_bytenr,
2259                                block->dev_state->name,
2260                                (unsigned long long)block->dev_bytenr,
2261                                block->mirror_num);
2262                 list_add(&block->all_blocks_node, &state->all_blocks_list);
2263                 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2264
2265                 if (is_metadata) {
2266                         ret = btrfsic_process_metablock(state, block,
2267                                                         &block_ctx, 0, 0);
2268                         if (ret)
2269                                 printk(KERN_INFO
2270                                        "btrfsic: process_metablock(root @%llu)"
2271                                        " failed!\n",
2272                                        (unsigned long long)dev_bytenr);
2273                 }
2274                 btrfsic_release_block_ctx(&block_ctx);
2275         }
2276
2277 continue_loop:
2278         BUG_ON(!processed_len);
2279         dev_bytenr += processed_len;
2280         mapped_datav += processed_len >> PAGE_CACHE_SHIFT;
2281         num_pages -= processed_len >> PAGE_CACHE_SHIFT;
2282         goto again;
2283 }
2284
2285 static void btrfsic_bio_end_io(struct bio *bp, int bio_error_status)
2286 {
2287         struct btrfsic_block *block = (struct btrfsic_block *)bp->bi_private;
2288         int iodone_w_error;
2289
2290         /* mutex is not held! This is not save if IO is not yet completed
2291          * on umount */
2292         iodone_w_error = 0;
2293         if (bio_error_status)
2294                 iodone_w_error = 1;
2295
2296         BUG_ON(NULL == block);
2297         bp->bi_private = block->orig_bio_bh_private;
2298         bp->bi_end_io = block->orig_bio_bh_end_io.bio;
2299
2300         do {
2301                 struct btrfsic_block *next_block;
2302                 struct btrfsic_dev_state *const dev_state = block->dev_state;
2303
2304                 if ((dev_state->state->print_mask &
2305                      BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2306                         printk(KERN_INFO
2307                                "bio_end_io(err=%d) for %c @%llu (%s/%llu/%d)\n",
2308                                bio_error_status,
2309                                btrfsic_get_block_type(dev_state->state, block),
2310                                (unsigned long long)block->logical_bytenr,
2311                                dev_state->name,
2312                                (unsigned long long)block->dev_bytenr,
2313                                block->mirror_num);
2314                 next_block = block->next_in_same_bio;
2315                 block->iodone_w_error = iodone_w_error;
2316                 if (block->submit_bio_bh_rw & REQ_FLUSH) {
2317                         dev_state->last_flush_gen++;
2318                         if ((dev_state->state->print_mask &
2319                              BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2320                                 printk(KERN_INFO
2321                                        "bio_end_io() new %s flush_gen=%llu\n",
2322                                        dev_state->name,
2323                                        (unsigned long long)
2324                                        dev_state->last_flush_gen);
2325                 }
2326                 if (block->submit_bio_bh_rw & REQ_FUA)
2327                         block->flush_gen = 0; /* FUA completed means block is
2328                                                * on disk */
2329                 block->is_iodone = 1; /* for FLUSH, this releases the block */
2330                 block = next_block;
2331         } while (NULL != block);
2332
2333         bp->bi_end_io(bp, bio_error_status);
2334 }
2335
2336 static void btrfsic_bh_end_io(struct buffer_head *bh, int uptodate)
2337 {
2338         struct btrfsic_block *block = (struct btrfsic_block *)bh->b_private;
2339         int iodone_w_error = !uptodate;
2340         struct btrfsic_dev_state *dev_state;
2341
2342         BUG_ON(NULL == block);
2343         dev_state = block->dev_state;
2344         if ((dev_state->state->print_mask & BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2345                 printk(KERN_INFO
2346                        "bh_end_io(error=%d) for %c @%llu (%s/%llu/%d)\n",
2347                        iodone_w_error,
2348                        btrfsic_get_block_type(dev_state->state, block),
2349                        (unsigned long long)block->logical_bytenr,
2350                        block->dev_state->name,
2351                        (unsigned long long)block->dev_bytenr,
2352                        block->mirror_num);
2353
2354         block->iodone_w_error = iodone_w_error;
2355         if (block->submit_bio_bh_rw & REQ_FLUSH) {
2356                 dev_state->last_flush_gen++;
2357                 if ((dev_state->state->print_mask &
2358                      BTRFSIC_PRINT_MASK_END_IO_BIO_BH))
2359                         printk(KERN_INFO
2360                                "bh_end_io() new %s flush_gen=%llu\n",
2361                                dev_state->name,
2362                                (unsigned long long)dev_state->last_flush_gen);
2363         }
2364         if (block->submit_bio_bh_rw & REQ_FUA)
2365                 block->flush_gen = 0; /* FUA completed means block is on disk */
2366
2367         bh->b_private = block->orig_bio_bh_private;
2368         bh->b_end_io = block->orig_bio_bh_end_io.bh;
2369         block->is_iodone = 1; /* for FLUSH, this releases the block */
2370         bh->b_end_io(bh, uptodate);
2371 }
2372
2373 static int btrfsic_process_written_superblock(
2374                 struct btrfsic_state *state,
2375                 struct btrfsic_block *const superblock,
2376                 struct btrfs_super_block *const super_hdr)
2377 {
2378         int pass;
2379
2380         superblock->generation = btrfs_super_generation(super_hdr);
2381         if (!(superblock->generation > state->max_superblock_generation ||
2382               0 == state->max_superblock_generation)) {
2383                 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2384                         printk(KERN_INFO
2385                                "btrfsic: superblock @%llu (%s/%llu/%d)"
2386                                " with old gen %llu <= %llu\n",
2387                                (unsigned long long)superblock->logical_bytenr,
2388                                superblock->dev_state->name,
2389                                (unsigned long long)superblock->dev_bytenr,
2390                                superblock->mirror_num,
2391                                (unsigned long long)
2392                                btrfs_super_generation(super_hdr),
2393                                (unsigned long long)
2394                                state->max_superblock_generation);
2395         } else {
2396                 if (state->print_mask & BTRFSIC_PRINT_MASK_SUPERBLOCK_WRITE)
2397                         printk(KERN_INFO
2398                                "btrfsic: got new superblock @%llu (%s/%llu/%d)"
2399                                " with new gen %llu > %llu\n",
2400                                (unsigned long long)superblock->logical_bytenr,
2401                                superblock->dev_state->name,
2402                                (unsigned long long)superblock->dev_bytenr,
2403                                superblock->mirror_num,
2404                                (unsigned long long)
2405                                btrfs_super_generation(super_hdr),
2406                                (unsigned long long)
2407                                state->max_superblock_generation);
2408
2409                 state->max_superblock_generation =
2410                     btrfs_super_generation(super_hdr);
2411                 state->latest_superblock = superblock;
2412         }
2413
2414         for (pass = 0; pass < 3; pass++) {
2415                 int ret;
2416                 u64 next_bytenr;
2417                 struct btrfsic_block *next_block;
2418                 struct btrfsic_block_data_ctx tmp_next_block_ctx;
2419                 struct btrfsic_block_link *l;
2420                 int num_copies;
2421                 int mirror_num;
2422                 const char *additional_string = NULL;
2423                 struct btrfs_disk_key tmp_disk_key;
2424
2425                 tmp_disk_key.type = BTRFS_ROOT_ITEM_KEY;
2426                 tmp_disk_key.offset = 0;
2427
2428                 switch (pass) {
2429                 case 0:
2430                         tmp_disk_key.objectid =
2431                             cpu_to_le64(BTRFS_ROOT_TREE_OBJECTID);
2432                         additional_string = "root ";
2433                         next_bytenr = btrfs_super_root(super_hdr);
2434                         if (state->print_mask &
2435                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2436                                 printk(KERN_INFO "root@%llu\n",
2437                                        (unsigned long long)next_bytenr);
2438                         break;
2439                 case 1:
2440                         tmp_disk_key.objectid =
2441                             cpu_to_le64(BTRFS_CHUNK_TREE_OBJECTID);
2442                         additional_string = "chunk ";
2443                         next_bytenr = btrfs_super_chunk_root(super_hdr);
2444                         if (state->print_mask &
2445                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2446                                 printk(KERN_INFO "chunk@%llu\n",
2447                                        (unsigned long long)next_bytenr);
2448                         break;
2449                 case 2:
2450                         tmp_disk_key.objectid =
2451                             cpu_to_le64(BTRFS_TREE_LOG_OBJECTID);
2452                         additional_string = "log ";
2453                         next_bytenr = btrfs_super_log_root(super_hdr);
2454                         if (0 == next_bytenr)
2455                                 continue;
2456                         if (state->print_mask &
2457                             BTRFSIC_PRINT_MASK_ROOT_CHUNK_LOG_TREE_LOCATION)
2458                                 printk(KERN_INFO "log@%llu\n",
2459                                        (unsigned long long)next_bytenr);
2460                         break;
2461                 }
2462
2463                 num_copies =
2464                     btrfs_num_copies(&state->root->fs_info->mapping_tree,
2465                                      next_bytenr, BTRFS_SUPER_INFO_SIZE);
2466                 if (state->print_mask & BTRFSIC_PRINT_MASK_NUM_COPIES)
2467                         printk(KERN_INFO "num_copies(log_bytenr=%llu) = %d\n",
2468                                (unsigned long long)next_bytenr, num_copies);
2469                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2470                         int was_created;
2471
2472                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2473                                 printk(KERN_INFO
2474                                        "btrfsic_process_written_superblock("
2475                                        "mirror_num=%d)\n", mirror_num);
2476                         ret = btrfsic_map_block(state, next_bytenr,
2477                                                 BTRFS_SUPER_INFO_SIZE,
2478                                                 &tmp_next_block_ctx,
2479                                                 mirror_num);
2480                         if (ret) {
2481                                 printk(KERN_INFO
2482                                        "btrfsic: btrfsic_map_block(@%llu,"
2483                                        " mirror=%d) failed!\n",
2484                                        (unsigned long long)next_bytenr,
2485                                        mirror_num);
2486                                 return -1;
2487                         }
2488
2489                         next_block = btrfsic_block_lookup_or_add(
2490                                         state,
2491                                         &tmp_next_block_ctx,
2492                                         additional_string,
2493                                         1, 0, 1,
2494                                         mirror_num,
2495                                         &was_created);
2496                         if (NULL == next_block) {
2497                                 printk(KERN_INFO
2498                                        "btrfsic: error, kmalloc failed!\n");
2499                                 btrfsic_release_block_ctx(&tmp_next_block_ctx);
2500                                 return -1;
2501                         }
2502
2503                         next_block->disk_key = tmp_disk_key;
2504                         if (was_created)
2505                                 next_block->generation =
2506                                     BTRFSIC_GENERATION_UNKNOWN;
2507                         l = btrfsic_block_link_lookup_or_add(
2508                                         state,
2509                                         &tmp_next_block_ctx,
2510                                         next_block,
2511                                         superblock,
2512                                         BTRFSIC_GENERATION_UNKNOWN);
2513                         btrfsic_release_block_ctx(&tmp_next_block_ctx);
2514                         if (NULL == l)
2515                                 return -1;
2516                 }
2517         }
2518
2519         if (-1 == btrfsic_check_all_ref_blocks(state, superblock, 0)) {
2520                 WARN_ON(1);
2521                 btrfsic_dump_tree(state);
2522         }
2523
2524         return 0;
2525 }
2526
2527 static int btrfsic_check_all_ref_blocks(struct btrfsic_state *state,
2528                                         struct btrfsic_block *const block,
2529                                         int recursion_level)
2530 {
2531         struct list_head *elem_ref_to;
2532         int ret = 0;
2533
2534         if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2535                 /*
2536                  * Note that this situation can happen and does not
2537                  * indicate an error in regular cases. It happens
2538                  * when disk blocks are freed and later reused.
2539                  * The check-integrity module is not aware of any
2540                  * block free operations, it just recognizes block
2541                  * write operations. Therefore it keeps the linkage
2542                  * information for a block until a block is
2543                  * rewritten. This can temporarily cause incorrect
2544                  * and even circular linkage informations. This
2545                  * causes no harm unless such blocks are referenced
2546                  * by the most recent super block.
2547                  */
2548                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2549                         printk(KERN_INFO
2550                                "btrfsic: abort cyclic linkage (case 1).\n");
2551
2552                 return ret;
2553         }
2554
2555         /*
2556          * This algorithm is recursive because the amount of used stack
2557          * space is very small and the max recursion depth is limited.
2558          */
2559         list_for_each(elem_ref_to, &block->ref_to_list) {
2560                 const struct btrfsic_block_link *const l =
2561                     list_entry(elem_ref_to, struct btrfsic_block_link,
2562                                node_ref_to);
2563
2564                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2565                         printk(KERN_INFO
2566                                "rl=%d, %c @%llu (%s/%llu/%d)"
2567                                " %u* refers to %c @%llu (%s/%llu/%d)\n",
2568                                recursion_level,
2569                                btrfsic_get_block_type(state, block),
2570                                (unsigned long long)block->logical_bytenr,
2571                                block->dev_state->name,
2572                                (unsigned long long)block->dev_bytenr,
2573                                block->mirror_num,
2574                                l->ref_cnt,
2575                                btrfsic_get_block_type(state, l->block_ref_to),
2576                                (unsigned long long)
2577                                l->block_ref_to->logical_bytenr,
2578                                l->block_ref_to->dev_state->name,
2579                                (unsigned long long)l->block_ref_to->dev_bytenr,
2580                                l->block_ref_to->mirror_num);
2581                 if (l->block_ref_to->never_written) {
2582                         printk(KERN_INFO "btrfs: attempt to write superblock"
2583                                " which references block %c @%llu (%s/%llu/%d)"
2584                                " which is never written!\n",
2585                                btrfsic_get_block_type(state, l->block_ref_to),
2586                                (unsigned long long)
2587                                l->block_ref_to->logical_bytenr,
2588                                l->block_ref_to->dev_state->name,
2589                                (unsigned long long)l->block_ref_to->dev_bytenr,
2590                                l->block_ref_to->mirror_num);
2591                         ret = -1;
2592                 } else if (!l->block_ref_to->is_iodone) {
2593                         printk(KERN_INFO "btrfs: attempt to write superblock"
2594                                " which references block %c @%llu (%s/%llu/%d)"
2595                                " which is not yet iodone!\n",
2596                                btrfsic_get_block_type(state, l->block_ref_to),
2597                                (unsigned long long)
2598                                l->block_ref_to->logical_bytenr,
2599                                l->block_ref_to->dev_state->name,
2600                                (unsigned long long)l->block_ref_to->dev_bytenr,
2601                                l->block_ref_to->mirror_num);
2602                         ret = -1;
2603                 } else if (l->parent_generation !=
2604                            l->block_ref_to->generation &&
2605                            BTRFSIC_GENERATION_UNKNOWN !=
2606                            l->parent_generation &&
2607                            BTRFSIC_GENERATION_UNKNOWN !=
2608                            l->block_ref_to->generation) {
2609                         printk(KERN_INFO "btrfs: attempt to write superblock"
2610                                " which references block %c @%llu (%s/%llu/%d)"
2611                                " with generation %llu !="
2612                                " parent generation %llu!\n",
2613                                btrfsic_get_block_type(state, l->block_ref_to),
2614                                (unsigned long long)
2615                                l->block_ref_to->logical_bytenr,
2616                                l->block_ref_to->dev_state->name,
2617                                (unsigned long long)l->block_ref_to->dev_bytenr,
2618                                l->block_ref_to->mirror_num,
2619                                (unsigned long long)l->block_ref_to->generation,
2620                                (unsigned long long)l->parent_generation);
2621                         ret = -1;
2622                 } else if (l->block_ref_to->flush_gen >
2623                            l->block_ref_to->dev_state->last_flush_gen) {
2624                         printk(KERN_INFO "btrfs: attempt to write superblock"
2625                                " which references block %c @%llu (%s/%llu/%d)"
2626                                " which is not flushed out of disk's write cache"
2627                                " (block flush_gen=%llu,"
2628                                " dev->flush_gen=%llu)!\n",
2629                                btrfsic_get_block_type(state, l->block_ref_to),
2630                                (unsigned long long)
2631                                l->block_ref_to->logical_bytenr,
2632                                l->block_ref_to->dev_state->name,
2633                                (unsigned long long)l->block_ref_to->dev_bytenr,
2634                                l->block_ref_to->mirror_num,
2635                                (unsigned long long)block->flush_gen,
2636                                (unsigned long long)
2637                                l->block_ref_to->dev_state->last_flush_gen);
2638                         ret = -1;
2639                 } else if (-1 == btrfsic_check_all_ref_blocks(state,
2640                                                               l->block_ref_to,
2641                                                               recursion_level +
2642                                                               1)) {
2643                         ret = -1;
2644                 }
2645         }
2646
2647         return ret;
2648 }
2649
2650 static int btrfsic_is_block_ref_by_superblock(
2651                 const struct btrfsic_state *state,
2652                 const struct btrfsic_block *block,
2653                 int recursion_level)
2654 {
2655         struct list_head *elem_ref_from;
2656
2657         if (recursion_level >= 3 + BTRFS_MAX_LEVEL) {
2658                 /* refer to comment at "abort cyclic linkage (case 1)" */
2659                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2660                         printk(KERN_INFO
2661                                "btrfsic: abort cyclic linkage (case 2).\n");
2662
2663                 return 0;
2664         }
2665
2666         /*
2667          * This algorithm is recursive because the amount of used stack space
2668          * is very small and the max recursion depth is limited.
2669          */
2670         list_for_each(elem_ref_from, &block->ref_from_list) {
2671                 const struct btrfsic_block_link *const l =
2672                     list_entry(elem_ref_from, struct btrfsic_block_link,
2673                                node_ref_from);
2674
2675                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2676                         printk(KERN_INFO
2677                                "rl=%d, %c @%llu (%s/%llu/%d)"
2678                                " is ref %u* from %c @%llu (%s/%llu/%d)\n",
2679                                recursion_level,
2680                                btrfsic_get_block_type(state, block),
2681                                (unsigned long long)block->logical_bytenr,
2682                                block->dev_state->name,
2683                                (unsigned long long)block->dev_bytenr,
2684                                block->mirror_num,
2685                                l->ref_cnt,
2686                                btrfsic_get_block_type(state, l->block_ref_from),
2687                                (unsigned long long)
2688                                l->block_ref_from->logical_bytenr,
2689                                l->block_ref_from->dev_state->name,
2690                                (unsigned long long)
2691                                l->block_ref_from->dev_bytenr,
2692                                l->block_ref_from->mirror_num);
2693                 if (l->block_ref_from->is_superblock &&
2694                     state->latest_superblock->dev_bytenr ==
2695                     l->block_ref_from->dev_bytenr &&
2696                     state->latest_superblock->dev_state->bdev ==
2697                     l->block_ref_from->dev_state->bdev)
2698                         return 1;
2699                 else if (btrfsic_is_block_ref_by_superblock(state,
2700                                                             l->block_ref_from,
2701                                                             recursion_level +
2702                                                             1))
2703                         return 1;
2704         }
2705
2706         return 0;
2707 }
2708
2709 static void btrfsic_print_add_link(const struct btrfsic_state *state,
2710                                    const struct btrfsic_block_link *l)
2711 {
2712         printk(KERN_INFO
2713                "Add %u* link from %c @%llu (%s/%llu/%d)"
2714                " to %c @%llu (%s/%llu/%d).\n",
2715                l->ref_cnt,
2716                btrfsic_get_block_type(state, l->block_ref_from),
2717                (unsigned long long)l->block_ref_from->logical_bytenr,
2718                l->block_ref_from->dev_state->name,
2719                (unsigned long long)l->block_ref_from->dev_bytenr,
2720                l->block_ref_from->mirror_num,
2721                btrfsic_get_block_type(state, l->block_ref_to),
2722                (unsigned long long)l->block_ref_to->logical_bytenr,
2723                l->block_ref_to->dev_state->name,
2724                (unsigned long long)l->block_ref_to->dev_bytenr,
2725                l->block_ref_to->mirror_num);
2726 }
2727
2728 static void btrfsic_print_rem_link(const struct btrfsic_state *state,
2729                                    const struct btrfsic_block_link *l)
2730 {
2731         printk(KERN_INFO
2732                "Rem %u* link from %c @%llu (%s/%llu/%d)"
2733                " to %c @%llu (%s/%llu/%d).\n",
2734                l->ref_cnt,
2735                btrfsic_get_block_type(state, l->block_ref_from),
2736                (unsigned long long)l->block_ref_from->logical_bytenr,
2737                l->block_ref_from->dev_state->name,
2738                (unsigned long long)l->block_ref_from->dev_bytenr,
2739                l->block_ref_from->mirror_num,
2740                btrfsic_get_block_type(state, l->block_ref_to),
2741                (unsigned long long)l->block_ref_to->logical_bytenr,
2742                l->block_ref_to->dev_state->name,
2743                (unsigned long long)l->block_ref_to->dev_bytenr,
2744                l->block_ref_to->mirror_num);
2745 }
2746
2747 static char btrfsic_get_block_type(const struct btrfsic_state *state,
2748                                    const struct btrfsic_block *block)
2749 {
2750         if (block->is_superblock &&
2751             state->latest_superblock->dev_bytenr == block->dev_bytenr &&
2752             state->latest_superblock->dev_state->bdev == block->dev_state->bdev)
2753                 return 'S';
2754         else if (block->is_superblock)
2755                 return 's';
2756         else if (block->is_metadata)
2757                 return 'M';
2758         else
2759                 return 'D';
2760 }
2761
2762 static void btrfsic_dump_tree(const struct btrfsic_state *state)
2763 {
2764         btrfsic_dump_tree_sub(state, state->latest_superblock, 0);
2765 }
2766
2767 static void btrfsic_dump_tree_sub(const struct btrfsic_state *state,
2768                                   const struct btrfsic_block *block,
2769                                   int indent_level)
2770 {
2771         struct list_head *elem_ref_to;
2772         int indent_add;
2773         static char buf[80];
2774         int cursor_position;
2775
2776         /*
2777          * Should better fill an on-stack buffer with a complete line and
2778          * dump it at once when it is time to print a newline character.
2779          */
2780
2781         /*
2782          * This algorithm is recursive because the amount of used stack space
2783          * is very small and the max recursion depth is limited.
2784          */
2785         indent_add = sprintf(buf, "%c-%llu(%s/%llu/%d)",
2786                              btrfsic_get_block_type(state, block),
2787                              (unsigned long long)block->logical_bytenr,
2788                              block->dev_state->name,
2789                              (unsigned long long)block->dev_bytenr,
2790                              block->mirror_num);
2791         if (indent_level + indent_add > BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2792                 printk("[...]\n");
2793                 return;
2794         }
2795         printk(buf);
2796         indent_level += indent_add;
2797         if (list_empty(&block->ref_to_list)) {
2798                 printk("\n");
2799                 return;
2800         }
2801         if (block->mirror_num > 1 &&
2802             !(state->print_mask & BTRFSIC_PRINT_MASK_TREE_WITH_ALL_MIRRORS)) {
2803                 printk(" [...]\n");
2804                 return;
2805         }
2806
2807         cursor_position = indent_level;
2808         list_for_each(elem_ref_to, &block->ref_to_list) {
2809                 const struct btrfsic_block_link *const l =
2810                     list_entry(elem_ref_to, struct btrfsic_block_link,
2811                                node_ref_to);
2812
2813                 while (cursor_position < indent_level) {
2814                         printk(" ");
2815                         cursor_position++;
2816                 }
2817                 if (l->ref_cnt > 1)
2818                         indent_add = sprintf(buf, " %d*--> ", l->ref_cnt);
2819                 else
2820                         indent_add = sprintf(buf, " --> ");
2821                 if (indent_level + indent_add >
2822                     BTRFSIC_TREE_DUMP_MAX_INDENT_LEVEL) {
2823                         printk("[...]\n");
2824                         cursor_position = 0;
2825                         continue;
2826                 }
2827
2828                 printk(buf);
2829
2830                 btrfsic_dump_tree_sub(state, l->block_ref_to,
2831                                       indent_level + indent_add);
2832                 cursor_position = 0;
2833         }
2834 }
2835
2836 static struct btrfsic_block_link *btrfsic_block_link_lookup_or_add(
2837                 struct btrfsic_state *state,
2838                 struct btrfsic_block_data_ctx *next_block_ctx,
2839                 struct btrfsic_block *next_block,
2840                 struct btrfsic_block *from_block,
2841                 u64 parent_generation)
2842 {
2843         struct btrfsic_block_link *l;
2844
2845         l = btrfsic_block_link_hashtable_lookup(next_block_ctx->dev->bdev,
2846                                                 next_block_ctx->dev_bytenr,
2847                                                 from_block->dev_state->bdev,
2848                                                 from_block->dev_bytenr,
2849                                                 &state->block_link_hashtable);
2850         if (NULL == l) {
2851                 l = btrfsic_block_link_alloc();
2852                 if (NULL == l) {
2853                         printk(KERN_INFO
2854                                "btrfsic: error, kmalloc" " failed!\n");
2855                         return NULL;
2856                 }
2857
2858                 l->block_ref_to = next_block;
2859                 l->block_ref_from = from_block;
2860                 l->ref_cnt = 1;
2861                 l->parent_generation = parent_generation;
2862
2863                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2864                         btrfsic_print_add_link(state, l);
2865
2866                 list_add(&l->node_ref_to, &from_block->ref_to_list);
2867                 list_add(&l->node_ref_from, &next_block->ref_from_list);
2868
2869                 btrfsic_block_link_hashtable_add(l,
2870                                                  &state->block_link_hashtable);
2871         } else {
2872                 l->ref_cnt++;
2873                 l->parent_generation = parent_generation;
2874                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2875                         btrfsic_print_add_link(state, l);
2876         }
2877
2878         return l;
2879 }
2880
2881 static struct btrfsic_block *btrfsic_block_lookup_or_add(
2882                 struct btrfsic_state *state,
2883                 struct btrfsic_block_data_ctx *block_ctx,
2884                 const char *additional_string,
2885                 int is_metadata,
2886                 int is_iodone,
2887                 int never_written,
2888                 int mirror_num,
2889                 int *was_created)
2890 {
2891         struct btrfsic_block *block;
2892
2893         block = btrfsic_block_hashtable_lookup(block_ctx->dev->bdev,
2894                                                block_ctx->dev_bytenr,
2895                                                &state->block_hashtable);
2896         if (NULL == block) {
2897                 struct btrfsic_dev_state *dev_state;
2898
2899                 block = btrfsic_block_alloc();
2900                 if (NULL == block) {
2901                         printk(KERN_INFO "btrfsic: error, kmalloc failed!\n");
2902                         return NULL;
2903                 }
2904                 dev_state = btrfsic_dev_state_lookup(block_ctx->dev->bdev);
2905                 if (NULL == dev_state) {
2906                         printk(KERN_INFO
2907                                "btrfsic: error, lookup dev_state failed!\n");
2908                         btrfsic_block_free(block);
2909                         return NULL;
2910                 }
2911                 block->dev_state = dev_state;
2912                 block->dev_bytenr = block_ctx->dev_bytenr;
2913                 block->logical_bytenr = block_ctx->start;
2914                 block->is_metadata = is_metadata;
2915                 block->is_iodone = is_iodone;
2916                 block->never_written = never_written;
2917                 block->mirror_num = mirror_num;
2918                 if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
2919                         printk(KERN_INFO
2920                                "New %s%c-block @%llu (%s/%llu/%d)\n",
2921                                additional_string,
2922                                btrfsic_get_block_type(state, block),
2923                                (unsigned long long)block->logical_bytenr,
2924                                dev_state->name,
2925                                (unsigned long long)block->dev_bytenr,
2926                                mirror_num);
2927                 list_add(&block->all_blocks_node, &state->all_blocks_list);
2928                 btrfsic_block_hashtable_add(block, &state->block_hashtable);
2929                 if (NULL != was_created)
2930                         *was_created = 1;
2931         } else {
2932                 if (NULL != was_created)
2933                         *was_created = 0;
2934         }
2935
2936         return block;
2937 }
2938
2939 static void btrfsic_cmp_log_and_dev_bytenr(struct btrfsic_state *state,
2940                                            u64 bytenr,
2941                                            struct btrfsic_dev_state *dev_state,
2942                                            u64 dev_bytenr)
2943 {
2944         int num_copies;
2945         int mirror_num;
2946         int ret;
2947         struct btrfsic_block_data_ctx block_ctx;
2948         int match = 0;
2949
2950         num_copies = btrfs_num_copies(&state->root->fs_info->mapping_tree,
2951                                       bytenr, state->metablock_size);
2952
2953         for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2954                 ret = btrfsic_map_block(state, bytenr, state->metablock_size,
2955                                         &block_ctx, mirror_num);
2956                 if (ret) {
2957                         printk(KERN_INFO "btrfsic:"
2958                                " btrfsic_map_block(logical @%llu,"
2959                                " mirror %d) failed!\n",
2960                                (unsigned long long)bytenr, mirror_num);
2961                         continue;
2962                 }
2963
2964                 if (dev_state->bdev == block_ctx.dev->bdev &&
2965                     dev_bytenr == block_ctx.dev_bytenr) {
2966                         match++;
2967                         btrfsic_release_block_ctx(&block_ctx);
2968                         break;
2969                 }
2970                 btrfsic_release_block_ctx(&block_ctx);
2971         }
2972
2973         if (!match) {
2974                 printk(KERN_INFO "btrfs: attempt to write M-block which contains logical bytenr that doesn't map to dev+physical bytenr of submit_bio,"
2975                        " buffer->log_bytenr=%llu, submit_bio(bdev=%s,"
2976                        " phys_bytenr=%llu)!\n",
2977                        (unsigned long long)bytenr, dev_state->name,
2978                        (unsigned long long)dev_bytenr);
2979                 for (mirror_num = 1; mirror_num <= num_copies; mirror_num++) {
2980                         ret = btrfsic_map_block(state, bytenr,
2981                                                 state->metablock_size,
2982                                                 &block_ctx, mirror_num);
2983                         if (ret)
2984                                 continue;
2985
2986                         printk(KERN_INFO "Read logical bytenr @%llu maps to"
2987                                " (%s/%llu/%d)\n",
2988                                (unsigned long long)bytenr,
2989                                block_ctx.dev->name,
2990                                (unsigned long long)block_ctx.dev_bytenr,
2991                                mirror_num);
2992                 }
2993                 WARN_ON(1);
2994         }
2995 }
2996
2997 static struct btrfsic_dev_state *btrfsic_dev_state_lookup(
2998                 struct block_device *bdev)
2999 {
3000         struct btrfsic_dev_state *ds;
3001
3002         ds = btrfsic_dev_state_hashtable_lookup(bdev,
3003                                                 &btrfsic_dev_state_hashtable);
3004         return ds;
3005 }
3006
3007 int btrfsic_submit_bh(int rw, struct buffer_head *bh)
3008 {
3009         struct btrfsic_dev_state *dev_state;
3010
3011         if (!btrfsic_is_initialized)
3012                 return submit_bh(rw, bh);
3013
3014         mutex_lock(&btrfsic_mutex);
3015         /* since btrfsic_submit_bh() might also be called before
3016          * btrfsic_mount(), this might return NULL */
3017         dev_state = btrfsic_dev_state_lookup(bh->b_bdev);
3018
3019         /* Only called to write the superblock (incl. FLUSH/FUA) */
3020         if (NULL != dev_state &&
3021             (rw & WRITE) && bh->b_size > 0) {
3022                 u64 dev_bytenr;
3023
3024                 dev_bytenr = 4096 * bh->b_blocknr;
3025                 if (dev_state->state->print_mask &
3026                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3027                         printk(KERN_INFO
3028                                "submit_bh(rw=0x%x, blocknr=%lu (bytenr %llu),"
3029                                " size=%lu, data=%p, bdev=%p)\n",
3030                                rw, (unsigned long)bh->b_blocknr,
3031                                (unsigned long long)dev_bytenr,
3032                                (unsigned long)bh->b_size, bh->b_data,
3033                                bh->b_bdev);
3034                 btrfsic_process_written_block(dev_state, dev_bytenr,
3035                                               &bh->b_data, 1, NULL,
3036                                               NULL, bh, rw);
3037         } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
3038                 if (dev_state->state->print_mask &
3039                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3040                         printk(KERN_INFO
3041                                "submit_bh(rw=0x%x FLUSH, bdev=%p)\n",
3042                                rw, bh->b_bdev);
3043                 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
3044                         if ((dev_state->state->print_mask &
3045                              (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3046                               BTRFSIC_PRINT_MASK_VERBOSE)))
3047                                 printk(KERN_INFO
3048                                        "btrfsic_submit_bh(%s) with FLUSH"
3049                                        " but dummy block already in use"
3050                                        " (ignored)!\n",
3051                                        dev_state->name);
3052                 } else {
3053                         struct btrfsic_block *const block =
3054                                 &dev_state->dummy_block_for_bio_bh_flush;
3055
3056                         block->is_iodone = 0;
3057                         block->never_written = 0;
3058                         block->iodone_w_error = 0;
3059                         block->flush_gen = dev_state->last_flush_gen + 1;
3060                         block->submit_bio_bh_rw = rw;
3061                         block->orig_bio_bh_private = bh->b_private;
3062                         block->orig_bio_bh_end_io.bh = bh->b_end_io;
3063                         block->next_in_same_bio = NULL;
3064                         bh->b_private = block;
3065                         bh->b_end_io = btrfsic_bh_end_io;
3066                 }
3067         }
3068         mutex_unlock(&btrfsic_mutex);
3069         return submit_bh(rw, bh);
3070 }
3071
3072 void btrfsic_submit_bio(int rw, struct bio *bio)
3073 {
3074         struct btrfsic_dev_state *dev_state;
3075
3076         if (!btrfsic_is_initialized) {
3077                 submit_bio(rw, bio);
3078                 return;
3079         }
3080
3081         mutex_lock(&btrfsic_mutex);
3082         /* since btrfsic_submit_bio() is also called before
3083          * btrfsic_mount(), this might return NULL */
3084         dev_state = btrfsic_dev_state_lookup(bio->bi_bdev);
3085         if (NULL != dev_state &&
3086             (rw & WRITE) && NULL != bio->bi_io_vec) {
3087                 unsigned int i;
3088                 u64 dev_bytenr;
3089                 int bio_is_patched;
3090                 char **mapped_datav;
3091
3092                 dev_bytenr = 512 * bio->bi_sector;
3093                 bio_is_patched = 0;
3094                 if (dev_state->state->print_mask &
3095                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3096                         printk(KERN_INFO
3097                                "submit_bio(rw=0x%x, bi_vcnt=%u,"
3098                                " bi_sector=%lu (bytenr %llu), bi_bdev=%p)\n",
3099                                rw, bio->bi_vcnt, (unsigned long)bio->bi_sector,
3100                                (unsigned long long)dev_bytenr,
3101                                bio->bi_bdev);
3102
3103                 mapped_datav = kmalloc(sizeof(*mapped_datav) * bio->bi_vcnt,
3104                                        GFP_NOFS);
3105                 if (!mapped_datav)
3106                         goto leave;
3107                 for (i = 0; i < bio->bi_vcnt; i++) {
3108                         BUG_ON(bio->bi_io_vec[i].bv_len != PAGE_CACHE_SIZE);
3109                         mapped_datav[i] = kmap(bio->bi_io_vec[i].bv_page);
3110                         if (!mapped_datav[i]) {
3111                                 while (i > 0) {
3112                                         i--;
3113                                         kunmap(bio->bi_io_vec[i].bv_page);
3114                                 }
3115                                 kfree(mapped_datav);
3116                                 goto leave;
3117                         }
3118                         if ((BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3119                              BTRFSIC_PRINT_MASK_VERBOSE) ==
3120                             (dev_state->state->print_mask &
3121                              (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3122                               BTRFSIC_PRINT_MASK_VERBOSE)))
3123                                 printk(KERN_INFO
3124                                        "#%u: page=%p, len=%u, offset=%u\n",
3125                                        i, bio->bi_io_vec[i].bv_page,
3126                                        bio->bi_io_vec[i].bv_len,
3127                                        bio->bi_io_vec[i].bv_offset);
3128                 }
3129                 btrfsic_process_written_block(dev_state, dev_bytenr,
3130                                               mapped_datav, bio->bi_vcnt,
3131                                               bio, &bio_is_patched,
3132                                               NULL, rw);
3133                 while (i > 0) {
3134                         i--;
3135                         kunmap(bio->bi_io_vec[i].bv_page);
3136                 }
3137                 kfree(mapped_datav);
3138         } else if (NULL != dev_state && (rw & REQ_FLUSH)) {
3139                 if (dev_state->state->print_mask &
3140                     BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH)
3141                         printk(KERN_INFO
3142                                "submit_bio(rw=0x%x FLUSH, bdev=%p)\n",
3143                                rw, bio->bi_bdev);
3144                 if (!dev_state->dummy_block_for_bio_bh_flush.is_iodone) {
3145                         if ((dev_state->state->print_mask &
3146                              (BTRFSIC_PRINT_MASK_SUBMIT_BIO_BH |
3147                               BTRFSIC_PRINT_MASK_VERBOSE)))
3148                                 printk(KERN_INFO
3149                                        "btrfsic_submit_bio(%s) with FLUSH"
3150                                        " but dummy block already in use"
3151                                        " (ignored)!\n",
3152                                        dev_state->name);
3153                 } else {
3154                         struct btrfsic_block *const block =
3155                                 &dev_state->dummy_block_for_bio_bh_flush;
3156
3157                         block->is_iodone = 0;
3158                         block->never_written = 0;
3159                         block->iodone_w_error = 0;
3160                         block->flush_gen = dev_state->last_flush_gen + 1;
3161                         block->submit_bio_bh_rw = rw;
3162                         block->orig_bio_bh_private = bio->bi_private;
3163                         block->orig_bio_bh_end_io.bio = bio->bi_end_io;
3164                         block->next_in_same_bio = NULL;
3165                         bio->bi_private = block;
3166                         bio->bi_end_io = btrfsic_bio_end_io;
3167                 }
3168         }
3169 leave:
3170         mutex_unlock(&btrfsic_mutex);
3171
3172         submit_bio(rw, bio);
3173 }
3174
3175 int btrfsic_mount(struct btrfs_root *root,
3176                   struct btrfs_fs_devices *fs_devices,
3177                   int including_extent_data, u32 print_mask)
3178 {
3179         int ret;
3180         struct btrfsic_state *state;
3181         struct list_head *dev_head = &fs_devices->devices;
3182         struct btrfs_device *device;
3183
3184         if (root->nodesize != root->leafsize) {
3185                 printk(KERN_INFO
3186                        "btrfsic: cannot handle nodesize %d != leafsize %d!\n",
3187                        root->nodesize, root->leafsize);
3188                 return -1;
3189         }
3190         if (root->nodesize & ((u64)PAGE_CACHE_SIZE - 1)) {
3191                 printk(KERN_INFO
3192                        "btrfsic: cannot handle nodesize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3193                        root->nodesize, (unsigned long)PAGE_CACHE_SIZE);
3194                 return -1;
3195         }
3196         if (root->leafsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3197                 printk(KERN_INFO
3198                        "btrfsic: cannot handle leafsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3199                        root->leafsize, (unsigned long)PAGE_CACHE_SIZE);
3200                 return -1;
3201         }
3202         if (root->sectorsize & ((u64)PAGE_CACHE_SIZE - 1)) {
3203                 printk(KERN_INFO
3204                        "btrfsic: cannot handle sectorsize %d not being a multiple of PAGE_CACHE_SIZE %ld!\n",
3205                        root->sectorsize, (unsigned long)PAGE_CACHE_SIZE);
3206                 return -1;
3207         }
3208         state = kzalloc(sizeof(*state), GFP_NOFS);
3209         if (NULL == state) {
3210                 printk(KERN_INFO "btrfs check-integrity: kmalloc() failed!\n");
3211                 return -1;
3212         }
3213
3214         if (!btrfsic_is_initialized) {
3215                 mutex_init(&btrfsic_mutex);
3216                 btrfsic_dev_state_hashtable_init(&btrfsic_dev_state_hashtable);
3217                 btrfsic_is_initialized = 1;
3218         }
3219         mutex_lock(&btrfsic_mutex);
3220         state->root = root;
3221         state->print_mask = print_mask;
3222         state->include_extent_data = including_extent_data;
3223         state->csum_size = 0;
3224         state->metablock_size = root->nodesize;
3225         state->datablock_size = root->sectorsize;
3226         INIT_LIST_HEAD(&state->all_blocks_list);
3227         btrfsic_block_hashtable_init(&state->block_hashtable);
3228         btrfsic_block_link_hashtable_init(&state->block_link_hashtable);
3229         state->max_superblock_generation = 0;
3230         state->latest_superblock = NULL;
3231
3232         list_for_each_entry(device, dev_head, dev_list) {
3233                 struct btrfsic_dev_state *ds;
3234                 char *p;
3235
3236                 if (!device->bdev || !device->name)
3237                         continue;
3238
3239                 ds = btrfsic_dev_state_alloc();
3240                 if (NULL == ds) {
3241                         printk(KERN_INFO
3242                                "btrfs check-integrity: kmalloc() failed!\n");
3243                         mutex_unlock(&btrfsic_mutex);
3244                         return -1;
3245                 }
3246                 ds->bdev = device->bdev;
3247                 ds->state = state;
3248                 bdevname(ds->bdev, ds->name);
3249                 ds->name[BDEVNAME_SIZE - 1] = '\0';
3250                 for (p = ds->name; *p != '\0'; p++);
3251                 while (p > ds->name && *p != '/')
3252                         p--;
3253                 if (*p == '/')
3254                         p++;
3255                 strlcpy(ds->name, p, sizeof(ds->name));
3256                 btrfsic_dev_state_hashtable_add(ds,
3257                                                 &btrfsic_dev_state_hashtable);
3258         }
3259
3260         ret = btrfsic_process_superblock(state, fs_devices);
3261         if (0 != ret) {
3262                 mutex_unlock(&btrfsic_mutex);
3263                 btrfsic_unmount(root, fs_devices);
3264                 return ret;
3265         }
3266
3267         if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_DATABASE)
3268                 btrfsic_dump_database(state);
3269         if (state->print_mask & BTRFSIC_PRINT_MASK_INITIAL_TREE)
3270                 btrfsic_dump_tree(state);
3271
3272         mutex_unlock(&btrfsic_mutex);
3273         return 0;
3274 }
3275
3276 void btrfsic_unmount(struct btrfs_root *root,
3277                      struct btrfs_fs_devices *fs_devices)
3278 {
3279         struct list_head *elem_all;
3280         struct list_head *tmp_all;
3281         struct btrfsic_state *state;
3282         struct list_head *dev_head = &fs_devices->devices;
3283         struct btrfs_device *device;
3284
3285         if (!btrfsic_is_initialized)
3286                 return;
3287
3288         mutex_lock(&btrfsic_mutex);
3289
3290         state = NULL;
3291         list_for_each_entry(device, dev_head, dev_list) {
3292                 struct btrfsic_dev_state *ds;
3293
3294                 if (!device->bdev || !device->name)
3295                         continue;
3296
3297                 ds = btrfsic_dev_state_hashtable_lookup(
3298                                 device->bdev,
3299                                 &btrfsic_dev_state_hashtable);
3300                 if (NULL != ds) {
3301                         state = ds->state;
3302                         btrfsic_dev_state_hashtable_remove(ds);
3303                         btrfsic_dev_state_free(ds);
3304                 }
3305         }
3306
3307         if (NULL == state) {
3308                 printk(KERN_INFO
3309                        "btrfsic: error, cannot find state information"
3310                        " on umount!\n");
3311                 mutex_unlock(&btrfsic_mutex);
3312                 return;
3313         }
3314
3315         /*
3316          * Don't care about keeping the lists' state up to date,
3317          * just free all memory that was allocated dynamically.
3318          * Free the blocks and the block_links.
3319          */
3320         list_for_each_safe(elem_all, tmp_all, &state->all_blocks_list) {
3321                 struct btrfsic_block *const b_all =
3322                     list_entry(elem_all, struct btrfsic_block,
3323                                all_blocks_node);
3324                 struct list_head *elem_ref_to;
3325                 struct list_head *tmp_ref_to;
3326
3327                 list_for_each_safe(elem_ref_to, tmp_ref_to,
3328                                    &b_all->ref_to_list) {
3329                         struct btrfsic_block_link *const l =
3330                             list_entry(elem_ref_to,
3331                                        struct btrfsic_block_link,
3332                                        node_ref_to);
3333
3334                         if (state->print_mask & BTRFSIC_PRINT_MASK_VERBOSE)
3335                                 btrfsic_print_rem_link(state, l);
3336
3337                         l->ref_cnt--;
3338                         if (0 == l->ref_cnt)
3339                                 btrfsic_block_link_free(l);
3340                 }
3341
3342                 if (b_all->is_iodone || b_all->never_written)
3343                         btrfsic_block_free(b_all);
3344                 else
3345                         printk(KERN_INFO "btrfs: attempt to free %c-block"
3346                                " @%llu (%s/%llu/%d) on umount which is"
3347                                " not yet iodone!\n",
3348                                btrfsic_get_block_type(state, b_all),
3349                                (unsigned long long)b_all->logical_bytenr,
3350                                b_all->dev_state->name,
3351                                (unsigned long long)b_all->dev_bytenr,
3352                                b_all->mirror_num);
3353         }
3354
3355         mutex_unlock(&btrfsic_mutex);
3356
3357         kfree(state);
3358 }